pax_global_header00006660000000000000000000000064126532550460014522gustar00rootroot0000000000000052 comment=c9df88a284d448da5434c6ad2737b54a907f888c win-iconv-0.0.8/000077500000000000000000000000001265325504600134405ustar00rootroot00000000000000win-iconv-0.0.8/CMakeLists.txt000066400000000000000000000052331265325504600162030ustar00rootroot00000000000000
# CMake build for win-iconv.
#
# Options (defaults differ when WINCE is set, see below):
#   BUILD_TEST        build win_iconv_test and register it with CTest
#   BUILD_STATIC      build the static library (target iconv-static)
#   BUILD_SHARED      build the shared library (target iconv)
#   BUILD_EXECUTABLE  build the win_iconv executable
# Optional variable:
#   DEFAULT_LIBICONV_DLL  forwarded as -DDEFAULT_LIBICONV_DLL=<value>
#                         on non-WINCE builds

# cmake_minimum_required() has to run before project() so that the policy
# settings are in place while the project is being set up.
cmake_minimum_required(VERSION 2.6)
project(win_iconv)

# Let find_package() pick up the Find module shipped in this directory.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})

set(CPACK_GENERATOR "TBZ2")
include(CPack)

option(BUILD_TEST "build test executable" OFF)

# WINCE builds default to the static library only; everything else defaults
# to the shared library plus the command line tool.
if(WINCE)
    option(BUILD_STATIC "build the static library" ON)
    option(BUILD_SHARED "build the shared library" OFF)
    option(BUILD_EXECUTABLE "build the win_iconv executable" OFF)
else()
    option(BUILD_STATIC "build the static library" OFF)
    option(BUILD_SHARED "build the shared library" ON)
    option(BUILD_EXECUTABLE "build the win_iconv executable" ON)
endif()

if(BUILD_TEST)
    enable_testing()
endif()

if(MSVC)
    add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

if(NOT WINCE)
    add_definitions(-DUSE_LIBICONV_DLL)
    if(DEFAULT_LIBICONV_DLL)
        add_definitions(-DDEFAULT_LIBICONV_DLL=${DEFAULT_LIBICONV_DLL})
    endif()
else()
    # WINCE builds need the wcecompat headers and libraries.
    find_package(Wcecompat REQUIRED)
    include_directories(${WCECOMPAT_INCLUDE_DIR})
endif()

# Shared library, built from win_iconv.c with MAKE_DLL defined.
if(BUILD_SHARED)
    add_library(iconv SHARED win_iconv.c iconv.def)
    # PREFIX "" removes any platform library prefix from the output name.
    set_target_properties(iconv PROPERTIES COMPILE_FLAGS "-DMAKE_DLL" PREFIX "")
    if(WINCE)
        target_link_libraries(iconv ${WCECOMPAT_LIBRARIES})
    endif()
    install(TARGETS iconv RUNTIME DESTINATION bin
                          LIBRARY DESTINATION lib
                          ARCHIVE DESTINATION lib)
endif()

# Command line tool, built from the same source with MAKE_EXE defined.
if(BUILD_EXECUTABLE)
    add_executable(win_iconv win_iconv.c)
    set_target_properties(win_iconv PROPERTIES COMPILE_FLAGS "-DMAKE_EXE")
    if(WINCE)
        target_link_libraries(win_iconv ${WCECOMPAT_LIBRARIES})
    endif()
    install(TARGETS win_iconv RUNTIME DESTINATION bin
                              LIBRARY DESTINATION lib
                              ARCHIVE DESTINATION lib)
endif()

# Static library; the target is iconv-static but the output file is named iconv.
if(BUILD_STATIC)
    add_library(iconv-static STATIC win_iconv.c)
    set_target_properties(iconv-static PROPERTIES OUTPUT_NAME "iconv")
    if(WINCE)
        target_link_libraries(iconv-static ${WCECOMPAT_LIBRARIES})
    endif()
    install(TARGETS iconv-static RUNTIME DESTINATION bin
                                 LIBRARY DESTINATION lib
                                 ARCHIVE DESTINATION lib)
endif()

install(FILES iconv.h DESTINATION include)

if(BUILD_TEST)
    # tests:
    add_executable(win_iconv_test win_iconv_test.c)
    add_test(win_iconv_test win_iconv_test)
    if(WINCE)
        target_link_libraries(win_iconv_test ${WCECOMPAT_LIBRARIES})
    endif()
endif()
win-iconv-0.0.8/ChangeLog000066400000000000000000000105031265325504600152110ustar00rootroot000000000000002016-01-12 Yukihiro Nakadaira * win_iconv.c, win_iconv_test.c: Make >=0x80 byte illegal in ascii. * iconv.h: Add WINICONV_CONST macro. 2014-02-05 Yukihiro Nakadaira * win_iconv.c: Added alias. ISO_8859-* ISO_8859_* * win_iconv.c, win_iconv_test.c: Fixed for compiler warning. 2013-09-15 Yukihiro Nakadaira * iconv.h: Fixed c++ style comment. (Issue 21) (Thanks to bgilbert) 2012-11-22 Yukihiro Nakadaira * win_iconv.c: Fix warnings. (Issue 19) (Thanks to yselkowitz) 2012-10-21 Yukihiro Nakadaira * win_iconv.c, win_iconv_test.c: Add //ignore and -c flag. 2012-10-15 Yukihiro Nakadaira * win_iconv.c, win_iconv_test.c: cosmetic change. 2012-09-19 Yukihiro Nakadaira * iconv.h, win_iconv.c, win_iconv_test.c: Change iconv(3) prototype. "const char **inbuf" -> "char **inbuf" (Issue 8) * win_iconv.c: Change to not use TEXT macro for GetProcAddress. (Issue 17) (Thanks to EPienkowskia) * win_iconv_test.c: Fix for -DUNICODE. Use GetModuleFileNameA. 2011-10-28 Yukihiro Nakadaira * win_iconv.c: Add UCS-2. (Issue 14) (Thanks to j.g.rennison) 2011-10-24 Yukihiro Nakadaira * win_iconv.c: Add Big5-HKSCS alias. (Issue 13) (Thanks to timothy.ty.lee) 2011-09-06 Yukihiro Nakadaira * Makefile: Improvement of the creation of the DLL.
(Issue 10) (Thanks to vincent.torri) 2011-08-19 Yukihiro Nakadaira * win_iconv.c: Fixed a bug that assumption that sizeof(DWORD)==sizeof(void*) in find_imported_module_by_funcname. (Issue 7) (Thanks to j.g.rennison) 2011-08-13 Yukihiro Nakadaira * win_iconv.c, win_iconv_test.c: Fixed a bug that //translit flag does not work when transliterating to the default character. (Issue 9) (Thanks to j.g.rennison) 2011-07-26 Yukihiro Nakadaira * CMakeLists.txt: fix dll name with mingw. (Issue 6) (Thanks to kalevlember) 2011-05-19 Yukihiro Nakadaira * win_iconv.c: Add some more UCS aliases. Merge from Tor Lillqvist version. (Issue 4) (Thanks to mkbosmans) 2011-05-15 Yukihiro Nakadaira * Makefile: use variable for tools in Makefile (Issue 3) (Thanks to mkbosmans) 2011-01-13 Yukihiro Nakadaira * win_iconv_test.c: Removed unused variable. * win_iconv_test.c: Added USE_ICONV_H flag to compile with -liconv. (Issue 2) (Thanks to amorilia.gamebox) 2010-04-14 Patrick von Reth * added c++ support 2010-03-28 Patrick Spendrin * CMakeLists.txt, win_iconv.c: add CMake buildsystem, fix bug from issue tracker 2009-07-25 Yukihiro Nakadaira * win_iconv.c, readme.txt: doc fix 2009-07-06 Yukihiro Nakadaira * win_iconv.c, Makefile, readme.txt: doc fix 2009-06-19 Yukihiro Nakadaira * win_iconv.c: cosmetic change * win_iconv.c: Change Unicode BOM behavior 1. Remove the BOM when "fromcode" is utf-16 or utf-32. 2. Add the BOM when "tocode" is utf-16 or utf-32. 2009-06-18 Yukihiro Nakadaira * win_iconv.c: Fixed a bug that invalid input may cause an endless loop 2009-06-18 Yukihiro Nakadaira * win_iconv.c: Fixed a bug that libiconv_iconv_open() doesn't work (Christophe Benoit) 2008-04-01 Yukihiro Nakadaira * win_iconv.c: Added //TRANSLIT option. 
http://bugzilla.gnome.org/show_bug.cgi?id=524314 2008-03-20 Yukihiro Nakadaira * win_iconv.c: The dwFlags parameter to MultiByteToWideChars() must be zero for some code pages (Tor Lillqvist) 2008-03-19 Yukihiro Nakadaira * win_iconv.c: Added support for UCS-2 and GB18030 (Tor Lillqvist) 2007-12-03 Yukihiro Nakadaira * iconv.h: #include to use size_t 2007-11-28 Yukihiro Nakadaira * win_iconv.c: bug fix for two things (Tor Lillqvist) 1) This is probably not important: Add a function must_use_null_useddefaultchar() that checks for those codepages for which the docs for WideCharToMultiByte() say one has to use a NULL lpDefaultChar pointer. Don't know if this is actually needed, but better to be safe than sorry. 2) This is essential: In kernel_wctomb(), the code should first check if bufsize is zero, and return the E2BIG error in that case. 2007-11-26 Yukihiro Nakadaira * win_iconv.c: ISO-8859-1 should be CP28591, not CP1252 (Tor Lillqvist) 2007-11-26 Yukihiro Nakadaira * win_iconv.c: patch from Tor Lillqvist (with alteration) 2007-09-04 Yukihiro Nakadaira * : Initial import win-iconv-0.0.8/FindWcecompat.cmake000066400000000000000000000022761265325504600171740ustar00rootroot00000000000000# Try to find Wcecompat functionality # Once done this will define # # WCECOMPAT_FOUND - system has Wcecompat # WCECOMPAT_INCLUDE_DIR - Wcecompat include directory # WCECOMPAT_LIBRARIES - Libraries needed to use Wcecompat # # Copyright (c) 2010, Andreas Holzammer, # # Redistribution and use is allowed according to the terms of the BSD license. 
# Be quiet when both lookups were already resolved. The test uses the two
# find_path()/find_library() result variables; WCECOMPAT_LIB_FOUND is only a
# plain variable assigned further down, so it is not available here at the
# start of a fresh configure run.
if(WCECOMPAT_INCLUDE_DIR AND WCECOMPAT_LIBRARIES)
    set(Wcecompat_FIND_QUIETLY TRUE)
endif()

# The headers are looked up via errno.h, optionally in a "wcecompat" subdirectory.
find_path(WCECOMPAT_INCLUDE_DIR errno.h PATH_SUFFIXES wcecompat)

# The library is only searched for once the headers have been located.
set(WCECOMPAT_LIB_FOUND FALSE)
if(WCECOMPAT_INCLUDE_DIR)
    find_library(WCECOMPAT_LIBRARIES NAMES wcecompat wcecompatex)
    if(WCECOMPAT_LIBRARIES)
        set(WCECOMPAT_LIB_FOUND TRUE)
    endif()
endif()

# I have no idea what this is about, but it seems to be used quite often, so I add this here
set(WCECOMPAT_CONST const)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Wcecompat DEFAULT_MSG WCECOMPAT_LIBRARIES WCECOMPAT_LIB_FOUND)

mark_as_advanced(WCECOMPAT_INCLUDE_DIR WCECOMPAT_LIBRARIES WCECOMPAT_CONST WCECOMPAT_LIB_FOUND)
win-iconv-0.0.8/Makefile000066400000000000000000000054251265325504600151060ustar00rootroot00000000000000
# Makefile for win-iconv
#
# Variables that can be overridden:
#
#   CC, AR, RANLIB, DLLTOOL
#   MKDIR_P, INSTALL, RM
#   prefix, BINARY_PATH, INCLUDE_PATH, LIBRARY_PATH

CC ?= gcc
AR ?= ar
RANLIB ?= ranlib
DLLTOOL ?= dlltool

MKDIR_P = mkdir -p
INSTALL = install -c
RM = rm -f

# comma separated list (e.g.
# "iconv.dll,libiconv.dll")
DEFAULT_LIBICONV_DLL ?= \"\"

CFLAGS += -pedantic -Wall
CFLAGS += -DUSE_LIBICONV_DLL
CFLAGS += -DDEFAULT_LIBICONV_DLL=$(DEFAULT_LIBICONV_DLL)

prefix ?= /usr/local
BINARY_PATH = $(prefix)/bin
INCLUDE_PATH = $(prefix)/include
LIBRARY_PATH = $(prefix)/lib

# These targets do not produce a file of the same name.
# (msvcrt, msvcr70 and msvcr71 create directories and are left as real targets.)
.PHONY: all dist test install uninstall clean

all: iconv.dll libiconv.a win_iconv.exe

dist: test win_iconv.zip

# Also writes the import library libiconv.dll.a as a side effect.
iconv.dll: win_iconv.c
	$(CC) $(CFLAGS) -c win_iconv.c -DMAKE_DLL
	$(CC) -shared -o iconv.dll -Wl,-s -Wl,--out-implib=libiconv.dll.a -Wl,--export-all-symbols win_iconv.o $(SPECS_FLAGS)

libiconv.a: win_iconv.c
	$(CC) $(CFLAGS) -c win_iconv.c
	$(AR) rcs libiconv.a win_iconv.o
	$(RANLIB) libiconv.a

win_iconv.exe: win_iconv.c
	$(CC) $(CFLAGS) -s -o win_iconv.exe win_iconv.c -DMAKE_EXE

libmlang.a: mlang.def
	$(DLLTOOL) --kill-at --input-def mlang.def --output-lib libmlang.a

test:
	$(CC) $(CFLAGS) -s -o win_iconv_test.exe win_iconv_test.c
	./win_iconv_test.exe

win_iconv.zip: msvcrt msvcr70 msvcr71
	rm -rf win_iconv
	svn export . win_iconv
	cp msvcrt/iconv.dll msvcrt/win_iconv.exe win_iconv/
	mkdir win_iconv/msvcr70
	cp msvcr70/iconv.dll win_iconv/msvcr70/
	mkdir win_iconv/msvcr71
	cp msvcr71/iconv.dll win_iconv/msvcr71/
	zip -r win_iconv.zip win_iconv

msvcrt:
	svn export . msvcrt; \
	cd msvcrt; \
	$(MAKE);

# The specs are dumped with $(CC) so that an overridden compiler is the one
# whose specs get patched.
msvcr70:
	svn export . msvcr70; \
	cd msvcr70; \
	$(CC) -dumpspecs | sed s/-lmsvcrt/-lmsvcr70/ > specs; \
	$(MAKE) "SPECS_FLAGS=-specs=$$PWD/specs";

msvcr71:
	svn export . msvcr71; \
	cd msvcr71; \
	$(CC) -dumpspecs | sed s/-lmsvcrt/-lmsvcr71/ > specs; \
	$(MAKE) "SPECS_FLAGS=-specs=$$PWD/specs";

install: iconv.dll libiconv.a win_iconv.exe
	-@$(MKDIR_P) '$(DESTDIR)$(BINARY_PATH)'
	-@$(MKDIR_P) '$(DESTDIR)$(INCLUDE_PATH)'
	-@$(MKDIR_P) '$(DESTDIR)$(LIBRARY_PATH)'
	-$(INSTALL) iconv.dll '$(DESTDIR)$(BINARY_PATH)'
	-$(INSTALL) win_iconv.exe '$(DESTDIR)$(BINARY_PATH)'
	-$(INSTALL) iconv.h '$(DESTDIR)$(INCLUDE_PATH)'
	-$(INSTALL) libiconv.dll.a '$(DESTDIR)$(LIBRARY_PATH)'
	-$(INSTALL) libiconv.a '$(DESTDIR)$(LIBRARY_PATH)'

uninstall:
	-$(RM) '$(DESTDIR)$(LIBRARY_PATH)'/libiconv.a
	-$(RM) '$(DESTDIR)$(LIBRARY_PATH)'/libiconv.dll.a
	-$(RM) '$(DESTDIR)$(INCLUDE_PATH)'/iconv.h
	-$(RM) '$(DESTDIR)$(BINARY_PATH)'/win_iconv.exe
	-$(RM) '$(DESTDIR)$(BINARY_PATH)'/iconv.dll

# libiconv.dll.a is the import library written by the iconv.dll rule; it was
# previously left behind by clean.
clean:
	rm -f win_iconv.exe
	rm -f win_iconv.o
	rm -f iconv.dll*
	rm -f libiconv.a
	rm -f libiconv.dll
	rm -f libiconv.dll.a
	rm -f win_iconv_test.exe
	rm -f libmlang.a
	rm -rf win_iconv
	rm -rf win_iconv.zip
	rm -rf msvcrt
	rm -rf msvcr70
	rm -rf msvcr71
win-iconv-0.0.8/iconv.def000066400000000000000000000007611265325504600152420ustar00rootroot00000000000000EXPORTS iconv iconv_open iconv_close iconvctl libiconv=iconv libiconv_open=iconv_open libiconv_close=iconv_close libiconvctl=iconvctl ;; libiconv-1.11.dll ;; TODO for binary compatibility ; _libiconv_version @1 ; aliases2_lookup @2 ; aliases_lookup @3 ; iconv_canonicalize @4 ; libiconv @5 ; libiconv_close @6 ; libiconv_open @7 ; libiconv_relocate @8 ; libiconv_set_relocation_prefix @9 ; libiconvctl @10 ; libiconvlist @11 ; locale_charset @12 win-iconv-0.0.8/iconv.h000066400000000000000000000010031265325504600147210ustar00rootroot00000000000000
#ifndef _LIBICONV_H
#define _LIBICONV_H
#include <stddef.h> /* size_t */
#ifndef WINICONV_CONST
# ifdef ICONV_CONST
#  define WINICONV_CONST ICONV_CONST
# else
#  define WINICONV_CONST const
# endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef void* iconv_t;
iconv_t iconv_open(const char *tocode, const char *fromcode);
int iconv_close(iconv_t cd);
size_t iconv(iconv_t cd, WINICONV_CONST char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); #ifdef __cplusplus } #endif #endif win-iconv-0.0.8/mlang.def000066400000000000000000000004301265325504600152130ustar00rootroot00000000000000LIBRARY MLANG.DLL EXPORTS ConvertINetMultiByteToUnicode@24 ;; ConvertINetReset (not documented) ConvertINetString@28 ConvertINetUnicodeToMultiByte@24 IsConvertINetStringAvailable@8 LcidToRfc1766A@12 LcidToRfc1766W@12 Rfc1766ToLcidA@8 Rfc1766ToLcidW@8 win-iconv-0.0.8/mlang.h000066400000000000000000000020451265325504600147100ustar00rootroot00000000000000HRESULT WINAPI ConvertINetString( LPDWORD lpdwMode, DWORD dwSrcEncoding, DWORD dwDstEncoding, LPCSTR lpSrcStr, LPINT lpnSrcSize, LPBYTE lpDstStr, LPINT lpnDstSize ); HRESULT WINAPI ConvertINetMultiByteToUnicode( LPDWORD lpdwMode, DWORD dwSrcEncoding, LPCSTR lpSrcStr, LPINT lpnMultiCharCount, LPWSTR lpDstStr, LPINT lpnWideCharCount ); HRESULT WINAPI ConvertINetUnicodeToMultiByte( LPDWORD lpdwMode, DWORD dwEncoding, LPCWSTR lpSrcStr, LPINT lpnWideCharCount, LPSTR lpDstStr, LPINT lpnMultiCharCount ); HRESULT WINAPI IsConvertINetStringAvailable( DWORD dwSrcEncoding, DWORD dwDstEncoding ); HRESULT WINAPI LcidToRfc1766A( LCID Locale, LPSTR pszRfc1766, int nChar ); HRESULT WINAPI LcidToRfc1766W( LCID Locale, LPWSTR pszRfc1766, int nChar ); HRESULT WINAPI Rfc1766ToLcidA( LCID *pLocale, LPSTR pszRfc1766 ); HRESULT WINAPI Rfc1766ToLcidW( LCID *pLocale, LPWSTR pszRfc1766 ); win-iconv-0.0.8/readme.txt000066400000000000000000000016501265325504600154400ustar00rootroot00000000000000win_iconv is an iconv implementation that uses the Win32 API for conversion. win_iconv is placed in the public domain. ENVIRONMENT VARIABLE: WINICONV_LIBICONV_DLL If $WINICONV_LIBICONV_DLL is set, win_iconv uses the DLL. If loading the DLL or iconv_open() fails, it falls back to internal conversion. If several DLLs are specified as a comma-separated list, the first loadable DLL is used.
The DLL should have iconv_open(), iconv_close() and iconv(). Or libiconv_open(), libiconv_close() and libiconv(). (only available when USE_LIBICONV_DLL is defined at compile time) Win32 API does not support strict encoding conversion for some codepage. And MLang function drops or replaces invalid bytes and does not return useful error status as iconv does. This implementation cannot be used for encoding validation purpose. Yukihiro Nakadaira win-iconv-0.0.8/win_iconv.c000066400000000000000000001747671265325504600156250ustar00rootroot00000000000000/* * iconv implementation using Win32 API to convert. * * This file is placed in the public domain. */ /* for WC_NO_BEST_FIT_CHARS */ #ifndef WINVER # define WINVER 0x0500 #endif #define STRICT #include #include #include #include #ifdef __GNUC__ #define UNUSED __attribute__((unused)) #else #define UNUSED #endif /* WORKAROUND: */ #ifndef UNDER_CE #define GetProcAddressA GetProcAddress #endif #if 0 # define MAKE_EXE # define MAKE_DLL # define USE_LIBICONV_DLL #endif #if !defined(DEFAULT_LIBICONV_DLL) # define DEFAULT_LIBICONV_DLL "" #endif #define MB_CHAR_MAX 16 #define UNICODE_MODE_BOM_DONE 1 #define UNICODE_MODE_SWAPPED 2 #define FLAG_USE_BOM 1 #define FLAG_TRANSLIT 2 /* //TRANSLIT */ #define FLAG_IGNORE 4 /* //IGNORE */ typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; typedef void* iconv_t; iconv_t iconv_open(const char *tocode, const char *fromcode); int iconv_close(iconv_t cd); size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); /* libiconv interface for vim */ #if defined(MAKE_DLL) int iconvctl (iconv_t cd, int request, void* argument) { /* not supported */ return 0; } #endif typedef struct compat_t compat_t; typedef struct csconv_t csconv_t; typedef struct rec_iconv_t rec_iconv_t; typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode); typedef int (*f_iconv_close)(iconv_t cd); typedef size_t (*f_iconv)(iconv_t 
cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); typedef int* (*f_errno)(void); typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); #define COMPAT_IN 1 #define COMPAT_OUT 2 /* unicode mapping for compatibility with other conversion table. */ struct compat_t { uint in; uint out; uint flag; }; struct csconv_t { int codepage; int flags; f_mbtowc mbtowc; f_wctomb wctomb; f_mblen mblen; f_flush flush; DWORD mode; compat_t *compat; }; struct rec_iconv_t { iconv_t cd; f_iconv_close iconv_close; f_iconv iconv; f_errno _errno; csconv_t from; csconv_t to; #if defined(USE_LIBICONV_DLL) HMODULE hlibiconv; #endif }; static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); static int win_iconv_close(iconv_t cd); static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); static int load_mlang(void); static int make_csconv(const char *name, csconv_t *cv); static int name_to_codepage(const char *name); static uint utf16_to_ucs4(const ushort *wbuf); static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); static int mbtowc_flags(int codepage); static int must_use_null_useddefaultchar(int codepage); static char *strrstr(const char *str, const char *token); static char *xstrndup(const char *s, size_t n); static int seterror(int err); #if defined(USE_LIBICONV_DLL) static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size); static FARPROC find_imported_function(HMODULE hModule, const char *funcname); static HMODULE hwiniconv; #endif static int 
sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); static struct { int codepage; const char *name; } codepage_alias[] = { {65001, "CP65001"}, {65001, "UTF8"}, {65001, "UTF-8"}, {1200, "CP1200"}, {1200, "UTF16LE"}, {1200, "UTF-16LE"}, {1200, "UCS2LE"}, {1200, "UCS-2LE"}, {1201, "CP1201"}, {1201, "UTF16BE"}, {1201, "UTF-16BE"}, {1201, "UCS2BE"}, {1201, "UCS-2BE"}, {1201, "unicodeFFFE"}, {12000, "CP12000"}, {12000, "UTF32LE"}, {12000, "UTF-32LE"}, {12000, "UCS4LE"}, {12000, "UCS-4LE"}, {12001, "CP12001"}, {12001, "UTF32BE"}, {12001, "UTF-32BE"}, {12001, "UCS4BE"}, {12001, "UCS-4BE"}, #ifndef GLIB_COMPILATION /* * Default is big endian. 
* See rfc2781 4.3 Interpreting text labelled as UTF-16. */ {1201, "UTF16"}, {1201, "UTF-16"}, {1201, "UCS2"}, {1201, "UCS-2"}, {12001, "UTF32"}, {12001, "UTF-32"}, {12001, "UCS-4"}, {12001, "UCS4"}, #else /* Default is little endian, because the platform is */ {1200, "UTF16"}, {1200, "UTF-16"}, {1200, "UCS2"}, {1200, "UCS-2"}, {12000, "UTF32"}, {12000, "UTF-32"}, {12000, "UCS4"}, {12000, "UCS-4"}, #endif /* copy from libiconv `iconv -l` */ /* !IsValidCodePage(367) */ {20127, "ANSI_X3.4-1968"}, {20127, "ANSI_X3.4-1986"}, {20127, "ASCII"}, {20127, "CP367"}, {20127, "IBM367"}, {20127, "ISO-IR-6"}, {20127, "ISO646-US"}, {20127, "ISO_646.IRV:1991"}, {20127, "US"}, {20127, "US-ASCII"}, {20127, "CSASCII"}, /* !IsValidCodePage(819) */ {1252, "CP819"}, {1252, "IBM819"}, {28591, "ISO-8859-1"}, {28591, "ISO-IR-100"}, {28591, "ISO8859-1"}, {28591, "ISO_8859-1"}, {28591, "ISO_8859-1:1987"}, {28591, "L1"}, {28591, "LATIN1"}, {28591, "CSISOLATIN1"}, {1250, "CP1250"}, {1250, "MS-EE"}, {1250, "WINDOWS-1250"}, {1251, "CP1251"}, {1251, "MS-CYRL"}, {1251, "WINDOWS-1251"}, {1252, "CP1252"}, {1252, "MS-ANSI"}, {1252, "WINDOWS-1252"}, {1253, "CP1253"}, {1253, "MS-GREEK"}, {1253, "WINDOWS-1253"}, {1254, "CP1254"}, {1254, "MS-TURK"}, {1254, "WINDOWS-1254"}, {1255, "CP1255"}, {1255, "MS-HEBR"}, {1255, "WINDOWS-1255"}, {1256, "CP1256"}, {1256, "MS-ARAB"}, {1256, "WINDOWS-1256"}, {1257, "CP1257"}, {1257, "WINBALTRIM"}, {1257, "WINDOWS-1257"}, {1258, "CP1258"}, {1258, "WINDOWS-1258"}, {850, "850"}, {850, "CP850"}, {850, "IBM850"}, {850, "CSPC850MULTILINGUAL"}, /* !IsValidCodePage(862) */ {862, "862"}, {862, "CP862"}, {862, "IBM862"}, {862, "CSPC862LATINHEBREW"}, {866, "866"}, {866, "CP866"}, {866, "IBM866"}, {866, "CSIBM866"}, /* !IsValidCodePage(154) */ {154, "CP154"}, {154, "CYRILLIC-ASIAN"}, {154, "PT154"}, {154, "PTCP154"}, {154, "CSPTCP154"}, /* !IsValidCodePage(1133) */ {1133, "CP1133"}, {1133, "IBM-CP1133"}, {874, "CP874"}, {874, "WINDOWS-874"}, /* !IsValidCodePage(51932) */ {51932, 
"CP51932"}, {51932, "MS51932"}, {51932, "WINDOWS-51932"}, {51932, "EUC-JP"}, {932, "CP932"}, {932, "MS932"}, {932, "SHIFT_JIS"}, {932, "SHIFT_JIS-MS"}, /* misspelled variants of the two names above, kept for backward compatibility */ {932, "SHIFFT_JIS"}, {932, "SHIFFT_JIS-MS"}, {932, "SJIS"}, {932, "SJIS-MS"}, {932, "SJIS-OPEN"}, {932, "SJIS-WIN"}, {932, "WINDOWS-31J"}, {932, "WINDOWS-932"}, {932, "CSWINDOWS31J"}, {50221, "CP50221"}, {50221, "ISO-2022-JP"}, {50221, "ISO-2022-JP-MS"}, {50221, "ISO2022-JP"}, {50221, "ISO2022-JP-MS"}, {50221, "MS50221"}, {50221, "WINDOWS-50221"}, {936, "CP936"}, {936, "GBK"}, {936, "MS936"}, {936, "WINDOWS-936"}, {950, "CP950"}, {950, "BIG5"}, {950, "BIG5HKSCS"}, {950, "BIG5-HKSCS"}, {949, "CP949"}, {949, "UHC"}, {949, "EUC-KR"}, {1361, "CP1361"}, {1361, "JOHAB"}, {437, "437"}, {437, "CP437"}, {437, "IBM437"}, {437, "CSPC8CODEPAGE437"}, {737, "CP737"}, {775, "CP775"}, {775, "IBM775"}, {775, "CSPC775BALTIC"}, {852, "852"}, {852, "CP852"}, {852, "IBM852"}, {852, "CSPCP852"}, /* !IsValidCodePage(853) */ {853, "CP853"}, {855, "855"}, {855, "CP855"}, {855, "IBM855"}, {855, "CSIBM855"}, {857, "857"}, {857, "CP857"}, {857, "IBM857"}, {857, "CSIBM857"}, /* !IsValidCodePage(858) */ {858, "CP858"}, {860, "860"}, {860, "CP860"}, {860, "IBM860"}, {860, "CSIBM860"}, {861, "861"}, {861, "CP-IS"}, {861, "CP861"}, {861, "IBM861"}, {861, "CSIBM861"}, {863, "863"}, {863, "CP863"}, {863, "IBM863"}, {863, "CSIBM863"}, {864, "CP864"}, {864, "IBM864"}, {864, "CSIBM864"}, {865, "865"}, {865, "CP865"}, {865, "IBM865"}, {865, "CSIBM865"}, {869, "869"}, {869, "CP-GR"}, {869, "CP869"}, {869, "IBM869"}, {869, "CSIBM869"}, /* !IsValidCodePage(1125) */ {1125, "CP1125"}, /* * Code Page Identifiers * http://msdn2.microsoft.com/en-us/library/ms776446.aspx */ {37, "IBM037"}, /* IBM EBCDIC US-Canada */ {437, "IBM437"}, /* OEM United States */ {500, "IBM500"}, /* IBM EBCDIC International */ {708, "ASMO-708"}, /* Arabic (ASMO 708) */ /* 709 Arabic (ASMO-449+, BCON V4) */ /* 710 Arabic - Transparent Arabic */ {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */ {737, "ibm737"}, /*
OEM Greek (formerly 437G); Greek (DOS) */ {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */ {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */ {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */ {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */ {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */ {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */ {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */ {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */ {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */ {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */ {864, "IBM864"}, /* OEM Arabic; Arabic (864) */ {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */ {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */ {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */ {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */ {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */ {875, "cp875"}, /* IBM EBCDIC Greek Modern */ {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */ {932, "shift-jis"}, /* alternative name for it */ {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */ {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */ {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */ {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */ {950, "big5-hkscs"}, /* alternative name for it */ {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */ {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */ {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */ {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */ {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC 
(Denmark-Norway-Euro) */ {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */ {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */ {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */ {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */ {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */ {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */ {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */ {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */ {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */ {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */ {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */ {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */ {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */ {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */ {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */ {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */ {1361, "Johab"}, /* Korean (Johab) */ {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */ {10001, "x-mac-japanese"}, /* Japanese (Mac) */ {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */ {10003, "x-mac-korean"}, /* Korean (Mac) */ {10004, "x-mac-arabic"}, /* Arabic (Mac) */ {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */ {10006, "x-mac-greek"}, /* Greek (Mac) */ {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */ {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */ {10010, "x-mac-romanian"}, /* Romanian (Mac) */ {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */ {10021, "x-mac-thai"}, /* Thai (Mac) 
*/ {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */ {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */ {10081, "x-mac-turkish"}, /* Turkish (Mac) */ {10082, "x-mac-croatian"}, /* Croatian (Mac) */ {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */ {20001, "x-cp20001"}, /* TCA Taiwan */ {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */ {20003, "x-cp20003"}, /* IBM5550 Taiwan */ {20004, "x-cp20004"}, /* TeleText Taiwan */ {20005, "x-cp20005"}, /* Wang Taiwan */ {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */ {20106, "x-IA5-German"}, /* IA5 German (7-bit) */ {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */ {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */ {20127, "us-ascii"}, /* US-ASCII (7-bit) */ {20261, "x-cp20261"}, /* T.61 */ {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */ {20273, "IBM273"}, /* IBM EBCDIC Germany */ {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */ {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */ {20280, "IBM280"}, /* IBM EBCDIC Italy */ {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */ {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */ {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */ {20297, "IBM297"}, /* IBM EBCDIC France */ {20420, "IBM420"}, /* IBM EBCDIC Arabic */ {20423, "IBM423"}, /* IBM EBCDIC Greek */ {20424, "IBM424"}, /* IBM EBCDIC Hebrew */ {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */ {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */ {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */ {20871, "IBM871"}, /* IBM EBCDIC Icelandic */ {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */ {20905, "IBM905"}, /* IBM EBCDIC Turkish */ {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */ {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */ {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */ {20949, 
"x-cp20949"}, /* Korean Wansung */ {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */ /* 21027 (deprecated) */ {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */ {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ {28591, "iso_8859-1"}, {28591, "iso_8859_1"}, {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ {28592, "iso_8859-2"}, {28592, "iso_8859_2"}, {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */ {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */ {28593, "iso_8859-3"}, {28593, "iso_8859_3"}, {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */ {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */ {28594, "iso_8859-4"}, {28594, "iso_8859_4"}, {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */ {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */ {28595, "iso_8859-5"}, {28595, "iso_8859_5"}, {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */ {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */ {28596, "iso_8859-6"}, {28596, "iso_8859_6"}, {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */ {28597, "iso8859-7"}, /* ISO 8859-7 Greek */ {28597, "iso_8859-7"}, {28597, "iso_8859_7"}, {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ {28598, "iso_8859-8"}, {28598, "iso_8859_8"}, {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */ {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */ {28599, "iso_8859-9"}, {28599, "iso_8859_9"}, {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */ {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */ {28603, "iso_8859-13"}, {28603, "iso_8859_13"}, {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */ {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */ {28605, "iso_8859-15"}, {28605, "iso_8859_15"}, {29001, "x-Europa"}, /* Europa 3 */ {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew 
(ISO-Logical) */ {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ {38598, "iso_8859-8-i"}, {38598, "iso_8859_8-i"}, {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */ {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */ {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */ {50225, "iso-2022-kr"}, /* ISO 2022 Korean */ {50225, "iso2022-kr"}, /* ISO 2022 Korean */ {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */ /* 50229 ISO 2022 Traditional Chinese */ /* 50930 EBCDIC Japanese (Katakana) Extended */ /* 50931 EBCDIC US-Canada and Japanese */ /* 50933 EBCDIC Korean Extended and Korean */ /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */ /* 50936 EBCDIC Simplified Chinese */ /* 50937 EBCDIC US-Canada and Traditional Chinese */ /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */ {51932, "euc-jp"}, /* EUC Japanese */ {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */ {51949, "euc-kr"}, /* EUC Korean */ /* 51950 EUC Traditional Chinese */ {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */ {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */ {57002, "x-iscii-de"}, /* ISCII Devanagari */ {57003, "x-iscii-be"}, /* ISCII Bengali */ {57004, "x-iscii-ta"}, /* ISCII Tamil */ {57005, "x-iscii-te"}, /* ISCII Telugu */ {57006, "x-iscii-as"}, /* ISCII Assamese */ {57007, "x-iscii-or"}, /* ISCII Oriya */ {57008, "x-iscii-ka"}, /* ISCII Kannada */ {57009, "x-iscii-ma"}, /* ISCII Malayalam */ {57010, "x-iscii-gu"}, /* ISCII Gujarati */ {57011, "x-iscii-pa"}, /* ISCII Punjabi */ {0, NULL} }; /* * SJIS SHIFTJIS table CP932 table * ---- --------------------------- -------------------------------- * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS * 7E 
U+203E OVERLINE             U+007E TILDE
 * 815C U+2014 EM DASH              U+2015 HORIZONTAL BAR
 * 815F U+005C REVERSE SOLIDUS      U+FF3C FULLWIDTH REVERSE SOLIDUS
 * 8160 U+301C WAVE DASH            U+FF5E FULLWIDTH TILDE
 * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO
 * 817C U+2212 MINUS SIGN           U+FF0D FULLWIDTH HYPHEN-MINUS
 * 8191 U+00A2 CENT SIGN            U+FFE0 FULLWIDTH CENT SIGN
 * 8192 U+00A3 POUND SIGN           U+FFE1 FULLWIDTH POUND SIGN
 * 81CA U+00AC NOT SIGN             U+FFE2 FULLWIDTH NOT SIGN
 *
 * EUC-JP and ISO-2022-JP should be compatible with CP932.
 *
 * Kernel and MLang have different Unicode mapping table.  Make sure
 * which API is used.
 */

/*
 * Compatibility remapping applied around CP932 conversions.
 *
 * Each row is presumably {in, out, flag} (field order of compat_t is
 * declared elsewhere -- confirm).  As used by win_iconv():
 *   - a COMPAT_OUT row replaces a code point equal to `in` with `out`
 *     just before it is encoded into the target charset;
 *   - a COMPAT_IN row replaces a decoded code point equal to `out`
 *     with `in`.
 * The lookup stops at the first matching row, so row order matters.
 * The all-zero row terminates the table (the scan stops at in == 0).
 */
static compat_t cp932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},
    {0x203E, 0x007E, COMPAT_OUT},
    {0x2014, 0x2015, COMPAT_OUT},
    {0x301C, 0xFF5E, COMPAT_OUT},
    {0x2016, 0x2225, COMPAT_OUT},
    {0x2212, 0xFF0D, COMPAT_OUT},
    {0x00A2, 0xFFE0, COMPAT_OUT},
    {0x00A3, 0xFFE1, COMPAT_OUT},
    {0x00AC, 0xFFE2, COMPAT_OUT},
    {0, 0, 0}
};

/*
 * Same idea for code page 20932.  The last six rows carry both
 * COMPAT_OUT and COMPAT_IN, so they are consulted in both directions.
 */
static compat_t cp20932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},
    {0x203E, 0x007E, COMPAT_OUT},
    {0x2014, 0x2015, COMPAT_OUT},
    {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},
    {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},
    {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},
    {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},
    {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},
    {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},
    {0, 0, 0}
};

/* Table used for code page 51932 (see the switch in make_csconv()). */
static compat_t *cp51932_compat = cp932_compat;

/* cp20932_compat for kernel.  cp932_compat for mlang.
*/ static compat_t *cp5022x_compat = cp932_compat; typedef HRESULT (WINAPI *CONVERTINETSTRING)( LPDWORD lpdwMode, DWORD dwSrcEncoding, DWORD dwDstEncoding, LPCSTR lpSrcStr, LPINT lpnSrcSize, LPBYTE lpDstStr, LPINT lpnDstSize ); typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)( LPDWORD lpdwMode, DWORD dwSrcEncoding, LPCSTR lpSrcStr, LPINT lpnMultiCharCount, LPWSTR lpDstStr, LPINT lpnWideCharCount ); typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)( LPDWORD lpdwMode, DWORD dwEncoding, LPCWSTR lpSrcStr, LPINT lpnWideCharCount, LPSTR lpDstStr, LPINT lpnMultiCharCount ); typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)( DWORD dwSrcEncoding, DWORD dwDstEncoding ); typedef HRESULT (WINAPI *LCIDTORFC1766A)( LCID Locale, LPSTR pszRfc1766, int nChar ); typedef HRESULT (WINAPI *LCIDTORFC1766W)( LCID Locale, LPWSTR pszRfc1766, int nChar ); typedef HRESULT (WINAPI *RFC1766TOLCIDA)( LCID *pLocale, LPSTR pszRfc1766 ); typedef HRESULT (WINAPI *RFC1766TOLCIDW)( LCID *pLocale, LPWSTR pszRfc1766 ); static CONVERTINETSTRING ConvertINetString; static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode; static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte; static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable; static LCIDTORFC1766A LcidToRfc1766A; static RFC1766TOLCIDA Rfc1766ToLcidA; static int load_mlang(void) { HMODULE h; if (ConvertINetString != NULL) return TRUE; h = LoadLibrary(TEXT("mlang.dll")); if (!h) return FALSE; ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString"); ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable"); LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A"); Rfc1766ToLcidA = 
(RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA"); return TRUE; } iconv_t iconv_open(const char *tocode, const char *fromcode) { rec_iconv_t *cd; cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t)); if (cd == NULL) return (iconv_t)(-1); #if defined(USE_LIBICONV_DLL) errno = 0; if (libiconv_iconv_open(cd, tocode, fromcode)) return (iconv_t)cd; #endif /* reset the errno to prevent reporting wrong error code. * 0 for unsorted error. */ errno = 0; if (win_iconv_open(cd, tocode, fromcode)) return (iconv_t)cd; free(cd); return (iconv_t)(-1); } int iconv_close(iconv_t _cd) { rec_iconv_t *cd = (rec_iconv_t *)_cd; int r = cd->iconv_close(cd->cd); int e = *(cd->_errno()); #if defined(USE_LIBICONV_DLL) if (cd->hlibiconv != NULL) FreeLibrary(cd->hlibiconv); #endif free(cd); errno = e; return r; } size_t iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { rec_iconv_t *cd = (rec_iconv_t *)_cd; size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); errno = *(cd->_errno()); return r; } static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) { if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) return FALSE; cd->iconv_close = win_iconv_close; cd->iconv = win_iconv; cd->_errno = _errno; cd->cd = (iconv_t)cd; return TRUE; } static int win_iconv_close(iconv_t cd UNUSED) { return 0; } static size_t win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { rec_iconv_t *cd = (rec_iconv_t *)_cd; ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */ int insize; int outsize; int wsize; DWORD frommode; DWORD tomode; uint wc; compat_t *cp; int i; if (inbuf == NULL || *inbuf == NULL) { if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) { tomode = cd->to.mode; outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); if (outsize == -1) { if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) { outsize = 0; } else 
{ cd->to.mode = tomode; return (size_t)(-1); } } *outbuf += outsize; *outbytesleft -= outsize; } cd->from.mode = 0; cd->to.mode = 0; return 0; } while (*inbytesleft != 0) { frommode = cd->from.mode; tomode = cd->to.mode; wsize = MB_CHAR_MAX; insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); if (insize == -1) { if (cd->to.flags & FLAG_IGNORE) { cd->from.mode = frommode; insize = 1; wsize = 0; } else { cd->from.mode = frommode; return (size_t)(-1); } } if (wsize == 0) { *inbuf += insize; *inbytesleft -= insize; continue; } if (cd->from.compat != NULL) { wc = utf16_to_ucs4(wbuf); cp = cd->from.compat; for (i = 0; cp[i].in != 0; ++i) { if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc) { ucs4_to_utf16(cp[i].in, wbuf, &wsize); break; } } } if (cd->to.compat != NULL) { wc = utf16_to_ucs4(wbuf); cp = cd->to.compat; for (i = 0; cp[i].in != 0; ++i) { if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc) { ucs4_to_utf16(cp[i].out, wbuf, &wsize); break; } } } outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft); if (outsize == -1) { if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) { cd->to.mode = tomode; outsize = 0; } else { cd->from.mode = frommode; cd->to.mode = tomode; return (size_t)(-1); } } *inbuf += insize; *outbuf += outsize; *inbytesleft -= insize; *outbytesleft -= outsize; } return 0; } static int make_csconv(const char *_name, csconv_t *cv) { CPINFO cpinfo; int use_compat = TRUE; int flag = 0; char *name; char *p; name = xstrndup(_name, strlen(_name)); if (name == NULL) return FALSE; /* check for option "enc_name//opt1//opt2" */ while ((p = strrstr(name, "//")) != NULL) { if (_stricmp(p + 2, "nocompat") == 0) use_compat = FALSE; else if (_stricmp(p + 2, "translit") == 0) flag |= FLAG_TRANSLIT; else if (_stricmp(p + 2, "ignore") == 0) flag |= FLAG_IGNORE; *p = 0; } cv->mode = 0; cv->flags = flag; cv->mblen = NULL; cv->flush = NULL; cv->compat = NULL; cv->codepage = name_to_codepage(name); if (cv->codepage 
== 1200 || cv->codepage == 1201) { cv->mbtowc = utf16_mbtowc; cv->wctomb = utf16_wctomb; if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 || _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0) cv->flags |= FLAG_USE_BOM; } else if (cv->codepage == 12000 || cv->codepage == 12001) { cv->mbtowc = utf32_mbtowc; cv->wctomb = utf32_wctomb; if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 || _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0) cv->flags |= FLAG_USE_BOM; } else if (cv->codepage == 65001) { cv->mbtowc = kernel_mbtowc; cv->wctomb = kernel_wctomb; cv->mblen = utf8_mblen; } else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) { cv->mbtowc = iso2022jp_mbtowc; cv->wctomb = iso2022jp_wctomb; cv->flush = iso2022jp_flush; } else if (cv->codepage == 51932 && load_mlang()) { cv->mbtowc = mlang_mbtowc; cv->wctomb = mlang_wctomb; cv->mblen = eucjp_mblen; } else if (IsValidCodePage(cv->codepage) && GetCPInfo(cv->codepage, &cpinfo) != 0) { cv->mbtowc = kernel_mbtowc; cv->wctomb = kernel_wctomb; if (cpinfo.MaxCharSize == 1) cv->mblen = sbcs_mblen; else if (cpinfo.MaxCharSize == 2) cv->mblen = dbcs_mblen; else cv->mblen = mbcs_mblen; } else { /* not supported */ free(name); errno = EINVAL; return FALSE; } if (use_compat) { switch (cv->codepage) { case 932: cv->compat = cp932_compat; break; case 20932: cv->compat = cp20932_compat; break; case 51932: cv->compat = cp51932_compat; break; case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; } } free(name); return TRUE; } static int name_to_codepage(const char *name) { int i; if (*name == '\0' || strcmp(name, "char") == 0) return GetACP(); else if (strcmp(name, "wchar_t") == 0) return 1200; else if (_strnicmp(name, "cp", 2) == 0) return atoi(name + 2); /* CP123 */ else if ('0' <= name[0] && name[0] <= '9') return atoi(name); /* 123 */ else if (_strnicmp(name, "xx", 2) == 0) return atoi(name + 2); /* XX123 for debug 
*/ for (i = 0; codepage_alias[i].name != NULL; ++i) if (_stricmp(name, codepage_alias[i].name) == 0) return codepage_alias[i].codepage; return -1; } /* * http://www.faqs.org/rfcs/rfc2781.html */ static uint utf16_to_ucs4(const ushort *wbuf) { uint wc = wbuf[0]; if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000; return wc; } static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize) { if (wc < 0x10000) { wbuf[0] = wc; *wbufsize = 1; } else { wc -= 0x10000; wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF); wbuf[1] = 0xDC00 | (wc & 0x3FF); *wbufsize = 2; } } /* * Check if codepage is one of those for which the dwFlags parameter * to MultiByteToWideChar() must be zero. Return zero or * MB_ERR_INVALID_CHARS. The docs in Platform SDK for for Windows * Server 2003 R2 claims that also codepage 65001 is one of these, but * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave * out 65001 (UTF-8), and that indeed seems to be the case on XP, it * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting * from UTF-8. */ static int mbtowc_flags(int codepage) { return (codepage == 50220 || codepage == 50221 || codepage == 50222 || codepage == 50225 || codepage == 50227 || codepage == 50229 || codepage == 52936 || codepage == 54936 || (codepage >= 57002 && codepage <= 57011) || codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; } /* * Check if codepage is one those for which the lpUsedDefaultChar * parameter to WideCharToMultiByte() must be NULL. The docs in * Platform SDK for Windows Server 2003 R2 claims that this is the * list below, while the MSDN docs for MSVS2008 claim that it is only * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform * SDK seems to be correct, at least for XP. 
*/ static int must_use_null_useddefaultchar(int codepage) { return (codepage == 65000 || codepage == 65001 || codepage == 50220 || codepage == 50221 || codepage == 50222 || codepage == 50225 || codepage == 50227 || codepage == 50229 || codepage == 52936 || codepage == 54936 || (codepage >= 57002 && codepage <= 57011) || codepage == 42); } static char * strrstr(const char *str, const char *token) { int len = strlen(token); const char *p = str + strlen(str); while (str <= --p) if (p[0] == token[0] && strncmp(p, token, len) == 0) return (char *)p; return NULL; } static char * xstrndup(const char *s, size_t n) { char *p; p = (char *)malloc(n + 1); if (p == NULL) return NULL; memcpy(p, s, n); p[n] = '\0'; return p; } static int seterror(int err) { errno = err; return -1; } #if defined(USE_LIBICONV_DLL) static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) { HMODULE hlibiconv = NULL; char *dllname; const char *p; const char *e; f_iconv_open _iconv_open; /* * always try to load dll, so that we can switch dll in runtime. */ /* XXX: getenv() can't get variable set by SetEnvironmentVariable() */ p = getenv("WINICONV_LIBICONV_DLL"); if (p == NULL) p = DEFAULT_LIBICONV_DLL; /* parse comma separated value */ for ( ; *p != 0; p = (*e == ',') ? 
e + 1 : e) { e = strchr(p, ','); if (p == e) continue; else if (e == NULL) e = p + strlen(p); dllname = xstrndup(p, e - p); if (dllname == NULL) return FALSE; hlibiconv = LoadLibraryA(dllname); free(dllname); if (hlibiconv != NULL) { if (hlibiconv == hwiniconv) { FreeLibrary(hlibiconv); hlibiconv = NULL; continue; } break; } } if (hlibiconv == NULL) goto failed; _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "libiconv_open"); if (_iconv_open == NULL) _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "iconv_open"); cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "libiconv_close"); if (cd->iconv_close == NULL) cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "iconv_close"); cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "libiconv"); if (cd->iconv == NULL) cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "iconv"); cd->_errno = (f_errno)find_imported_function(hlibiconv, "_errno"); if (_iconv_open == NULL || cd->iconv_close == NULL || cd->iconv == NULL || cd->_errno == NULL) goto failed; cd->cd = _iconv_open(tocode, fromcode); if (cd->cd == (iconv_t)(-1)) goto failed; cd->hlibiconv = hlibiconv; return TRUE; failed: if (hlibiconv != NULL) FreeLibrary(hlibiconv); return FALSE; } /* * Reference: * http://forums.belution.com/ja/vc/000/234/78s.shtml * http://nienie.com/~masapico/api_ImageDirectoryEntryToData.html * * The formal way is * imagehlp.h or dbghelp.h * imagehlp.lib or dbghelp.lib * ImageDirectoryEntryToData() */ #define TO_DOS_HEADER(base) ((PIMAGE_DOS_HEADER)(base)) #define TO_NT_HEADERS(base) ((PIMAGE_NT_HEADERS)((LPBYTE)(base) + TO_DOS_HEADER(base)->e_lfanew)) static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size) { /* TODO: MappedAsImage? 
*/ PIMAGE_DATA_DIRECTORY p; p = TO_NT_HEADERS(Base)->OptionalHeader.DataDirectory + DirectoryEntry; if (p->VirtualAddress == 0) { *Size = 0; return NULL; } *Size = p->Size; return (PVOID)((LPBYTE)Base + p->VirtualAddress); } static FARPROC find_imported_function(HMODULE hModule, const char *funcname) { DWORD_PTR Base; ULONG Size; PIMAGE_IMPORT_DESCRIPTOR Imp; PIMAGE_THUNK_DATA Address; /* Import Address Table */ PIMAGE_THUNK_DATA Name; /* Import Name Table */ PIMAGE_IMPORT_BY_NAME ImpName; Base = (DWORD_PTR)hModule; Imp = (PIMAGE_IMPORT_DESCRIPTOR)MyImageDirectoryEntryToData( (LPVOID)Base, TRUE, IMAGE_DIRECTORY_ENTRY_IMPORT, &Size); if (Imp == NULL) return NULL; for ( ; Imp->OriginalFirstThunk != 0; ++Imp) { Address = (PIMAGE_THUNK_DATA)(Base + Imp->FirstThunk); Name = (PIMAGE_THUNK_DATA)(Base + Imp->OriginalFirstThunk); for ( ; Name->u1.Ordinal != 0; ++Name, ++Address) { if (!IMAGE_SNAP_BY_ORDINAL(Name->u1.Ordinal)) { ImpName = (PIMAGE_IMPORT_BY_NAME) (Base + (DWORD_PTR)Name->u1.AddressOfData); if (strcmp((char *)ImpName->Name, funcname) == 0) return (FARPROC)Address->u1.Function; } } } return NULL; } #endif static int sbcs_mblen(csconv_t *cv UNUSED, const uchar *buf UNUSED, int bufsize UNUSED) { return 1; } static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) { int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 
2 : 1; if (bufsize < len) return seterror(EINVAL); return len; } static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) { int len = 0; if (cv->codepage == 54936) { if (buf[0] <= 0x7F) len = 1; else if (buf[0] >= 0x81 && buf[0] <= 0xFE && bufsize >= 2 && ((buf[1] >= 0x40 && buf[1] <= 0x7E) || (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2; else if (buf[0] >= 0x81 && buf[0] <= 0xFE && bufsize >= 4 && buf[1] >= 0x30 && buf[1] <= 0x39) len = 4; else return seterror(EINVAL); return len; } else return seterror(EINVAL); } static int utf8_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) { int len = 0; if (buf[0] < 0x80) len = 1; else if ((buf[0] & 0xE0) == 0xC0) len = 2; else if ((buf[0] & 0xF0) == 0xE0) len = 3; else if ((buf[0] & 0xF8) == 0xF0) len = 4; else if ((buf[0] & 0xFC) == 0xF8) len = 5; else if ((buf[0] & 0xFE) == 0xFC) len = 6; if (len == 0) return seterror(EILSEQ); else if (bufsize < len) return seterror(EINVAL); return len; } static int eucjp_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) { if (buf[0] < 0x80) /* ASCII */ return 1; else if (buf[0] == 0x8E) /* JIS X 0201 */ { if (bufsize < 2) return seterror(EINVAL); else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) return seterror(EILSEQ); return 2; } else if (buf[0] == 0x8F) /* JIS X 0212 */ { if (bufsize < 3) return seterror(EINVAL); else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) return seterror(EILSEQ); return 3; } else /* JIS X 0208 */ { if (bufsize < 2) return seterror(EINVAL); else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) return seterror(EILSEQ); return 2; } } static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) { int len; len = cv->mblen(cv, buf, bufsize); if (len == -1) return -1; /* If converting from ASCII, reject 8bit * chars. MultiByteToWideChar() doesn't. Note that for ASCII we * know that the mblen function is sbcs_mblen() so len is 1. 
*/ if (cv->codepage == 20127 && buf[0] >= 0x80) return seterror(EILSEQ); *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); if (*wbufsize == 0) return seterror(EILSEQ); return len; } static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) { BOOL usedDefaultChar = 0; BOOL *p = NULL; int flags = 0; int len; if (bufsize == 0) return seterror(E2BIG); if (!must_use_null_useddefaultchar(cv->codepage)) { p = &usedDefaultChar; #ifdef WC_NO_BEST_FIT_CHARS if (!(cv->flags & FLAG_TRANSLIT)) flags |= WC_NO_BEST_FIT_CHARS; #endif } len = WideCharToMultiByte(cv->codepage, flags, (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); if (len == 0) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) return seterror(E2BIG); return seterror(EILSEQ); } else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) return seterror(EILSEQ); else if (cv->mblen(cv, buf, len) != len) /* validate result */ return seterror(EILSEQ); return len; } /* * It seems that the mode (cv->mode) is fixnum. * For example, when converting iso-2022-jp(cp50221) to unicode: * in ascii sequence: mode=0xC42C0000 * in jisx0208 sequence: mode=0xC42C0001 * "C42C" is same for each convert session. 
* It should be: ((codepage-1)<<16)|state */ static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) { int len; int insize; HRESULT hr; len = cv->mblen(cv, buf, bufsize); if (len == -1) return -1; insize = len; hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); if (hr != S_OK || insize != len) return seterror(EILSEQ); return len; } static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) { char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ int tmpsize = MB_CHAR_MAX; int insize = wbufsize; HRESULT hr; hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); if (hr != S_OK || insize != wbufsize) return seterror(EILSEQ); else if (bufsize < tmpsize) return seterror(E2BIG); else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) return seterror(EILSEQ); memcpy(buf, tmpbuf, tmpsize); return tmpsize; } static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) { int codepage = cv->codepage; /* swap endian: 1200 <-> 1201 */ if (cv->mode & UNICODE_MODE_SWAPPED) codepage ^= 1; if (bufsize < 2) return seterror(EINVAL); if (codepage == 1200) /* little endian */ wbuf[0] = (buf[1] << 8) | buf[0]; else if (codepage == 1201) /* big endian */ wbuf[0] = (buf[0] << 8) | buf[1]; if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) { cv->mode |= UNICODE_MODE_BOM_DONE; if (wbuf[0] == 0xFFFE) { cv->mode |= UNICODE_MODE_SWAPPED; *wbufsize = 0; return 2; } else if (wbuf[0] == 0xFEFF) { *wbufsize = 0; return 2; } } if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) return seterror(EILSEQ); if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) { if (bufsize < 4) return seterror(EINVAL); if (codepage == 1200) /* little endian */ wbuf[1] = (buf[3] << 8) | buf[2]; else if (codepage == 1201) /* big endian */ wbuf[1] = (buf[2] << 8) | buf[3]; if 
(!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) return seterror(EILSEQ); *wbufsize = 2; return 4; } *wbufsize = 1; return 2; } static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) { if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) { int r; cv->mode |= UNICODE_MODE_BOM_DONE; if (bufsize < 2) return seterror(E2BIG); if (cv->codepage == 1200) /* little endian */ memcpy(buf, "\xFF\xFE", 2); else if (cv->codepage == 1201) /* big endian */ memcpy(buf, "\xFE\xFF", 2); r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); if (r == -1) return -1; return r + 2; } if (bufsize < 2) return seterror(E2BIG); if (cv->codepage == 1200) /* little endian */ { buf[0] = (wbuf[0] & 0x00FF); buf[1] = (wbuf[0] & 0xFF00) >> 8; } else if (cv->codepage == 1201) /* big endian */ { buf[0] = (wbuf[0] & 0xFF00) >> 8; buf[1] = (wbuf[0] & 0x00FF); } if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) { if (bufsize < 4) return seterror(E2BIG); if (cv->codepage == 1200) /* little endian */ { buf[2] = (wbuf[1] & 0x00FF); buf[3] = (wbuf[1] & 0xFF00) >> 8; } else if (cv->codepage == 1201) /* big endian */ { buf[2] = (wbuf[1] & 0xFF00) >> 8; buf[3] = (wbuf[1] & 0x00FF); } return 4; } return 2; } static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) { int codepage = cv->codepage; uint wc = 0xD800; /* swap endian: 12000 <-> 12001 */ if (cv->mode & UNICODE_MODE_SWAPPED) codepage ^= 1; if (bufsize < 4) return seterror(EINVAL); if (codepage == 12000) /* little endian */ wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; else if (codepage == 12001) /* big endian */ wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) { cv->mode |= UNICODE_MODE_BOM_DONE; if (wc == 0xFFFE0000) { cv->mode |= UNICODE_MODE_SWAPPED; *wbufsize = 0; return 4; } else if (wc == 0x0000FEFF) { *wbufsize = 0; return 4; } } if ((0xD800 <= wc && wc <= 
0xDFFF) || 0x10FFFF < wc) return seterror(EILSEQ); ucs4_to_utf16(wc, wbuf, wbufsize); return 4; } static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) { uint wc; if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) { int r; cv->mode |= UNICODE_MODE_BOM_DONE; if (bufsize < 4) return seterror(E2BIG); if (cv->codepage == 12000) /* little endian */ memcpy(buf, "\xFF\xFE\x00\x00", 4); else if (cv->codepage == 12001) /* big endian */ memcpy(buf, "\x00\x00\xFE\xFF", 4); r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); if (r == -1) return -1; return r + 4; } if (bufsize < 4) return seterror(E2BIG); wc = utf16_to_ucs4(wbuf); if (cv->codepage == 12000) /* little endian */ { buf[0] = wc & 0x000000FF; buf[1] = (wc & 0x0000FF00) >> 8; buf[2] = (wc & 0x00FF0000) >> 16; buf[3] = (wc & 0xFF000000) >> 24; } else if (cv->codepage == 12001) /* big endian */ { buf[0] = (wc & 0xFF000000) >> 24; buf[1] = (wc & 0x00FF0000) >> 16; buf[2] = (wc & 0x0000FF00) >> 8; buf[3] = wc & 0x000000FF; } return 4; } /* * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow * 1 byte Kana) * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte * Kana - SO/SI) * * MultiByteToWideChar() and WideCharToMultiByte() behave differently * depending on Windows version. On XP, WideCharToMultiByte() doesn't * terminate result sequence with ascii escape. But Vista does. * Use MLang instead. 
*/ #define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift)) #define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF) #define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF) #define ISO2022_SI 0 #define ISO2022_SO 1 /* shift in */ static const char iso2022_SI_seq[] = "\x0F"; /* shift out */ static const char iso2022_SO_seq[] = "\x0E"; typedef struct iso2022_esc_t iso2022_esc_t; struct iso2022_esc_t { const char *esc; int esc_len; int len; int cs; }; #define ISO2022JP_CS_ASCII 0 #define ISO2022JP_CS_JISX0201_ROMAN 1 #define ISO2022JP_CS_JISX0201_KANA 2 #define ISO2022JP_CS_JISX0208_1978 3 #define ISO2022JP_CS_JISX0208_1983 4 #define ISO2022JP_CS_JISX0212 5 static iso2022_esc_t iso2022jp_esc[] = { {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII}, {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN}, {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA}, {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */ {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983}, {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212}, {NULL, 0, 0, 0} }; static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) { iso2022_esc_t *iesc = iso2022jp_esc; char tmp[MB_CHAR_MAX]; int insize; HRESULT hr; DWORD dummy = 0; int len; int esc_len; int cs; int shift; int i; if (buf[0] == 0x1B) { for (i = 0; iesc[i].esc != NULL; ++i) { esc_len = iesc[i].esc_len; if (bufsize < esc_len) { if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) return seterror(EINVAL); } else { if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) { cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); *wbufsize = 0; return esc_len; } } } /* not supported escape sequence */ return seterror(EILSEQ); } else if (buf[0] == iso2022_SO_seq[0]) { cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); *wbufsize = 0; return 1; } else if (buf[0] == iso2022_SI_seq[0]) { cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); *wbufsize = 0; return 1; } cs = ISO2022_MODE_CS(cv->mode); shift 
= ISO2022_MODE_SHIFT(cv->mode); /* reset the mode for informal sequence */ if (buf[0] < 0x20) { cs = ISO2022JP_CS_ASCII; shift = ISO2022_SI; } len = iesc[cs].len; if (bufsize < len) return seterror(EINVAL); for (i = 0; i < len; ++i) if (!(buf[i] < 0x80)) return seterror(EILSEQ); esc_len = iesc[cs].esc_len; memcpy(tmp, iesc[cs].esc, esc_len); if (shift == ISO2022_SO) { memcpy(tmp + esc_len, iso2022_SO_seq, 1); esc_len += 1; } memcpy(tmp + esc_len, buf, len); if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && shift == ISO2022_SO) { /* XXX: shift-out cannot be used for mbtowc (both kernel and * mlang) */ esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); memcpy(tmp + esc_len, buf, len); } insize = len + esc_len; hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); if (hr != S_OK || insize != len + esc_len) return seterror(EILSEQ); /* Check for conversion error. Assuming defaultChar is 0x3F. */ /* ascii should be converted from ascii */ if (wbuf[0] == buf[0] && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) return seterror(EILSEQ); /* reset the mode for informal sequence */ if (cv->mode != ISO2022_MODE(cs, shift)) cv->mode = ISO2022_MODE(cs, shift); return len; } static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) { iso2022_esc_t *iesc = iso2022jp_esc; char tmp[MB_CHAR_MAX]; int tmpsize = MB_CHAR_MAX; int insize = wbufsize; HRESULT hr; DWORD dummy = 0; int len; int esc_len; int cs; int shift; int i; /* * MultiByte = [escape sequence] + character + [escape sequence] * * Whether trailing escape sequence is added depends on which API is * used (kernel or MLang, and its version). 
*/ hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); if (hr != S_OK || insize != wbufsize) return seterror(EILSEQ); else if (bufsize < tmpsize) return seterror(E2BIG); if (tmpsize == 1) { cs = ISO2022JP_CS_ASCII; esc_len = 0; } else { for (i = 1; iesc[i].esc != NULL; ++i) { esc_len = iesc[i].esc_len; if (strncmp(tmp, iesc[i].esc, esc_len) == 0) { cs = iesc[i].cs; break; } } if (iesc[i].esc == NULL) /* not supported escape sequence */ return seterror(EILSEQ); } shift = ISO2022_SI; if (tmp[esc_len] == iso2022_SO_seq[0]) { shift = ISO2022_SO; esc_len += 1; } len = iesc[cs].len; /* Check for converting error. Assuming defaultChar is 0x3F. */ /* ascii should be converted from ascii */ if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) return seterror(EILSEQ); else if (tmpsize < esc_len + len) return seterror(EILSEQ); if (cv->mode == ISO2022_MODE(cs, shift)) { /* remove escape sequence */ if (esc_len != 0) memmove(tmp, tmp + esc_len, len); esc_len = 0; } else { if (cs == ISO2022JP_CS_ASCII) { esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; memmove(tmp + esc_len, tmp, len); memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); } if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) { /* shift-in before changing to other mode */ memmove(tmp + 1, tmp, len + esc_len); memcpy(tmp, iso2022_SI_seq, 1); esc_len += 1; } } if (bufsize < len + esc_len) return seterror(E2BIG); memcpy(buf, tmp, len + esc_len); cv->mode = ISO2022_MODE(cs, shift); return len + esc_len; } static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) { iso2022_esc_t *iesc = iso2022jp_esc; int esc_len; if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) { esc_len = 0; if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) esc_len += 1; if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; if (bufsize < esc_len) return seterror(E2BIG); esc_len = 0; if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) { memcpy(buf, 
iso2022_SI_seq, 1); esc_len += 1; } if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) { memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, iesc[ISO2022JP_CS_ASCII].esc_len); esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; } return esc_len; } return 0; } #if defined(MAKE_DLL) && defined(USE_LIBICONV_DLL) BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { switch( fdwReason ) { case DLL_PROCESS_ATTACH: hwiniconv = (HMODULE)hinstDLL; break; case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: case DLL_PROCESS_DETACH: break; } return TRUE; } #endif #if defined(MAKE_EXE) #include #include #include int main(int argc, char **argv) { char *fromcode = NULL; char *tocode = NULL; int i; char inbuf[BUFSIZ]; char outbuf[BUFSIZ]; const char *pin; char *pout; size_t inbytesleft; size_t outbytesleft; size_t rest = 0; iconv_t cd; size_t r; FILE *in = stdin; FILE *out = stdout; int ignore = 0; char *p; _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); for (i = 1; i < argc; ++i) { if (strcmp(argv[i], "-l") == 0) { for (i = 0; codepage_alias[i].name != NULL; ++i) printf("%s\n", codepage_alias[i].name); return 0; } if (strcmp(argv[i], "-f") == 0) fromcode = argv[++i]; else if (strcmp(argv[i], "-t") == 0) tocode = argv[++i]; else if (strcmp(argv[i], "-c") == 0) ignore = 1; else if (strcmp(argv[i], "--output") == 0) { out = fopen(argv[++i], "wb"); if(out == NULL) { fprintf(stderr, "cannot open %s\n", argv[i]); return 1; } } else { in = fopen(argv[i], "rb"); if (in == NULL) { fprintf(stderr, "cannot open %s\n", argv[i]); return 1; } break; } } if (fromcode == NULL || tocode == NULL) { printf("usage: %s [-c] -f from-enc -t to-enc [file]\n", argv[0]); return 0; } if (ignore) { p = tocode; tocode = (char *)malloc(strlen(p) + strlen("//IGNORE") + 1); if (tocode == NULL) { perror("fatal error"); return 1; } strcpy(tocode, p); strcat(tocode, "//IGNORE"); } cd = iconv_open(tocode, fromcode); if (cd == (iconv_t)(-1)) { perror("iconv_open error"); 
return 1; } while ((inbytesleft = fread(inbuf + rest, 1, sizeof(inbuf) - rest, in)) != 0 || rest != 0) { inbytesleft += rest; pin = inbuf; pout = outbuf; outbytesleft = sizeof(outbuf); r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); if (r == (size_t)(-1) && errno != E2BIG && (errno != EINVAL || feof(in))) { perror("conversion error"); return 1; } memmove(inbuf, pin, inbytesleft); rest = inbytesleft; } pout = outbuf; outbytesleft = sizeof(outbuf); r = iconv(cd, NULL, NULL, &pout, &outbytesleft); fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); if (r == (size_t)(-1)) { perror("conversion error"); return 1; } iconv_close(cd); return 0; } #endif win-iconv-0.0.8/win_iconv_test.c000066400000000000000000000236121265325504600166420ustar00rootroot00000000000000 #ifdef USE_ICONV_H #include #include #include #include #include #else #include "win_iconv.c" #endif #include const char * tohex(const char *str, int size) { static char buf[BUFSIZ]; char *pbuf = buf; int i; buf[0] = 0; for (i = 0; i < size; ++i) pbuf += sprintf(pbuf, "%02X", str[i] & 0xFF); return buf; } const char * errstr(int errcode) { static char buf[BUFSIZ]; switch (errcode) { case 0: return "NOERROR"; case EINVAL: return "EINVAL"; case EILSEQ: return "EILSEQ"; case E2BIG: return "E2BIG"; } sprintf(buf, "%d\n", errcode); return buf; } #ifdef USE_LIBICONV_DLL int use_dll; int setdll(const char *dllpath) { char buf[BUFSIZ]; rec_iconv_t cd; sprintf(buf, "WINICONV_LIBICONV_DLL=%s", dllpath); putenv(buf); if (libiconv_iconv_open(&cd, "ascii", "ascii")) { FreeLibrary(cd.hlibiconv); use_dll = TRUE; return TRUE; } use_dll = FALSE; return FALSE; } #endif /* * We can test the codepage that is installed in the system. 
*/ int check_enc(const char *encname, int codepage) { iconv_t cd; int cp; cd = iconv_open("utf-8", encname); if (cd == (iconv_t)(-1)) { printf("%s(%d) IS NOT SUPPORTED: SKIP THE TEST\n", encname, codepage); return FALSE; } #ifndef USE_ICONV_H cp = ((rec_iconv_t *)cd)->from.codepage; if (cp != codepage) { printf("%s(%d) ALIAS IS MAPPED TO DIFFERENT CODEPAGE (%d)\n", encname, codepage, cp); exit(1); } #endif iconv_close(cd); return TRUE; } void test(const char *from, const char *fromstr, int fromsize, const char *to, const char *tostr, int tosize, int errcode, int bufsize, int line) { char outbuf[BUFSIZ]; const char *pin; char *pout; size_t inbytesleft; size_t outbytesleft; iconv_t cd; size_t r; #ifdef USE_LIBICONV_DLL char dllpath[_MAX_PATH]; #endif cd = iconv_open(to, from); if (cd == (iconv_t)(-1)) { printf("%s -> %s: NG: INVALID ENCODING NAME: line=%d\n", from, to, line); exit(1); } #ifdef USE_LIBICONV_DLL if (((rec_iconv_t *)cd)->hlibiconv != NULL) GetModuleFileNameA(((rec_iconv_t *)cd)->hlibiconv, dllpath, sizeof(dllpath)); if (use_dll && ((rec_iconv_t *)cd)->hlibiconv == NULL) { printf("%s: %s -> %s: NG: FAILED TO USE DLL: line=%d\n", dllpath, from, to, line); exit(1); } else if (!use_dll && ((rec_iconv_t *)cd)->hlibiconv != NULL) { printf("%s: %s -> %s: NG: DLL IS LOADED UNEXPECTEDLY: line=%d\n", dllpath, from, to, line); exit(1); } #endif errno = 0; pin = (char *)fromstr; pout = outbuf; inbytesleft = fromsize; outbytesleft = bufsize; r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); if (r != (size_t)(-1)) r = iconv(cd, NULL, NULL, &pout, &outbytesleft); *pout = 0; #ifdef USE_LIBICONV_DLL if (use_dll) printf("%s: ", dllpath); #endif printf("%s(%s) -> ", from, tohex(fromstr, fromsize)); printf("%s(%s%s%s): ", to, tohex(tostr, tosize), errcode == 0 ? "" : ":", errcode == 0 ? 
"" : errstr(errcode)); if (strcmp(outbuf, tostr) == 0 && errno == errcode) printf("OK\n"); else { printf("RESULT(%s:%s): ", tohex(outbuf, sizeof(outbuf) - outbytesleft), errstr(errno)); printf("NG: line=%d\n", line); exit(1); } } #define STATIC_STRLEN(arr) (sizeof(arr) - 1) #define success(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), 0, BUFSIZ, __LINE__) #define einval(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EINVAL, BUFSIZ, __LINE__) #define eilseq(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EILSEQ, BUFSIZ, __LINE__) #define e2big(from, fromstr, to, tostr, bufsize) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), E2BIG, bufsize, __LINE__) int main(int argc, char **argv) { #ifdef USE_LIBICONV_DLL /* test use of dll if $DEFAULT_LIBICONV_DLL was defined. */ if (setdll("")) { success("ascii", "ABC", "ascii", "ABC"); success("ascii", "ABC", "utf-16be", "\x00\x41\x00\x42\x00\x43"); } else { printf("\nDLL TEST IS SKIPPED\n\n"); } setdll("none"); #endif if (check_enc("ascii", 20127)) { success("ascii", "ABC", "ascii", "ABC"); eilseq("ascii", "\x80", "ascii", ""); eilseq("ascii", "\xFF", "ascii", ""); } /* unicode (CP1200 CP1201 CP12000 CP12001 CP65001) */ if (check_enc("utf-8", 65001) && check_enc("utf-16be", 1201) && check_enc("utf-16le", 1200) && check_enc("utf-32be", 12001) && check_enc("utf-32le", 12000) ) { /* Test the BOM behavior * 1. Remove the BOM when "fromcode" is utf-16 or utf-32. * 2. Add the BOM when "tocode" is utf-16 or utf-32. 
*/ success("utf-16", "\xFE\xFF\x01\x02", "utf-16be", "\x01\x02"); success("utf-16", "\xFF\xFE\x02\x01", "utf-16be", "\x01\x02"); success("utf-32", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\x01\x02"); success("utf-32", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\x01\x02"); success("utf-16", "\xFE\xFF\x00\x01", "utf-8", "\x01"); #ifndef GLIB_COMPILATION success("utf-8", "\x01", "utf-16", "\xFE\xFF\x00\x01"); success("utf-8", "\x01", "utf-32", "\x00\x00\xFE\xFF\x00\x00\x00\x01"); #else success("utf-8", "\x01", "utf-16", "\xFF\xFE\x01\x00"); success("utf-8", "\x01", "utf-32", "\xFF\xFE\x00\x00\x01\x00\x00\x00"); #endif success("utf-16be", "\xFE\xFF\x01\x02", "utf-16be", "\xFE\xFF\x01\x02"); success("utf-16le", "\xFF\xFE\x02\x01", "utf-16be", "\xFE\xFF\x01\x02"); success("utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); success("utf-32le", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); success("utf-16be", "\xFE\xFF\x00\x01", "utf-8", "\xEF\xBB\xBF\x01"); success("utf-8", "\xEF\xBB\xBF\x01", "utf-8", "\xEF\xBB\xBF\x01"); success("utf-16be", "\x01\x02", "utf-16le", "\x02\x01"); success("utf-16le", "\x02\x01", "utf-16be", "\x01\x02"); success("utf-16be", "\xFE\xFF", "utf-16le", "\xFF\xFE"); success("utf-16le", "\xFF\xFE", "utf-16be", "\xFE\xFF"); success("utf-32be", "\x00\x00\x03\x04", "utf-32le", "\x04\x03\x00\x00"); success("utf-32le", "\x04\x03\x00\x00", "utf-32be", "\x00\x00\x03\x04"); success("utf-32be", "\x00\x00\xFF\xFF", "utf-16be", "\xFF\xFF"); success("utf-16be", "\xFF\xFF", "utf-32be", "\x00\x00\xFF\xFF"); success("utf-32be", "\x00\x01\x00\x00", "utf-16be", "\xD8\x00\xDC\x00"); success("utf-16be", "\xD8\x00\xDC\x00", "utf-32be", "\x00\x01\x00\x00"); success("utf-32be", "\x00\x10\xFF\xFF", "utf-16be", "\xDB\xFF\xDF\xFF"); success("utf-16be", "\xDB\xFF\xDF\xFF", "utf-32be", "\x00\x10\xFF\xFF"); eilseq("utf-32be", "\x00\x11\x00\x00", "utf-16be", ""); 
eilseq("utf-16be", "\xDB\xFF\xE0\x00", "utf-32be", ""); success("utf-8", "\xE3\x81\x82", "utf-16be", "\x30\x42"); einval("utf-8", "\xE3", "utf-16be", ""); } /* Japanese (CP932 CP20932 CP50220 CP50221 CP50222 CP51932) */ if (check_enc("cp932", 932) && check_enc("cp20932", 20932) && check_enc("euc-jp", 51932) && check_enc("cp50220", 50220) && check_enc("cp50221", 50221) && check_enc("cp50222", 50222) && check_enc("iso-2022-jp", 50221)) { /* Test the compatibility for each other Japanese codepage. * And validate the escape sequence handling for iso-2022-jp. */ success("utf-16be", "\xFF\x5E", "cp932", "\x81\x60"); success("utf-16be", "\x30\x1C", "cp932", "\x81\x60"); success("utf-16be", "\xFF\x5E", "cp932//nocompat", "\x81\x60"); eilseq("utf-16be", "\x30\x1C", "cp932//nocompat", ""); success("euc-jp", "\xA4\xA2", "utf-16be", "\x30\x42"); einval("euc-jp", "\xA4\xA2\xA4", "utf-16be", "\x30\x42"); eilseq("euc-jp", "\xA4\xA2\xFF\xFF", "utf-16be", "\x30\x42"); success("cp932", "\x81\x60", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); success("UTF-16BE", "\xFF\x5E", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); eilseq("UTF-16BE", "\x30\x1C", "iso-2022-jp//nocompat", ""); success("UTF-16BE", "\x30\x42\x30\x44", "iso-2022-jp", "\x1B\x24\x42\x24\x22\x24\x24\x1B\x28\x42"); success("iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42", "UTF-16BE", "\xFF\x5E"); } /* * test for //translit * U+FF41 (FULLWIDTH LATIN SMALL LETTER A) <-> U+0062 (LATIN SMALL LETTER A) */ eilseq("UTF-16BE", "\xFF\x41", "iso-8859-1", ""); success("UTF-16BE", "\xFF\x41", "iso-8859-1//translit", "a"); /* * test for //translit * Some character, not in "to" encoding -> DEFAULT CHARACTER (maybe "?") */ eilseq("UTF-16BE", "\x30\x42", "ascii", ""); success("UTF-16BE", "\x30\x42", "ascii//translit", "?"); /* * test for //ignore */ eilseq("UTF-8", "\xFF A \xFF B", "ascii//ignore", " A B"); eilseq("UTF-8", "\xEF\xBC\xA1 A \xEF\xBC\xA2 B", "ascii//ignore", " A B"); eilseq("UTF-8", "\xEF\x01 A \xEF\x02 B", 
"ascii//ignore", "\x01 A \x02 B"); /* * TODO: * Test for state after iconv() failed. * Ensure iconv() error is safe and continuable. */ return 0; }