• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

ext/nkf/nkf-utf8/nkf.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
00003  * Copyright (c) 1996-2010, The nkf Project.
00004  *
00005  * This software is provided 'as-is', without any express or implied
00006  * warranty. In no event will the authors be held liable for any damages
00007  * arising from the use of this software.
00008  *
00009  * Permission is granted to anyone to use this software for any purpose,
00010  * including commercial applications, and to alter it and redistribute it
00011  * freely, subject to the following restrictions:
00012  *
00013  * 1. The origin of this software must not be misrepresented; you must not
00014  * claim that you wrote the original software. If you use this software
00015  * in a product, an acknowledgment in the product documentation would be
00016  * appreciated but is not required.
00017  *
00018  * 2. Altered source versions must be plainly marked as such, and must not be
00019  * misrepresented as being the original software.
00020  *
00021  * 3. This notice may not be removed or altered from any source distribution.
00022  */
00023 #define NKF_VERSION "2.1.1"
00024 #define NKF_RELEASE_DATE "2010-04-28"
00025 #define COPY_RIGHT \
00026     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
00027     "Copyright (C) 1996-2010, The nkf Project."
00028 
00029 #include "config.h"
00030 #include "nkf.h"
00031 #include "utf8tbl.h"
00032 #ifdef __WIN32__
00033 #include <windows.h>
00034 #include <locale.h>
00035 #endif
00036 #if defined(__OS2__)
00037 # define INCL_DOS
00038 # define INCL_DOSERRORS
00039 # include <os2.h>
00040 #endif
00041 #include <assert.h>
00042 
00043 
00044 /* state of output_mode and input_mode
00045 
00046    c2           0 means ASCII
00047    JIS_X_0201_1976_K
00048    ISO_8859_1
00049    JIS_X_0208
00050    EOF      all termination
00051    c1           32bit data
00052 
00053  */
00054 
00055 /* MIME ENCODE */
00056 
00057 #define         FIXED_MIME      7
00058 #define         STRICT_MIME     8
00059 
00060 /* byte order: values held by input_endian and output_endian (both default to ENDIAN_BIG) */
00061 enum byte_order {
00062     ENDIAN_BIG    = 1,
00063     ENDIAN_LITTLE = 2,
00064     ENDIAN_2143   = 3, /* NOTE(review): presumably the mixed 2-1-4-3 byte permutation of a 4-byte unit -- confirm */
00065     ENDIAN_3412   = 4  /* NOTE(review): presumably the mixed 3-4-1-2 byte permutation of a 4-byte unit -- confirm */
00066 };
00067 
00068 /* ASCII CODE */
00069 
00070 #define         BS      0x08
00071 #define         TAB     0x09
00072 #define         LF      0x0a
00073 #define         CR      0x0d
00074 #define         ESC     0x1b
00075 #define         SP      0x20
00076 #define         DEL     0x7f
00077 #define         SI      0x0f
00078 #define         SO      0x0e
00079 #define         SS2     0x8e
00080 #define         SS3     0x8f
00081 #define         CRLF    0x0D0A
00082 
00083 
00084 /* encodings */
00085 
00086 enum nkf_encodings {
00087     ASCII,
00088     ISO_8859_1,
00089     ISO_2022_JP,
00090     CP50220,
00091     CP50221,
00092     CP50222,
00093     ISO_2022_JP_1,
00094     ISO_2022_JP_3,
00095     ISO_2022_JP_2004,
00096     SHIFT_JIS,
00097     WINDOWS_31J,
00098     CP10001,
00099     EUC_JP,
00100     EUCJP_NKF,
00101     CP51932,
00102     EUCJP_MS,
00103     EUCJP_ASCII,
00104     SHIFT_JISX0213,
00105     SHIFT_JIS_2004,
00106     EUC_JISX0213,
00107     EUC_JIS_2004,
00108     UTF_8,
00109     UTF_8N,
00110     UTF_8_BOM,
00111     UTF8_MAC,
00112     UTF_16,
00113     UTF_16BE,
00114     UTF_16BE_BOM,
00115     UTF_16LE,
00116     UTF_16LE_BOM,
00117     UTF_32,
00118     UTF_32BE,
00119     UTF_32BE_BOM,
00120     UTF_32LE,
00121     UTF_32LE_BOM,
00122     BINARY,
00123     NKF_ENCODING_TABLE_SIZE,
00124     JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
00125     /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
00126     /* JIS_X_0208_1978   = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
00127     /* JIS_X_0208_1983   = 0x1087, */ /* B */ /* JIS C 6226-1983 */
00128     JIS_X_0208        = 0x1168, /* @B */
00129     JIS_X_0212        = 0x1159, /* D */
00130     /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
00131     JIS_X_0213_2 = 0x1229, /* P */
00132     JIS_X_0213_1 = 0x1233 /* Q */
00133 };
00134 
00135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
00139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
00140 static void j_oconv(nkf_char c2, nkf_char c1);
00141 static void s_oconv(nkf_char c2, nkf_char c1);
00142 static void e_oconv(nkf_char c2, nkf_char c1);
00143 static void w_oconv(nkf_char c2, nkf_char c1);
00144 static void w_oconv16(nkf_char c2, nkf_char c1);
00145 static void w_oconv32(nkf_char c2, nkf_char c1);
00146 
/*
 * A base ("native") encoding: a name plus the converter pair that every
 * nkf_encoding built on it shares (see the NkfEncoding* instances below).
 */
00147 typedef struct {
00148     const char *name; /* base encoding name, e.g. "UTF-8" */
00149     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0); /* e.g. w_iconv; NOTE(review): presumably the input-side converter -- confirm */
00150     void (*oconv)(nkf_char c2, nkf_char c1); /* e.g. w_oconv; NOTE(review): presumably the output-side converter -- confirm */
00151 } nkf_native_encoding;
00152 
00153 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
00154 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
00155 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
00156 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
00157 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
00158 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
00159 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
00160 
/*
 * One row of nkf_encoding_table: a concrete encoding and the base encoding
 * whose converters it uses.
 */
00161 typedef struct {
00162     const int id; /* enum nkf_encodings value; -1 in the table's terminating row */
00163     const char *name; /* encoding name, e.g. "Windows-31J"; NULL in the terminating row */
00164     const nkf_native_encoding *base_encoding; /* shared converter pair; NULL in the terminating row */
00165 } nkf_encoding;
00166 
00167 nkf_encoding nkf_encoding_table[] = {
00168     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
00169     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
00170     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
00171     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
00172     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
00173     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
00174     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
00175     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
00176     {ISO_2022_JP_2004,  "ISO-2022-JP-2004",     &NkfEncodingISO_2022_JP},
00177     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
00178     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
00179     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
00180     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
00181     {EUCJP_NKF,         "eucJP-nkf",            &NkfEncodingEUC_JP},
00182     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
00183     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
00184     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
00185     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
00186     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
00187     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
00188     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
00189     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
00190     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
00191     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
00192     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
00193     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
00194     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
00195     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
00196     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
00197     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
00198     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
00199     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
00200     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
00201     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
00202     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
00203     {BINARY,            "BINARY",               &NkfEncodingASCII},
00204     {-1,                NULL,                   NULL}
00205 };
00206 
00207 struct {
00208     const char *name;
00209     const int id;
00210 } encoding_name_to_id_table[] = {
00211     {"US-ASCII",                ASCII},
00212     {"ASCII",                   ASCII},
00213     {"646",                     ASCII},
00214     {"ROMAN8",                  ASCII},
00215     {"ISO-2022-JP",             ISO_2022_JP},
00216     {"ISO2022JP-CP932",         CP50220},
00217     {"CP50220",                 CP50220},
00218     {"CP50221",                 CP50221},
00219     {"CSISO2022JP",             CP50221},
00220     {"CP50222",                 CP50222},
00221     {"ISO-2022-JP-1",           ISO_2022_JP_1},
00222     {"ISO-2022-JP-3",           ISO_2022_JP_3},
00223     {"ISO-2022-JP-2004",        ISO_2022_JP_2004},
00224     {"SHIFT_JIS",               SHIFT_JIS},
00225     {"SJIS",                    SHIFT_JIS},
00226     {"MS_Kanji",                SHIFT_JIS},
00227     {"PCK",                     SHIFT_JIS},
00228     {"WINDOWS-31J",             WINDOWS_31J},
00229     {"CSWINDOWS31J",            WINDOWS_31J},
00230     {"CP932",                   WINDOWS_31J},
00231     {"MS932",                   WINDOWS_31J},
00232     {"CP10001",                 CP10001},
00233     {"EUCJP",                   EUC_JP},
00234     {"EUC-JP",                  EUC_JP},
00235     {"EUCJP-NKF",               EUCJP_NKF},
00236     {"CP51932",                 CP51932},
00237     {"EUC-JP-MS",               EUCJP_MS},
00238     {"EUCJP-MS",                EUCJP_MS},
00239     {"EUCJPMS",                 EUCJP_MS},
00240     {"EUC-JP-ASCII",            EUCJP_ASCII},
00241     {"EUCJP-ASCII",             EUCJP_ASCII},
00242     {"SHIFT_JISX0213",          SHIFT_JISX0213},
00243     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
00244     {"EUC-JISX0213",            EUC_JISX0213},
00245     {"EUC-JIS-2004",            EUC_JIS_2004},
00246     {"UTF-8",                   UTF_8},
00247     {"UTF-8N",                  UTF_8N},
00248     {"UTF-8-BOM",               UTF_8_BOM},
00249     {"UTF8-MAC",                UTF8_MAC},
00250     {"UTF-8-MAC",               UTF8_MAC},
00251     {"UTF-16",                  UTF_16},
00252     {"UTF-16BE",                UTF_16BE},
00253     {"UTF-16BE-BOM",            UTF_16BE_BOM},
00254     {"UTF-16LE",                UTF_16LE},
00255     {"UTF-16LE-BOM",            UTF_16LE_BOM},
00256     {"UTF-32",                  UTF_32},
00257     {"UTF-32BE",                UTF_32BE},
00258     {"UTF-32BE-BOM",            UTF_32BE_BOM},
00259     {"UTF-32LE",                UTF_32LE},
00260     {"UTF-32LE-BOM",            UTF_32LE_BOM},
00261     {"BINARY",                  BINARY},
00262     {NULL,                      -1}
00263 };
00264 
00265 #if defined(DEFAULT_CODE_JIS)
00266 #define     DEFAULT_ENCIDX ISO_2022_JP
00267 #elif defined(DEFAULT_CODE_SJIS)
00268 #define     DEFAULT_ENCIDX SHIFT_JIS
00269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
00270 #define     DEFAULT_ENCIDX WINDOWS_31J
00271 #elif defined(DEFAULT_CODE_EUC)
00272 #define     DEFAULT_ENCIDX EUC_JP
00273 #elif defined(DEFAULT_CODE_UTF8)
00274 #define     DEFAULT_ENCIDX UTF_8
00275 #endif
00276 
00277 
00278 #define         is_alnum(c)  \
00279     (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
00280 
00281 /* I don't trust the portability of toupper */
00282 #define nkf_toupper(c)  (('a'<=c && c<='z')?(c-('a'-'A')):c)
00283 #define nkf_isoctal(c)  ('0'<=c && c<='7')
00284 #define nkf_isdigit(c)  ('0'<=c && c<='9')
00285 #define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
00286 #define nkf_isblank(c) (c == SP || c == TAB)
00287 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF)
00288 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
00289 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
00290 #define nkf_isprint(c) (SP<=c && c<='~')
00291 #define nkf_isgraph(c) ('!'<=c && c<='~')
00292 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
00293                     ('A'<=c&&c<='F') ? (c-'A'+10) : \
00294                     ('a'<=c&&c<='f') ? (c-'a'+10) : 0)
00295 #define bin2hex(c) ("0123456789ABCDEF"[c&15])
00296 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
00297 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \
00298                               ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
00299                                && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
00300 
00301 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
00302 #define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c <= 0x5F)
00303 
00304 #define         HOLD_SIZE       1024
00305 #if defined(INT_IS_SHORT)
00306 #define         IOBUF_SIZE      2048
00307 #else
00308 #define         IOBUF_SIZE      16384
00309 #endif
00310 
00311 #define         DEFAULT_J       'B'
00312 #define         DEFAULT_R       'B'
00313 
00314 
00315 #define         GETA1   0x22
00316 #define         GETA2   0x2e
00317 
00318 
00319 /* MIME preprocessor */
00320 
00321 #ifdef EASYWIN /*Easy Win */
00322 extern POINT _BufferSize;
00323 #endif
00324 
/*
 * Per-encoding record for the entries of input_code_list, which is
 * terminated by a record whose name is NULL.
 * NOTE(review): stat/score/index/buf look like per-candidate detection state
 * maintained by status_func -- confirm against e_status/s_status/w_status.
 */
00325 struct input_code{
00326     const char *name; /* encoding name, e.g. "EUC-JP" */
00327     nkf_char stat;
00328     nkf_char score;
00329     nkf_char index;
00330     nkf_char buf[3];
00331     void (*status_func)(struct input_code *, nkf_char); /* e.g. e_status; NULL for the UTF-16 and UTF-32 entries */
00332     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* e.g. e_iconv */
00333     int _file_stat;
00334 };
00335 
00336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
00337 static nkf_encoding *input_encoding = NULL;
00338 static nkf_encoding *output_encoding = NULL;
00339 
00340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
00341 /* UCS Mapping
00342  * 0: Shift_JIS, eucJP-ascii
00343  * 1: eucJP-ms
00344  * 2: CP932, CP51932
00345  * 3: CP10001
00346  */
00347 #define UCS_MAP_ASCII   0
00348 #define UCS_MAP_MS      1
00349 #define UCS_MAP_CP932   2
00350 #define UCS_MAP_CP10001 3
00351 static int ms_ucs_map_f = UCS_MAP_ASCII;
00352 #endif
00353 #ifdef UTF8_INPUT_ENABLE
00354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
00355 static  int     no_cp932ext_f = FALSE;
00356 /* ignore ZERO WIDTH NO-BREAK SPACE */
00357 static  int     no_best_fit_chars_f = FALSE;
00358 static  int     input_endian = ENDIAN_BIG;
00359 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
00360 static  void    (*encode_fallback)(nkf_char c) = NULL;
00361 static  void    w_status(struct input_code *, nkf_char);
00362 #endif
00363 #ifdef UTF8_OUTPUT_ENABLE
00364 static  int     output_bom_f = FALSE;
00365 static  int     output_endian = ENDIAN_BIG;
00366 #endif
00367 
00368 static  void    std_putc(nkf_char c);
00369 static  nkf_char     std_getc(FILE *f);
00370 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
00371 
00372 static  nkf_char     broken_getc(FILE *f);
00373 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
00374 
00375 static  nkf_char     mime_getc(FILE *f);
00376 
00377 static void mime_putc(nkf_char c);
00378 
00379 /* buffers */
00380 
00381 #if !defined(PERL_XS) && !defined(WIN32DLL)
00382 static unsigned char   stdibuf[IOBUF_SIZE];
00383 static unsigned char   stdobuf[IOBUF_SIZE];
00384 #endif
00385 
00386 #define NKF_UNSPECIFIED (-TRUE)
00387 
00388 /* flags */
00389 static int             unbuf_f = FALSE;
00390 static int             estab_f = FALSE;
00391 static int             nop_f = FALSE;
00392 static int             binmode_f = TRUE;       /* binary mode */
00393 static int             rot_f = FALSE;          /* rot13/47 mode */
00394 static int             hira_f = FALSE;          /* hira/kata henkan */
00395 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
00396 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
00397 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
00398 static int             mimebuf_f = FALSE;      /* MIME buffered input */
00399 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
00400 static int             iso8859_f = FALSE;      /* ISO8859 through */
00401 static int             mimeout_f = FALSE;       /* base64 mode */
00402 static int             x0201_f = NKF_UNSPECIFIED;   /* convert JIS X 0201 */
00403 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
00404 
00405 #ifdef UNICODE_NORMALIZATION
00406 static int nfc_f = FALSE;
00407 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
00408 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
00409 #endif
00410 
00411 #ifdef INPUT_OPTION
00412 static int cap_f = FALSE;
00413 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
00414 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
00415 
00416 static int url_f = FALSE;
00417 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
00418 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
00419 #endif
00420 
00421 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
00422 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
00423 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
00424 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
00425 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
00426 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
00427 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
00428 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
00429 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
00430 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
00431 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
00432 
00433 #ifdef NUMCHAR_OPTION
00434 static int numchar_f = FALSE;
00435 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
00436 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
00437 #endif
00438 
00439 #ifdef CHECK_OPTION
00440 static int noout_f = FALSE;
00441 static void no_putc(nkf_char c);
00442 static int debug_f = FALSE;
00443 static void debug(const char *str);
00444 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
00445 #endif
00446 
00447 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
00448 static  void    set_input_codename(const char *codename);
00449 
00450 #ifdef EXEC_IO
00451 static int exec_f = 0;
00452 #endif
00453 
00454 #ifdef SHIFTJIS_CP932
00455 /* invert IBM extended characters to others */
00456 static int cp51932_f = FALSE;
00457 
00458 /* invert NEC-selected IBM extended characters to IBM extended characters */
00459 static int cp932inv_f = TRUE;
00460 
00461 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
00462 #endif /* SHIFTJIS_CP932 */
00463 
00464 static int x0212_f = FALSE;
00465 static int x0213_f = FALSE;
00466 
00467 static unsigned char prefix_table[256];
00468 
00469 static void e_status(struct input_code *, nkf_char);
00470 static void s_status(struct input_code *, nkf_char);
00471 
00472 struct input_code input_code_list[] = {
00473     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
00474     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
00475 #ifdef UTF8_INPUT_ENABLE
00476     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
00477     {"UTF-16",     0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
00478     {"UTF-32",     0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
00479 #endif
00480     {NULL,        0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
00481 };
00482 
00483 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
00484 static int              base64_count = 0;
00485 
00486 /* X0208 -> ASCII converter */
00487 
00488 /* fold parameter */
00489 static int             f_line = 0;    /* chars in line */
00490 static int             f_prev = 0;
00491 static int             fold_preserve_f = FALSE; /* preserve new lines */
00492 static int             fold_f  = FALSE;
00493 static int             fold_len  = 0;
00494 
00495 /* options */
00496 static unsigned char   kanji_intro = DEFAULT_J;
00497 static unsigned char   ascii_intro = DEFAULT_R;
00498 
00499 /* Folding */
00500 
00501 #define FOLD_MARGIN  10
00502 #define DEFAULT_FOLD 60
00503 
00504 static int             fold_margin  = FOLD_MARGIN;
00505 
00506 /* process default */
00507 
00508 static nkf_char
00509 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
00510 {
00511     fprintf(stderr,"nkf internal module connection failure.\n");
00512     exit(EXIT_FAILURE);
00513     return 0; /* LINT */
00514 }
00515 
00516 static void
00517 no_connection(nkf_char c2, nkf_char c1)
00518 {
00519     no_connection2(c2,c1,0);
00520 }
00521 
00522 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
00523 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
00524 
00525 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
00526 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
00527 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
00528 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
00529 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
00530 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
00531 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
00532 
00533 /* static redirections */
00534 
00535 static  void   (*o_putc)(nkf_char c) = std_putc;
00536 
00537 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
00538 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
00539 
00540 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
00541 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
00542 
00543 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
00544 
00545 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
00546 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
00547 
00548 /* for strict mime */
00549 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
00550 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
00551 
00552 /* Global states */
00553 static int output_mode = ASCII;    /* output kanji mode */
00554 static int input_mode =  ASCII;    /* input kanji mode */
00555 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
00556 
00557 /* X0201 / X0208 conversion tables */
00558 
00559 /* X0201 kana conversion table */
00560 /* 90-9F A0-DF */
00561 static const unsigned char cv[]= {
00562     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
00563     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
00564     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
00565     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
00566     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
00567     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
00568     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
00569     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
00570     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
00571     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
00572     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
00573     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
00574     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
00575     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
00576     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
00577     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
00578     0x00,0x00};
00579 
00580 
00581 /* X0201 kana conversion table for dakuten */
00582 /* 90-9F A0-DF */
00583 static const unsigned char dv[]= {
00584     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00585     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00586     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00587     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00588     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
00589     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
00590     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
00591     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
00592     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
00593     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
00594     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
00595     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
00596     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00597     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00598     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00599     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00600     0x00,0x00};
00601 
00602 /* X0201 kana conversion table for han-dakuten */
00603 /* 90-9F A0-DF */
00604 static const unsigned char ev[]= {
00605     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00606     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00607     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00608     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00609     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00610     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00611     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00612     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00613     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00614     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00615     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
00616     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
00617     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00618     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00619     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00620     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00621     0x00,0x00};
00622 
00623 
00624 /* X0208 kigou conversion table */
00625 /* 0x8140 - 0x819e */
00626 static const unsigned char fv[] = {
00627 
00628     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
00629     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
00630     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
00631     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
00632     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
00633     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
00634     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
00635     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
00636     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
00637     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00638     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
00639     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
00640 } ;
00641 
00642 
00643 
00644 static int option_mode = 0;
00645 static int             file_out_f = FALSE;
00646 #ifdef OVERWRITE
00647 static int             overwrite_f = FALSE;
00648 static int             preserve_time_f = FALSE;
00649 static int             backup_f = FALSE;
00650 static char            *backup_suffix = "";
00651 #endif
00652 
00653 static int eolmode_f = 0;   /* CR, LF, CRLF */
00654 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
00655 static nkf_char prev_cr = 0; /* CR or 0 */
00656 #ifdef EASYWIN /*Easy Win */
00657 static int             end_check;
00658 #endif /*Easy Win */
00659 
/*
 * malloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte.  If the allocation fails,
 * the error is reported through perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size ? size : 1);

    if (!mem) {
        perror("can't malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}
00675 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte.  If the reallocation
 * fails, the error is reported through perror() and the process exits with
 * EXIT_FAILURE.  Returns the (possibly moved) block.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size ? size : 1);

    if (!resized) {
        perror("can't realloc");
        exit(EXIT_FAILURE);
    }
    return resized;
}
00689 
00690 #define nkf_xfree(ptr) free(ptr)
00691 
00692 static int
00693 nkf_str_caseeql(const char *src, const char *target)
00694 {
00695     int i;
00696     for (i = 0; src[i] && target[i]; i++) {
00697         if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
00698     }
00699     if (src[i] || target[i]) return FALSE;
00700     else return TRUE;
00701 }
00702 
00703 static nkf_encoding*
00704 nkf_enc_from_index(int idx)
00705 {
00706     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
00707         return 0;
00708     }
00709     return &nkf_encoding_table[idx];
00710 }
00711 
00712 static int
00713 nkf_enc_find_index(const char *name)
00714 {
00715     int i;
00716     if (name[0] == 'X' && *(name+1) == '-') name += 2;
00717     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
00718         if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
00719             return encoding_name_to_id_table[i].id;
00720         }
00721     }
00722     return -1;
00723 }
00724 
00725 static nkf_encoding*
00726 nkf_enc_find(const char *name)
00727 {
00728     int idx = -1;
00729     idx = nkf_enc_find_index(name);
00730     if (idx < 0) return 0;
00731     return nkf_enc_from_index(idx);
00732 }
00733 
00734 #define nkf_enc_name(enc) (enc)->name
00735 #define nkf_enc_to_index(enc) (enc)->id
00736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
00737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
00738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
00739 #define nkf_enc_asciicompat(enc) (\
00740                                   nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
00741                                   nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
00742 #define nkf_enc_unicode_p(enc) (\
00743                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
00744                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
00745                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
00746 #define nkf_enc_cp5022x_p(enc) (\
00747                                 nkf_enc_to_index(enc) == CP50220 ||\
00748                                 nkf_enc_to_index(enc) == CP50221 ||\
00749                                 nkf_enc_to_index(enc) == CP50222)
00750 
00751 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character set used by the current locale, or NULL
 * when it cannot be determined on this platform.
 *
 *  - HAVE_LANGINFO_H: the string returned by nl_langinfo(CODESET).
 *  - __WIN32__:       "CP<n>" built from GetACP().
 *  - __OS2__ (32bit): "Shift_JIS" for code pages 932/943, else "CP<n>".
 *
 * On Windows and OS/2 the result points to a static buffer that is
 * overwritten by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned UINT, so it must be formatted with %u */
    sprintf(buf, "CP%u", GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
# else
    /* OS/2 32bit */
    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#endif
    return NULL;
}
00779 
00780 static nkf_encoding*
00781 nkf_locale_encoding()
00782 {
00783     nkf_encoding *enc = 0;
00784     const char *encname = nkf_locale_charmap();
00785     if (encname)
00786         enc = nkf_enc_find(encname);
00787     return enc;
00788 }
00789 #endif /* DEFAULT_CODE_LOCALE */
00790 
00791 static nkf_encoding*
00792 nkf_utf8_encoding()
00793 {
00794     return &nkf_encoding_table[UTF_8];
00795 }
00796 
00797 static nkf_encoding*
00798 nkf_default_encoding()
00799 {
00800     nkf_encoding *enc = 0;
00801 #ifdef DEFAULT_CODE_LOCALE
00802     enc = nkf_locale_encoding();
00803 #elif defined(DEFAULT_ENCIDX)
00804     enc = nkf_enc_from_index(DEFAULT_ENCIDX);
00805 #endif
00806     if (!enc) enc = nkf_utf8_encoding();
00807     return enc;
00808 }
00809 
/* Fixed-capacity stack of nkf_char values (see nkf_buf_new/push/pop). */
00810 typedef struct {
      /* number of elements `ptr` has room for; set once by nkf_buf_new() */
00811     long capa;
      /* number of elements currently stored */
00812     long len;
      /* element storage allocated by nkf_buf_new() */
00813     nkf_char *ptr;
00814 } nkf_buf_t;
00815 
00816 static nkf_buf_t *
00817 nkf_buf_new(int length)
00818 {
00819     nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
00820     buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
00821     buf->capa = length;
00822     buf->len = 0;
00823     return buf;
00824 } 
00825 
/* Compiled out (#if 0): releases a buffer created by nkf_buf_new(),
 * freeing the element array first and then the header. */
00826 #if 0
00827 static void
00828 nkf_buf_dispose(nkf_buf_t *buf)
00829 {
00830     nkf_xfree(buf->ptr);
00831     nkf_xfree(buf);
00832 }
00833 #endif
00834 
/* Number of elements currently stored in `buf`. */
00835 #define nkf_buf_length(buf) ((buf)->len)
/* Non-zero when `buf` holds no elements. */
00836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
00837 
00838 static nkf_char
00839 nkf_buf_at(nkf_buf_t *buf, int index)
00840 {
00841     assert(index <= buf->len);
00842     return buf->ptr[index];
00843 }
00844 
00845 static void
00846 nkf_buf_clear(nkf_buf_t *buf)
00847 {
00848     buf->len = 0;
00849 }
00850 
00851 static void
00852 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
00853 {
00854     if (buf->capa <= buf->len) {
00855         exit(EXIT_FAILURE);
00856     }
00857     buf->ptr[buf->len++] = c;
00858 }
00859 
00860 static nkf_char
00861 nkf_buf_pop(nkf_buf_t *buf)
00862 {
00863     assert(!nkf_buf_empty_p(buf));
00864     return buf->ptr[--buf->len];
00865 }
00866 
00867 /* Version, usage and configuration output (omitted for PERL_XS builds) */
00868 #ifndef PERL_XS
00869 #ifdef WIN32DLL
00870 #define fprintf dllprintf
00871 #endif
00872 
00873 static void
00874 version(void)
00875 {
00876     fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
00877 }
00878 
00879 static void
00880 usage(void)
00881 {
00882     fprintf(HELP_OUTPUT,
00883             "Usage:  nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
00884 #ifdef UTF8_OUTPUT_ENABLE
00885             " j/s/e/w  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00886             "          UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
00887 #else
00888 #endif
00889 #ifdef UTF8_INPUT_ENABLE
00890             " J/S/E/W  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00891             "          UTF option is -W[8,[16,32][B,L]]\n"
00892 #else
00893             " J/S/E    Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00894 #endif
00895             );
00896     fprintf(HELP_OUTPUT,
00897             " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
00898             " M[BQ]    MIME encode [B:base64 Q:quoted]\n"
00899             " f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
00900             );
00901     fprintf(HELP_OUTPUT,
00902             " Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
00903             "          1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
00904             "          4: JISX0208 Katakana to JISX0201 Katakana\n"
00905             " X,x      Convert Halfwidth Katakana to Fullwidth or preserve it\n"
00906             );
00907     fprintf(HELP_OUTPUT,
00908             " O        Output to File (DEFAULT 'nkf.out')\n"
00909             " L[uwm]   Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
00910             );
00911     fprintf(HELP_OUTPUT,
00912             " --ic=<encoding>        Specify the input encoding\n"
00913             " --oc=<encoding>        Specify the output encoding\n"
00914             " --hiragana --katakana  Hiragana/Katakana Conversion\n"
00915             " --katakana-hiragana    Converts each other\n"
00916             );
00917     fprintf(HELP_OUTPUT,
00918 #ifdef INPUT_OPTION
00919             " --{cap, url}-input     Convert hex after ':' or '%%'\n"
00920 #endif
00921 #ifdef NUMCHAR_OPTION
00922             " --numchar-input        Convert Unicode Character Reference\n"
00923 #endif
00924 #ifdef UTF8_INPUT_ENABLE
00925             " --fb-{skip, html, xml, perl, java, subchar}\n"
00926             "                        Specify unassigned character's replacement\n"
00927 #endif
00928             );
00929     fprintf(HELP_OUTPUT,
00930 #ifdef OVERWRITE
00931             " --in-place[=SUF]       Overwrite original files\n"
00932             " --overwrite[=SUF]      Preserve timestamp of original files\n"
00933 #endif
00934             " -g --guess             Guess the input code\n"
00935             " -v --version           Print the version\n"
00936             " --help/-V              Print this help / configuration\n"
00937             );
00938     version();
00939 }
00940 
/*
 * Print a summary of the compile-time configuration to HELP_OUTPUT:
 * version and build timestamp, default output encoding, default newline,
 * MIME-decode default, JIS X 0201 Katakana conversion default, and where
 * help/version text is sent.  Each value is chosen by the preprocessor and
 * spliced into the string literals.
 */
00941 static void
00942 show_configuration(void)
00943 {
00944     fprintf(HELP_OUTPUT,
00945             "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
00946             "  Compile-time options:\n"
00947             "    Compiled at:                 " __DATE__ " " __TIME__ "\n"
00948            );
      /* Depending on the build this call has either a "%s" plus one
       * argument, or the constant "NONE" with no argument. */
00949     fprintf(HELP_OUTPUT,
00950             "    Default output encoding:     "
00951 #ifdef DEFAULT_CODE_LOCALE
00952             "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
00953 #elif defined(DEFAULT_ENCIDX)
00954             "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
00955 #else
00956             "NONE\n"
00957 #endif
00958            );
00959     fprintf(HELP_OUTPUT,
00960             "    Default output end of line:  "
00961 #if DEFAULT_NEWLINE == CR
00962             "CR"
00963 #elif DEFAULT_NEWLINE == CRLF
00964             "CRLF"
00965 #else
00966             "LF"
00967 #endif
00968             "\n"
00969             "    Decode MIME encoded string:  "
00970 #if MIME_DECODE_DEFAULT
00971             "ON"
00972 #else
00973             "OFF"
00974 #endif
00975             "\n"
00976             "    Convert JIS X 0201 Katakana: "
00977 #if X0201_DEFAULT
00978             "ON"
00979 #else
00980             "OFF"
00981 #endif
00982             "\n"
00983             "    --help, --version output:    "
      /* NOTE(review): the macro name and the "HELP_OUTPUT" text below look
       * like the result of a mechanical rename; an undefined macro evaluates
       * to 0 in #if, which would always select "STDOUT" -- confirm intent. */
00984 #if HELP_OUTPUT_HELP_OUTPUT
00985             "HELP_OUTPUT"
00986 #else
00987             "STDOUT"
00988 #endif
00989             "\n");
00990 }
00991 #endif /*PERL_XS*/
00992 
00993 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 * When `suffix` contains one or more '*', each '*' is replaced by
 * `filename` and every other character is copied as is.  Without any
 * '*', the result is simply `filename` followed by `suffix`.
 *
 * The result is allocated with nkf_xmalloc() and handed to the caller.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const int name_len = strlen(filename);
    const char *s;
    char *result;
    char *out;
    int stars = 0;

    for (s = suffix; *s; s++) {
        if (*s == '*') stars++;
    }

    if (!stars) {
        const int total = name_len + strlen(suffix);
        result = nkf_xmalloc(total + 1);
        strcpy(result, filename);
        strcat(result, suffix);
        result[total] = '\0';
        return result;
    }

    /* every '*' (1 char) grows into name_len chars */
    result = nkf_xmalloc(strlen(suffix) + (stars * (name_len - 1)) + 1);
    out = result;
    for (s = suffix; *s; s++) {
        if (*s == '*') {
            memcpy(out, filename, name_len);
            out += name_len;
        } else {
            *out++ = *s;
        }
    }
    *out = '\0';
    return result;
}
01028 #endif
01029 
01030 #ifdef UTF8_INPUT_ENABLE
01031 static void
01032 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
01033 {
01034     int shift = 20;
01035     c &= VALUE_MASK;
01036     while(shift >= 0){
01037         if(c >= NKF_INT32_C(1)<<shift){
01038             while(shift >= 0){
01039                 (*f)(0, bin2hex(c>>shift));
01040                 shift -= 4;
01041             }
01042         }else{
01043             shift -= 4;
01044         }
01045     }
01046     return;
01047 }
01048 
01049 static void
01050 encode_fallback_html(nkf_char c)
01051 {
01052     (*oconv)(0, '&');
01053     (*oconv)(0, '#');
01054     c &= VALUE_MASK;
01055     if(c >= NKF_INT32_C(1000000))
01056         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
01057     if(c >= NKF_INT32_C(100000))
01058         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
01059     if(c >= 10000)
01060         (*oconv)(0, 0x30+(c/10000  )%10);
01061     if(c >= 1000)
01062         (*oconv)(0, 0x30+(c/1000   )%10);
01063     if(c >= 100)
01064         (*oconv)(0, 0x30+(c/100    )%10);
01065     if(c >= 10)
01066         (*oconv)(0, 0x30+(c/10     )%10);
01067     if(c >= 0)
01068         (*oconv)(0, 0x30+ c         %10);
01069     (*oconv)(0, ';');
01070     return;
01071 }
01072 
01073 static void
01074 encode_fallback_xml(nkf_char c)
01075 {
01076     (*oconv)(0, '&');
01077     (*oconv)(0, '#');
01078     (*oconv)(0, 'x');
01079     nkf_each_char_to_hex(oconv, c);
01080     (*oconv)(0, ';');
01081     return;
01082 }
01083 
01084 static void
01085 encode_fallback_java(nkf_char c)
01086 {
01087     (*oconv)(0, '\\');
01088     c &= VALUE_MASK;
01089     if(!nkf_char_unicode_bmp_p(c)){
01090         (*oconv)(0, 'U');
01091         (*oconv)(0, '0');
01092         (*oconv)(0, '0');
01093         (*oconv)(0, bin2hex(c>>20));
01094         (*oconv)(0, bin2hex(c>>16));
01095     }else{
01096         (*oconv)(0, 'u');
01097     }
01098     (*oconv)(0, bin2hex(c>>12));
01099     (*oconv)(0, bin2hex(c>> 8));
01100     (*oconv)(0, bin2hex(c>> 4));
01101     (*oconv)(0, bin2hex(c    ));
01102     return;
01103 }
01104 
01105 static void
01106 encode_fallback_perl(nkf_char c)
01107 {
01108     (*oconv)(0, '\\');
01109     (*oconv)(0, 'x');
01110     (*oconv)(0, '{');
01111     nkf_each_char_to_hex(oconv, c);
01112     (*oconv)(0, '}');
01113     return;
01114 }
01115 
01116 static void
01117 encode_fallback_subchar(nkf_char c)
01118 {
01119     c = unicode_subchar;
01120     (*oconv)((c>>8)&0xFF, c&0xFF);
01121     return;
01122 }
01123 #endif
01124 
/*
 * Table of long option names and the string each one is paired with.
 * `name` is the text of the long option; entries ending in '=' presumably
 * take a value -- confirm against the option parser, which is not visible
 * here.  `alias` looks like a sequence of short option letters (e.g.
 * "sLw"); it is empty for many entries, which are presumably handled by
 * name in the parser -- TODO confirm.
 * Which entries exist depends on the feature macros tested below.
 */
01125 static const struct {
01126     const char *name;
01127     const char *alias;
01128 } long_option[] = {
01129     {"ic=", ""},
01130     {"oc=", ""},
01131     {"base64","jMB"},
01132     {"euc","e"},
01133     {"euc-input","E"},
01134     {"fj","jm"},
01135     {"help",""},
01136     {"jis","j"},
01137     {"jis-input","J"},
01138     {"mac","sLm"},
01139     {"mime","jM"},
01140     {"mime-input","m"},
01141     {"msdos","sLw"},
01142     {"sjis","s"},
01143     {"sjis-input","S"},
01144     {"unix","eLu"},
01145     {"version","v"},
01146     {"windows","sLw"},
01147     {"hiragana","h1"},
01148     {"katakana","h2"},
01149     {"katakana-hiragana","h3"},
01150     {"guess=", ""},
01151     {"guess", "g2"},
01152     {"cp932", ""},
01153     {"no-cp932", ""},
01154 #ifdef X0212_ENABLE
01155     {"x0212", ""},
01156 #endif
01157 #ifdef UTF8_OUTPUT_ENABLE
01158     {"utf8", "w"},
01159     {"utf16", "w16"},
01160     {"ms-ucs-map", ""},
01161     {"fb-skip", ""},
01162     {"fb-html", ""},
01163     {"fb-xml", ""},
01164     {"fb-perl", ""},
01165     {"fb-java", ""},
01166     {"fb-subchar", ""},
01167     {"fb-subchar=", ""},
01168 #endif
01169 #ifdef UTF8_INPUT_ENABLE
01170     {"utf8-input", "W"},
01171     {"utf16-input", "W16"},
01172     {"no-cp932ext", ""},
01173     {"no-best-fit-chars",""},
01174 #endif
01175 #ifdef UNICODE_NORMALIZATION
01176     {"utf8mac-input", ""},
01177 #endif
01178 #ifdef OVERWRITE
01179     {"overwrite", ""},
01180     {"overwrite=", ""},
01181     {"in-place", ""},
01182     {"in-place=", ""},
01183 #endif
01184 #ifdef INPUT_OPTION
01185     {"cap-input", ""},
01186     {"url-input", ""},
01187 #endif
01188 #ifdef NUMCHAR_OPTION
01189     {"numchar-input", ""},
01190 #endif
01191 #ifdef CHECK_OPTION
01192     {"no-output", ""},
01193     {"debug", ""},
01194 #endif
01195 #ifdef SHIFTJIS_CP932
01196     {"cp932inv", ""},
01197 #endif
01198 #ifdef EXEC_IO
01199     {"exec-in", ""},
01200     {"exec-out", ""},
01201 #endif
01202     {"prefix=", ""},
01203 };
01204 
01205 static void
01206 set_input_encoding(nkf_encoding *enc)
01207 {
01208     switch (nkf_enc_to_index(enc)) {
01209     case ISO_8859_1:
01210         iso8859_f = TRUE;
01211         break;
01212     case CP50221:
01213     case CP50222:
01214         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01215     case CP50220:
01216 #ifdef SHIFTJIS_CP932
01217         cp51932_f = TRUE;
01218 #endif
01219 #ifdef UTF8_OUTPUT_ENABLE
01220         ms_ucs_map_f = UCS_MAP_CP932;
01221 #endif
01222         break;
01223     case ISO_2022_JP_1:
01224         x0212_f = TRUE;
01225         break;
01226     case ISO_2022_JP_3:
01227         x0212_f = TRUE;
01228         x0213_f = TRUE;
01229         break;
01230     case ISO_2022_JP_2004:
01231         x0212_f = TRUE;
01232         x0213_f = TRUE;
01233         break;
01234     case SHIFT_JIS:
01235         break;
01236     case WINDOWS_31J:
01237         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01238 #ifdef SHIFTJIS_CP932
01239         cp51932_f = TRUE;
01240 #endif
01241 #ifdef UTF8_OUTPUT_ENABLE
01242         ms_ucs_map_f = UCS_MAP_CP932;
01243 #endif
01244         break;
01245         break;
01246     case CP10001:
01247 #ifdef SHIFTJIS_CP932
01248         cp51932_f = TRUE;
01249 #endif
01250 #ifdef UTF8_OUTPUT_ENABLE
01251         ms_ucs_map_f = UCS_MAP_CP10001;
01252 #endif
01253         break;
01254     case EUC_JP:
01255         break;
01256     case EUCJP_NKF:
01257         break;
01258     case CP51932:
01259         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01260 #ifdef SHIFTJIS_CP932
01261         cp51932_f = TRUE;
01262 #endif
01263 #ifdef UTF8_OUTPUT_ENABLE
01264         ms_ucs_map_f = UCS_MAP_CP932;
01265 #endif
01266         break;
01267     case EUCJP_MS:
01268         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01269 #ifdef SHIFTJIS_CP932
01270         cp51932_f = FALSE;
01271 #endif
01272 #ifdef UTF8_OUTPUT_ENABLE
01273         ms_ucs_map_f = UCS_MAP_MS;
01274 #endif
01275         break;
01276     case EUCJP_ASCII:
01277         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01278 #ifdef SHIFTJIS_CP932
01279         cp51932_f = FALSE;
01280 #endif
01281 #ifdef UTF8_OUTPUT_ENABLE
01282         ms_ucs_map_f = UCS_MAP_ASCII;
01283 #endif
01284         break;
01285     case SHIFT_JISX0213:
01286     case SHIFT_JIS_2004:
01287         x0213_f = TRUE;
01288 #ifdef SHIFTJIS_CP932
01289         cp51932_f = FALSE;
01290 #endif
01291         break;
01292     case EUC_JISX0213:
01293     case EUC_JIS_2004:
01294         x0213_f = TRUE;
01295 #ifdef SHIFTJIS_CP932
01296         cp51932_f = FALSE;
01297 #endif
01298         break;
01299 #ifdef UTF8_INPUT_ENABLE
01300 #ifdef UNICODE_NORMALIZATION
01301     case UTF8_MAC:
01302         nfc_f = TRUE;
01303         break;
01304 #endif
01305     case UTF_16:
01306     case UTF_16BE:
01307     case UTF_16BE_BOM:
01308         input_endian = ENDIAN_BIG;
01309         break;
01310     case UTF_16LE:
01311     case UTF_16LE_BOM:
01312         input_endian = ENDIAN_LITTLE;
01313         break;
01314     case UTF_32:
01315     case UTF_32BE:
01316     case UTF_32BE_BOM:
01317         input_endian = ENDIAN_BIG;
01318         break;
01319     case UTF_32LE:
01320     case UTF_32LE_BOM:
01321         input_endian = ENDIAN_LITTLE;
01322         break;
01323 #endif
01324     }
01325 }
01326 
/*
 * Adjust the global conversion flags to match the chosen output encoding.
 *
 * Depending on the index of `enc` this updates x0201_f (only while it is
 * still NKF_UNSPECIFIED), cp932inv_f (cleared when it was TRUE),
 * ms_ucs_map_f, x0212_f, x0213_f, output_bom_f and output_endian.
 * Encodings without a case below leave every flag untouched.
 */
01327 static void
01328 set_output_encoding(nkf_encoding *enc)
01329 {
01330     switch (nkf_enc_to_index(enc)) {
01331     case CP50220:
01332 #ifdef SHIFTJIS_CP932
01333         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01334 #endif
01335 #ifdef UTF8_OUTPUT_ENABLE
01336         ms_ucs_map_f = UCS_MAP_CP932;
01337 #endif
01338         break;
01339     case CP50221:
01340         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01341 #ifdef SHIFTJIS_CP932
01342         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01343 #endif
01344 #ifdef UTF8_OUTPUT_ENABLE
01345         ms_ucs_map_f = UCS_MAP_CP932;
01346 #endif
01347         break;
01348     case ISO_2022_JP:
01349 #ifdef SHIFTJIS_CP932
01350         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01351 #endif
01352         break;
01353     case ISO_2022_JP_1:
01354         x0212_f = TRUE;
01355 #ifdef SHIFTJIS_CP932
01356         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01357 #endif
01358         break;
01359     case ISO_2022_JP_3:
01360         x0212_f = TRUE;
01361         x0213_f = TRUE;
01362 #ifdef SHIFTJIS_CP932
01363         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01364 #endif
01365         break;
01366     case SHIFT_JIS:
01367         break;
01368     case WINDOWS_31J:
01369         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01370 #ifdef UTF8_OUTPUT_ENABLE
01371         ms_ucs_map_f = UCS_MAP_CP932;
01372 #endif
01373         break;
01374     case CP10001:
01375 #ifdef UTF8_OUTPUT_ENABLE
01376         ms_ucs_map_f = UCS_MAP_CP10001;
01377 #endif
01378         break;
01379     case EUC_JP:
01380         x0212_f = TRUE;
01381 #ifdef SHIFTJIS_CP932
01382         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01383 #endif
01384 #ifdef UTF8_OUTPUT_ENABLE
01385         ms_ucs_map_f = UCS_MAP_ASCII;
01386 #endif
01387         break;
          /* Unlike the other cases, this one switches x0212_f off. */
01388     case EUCJP_NKF:
01389         x0212_f = FALSE;
01390 #ifdef SHIFTJIS_CP932
01391         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01392 #endif
01393 #ifdef UTF8_OUTPUT_ENABLE
01394         ms_ucs_map_f = UCS_MAP_ASCII;
01395 #endif
01396         break;
01397     case CP51932:
01398         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01399 #ifdef SHIFTJIS_CP932
01400         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01401 #endif
01402 #ifdef UTF8_OUTPUT_ENABLE
01403         ms_ucs_map_f = UCS_MAP_CP932;
01404 #endif
01405         break;
01406     case EUCJP_MS:
01407         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01408         x0212_f = TRUE;
01409 #ifdef UTF8_OUTPUT_ENABLE
01410         ms_ucs_map_f = UCS_MAP_MS;
01411 #endif
01412         break;
01413     case EUCJP_ASCII:
01414         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01415         x0212_f = TRUE;
01416 #ifdef UTF8_OUTPUT_ENABLE
01417         ms_ucs_map_f = UCS_MAP_ASCII;
01418 #endif
01419         break;
01420     case SHIFT_JISX0213:
01421     case SHIFT_JIS_2004:
01422         x0213_f = TRUE;
01423 #ifdef SHIFTJIS_CP932
01424         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01425 #endif
01426         break;
01427     case EUC_JISX0213:
01428     case EUC_JIS_2004:
01429         x0212_f = TRUE;
01430         x0213_f = TRUE;
01431 #ifdef SHIFTJIS_CP932
01432         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01433 #endif
01434         break;
01435 #ifdef UTF8_OUTPUT_ENABLE
          /* The UTF cases only touch output_bom_f and output_endian; the
           * big-endian cases never assign output_endian, so they presumably
           * rely on its initial value -- TODO confirm. */
01436     case UTF_8_BOM:
01437         output_bom_f = TRUE;
01438         break;
01439     case UTF_16:
01440     case UTF_16BE_BOM:
01441         output_bom_f = TRUE;
01442         break;
01443     case UTF_16LE:
01444         output_endian = ENDIAN_LITTLE;
01445         output_bom_f = FALSE;
01446         break;
01447     case UTF_16LE_BOM:
01448         output_endian = ENDIAN_LITTLE;
01449         output_bom_f = TRUE;
01450         break;
01451     case UTF_32:
01452     case UTF_32BE_BOM:
01453         output_bom_f = TRUE;
01454         break;
01455     case UTF_32LE:
01456         output_endian = ENDIAN_LITTLE;
01457         output_bom_f = FALSE;
01458         break;
01459     case UTF_32LE_BOM:
01460         output_endian = ENDIAN_LITTLE;
01461         output_bom_f = TRUE;
01462         break;
01463 #endif
01464     }
01465 }
01466 
01467 static struct input_code*
01468 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01469 {
01470     if (iconv_func){
01471         struct input_code *p = input_code_list;
01472         while (p->name){
01473             if (iconv_func == p->iconv_func){
01474                 return p;
01475             }
01476             p++;
01477         }
01478     }
01479     return 0;
01480 }
01481 
01482 static void
01483 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01484 {
01485 #ifdef INPUT_CODE_FIX
01486     if (f || !input_encoding)
01487 #endif
01488         if (estab_f != f){
01489             estab_f = f;
01490         }
01491 
01492     if (iconv_func
01493 #ifdef INPUT_CODE_FIX
01494         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
01495 #endif
01496        ){
01497         iconv = iconv_func;
01498     }
01499 #ifdef CHECK_OPTION
01500     if (estab_f && iconv_for_check != iconv){
01501         struct input_code *p = find_inputcode_byfunc(iconv);
01502         if (p){
01503             set_input_codename(p->name);
01504             debug(p->name);
01505         }
01506         iconv_for_check = iconv;
01507     }
01508 #endif
01509 }
01510 
01511 #ifdef X0212_ENABLE
01512 static nkf_char
01513 x0212_shift(nkf_char c)
01514 {
01515     nkf_char ret = c;
01516     c &= 0x7f;
01517     if (is_eucg3(ret)){
01518         if (0x75 <= c && c <= 0x7f){
01519             ret = c + (0x109 - 0x75);
01520         }
01521     }else{
01522         if (0x75 <= c && c <= 0x7f){
01523             ret = c + (0x113 - 0x75);
01524         }
01525     }
01526     return ret;
01527 }
01528 
01529 
01530 static nkf_char
01531 x0212_unshift(nkf_char c)
01532 {
01533     nkf_char ret = c;
01534     if (0x7f <= c && c <= 0x88){
01535         ret = c + (0x75 - 0x7f);
01536     }else if (0x89 <= c && c <= 0x92){
01537         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
01538     }
01539     return ret;
01540 }
01541 #endif /* X0212_ENABLE */
01542 
/*
 * Convert the two-byte code (c2, c1) into another two-byte form and store
 * the result through p2/p1 (either may be NULL to skip that output).
 * Judging by the name and constants this is EUC-JP/JIS -> Shift_JIS --
 * TODO confirm against callers.
 *
 * Returns 0 on success and 1 when the code cannot be converted.
 */
01543 static nkf_char
01544 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01545 {
01546     nkf_char ndx;
      /* Codes flagged by is_eucg3() are handled first; ndx is the low
       * seven bits of c2. */
01547     if (is_eucg3(c2)){
01548         ndx = c2 & 0x7f;
01549         if (x0213_f){
              /* With x0213_f set only two ndx ranges are convertible;
               * everything else fails. */
01550             if((0x21 <= ndx && ndx <= 0x2F)){
01551                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
01552                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01553                 return 0;
01554             }else if(0x6E <= ndx && ndx <= 0x7E){
01555                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
01556                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01557                 return 0;
01558             }
01559             return 1;
01560         }
01561 #ifdef X0212_ENABLE
01562         else if(nkf_isgraph(ndx)){
01563             nkf_char val = 0;
01564             const unsigned short *ptr;
              /* Look the code up in the x0212_shiftjis table; a non-zero
               * entry holds the result as (high byte << 8 | low byte). */
01565             ptr = x0212_shiftjis[ndx - 0x21];
01566             if (ptr){
01567                 val = ptr[(c1 & 0x7f) - 0x21];
01568             }
01569             if (val){
01570                 c2 = val >> 8;
01571                 c1 = val & 0xff;
01572                 if (p2) *p2 = c2;
01573                 if (p1) *p1 = c1;
01574                 return 0;
01575             }
              /* No table entry: remap c2 and fall through to the generic
               * formula below. */
01576             c2 = x0212_shift(c2);
01577         }
01578 #endif /* X0212_ENABLE */
01579     }
01580     if(0x7F < c2) return 1;
      /* Generic arithmetic conversion of the row (c2) and cell (c1). */
01581     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
01582     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01583     return 0;
01584 }
01585 
/*
 * Convert the two-byte code (c2, c1) into another two-byte form and store
 * the result through p2/p1 (either may be NULL to skip that output).
 * Judging by the name and constants this is Shift_JIS -> EUC-JP/JIS --
 * TODO confirm against callers.
 *
 * Returns 1 when c1 is above 0xFC, otherwise 0.
 */
01586 static nkf_char
01587 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01588 {
01589 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
01590     nkf_char val;
01591 #endif
      /* Indexed by [c2 - 0xF0][0x9E < c1] in the x0213_f branch below. */
01592     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
01593     if (0xFC < c1) return 1;
01594 #ifdef SHIFTJIS_CP932
      /* Optional table-driven replacement of (c2, c1) before the arithmetic
       * conversion: shiftjis_cp932 when cp932inv_f is clear, cp932inv when
       * it is set.  A zero table entry leaves the pair unchanged. */
01595     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
01596         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
01597         if (val){
01598             c2 = val >> 8;
01599             c1 = val & 0xff;
01600         }
01601     }
01602     if (cp932inv_f
01603         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
01604         val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
01605         if (val){
01606             c2 = val >> 8;
01607             c1 = val & 0xff;
01608         }
01609     }
01610 #endif /* SHIFTJIS_CP932 */
01611 #ifdef X0212_ENABLE
      /* Direct lookup in shiftjis_x0212; a hit is final.  Table values
       * above 0x7FFF are tagged with PREFIX_EUCG3. */
01612     if (!x0213_f && is_ibmext_in_sjis(c2)){
01613         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
01614         if (val){
01615             if (val > 0x7FFF){
01616                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
01617                 c1 = val & 0xff;
01618             }else{
01619                 c2 = val >> 8;
01620                 c1 = val & 0xff;
01621             }
01622             if (p2) *p2 = c2;
01623             if (p1) *p1 = c1;
01624             return 0;
01625         }
01626     }
01627 #endif
      /* Arithmetic conversion; skipped when c2 is already below 0x80
       * (for instance after a table replacement above). */
01628     if(c2 >= 0x80){
01629         if(x0213_f && c2 >= 0xF0){
01630             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
01631                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
01632             }else{ /* 78<=k<=94 */
01633                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
01634                 if (0x9E < c1) c2++;
01635             }
01636         }else{
01637 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
01638 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
01639             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
01640             if (0x9E < c1) c2++;
01641         }
01642         if (c1 < 0x9F)
01643             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
01644         else {
01645             c1 = c1 - 0x7E;
01646         }
01647     }
01648 
01649 #ifdef X0212_ENABLE
01650     c2 = x0212_unshift(c2);
01651 #endif
01652     if (p2) *p2 = c2;
01653     if (p1) *p1 = c1;
01654     return 0;
01655 }
01656 
01657 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
01658 static void
01659 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
01660 {
01661     val &= VALUE_MASK;
01662     if (val < 0x80){
01663         *p1 = val;
01664         *p2 = 0;
01665         *p3 = 0;
01666         *p4 = 0;
01667     }else if (val < 0x800){
01668         *p1 = 0xc0 | (val >> 6);
01669         *p2 = 0x80 | (val & 0x3f);
01670         *p3 = 0;
01671         *p4 = 0;
01672     } else if (nkf_char_unicode_bmp_p(val)) {
01673         *p1 = 0xe0 |  (val >> 12);
01674         *p2 = 0x80 | ((val >>  6) & 0x3f);
01675         *p3 = 0x80 | ( val        & 0x3f);
01676         *p4 = 0;
01677     } else if (nkf_char_unicode_value_p(val)) {
01678         *p1 = 0xf0 |  (val >> 18);
01679         *p2 = 0x80 | ((val >> 12) & 0x3f);
01680         *p3 = 0x80 | ((val >>  6) & 0x3f);
01681         *p4 = 0x80 | ( val        & 0x3f);
01682     } else {
01683         *p1 = 0;
01684         *p2 = 0;
01685         *p3 = 0;
01686         *p4 = 0;
01687     }
01688 }
01689 
01690 static nkf_char
01691 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
01692 {
01693     nkf_char wc;
01694     if (c1 <= 0x7F) {
01695         /* single byte */
01696         wc = c1;
01697     }
01698     else if (c1 <= 0xC3) {
01699         /* trail byte or invalid */
01700         return -1;
01701     }
01702     else if (c1 <= 0xDF) {
01703         /* 2 bytes */
01704         wc  = (c1 & 0x1F) << 6;
01705         wc |= (c2 & 0x3F);
01706     }
01707     else if (c1 <= 0xEF) {
01708         /* 3 bytes */
01709         wc  = (c1 & 0x0F) << 12;
01710         wc |= (c2 & 0x3F) << 6;
01711         wc |= (c3 & 0x3F);
01712     }
01713     else if (c2 <= 0xF4) {
01714         /* 4 bytes */
01715         wc  = (c1 & 0x0F) << 18;
01716         wc |= (c2 & 0x3F) << 12;
01717         wc |= (c3 & 0x3F) << 6;
01718         wc |= (c4 & 0x3F);
01719     }
01720     else {
01721         return -1;
01722     }
01723     return wc;
01724 }
01725 #endif
01726 
01727 #ifdef UTF8_INPUT_ENABLE
01728 static int
01729 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
01730                        const unsigned short *const *pp, nkf_char psize,
01731                        nkf_char *p2, nkf_char *p1)
01732 {
01733     nkf_char c2;
01734     const unsigned short *p;
01735     unsigned short val;
01736 
01737     if (pp == 0) return 1;
01738 
01739     c1 -= 0x80;
01740     if (c1 < 0 || psize <= c1) return 1;
01741     p = pp[c1];
01742     if (p == 0)  return 1;
01743 
01744     c0 -= 0x80;
01745     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
01746     val = p[c0];
01747     if (val == 0) return 1;
01748     if (no_cp932ext_f && (
01749                           (val>>8) == 0x2D || /* NEC special characters */
01750                           val > NKF_INT32_C(0xF300) /* IBM extended characters */
01751                          )) return 1;
01752 
01753     c2 = val >> 8;
01754     if (val > 0x7FFF){
01755         c2 &= 0x7f;
01756         c2 |= PREFIX_EUCG3;
01757     }
01758     if (c2 == SO) c2 = JIS_X_0201_1976_K;
01759     c1 = val & 0xFF;
01760     if (p2) *p2 = c2;
01761     if (p1) *p1 = c1;
01762     return 0;
01763 }
01764 
01765 static int
01766 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01767 {
01768     const unsigned short *const *pp;
01769     const unsigned short *const *const *ppp;
01770     static const char no_best_fit_chars_table_C2[] =
01771     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01772         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01773         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
01774         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
01775     static const char no_best_fit_chars_table_C2_ms[] =
01776     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01777         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01778         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
01779         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
01780     static const char no_best_fit_chars_table_932_C2[] =
01781     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01782         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01783         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
01784         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
01785     static const char no_best_fit_chars_table_932_C3[] =
01786     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01787         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
01788         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01789         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
01790     nkf_char ret = 0;
01791 
01792     if(c2 < 0x80){
01793         *p2 = 0;
01794         *p1 = c2;
01795     }else if(c2 < 0xe0){
01796         if(no_best_fit_chars_f){
01797             if(ms_ucs_map_f == UCS_MAP_CP932){
01798                 switch(c2){
01799                 case 0xC2:
01800                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
01801                     break;
01802                 case 0xC3:
01803                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01804                     break;
01805                 }
01806             }else if(!cp932inv_f){
01807                 switch(c2){
01808                 case 0xC2:
01809                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
01810                     break;
01811                 case 0xC3:
01812                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01813                     break;
01814                 }
01815             }else if(ms_ucs_map_f == UCS_MAP_MS){
01816                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
01817             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01818                 switch(c2){
01819                 case 0xC2:
01820                     switch(c1){
01821                     case 0xA2:
01822                     case 0xA3:
01823                     case 0xA5:
01824                     case 0xA6:
01825                     case 0xAC:
01826                     case 0xAF:
01827                     case 0xB8:
01828                         return 1;
01829                     }
01830                     break;
01831                 }
01832             }
01833         }
01834         pp =
01835             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
01836             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
01837             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
01838             utf8_to_euc_2bytes;
01839         ret =  unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
01840     }else if(c0 < 0xF0){
01841         if(no_best_fit_chars_f){
01842             if(ms_ucs_map_f == UCS_MAP_CP932){
01843                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
01844             }else if(ms_ucs_map_f == UCS_MAP_MS){
01845                 switch(c2){
01846                 case 0xE2:
01847                     switch(c1){
01848                     case 0x80:
01849                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
01850                         break;
01851                     case 0x88:
01852                         if(c0 == 0x92) return 1;
01853                         break;
01854                     }
01855                     break;
01856                 case 0xE3:
01857                     if(c1 == 0x80 || c0 == 0x9C) return 1;
01858                     break;
01859                 }
01860             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01861                 switch(c2){
01862                 case 0xE3:
01863                     switch(c1){
01864                     case 0x82:
01865                         if(c0 == 0x94) return 1;
01866                         break;
01867                     case 0x83:
01868                         if(c0 == 0xBB) return 1;
01869                         break;
01870                     }
01871                     break;
01872                 }
01873             }else{
01874                 switch(c2){
01875                 case 0xE2:
01876                     switch(c1){
01877                     case 0x80:
01878                         if(c0 == 0x95) return 1;
01879                         break;
01880                     case 0x88:
01881                         if(c0 == 0xA5) return 1;
01882                         break;
01883                     }
01884                     break;
01885                 case 0xEF:
01886                     switch(c1){
01887                     case 0xBC:
01888                         if(c0 == 0x8D) return 1;
01889                         break;
01890                     case 0xBD:
01891                         if(c0 == 0x9E && !cp932inv_f) return 1;
01892                         break;
01893                     case 0xBF:
01894                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
01895                         break;
01896                     }
01897                     break;
01898                 }
01899             }
01900         }
01901         ppp =
01902             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
01903             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
01904             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
01905             utf8_to_euc_3bytes;
01906         ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
01907     }else return -1;
01908 #ifdef SHIFTJIS_CP932
01909     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
01910         nkf_char s2, s1;
01911         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
01912             s2e_conv(s2, s1, p2, p1);
01913         }else{
01914             ret = 1;
01915         }
01916     }
01917 #endif
01918     return ret;
01919 }
01920 
01921 #ifdef UTF8_OUTPUT_ENABLE
01922 static nkf_char
01923 e2w_conv(nkf_char c2, nkf_char c1)
01924 {
01925     const unsigned short *p;
01926 
01927     if (c2 == JIS_X_0201_1976_K) {
01928         if (ms_ucs_map_f == UCS_MAP_CP10001) {
01929             switch (c1) {
01930             case 0x20:
01931                 return 0xA0;
01932             case 0x7D:
01933                 return 0xA9;
01934             }
01935         }
01936         p = euc_to_utf8_1byte;
01937 #ifdef X0212_ENABLE
01938     } else if (is_eucg3(c2)){
01939         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
01940             return 0xA6;
01941         }
01942         c2 = (c2&0x7f) - 0x21;
01943         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
01944             p = x0212_to_utf8_2bytes[c2];
01945         else
01946             return 0;
01947 #endif
01948     } else {
01949         c2 &= 0x7f;
01950         c2 = (c2&0x7f) - 0x21;
01951         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
01952             p =
01953                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
01954                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
01955                 euc_to_utf8_2bytes_ms[c2];
01956         else
01957             return 0;
01958     }
01959     if (!p) return 0;
01960     c1 = (c1 & 0x7f) - 0x21;
01961     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
01962         return p[c1];
01963     return 0;
01964 }
01965 #endif
01966 
01967 static nkf_char
01968 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01969 {
01970     nkf_char ret = 0;
01971 
01972     if (!c1){
01973         *p2 = 0;
01974         *p1 = c2;
01975     }else if (0xc0 <= c2 && c2 <= 0xef) {
01976         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
01977 #ifdef NUMCHAR_OPTION
01978         if (ret > 0){
01979             if (p2) *p2 = 0;
01980             if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
01981             ret = 0;
01982         }
01983 #endif
01984     }
01985     return ret;
01986 }
01987 
01988 #ifdef UTF8_INPUT_ENABLE
01989 static nkf_char
01990 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
01991 {
01992     nkf_char c1, c2, c3, c4;
01993     nkf_char ret = 0;
01994     val &= VALUE_MASK;
01995     if (val < 0x80) {
01996         *p2 = 0;
01997         *p1 = val;
01998     }
01999     else if (nkf_char_unicode_bmp_p(val)){
02000         nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02001         ret =  unicode_to_jis_common(c1, c2, c3, p2, p1);
02002         if (ret > 0){
02003             *p2 = 0;
02004             *p1 = nkf_char_unicode_new(val);
02005             ret = 0;
02006         }
02007     }
02008     else {
02009         *p2 = 0;
02010         *p1 = nkf_char_unicode_new(val);
02011     }
02012     return ret;
02013 }
02014 #endif
02015 
02016 static nkf_char
02017 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
02018 {
02019     if (c2 == JIS_X_0201_1976_K || c2 == SS2){
02020         if (iso2022jp_f && !x0201_f) {
02021             c2 = GETA1; c1 = GETA2;
02022         } else {
02023             c2 = JIS_X_0201_1976_K;
02024             c1 &= 0x7f;
02025         }
02026 #ifdef X0212_ENABLE
02027     }else if (c2 == 0x8f){
02028         if (c0 == 0){
02029             return -1;
02030         }
02031         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
02032             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
02033             c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
02034             c2 = 0;
02035         } else {
02036             c2 = (c2 << 8) | (c1 & 0x7f);
02037             c1 = c0 & 0x7f;
02038 #ifdef SHIFTJIS_CP932
02039             if (cp51932_f){
02040                 nkf_char s2, s1;
02041                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02042                     s2e_conv(s2, s1, &c2, &c1);
02043                     if (c2 < 0x100){
02044                         c1 &= 0x7f;
02045                         c2 &= 0x7f;
02046                     }
02047                 }
02048             }
02049 #endif /* SHIFTJIS_CP932 */
02050         }
02051 #endif /* X0212_ENABLE */
02052     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
02053         /* NOP */
02054     } else {
02055         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
02056             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
02057             c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
02058             c2 = 0;
02059         } else {
02060             c1 &= 0x7f;
02061             c2 &= 0x7f;
02062 #ifdef SHIFTJIS_CP932
02063             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
02064                 nkf_char s2, s1;
02065                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02066                     s2e_conv(s2, s1, &c2, &c1);
02067                     if (c2 < 0x100){
02068                         c1 &= 0x7f;
02069                         c2 &= 0x7f;
02070                     }
02071                 }
02072             }
02073 #endif /* SHIFTJIS_CP932 */
02074         }
02075     }
02076     (*oconv)(c2, c1);
02077     return 0;
02078 }
02079 
02080 static nkf_char
02081 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
02082 {
02083     if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
02084         if (iso2022jp_f && !x0201_f) {
02085             c2 = GETA1; c1 = GETA2;
02086         } else {
02087             c1 &= 0x7f;
02088         }
02089     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
02090         /* NOP */
02091     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
02092         /* CP932 UDC */
02093         if(c1 == 0x7F) return 0;
02094         c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
02095         c2 = 0;
02096     } else {
02097         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
02098         if (ret) return ret;
02099     }
02100     (*oconv)(c2, c1);
02101     return 0;
02102 }
02103 
02104 static nkf_char
02105 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
02106 {
02107     nkf_char ret = 0, c4 = 0;
02108     static const char w_iconv_utf8_1st_byte[] =
02109     { /* 0xC0 - 0xFF */
02110         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02111         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02112         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
02113         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
02114 
02115     if (c3 > 0xFF) {
02116         c4 = c3 & 0xFF;
02117         c3 >>= 8;
02118     }
02119 
02120     if (c1 < 0 || 0xff < c1) {
02121     }else if (c1 == 0) { /* 0 : 1 byte*/
02122         c3 = 0;
02123     } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
02124         return 0;
02125     } else{
02126         switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
02127         case 21:
02128             if (c2 < 0x80 || 0xBF < c2) return 0;
02129             break;
02130         case 30:
02131             if (c3 == 0) return -1;
02132             if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
02133                 return 0;
02134             break;
02135         case 31:
02136         case 33:
02137             if (c3 == 0) return -1;
02138             if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
02139                 return 0;
02140             break;
02141         case 32:
02142             if (c3 == 0) return -1;
02143             if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
02144                 return 0;
02145             break;
02146         case 40:
02147             if (c3 == 0) return -2;
02148             if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02149                 return 0;
02150             break;
02151         case 41:
02152             if (c3 == 0) return -2;
02153             if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02154                 return 0;
02155             break;
02156         case 42:
02157             if (c3 == 0) return -2;
02158             if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02159                 return 0;
02160             break;
02161         default:
02162             return 0;
02163             break;
02164         }
02165     }
02166     if (c1 == 0 || c1 == EOF){
02167     } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
02168         c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
02169         c1 = 0;
02170     } else {
02171         ret = w2e_conv(c1, c2, c3, &c1, &c2);
02172     }
02173     if (ret == 0){
02174         (*oconv)(c1, c2);
02175     }
02176     return ret;
02177 }
02178 
02179 #define NKF_ICONV_INVALID_CODE_RANGE -13
02180 static size_t
02181 unicode_iconv(nkf_char wc)
02182 {
02183     nkf_char c1, c2;
02184     int ret = 0;
02185 
02186     if (wc < 0x80) {
02187         c2 = 0;
02188         c1 = wc;
02189     }else if ((wc>>11) == 27) {
02190         /* unpaired surrogate */
02191         return NKF_ICONV_INVALID_CODE_RANGE;
02192     }else if (wc < 0xFFFF) {
02193         ret = w16e_conv(wc, &c2, &c1);
02194         if (ret) return ret;
02195     }else if (wc < 0x10FFFF) {
02196         c2 = 0;
02197         c1 = nkf_char_unicode_new(wc);
02198     } else {
02199         return NKF_ICONV_INVALID_CODE_RANGE;
02200     }
02201     (*oconv)(c2, c1);
02202     return 0;
02203 }
02204 
02205 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
02206 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
02207 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
02208 static size_t
02209 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02210 {
02211     nkf_char wc;
02212 
02213     if (c1 == EOF) {
02214         (*oconv)(EOF, 0);
02215         return 0;
02216     }
02217 
02218     if (input_endian == ENDIAN_BIG) {
02219         if (0xD8 <= c1 && c1 <= 0xDB) {
02220             if (0xDC <= c3 && c3 <= 0xDF) {
02221                 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
02222             } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02223         } else {
02224             wc = c1 << 8 | c2;
02225         }
02226     } else {
02227         if (0xD8 <= c2 && c2 <= 0xDB) {
02228             if (0xDC <= c4 && c4 <= 0xDF) {
02229                 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
02230             } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02231         } else {
02232             wc = c2 << 8 | c1;
02233         }
02234     }
02235 
02236     return (*unicode_iconv)(wc);
02237 }
02238 
02239 static nkf_char
02240 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
02241 {
02242     (*oconv)(c2, c1);
02243     return 16; /* different from w_iconv32 */
02244 }
02245 
02246 static nkf_char
02247 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
02248 {
02249     (*oconv)(c2, c1);
02250     return 32; /* different from w_iconv16 */
02251 }
02252 
02253 static size_t
02254 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02255 {
02256     nkf_char wc;
02257 
02258     if (c1 == EOF) {
02259         (*oconv)(EOF, 0);
02260         return 0;
02261     }
02262 
02263     switch(input_endian){
02264     case ENDIAN_BIG:
02265         wc = c2 << 16 | c3 << 8 | c4;
02266         break;
02267     case ENDIAN_LITTLE:
02268         wc = c3 << 16 | c2 << 8 | c1;
02269         break;
02270     case ENDIAN_2143:
02271         wc = c1 << 16 | c4 << 8 | c3;
02272         break;
02273     case ENDIAN_3412:
02274         wc = c4 << 16 | c1 << 8 | c2;
02275         break;
02276     default:
02277         return NKF_ICONV_INVALID_CODE_RANGE;
02278     }
02279 
02280     return (*unicode_iconv)(wc);
02281 }
02282 #endif
02283 
/*
 * When output_mode is neither ASCII nor ISO_8859_1, emit ESC '(' followed
 * by ascii_intro and record `mode` as the new output_mode.  Otherwise do
 * nothing (output_mode is left as it is).
 */
#define output_ascii_escape_sequence(mode) do { \
        if (!(output_mode == ASCII || output_mode == ISO_8859_1)) { \
            (*o_putc)(ESC); \
            (*o_putc)('('); \
            (*o_putc)(ascii_intro); \
            output_mode = (mode); \
        } \
    } while (0)
02292 
02293 static void
02294 output_escape_sequence(int mode)
02295 {
02296     if (output_mode == mode)
02297         return;
02298     switch(mode) {
02299     case ISO_8859_1:
02300         (*o_putc)(ESC);
02301         (*o_putc)('.');
02302         (*o_putc)('A');
02303         break;
02304     case JIS_X_0201_1976_K:
02305         (*o_putc)(ESC);
02306         (*o_putc)('(');
02307         (*o_putc)('I');
02308         break;
02309     case JIS_X_0208:
02310         (*o_putc)(ESC);
02311         (*o_putc)('$');
02312         (*o_putc)(kanji_intro);
02313         break;
02314     case JIS_X_0212:
02315         (*o_putc)(ESC);
02316         (*o_putc)('$');
02317         (*o_putc)('(');
02318         (*o_putc)('D');
02319         break;
02320     case JIS_X_0213_1:
02321         (*o_putc)(ESC);
02322         (*o_putc)('$');
02323         (*o_putc)('(');
02324         (*o_putc)('Q');
02325         break;
02326     case JIS_X_0213_2:
02327         (*o_putc)(ESC);
02328         (*o_putc)('$');
02329         (*o_putc)('(');
02330         (*o_putc)('P');
02331         break;
02332     }
02333     output_mode = mode;
02334 }
02335 
02336 static void
02337 j_oconv(nkf_char c2, nkf_char c1)
02338 {
02339 #ifdef NUMCHAR_OPTION
02340     if (c2 == 0 && nkf_char_unicode_p(c1)){
02341         w16e_conv(c1, &c2, &c1);
02342         if (c2 == 0 && nkf_char_unicode_p(c1)){
02343             c2 = c1 & VALUE_MASK;
02344             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
02345                 /* CP5022x UDC */
02346                 c1 &= 0xFFF;
02347                 c2 = 0x7F + c1 / 94;
02348                 c1 = 0x21 + c1 % 94;
02349             } else {
02350                 if (encode_fallback) (*encode_fallback)(c1);
02351                 return;
02352             }
02353         }
02354     }
02355 #endif
02356     if (c2 == 0) {
02357         output_ascii_escape_sequence(ASCII);
02358         (*o_putc)(c1);
02359     }
02360     else if (c2 == EOF) {
02361         output_ascii_escape_sequence(ASCII);
02362         (*o_putc)(EOF);
02363     }
02364     else if (c2 == ISO_8859_1) {
02365         output_ascii_escape_sequence(ISO_8859_1);
02366         (*o_putc)(c1|0x80);
02367     }
02368     else if (c2 == JIS_X_0201_1976_K) {
02369         output_escape_sequence(JIS_X_0201_1976_K);
02370         (*o_putc)(c1);
02371 #ifdef X0212_ENABLE
02372     } else if (is_eucg3(c2)){
02373         output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
02374         (*o_putc)(c2 & 0x7f);
02375         (*o_putc)(c1);
02376 #endif
02377     } else {
02378         if(ms_ucs_map_f
02379            ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
02380            : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
02381         output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
02382         (*o_putc)(c2);
02383         (*o_putc)(c1);
02384     }
02385 }
02386 
02387 static void
02388 e_oconv(nkf_char c2, nkf_char c1)
02389 {
02390     if (c2 == 0 && nkf_char_unicode_p(c1)){
02391         w16e_conv(c1, &c2, &c1);
02392         if (c2 == 0 && nkf_char_unicode_p(c1)){
02393             c2 = c1 & VALUE_MASK;
02394             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
02395                 /* eucJP-ms UDC */
02396                 c1 &= 0xFFF;
02397                 c2 = c1 / 94;
02398                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
02399                 c1 = 0x21 + c1 % 94;
02400                 if (is_eucg3(c2)){
02401                     (*o_putc)(0x8f);
02402                     (*o_putc)((c2 & 0x7f) | 0x080);
02403                     (*o_putc)(c1 | 0x080);
02404                 }else{
02405                     (*o_putc)((c2 & 0x7f) | 0x080);
02406                     (*o_putc)(c1 | 0x080);
02407                 }
02408                 return;
02409             } else {
02410                 if (encode_fallback) (*encode_fallback)(c1);
02411                 return;
02412             }
02413         }
02414     }
02415 
02416     if (c2 == EOF) {
02417         (*o_putc)(EOF);
02418     } else if (c2 == 0) {
02419         output_mode = ASCII;
02420         (*o_putc)(c1);
02421     } else if (c2 == JIS_X_0201_1976_K) {
02422         output_mode = EUC_JP;
02423         (*o_putc)(SS2); (*o_putc)(c1|0x80);
02424     } else if (c2 == ISO_8859_1) {
02425         output_mode = ISO_8859_1;
02426         (*o_putc)(c1 | 0x080);
02427 #ifdef X0212_ENABLE
02428     } else if (is_eucg3(c2)){
02429         output_mode = EUC_JP;
02430 #ifdef SHIFTJIS_CP932
02431         if (!cp932inv_f){
02432             nkf_char s2, s1;
02433             if (e2s_conv(c2, c1, &s2, &s1) == 0){
02434                 s2e_conv(s2, s1, &c2, &c1);
02435             }
02436         }
02437 #endif
02438         if (c2 == 0) {
02439             output_mode = ASCII;
02440             (*o_putc)(c1);
02441         }else if (is_eucg3(c2)){
02442             if (x0212_f){
02443                 (*o_putc)(0x8f);
02444                 (*o_putc)((c2 & 0x7f) | 0x080);
02445                 (*o_putc)(c1 | 0x080);
02446             }
02447         }else{
02448             (*o_putc)((c2 & 0x7f) | 0x080);
02449             (*o_putc)(c1 | 0x080);
02450         }
02451 #endif
02452     } else {
02453         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
02454             set_iconv(FALSE, 0);
02455             return; /* too late to rescue this char */
02456         }
02457         output_mode = EUC_JP;
02458         (*o_putc)(c2 | 0x080);
02459         (*o_putc)(c1 | 0x080);
02460     }
02461 }
02462 
02463 static void
02464 s_oconv(nkf_char c2, nkf_char c1)
02465 {
02466 #ifdef NUMCHAR_OPTION
02467     if (c2 == 0 && nkf_char_unicode_p(c1)){
02468         w16e_conv(c1, &c2, &c1);
02469         if (c2 == 0 && nkf_char_unicode_p(c1)){
02470             c2 = c1 & VALUE_MASK;
02471             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
02472                 /* CP932 UDC */
02473                 c1 &= 0xFFF;
02474                 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
02475                 c1 = c1 % 188;
02476                 c1 += 0x40 + (c1 > 0x3e);
02477                 (*o_putc)(c2);
02478                 (*o_putc)(c1);
02479                 return;
02480             } else {
02481                 if(encode_fallback)(*encode_fallback)(c1);
02482                 return;
02483             }
02484         }
02485     }
02486 #endif
02487     if (c2 == EOF) {
02488         (*o_putc)(EOF);
02489         return;
02490     } else if (c2 == 0) {
02491         output_mode = ASCII;
02492         (*o_putc)(c1);
02493     } else if (c2 == JIS_X_0201_1976_K) {
02494         output_mode = SHIFT_JIS;
02495         (*o_putc)(c1|0x80);
02496     } else if (c2 == ISO_8859_1) {
02497         output_mode = ISO_8859_1;
02498         (*o_putc)(c1 | 0x080);
02499 #ifdef X0212_ENABLE
02500     } else if (is_eucg3(c2)){
02501         output_mode = SHIFT_JIS;
02502         if (e2s_conv(c2, c1, &c2, &c1) == 0){
02503             (*o_putc)(c2);
02504             (*o_putc)(c1);
02505         }
02506 #endif
02507     } else {
02508         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
02509             set_iconv(FALSE, 0);
02510             return; /* too late to rescue this char */
02511         }
02512         output_mode = SHIFT_JIS;
02513         e2s_conv(c2, c1, &c2, &c1);
02514 
02515 #ifdef SHIFTJIS_CP932
02516         if (cp932inv_f
02517             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
02518             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
02519             if (c){
02520                 c2 = c >> 8;
02521                 c1 = c & 0xff;
02522             }
02523         }
02524 #endif /* SHIFTJIS_CP932 */
02525 
02526         (*o_putc)(c2);
02527         if (prefix_table[(unsigned char)c1]){
02528             (*o_putc)(prefix_table[(unsigned char)c1]);
02529         }
02530         (*o_putc)(c1);
02531     }
02532 }
02533 
02534 #ifdef UTF8_OUTPUT_ENABLE
02535 static void
02536 w_oconv(nkf_char c2, nkf_char c1)
02537 {
02538     nkf_char c3, c4;
02539     nkf_char val;
02540 
02541     if (output_bom_f) {
02542         output_bom_f = FALSE;
02543         (*o_putc)('\357');
02544         (*o_putc)('\273');
02545         (*o_putc)('\277');
02546     }
02547 
02548     if (c2 == EOF) {
02549         (*o_putc)(EOF);
02550         return;
02551     }
02552 
02553     if (c2 == 0 && nkf_char_unicode_p(c1)){
02554         val = c1 & VALUE_MASK;
02555         nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02556         (*o_putc)(c1);
02557         if (c2) (*o_putc)(c2);
02558         if (c3) (*o_putc)(c3);
02559         if (c4) (*o_putc)(c4);
02560         return;
02561     }
02562 
02563     if (c2 == 0) {
02564         (*o_putc)(c1);
02565     } else {
02566         val = e2w_conv(c2, c1);
02567         if (val){
02568             nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02569             (*o_putc)(c1);
02570             if (c2) (*o_putc)(c2);
02571             if (c3) (*o_putc)(c3);
02572             if (c4) (*o_putc)(c4);
02573         }
02574     }
02575 }
02576 
02577 static void
02578 w_oconv16(nkf_char c2, nkf_char c1)
02579 {
02580     if (output_bom_f) {
02581         output_bom_f = FALSE;
02582         if (output_endian == ENDIAN_LITTLE){
02583             (*o_putc)(0xFF);
02584             (*o_putc)(0xFE);
02585         }else{
02586             (*o_putc)(0xFE);
02587             (*o_putc)(0xFF);
02588         }
02589     }
02590 
02591     if (c2 == EOF) {
02592         (*o_putc)(EOF);
02593         return;
02594     }
02595 
02596     if (c2 == 0 && nkf_char_unicode_p(c1)) {
02597         if (nkf_char_unicode_bmp_p(c1)) {
02598             c2 = (c1 >> 8) & 0xff;
02599             c1 &= 0xff;
02600         } else {
02601             c1 &= VALUE_MASK;
02602             if (c1 <= UNICODE_MAX) {
02603                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
02604                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
02605                 if (output_endian == ENDIAN_LITTLE){
02606                     (*o_putc)(c2 & 0xff);
02607                     (*o_putc)((c2 >> 8) & 0xff);
02608                     (*o_putc)(c1 & 0xff);
02609                     (*o_putc)((c1 >> 8) & 0xff);
02610                 }else{
02611                     (*o_putc)((c2 >> 8) & 0xff);
02612                     (*o_putc)(c2 & 0xff);
02613                     (*o_putc)((c1 >> 8) & 0xff);
02614                     (*o_putc)(c1 & 0xff);
02615                 }
02616             }
02617             return;
02618         }
02619     } else if (c2) {
02620         nkf_char val = e2w_conv(c2, c1);
02621         c2 = (val >> 8) & 0xff;
02622         c1 = val & 0xff;
02623         if (!val) return;
02624     }
02625 
02626     if (output_endian == ENDIAN_LITTLE){
02627         (*o_putc)(c1);
02628         (*o_putc)(c2);
02629     }else{
02630         (*o_putc)(c2);
02631         (*o_putc)(c1);
02632     }
02633 }
02634 
02635 static void
02636 w_oconv32(nkf_char c2, nkf_char c1)
02637 {
02638     if (output_bom_f) {
02639         output_bom_f = FALSE;
02640         if (output_endian == ENDIAN_LITTLE){
02641             (*o_putc)(0xFF);
02642             (*o_putc)(0xFE);
02643             (*o_putc)(0);
02644             (*o_putc)(0);
02645         }else{
02646             (*o_putc)(0);
02647             (*o_putc)(0);
02648             (*o_putc)(0xFE);
02649             (*o_putc)(0xFF);
02650         }
02651     }
02652 
02653     if (c2 == EOF) {
02654         (*o_putc)(EOF);
02655         return;
02656     }
02657 
02658     if (c2 == ISO_8859_1) {
02659         c1 |= 0x80;
02660     } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
02661         c1 &= VALUE_MASK;
02662     } else if (c2) {
02663         c1 = e2w_conv(c2, c1);
02664         if (!c1) return;
02665     }
02666     if (output_endian == ENDIAN_LITTLE){
02667         (*o_putc)( c1        & 0xFF);
02668         (*o_putc)((c1 >>  8) & 0xFF);
02669         (*o_putc)((c1 >> 16) & 0xFF);
02670         (*o_putc)(0);
02671     }else{
02672         (*o_putc)(0);
02673         (*o_putc)((c1 >> 16) & 0xFF);
02674         (*o_putc)((c1 >>  8) & 0xFF);
02675         (*o_putc)( c1        & 0xFF);
02676     }
02677 }
02678 #endif
02679 
/* Score bits accumulated per input code; each flag is a distinct bit. */
#define SCORE_L2       (1 << 0)  /* Kanji Level 2 */
#define SCORE_KANA     (1 << 1)  /* Halfwidth Katakana */
#define SCORE_DEPEND   (1 << 2)  /* MD Characters */
#define SCORE_CP932    (1 << 3)  /* IBM extended characters */
#define SCORE_X0212    (1 << 4)  /* JIS X 0212 */
#define SCORE_NO_EXIST (1 << 5)  /* Undefined Characters */
#define SCORE_iMIME    (1 << 6)  /* MIME selected */
#define SCORE_ERROR    (1 << 7)  /* Error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)
02690 
02691 static const nkf_char score_table_A0[] = {
02692     0, 0, 0, 0,
02693     0, 0, 0, 0,
02694     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
02695     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
02696 };
02697 
02698 static const nkf_char score_table_F0[] = {
02699     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
02700     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
02701     SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
02702     SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
02703 };
02704 
02705 static void
02706 set_code_score(struct input_code *ptr, nkf_char score)
02707 {
02708     if (ptr){
02709         ptr->score |= score;
02710     }
02711 }
02712 
02713 static void
02714 clr_code_score(struct input_code *ptr, nkf_char score)
02715 {
02716     if (ptr){
02717         ptr->score &= ~score;
02718     }
02719 }
02720 
02721 static void
02722 code_score(struct input_code *ptr)
02723 {
02724     nkf_char c2 = ptr->buf[0];
02725 #ifdef UTF8_OUTPUT_ENABLE
02726     nkf_char c1 = ptr->buf[1];
02727 #endif
02728     if (c2 < 0){
02729         set_code_score(ptr, SCORE_ERROR);
02730     }else if (c2 == SS2){
02731         set_code_score(ptr, SCORE_KANA);
02732     }else if (c2 == 0x8f){
02733         set_code_score(ptr, SCORE_X0212);
02734 #ifdef UTF8_OUTPUT_ENABLE
02735     }else if (!e2w_conv(c2, c1)){
02736         set_code_score(ptr, SCORE_NO_EXIST);
02737 #endif
02738     }else if ((c2 & 0x70) == 0x20){
02739         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
02740     }else if ((c2 & 0x70) == 0x70){
02741         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
02742     }else if ((c2 & 0x70) >= 0x50){
02743         set_code_score(ptr, SCORE_L2);
02744     }
02745 }
02746 
02747 static void
02748 status_disable(struct input_code *ptr)
02749 {
02750     ptr->stat = -1;
02751     ptr->buf[0] = -1;
02752     code_score(ptr);
02753     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
02754 }
02755 
02756 static void
02757 status_push_ch(struct input_code *ptr, nkf_char c)
02758 {
02759     ptr->buf[ptr->index++] = c;
02760 }
02761 
02762 static void
02763 status_clear(struct input_code *ptr)
02764 {
02765     ptr->stat = 0;
02766     ptr->index = 0;
02767 }
02768 
02769 static void
02770 status_reset(struct input_code *ptr)
02771 {
02772     status_clear(ptr);
02773     ptr->score = SCORE_INIT;
02774 }
02775 
02776 static void
02777 status_reinit(struct input_code *ptr)
02778 {
02779     status_reset(ptr);
02780     ptr->_file_stat = 0;
02781 }
02782 
02783 static void
02784 status_check(struct input_code *ptr, nkf_char c)
02785 {
02786     if (c <= DEL && estab_f){
02787         status_reset(ptr);
02788     }
02789 }
02790 
02791 static void
02792 s_status(struct input_code *ptr, nkf_char c)
02793 {
02794     switch(ptr->stat){
02795     case -1:
02796         status_check(ptr, c);
02797         break;
02798     case 0:
02799         if (c <= DEL){
02800             break;
02801         }else if (nkf_char_unicode_p(c)){
02802             break;
02803         }else if (0xa1 <= c && c <= 0xdf){
02804             status_push_ch(ptr, SS2);
02805             status_push_ch(ptr, c);
02806             code_score(ptr);
02807             status_clear(ptr);
02808         }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
02809             ptr->stat = 1;
02810             status_push_ch(ptr, c);
02811         }else if (0xed <= c && c <= 0xee){
02812             ptr->stat = 3;
02813             status_push_ch(ptr, c);
02814 #ifdef SHIFTJIS_CP932
02815         }else if (is_ibmext_in_sjis(c)){
02816             ptr->stat = 2;
02817             status_push_ch(ptr, c);
02818 #endif /* SHIFTJIS_CP932 */
02819 #ifdef X0212_ENABLE
02820         }else if (0xf0 <= c && c <= 0xfc){
02821             ptr->stat = 1;
02822             status_push_ch(ptr, c);
02823 #endif /* X0212_ENABLE */
02824         }else{
02825             status_disable(ptr);
02826         }
02827         break;
02828     case 1:
02829         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
02830             status_push_ch(ptr, c);
02831             s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
02832             code_score(ptr);
02833             status_clear(ptr);
02834         }else{
02835             status_disable(ptr);
02836         }
02837         break;
02838     case 2:
02839 #ifdef SHIFTJIS_CP932
02840         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
02841             status_push_ch(ptr, c);
02842             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
02843                 set_code_score(ptr, SCORE_CP932);
02844                 status_clear(ptr);
02845                 break;
02846             }
02847         }
02848 #endif /* SHIFTJIS_CP932 */
02849         status_disable(ptr);
02850         break;
02851     case 3:
02852         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
02853             status_push_ch(ptr, c);
02854             s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
02855             set_code_score(ptr, SCORE_CP932);
02856             status_clear(ptr);
02857         }else{
02858             status_disable(ptr);
02859         }
02860         break;
02861     }
02862 }
02863 
02864 static void
02865 e_status(struct input_code *ptr, nkf_char c)
02866 {
02867     switch (ptr->stat){
02868     case -1:
02869         status_check(ptr, c);
02870         break;
02871     case 0:
02872         if (c <= DEL){
02873             break;
02874         }else if (nkf_char_unicode_p(c)){
02875             break;
02876         }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
02877             ptr->stat = 1;
02878             status_push_ch(ptr, c);
02879 #ifdef X0212_ENABLE
02880         }else if (0x8f == c){
02881             ptr->stat = 2;
02882             status_push_ch(ptr, c);
02883 #endif /* X0212_ENABLE */
02884         }else{
02885             status_disable(ptr);
02886         }
02887         break;
02888     case 1:
02889         if (0xa1 <= c && c <= 0xfe){
02890             status_push_ch(ptr, c);
02891             code_score(ptr);
02892             status_clear(ptr);
02893         }else{
02894             status_disable(ptr);
02895         }
02896         break;
02897 #ifdef X0212_ENABLE
02898     case 2:
02899         if (0xa1 <= c && c <= 0xfe){
02900             ptr->stat = 1;
02901             status_push_ch(ptr, c);
02902         }else{
02903             status_disable(ptr);
02904         }
02905 #endif /* X0212_ENABLE */
02906     }
02907 }
02908 
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 candidate state machine: feed one input value `c`.
 *
 * ptr->stat values used here:
 *   -1  candidate rejected (see status_check())
 *    0  idle, expecting a lead byte
 *    1  lead 0xc0-0xdf seen (one continuation byte expected)
 *    2  lead 0xe0-0xef seen (two continuation bytes expected)
 *    3  lead 0xf0-0xf4 seen (three continuation bytes expected; the
 *       sequence is consumed without being scored)
 */
static void
w_status(struct input_code *ptr, nkf_char c)
{
    /* Continuation byte range 0x80-0xbf. */
    const int is_trail = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat) {
    case -1:
        status_check(ptr, c);
        break;

    case 0:
        if (c <= DEL || nkf_char_unicode_p(c)) {
            break;
        }
        if (0xc0 <= c && c <= 0xdf) {
            ptr->stat = 1;
        } else if (0xe0 <= c && c <= 0xef) {
            ptr->stat = 2;
        } else if (0xf0 <= c && c <= 0xf4) {
            ptr->stat = 3;
        } else {
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

    case 1:
    case 2:
        if (!is_trail) {
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index > ptr->stat) {
            /* Sequence complete.  Remember whether it was EF BB BF before
               w2e_conv() overwrites buf[0] and buf[1]; that sequence is
               not scored. */
            const int is_bom = (ptr->buf[0] == 0xef
                                && ptr->buf[1] == 0xbb
                                && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!is_bom) {
                code_score(ptr);
            }
            status_clear(ptr);
        }
        break;

    case 3:
        if (!is_trail) {
            status_disable(ptr);
        } else if (ptr->index < ptr->stat) {
            status_push_ch(ptr, c);
        } else {
            /* last continuation byte: drop the sequence */
            status_clear(ptr);
        }
        break;
    }
}
#endif
02967 
02968 static void
02969 code_status(nkf_char c)
02970 {
02971     int action_flag = 1;
02972     struct input_code *result = 0;
02973     struct input_code *p = input_code_list;
02974     while (p->name){
02975         if (!p->status_func) {
02976             ++p;
02977             continue;
02978         }
02979         if (!p->status_func)
02980             continue;
02981         (p->status_func)(p, c);
02982         if (p->stat > 0){
02983             action_flag = 0;
02984         }else if(p->stat == 0){
02985             if (result){
02986                 action_flag = 0;
02987             }else{
02988                 result = p;
02989             }
02990         }
02991         ++p;
02992     }
02993 
02994     if (action_flag){
02995         if (result && !estab_f){
02996             set_iconv(TRUE, result->iconv_func);
02997         }else if (c <= DEL){
02998             struct input_code *ptr = input_code_list;
02999             while (ptr->name){
03000                 status_reset(ptr);
03001                 ++ptr;
03002             }
03003         }
03004     }
03005 }
03006 
03007 typedef struct {
03008     nkf_buf_t *std_gc_buf;
03009     nkf_char broken_state;
03010     nkf_buf_t *broken_buf;
03011     nkf_char mimeout_state;
03012     nkf_buf_t *nfc_buf;
03013 } nkf_state_t;
03014 
03015 static nkf_state_t *nkf_state = NULL;
03016 
03017 #define STD_GC_BUFSIZE (256)
03018 
03019 static void
03020 nkf_state_init(void)
03021 {
03022     if (nkf_state) {
03023         nkf_buf_clear(nkf_state->std_gc_buf);
03024         nkf_buf_clear(nkf_state->broken_buf);
03025         nkf_buf_clear(nkf_state->nfc_buf);
03026     }
03027     else {
03028         nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
03029         nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
03030         nkf_state->broken_buf = nkf_buf_new(3);
03031         nkf_state->nfc_buf = nkf_buf_new(9);
03032     }
03033     nkf_state->broken_state = 0;
03034     nkf_state->mimeout_state = 0;
03035 }
03036 
03037 #ifndef WIN32DLL
03038 static nkf_char
03039 std_getc(FILE *f)
03040 {
03041     if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
03042         return nkf_buf_pop(nkf_state->std_gc_buf);
03043     }
03044     return getc(f);
03045 }
03046 #endif /*WIN32DLL*/
03047 
03048 static nkf_char
03049 std_ungetc(nkf_char c, FILE *f)
03050 {
03051     nkf_buf_push(nkf_state->std_gc_buf, c);
03052     return c;
03053 }
03054 
03055 #ifndef WIN32DLL
03056 static void
03057 std_putc(nkf_char c)
03058 {
03059     if(c!=EOF)
03060         putchar(c);
03061 }
03062 #endif /*WIN32DLL*/
03063 
03064 static nkf_char   hold_buf[HOLD_SIZE*2];
03065 static int             hold_count = 0;
03066 static nkf_char
03067 push_hold_buf(nkf_char c2)
03068 {
03069     if (hold_count >= HOLD_SIZE*2)
03070         return (EOF);
03071     hold_buf[hold_count++] = c2;
03072     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
03073 }
03074 
03075 static int
03076 h_conv(FILE *f, nkf_char c1, nkf_char c2)
03077 {
03078     int ret;
03079     int hold_index;
03080     nkf_char c3, c4;
03081 
03086     hold_count = 0;
03087     push_hold_buf(c1);
03088     push_hold_buf(c2);
03089 
03090     while ((c2 = (*i_getc)(f)) != EOF) {
03091         if (c2 == ESC){
03092             (*i_ungetc)(c2,f);
03093             break;
03094         }
03095         code_status(c2);
03096         if (push_hold_buf(c2) == EOF || estab_f) {
03097             break;
03098         }
03099     }
03100 
03101     if (!estab_f) {
03102         struct input_code *p = input_code_list;
03103         struct input_code *result = p;
03104         if (c2 == EOF) {
03105             code_status(c2);
03106         }
03107         while (p->name) {
03108             if (p->status_func && p->score < result->score) {
03109                 result = p;
03110             }
03111             p++;
03112         }
03113         set_iconv(TRUE, result->iconv_func);
03114     }
03115 
03116 
03126     ret = c2;
03127     hold_index = 0;
03128     while (hold_index < hold_count){
03129         c1 = hold_buf[hold_index++];
03130         if (nkf_char_unicode_p(c1)) {
03131             (*oconv)(0, c1);
03132             continue;
03133         }
03134         else if (c1 <= DEL){
03135             (*iconv)(0, c1, 0);
03136             continue;
03137         }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
03138             (*iconv)(JIS_X_0201_1976_K, c1, 0);
03139             continue;
03140         }
03141         if (hold_index < hold_count){
03142             c2 = hold_buf[hold_index++];
03143         }else{
03144             c2 = (*i_getc)(f);
03145             if (c2 == EOF){
03146                 c4 = EOF;
03147                 break;
03148             }
03149             code_status(c2);
03150         }
03151         c3 = 0;
03152         switch ((*iconv)(c1, c2, 0)) {  /* can be EUC/SJIS/UTF-8 */
03153         case -2:
03154             /* 4 bytes UTF-8 */
03155             if (hold_index < hold_count){
03156                 c3 = hold_buf[hold_index++];
03157             } else if ((c3 = (*i_getc)(f)) == EOF) {
03158                 ret = EOF;
03159                 break;
03160             }
03161             code_status(c3);
03162             if (hold_index < hold_count){
03163                 c4 = hold_buf[hold_index++];
03164             } else if ((c4 = (*i_getc)(f)) == EOF) {
03165                 c3 = ret = EOF;
03166                 break;
03167             }
03168             code_status(c4);
03169             (*iconv)(c1, c2, (c3<<8)|c4);
03170             break;
03171         case -1:
03172             /* 3 bytes EUC or UTF-8 */
03173             if (hold_index < hold_count){
03174                 c3 = hold_buf[hold_index++];
03175             } else if ((c3 = (*i_getc)(f)) == EOF) {
03176                 ret = EOF;
03177                 break;
03178             } else {
03179                 code_status(c3);
03180             }
03181             (*iconv)(c1, c2, c3);
03182             break;
03183         }
03184         if (c3 == EOF) break;
03185     }
03186     return ret;
03187 }
03188 
03189 /*
03190  * Check and Ignore BOM
03191  */
03192 static void
03193 check_bom(FILE *f)
03194 {
03195     int c2;
03196     switch(c2 = (*i_getc)(f)){
03197     case 0x00:
03198         if((c2 = (*i_getc)(f)) == 0x00){
03199             if((c2 = (*i_getc)(f)) == 0xFE){
03200                 if((c2 = (*i_getc)(f)) == 0xFF){
03201                     if(!input_encoding){
03202                         set_iconv(TRUE, w_iconv32);
03203                     }
03204                     if (iconv == w_iconv32) {
03205                         input_endian = ENDIAN_BIG;
03206                         return;
03207                     }
03208                     (*i_ungetc)(0xFF,f);
03209                 }else (*i_ungetc)(c2,f);
03210                 (*i_ungetc)(0xFE,f);
03211             }else if(c2 == 0xFF){
03212                 if((c2 = (*i_getc)(f)) == 0xFE){
03213                     if(!input_encoding){
03214                         set_iconv(TRUE, w_iconv32);
03215                     }
03216                     if (iconv == w_iconv32) {
03217                         input_endian = ENDIAN_2143;
03218                         return;
03219                     }
03220                     (*i_ungetc)(0xFF,f);
03221                 }else (*i_ungetc)(c2,f);
03222                 (*i_ungetc)(0xFF,f);
03223             }else (*i_ungetc)(c2,f);
03224             (*i_ungetc)(0x00,f);
03225         }else (*i_ungetc)(c2,f);
03226         (*i_ungetc)(0x00,f);
03227         break;
03228     case 0xEF:
03229         if((c2 = (*i_getc)(f)) == 0xBB){
03230             if((c2 = (*i_getc)(f)) == 0xBF){
03231                 if(!input_encoding){
03232                     set_iconv(TRUE, w_iconv);
03233                 }
03234                 if (iconv == w_iconv) {
03235                     return;
03236                 }
03237                 (*i_ungetc)(0xBF,f);
03238             }else (*i_ungetc)(c2,f);
03239             (*i_ungetc)(0xBB,f);
03240         }else (*i_ungetc)(c2,f);
03241         (*i_ungetc)(0xEF,f);
03242         break;
03243     case 0xFE:
03244         if((c2 = (*i_getc)(f)) == 0xFF){
03245             if((c2 = (*i_getc)(f)) == 0x00){
03246                 if((c2 = (*i_getc)(f)) == 0x00){
03247                     if(!input_encoding){
03248                         set_iconv(TRUE, w_iconv32);
03249                     }
03250                     if (iconv == w_iconv32) {
03251                         input_endian = ENDIAN_3412;
03252                         return;
03253                     }
03254                     (*i_ungetc)(0x00,f);
03255                 }else (*i_ungetc)(c2,f);
03256                 (*i_ungetc)(0x00,f);
03257             }else (*i_ungetc)(c2,f);
03258             if(!input_encoding){
03259                 set_iconv(TRUE, w_iconv16);
03260             }
03261             if (iconv == w_iconv16) {
03262                 input_endian = ENDIAN_BIG;
03263                 return;
03264             }
03265             (*i_ungetc)(0xFF,f);
03266         }else (*i_ungetc)(c2,f);
03267         (*i_ungetc)(0xFE,f);
03268         break;
03269     case 0xFF:
03270         if((c2 = (*i_getc)(f)) == 0xFE){
03271             if((c2 = (*i_getc)(f)) == 0x00){
03272                 if((c2 = (*i_getc)(f)) == 0x00){
03273                     if(!input_encoding){
03274                         set_iconv(TRUE, w_iconv32);
03275                     }
03276                     if (iconv == w_iconv32) {
03277                         input_endian = ENDIAN_LITTLE;
03278                         return;
03279                     }
03280                     (*i_ungetc)(0x00,f);
03281                 }else (*i_ungetc)(c2,f);
03282                 (*i_ungetc)(0x00,f);
03283             }else (*i_ungetc)(c2,f);
03284             if(!input_encoding){
03285                 set_iconv(TRUE, w_iconv16);
03286             }
03287             if (iconv == w_iconv16) {
03288                 input_endian = ENDIAN_LITTLE;
03289                 return;
03290             }
03291             (*i_ungetc)(0xFE,f);
03292         }else (*i_ungetc)(c2,f);
03293         (*i_ungetc)(0xFF,f);
03294         break;
03295     default:
03296         (*i_ungetc)(c2,f);
03297         break;
03298     }
03299 }
03300 
03301 static nkf_char
03302 broken_getc(FILE *f)
03303 {
03304     nkf_char c, c1;
03305 
03306     if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
03307         return nkf_buf_pop(nkf_state->broken_buf);
03308     }
03309     c = (*i_bgetc)(f);
03310     if (c=='$' && nkf_state->broken_state != ESC
03311         && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
03312         c1= (*i_bgetc)(f);
03313         nkf_state->broken_state = 0;
03314         if (c1=='@'|| c1=='B') {
03315             nkf_buf_push(nkf_state->broken_buf, c1);
03316             nkf_buf_push(nkf_state->broken_buf, c);
03317             return ESC;
03318         } else {
03319             (*i_bungetc)(c1,f);
03320             return c;
03321         }
03322     } else if (c=='(' && nkf_state->broken_state != ESC
03323                && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
03324         c1= (*i_bgetc)(f);
03325         nkf_state->broken_state = 0;
03326         if (c1=='J'|| c1=='B') {
03327             nkf_buf_push(nkf_state->broken_buf, c1);
03328             nkf_buf_push(nkf_state->broken_buf, c);
03329             return ESC;
03330         } else {
03331             (*i_bungetc)(c1,f);
03332             return c;
03333         }
03334     } else {
03335         nkf_state->broken_state = c;
03336         return c;
03337     }
03338 }
03339 
03340 static nkf_char
03341 broken_ungetc(nkf_char c, FILE *f)
03342 {
03343     if (nkf_buf_length(nkf_state->broken_buf) < 2)
03344         nkf_buf_push(nkf_state->broken_buf, c);
03345     return c;
03346 }
03347 
03348 static void
03349 eol_conv(nkf_char c2, nkf_char c1)
03350 {
03351     if (guess_f && input_eol != EOF) {
03352         if (c2 == 0 && c1 == LF) {
03353             if (!input_eol) input_eol = prev_cr ? CRLF : LF;
03354             else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
03355         } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
03356         else if (!prev_cr);
03357         else if (!input_eol) input_eol = CR;
03358         else if (input_eol != CR) input_eol = EOF;
03359     }
03360     if (prev_cr || (c2 == 0 && c1 == LF)) {
03361         prev_cr = 0;
03362         if (eolmode_f != LF) (*o_eol_conv)(0, CR);
03363         if (eolmode_f != CR) (*o_eol_conv)(0, LF);
03364     }
03365     if (c2 == 0 && c1 == CR) prev_cr = CR;
03366     else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
03367 }
03368 
03369 static void
03370 put_newline(void (*func)(nkf_char))
03371 {
03372     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03373       case CRLF:
03374         (*func)(0x0D);
03375         (*func)(0x0A);
03376         break;
03377       case CR:
03378         (*func)(0x0D);
03379         break;
03380       case LF:
03381         (*func)(0x0A);
03382         break;
03383     }
03384 }
03385 
03386 static void
03387 oconv_newline(void (*func)(nkf_char, nkf_char))
03388 {
03389     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03390       case CRLF:
03391         (*func)(0, 0x0D);
03392         (*func)(0, 0x0A);
03393         break;
03394       case CR:
03395         (*func)(0, 0x0D);
03396         break;
03397       case LF:
03398         (*func)(0, 0x0A);
03399         break;
03400     }
03401 }
03402 
/*
   Fold action chosen inside fold_conv()

   LF  add newline  and output char
   CR  add newline  and output nothing
   SP  space
   0   skip
   1   (or else) normal output

   fold state in prev (previous character)

   >0x80 Japanese (X0208/X0201)
   <0x80 ASCII
   LF    new line
   SP    space

   This fold algorithm does not preserve leading spaces in a line.
   This is the main difference from fmt.
 */
03422 
03423 #define char_size(c2,c1) (c2?2:1)
03424 
03425 static void
03426 fold_conv(nkf_char c2, nkf_char c1)
03427 {
03428     nkf_char prev0;
03429     nkf_char fold_state;
03430 
03431     if (c1== CR && !fold_preserve_f) {
03432         fold_state=0;  /* ignore cr */
03433     }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
03434         f_prev = LF;
03435         fold_state=0;  /* ignore cr */
03436     } else if (c1== BS) {
03437         if (f_line>0) f_line--;
03438         fold_state =  1;
03439     } else if (c2==EOF && f_line != 0) {    /* close open last line */
03440         fold_state = LF;
03441     } else if ((c1==LF && !fold_preserve_f)
03442                || ((c1==CR||(c1==LF&&f_prev!=CR))
03443                    && fold_preserve_f)) {
03444         /* new line */
03445         if (fold_preserve_f) {
03446             f_prev = c1;
03447             f_line = 0;
03448             fold_state =  CR;
03449         } else if ((f_prev == c1 && !fold_preserve_f)
03450                    || (f_prev == LF && fold_preserve_f)
03451                   ) {        /* duplicate newline */
03452             if (f_line) {
03453                 f_line = 0;
03454                 fold_state =  LF;    /* output two newline */
03455             } else {
03456                 f_line = 0;
03457                 fold_state =  1;
03458             }
03459         } else  {
03460             if (f_prev&0x80) {     /* Japanese? */
03461                 f_prev = c1;
03462                 fold_state =  0;       /* ignore given single newline */
03463             } else if (f_prev==SP) {
03464                 fold_state =  0;
03465             } else {
03466                 f_prev = c1;
03467                 if (++f_line<=fold_len)
03468                     fold_state =  SP;
03469                 else {
03470                     f_line = 0;
03471                     fold_state =  CR;        /* fold and output nothing */
03472                 }
03473             }
03474         }
03475     } else if (c1=='\f') {
03476         f_prev = LF;
03477         f_line = 0;
03478         fold_state =  LF;            /* output newline and clear */
03479     } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
03480         /* X0208 kankaku or ascii space */
03481         if (f_prev == SP) {
03482             fold_state = 0;         /* remove duplicate spaces */
03483         } else {
03484             f_prev = SP;
03485             if (++f_line<=fold_len)
03486                 fold_state = SP;         /* output ASCII space only */
03487             else {
03488                 f_prev = SP; f_line = 0;
03489                 fold_state = CR;        /* fold and output nothing */
03490             }
03491         }
03492     } else {
03493         prev0 = f_prev; /* we still need this one... , but almost done */
03494         f_prev = c1;
03495         if (c2 || c2 == JIS_X_0201_1976_K)
03496             f_prev |= 0x80;  /* this is Japanese */
03497         f_line += char_size(c2,c1);
03498         if (f_line<=fold_len) {   /* normal case */
03499             fold_state = 1;
03500         } else {
03501             if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
03502                 f_line = char_size(c2,c1);
03503                 fold_state =  LF;       /* We can't wait, do fold now */
03504             } else if (c2 == JIS_X_0201_1976_K) {
03505                 /* simple kinsoku rules  return 1 means no folding  */
03506                 if (c1==(0xde&0x7f)) fold_state = 1; /* $B!+(B*/
03507                 else if (c1==(0xdf&0x7f)) fold_state = 1; /* $B!,(B*/
03508                 else if (c1==(0xa4&0x7f)) fold_state = 1; /* $B!#(B*/
03509                 else if (c1==(0xa3&0x7f)) fold_state = 1; /* $B!$(B*/
03510                 else if (c1==(0xa1&0x7f)) fold_state = 1; /* $B!W(B*/
03511                 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
03512                 else if (SP<=c1 && c1<=(0xdf&0x7f)) {      /* X0201 */
03513                     f_line = 1;
03514                     fold_state = LF;/* add one new f_line before this character */
03515                 } else {
03516                     f_line = 1;
03517                     fold_state = LF;/* add one new f_line before this character */
03518                 }
03519             } else if (c2==0) {
03520                 /* kinsoku point in ASCII */
03521                 if (  c1==')'||    /* { [ ( */
03522                     c1==']'||
03523                     c1=='}'||
03524                     c1=='.'||
03525                     c1==','||
03526                     c1=='!'||
03527                     c1=='?'||
03528                     c1=='/'||
03529                     c1==':'||
03530                     c1==';') {
03531                     fold_state = 1;
03532                     /* just after special */
03533                 } else if (!is_alnum(prev0)) {
03534                     f_line = char_size(c2,c1);
03535                     fold_state = LF;
03536                 } else if ((prev0==SP) ||   /* ignored new f_line */
03537                            (prev0==LF)||        /* ignored new f_line */
03538                            (prev0&0x80)) {        /* X0208 - ASCII */
03539                     f_line = char_size(c2,c1);
03540                     fold_state = LF;/* add one new f_line before this character */
03541                 } else {
03542                     fold_state = 1;  /* default no fold in ASCII */
03543                 }
03544             } else {
03545                 if (c2=='!') {
03546                     if (c1=='"')  fold_state = 1; /* $B!"(B */
03547                     else if (c1=='#')  fold_state = 1; /* $B!#(B */
03548                     else if (c1=='W')  fold_state = 1; /* $B!W(B */
03549                     else if (c1=='K')  fold_state = 1; /* $B!K(B */
03550                     else if (c1=='$')  fold_state = 1; /* $B!$(B */
03551                     else if (c1=='%')  fold_state = 1; /* $B!%(B */
03552                     else if (c1=='\'') fold_state = 1; /* $B!\(B */
03553                     else if (c1=='(')  fold_state = 1; /* $B!((B */
03554                     else if (c1==')')  fold_state = 1; /* $B!)(B */
03555                     else if (c1=='*')  fold_state = 1; /* $B!*(B */
03556                     else if (c1=='+')  fold_state = 1; /* $B!+(B */
03557                     else if (c1==',')  fold_state = 1; /* $B!,(B */
03558                     /* default no fold in kinsoku */
03559                     else {
03560                         fold_state = LF;
03561                         f_line = char_size(c2,c1);
03562                         /* add one new f_line before this character */
03563                     }
03564                 } else {
03565                     f_line = char_size(c2,c1);
03566                     fold_state = LF;
03567                     /* add one new f_line before this character */
03568                 }
03569             }
03570         }
03571     }
03572     /* terminator process */
03573     switch(fold_state) {
03574     case LF:
03575         oconv_newline(o_fconv);
03576         (*o_fconv)(c2,c1);
03577         break;
03578     case 0:
03579         return;
03580     case CR:
03581         oconv_newline(o_fconv);
03582         break;
03583     case TAB:
03584     case SP:
03585         (*o_fconv)(0,SP);
03586         break;
03587     default:
03588         (*o_fconv)(c2,c1);
03589     }
03590 }
03591 
03592 static nkf_char z_prev2=0,z_prev1=0;
03593 
03594 static void
03595 z_conv(nkf_char c2, nkf_char c1)
03596 {
03597 
03598     /* if (c2) c1 &= 0x7f; assertion */
03599 
03600     if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
03601         (*o_zconv)(c2,c1);
03602         return;
03603     }
03604 
03605     if (x0201_f) {
03606         if (z_prev2 == JIS_X_0201_1976_K) {
03607             if (c2 == JIS_X_0201_1976_K) {
03608                 if (c1 == (0xde&0x7f)) { /* $BByE@(B */
03609                     z_prev2 = 0;
03610                     (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
03611                     return;
03612                 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
03613                     z_prev2 = 0;
03614                     (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
03615                     return;
03616                 }
03617             }
03618             z_prev2 = 0;
03619             (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
03620         }
03621         if (c2 == JIS_X_0201_1976_K) {
03622             if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
03623                 /* wait for $BByE@(B or $BH>ByE@(B */
03624                 z_prev1 = c1;
03625                 z_prev2 = c2;
03626                 return;
03627             } else {
03628                 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
03629                 return;
03630             }
03631         }
03632     }
03633 
03634     if (c2 == EOF) {
03635         (*o_zconv)(c2, c1);
03636         return;
03637     }
03638 
03639     if (alpha_f&1 && c2 == 0x23) {
03640         /* JISX0208 Alphabet */
03641         c2 = 0;
03642     } else if (c2 == 0x21) {
03643         /* JISX0208 Kigou */
03644         if (0x21==c1) {
03645             if (alpha_f&2) {
03646                 c2 = 0;
03647                 c1 = SP;
03648             } else if (alpha_f&4) {
03649                 (*o_zconv)(0, SP);
03650                 (*o_zconv)(0, SP);
03651                 return;
03652             }
03653         } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
03654             c2 =  0;
03655             c1 = fv[c1-0x20];
03656         }
03657     }
03658 
03659     if (alpha_f&8 && c2 == 0) {
03660         /* HTML Entity */
03661         const char *entity = 0;
03662         switch (c1){
03663         case '>': entity = "&gt;"; break;
03664         case '<': entity = "&lt;"; break;
03665         case '\"': entity = "&quot;"; break;
03666         case '&': entity = "&amp;"; break;
03667         }
03668         if (entity){
03669             while (*entity) (*o_zconv)(0, *entity++);
03670             return;
03671         }
03672     }
03673 
03674     if (alpha_f & 16) {
03675         /* JIS X 0208 Katakana to JIS X 0201 Katakana */
03676         if (c2 == 0x21) {
03677             nkf_char c = 0;
03678             switch (c1) {
03679             case 0x23:
03680                 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
03681                 c = 0xA1;
03682                 break;
03683             case 0x56:
03684                 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
03685                 c = 0xA2;
03686                 break;
03687             case 0x57:
03688                 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
03689                 c = 0xA3;
03690                 break;
03691             case 0x22:
03692                 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
03693                 c = 0xA4;
03694                 break;
03695             case 0x26:
03696                 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
03697                 c = 0xA5;
03698                 break;
03699             case 0x3C:
03700                 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
03701                 c = 0xB0;
03702                 break;
03703             case 0x2B:
03704                 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
03705                 c = 0xDE;
03706                 break;
03707             case 0x2C:
03708                 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
03709                 c = 0xDF;
03710                 break;
03711             }
03712             if (c) {
03713                 (*o_zconv)(JIS_X_0201_1976_K, c);
03714                 return;
03715             }
03716         } else if (c2 == 0x25) {
03717             /* JISX0208 Katakana */
03718             static const int fullwidth_to_halfwidth[] =
03719             {
03720                 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
03721                 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
03722                 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
03723                 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
03724                 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
03725                 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
03726                 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
03727                 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
03728                 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
03729                 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
03730                 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
03731                 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
03732             };
03733             if (fullwidth_to_halfwidth[c1-0x20]){
03734                 c2 = fullwidth_to_halfwidth[c1-0x20];
03735                 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
03736                 if (c2 & 0xFF) {
03737                     (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
03738                 }
03739                 return;
03740             }
03741         }
03742     }
03743     (*o_zconv)(c2,c1);
03744 }
03745 
03746 
/*
 * ROT13 for one character code: 'A'..'Z' and 'a'..'z' are rotated by 13
 * places inside their own case, every other value is returned unchanged.
 *
 * The parameter is parenthesized at every use so that an expression
 * argument (e.g. `x | y`) is handled correctly.  It is still evaluated
 * several times, so the argument must not have side effects.
 */
#define rot13(c)  ( \
                   ((c) <  'A') ? (c) : \
                   ((c) <= 'M') ? ((c) + 13) : \
                   ((c) <= 'Z') ? ((c) - 13) : \
                   ((c) <  'a') ? (c) : \
                   ((c) <= 'm') ? ((c) + 13) : \
                   ((c) <= 'z') ? ((c) - 13) : \
                   (c) \
                  )
03756 
/*
 * ROT47 for one byte: values in '!'..'~' are rotated by 47 places inside
 * that range, every other value is returned unchanged.
 *
 * The parameter is parenthesized at every use so that an expression
 * argument is handled correctly.  It is still evaluated several times, so
 * the argument must not have side effects.
 */
#define  rot47(c) ( \
                   ((c) <  '!') ? (c) : \
                   ((c) <= 'O') ? ((c) + 47) : \
                   ((c) <= '~') ? ((c) - 47) : \
                   (c) \
                  )
03763 
03764 static void
03765 rot_conv(nkf_char c2, nkf_char c1)
03766 {
03767     if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
03768         c1 = rot13(c1);
03769     } else if (c2) {
03770         c1 = rot47(c1);
03771         c2 = rot47(c2);
03772     }
03773     (*o_rot_conv)(c2,c1);
03774 }
03775 
03776 static void
03777 hira_conv(nkf_char c2, nkf_char c1)
03778 {
03779     if (hira_f & 1) {
03780         if (c2 == 0x25) {
03781             if (0x20 < c1 && c1 < 0x74) {
03782                 c2 = 0x24;
03783                 (*o_hira_conv)(c2,c1);
03784                 return;
03785             } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
03786                 c2 = 0;
03787                 c1 = nkf_char_unicode_new(0x3094);
03788                 (*o_hira_conv)(c2,c1);
03789                 return;
03790             }
03791         } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
03792             c1 += 2;
03793             (*o_hira_conv)(c2,c1);
03794             return;
03795         }
03796     }
03797     if (hira_f & 2) {
03798         if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
03799             c2 = 0x25;
03800             c1 = 0x74;
03801         } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
03802             c2 = 0x25;
03803         } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
03804             c1 -= 2;
03805         }
03806     }
03807     (*o_hira_conv)(c2,c1);
03808 }
03809 
03810 
03811 static void
03812 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
03813 {
03814 #define RANGE_NUM_MAX 18
03815     static const nkf_char range[RANGE_NUM_MAX][2] = {
03816         {0x222f, 0x2239,},
03817         {0x2242, 0x2249,},
03818         {0x2251, 0x225b,},
03819         {0x226b, 0x2271,},
03820         {0x227a, 0x227d,},
03821         {0x2321, 0x232f,},
03822         {0x233a, 0x2340,},
03823         {0x235b, 0x2360,},
03824         {0x237b, 0x237e,},
03825         {0x2474, 0x247e,},
03826         {0x2577, 0x257e,},
03827         {0x2639, 0x2640,},
03828         {0x2659, 0x267e,},
03829         {0x2742, 0x2750,},
03830         {0x2772, 0x277e,},
03831         {0x2841, 0x287e,},
03832         {0x4f54, 0x4f7e,},
03833         {0x7425, 0x747e},
03834     };
03835     nkf_char i;
03836     nkf_char start, end, c;
03837 
03838     if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
03839         c2 = GETA1;
03840         c1 = GETA2;
03841     }
03842     if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
03843         c2 = GETA1;
03844         c1 = GETA2;
03845     }
03846 
03847     for (i = 0; i < RANGE_NUM_MAX; i++) {
03848         start = range[i][0];
03849         end   = range[i][1];
03850         c     = (c2 << 8) + c1;
03851         if (c >= start && c <= end) {
03852             c2 = GETA1;
03853             c1 = GETA2;
03854         }
03855     }
03856     (*o_iso2022jp_check_conv)(c2,c1);
03857 }
03858 
03859 
/*
 * Prefixes of the MIME encoded-words that are recognised, i.e. the leading
 * part of something like
 *     =?ISO-2022-JP?B?HOGE HOGE?=
 *
 * "\075" is the octal escape for '='.  The list is NULL terminated and is
 * indexed in parallel with mime_priority_func, mime_encode and
 * mime_encode_method.
 */
#define MIME_PATTERN_ENTRY(s) ((const unsigned char *)(s))
static const unsigned char *mime_pattern[] = {
    MIME_PATTERN_ENTRY("\075?EUC-JP?B?"),
    MIME_PATTERN_ENTRY("\075?SHIFT_JIS?B?"),
    MIME_PATTERN_ENTRY("\075?ISO-8859-1?Q?"),
    MIME_PATTERN_ENTRY("\075?ISO-8859-1?B?"),
    MIME_PATTERN_ENTRY("\075?ISO-2022-JP?B?"),
    MIME_PATTERN_ENTRY("\075?ISO-2022-JP?B?"),
    MIME_PATTERN_ENTRY("\075?ISO-2022-JP?Q?"),
#if defined(UTF8_INPUT_ENABLE)
    MIME_PATTERN_ENTRY("\075?UTF-8?B?"),
    MIME_PATTERN_ENTRY("\075?UTF-8?Q?"),
#endif
    MIME_PATTERN_ENTRY("\075?US-ASCII?Q?"),
    NULL
};
#undef MIME_PATTERN_ENTRY
03877 
03878 
03879 /* $B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u(B */
03880 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
03881     e_iconv, s_iconv, 0, 0, 0, 0,
03882 #if defined(UTF8_INPUT_ENABLE)
03883     w_iconv, w_iconv,
03884 #endif
03885     0,
03886 };
03887 
03888 static const nkf_char mime_encode[] = {
03889     EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
03890 #if defined(UTF8_INPUT_ENABLE)
03891     UTF_8, UTF_8,
03892 #endif
03893     ASCII,
03894     0
03895 };
03896 
03897 static const nkf_char mime_encode_method[] = {
03898     'B', 'B','Q', 'B', 'B', 'B', 'Q',
03899 #if defined(UTF8_INPUT_ENABLE)
03900     'B', 'Q',
03901 #endif
03902     'Q',
03903     0
03904 };
03905 
03906 
/*
 * MIME preprocessor FIFO.
 *
 * The buffer is a ring: positions are kept as free-running unsigned
 * counters and are reduced to an index with MIME_BUF_MASK on every access
 * through mime_input_buf(), which is why the size has to be a power of two.
 */
#define MIME_BUF_SIZE   (1 << 10)   /* 1024, 2^n ring buffer */
#define MIME_BUF_MASK   (MIME_BUF_SIZE - 1)
#define mime_input_buf(n)        mime_input_state.buf[(n)&MIME_BUF_MASK]
static struct {
    unsigned char buf[MIME_BUF_SIZE];   /* ring storage */
    unsigned int  top;                  /* next position handed to the reader */
    unsigned int  last;                 /* end of the decoded data */
    unsigned int  input;                /* position in the undecoded data */
} mime_input_state;
03918 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
03919 
03920 #define MAXRECOVER 20
03921 
03922 static void
03923 mime_input_buf_unshift(nkf_char c)
03924 {
03925     mime_input_buf(--mime_input_state.top) = (unsigned char)c;
03926 }
03927 
03928 static nkf_char
03929 mime_ungetc(nkf_char c, FILE *f)
03930 {
03931     mime_input_buf_unshift(c);
03932     return c;
03933 }
03934 
03935 static nkf_char
03936 mime_ungetc_buf(nkf_char c, FILE *f)
03937 {
03938     if (mimebuf_f)
03939         (*i_mungetc_buf)(c,f);
03940     else
03941         mime_input_buf(--mime_input_state.input) = (unsigned char)c;
03942     return c;
03943 }
03944 
03945 static nkf_char
03946 mime_getc_buf(FILE *f)
03947 {
03948     /* we don't keep eof of mime_input_buf, becase it contains ?= as
03949        a terminator. It was checked in mime_integrity. */
03950     return ((mimebuf_f)?
03951             (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
03952 }
03953 
03954 static void
03955 switch_mime_getc(void)
03956 {
03957     if (i_getc!=mime_getc) {
03958         i_mgetc = i_getc; i_getc = mime_getc;
03959         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
03960         if(mime_f==STRICT_MIME) {
03961             i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
03962             i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
03963         }
03964     }
03965 }
03966 
03967 static void
03968 unswitch_mime_getc(void)
03969 {
03970     if(mime_f==STRICT_MIME) {
03971         i_mgetc = i_mgetc_buf;
03972         i_mungetc = i_mungetc_buf;
03973     }
03974     i_getc = i_mgetc;
03975     i_ungetc = i_mungetc;
03976     if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
03977     mime_iconv_back = NULL;
03978 }
03979 
03980 static nkf_char
03981 mime_integrity(FILE *f, const unsigned char *p)
03982 {
03983     nkf_char c,d;
03984     unsigned int q;
03985     /* In buffered mode, read until =? or NL or buffer full
03986      */
03987     mime_input_state.input = mime_input_state.top;
03988     mime_input_state.last = mime_input_state.top;
03989 
03990     while(*p) mime_input_buf(mime_input_state.input++) = *p++;
03991     d = 0;
03992     q = mime_input_state.input;
03993     while((c=(*i_getc)(f))!=EOF) {
03994         if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
03995             break;   /* buffer full */
03996         }
03997         if (c=='=' && d=='?') {
03998             /* checked. skip header, start decode */
03999             mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04000             /* mime_last_input = mime_input_state.input; */
04001             mime_input_state.input = q;
04002             switch_mime_getc();
04003             return 1;
04004         }
04005         if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
04006             break;
04007         /* Should we check length mod 4? */
04008         mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04009         d=c;
04010     }
04011     /* In case of Incomplete MIME, no MIME decode  */
04012     mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04013     mime_input_state.last = mime_input_state.input;     /* point undecoded buffer */
04014     mime_decode_mode = 1;              /* no decode on mime_input_buf last in mime_getc */
04015     switch_mime_getc();         /* anyway we need buffered getc */
04016     return 1;
04017 }
04018 
04019 static nkf_char
04020 mime_begin_strict(FILE *f)
04021 {
04022     nkf_char c1 = 0;
04023     int i,j,k;
04024     const unsigned char *p,*q;
04025     nkf_char r[MAXRECOVER];    /* recovery buffer, max mime pattern length */
04026 
04027     mime_decode_mode = FALSE;
04028     /* =? has been checked */
04029     j = 0;
04030     p = mime_pattern[j];
04031     r[0]='='; r[1]='?';
04032 
04033     for(i=2;p[i]>SP;i++) {                   /* start at =? */
04034         if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
04035             /* pattern fails, try next one */
04036             q = p;
04037             while (mime_pattern[++j]) {
04038                 p = mime_pattern[j];
04039                 for(k=2;k<i;k++)              /* assume length(p) > i */
04040                     if (p[k]!=q[k]) break;
04041                 if (k==i && nkf_toupper(c1)==p[k]) break;
04042             }
04043             p = mime_pattern[j];
04044             if (p) continue;  /* found next one, continue */
04045             /* all fails, output from recovery buffer */
04046             (*i_ungetc)(c1,f);
04047             for(j=0;j<i;j++) {
04048                 (*oconv)(0,r[j]);
04049             }
04050             return c1;
04051         }
04052     }
04053     mime_decode_mode = p[i-2];
04054 
04055     mime_iconv_back = iconv;
04056     set_iconv(FALSE, mime_priority_func[j]);
04057     clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
04058 
04059     if (mime_decode_mode=='B') {
04060         mimebuf_f = unbuf_f;
04061         if (!unbuf_f) {
04062             /* do MIME integrity check */
04063             return mime_integrity(f,mime_pattern[j]);
04064         }
04065     }
04066     switch_mime_getc();
04067     mimebuf_f = TRUE;
04068     return c1;
04069 }
04070 
04071 static nkf_char
04072 mime_begin(FILE *f)
04073 {
04074     nkf_char c1;
04075     int i,k;
04076 
04077     /* In NONSTRICT mode, only =? is checked. In case of failure, we  */
04078     /* re-read and convert again from mime_buffer.  */
04079 
04080     /* =? has been checked */
04081     k = mime_input_state.last;
04082     mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
04083     for(i=2;i<MAXRECOVER;i++) {                   /* start at =? */
04084         /* We accept any character type even if it is breaked by new lines */
04085         c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04086         if (c1==LF||c1==SP||c1==CR||
04087             c1=='-'||c1=='_'||is_alnum(c1)) continue;
04088         if (c1=='=') {
04089             /* Failed. But this could be another MIME preemble */
04090             (*i_ungetc)(c1,f);
04091             mime_input_state.last--;
04092             break;
04093         }
04094         if (c1!='?') break;
04095         else {
04096             /* c1=='?' */
04097             c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04098             if (!(++i<MAXRECOVER) || c1==EOF) break;
04099             if (c1=='b'||c1=='B') {
04100                 mime_decode_mode = 'B';
04101             } else if (c1=='q'||c1=='Q') {
04102                 mime_decode_mode = 'Q';
04103             } else {
04104                 break;
04105             }
04106             c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04107             if (!(++i<MAXRECOVER) || c1==EOF) break;
04108             if (c1!='?') {
04109                 mime_decode_mode = FALSE;
04110             }
04111             break;
04112         }
04113     }
04114     switch_mime_getc();
04115     if (!mime_decode_mode) {
04116         /* false MIME premble, restart from mime_buffer */
04117         mime_decode_mode = 1;  /* no decode, but read from the mime_buffer */
04118         /* Since we are in MIME mode until buffer becomes empty,    */
04119         /* we never go into mime_begin again for a while.           */
04120         return c1;
04121     }
04122     /* discard mime preemble, and goto MIME mode */
04123     mime_input_state.last = k;
04124     /* do no MIME integrity check */
04125     return c1;   /* used only for checking EOF */
04126 }
04127 
04128 #ifdef CHECK_OPTION
04129 static void
04130 no_putc(nkf_char c)
04131 {
04132     ;
04133 }
04134 
04135 static void
04136 debug(const char *str)
04137 {
04138     if (debug_f){
04139         fprintf(stderr, "%s\n", str ? str : "NULL");
04140     }
04141 }
04142 #endif
04143 
04144 static void
04145 set_input_codename(const char *codename)
04146 {
04147     if (!input_codename) {
04148         input_codename = codename;
04149     } else if (strcmp(codename, input_codename) != 0) {
04150         input_codename = "";
04151     }
04152 }
04153 
04154 static const char*
04155 get_guessed_code(void)
04156 {
04157     if (input_codename && !*input_codename) {
04158         input_codename = "BINARY";
04159     } else {
04160         struct input_code *p = find_inputcode_byfunc(iconv);
04161         if (!input_codename) {
04162             input_codename = "ASCII";
04163         } else if (strcmp(input_codename, "Shift_JIS") == 0) {
04164             if (p->score & (SCORE_DEPEND|SCORE_CP932))
04165                 input_codename = "CP932";
04166         } else if (strcmp(input_codename, "EUC-JP") == 0) {
04167             if (p->score & (SCORE_X0212))
04168                 input_codename = "EUCJP-MS";
04169             else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04170                 input_codename = "CP51932";
04171         } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
04172             if (p->score & (SCORE_KANA))
04173                 input_codename = "CP50221";
04174             else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04175                 input_codename = "CP50220";
04176         }
04177     }
04178     return input_codename;
04179 }
04180 
04181 #if !defined(PERL_XS) && !defined(WIN32DLL)
04182 static void
04183 print_guessed_code(char *filename)
04184 {
04185     if (filename != NULL) printf("%s: ", filename);
04186     if (input_codename && !*input_codename) {
04187         printf("BINARY\n");
04188     } else {
04189         input_codename = get_guessed_code();
04190         if (guess_f == 1) {
04191             printf("%s\n", input_codename);
04192         } else {
04193             printf("%s%s\n",
04194                    input_codename,
04195                    input_eol == CR   ? " (CR)" :
04196                    input_eol == LF   ? " (LF)" :
04197                    input_eol == CRLF ? " (CRLF)" :
04198                    input_eol == EOF  ? " (MIXED NL)" :
04199                    "");
04200         }
04201     }
04202 }
04203 #endif /*WIN32DLL*/
04204 
04205 #ifdef INPUT_OPTION
04206 
04207 static nkf_char
04208 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
04209 {
04210     nkf_char c1, c2, c3;
04211     c1 = (*g)(f);
04212     if (c1 != ch){
04213         return c1;
04214     }
04215     c2 = (*g)(f);
04216     if (!nkf_isxdigit(c2)){
04217         (*u)(c2, f);
04218         return c1;
04219     }
04220     c3 = (*g)(f);
04221     if (!nkf_isxdigit(c3)){
04222         (*u)(c2, f);
04223         (*u)(c3, f);
04224         return c1;
04225     }
04226     return (hex2bin(c2) << 4) | hex2bin(c3);
04227 }
04228 
04229 static nkf_char
04230 cap_getc(FILE *f)
04231 {
04232     return hex_getc(':', f, i_cgetc, i_cungetc);
04233 }
04234 
04235 static nkf_char
04236 cap_ungetc(nkf_char c, FILE *f)
04237 {
04238     return (*i_cungetc)(c, f);
04239 }
04240 
04241 static nkf_char
04242 url_getc(FILE *f)
04243 {
04244     return hex_getc('%', f, i_ugetc, i_uungetc);
04245 }
04246 
04247 static nkf_char
04248 url_ungetc(nkf_char c, FILE *f)
04249 {
04250     return (*i_uungetc)(c, f);
04251 }
04252 #endif
04253 
04254 #ifdef NUMCHAR_OPTION
04255 static nkf_char
04256 numchar_getc(FILE *f)
04257 {
04258     nkf_char (*g)(FILE *) = i_ngetc;
04259     nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
04260     int i = 0, j;
04261     nkf_char buf[12];
04262     long c = -1;
04263 
04264     buf[i] = (*g)(f);
04265     if (buf[i] == '&'){
04266         buf[++i] = (*g)(f);
04267         if (buf[i] == '#'){
04268             c = 0;
04269             buf[++i] = (*g)(f);
04270             if (buf[i] == 'x' || buf[i] == 'X'){
04271                 for (j = 0; j < 7; j++){
04272                     buf[++i] = (*g)(f);
04273                     if (!nkf_isxdigit(buf[i])){
04274                         if (buf[i] != ';'){
04275                             c = -1;
04276                         }
04277                         break;
04278                     }
04279                     c <<= 4;
04280                     c |= hex2bin(buf[i]);
04281                 }
04282             }else{
04283                 for (j = 0; j < 8; j++){
04284                     if (j){
04285                         buf[++i] = (*g)(f);
04286                     }
04287                     if (!nkf_isdigit(buf[i])){
04288                         if (buf[i] != ';'){
04289                             c = -1;
04290                         }
04291                         break;
04292                     }
04293                     c *= 10;
04294                     c += hex2bin(buf[i]);
04295                 }
04296             }
04297         }
04298     }
04299     if (c != -1){
04300         return nkf_char_unicode_new(c);
04301     }
04302     while (i > 0){
04303         (*u)(buf[i], f);
04304         --i;
04305     }
04306     return buf[0];
04307 }
04308 
04309 static nkf_char
04310 numchar_ungetc(nkf_char c, FILE *f)
04311 {
04312     return (*i_nungetc)(c, f);
04313 }
04314 #endif
04315 
04316 #ifdef UNICODE_NORMALIZATION
04317 
04318 static nkf_char
04319 nfc_getc(FILE *f)
04320 {
04321     nkf_char (*g)(FILE *f) = i_nfc_getc;
04322     nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
04323     nkf_buf_t *buf = nkf_state->nfc_buf;
04324     const unsigned char *array;
04325     int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
04326     nkf_char c = (*g)(f);
04327 
04328     if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
04329 
04330     nkf_buf_push(buf, c);
04331     do {
04332         while (lower <= upper) {
04333             int mid = (lower+upper) / 2;
04334             int len;
04335             array = normalization_table[mid].nfd;
04336             for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
04337                 if (len >= nkf_buf_length(buf)) {
04338                     c = (*g)(f);
04339                     if (c == EOF) {
04340                         len = 0;
04341                         lower = 1, upper = 0;
04342                         break;
04343                     }
04344                     nkf_buf_push(buf, c);
04345                 }
04346                 if (array[len] != nkf_buf_at(buf, len)) {
04347                     if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
04348                     else  upper = mid - 1;
04349                     len = 0;
04350                     break;
04351                 }
04352             }
04353             if (len > 0) {
04354                 int i;
04355                 array = normalization_table[mid].nfc;
04356                 nkf_buf_clear(buf);
04357                 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
04358                     nkf_buf_push(buf, array[i]);
04359                 break;
04360             }
04361         }
04362     } while (lower <= upper);
04363 
04364     while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
04365     c = nkf_buf_pop(buf);
04366 
04367     return c;
04368 }
04369 
04370 static nkf_char
04371 nfc_ungetc(nkf_char c, FILE *f)
04372 {
04373     return (*i_nfc_ungetc)(c, f);
04374 }
04375 #endif /* UNICODE_NORMALIZATION */
04376 
04377 
04378 static nkf_char
04379 base64decode(nkf_char c)
04380 {
04381     int             i;
04382     if (c > '@') {
04383         if (c < '[') {
04384             i = c - 'A';                        /* A..Z 0-25 */
04385         } else if (c == '_') {
04386             i = '?'         /* 63 */ ;          /* _  63 */
04387         } else {
04388             i = c - 'G'     /* - 'a' + 26 */ ;  /* a..z 26-51 */
04389         }
04390     } else if (c > '/') {
04391         i = c - '0' + '4'   /* - '0' + 52 */ ;  /* 0..9 52-61 */
04392     } else if (c == '+' || c == '-') {
04393         i = '>'             /* 62 */ ;          /* + and -  62 */
04394     } else {
04395         i = '?'             /* 63 */ ;          /* / 63 */
04396     }
04397     return (i);
04398 }
04399 
04400 static nkf_char
04401 mime_getc(FILE *f)
04402 {
04403     nkf_char c1, c2, c3, c4, cc;
04404     nkf_char t1, t2, t3, t4, mode, exit_mode;
04405     nkf_char lwsp_count;
04406     char *lwsp_buf;
04407     char *lwsp_buf_new;
04408     nkf_char lwsp_size = 128;
04409 
04410     if (mime_input_state.top != mime_input_state.last) {  /* Something is in FIFO */
04411         return  mime_input_buf(mime_input_state.top++);
04412     }
04413     if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
04414         mime_decode_mode=FALSE;
04415         unswitch_mime_getc();
04416         return (*i_getc)(f);
04417     }
04418 
04419     if (mimebuf_f == FIXED_MIME)
04420         exit_mode = mime_decode_mode;
04421     else
04422         exit_mode = FALSE;
04423     if (mime_decode_mode == 'Q') {
04424         if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
04425       restart_mime_q:
04426         if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
04427         if (c1<=SP || DEL<=c1) {
04428             mime_decode_mode = exit_mode; /* prepare for quit */
04429             return c1;
04430         }
04431         if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
04432             return c1;
04433         }
04434 
04435         mime_decode_mode = exit_mode; /* prepare for quit */
04436         if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
04437         if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
04438             /* end Q encoding */
04439             input_mode = exit_mode;
04440             lwsp_count = 0;
04441             lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04442             while ((c1=(*i_getc)(f))!=EOF) {
04443                 switch (c1) {
04444                 case LF:
04445                 case CR:
04446                     if (c1==LF) {
04447                         if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04448                             i_ungetc(SP,f);
04449                             continue;
04450                         } else {
04451                             i_ungetc(c1,f);
04452                         }
04453                         c1 = LF;
04454                     } else {
04455                         if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
04456                             if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04457                                 i_ungetc(SP,f);
04458                                 continue;
04459                             } else {
04460                                 i_ungetc(c1,f);
04461                             }
04462                             i_ungetc(LF,f);
04463                         } else {
04464                             i_ungetc(c1,f);
04465                         }
04466                         c1 = CR;
04467                     }
04468                     break;
04469                 case SP:
04470                 case TAB:
04471                     lwsp_buf[lwsp_count] = (unsigned char)c1;
04472                     if (lwsp_count++>lwsp_size){
04473                         lwsp_size <<= 1;
04474                         lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04475                         lwsp_buf = lwsp_buf_new;
04476                     }
04477                     continue;
04478                 }
04479                 break;
04480             }
04481             if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04482                 i_ungetc(c1,f);
04483                 for(lwsp_count--;lwsp_count>0;lwsp_count--)
04484                     i_ungetc(lwsp_buf[lwsp_count],f);
04485                 c1 = lwsp_buf[0];
04486             }
04487             nkf_xfree(lwsp_buf);
04488             return c1;
04489         }
04490         if (c1=='='&&c2<SP) { /* this is soft wrap */
04491             while((c1 =  (*i_mgetc)(f)) <=SP) {
04492                 if (c1 == EOF) return (EOF);
04493             }
04494             mime_decode_mode = 'Q'; /* still in MIME */
04495             goto restart_mime_q;
04496         }
04497         if (c1=='?') {
04498             mime_decode_mode = 'Q'; /* still in MIME */
04499             (*i_mungetc)(c2,f);
04500             return c1;
04501         }
04502         if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
04503         if (c2<=SP) return c2;
04504         mime_decode_mode = 'Q'; /* still in MIME */
04505         return ((hex2bin(c2)<<4) + hex2bin(c3));
04506     }
04507 
04508     if (mime_decode_mode != 'B') {
04509         mime_decode_mode = FALSE;
04510         return (*i_mgetc)(f);
04511     }
04512 
04513 
04514     /* Base64 encoding */
04515     /*
04516        MIME allows line break in the middle of
04517        Base64, but we are very pessimistic in decoding
04518        in unbuf mode because MIME encoded code may broken by
04519        less or editor's control sequence (such as ESC-[-K in unbuffered
04520        mode. ignore incomplete MIME.
04521      */
04522     mode = mime_decode_mode;
04523     mime_decode_mode = exit_mode;  /* prepare for quit */
04524 
04525     while ((c1 = (*i_mgetc)(f))<=SP) {
04526         if (c1==EOF)
04527             return (EOF);
04528     }
04529   mime_c2_retry:
04530     if ((c2 = (*i_mgetc)(f))<=SP) {
04531         if (c2==EOF)
04532             return (EOF);
04533         if (mime_f != STRICT_MIME) goto mime_c2_retry;
04534         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
04535         return c2;
04536     }
04537     if ((c1 == '?') && (c2 == '=')) {
04538         input_mode = ASCII;
04539         lwsp_count = 0;
04540         lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04541         while ((c1=(*i_getc)(f))!=EOF) {
04542             switch (c1) {
04543             case LF:
04544             case CR:
04545                 if (c1==LF) {
04546                     if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04547                         i_ungetc(SP,f);
04548                         continue;
04549                     } else {
04550                         i_ungetc(c1,f);
04551                     }
04552                     c1 = LF;
04553                 } else {
04554                     if ((c1=(*i_getc)(f))!=EOF) {
04555                         if (c1==SP) {
04556                             i_ungetc(SP,f);
04557                             continue;
04558                         } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04559                             i_ungetc(SP,f);
04560                             continue;
04561                         } else {
04562                             i_ungetc(c1,f);
04563                         }
04564                         i_ungetc(LF,f);
04565                     } else {
04566                         i_ungetc(c1,f);
04567                     }
04568                     c1 = CR;
04569                 }
04570                 break;
04571             case SP:
04572             case TAB:
04573                 lwsp_buf[lwsp_count] = (unsigned char)c1;
04574                 if (lwsp_count++>lwsp_size){
04575                     lwsp_size <<= 1;
04576                     lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04577                     lwsp_buf = lwsp_buf_new;
04578                 }
04579                 continue;
04580             }
04581             break;
04582         }
04583         if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04584             i_ungetc(c1,f);
04585             for(lwsp_count--;lwsp_count>0;lwsp_count--)
04586                 i_ungetc(lwsp_buf[lwsp_count],f);
04587             c1 = lwsp_buf[0];
04588         }
04589         nkf_xfree(lwsp_buf);
04590         return c1;
04591     }
04592   mime_c3_retry:
04593     if ((c3 = (*i_mgetc)(f))<=SP) {
04594         if (c3==EOF)
04595             return (EOF);
04596         if (mime_f != STRICT_MIME) goto mime_c3_retry;
04597         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
04598         return c3;
04599     }
04600   mime_c4_retry:
04601     if ((c4 = (*i_mgetc)(f))<=SP) {
04602         if (c4==EOF)
04603             return (EOF);
04604         if (mime_f != STRICT_MIME) goto mime_c4_retry;
04605         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
04606         return c4;
04607     }
04608 
04609     mime_decode_mode = mode; /* still in MIME sigh... */
04610 
04611     /* BASE 64 decoding */
04612 
04613     t1 = 0x3f & base64decode(c1);
04614     t2 = 0x3f & base64decode(c2);
04615     t3 = 0x3f & base64decode(c3);
04616     t4 = 0x3f & base64decode(c4);
04617     cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
04618     if (c2 != '=') {
04619         mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
04620         cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
04621         if (c3 != '=') {
04622             mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
04623             cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
04624             if (c4 != '=')
04625                 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
04626         }
04627     } else {
04628         return c1;
04629     }
04630     return  mime_input_buf(mime_input_state.top++);
04631 }
04632 
/* Base64 alphabet, indexed by 6-bit value (0..63). */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

/* Capacity (in bytes, excluding one spare slot) of the MIME output hold buffer. */
#define MIMEOUT_BUF_LENGTH 74

/* Bytes held back by mime_putc() before they are emitted raw or encoded. */
static struct {
    char buf[MIMEOUT_BUF_LENGTH+1];   /* pending bytes */
    int count;                        /* number of valid bytes in buf */
} mimeout_state;
04641 
04642 /*nkf_char mime_lastchar2, mime_lastchar1;*/
04643 
04644 static void
04645 open_mime(nkf_char mode)
04646 {
04647     const unsigned char *p;
04648     int i;
04649     int j;
04650     p  = mime_pattern[0];
04651     for(i=0;mime_pattern[i];i++) {
04652         if (mode == mime_encode[i]) {
04653             p = mime_pattern[i];
04654             break;
04655         }
04656     }
04657     mimeout_mode = mime_encode_method[i];
04658     i = 0;
04659     if (base64_count>45) {
04660         if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
04661             (*o_mputc)(mimeout_state.buf[i]);
04662             i++;
04663         }
04664         put_newline(o_mputc);
04665         (*o_mputc)(SP);
04666         base64_count = 1;
04667         if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
04668             i++;
04669         }
04670     }
04671     for (;i<mimeout_state.count;i++) {
04672         if (nkf_isspace(mimeout_state.buf[i])) {
04673             (*o_mputc)(mimeout_state.buf[i]);
04674             base64_count ++;
04675         } else {
04676             break;
04677         }
04678     }
04679     while(*p) {
04680         (*o_mputc)(*p++);
04681         base64_count ++;
04682     }
04683     j = mimeout_state.count;
04684     mimeout_state.count = 0;
04685     for (;i<j;i++) {
04686         mime_putc(mimeout_state.buf[i]);
04687     }
04688 }
04689 
04690 static void
04691 mime_prechar(nkf_char c2, nkf_char c1)
04692 {
04693     if (mimeout_mode > 0){
04694         if (c2 == EOF){
04695             if (base64_count + mimeout_state.count/3*4> 73){
04696                 (*o_base64conv)(EOF,0);
04697                 oconv_newline(o_base64conv);
04698                 (*o_base64conv)(0,SP);
04699                 base64_count = 1;
04700             }
04701         } else {
04702             if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
04703                 (*o_base64conv)(EOF,0);
04704                 oconv_newline(o_base64conv);
04705                 (*o_base64conv)(0,SP);
04706                 base64_count = 1;
04707                 mimeout_mode = -1;
04708             }
04709         }
04710     } else if (c2) {
04711         if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
04712             mimeout_mode =  (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
04713             open_mime(output_mode);
04714             (*o_base64conv)(EOF,0);
04715             oconv_newline(o_base64conv);
04716             (*o_base64conv)(0,SP);
04717             base64_count = 1;
04718             mimeout_mode = -1;
04719         }
04720     }
04721 }
04722 
04723 static void
04724 close_mime(void)
04725 {
04726     (*o_mputc)('?');
04727     (*o_mputc)('=');
04728     base64_count += 2;
04729     mimeout_mode = 0;
04730 }
04731 
04732 static void
04733 eof_mime(void)
04734 {
04735     switch(mimeout_mode) {
04736     case 'Q':
04737     case 'B':
04738         break;
04739     case 2:
04740         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
04741         (*o_mputc)('=');
04742         (*o_mputc)('=');
04743         base64_count += 3;
04744         break;
04745     case 1:
04746         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
04747         (*o_mputc)('=');
04748         base64_count += 2;
04749         break;
04750     }
04751     if (mimeout_mode > 0) {
04752         if (mimeout_f!=FIXED_MIME) {
04753             close_mime();
04754         } else if (mimeout_mode != 'Q')
04755             mimeout_mode = 'B';
04756     }
04757 }
04758 
04759 static void
04760 mimeout_addchar(nkf_char c)
04761 {
04762     switch(mimeout_mode) {
04763     case 'Q':
04764         if (c==CR||c==LF) {
04765             (*o_mputc)(c);
04766             base64_count = 0;
04767         } else if(!nkf_isalnum(c)) {
04768             (*o_mputc)('=');
04769             (*o_mputc)(bin2hex(((c>>4)&0xf)));
04770             (*o_mputc)(bin2hex((c&0xf)));
04771             base64_count += 3;
04772         } else {
04773             (*o_mputc)(c);
04774             base64_count++;
04775         }
04776         break;
04777     case 'B':
04778         nkf_state->mimeout_state=c;
04779         (*o_mputc)(basis_64[c>>2]);
04780         mimeout_mode=2;
04781         base64_count ++;
04782         break;
04783     case 2:
04784         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
04785         nkf_state->mimeout_state=c;
04786         mimeout_mode=1;
04787         base64_count ++;
04788         break;
04789     case 1:
04790         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
04791         (*o_mputc)(basis_64[c & 0x3F]);
04792         mimeout_mode='B';
04793         base64_count += 2;
04794         break;
04795     default:
04796         (*o_mputc)(c);
04797         base64_count++;
04798         break;
04799     }
04800 }
04801 
04802 static void
04803 mime_putc(nkf_char c)
04804 {
04805     int i, j;
04806     nkf_char lastchar;
04807 
04808     if (mimeout_f == FIXED_MIME){
04809         if (mimeout_mode == 'Q'){
04810             if (base64_count > 71){
04811                 if (c!=CR && c!=LF) {
04812                     (*o_mputc)('=');
04813                     put_newline(o_mputc);
04814                 }
04815                 base64_count = 0;
04816             }
04817         }else{
04818             if (base64_count > 71){
04819                 eof_mime();
04820                 put_newline(o_mputc);
04821                 base64_count = 0;
04822             }
04823             if (c == EOF) { /* c==EOF */
04824                 eof_mime();
04825             }
04826         }
04827         if (c != EOF) { /* c==EOF */
04828             mimeout_addchar(c);
04829         }
04830         return;
04831     }
04832 
04833     /* mimeout_f != FIXED_MIME */
04834 
04835     if (c == EOF) { /* c==EOF */
04836         if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
04837         j = mimeout_state.count;
04838         mimeout_state.count = 0;
04839         i = 0;
04840         if (mimeout_mode > 0) {
04841             if (!nkf_isblank(mimeout_state.buf[j-1])) {
04842                 for (;i<j;i++) {
04843                     if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
04844                         break;
04845                     }
04846                     mimeout_addchar(mimeout_state.buf[i]);
04847                 }
04848                 eof_mime();
04849                 for (;i<j;i++) {
04850                     mimeout_addchar(mimeout_state.buf[i]);
04851                 }
04852             } else {
04853                 for (;i<j;i++) {
04854                     mimeout_addchar(mimeout_state.buf[i]);
04855                 }
04856                 eof_mime();
04857             }
04858         } else {
04859             for (;i<j;i++) {
04860                 mimeout_addchar(mimeout_state.buf[i]);
04861             }
04862         }
04863         return;
04864     }
04865 
04866     if (mimeout_state.count > 0){
04867         lastchar = mimeout_state.buf[mimeout_state.count - 1];
04868     }else{
04869         lastchar = -1;
04870     }
04871 
04872     if (mimeout_mode=='Q') {
04873         if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
04874             if (c == CR || c == LF) {
04875                 close_mime();
04876                 (*o_mputc)(c);
04877                 base64_count = 0;
04878                 return;
04879             } else if (c <= SP) {
04880                 close_mime();
04881                 if (base64_count > 70) {
04882                     put_newline(o_mputc);
04883                     base64_count = 0;
04884                 }
04885                 if (!nkf_isblank(c)) {
04886                     (*o_mputc)(SP);
04887                     base64_count++;
04888                 }
04889             } else {
04890                 if (base64_count > 70) {
04891                     close_mime();
04892                     put_newline(o_mputc);
04893                     (*o_mputc)(SP);
04894                     base64_count = 1;
04895                     open_mime(output_mode);
04896                 }
04897                 if (!nkf_noescape_mime(c)) {
04898                     mimeout_addchar(c);
04899                     return;
04900                 }
04901             }
04902             if (c != 0x1B) {
04903                 (*o_mputc)(c);
04904                 base64_count++;
04905                 return;
04906             }
04907         }
04908     }
04909 
04910     if (mimeout_mode <= 0) {
04911         if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
04912                     output_mode == UTF_8)) {
04913             if (nkf_isspace(c)) {
04914                 int flag = 0;
04915                 if (mimeout_mode == -1) {
04916                     flag = 1;
04917                 }
04918                 if (c==CR || c==LF) {
04919                     if (flag) {
04920                         open_mime(output_mode);
04921                         output_mode = 0;
04922                     } else {
04923                         base64_count = 0;
04924                     }
04925                 }
04926                 for (i=0;i<mimeout_state.count;i++) {
04927                     (*o_mputc)(mimeout_state.buf[i]);
04928                     if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
04929                         base64_count = 0;
04930                     }else{
04931                         base64_count++;
04932                     }
04933                 }
04934                 if (flag) {
04935                     eof_mime();
04936                     base64_count = 0;
04937                     mimeout_mode = 0;
04938                 }
04939                 mimeout_state.buf[0] = (char)c;
04940                 mimeout_state.count = 1;
04941             }else{
04942                 if (base64_count > 1
04943                     && base64_count + mimeout_state.count > 76
04944                     && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
04945                     static const char *str = "boundary=\"";
04946                     static int len = 10;
04947                     i = 0;
04948 
04949                     for (; i < mimeout_state.count - len; ++i) {
04950                         if (!strncmp(mimeout_state.buf+i, str, len)) {
04951                             i += len - 2;
04952                             break;
04953                         }
04954                     }
04955 
04956                     if (i == 0 || i == mimeout_state.count - len) {
04957                         put_newline(o_mputc);
04958                         base64_count = 0;
04959                         if (!nkf_isspace(mimeout_state.buf[0])){
04960                             (*o_mputc)(SP);
04961                             base64_count++;
04962                         }
04963                     }
04964                     else {
04965                         int j;
04966                         for (j = 0; j <= i; ++j) {
04967                             (*o_mputc)(mimeout_state.buf[j]);
04968                         }
04969                         put_newline(o_mputc);
04970                         base64_count = 1;
04971                         for (; j <= mimeout_state.count; ++j) {
04972                             mimeout_state.buf[j - i] = mimeout_state.buf[j];
04973                         }
04974                         mimeout_state.count -= i;
04975                     }
04976                 }
04977                 mimeout_state.buf[mimeout_state.count++] = (char)c;
04978                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
04979                     open_mime(output_mode);
04980                 }
04981             }
04982             return;
04983         }else{
04984             if (lastchar==CR || lastchar == LF){
04985                 for (i=0;i<mimeout_state.count;i++) {
04986                     (*o_mputc)(mimeout_state.buf[i]);
04987                 }
04988                 base64_count = 0;
04989                 mimeout_state.count = 0;
04990             }
04991             if (lastchar==SP) {
04992                 for (i=0;i<mimeout_state.count-1;i++) {
04993                     (*o_mputc)(mimeout_state.buf[i]);
04994                     base64_count++;
04995                 }
04996                 mimeout_state.buf[0] = SP;
04997                 mimeout_state.count = 1;
04998             }
04999             open_mime(output_mode);
05000         }
05001     }else{
05002         /* mimeout_mode == 'B', 1, 2 */
05003         if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05004                     output_mode == UTF_8)) {
05005             if (lastchar == CR || lastchar == LF){
05006                 if (nkf_isblank(c)) {
05007                     for (i=0;i<mimeout_state.count;i++) {
05008                         mimeout_addchar(mimeout_state.buf[i]);
05009                     }
05010                     mimeout_state.count = 0;
05011                 } else {
05012                     eof_mime();
05013                     for (i=0;i<mimeout_state.count;i++) {
05014                         (*o_mputc)(mimeout_state.buf[i]);
05015                     }
05016                     base64_count = 0;
05017                     mimeout_state.count = 0;
05018                 }
05019                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05020                 return;
05021             }
05022             if (nkf_isspace(c)) {
05023                 for (i=0;i<mimeout_state.count;i++) {
05024                     if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
05025                         eof_mime();
05026                         for (i=0;i<mimeout_state.count;i++) {
05027                             (*o_mputc)(mimeout_state.buf[i]);
05028                             base64_count++;
05029                         }
05030                         mimeout_state.count = 0;
05031                     }
05032                 }
05033                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05034                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05035                     eof_mime();
05036                     for (i=0;i<mimeout_state.count;i++) {
05037                         (*o_mputc)(mimeout_state.buf[i]);
05038                         base64_count++;
05039                     }
05040                     mimeout_state.count = 0;
05041                 }
05042                 return;
05043             }
05044             if (mimeout_state.count>0 && SP<c && c!='=') {
05045                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05046                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05047                     j = mimeout_state.count;
05048                     mimeout_state.count = 0;
05049                     for (i=0;i<j;i++) {
05050                         mimeout_addchar(mimeout_state.buf[i]);
05051                     }
05052                 }
05053                 return;
05054             }
05055         }
05056     }
05057     if (mimeout_state.count>0) {
05058         j = mimeout_state.count;
05059         mimeout_state.count = 0;
05060         for (i=0;i<j;i++) {
05061             if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
05062                 break;
05063             mimeout_addchar(mimeout_state.buf[i]);
05064         }
05065         if (i<j) {
05066             eof_mime();
05067             base64_count=0;
05068             for (;i<j;i++) {
05069                 (*o_mputc)(mimeout_state.buf[i]);
05070             }
05071             open_mime(output_mode);
05072         }
05073     }
05074     mimeout_addchar(c);
05075 }
05076 
05077 static void
05078 base64_conv(nkf_char c2, nkf_char c1)
05079 {
05080     mime_prechar(c2, c1);
05081     (*o_base64conv)(c2,c1);
05082 }
05083 
#ifdef HAVE_ICONV_H
/* State for converting a stream with the system iconv(3). */
typedef struct nkf_iconv_t {
    iconv_t cd;                 /* conversion descriptor, (iconv_t)-1 if unusable */
    char *input_buffer;         /* raw bytes read from the input */
    size_t input_buffer_size;
    char *output_buffer;        /* converted bytes waiting to be written */
    size_t output_buffer_size;
} nkf_iconv_t;

/*
 * Create a converter from `fromcode` to `tocode`.
 *
 * Returns the converter by value.  On failure a diagnostic is printed, no
 * buffers are allocated and the returned `cd` equals (iconv_t)-1; callers
 * must check `cd` before using the converter.
 */
static nkf_iconv_t
nkf_iconv_new(char *tocode, char *fromcode)
{
    nkf_iconv_t converter;

    converter.input_buffer = NULL;
    converter.input_buffer_size = 0;
    converter.output_buffer = NULL;
    converter.output_buffer_size = 0;

    converter.cd = iconv_open(tocode, fromcode);
    if (converter.cd == (iconv_t)-1) {
        if (errno == EINVAL) {
            fprintf(stderr, "iconv doesn't support %s to %s conversion.\n", fromcode, tocode);
        } else {
            perror("can't iconv_open");
        }
        return converter;
    }

    /* NOTE(review): nkf_xmalloc is assumed not to return NULL - confirm. */
    converter.input_buffer_size = IOBUF_SIZE;
    converter.input_buffer = nkf_xmalloc(converter.input_buffer_size);
    converter.output_buffer_size = IOBUF_SIZE * 2;
    converter.output_buffer = nkf_xmalloc(converter.output_buffer_size);
    return converter;
}

/*
 * Read `input` through i_getc until EOF, convert it with iconv() and write
 * the converted bytes through o_putc.
 *
 * Returns the accumulated count of non-reversible conversions reported by
 * iconv(), or (size_t)-1 after printing a diagnostic when conversion fails.
 */
static size_t
nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
{
    size_t invalid = 0;
    size_t pending = 0;     /* unconverted bytes at the start of input_buffer */
    int at_eof = FALSE;
    char *in_ptr;
    char *out_ptr;
    size_t in_left;
    size_t out_left;
    size_t produced;
    size_t before;
    size_t ret;
    size_t k;
    int err;

    if (converter->cd == (iconv_t)-1) {
        return (size_t)-1;
    }

    for (;;) {
        /* Top up the input buffer. */
        while (!at_eof && pending < converter->input_buffer_size) {
            nkf_char c = (*i_getc)(input);
            if (c == EOF) {
                at_eof = TRUE;
            } else {
                converter->input_buffer[pending++] = (char)c;
            }
        }
        if (pending == 0) {
            break;      /* everything has been read and converted */
        }

        in_ptr = converter->input_buffer;
        in_left = pending;
        out_ptr = converter->output_buffer;
        out_left = converter->output_buffer_size;
        errno = 0;
        ret = iconv(converter->cd, &in_ptr, &in_left, &out_ptr, &out_left);
        err = errno;    /* o_putc below may clobber errno */

        /* iconv() advanced out_ptr; what it produced starts at output_buffer. */
        produced = converter->output_buffer_size - out_left;
        for (k = 0; k < produced; k++) {
            (*o_putc)((unsigned char)converter->output_buffer[k]);
        }

        /* Keep the unconverted tail at the front of the input buffer. */
        if (in_left > 0 && in_ptr != converter->input_buffer) {
            memmove(converter->input_buffer, in_ptr, in_left);
        }
        before = pending;
        pending = in_left;

        if (ret != (size_t)-1) {
            invalid += ret;
            continue;
        }

        switch (err) {
        case EINVAL:
            /* Incomplete multibyte sequence: more input is needed.  Give up
             * if none can arrive or the buffer cannot take any more. */
            if (at_eof || pending == converter->input_buffer_size) {
                errno = err;
                perror("can't iconv");
                return (size_t)-1;
            }
            break;
        case E2BIG:
            /* The output buffer has been drained above, so retrying is
             * normally enough; grow it only if no progress was possible. */
            if (produced == 0 && pending == before) {
                size_t new_size = converter->output_buffer_size * 2;
                char *grown = realloc(converter->output_buffer, new_size);
                if (grown == NULL) {
                    perror("can't realloc");
                    return (size_t)-1;
                }
                converter->output_buffer = grown;
                converter->output_buffer_size = new_size;
            }
            break;
        default:
            errno = err;
            perror("can't iconv");
            return (size_t)-1;
        }
    }

    /* Let stateful encodings emit their reset sequence. */
    out_ptr = converter->output_buffer;
    out_left = converter->output_buffer_size;
    if (iconv(converter->cd, NULL, NULL, &out_ptr, &out_left) == (size_t)-1) {
        perror("can't iconv");
        return (size_t)-1;
    }
    produced = converter->output_buffer_size - out_left;
    for (k = 0; k < produced; k++) {
        (*o_putc)((unsigned char)converter->output_buffer[k]);
    }

    return invalid;
}


/*
 * Release the buffers and the conversion descriptor held by `converter`.
 * Safe to call on a converter whose creation failed.
 */
static void
nkf_iconv_close(nkf_iconv_t *converter)
{
    if (converter == NULL) {
        return;
    }
    if (converter->input_buffer != NULL) {
        nkf_xfree(converter->input_buffer);
        converter->input_buffer = NULL;
    }
    if (converter->output_buffer != NULL) {
        nkf_xfree(converter->output_buffer);
        converter->output_buffer = NULL;
    }
    if (converter->cd != (iconv_t)-1) {
        iconv_close(converter->cd);
        converter->cd = (iconv_t)-1;
    }
}
#endif
05173 
05174 
05175 static void
05176 reinit(void)
05177 {
05178     {
05179         struct input_code *p = input_code_list;
05180         while (p->name){
05181             status_reinit(p++);
05182         }
05183     }
05184     unbuf_f = FALSE;
05185     estab_f = FALSE;
05186     nop_f = FALSE;
05187     binmode_f = TRUE;
05188     rot_f = FALSE;
05189     hira_f = FALSE;
05190     alpha_f = FALSE;
05191     mime_f = MIME_DECODE_DEFAULT;
05192     mime_decode_f = FALSE;
05193     mimebuf_f = FALSE;
05194     broken_f = FALSE;
05195     iso8859_f = FALSE;
05196     mimeout_f = FALSE;
05197     x0201_f = NKF_UNSPECIFIED;
05198     iso2022jp_f = FALSE;
05199 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
05200     ms_ucs_map_f = UCS_MAP_ASCII;
05201 #endif
05202 #ifdef UTF8_INPUT_ENABLE
05203     no_cp932ext_f = FALSE;
05204     no_best_fit_chars_f = FALSE;
05205     encode_fallback = NULL;
05206     unicode_subchar  = '?';
05207     input_endian = ENDIAN_BIG;
05208 #endif
05209 #ifdef UTF8_OUTPUT_ENABLE
05210     output_bom_f = FALSE;
05211     output_endian = ENDIAN_BIG;
05212 #endif
05213 #ifdef UNICODE_NORMALIZATION
05214     nfc_f = FALSE;
05215 #endif
05216 #ifdef INPUT_OPTION
05217     cap_f = FALSE;
05218     url_f = FALSE;
05219     numchar_f = FALSE;
05220 #endif
05221 #ifdef CHECK_OPTION
05222     noout_f = FALSE;
05223     debug_f = FALSE;
05224 #endif
05225     guess_f = 0;
05226 #ifdef EXEC_IO
05227     exec_f = 0;
05228 #endif
05229 #ifdef SHIFTJIS_CP932
05230     cp51932_f = TRUE;
05231     cp932inv_f = TRUE;
05232 #endif
05233 #ifdef X0212_ENABLE
05234     x0212_f = FALSE;
05235     x0213_f = FALSE;
05236 #endif
05237     {
05238         int i;
05239         for (i = 0; i < 256; i++){
05240             prefix_table[i] = 0;
05241         }
05242     }
05243     hold_count = 0;
05244     mimeout_state.count = 0;
05245     mimeout_mode = 0;
05246     base64_count = 0;
05247     f_line = 0;
05248     f_prev = 0;
05249     fold_preserve_f = FALSE;
05250     fold_f = FALSE;
05251     fold_len = 0;
05252     kanji_intro = DEFAULT_J;
05253     ascii_intro = DEFAULT_R;
05254     fold_margin  = FOLD_MARGIN;
05255     o_zconv = no_connection;
05256     o_fconv = no_connection;
05257     o_eol_conv = no_connection;
05258     o_rot_conv = no_connection;
05259     o_hira_conv = no_connection;
05260     o_base64conv = no_connection;
05261     o_iso2022jp_check_conv = no_connection;
05262     o_putc = std_putc;
05263     i_getc = std_getc;
05264     i_ungetc = std_ungetc;
05265     i_bgetc = std_getc;
05266     i_bungetc = std_ungetc;
05267     o_mputc = std_putc;
05268     i_mgetc = std_getc;
05269     i_mungetc  = std_ungetc;
05270     i_mgetc_buf = std_getc;
05271     i_mungetc_buf = std_ungetc;
05272     output_mode = ASCII;
05273     input_mode =  ASCII;
05274     mime_decode_mode = FALSE;
05275     file_out_f = FALSE;
05276     eolmode_f = 0;
05277     input_eol = 0;
05278     prev_cr = 0;
05279     option_mode = 0;
05280     z_prev2=0,z_prev1=0;
05281 #ifdef CHECK_OPTION
05282     iconv_for_check = 0;
05283 #endif
05284     input_codename = NULL;
05285     input_encoding = NULL;
05286     output_encoding = NULL;
05287     nkf_state_init();
05288 #ifdef WIN32DLL
05289     reinitdll();
05290 #endif /*WIN32DLL*/
05291 }
05292 
05293 static int
05294 module_connection(void)
05295 {
05296     if (input_encoding) set_input_encoding(input_encoding);
05297     if (!output_encoding) {
05298         output_encoding = nkf_default_encoding();
05299     }
05300     if (!output_encoding) {
05301         if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
05302         else return -1;
05303     }
05304     set_output_encoding(output_encoding);
05305     oconv = nkf_enc_to_oconv(output_encoding);
05306     o_putc = std_putc;
05307     if (nkf_enc_unicode_p(output_encoding))
05308         output_mode = UTF_8;
05309 
05310         if (x0201_f == NKF_UNSPECIFIED) {
05311                 x0201_f = X0201_DEFAULT;
05312         }
05313 
05314     /* replace continucation module, from output side */
05315 
05316     /* output redicrection */
05317 #ifdef CHECK_OPTION
05318     if (noout_f || guess_f){
05319         o_putc = no_putc;
05320     }
05321 #endif
05322     if (mimeout_f) {
05323         o_mputc = o_putc;
05324         o_putc = mime_putc;
05325         if (mimeout_f == TRUE) {
05326             o_base64conv = oconv; oconv = base64_conv;
05327         }
05328         /* base64_count = 0; */
05329     }
05330 
05331     if (eolmode_f || guess_f) {
05332         o_eol_conv = oconv; oconv = eol_conv;
05333     }
05334     if (rot_f) {
05335         o_rot_conv = oconv; oconv = rot_conv;
05336     }
05337     if (iso2022jp_f) {
05338         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
05339     }
05340     if (hira_f) {
05341         o_hira_conv = oconv; oconv = hira_conv;
05342     }
05343     if (fold_f) {
05344         o_fconv = oconv; oconv = fold_conv;
05345         f_line = 0;
05346     }
05347     if (alpha_f || x0201_f) {
05348         o_zconv = oconv; oconv = z_conv;
05349     }
05350 
05351     i_getc = std_getc;
05352     i_ungetc = std_ungetc;
05353     /* input redicrection */
05354 #ifdef INPUT_OPTION
05355     if (cap_f){
05356         i_cgetc = i_getc; i_getc = cap_getc;
05357         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
05358     }
05359     if (url_f){
05360         i_ugetc = i_getc; i_getc = url_getc;
05361         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
05362     }
05363 #endif
05364 #ifdef NUMCHAR_OPTION
05365     if (numchar_f){
05366         i_ngetc = i_getc; i_getc = numchar_getc;
05367         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
05368     }
05369 #endif
05370 #ifdef UNICODE_NORMALIZATION
05371     if (nfc_f){
05372         i_nfc_getc = i_getc; i_getc = nfc_getc;
05373         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
05374     }
05375 #endif
05376     if (mime_f && mimebuf_f==FIXED_MIME) {
05377         i_mgetc = i_getc; i_getc = mime_getc;
05378         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
05379     }
05380     if (broken_f & 1) {
05381         i_bgetc = i_getc; i_getc = broken_getc;
05382         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
05383     }
05384     if (input_encoding) {
05385         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
05386     } else {
05387         set_iconv(FALSE, e_iconv);
05388     }
05389 
05390     {
05391         struct input_code *p = input_code_list;
05392         while (p->name){
05393             status_reinit(p++);
05394         }
05395     }
05396     return 0;
05397 }
05398 
05399 /*
05400    Conversion main loop. Code detection only.
05401  */
05402 
05403 #if !defined(PERL_XS) && !defined(WIN32DLL)
05404 static nkf_char
05405 noconvert(FILE *f)
05406 {
05407     nkf_char    c;
05408 
05409     if (nop_f == 2)
05410         module_connection();
05411     while ((c = (*i_getc)(f)) != EOF)
05412         (*o_putc)(c);
05413     (*o_putc)(EOF);
05414     return 1;
05415 }
05416 #endif
05417 
/*
 * Loop-control shorthands for the conversion loop that follows.  They expand
 * to bare continue/break statements and refer to the loop's locals c1 and c2,
 * so they are only meaningful inside that loop.  SKIP and MORE expand to two
 * statements and must therefore be used inside braces.
 */
05418 #define NEXT continue        /* emit nothing, fetch the next byte */
05419 #define SKIP c2=0;continue        /* clear c2, emit nothing, fetch the next byte */
05420 #define MORE c2=c1;continue  /* keep c1 in c2 and fetch one more byte */
05421 #define SEND (void)0         /* no-op: fall through so that c2/c1 get output */
05422 #define LAST break           /* leave the loop and go to the closing steps */
/*
 * Switch input_mode to `mode`, reset the caller's local shift_mode to 0 and
 * record "ISO-2022-JP" via set_input_codename() and debug().
 */
05423 #define set_input_mode(mode) do { \
05424     input_mode = mode; \
05425     shift_mode = 0; \
05426     set_input_codename("ISO-2022-JP"); \
05427     debug("ISO-2022-JP"); \
05428 } while (0)
05429 
05430 static int
05431 kanji_convert(FILE *f)
05432 {
05433     nkf_char c1=0, c2=0, c3=0, c4=0;
05434     int shift_mode = 0; /* 0, 1, 2, 3 */
05435     int g2 = 0;
05436     int is_8bit = FALSE;
05437 
05438     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
05439         is_8bit = TRUE;
05440     }
05441 
05442     input_mode = ASCII;
05443     output_mode = ASCII;
05444 
05445     if (module_connection() < 0) {
05446 #if !defined(PERL_XS) && !defined(WIN32DLL)
05447         fprintf(stderr, "no output encoding given\n");
05448 #endif
05449         return -1;
05450     }
05451     check_bom(f);
05452 
05453 #ifdef UTF8_INPUT_ENABLE
05454     if(iconv == w_iconv32){
05455         while ((c1 = (*i_getc)(f)) != EOF &&
05456                (c2 = (*i_getc)(f)) != EOF &&
05457                (c3 = (*i_getc)(f)) != EOF &&
05458                (c4 = (*i_getc)(f)) != EOF) {
05459             nkf_iconv_utf_32(c1, c2, c3, c4);
05460         }
05461         goto finished;
05462     }
05463     else if (iconv == w_iconv16) {
05464         while ((c1 = (*i_getc)(f)) != EOF &&
05465                (c2 = (*i_getc)(f)) != EOF) {
05466             if (nkf_iconv_utf_16(c1, c2, 0, 0) == NKF_ICONV_NEED_TWO_MORE_BYTES &&
05467                 (c3 = (*i_getc)(f)) != EOF &&
05468                 (c4 = (*i_getc)(f)) != EOF) {
05469                 nkf_iconv_utf_16(c1, c2, c3, c4);
05470             }
05471         }
05472         goto finished;
05473     }
05474 #endif
05475 
05476     while ((c1 = (*i_getc)(f)) != EOF) {
05477 #ifdef INPUT_CODE_FIX
05478         if (!input_encoding)
05479 #endif
05480             code_status(c1);
05481         if (c2) {
05482             /* second byte */
05483             if (c2 > DEL) {
05484                 /* in case of 8th bit is on */
05485                 if (!estab_f&&!mime_decode_mode) {
05486                     /* in case of not established yet */
05487                     /* It is still ambiguious */
05488                     if (h_conv(f, c2, c1)==EOF) {
05489                         LAST;
05490                     }
05491                     else {
05492                         SKIP;
05493                     }
05494                 }
05495                 else {
05496                     /* in case of already established */
05497                     if (c1 < 0x40) {
05498                         /* ignore bogus code */
05499                         SKIP;
05500                     } else {
05501                         SEND;
05502                     }
05503                 }
05504             }
05505             else {
05506                 /* 2nd byte of 7 bit code or SJIS */
05507                 SEND;
05508             }
05509         }
05510         else if (nkf_char_unicode_p(c1)) {
05511             (*oconv)(0, c1);
05512             NEXT;
05513         }
05514         else {
05515             /* first byte */
05516             if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
05517                 /* CP5022x */
05518                 MORE;
05519             }else if (input_codename && input_codename[0] == 'I' &&
05520                     0xA1 <= c1 && c1 <= 0xDF) {
05521                 /* JIS X 0201 Katakana in 8bit JIS */
05522                 c2 = JIS_X_0201_1976_K;
05523                 c1 &= 0x7f;
05524                 SEND;
05525             } else if (c1 > DEL) {
05526                 /* 8 bit code */
05527                 if (!estab_f && !iso8859_f) {
05528                     /* not established yet */
05529                     MORE;
05530                 } else { /* estab_f==TRUE */
05531                     if (iso8859_f) {
05532                         c2 = ISO_8859_1;
05533                         c1 &= 0x7f;
05534                         SEND;
05535                     }
05536                     else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
05537                              (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
05538                         /* JIS X 0201 */
05539                         c2 = JIS_X_0201_1976_K;
05540                         c1 &= 0x7f;
05541                         SEND;
05542                     }
05543                     else {
05544                         /* already established */
05545                         MORE;
05546                     }
05547                 }
05548             } else if (SP < c1 && c1 < DEL) {
05549                 /* in case of Roman characters */
05550                 if (shift_mode) {
05551                     /* output 1 shifted byte */
05552                     if (iso8859_f) {
05553                         c2 = ISO_8859_1;
05554                         SEND;
05555                     } else if (nkf_byte_jisx0201_katakana_p(c1)){
05556                         /* output 1 shifted byte */
05557                         c2 = JIS_X_0201_1976_K;
05558                         SEND;
05559                     } else {
05560                         /* look like bogus code */
05561                         SKIP;
05562                     }
05563                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
05564                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
05565                     /* in case of Kanji shifted */
05566                     MORE;
05567                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
05568                     /* Check MIME code */
05569                     if ((c1 = (*i_getc)(f)) == EOF) {
05570                         (*oconv)(0, '=');
05571                         LAST;
05572                     } else if (c1 == '?') {
05573                         /* =? is mime conversion start sequence */
05574                         if(mime_f == STRICT_MIME) {
05575                             /* check in real detail */
05576                             if (mime_begin_strict(f) == EOF)
05577                                 LAST;
05578                             SKIP;
05579                         } else if (mime_begin(f) == EOF)
05580                             LAST;
05581                         SKIP;
05582                     } else {
05583                         (*oconv)(0, '=');
05584                         (*i_ungetc)(c1,f);
05585                         SKIP;
05586                     }
05587                 } else {
05588                     /* normal ASCII code */
05589                     SEND;
05590                 }
05591             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
05592                 shift_mode = 0;
05593                 SKIP;
05594             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
05595                 shift_mode = 1;
05596                 SKIP;
05597             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
05598                 if ((c1 = (*i_getc)(f)) == EOF) {
05599                     (*oconv)(0, ESC);
05600                     LAST;
05601                 }
05602                 else if (c1 == '&') {
05603                     /* IRR */
05604                     if ((c1 = (*i_getc)(f)) == EOF) {
05605                         LAST;
05606                     } else {
05607                         SKIP;
05608                     }
05609                 }
05610                 else if (c1 == '$') {
05611                     /* GZDMx */
05612                     if ((c1 = (*i_getc)(f)) == EOF) {
05613                         /* don't send bogus code
05614                            (*oconv)(0, ESC);
05615                            (*oconv)(0, '$'); */
05616                         LAST;
05617                     } else if (c1 == '@' || c1 == 'B') {
05618                         /* JIS X 0208 */
05619                         set_input_mode(JIS_X_0208);
05620                         SKIP;
05621                     } else if (c1 == '(') {
05622                         /* GZDM4 */
05623                         if ((c1 = (*i_getc)(f)) == EOF) {
05624                             /* don't send bogus code
05625                                (*oconv)(0, ESC);
05626                                (*oconv)(0, '$');
05627                                (*oconv)(0, '(');
05628                              */
05629                             LAST;
05630                         } else if (c1 == '@'|| c1 == 'B') {
05631                             /* JIS X 0208 */
05632                             set_input_mode(JIS_X_0208);
05633                             SKIP;
05634 #ifdef X0212_ENABLE
05635                         } else if (c1 == 'D'){
05636                             set_input_mode(JIS_X_0212);
05637                             SKIP;
05638 #endif /* X0212_ENABLE */
05639                         } else if (c1 == 'O' || c1 == 'Q'){
05640                             set_input_mode(JIS_X_0213_1);
05641                             SKIP;
05642                         } else if (c1 == 'P'){
05643                             set_input_mode(JIS_X_0213_2);
05644                             SKIP;
05645                         } else {
05646                             /* could be some special code */
05647                             (*oconv)(0, ESC);
05648                             (*oconv)(0, '$');
05649                             (*oconv)(0, '(');
05650                             (*oconv)(0, c1);
05651                             SKIP;
05652                         }
05653                     } else if (broken_f&0x2) {
05654                         /* accept any ESC-$-x as broken code ... */
05655                         input_mode = JIS_X_0208;
05656                         shift_mode = 0;
05657                         SKIP;
05658                     } else {
05659                         (*oconv)(0, ESC);
05660                         (*oconv)(0, '$');
05661                         (*oconv)(0, c1);
05662                         SKIP;
05663                     }
05664                 } else if (c1 == '(') {
05665                     /* GZD4 */
05666                     if ((c1 = (*i_getc)(f)) == EOF) {
05667                         /* don't send bogus code
05668                            (*oconv)(0, ESC);
05669                            (*oconv)(0, '('); */
05670                         LAST;
05671                     }
05672                     else if (c1 == 'I') {
05673                         /* JIS X 0201 Katakana */
05674                         set_input_mode(JIS_X_0201_1976_K);
05675                         SKIP;
05676                     }
05677                     else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
05678                         /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
05679                         set_input_mode(ASCII);
05680                         SKIP;
05681                     }
05682                     else if (broken_f&0x2) {
05683                         set_input_mode(ASCII);
05684                         SKIP;
05685                     }
05686                     else {
05687                         (*oconv)(0, ESC);
05688                         (*oconv)(0, '(');
05689                         SEND;
05690                     }
05691                 }
05692                 else if (c1 == '.') {
05693                     /* G2D6 */
05694                     if ((c1 = (*i_getc)(f)) == EOF) {
05695                         LAST;
05696                     }
05697                     else if (c1 == 'A') {
05698                         /* ISO-8859-1 */
05699                         g2 = ISO_8859_1;
05700                         SKIP;
05701                     }
05702                     else {
05703                         (*oconv)(0, ESC);
05704                         (*oconv)(0, '.');
05705                         SEND;
05706                     }
05707                 }
05708                 else if (c1 == 'N') {
05709                     /* SS2 */
05710                     c1 = (*i_getc)(f);
05711                     if (g2 == ISO_8859_1) {
05712                         c2 = ISO_8859_1;
05713                         SEND;
05714                     }else{
05715                         (*i_ungetc)(c1, f);
05716                         /* lonely ESC  */
05717                         (*oconv)(0, ESC);
05718                         SEND;
05719                     }
05720                 }
05721                 else {
05722                     /* lonely ESC  */
05723                     (*oconv)(0, ESC);
05724                     SEND;
05725                 }
05726             } else if (c1 == ESC && iconv == s_iconv) {
05727                 /* ESC in Shift_JIS */
05728                 if ((c1 = (*i_getc)(f)) == EOF) {
05729                     (*oconv)(0, ESC);
05730                     LAST;
05731                 } else if (c1 == '$') {
05732                     /* J-PHONE emoji */
05733                     if ((c1 = (*i_getc)(f)) == EOF) {
05734                         LAST;
05735                     } else if (('E' <= c1 && c1 <= 'G') ||
05736                                ('O' <= c1 && c1 <= 'Q')) {
05737                         /*
05738                            NUM : 0 1 2 3 4 5
05739                            BYTE: G E F O P Q
05740                            C%7 : 1 6 0 2 3 4
05741                            C%7 : 0 1 2 3 4 5 6
05742                            NUM : 2 0 3 4 5 X 1
05743                          */
05744                         static const nkf_char jphone_emoji_first_table[7] =
05745                         {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
05746                         c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
05747                         if ((c1 = (*i_getc)(f)) == EOF) LAST;
05748                         while (SP <= c1 && c1 <= 'z') {
05749                             (*oconv)(0, c1 + c3);
05750                             if ((c1 = (*i_getc)(f)) == EOF) LAST;
05751                         }
05752                         SKIP;
05753                     }
05754                     else {
05755                         (*oconv)(0, ESC);
05756                         (*oconv)(0, '$');
05757                         SEND;
05758                     }
05759                 }
05760                 else {
05761                     /* lonely ESC  */
05762                     (*oconv)(0, ESC);
05763                     SEND;
05764                 }
05765             } else if (c1 == LF || c1 == CR) {
05766                 if (broken_f&4) {
05767                     input_mode = ASCII; set_iconv(FALSE, 0);
05768                     SEND;
05769                 } else if (mime_decode_f && !mime_decode_mode){
05770                     if (c1 == LF) {
05771                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
05772                             i_ungetc(SP,f);
05773                             continue;
05774                         } else {
05775                             i_ungetc(c1,f);
05776                         }
05777                         c1 = LF;
05778                         SEND;
05779                     } else  { /* if (c1 == CR)*/
05780                         if ((c1=(*i_getc)(f))!=EOF) {
05781                             if (c1==SP) {
05782                                 i_ungetc(SP,f);
05783                                 continue;
05784                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
05785                                 i_ungetc(SP,f);
05786                                 continue;
05787                             } else {
05788                                 i_ungetc(c1,f);
05789                             }
05790                             i_ungetc(LF,f);
05791                         } else {
05792                             i_ungetc(c1,f);
05793                         }
05794                         c1 = CR;
05795                         SEND;
05796                     }
05797                 }
05798             } else
05799                 SEND;
05800         }
05801         /* send: */
05802         switch(input_mode){
05803         case ASCII:
05804             switch ((*iconv)(c2, c1, 0)) {  /* can be EUC / SJIS / UTF-8 */
05805             case -2:
05806                 /* 4 bytes UTF-8 */
05807                 if ((c3 = (*i_getc)(f)) != EOF) {
05808                     code_status(c3);
05809                     c3 <<= 8;
05810                     if ((c4 = (*i_getc)(f)) != EOF) {
05811                         code_status(c4);
05812                         (*iconv)(c2, c1, c3|c4);
05813                     }
05814                 }
05815                 break;
05816             case -1:
05817                 /* 3 bytes EUC or UTF-8 */
05818                 if ((c3 = (*i_getc)(f)) != EOF) {
05819                     code_status(c3);
05820                     (*iconv)(c2, c1, c3);
05821                 }
05822                 break;
05823             }
05824             break;
05825         case JIS_X_0208:
05826         case JIS_X_0213_1:
05827             if (ms_ucs_map_f &&
05828                 0x7F <= c2 && c2 <= 0x92 &&
05829                 0x21 <= c1 && c1 <= 0x7E) {
05830                 /* CP932 UDC */
05831                 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
05832                 c2 = 0;
05833             }
05834             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
05835             break;
05836 #ifdef X0212_ENABLE
05837         case JIS_X_0212:
05838             (*oconv)(PREFIX_EUCG3 | c2, c1);
05839             break;
05840 #endif /* X0212_ENABLE */
05841         case JIS_X_0213_2:
05842             (*oconv)(PREFIX_EUCG3 | c2, c1);
05843             break;
05844         default:
05845             (*oconv)(input_mode, c1);  /* other special case */
05846         }
05847 
05848         c2 = 0;
05849         c3 = 0;
05850         continue;
05851         /* goto next_word */
05852     }
05853 
05854 finished:
05855     /* epilogue */
05856     (*iconv)(EOF, 0, 0);
05857     if (!input_codename)
05858     {
05859         if (is_8bit) {
05860             struct input_code *p = input_code_list;
05861             struct input_code *result = p;
05862             while (p->name){
05863                 if (p->score < result->score) result = p;
05864                 ++p;
05865             }
05866             set_input_codename(result->name);
05867 #ifdef CHECK_OPTION
05868             debug(result->name);
05869 #endif
05870         }
05871     }
05872     return 0;
05873 }
05874 
05875 /*
05876  * int options(unsigned char *cp)
05877  * 
05878  * return values:
05879  *    0: success
05880  *   -1: ArgumentError
05881  */
05882 static int
05883 options(unsigned char *cp)
05884 {
05885     nkf_char i, j;
05886     unsigned char *p;
05887     unsigned char *cp_back = NULL;
05888     nkf_encoding *enc;
05889 
05890     if (option_mode==1)
05891         return 0;
05892     while(*cp && *cp++!='-');
05893     while (*cp || cp_back) {
05894         if(!*cp){
05895             cp = cp_back;
05896             cp_back = NULL;
05897             continue;
05898         }
05899         p = 0;
05900         switch (*cp++) {
05901         case '-':  /* literal options */
05902             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
05903                 option_mode = 1;
05904                 return 0;
05905             }
05906             for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
05907                 p = (unsigned char *)long_option[i].name;
05908                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
05909                 if (*p == cp[j] || cp[j] == SP){
05910                     p = &cp[j] + 1;
05911                     break;
05912                 }
05913                 p = 0;
05914             }
05915             if (p == 0) {
05916 #if !defined(PERL_XS) && !defined(WIN32DLL)
05917                 fprintf(stderr, "unknown long option: --%s\n", cp);
05918 #endif
05919                 return -1;
05920             }
05921             while(*cp && *cp != SP && cp++);
05922             if (long_option[i].alias[0]){
05923                 cp_back = cp;
05924                 cp = (unsigned char *)long_option[i].alias;
05925             }else{
05926 #ifndef PERL_XS
05927                 if (strcmp(long_option[i].name, "help") == 0){
05928                     usage();
05929                     exit(EXIT_SUCCESS);
05930                 }
05931 #endif
05932                 if (strcmp(long_option[i].name, "ic=") == 0){
05933                     enc = nkf_enc_find((char *)p);
05934                     if (!enc) continue;
05935                     input_encoding = enc;
05936                     continue;
05937                 }
05938                 if (strcmp(long_option[i].name, "oc=") == 0){
05939                     enc = nkf_enc_find((char *)p);
05940                     /* if (enc <= 0) continue; */
05941                     if (!enc) continue;
05942                     output_encoding = enc;
05943                     continue;
05944                 }
05945                 if (strcmp(long_option[i].name, "guess=") == 0){
05946                     if (p[0] == '0' || p[0] == '1') {
05947                         guess_f = 1;
05948                     } else {
05949                         guess_f = 2;
05950                     }
05951                     continue;
05952                 }
05953 #ifdef OVERWRITE
05954                 if (strcmp(long_option[i].name, "overwrite") == 0){
05955                     file_out_f = TRUE;
05956                     overwrite_f = TRUE;
05957                     preserve_time_f = TRUE;
05958                     continue;
05959                 }
05960                 if (strcmp(long_option[i].name, "overwrite=") == 0){
05961                     file_out_f = TRUE;
05962                     overwrite_f = TRUE;
05963                     preserve_time_f = TRUE;
05964                     backup_f = TRUE;
05965                     backup_suffix = (char *)p;
05966                     continue;
05967                 }
05968                 if (strcmp(long_option[i].name, "in-place") == 0){
05969                     file_out_f = TRUE;
05970                     overwrite_f = TRUE;
05971                     preserve_time_f = FALSE;
05972                     continue;
05973                 }
05974                 if (strcmp(long_option[i].name, "in-place=") == 0){
05975                     file_out_f = TRUE;
05976                     overwrite_f = TRUE;
05977                     preserve_time_f = FALSE;
05978                     backup_f = TRUE;
05979                     backup_suffix = (char *)p;
05980                     continue;
05981                 }
05982 #endif
05983 #ifdef INPUT_OPTION
05984                 if (strcmp(long_option[i].name, "cap-input") == 0){
05985                     cap_f = TRUE;
05986                     continue;
05987                 }
05988                 if (strcmp(long_option[i].name, "url-input") == 0){
05989                     url_f = TRUE;
05990                     continue;
05991                 }
05992 #endif
05993 #ifdef NUMCHAR_OPTION
05994                 if (strcmp(long_option[i].name, "numchar-input") == 0){
05995                     numchar_f = TRUE;
05996                     continue;
05997                 }
05998 #endif
05999 #ifdef CHECK_OPTION
06000                 if (strcmp(long_option[i].name, "no-output") == 0){
06001                     noout_f = TRUE;
06002                     continue;
06003                 }
06004                 if (strcmp(long_option[i].name, "debug") == 0){
06005                     debug_f = TRUE;
06006                     continue;
06007                 }
06008 #endif
06009                 if (strcmp(long_option[i].name, "cp932") == 0){
06010 #ifdef SHIFTJIS_CP932
06011                     cp51932_f = TRUE;
06012                     cp932inv_f = -TRUE;
06013 #endif
06014 #ifdef UTF8_OUTPUT_ENABLE
06015                     ms_ucs_map_f = UCS_MAP_CP932;
06016 #endif
06017                     continue;
06018                 }
06019                 if (strcmp(long_option[i].name, "no-cp932") == 0){
06020 #ifdef SHIFTJIS_CP932
06021                     cp51932_f = FALSE;
06022                     cp932inv_f = FALSE;
06023 #endif
06024 #ifdef UTF8_OUTPUT_ENABLE
06025                     ms_ucs_map_f = UCS_MAP_ASCII;
06026 #endif
06027                     continue;
06028                 }
06029 #ifdef SHIFTJIS_CP932
06030                 if (strcmp(long_option[i].name, "cp932inv") == 0){
06031                     cp932inv_f = -TRUE;
06032                     continue;
06033                 }
06034 #endif
06035 
06036 #ifdef X0212_ENABLE
06037                 if (strcmp(long_option[i].name, "x0212") == 0){
06038                     x0212_f = TRUE;
06039                     continue;
06040                 }
06041 #endif
06042 
06043 #ifdef EXEC_IO
06044                 if (strcmp(long_option[i].name, "exec-in") == 0){
06045                     exec_f = 1;
06046                     return 0;
06047                 }
06048                 if (strcmp(long_option[i].name, "exec-out") == 0){
06049                     exec_f = -1;
06050                     return 0;
06051                 }
06052 #endif
06053 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
06054                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
06055                     no_cp932ext_f = TRUE;
06056                     continue;
06057                 }
06058                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
06059                     no_best_fit_chars_f = TRUE;
06060                     continue;
06061                 }
06062                 if (strcmp(long_option[i].name, "fb-skip") == 0){
06063                     encode_fallback = NULL;
06064                     continue;
06065                 }
06066                 if (strcmp(long_option[i].name, "fb-html") == 0){
06067                     encode_fallback = encode_fallback_html;
06068                     continue;
06069                 }
06070                 if (strcmp(long_option[i].name, "fb-xml") == 0){
06071                     encode_fallback = encode_fallback_xml;
06072                     continue;
06073                 }
06074                 if (strcmp(long_option[i].name, "fb-java") == 0){
06075                     encode_fallback = encode_fallback_java;
06076                     continue;
06077                 }
06078                 if (strcmp(long_option[i].name, "fb-perl") == 0){
06079                     encode_fallback = encode_fallback_perl;
06080                     continue;
06081                 }
06082                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
06083                     encode_fallback = encode_fallback_subchar;
06084                     continue;
06085                 }
06086                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
06087                     encode_fallback = encode_fallback_subchar;
06088                     unicode_subchar = 0;
06089                     if (p[0] != '0'){
06090                         /* decimal number */
06091                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
06092                             unicode_subchar *= 10;
06093                             unicode_subchar += hex2bin(p[i]);
06094                         }
06095                     }else if(p[1] == 'x' || p[1] == 'X'){
06096                         /* hexadecimal number */
06097                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
06098                             unicode_subchar <<= 4;
06099                             unicode_subchar |= hex2bin(p[i]);
06100                         }
06101                     }else{
06102                         /* octal number */
06103                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
06104                             unicode_subchar *= 8;
06105                             unicode_subchar += hex2bin(p[i]);
06106                         }
06107                     }
06108                     w16e_conv(unicode_subchar, &i, &j);
06109                     unicode_subchar = i<<8 | j;
06110                     continue;
06111                 }
06112 #endif
06113 #ifdef UTF8_OUTPUT_ENABLE
06114                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
06115                     ms_ucs_map_f = UCS_MAP_MS;
06116                     continue;
06117                 }
06118 #endif
06119 #ifdef UNICODE_NORMALIZATION
06120                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
06121                     nfc_f = TRUE;
06122                     continue;
06123                 }
06124 #endif
06125                 if (strcmp(long_option[i].name, "prefix=") == 0){
06126                     if (nkf_isgraph(p[0])){
06127                         for (i = 1; nkf_isgraph(p[i]); i++){
06128                             prefix_table[p[i]] = p[0];
06129                         }
06130                     }
06131                     continue;
06132                 }
06133 #if !defined(PERL_XS) && !defined(WIN32DLL)
06134                 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
06135 #endif
06136                 return -1;
06137             }
06138             continue;
06139         case 'b':           /* buffered mode */
06140             unbuf_f = FALSE;
06141             continue;
06142         case 'u':           /* non buffered mode */
06143             unbuf_f = TRUE;
06144             continue;
06145         case 't':           /* transparent mode */
06146             if (*cp=='1') {
06147                 /* alias of -t */
06148                 cp++;
06149                 nop_f = TRUE;
06150             } else if (*cp=='2') {
06151                 /*
06152                  * -t with put/get
06153                  *
06154                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
06155                  *
06156                  */
06157                 cp++;
06158                 nop_f = 2;
06159             } else
06160                 nop_f = TRUE;
06161             continue;
06162         case 'j':           /* JIS output */
06163         case 'n':
06164             output_encoding = nkf_enc_from_index(ISO_2022_JP);
06165             continue;
06166         case 'e':           /* AT&T EUC output */
06167             output_encoding = nkf_enc_from_index(EUCJP_NKF);
06168             continue;
06169         case 's':           /* SJIS output */
06170             output_encoding = nkf_enc_from_index(SHIFT_JIS);
06171             continue;
06172         case 'l':           /* ISO8859 Latin-1 support, no conversion */
06173             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
06174             input_encoding = nkf_enc_from_index(ISO_8859_1);
06175             continue;
06176         case 'i':           /* Kanji IN ESC-$-@/B */
06177             if (*cp=='@'||*cp=='B')
06178                 kanji_intro = *cp++;
06179             continue;
06180         case 'o':           /* ASCII IN ESC-(-J/B/H */
06181             /* ESC ( H was used in initial JUNET messages */
06182             if (*cp=='J'||*cp=='B'||*cp=='H')
06183                 ascii_intro = *cp++;
06184             continue;
06185         case 'h':
06186             /*
06187                bit:1   katakana->hiragana
06188                bit:2   hiragana->katakana
06189              */
06190             if ('9'>= *cp && *cp>='0')
06191                 hira_f |= (*cp++ -'0');
06192             else
06193                 hira_f |= 1;
06194             continue;
06195         case 'r':
06196             rot_f = TRUE;
06197             continue;
06198 #if defined(MSDOS) || defined(__OS2__)
06199         case 'T':
06200             binmode_f = FALSE;
06201             continue;
06202 #endif
06203 #ifndef PERL_XS
06204         case 'V':
06205             show_configuration();
06206             exit(EXIT_SUCCESS);
06207             break;
06208         case 'v':
06209             version();
06210             exit(EXIT_SUCCESS);
06211             break;
06212 #endif
06213 #ifdef UTF8_OUTPUT_ENABLE
06214         case 'w':           /* UTF-{8,16,32} output */
06215             if (cp[0] == '8') {
06216                 cp++;
06217                 if (cp[0] == '0'){
06218                     cp++;
06219                     output_encoding = nkf_enc_from_index(UTF_8N);
06220                 } else {
06221                     output_bom_f = TRUE;
06222                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
06223                 }
06224             } else {
06225                 int enc_idx;
06226                 if ('1'== cp[0] && '6'==cp[1]) {
06227                     cp += 2;
06228                     enc_idx = UTF_16;
06229                 } else if ('3'== cp[0] && '2'==cp[1]) {
06230                     cp += 2;
06231                     enc_idx = UTF_32;
06232                 } else {
06233                     output_encoding = nkf_enc_from_index(UTF_8);
06234                     continue;
06235                 }
06236                 if (cp[0]=='L') {
06237                     cp++;
06238                     output_endian = ENDIAN_LITTLE;
06239                     output_bom_f = TRUE;
06240                 } else if (cp[0] == 'B') {
06241                     cp++;
06242                     output_bom_f = TRUE;
06243                 }
06244                 if (cp[0] == '0'){
06245                     output_bom_f = FALSE;
06246                     cp++;
06247                     enc_idx = enc_idx == UTF_16
06248                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06249                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
06250                 } else {
06251                     enc_idx = enc_idx == UTF_16
06252                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
06253                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
06254                 }
06255                 output_encoding = nkf_enc_from_index(enc_idx);
06256             }
06257             continue;
06258 #endif
06259 #ifdef UTF8_INPUT_ENABLE
06260         case 'W':           /* UTF input */
06261             if (cp[0] == '8') {
06262                 cp++;
06263                 input_encoding = nkf_enc_from_index(UTF_8);
06264             }else{
06265                 int enc_idx;
06266                 if ('1'== cp[0] && '6'==cp[1]) {
06267                     cp += 2;
06268                     input_endian = ENDIAN_BIG;
06269                     enc_idx = UTF_16;
06270                 } else if ('3'== cp[0] && '2'==cp[1]) {
06271                     cp += 2;
06272                     input_endian = ENDIAN_BIG;
06273                     enc_idx = UTF_32;
06274                 } else {
06275                     input_encoding = nkf_enc_from_index(UTF_8);
06276                     continue;
06277                 }
06278                 if (cp[0]=='L') {
06279                     cp++;
06280                     input_endian = ENDIAN_LITTLE;
06281                 } else if (cp[0] == 'B') {
06282                     cp++;
06283                     input_endian = ENDIAN_BIG;
06284                 }
06285                 enc_idx = (enc_idx == UTF_16
06286                     ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06287                     : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
06288                 input_encoding = nkf_enc_from_index(enc_idx);
06289             }
06290             continue;
06291 #endif
06292             /* Input code assumption */
06293         case 'J':   /* ISO-2022-JP input */
06294             input_encoding = nkf_enc_from_index(ISO_2022_JP);
06295             continue;
06296         case 'E':   /* EUC-JP input */
06297             input_encoding = nkf_enc_from_index(EUCJP_NKF);
06298             continue;
06299         case 'S':   /* Shift_JIS input */
06300             input_encoding = nkf_enc_from_index(SHIFT_JIS);
06301             continue;
06302         case 'Z':   /* Convert X0208 alphabet to asii */
06303             /* alpha_f
06304                bit:0   Convert JIS X 0208 Alphabet to ASCII
06305                bit:1   Convert Kankaku to one space
06306                bit:2   Convert Kankaku to two spaces
06307                bit:3   Convert HTML Entity
06308                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
06309              */
06310             while ('0'<= *cp && *cp <='4') {
06311                 alpha_f |= 1 << (*cp++ - '0');
06312             }
06313             alpha_f |= 1;
06314             continue;
06315         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
06316             x0201_f = FALSE;    /* No X0201->X0208 conversion */
06317             /* accept  X0201
06318                ESC-(-I     in JIS, EUC, MS Kanji
06319                SI/SO       in JIS, EUC, MS Kanji
06320                SS2         in EUC, JIS, not in MS Kanji
06321                MS Kanji (0xa0-0xdf)
06322                output  X0201
06323                ESC-(-I     in JIS (0x20-0x5f)
06324                SS2         in EUC (0xa0-0xdf)
06325                0xa0-0xd    in MS Kanji (0xa0-0xdf)
06326              */
06327             continue;
06328         case 'X':   /* Convert X0201 kana to X0208 */
06329             x0201_f = TRUE;
06330             continue;
06331         case 'F':   /* prserve new lines */
06332             fold_preserve_f = TRUE;
06333         case 'f':   /* folding -f60 or -f */
06334             fold_f = TRUE;
06335             fold_len = 0;
06336             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
06337                 fold_len *= 10;
06338                 fold_len += *cp++ - '0';
06339             }
06340             if (!(0<fold_len && fold_len<BUFSIZ))
06341                 fold_len = DEFAULT_FOLD;
06342             if (*cp=='-') {
06343                 fold_margin = 0;
06344                 cp++;
06345                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
06346                     fold_margin *= 10;
06347                     fold_margin += *cp++ - '0';
06348                 }
06349             }
06350             continue;
06351         case 'm':   /* MIME support */
06352             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
06353             if (*cp=='B'||*cp=='Q') {
06354                 mime_decode_mode = *cp++;
06355                 mimebuf_f = FIXED_MIME;
06356             } else if (*cp=='N') {
06357                 mime_f = TRUE; cp++;
06358             } else if (*cp=='S') {
06359                 mime_f = STRICT_MIME; cp++;
06360             } else if (*cp=='0') {
06361                 mime_decode_f = FALSE;
06362                 mime_f = FALSE; cp++;
06363             } else {
06364                 mime_f = STRICT_MIME;
06365             }
06366             continue;
06367         case 'M':   /* MIME output */
06368             if (*cp=='B') {
06369                 mimeout_mode = 'B';
06370                 mimeout_f = FIXED_MIME; cp++;
06371             } else if (*cp=='Q') {
06372                 mimeout_mode = 'Q';
06373                 mimeout_f = FIXED_MIME; cp++;
06374             } else {
06375                 mimeout_f = TRUE;
06376             }
06377             continue;
06378         case 'B':   /* Broken JIS support */
06379             /*  bit:0   no ESC JIS
06380                bit:1   allow any x on ESC-(-x or ESC-$-x
06381                bit:2   reset to ascii on NL
06382              */
06383             if ('9'>= *cp && *cp>='0')
06384                 broken_f |= 1<<(*cp++ -'0');
06385             else
06386                 broken_f |= TRUE;
06387             continue;
06388 #ifndef PERL_XS
06389         case 'O':/* for Output file */
06390             file_out_f = TRUE;
06391             continue;
06392 #endif
06393         case 'c':/* add cr code */
06394             eolmode_f = CRLF;
06395             continue;
06396         case 'd':/* delete cr code */
06397             eolmode_f = LF;
06398             continue;
06399         case 'I':   /* ISO-2022-JP output */
06400             iso2022jp_f = TRUE;
06401             continue;
06402         case 'L':  /* line mode */
06403             if (*cp=='u') {         /* unix */
06404                 eolmode_f = LF; cp++;
06405             } else if (*cp=='m') { /* mac */
06406                 eolmode_f = CR; cp++;
06407             } else if (*cp=='w') { /* windows */
06408                 eolmode_f = CRLF; cp++;
06409             } else if (*cp=='0') { /* no conversion  */
06410                 eolmode_f = 0; cp++;
06411             }
06412             continue;
06413 #ifndef PERL_XS
06414         case 'g':
06415             if ('2' <= *cp && *cp <= '9') {
06416                 guess_f = 2;
06417                 cp++;
06418             } else if (*cp == '0' || *cp == '1') {
06419                 guess_f = 1;
06420                 cp++;
06421             } else {
06422                 guess_f = 1;
06423             }
06424             continue;
06425 #endif
06426         case SP:
06427             /* module muliple options in a string are allowed for Perl moudle  */
06428             while(*cp && *cp++!='-');
06429             continue;
06430         default:
06431 #if !defined(PERL_XS) && !defined(WIN32DLL)
06432             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
06433 #endif
06434             /* bogus option but ignored */
06435             return -1;
06436         }
06437     }
06438     return 0;
06439 }
06440 
06441 #ifdef WIN32DLL
06442 #include "nkf32dll.c"
06443 #elif defined(PERL_XS)
06444 #else /* WIN32DLL */
06445 int
06446 main(int argc, char **argv)
06447 {
06448     FILE  *fin;
06449     unsigned char  *cp;
06450 
06451     char *outfname = NULL;
06452     char *origfname;
06453 
06454 #ifdef EASYWIN /*Easy Win */
06455     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
06456 #endif
06457 #ifdef DEFAULT_CODE_LOCALE
06458     setlocale(LC_CTYPE, "");
06459 #endif
06460     nkf_state_init();
06461 
06462     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
06463         cp = (unsigned char *)*argv;
06464         options(cp);
06465 #ifdef EXEC_IO
06466         if (exec_f){
06467             int fds[2], pid;
06468             if (pipe(fds) < 0 || (pid = fork()) < 0){
06469                 abort();
06470             }
06471             if (pid == 0){
06472                 if (exec_f > 0){
06473                     close(fds[0]);
06474                     dup2(fds[1], 1);
06475                 }else{
06476                     close(fds[1]);
06477                     dup2(fds[0], 0);
06478                 }
06479                 execvp(argv[1], &argv[1]);
06480             }
06481             if (exec_f > 0){
06482                 close(fds[1]);
06483                 dup2(fds[0], 0);
06484             }else{
06485                 close(fds[0]);
06486                 dup2(fds[1], 1);
06487             }
06488             argc = 0;
06489             break;
06490         }
06491 #endif
06492     }
06493 
06494     if (guess_f) {
06495 #ifdef CHECK_OPTION
06496         int debug_f_back = debug_f;
06497 #endif
06498 #ifdef EXEC_IO
06499         int exec_f_back = exec_f;
06500 #endif
06501 #ifdef X0212_ENABLE
06502         int x0212_f_back = x0212_f;
06503 #endif
06504         int x0213_f_back = x0213_f;
06505         int guess_f_back = guess_f;
06506         reinit();
06507         guess_f = guess_f_back;
06508         mime_f = FALSE;
06509 #ifdef CHECK_OPTION
06510         debug_f = debug_f_back;
06511 #endif
06512 #ifdef EXEC_IO
06513         exec_f = exec_f_back;
06514 #endif
06515         x0212_f = x0212_f_back;
06516         x0213_f = x0213_f_back;
06517     }
06518 
06519     if (binmode_f == TRUE)
06520 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06521         if (freopen("","wb",stdout) == NULL)
06522             return (-1);
06523 #else
06524     setbinmode(stdout);
06525 #endif
06526 
06527     if (unbuf_f)
06528         setbuf(stdout, (char *) NULL);
06529     else
06530         setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
06531 
06532     if (argc == 0) {
06533         if (binmode_f == TRUE)
06534 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06535             if (freopen("","rb",stdin) == NULL) return (-1);
06536 #else
06537         setbinmode(stdin);
06538 #endif
06539         setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
06540         if (nop_f)
06541             noconvert(stdin);
06542         else {
06543             kanji_convert(stdin);
06544             if (guess_f) print_guessed_code(NULL);
06545         }
06546     } else {
06547         int nfiles = argc;
06548         int is_argument_error = FALSE;
06549         while (argc--) {
06550             input_codename = NULL;
06551             input_eol = 0;
06552 #ifdef CHECK_OPTION
06553             iconv_for_check = 0;
06554 #endif
06555             if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
06556                 perror(*(argv-1));
06557                 is_argument_error = TRUE;
06558                 continue;
06559             } else {
06560 #ifdef OVERWRITE
06561                 int fd = 0;
06562                 int fd_backup = 0;
06563 #endif
06564 
06565                 /* reopen file for stdout */
06566                 if (file_out_f == TRUE) {
06567 #ifdef OVERWRITE
06568                     if (overwrite_f){
06569                         outfname = nkf_xmalloc(strlen(origfname)
06570                                           + strlen(".nkftmpXXXXXX")
06571                                           + 1);
06572                         strcpy(outfname, origfname);
06573 #ifdef MSDOS
06574                         {
06575                             int i;
06576                             for (i = strlen(outfname); i; --i){
06577                                 if (outfname[i - 1] == '/'
06578                                     || outfname[i - 1] == '\\'){
06579                                     break;
06580                                 }
06581                             }
06582                             outfname[i] = '\0';
06583                         }
06584                         strcat(outfname, "ntXXXXXX");
06585                         mktemp(outfname);
06586                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
06587                                   S_IREAD | S_IWRITE);
06588 #else
06589                         strcat(outfname, ".nkftmpXXXXXX");
06590                         fd = mkstemp(outfname);
06591 #endif
06592                         if (fd < 0
06593                             || (fd_backup = dup(fileno(stdout))) < 0
06594                             || dup2(fd, fileno(stdout)) < 0
06595                            ){
06596                             perror(origfname);
06597                             return -1;
06598                         }
06599                     }else
06600 #endif
06601                     if(argc == 1) {
06602                         outfname = *argv++;
06603                         argc--;
06604                     } else {
06605                         outfname = "nkf.out";
06606                     }
06607 
06608                     if(freopen(outfname, "w", stdout) == NULL) {
06609                         perror (outfname);
06610                         return (-1);
06611                     }
06612                     if (binmode_f == TRUE) {
06613 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06614                         if (freopen("","wb",stdout) == NULL)
06615                             return (-1);
06616 #else
06617                         setbinmode(stdout);
06618 #endif
06619                     }
06620                 }
06621                 if (binmode_f == TRUE)
06622 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06623                     if (freopen("","rb",fin) == NULL)
06624                         return (-1);
06625 #else
06626                 setbinmode(fin);
06627 #endif
06628                 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
06629                 if (nop_f)
06630                     noconvert(fin);
06631                 else {
06632                     char *filename = NULL;
06633                     kanji_convert(fin);
06634                     if (nfiles > 1) filename = origfname;
06635                     if (guess_f) print_guessed_code(filename);
06636                 }
06637                 fclose(fin);
06638 #ifdef OVERWRITE
06639                 if (overwrite_f) {
06640                     struct stat     sb;
06641 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
06642                     time_t tb[2];
06643 #else
06644                     struct utimbuf  tb;
06645 #endif
06646 
06647                     fflush(stdout);
06648                     close(fd);
06649                     if (dup2(fd_backup, fileno(stdout)) < 0){
06650                         perror("dup2");
06651                     }
06652                     if (stat(origfname, &sb)) {
06653                         fprintf(stderr, "Can't stat %s\n", origfname);
06654                     }
06655                     /* $B%Q!<%_%C%7%g%s$rI|85(B */
06656                     if (chmod(outfname, sb.st_mode)) {
06657                         fprintf(stderr, "Can't set permission %s\n", outfname);
06658                     }
06659 
06660                     /* $B%?%$%`%9%?%s%W$rI|85(B */
06661                     if(preserve_time_f){
06662 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
06663                         tb[0] = tb[1] = sb.st_mtime;
06664                         if (utime(outfname, tb)) {
06665                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
06666                         }
06667 #else
06668                         tb.actime  = sb.st_atime;
06669                         tb.modtime = sb.st_mtime;
06670                         if (utime(outfname, &tb)) {
06671                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
06672                         }
06673 #endif
06674                     }
06675                     if(backup_f){
06676                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
06677 #ifdef MSDOS
06678                         unlink(backup_filename);
06679 #endif
06680                         if (rename(origfname, backup_filename)) {
06681                             perror(backup_filename);
06682                             fprintf(stderr, "Can't rename %s to %s\n",
06683                                     origfname, backup_filename);
06684                         }
06685                         nkf_xfree(backup_filename);
06686                     }else{
06687 #ifdef MSDOS
06688                         if (unlink(origfname)){
06689                             perror(origfname);
06690                         }
06691 #endif
06692                     }
06693                     if (rename(outfname, origfname)) {
06694                         perror(origfname);
06695                         fprintf(stderr, "Can't rename %s to %s\n",
06696                                 outfname, origfname);
06697                     }
06698                     nkf_xfree(outfname);
06699                 }
06700 #endif
06701             }
06702         }
06703         if (is_argument_error)
06704             return(-1);
06705     }
06706 #ifdef EASYWIN /*Easy Win */
06707     if (file_out_f == FALSE)
06708         scanf("%d",&end_check);
06709     else
06710         fclose(stdout);
06711 #else /* for Other OS */
06712     if (file_out_f == TRUE)
06713         fclose(stdout);
06714 #endif /*Easy Win */
06715     return (0);
06716 }
06717 #endif /* WIN32DLL */
06718 

Generated on Wed Sep 8 2010 09:55:08 for Ruby by  doxygen 1.7.1