• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

regenc.c

Go to the documentation of this file.
00001 /**********************************************************************
00002   regenc.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regint.h"
00031 
/* Global default encoding.  Returned by onigenc_get_default_encoding()
 * and overwritten by onigenc_set_default_encoding(); it starts out as
 * ONIG_ENCODING_INIT_DEFAULT. */
00032 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
00033 
/* Encoding-layer initializer.  It has nothing to set up and
 * unconditionally reports success (0). */
extern int
onigenc_init(void)
{
  const int ok = 0;

  return ok;
}
00039 
00040 extern OnigEncoding
00041 onigenc_get_default_encoding(void)
00042 {
00043   return OnigEncDefaultCharEncoding;
00044 }
00045 
00046 extern int
00047 onigenc_set_default_encoding(OnigEncoding enc)
00048 {
00049   OnigEncDefaultCharEncoding = enc;
00050   return 0;
00051 }
00052 
00053 extern int
00054 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
00055 {
00056   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
00057   if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
00058     return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
00059   else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
00060     return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
00061   return 1;
00062 }
00063 
00064 extern UChar*
00065 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00066 {
00067   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00068   if (p < s) {
00069       p += enclen(enc, p, end);
00070   }
00071   return p;
00072 }
00073 
00074 extern UChar*
00075 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
00076                                    const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
00077 {
00078   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00079 
00080   if (p < s) {
00081     if (prev) *prev = (const UChar* )p;
00082     p += enclen(enc, p, end);
00083   }
00084   else {
00085     if (prev) *prev = (const UChar* )NULL; /* Sorry */
00086   }
00087   return p;
00088 }
00089 
00090 extern UChar*
00091 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00092 {
00093   if (s <= start)
00094     return (UChar* )NULL;
00095 
00096   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00097 }
00098 
00099 extern UChar*
00100 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
00101 {
00102   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
00103     if (s <= start)
00104       return (UChar* )NULL;
00105 
00106     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00107   }
00108   return (UChar* )s;
00109 }
00110 
00111 extern UChar*
00112 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
00113 {
00114   UChar* q = (UChar* )p;
00115   while (n-- > 0) {
00116     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00117   }
00118   return (q <= end ? q : NULL);
00119 }
00120 
00121 extern int
00122 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
00123 {
00124   int n = 0;
00125   UChar* q = (UChar* )p;
00126 
00127   while (q < end) {
00128     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00129     n++;
00130   }
00131   return n;
00132 }
00133 
00134 extern int
00135 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
00136 {
00137   int n = 0;
00138   UChar* p = (UChar* )s;
00139   UChar* e;
00140 
00141   while (1) {
00142     if (*p == '\0') {
00143       UChar* q;
00144       int len = ONIGENC_MBC_MINLEN(enc);
00145 
00146       if (len == 1) return n;
00147       q = p + 1;
00148       while (len > 1) {
00149         if (*q != '\0') break;
00150         q++;
00151         len--;
00152       }
00153       if (len == 1) return n;
00154     }
00155     e = p + ONIGENC_MBC_MAXLEN(enc);
00156     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00157     n++;
00158   }
00159 }
00160 
00161 extern int
00162 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
00163 {
00164   UChar* start = (UChar* )s;
00165   UChar* p = (UChar* )s;
00166   UChar* e;
00167 
00168   while (1) {
00169     if (*p == '\0') {
00170       UChar* q;
00171       int len = ONIGENC_MBC_MINLEN(enc);
00172 
00173       if (len == 1) return (int )(p - start);
00174       q = p + 1;
00175       while (len > 1) {
00176         if (*q != '\0') break;
00177         q++;
00178         len--;
00179       }
00180       if (len == 1) return (int )(p - start);
00181     }
00182     e = p + ONIGENC_MBC_MAXLEN(enc);
00183     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00184   }
00185 }
00186 
/* Byte -> lower-case byte lookup table, indexed by byte value (256
 * entries).  Only 'A'..'Z' (0101-0132) are remapped, to 'a'..'z'
 * (0141-0172); every other byte, including 0200-0377, maps to itself. */
00187 const UChar OnigEncAsciiToLowerCaseTable[] = {
00188   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00189   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00190   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00191   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00192   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00193   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00194   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00195   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00196   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00197   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00198   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00199   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00200   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00201   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00202   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00203   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00204   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00205   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00206   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00207   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00208   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00209   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00210   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00211   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00212   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00213   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00214   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00215   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00216   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00217   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00218   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00219   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00220 };
00221 
00222 #ifdef USE_UPPER_CASE_TABLE
/* Byte -> upper-case byte lookup table, indexed by byte value.  Only
 * 'a'..'z' (0141-0172) are remapped, to 'A'..'Z' (0101-0132); every
 * other byte, including 0200-0377, maps to itself. */
00223 const UChar OnigEncAsciiToUpperCaseTable[256] = {
00224   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00225   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00226   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00227   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00228   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00229   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00230   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00231   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00232   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00233   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00234   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00235   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
00236   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00237   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00238   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00239   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
00240   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00241   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00242   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00243   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00244   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00245   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00246   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00247   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00248   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00249   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00250   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00251   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00252   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00253   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00254   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00255   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00256 };
00257 #endif
00258 
/* Per-byte character-class bit masks, indexed by byte value (8 entries
 * per row, so each row covers 8 consecutive byte values).  Entries for
 * bytes 0x80-0xff are all 0x0000, i.e. they belong to no class.
 * NOTE(review): the meaning of the individual bits is not visible in
 * this file; presumably they correspond to the ONIGENC_CTYPE_*
 * constants -- confirm against the header before editing values. */
00259 const unsigned short OnigEncAsciiCtypeTable[256] = {
00260   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00261   0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
00262   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00263   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00264   0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00265   0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00266   0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
00267   0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00268   0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
00269   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
00270   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
00271   0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
00272   0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
00273   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
00274   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
00275   0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
00276   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00277   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00278   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00279   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00280   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00281   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00282   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00283   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00284   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00285   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00286   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00287   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00288   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00289   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00290   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00291   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
00292 };
00293 
/* Byte -> lower-case byte lookup table for ISO-8859-1, indexed by byte
 * value.  'A'..'Z' (0101-0132) map to 'a'..'z' (0141-0172), and
 * 0300-0336 map to 0340-0376, with the single exception of 0327 which
 * maps to itself.  0337 and every remaining byte map to themselves. */
00294 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
00295   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00296   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00297   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00298   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00299   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00300   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00301   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00302   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00303   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00304   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00305   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00306   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00307   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00308   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00309   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00310   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00311   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00312   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00313   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00314   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00315   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00316   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00317   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00318   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00319   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00320   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00321   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
00322   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
00323   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00324   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00325   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00326   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
00327 };
00328 
00329 #ifdef USE_UPPER_CASE_TABLE
/* Byte -> upper-case byte lookup table for ISO-8859-1, indexed by byte
 * value.  'a'..'z' (0141-0172) map to 'A'..'Z' (0101-0132), and
 * 0340-0376 map to 0300-0336, with the single exception of 0367 which
 * maps to itself.  0377 and every remaining byte map to themselves. */
00330 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
00331   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00332   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00333   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00334   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00335   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00336   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00337   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00338   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00339   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00340   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00341   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00342   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
00343   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00344   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00345   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00346   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
00347   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00348   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00349   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00350   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00351   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00352   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00353   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00354   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00355   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00356   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00357   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00358   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00359   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00360   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00361   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
00362   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
00363 };
00364 #endif
00365 
00366 extern void
00367 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
00368 {
00369   /* nothing */
00370   /* obsoleted. */
00371 }
00372 
00373 extern UChar*
00374 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00375 {
00376   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00377 }
00378 
/* Case-fold pairs for the 26 ASCII letters: each entry is
 * { upper-case code (0x41-0x5a), lower-case code (0x61-0x7a) }.
 * onigenc_ascii_apply_all_case_fold() walks this table and reports
 * each pair in both directions. */
00379 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
00380   { 0x41, 0x61 },
00381   { 0x42, 0x62 },
00382   { 0x43, 0x63 },
00383   { 0x44, 0x64 },
00384   { 0x45, 0x65 },
00385   { 0x46, 0x66 },
00386   { 0x47, 0x67 },
00387   { 0x48, 0x68 },
00388   { 0x49, 0x69 },
00389   { 0x4a, 0x6a },
00390   { 0x4b, 0x6b },
00391   { 0x4c, 0x6c },
00392   { 0x4d, 0x6d },
00393   { 0x4e, 0x6e },
00394   { 0x4f, 0x6f },
00395   { 0x50, 0x70 },
00396   { 0x51, 0x71 },
00397   { 0x52, 0x72 },
00398   { 0x53, 0x73 },
00399   { 0x54, 0x74 },
00400   { 0x55, 0x75 },
00401   { 0x56, 0x76 },
00402   { 0x57, 0x77 },
00403   { 0x58, 0x78 },
00404   { 0x59, 0x79 },
00405   { 0x5a, 0x7a }
00406 };
00407 
00408 extern int
00409 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00410                                   OnigApplyAllCaseFoldFunc f, void* arg,
00411                                   OnigEncoding enc ARG_UNUSED)
00412 {
00413   OnigCodePoint code;
00414   int i, r;
00415 
00416   for (i = 0;
00417        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
00418        i++) {
00419     code = OnigAsciiLowerMap[i].to;
00420     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
00421     if (r != 0) return r;
00422 
00423     code = OnigAsciiLowerMap[i].from;
00424     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
00425     if (r != 0) return r;
00426   }
00427 
00428   return 0;
00429 }
00430 
00431 extern int
00432 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
00433     const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
00434      OnigEncoding enc ARG_UNUSED)
00435 {
00436   if (0x41 <= *p && *p <= 0x5a) {
00437     items[0].byte_len = 1;
00438     items[0].code_len = 1;
00439     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00440     return 1;
00441   }
00442   else if (0x61 <= *p && *p <= 0x7a) {
00443     items[0].byte_len = 1;
00444     items[0].code_len = 1;
00445     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00446     return 1;
00447   }
00448   else
00449     return 0;
00450 }
00451 
00452 static int
00453 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00454                        OnigApplyAllCaseFoldFunc f, void* arg)
00455 {
00456   OnigCodePoint ss[] = { 0x73, 0x73 };
00457 
00458   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
00459 }
00460 
00461 extern int
00462 onigenc_apply_all_case_fold_with_map(int map_size,
00463     const OnigPairCaseFoldCodes map[],
00464     int ess_tsett_flag, OnigCaseFoldType flag,
00465     OnigApplyAllCaseFoldFunc f, void* arg)
00466 {
00467   OnigCodePoint code;
00468   int i, r;
00469 
00470   r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
00471   if (r != 0) return r;
00472 
00473   for (i = 0; i < map_size; i++) {
00474     code = map[i].to;
00475     r = (*f)(map[i].from, &code, 1, arg);
00476     if (r != 0) return r;
00477 
00478     code = map[i].from;
00479     r = (*f)(map[i].to, &code, 1, arg);
00480     if (r != 0) return r;
00481   }
00482 
00483   if (ess_tsett_flag != 0)
00484     return ss_apply_all_case_fold(flag, f, arg);
00485 
00486   return 0;
00487 }
00488 
00489 extern int
00490 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
00491     const OnigPairCaseFoldCodes map[],
00492     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
00493     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
00494 {
00495   if (0x41 <= *p && *p <= 0x5a) {
00496     items[0].byte_len = 1;
00497     items[0].code_len = 1;
00498     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00499     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
00500         && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00501       /* SS */
00502       items[1].byte_len = 2;
00503       items[1].code_len = 1;
00504       items[1].code[0] = (OnigCodePoint )0xdf;
00505       return 2;
00506     }
00507     else
00508       return 1;
00509   }
00510   else if (0x61 <= *p && *p <= 0x7a) {
00511     items[0].byte_len = 1;
00512     items[0].code_len = 1;
00513     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00514     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
00515         && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00516       /* ss */
00517       items[1].byte_len = 2;
00518       items[1].code_len = 1;
00519       items[1].code[0] = (OnigCodePoint )0xdf;
00520       return 2;
00521     }
00522     else
00523       return 1;
00524   }
00525   else if (*p == 0xdf && ess_tsett_flag != 0) {
00526     items[0].byte_len = 1;
00527     items[0].code_len = 2;
00528     items[0].code[0] = (OnigCodePoint )'s';
00529     items[0].code[1] = (OnigCodePoint )'s';
00530 
00531     items[1].byte_len = 1;
00532     items[1].code_len = 2;
00533     items[1].code[0] = (OnigCodePoint )'S';
00534     items[1].code[1] = (OnigCodePoint )'S';
00535 
00536     items[2].byte_len = 1;
00537     items[2].code_len = 2;
00538     items[2].code[0] = (OnigCodePoint )'s';
00539     items[2].code[1] = (OnigCodePoint )'S';
00540 
00541     items[3].byte_len = 1;
00542     items[3].code_len = 2;
00543     items[3].code[0] = (OnigCodePoint )'S';
00544     items[3].code[1] = (OnigCodePoint )'s';
00545 
00546     return 4;
00547   }
00548   else {
00549     int i;
00550 
00551     for (i = 0; i < map_size; i++) {
00552       if (*p == map[i].from) {
00553         items[0].byte_len = 1;
00554         items[0].code_len = 1;
00555         items[0].code[0] = map[i].to;
00556         return 1;
00557       }
00558       else if (*p == map[i].to) {
00559         items[0].byte_len = 1;
00560         items[0].code_len = 1;
00561         items[0].code[0] = map[i].from;
00562         return 1;
00563       }
00564     }
00565   }
00566 
00567   return 0;
00568 }
00569 
00570 
00571 extern int
00572 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
00573                        OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
00574                        OnigEncoding enc)
00575 {
00576   return ONIG_NO_SUPPORT_CONFIG;
00577 }
00578 
00579 extern int
00580 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00581 {
00582   if (p < end) {
00583     if (*p == 0x0a) return 1;
00584   }
00585   return 0;
00586 }
00587 
00588 /* for single byte encodings */
00589 extern int
00590 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
00591                             const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
00592 {
00593   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
00594 
00595   (*p)++;
00596   return 1; /* return byte length of converted char to lower */
00597 }
00598 
00599 #if 0
00600 extern int
00601 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
00602                                const UChar** pp, const UChar* end ARG_UNUSED)
00603 {
00604   const UChar* p = *pp;
00605 
00606   (*pp)++;
00607   return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00608 }
00609 #endif
00610 
00611 extern int
00612 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
00613                                 OnigEncoding enc ARG_UNUSED)
00614 {
00615   return 1;
00616 }
00617 
00618 extern OnigCodePoint
00619 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00620                                 OnigEncoding enc ARG_UNUSED)
00621 {
00622   return (OnigCodePoint )(*p);
00623 }
00624 
00625 extern int
00626 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00627 {
00628   return 1;
00629 }
00630 
00631 extern int
00632 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
00633 {
00634   if (code > 0xff)
00635       rb_raise(rb_eRangeError, "%u out of char range", code);
00636   *buf = (UChar )(code & 0xff);
00637   return 1;
00638 }
00639 
00640 extern UChar*
00641 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
00642                                           const UChar* end,
00643                                           OnigEncoding enc ARG_UNUSED)
00644 {
00645   return (UChar* )s;
00646 }
00647 
00648 extern int
00649 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00650                                              OnigEncoding enc ARG_UNUSED)
00651 {
00652   return TRUE;
00653 }
00654 
00655 extern int
00656 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00657                                               OnigEncoding enc ARG_UNUSED)
00658 {
00659   return FALSE;
00660 }
00661 
00662 extern int
00663 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
00664                             OnigEncoding enc ARG_UNUSED)
00665 {
00666   if (code < 128)
00667     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00668   else
00669     return FALSE;
00670 }
00671 
00672 extern OnigCodePoint
00673 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
00674 {
00675   int c, i, len;
00676   OnigCodePoint n;
00677 
00678   len = enclen(enc, p, end);
00679   n = (OnigCodePoint )(*p++);
00680   if (len == 1) return n;
00681 
00682   for (i = 1; i < len; i++) {
00683     if (p >= end) break;
00684     c = *p++;
00685     n <<= 8;  n += c;
00686   }
00687   return n;
00688 }
00689 
00690 extern int
00691 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
00692                           const UChar** pp, const UChar* end ARG_UNUSED,
00693                           UChar* lower)
00694 {
00695   int len;
00696   const UChar *p = *pp;
00697 
00698   if (ONIGENC_IS_MBC_ASCII(p)) {
00699     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
00700     (*pp)++;
00701     return 1;
00702   }
00703   else {
00704     int i;
00705 
00706     len = enclen(enc, p, end);
00707     for (i = 0; i < len; i++) {
00708       *lower++ = *p++;
00709     }
00710     (*pp) += len;
00711     return len; /* return byte length of converted to lower char */
00712   }
00713 }
00714 
00715 #if 0
00716 extern int
00717 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
00718                              const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
00719 {
00720   const UChar* p = *pp;
00721 
00722   if (ONIGENC_IS_MBC_ASCII(p)) {
00723     (*pp)++;
00724     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00725   }
00726 
00727   (*pp) += enclen(enc, p);
00728   return FALSE;
00729 }
00730 #endif
00731 
00732 extern int
00733 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00734 {
00735   if ((code & 0xff00) != 0) return 2;
00736   else return 1;
00737 }
00738 
00739 extern int
00740 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00741 {
00742        if ((code & 0xff000000) != 0) return 4;
00743   else if ((code & 0xff0000) != 0) return 3;
00744   else if ((code & 0xff00) != 0) return 2;
00745   else return 1;
00746 }
00747 
00748 extern int
00749 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00750 {
00751   UChar *p = buf;
00752 
00753   if ((code & 0xff00) != 0) {
00754     *p++ = (UChar )((code >>  8) & 0xff);
00755   }
00756   *p++ = (UChar )(code & 0xff);
00757 
00758 #if 1
00759   if (enclen(enc, buf, p) != (p - buf))
00760     return ONIGERR_INVALID_CODE_POINT_VALUE;
00761 #endif
00762   return (int)(p - buf);
00763 }
00764 
00765 extern int
00766 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00767 {
00768   UChar *p = buf;
00769 
00770   if ((code & 0xff000000) != 0) {
00771     *p++ = (UChar )((code >> 24) & 0xff);
00772   }
00773   if ((code & 0xff0000) != 0 || p != buf) {
00774     *p++ = (UChar )((code >> 16) & 0xff);
00775   }
00776   if ((code & 0xff00) != 0 || p != buf) {
00777     *p++ = (UChar )((code >> 8) & 0xff);
00778   }
00779   *p++ = (UChar )(code & 0xff);
00780 
00781 #if 1
00782   if (enclen(enc, buf, p) != (p - buf))
00783     return ONIGERR_INVALID_CODE_POINT_VALUE;
00784 #endif
00785   return (int)(p - buf);
00786 }
00787 
00788 extern int
00789 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
00790 {
00791   static const PosixBracketEntryType PBS[] = {
00792     PosixBracketEntryInit("Alnum",  ONIGENC_CTYPE_ALNUM),
00793     PosixBracketEntryInit("Alpha",  ONIGENC_CTYPE_ALPHA),
00794     PosixBracketEntryInit("Blank",  ONIGENC_CTYPE_BLANK),
00795     PosixBracketEntryInit("Cntrl",  ONIGENC_CTYPE_CNTRL),
00796     PosixBracketEntryInit("Digit",  ONIGENC_CTYPE_DIGIT),
00797     PosixBracketEntryInit("Graph",  ONIGENC_CTYPE_GRAPH),
00798     PosixBracketEntryInit("Lower",  ONIGENC_CTYPE_LOWER),
00799     PosixBracketEntryInit("Print",  ONIGENC_CTYPE_PRINT),
00800     PosixBracketEntryInit("Punct",  ONIGENC_CTYPE_PUNCT),
00801     PosixBracketEntryInit("Space",  ONIGENC_CTYPE_SPACE),
00802     PosixBracketEntryInit("Upper",  ONIGENC_CTYPE_UPPER),
00803     PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
00804     PosixBracketEntryInit("ASCII",  ONIGENC_CTYPE_ASCII),
00805     PosixBracketEntryInit("Word",   ONIGENC_CTYPE_WORD),
00806   };
00807 
00808   const PosixBracketEntryType *pb, *pbe;
00809   int len;
00810 
00811   len = onigenc_strlen(enc, p, end);
00812   for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
00813     if (len == pb->len &&
00814         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
00815       return pb->ctype;
00816   }
00817 
00818   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
00819 }
00820 
00821 extern int
00822 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00823                           unsigned int ctype)
00824 {
00825   if (code < 128)
00826     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00827   else {
00828     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00829       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00830     }
00831   }
00832 
00833   return FALSE;
00834 }
00835 
00836 extern int
00837 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00838                           unsigned int ctype)
00839 {
00840   if (code < 128)
00841     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00842   else {
00843     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00844       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00845     }
00846   }
00847 
00848   return FALSE;
00849 }
00850 
00851 extern int
00852 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
00853                            const UChar* sascii /* ascii */, int n)
00854 {
00855   int x, c;
00856 
00857   while (n-- > 0) {
00858     if (p >= end) return (int )(*sascii);
00859 
00860     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00861     x = *sascii - c;
00862     if (x) return x;
00863 
00864     sascii++;
00865     p += enclen(enc, p, end);
00866   }
00867   return 0;
00868 }
00869 
00870 /* Property management */
00871 static int
00872 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
00873 {
00874   size_t size;
00875   const OnigCodePoint **list = *plist;
00876 
00877   size = sizeof(OnigCodePoint*) * new_size;
00878   if (IS_NULL(list)) {
00879     list = (const OnigCodePoint** )xmalloc(size);
00880   }
00881   else {
00882     list = (const OnigCodePoint** )xrealloc((void* )list, size);
00883   }
00884 
00885   if (IS_NULL(list)) return ONIGERR_MEMORY;
00886 
00887   *plist = list;
00888   *psize = new_size;
00889 
00890   return 0;
00891 }
00892 
00893 extern int
00894 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
00895      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
00896      int *psize)
00897 {
00898 #define PROP_INIT_SIZE     16
00899 
00900   int r;
00901 
00902   if (*psize <= *pnum) {
00903     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
00904     r = resize_property_list(new_size, plist, psize);
00905     if (r != 0) return r;
00906   }
00907 
00908   (*plist)[*pnum] = prop;
00909 
00910   if (ONIG_IS_NULL(*table)) {
00911     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
00912     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
00913   }
00914 
00915   *pnum = *pnum + 1;
00916   onig_st_insert_strend(*table, name, name + strlen((char* )name),
00917                         (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
00918   return 0;
00919 }
00920 
00921 extern int
00922 onigenc_property_list_init(int (*f)(void))
00923 {
00924   int r;
00925 
00926   THREAD_ATOMIC_START;
00927 
00928   r = f();
00929 
00930   THREAD_ATOMIC_END;
00931   return r;
00932 }
00933 

Generated on Wed Sep 8 2010 09:56:11 for Ruby by  doxygen 1.7.1