00001 #include <psych.h>
00002
00003 VALUE cPsychParser;
00004 VALUE ePsychSyntaxError;
00005
00006 static ID id_read;
00007 static ID id_empty;
00008 static ID id_start_stream;
00009 static ID id_end_stream;
00010 static ID id_start_document;
00011 static ID id_end_document;
00012 static ID id_alias;
00013 static ID id_scalar;
00014 static ID id_start_sequence;
00015 static ID id_end_sequence;
00016 static ID id_start_mapping;
00017 static ID id_end_mapping;
00018
/*
 * Associate the encoding index _yaml_enc with the Ruby string _str and,
 * when _internal_enc is non-NULL, replace _str with the result of
 * rb_str_export_to_enc() for that encoding.
 *
 * _str must be an assignable VALUE expression: it is evaluated more than
 * once and may be reassigned.
 */
#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \
    do { \
        rb_enc_associate_index((_str), (_yaml_enc)); \
        if ((_internal_enc)) { \
            (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \
        } \
    } while (0)
00025
00026 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
00027 {
00028 VALUE io = (VALUE)data;
00029 VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
00030
00031 *read = 0;
00032
00033 if(! NIL_P(string)) {
00034 void * str = (void *)StringValuePtr(string);
00035 *read = (size_t)RSTRING_LEN(string);
00036 memcpy(buf, str, *read);
00037 }
00038
00039 return 1;
00040 }
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051 static VALUE parse(VALUE self, VALUE yaml)
00052 {
00053 yaml_parser_t parser;
00054 yaml_event_t event;
00055 int done = 0;
00056 #ifdef HAVE_RUBY_ENCODING_H
00057 int encoding = rb_enc_find_index("ASCII-8BIT");
00058 rb_encoding * internal_enc;
00059 #endif
00060 VALUE handler = rb_iv_get(self, "@handler");
00061
00062
00063 yaml_parser_initialize(&parser);
00064
00065 if(rb_respond_to(yaml, id_read)) {
00066 yaml_parser_set_input(&parser, io_reader, (void *)yaml);
00067 } else {
00068 StringValue(yaml);
00069 yaml_parser_set_input_string(
00070 &parser,
00071 (const unsigned char *)RSTRING_PTR(yaml),
00072 (size_t)RSTRING_LEN(yaml)
00073 );
00074 }
00075
00076 while(!done) {
00077 if(!yaml_parser_parse(&parser, &event)) {
00078 size_t line = parser.mark.line;
00079 size_t column = parser.mark.column;
00080
00081 yaml_parser_delete(&parser);
00082 rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
00083 (int)line, (int)column);
00084 }
00085
00086 switch(event.type) {
00087 case YAML_STREAM_START_EVENT:
00088
00089 #ifdef HAVE_RUBY_ENCODING_H
00090 switch(event.data.stream_start.encoding) {
00091 case YAML_ANY_ENCODING:
00092 break;
00093 case YAML_UTF8_ENCODING:
00094 encoding = rb_enc_find_index("UTF-8");
00095 break;
00096 case YAML_UTF16LE_ENCODING:
00097 encoding = rb_enc_find_index("UTF-16LE");
00098 break;
00099 case YAML_UTF16BE_ENCODING:
00100 encoding = rb_enc_find_index("UTF-16BE");
00101 break;
00102 default:
00103 break;
00104 }
00105 internal_enc = rb_default_internal_encoding();
00106 #endif
00107
00108 rb_funcall(handler, id_start_stream, 1,
00109 INT2NUM((long)event.data.stream_start.encoding)
00110 );
00111 break;
00112 case YAML_DOCUMENT_START_EVENT:
00113 {
00114
00115 VALUE tag_directives = rb_ary_new();
00116
00117 VALUE version = event.data.document_start.version_directive ?
00118 rb_ary_new3(
00119 (long)2,
00120 INT2NUM((long)event.data.document_start.version_directive->major),
00121 INT2NUM((long)event.data.document_start.version_directive->minor)
00122 ) : rb_ary_new();
00123
00124 if(event.data.document_start.tag_directives.start) {
00125 yaml_tag_directive_t *start =
00126 event.data.document_start.tag_directives.start;
00127 yaml_tag_directive_t *end =
00128 event.data.document_start.tag_directives.end;
00129 for(; start != end; start++) {
00130 VALUE handle = Qnil;
00131 VALUE prefix = Qnil;
00132 if(start->handle) {
00133 handle = rb_str_new2((const char *)start->handle);
00134 #ifdef HAVE_RUBY_ENCODING_H
00135 PSYCH_TRANSCODE(handle, encoding, internal_enc);
00136 #endif
00137 }
00138
00139 if(start->prefix) {
00140 prefix = rb_str_new2((const char *)start->prefix);
00141 #ifdef HAVE_RUBY_ENCODING_H
00142 PSYCH_TRANSCODE(prefix, encoding, internal_enc);
00143 #endif
00144 }
00145
00146 rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
00147 }
00148 }
00149 rb_funcall(handler, id_start_document, 3,
00150 version, tag_directives,
00151 event.data.document_start.implicit == 1 ? Qtrue : Qfalse
00152 );
00153 }
00154 break;
00155 case YAML_DOCUMENT_END_EVENT:
00156 rb_funcall(handler, id_end_document, 1,
00157 event.data.document_end.implicit == 1 ? Qtrue : Qfalse
00158 );
00159 break;
00160 case YAML_ALIAS_EVENT:
00161 {
00162 VALUE alias = Qnil;
00163 if(event.data.alias.anchor) {
00164 alias = rb_str_new2((const char *)event.data.alias.anchor);
00165 #ifdef HAVE_RUBY_ENCODING_H
00166 PSYCH_TRANSCODE(alias, encoding, internal_enc);
00167 #endif
00168 }
00169
00170 rb_funcall(handler, id_alias, 1, alias);
00171 }
00172 break;
00173 case YAML_SCALAR_EVENT:
00174 {
00175 VALUE anchor = Qnil;
00176 VALUE tag = Qnil;
00177 VALUE plain_implicit, quoted_implicit, style;
00178 VALUE val = rb_str_new(
00179 (const char *)event.data.scalar.value,
00180 (long)event.data.scalar.length
00181 );
00182
00183 #ifdef HAVE_RUBY_ENCODING_H
00184 PSYCH_TRANSCODE(val, encoding, internal_enc);
00185 #endif
00186
00187 if(event.data.scalar.anchor) {
00188 anchor = rb_str_new2((const char *)event.data.scalar.anchor);
00189 #ifdef HAVE_RUBY_ENCODING_H
00190 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00191 #endif
00192 }
00193
00194 if(event.data.scalar.tag) {
00195 tag = rb_str_new2((const char *)event.data.scalar.tag);
00196 #ifdef HAVE_RUBY_ENCODING_H
00197 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00198 #endif
00199 }
00200
00201 plain_implicit =
00202 event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
00203
00204 quoted_implicit =
00205 event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
00206
00207 style = INT2NUM((long)event.data.scalar.style);
00208
00209 rb_funcall(handler, id_scalar, 6,
00210 val, anchor, tag, plain_implicit, quoted_implicit, style);
00211 }
00212 break;
00213 case YAML_SEQUENCE_START_EVENT:
00214 {
00215 VALUE anchor = Qnil;
00216 VALUE tag = Qnil;
00217 VALUE implicit, style;
00218 if(event.data.sequence_start.anchor) {
00219 anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
00220 #ifdef HAVE_RUBY_ENCODING_H
00221 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00222 #endif
00223 }
00224
00225 tag = Qnil;
00226 if(event.data.sequence_start.tag) {
00227 tag = rb_str_new2((const char *)event.data.sequence_start.tag);
00228 #ifdef HAVE_RUBY_ENCODING_H
00229 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00230 #endif
00231 }
00232
00233 implicit =
00234 event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
00235
00236 style = INT2NUM((long)event.data.sequence_start.style);
00237
00238 rb_funcall(handler, id_start_sequence, 4,
00239 anchor, tag, implicit, style);
00240 }
00241 break;
00242 case YAML_SEQUENCE_END_EVENT:
00243 rb_funcall(handler, id_end_sequence, 0);
00244 break;
00245 case YAML_MAPPING_START_EVENT:
00246 {
00247 VALUE anchor = Qnil;
00248 VALUE tag = Qnil;
00249 VALUE implicit, style;
00250 if(event.data.mapping_start.anchor) {
00251 anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
00252 #ifdef HAVE_RUBY_ENCODING_H
00253 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00254 #endif
00255 }
00256
00257 if(event.data.mapping_start.tag) {
00258 tag = rb_str_new2((const char *)event.data.mapping_start.tag);
00259 #ifdef HAVE_RUBY_ENCODING_H
00260 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00261 #endif
00262 }
00263
00264 implicit =
00265 event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
00266
00267 style = INT2NUM((long)event.data.mapping_start.style);
00268
00269 rb_funcall(handler, id_start_mapping, 4,
00270 anchor, tag, implicit, style);
00271 }
00272 break;
00273 case YAML_MAPPING_END_EVENT:
00274 rb_funcall(handler, id_end_mapping, 0);
00275 break;
00276 case YAML_NO_EVENT:
00277 rb_funcall(handler, id_empty, 0);
00278 break;
00279 case YAML_STREAM_END_EVENT:
00280 rb_funcall(handler, id_end_stream, 0);
00281 done = 1;
00282 break;
00283 }
00284 }
00285
00286 return self;
00287 }
00288
00289 void Init_psych_parser()
00290 {
00291 #if 0
00292 mPsych = rb_define_module("Psych");
00293 #endif
00294
00295 cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
00296
00297
00298 rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
00299
00300
00301 rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
00302
00303
00304 rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
00305
00306
00307 rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
00308
00309 ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
00310
00311 rb_define_method(cPsychParser, "parse", parse, 1);
00312
00313 id_read = rb_intern("read");
00314 id_empty = rb_intern("empty");
00315 id_start_stream = rb_intern("start_stream");
00316 id_end_stream = rb_intern("end_stream");
00317 id_start_document = rb_intern("start_document");
00318 id_end_document = rb_intern("end_document");
00319 id_alias = rb_intern("alias");
00320 id_scalar = rb_intern("scalar");
00321 id_start_sequence = rb_intern("start_sequence");
00322 id_end_sequence = rb_intern("end_sequence");
00323 id_start_mapping = rb_intern("start_mapping");
00324 id_end_mapping = rb_intern("end_mapping");
00325 }
00326
00327