| Gnome XML Library Reference Manual | |||
|---|---|---|---|
| <<< Previous Page | Home | Up | Next Page >>> | 
| struct htmlElemDesc {
    const char *name;	/* The tag name */
    char startTag;      /* Whether the start tag can be implied */
    char endTag;        /* Whether the end tag can be implied */
    char saveEndTag;    /* Whether the end tag should be saved */
    char empty;         /* Is this an empty element ? */
    char depr;          /* Is this a deprecated element ? */
    char dtd;           /* 1: only in Loose DTD, 2: only Frameset one */
    char isinline;      /* is this a block 0 or inline 1 element */
    const char *desc;   /* the description */
/* NRK Jan.2003
 * New fields encapsulating HTML structure
 *
 * Bugs:
 *	This is a very limited representation.  It fails to tell us when
 *	an element *requires* subelements (we only have whether they're
 *	allowed or not), and it doesn't tell us where CDATA and PCDATA
 *	are allowed.  Some element relationships are not fully represented:
 *	these are flagged with the word MODIFIER
 */
    const char** subelts;		/* allowed sub-elements of this element */
    const char* defaultsubelt;	/* subelement for suggested auto-repair
					   if necessary or NULL */
    const char** attrs_opt;		/* Optional Attributes */
    const char** attrs_depr;		/* Additional deprecated attributes */
    const char** attrs_req;		/* Required attributes */
}; | 
| struct htmlEntityDesc {
    unsigned int value;	/* the UNICODE value for the character */
    const char *name;	/* The entity name */
    const char *desc;   /* the description */
}; | 
| const htmlElemDesc* htmlTagLookup (const xmlChar *tag); | 
Lookup the HTML tag in the ElementTable
| const htmlEntityDesc* htmlEntityLookup (const xmlChar *name); | 
Look up the given entity in EntitiesTable.
TODO: the linear scan is really ugly; a hash table is really needed.
| const htmlEntityDesc* htmlEntityValueLookup (unsigned int value); | 
Look up the entity with the given Unicode value in EntitiesTable.
TODO: the linear scan is really ugly; a hash table is really needed.
| int htmlIsAutoClosed (htmlDocPtr doc, htmlNodePtr elem); | 
The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks whether a tag is autoclosed by one of its children.
| int htmlAutoCloseTag (htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem); | 
The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks whether the element or one of its children would autoclose the given tag.
| const htmlEntityDesc* htmlParseEntityRef (htmlParserCtxtPtr ctxt, xmlChar **str); | 
Parse an HTML entity reference.
[68] EntityRef ::= '&' Name ';'
| int htmlParseCharRef (htmlParserCtxtPtr ctxt); | 
parse Reference declarations
[66] CharRef ::= '&#' [0-9]+ ';' |
                 '&#x' [0-9a-fA-F]+ ';'
| void htmlParseElement (htmlParserCtxtPtr ctxt); | 
parse an HTML element, this is highly recursive
[39] element ::= EmptyElemTag | STag content ETag
[41] Attribute ::= Name Eq AttValue
| int htmlParseDocument (htmlParserCtxtPtr ctxt); | 
parse an HTML document (and build a tree if using the standard SAX interface).
| htmlDocPtr htmlSAXParseDoc (xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData); | 
Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fall back to the default DOM behavior and return a tree.
| htmlDocPtr htmlParseDoc (xmlChar *cur, const char *encoding); | 
parse an HTML in-memory document and build a tree.
| htmlDocPtr htmlSAXParseFile (const char *filename, const char *encoding, htmlSAXHandlerPtr sax, void *userData); | 
Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.
| htmlDocPtr htmlParseFile (const char *filename, const char *encoding); | 
parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
| int         UTF8ToHtml                      (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen); | 
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
| int         htmlEncodeEntities              (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen,
                                             int quoteChar); | 
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
| int htmlIsScriptAttribute (const xmlChar *name); | 
Check if an attribute is of content type Script
| int htmlHandleOmittedElem (int val); | 
Set and return the previous value for handling HTML omitted tags.
| void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); | 
Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
| htmlParserCtxtPtr htmlCreatePushParserCtxt (htmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename, xmlCharEncoding enc); | 
Create a parser context for using the HTML parser in push mode. The value of filename is used for fetching external entities and for error/warning reports.
| int htmlParseChunk (htmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate); | 
Parse a Chunk of memory