Main Page   Modules   Data Structures   File List   Data Fields   Examples  

SgmlParser.h

00001 /*
00002  * libsgml -- SGML state machine parsing library.
00003  *                                                                  
00004  * Copyright (c) 2002 Uninformed Research (http://www.uninformed.org)
00005  * All rights reserved.
00006  *
00007  * skape
00008  * mmiller@hick.org
00009  */
/*
 * Include guard for this header.
 * NOTE(review): identifiers that begin with an underscore followed by an
 * uppercase letter are reserved for the implementation in C, and the guard
 * says LIBHTTP although the file header names the library libsgml --
 * consider renaming once all users of this macro are known.
 */
00019 #ifndef _LIBHTTP_SGMLPARSER_H
00020 #define _LIBHTTP_SGMLPARSER_H
00021 
/*
 * Selects the extension a parser works with. The same type is used for the
 * `type` argument of sgmlParserInitialize() and for SGML_PARSER.type.
 * XML is explicitly 0, HTML takes the next implicit value (1) and CUSTOM is
 * pinned to 255.
 * NOTE(review): CUSTOM presumably means a caller-provided extension -- confirm
 * against the implementation.
 */
00022 enum SgmlExtensionType {
00023         SGML_EXTENSION_TYPE_XML = 0,
00024         SGML_EXTENSION_TYPE_HTML,
00025 
00026         SGML_EXTENSION_TYPE_CUSTOM = 255
00027 };
00028 
/*
 * Forward declaration: the callback signatures below take a parser pointer
 * before struct _sgml_parser is defined further down in this header.
 */
00029 struct _sgml_parser;
00030 
/*
 * Table of callback function pointers. A pointer to such a table is passed to
 * sgmlParserInitialize(), and SGML_PARSER embeds one by value.
 * Every callback receives the parser and an opaque userContext pointer; the
 * element, attribute, text and comment callbacks also receive read-only
 * strings.
 * NOTE(review): when each callback fires, whether NULL entries are allowed and
 * how long the string arguments remain valid cannot be seen from this header
 * -- confirm against the implementation.
 */
00036 typedef struct _sgml_handlers {
00037 
              /* Presumably invoked before and after a parse run -- TODO confirm. */
00041         void (*preparse)(struct _sgml_parser *parser, void *userContext);
00045         void (*postparse)(struct _sgml_parser *parser, void *userContext);
00046 
              /* Presumably invoked at the start / end of an element named elementName -- TODO confirm. */
00050         void (*elementBegin)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00054         void (*elementEnd)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00055 
              /* Receives an attribute name/value pair; presumably one call per attribute -- TODO confirm. */
00059         void (*attributeNew)(struct _sgml_parser *parser, void *userContext, const char *attributeName, const char *attributeValue);
00060 
              /* Receives a text string; presumably character data found between elements -- TODO confirm. */
00064         void (*textNew)(struct _sgml_parser *parser, void *userContext, const char *text);
00065 
              /* Receives a comment string; presumably the body of a comment -- TODO confirm. */
00069         void (*commentNew)(struct _sgml_parser *parser, void *userContext, const char *comment);
00070 
00071 } SGML_HANDLERS;
00072 
/*
 * Letter-type codes, consecutive values 0x00 through 0x04.
 * NOTE(review): presumably the values stored in
 * SGML_STATE_TABLE_RULE.letterType, with the WS variants relating to
 * whitespace handling -- the matching logic is not in this header; confirm.
 */
00077 #define SGML_STC_LETTER_TYPE_SPECIFIC   0x00
00078 #define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
00079 #define SGML_STC_LETTER_TYPE_NOT        0x02
00080 #define SGML_STC_LETTER_TYPE_NOTWS      0x03
00081 #define SGML_STC_LETTER_TYPE_ANY        0x04
00082 
/*
 * Single-bit masks (bits 0, 1 and 2), so they can be OR-ed together.
 * NOTE(review): presumably the bits stored in SGML_STATE_TABLE_RULE.flags --
 * confirm against the code that evaluates rules.
 */
00083 #define SGML_STC_FLAG_DIVERT           (1 << 0)
00084 #define SGML_STC_FLAG_UPDATE_STATE     (1 << 1)
00085 #define SGML_STC_FLAG_INCL_IN_BUFFER   (1 << 2)
00086 
/*
 * Parser state bits. Each constant is a distinct single-bit mask; the bits in
 * use are 0-6, 10, 12 and 15-18 (7-9, 11, 13 and 14 are not defined here).
 * The expressions have type int, so the shifts up to bit 18 assume an int
 * wider than 16 bits.
 * NOTE(review): presumably combined in SGML_PARSER.internal.state and in the
 * isState / notState / addState / remState fields of SGML_STATE_TABLE_RULE;
 * the _NS / _S suffixes are not explained in this header -- confirm.
 */
00087 #define SGML_PARSER_STATE_INTEXT                      (1 << 0)
00088 
00089 #define SGML_PARSER_STATE_INELEMENT                   (1 << 1)
00090 #define SGML_PARSER_STATE_INELEMENTNAME               (1 << 2)
00091 #define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL        (1 << 3)
00092 #define SGML_PARSER_STATE_INELEMENTCLOSURE            (1 << 4)
00093         
00094 #define SGML_PARSER_STATE_INATTRIBUTENAME             (1 << 5)
00095 #define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL      (1 << 6)
00096 
00097 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS         (1 << 10)
00098 
00099 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_S          (1 << 12) 
00100 
00101 #define SGML_PARSER_STATE_INCOMMENT                   (1 << 15)
00102 #define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION     (1 << 16)
00103 #define SGML_PARSER_STATE_INCOMMENTGOTDASH1           (1 << 17)
00104 #define SGML_PARSER_STATE_INCOMMENTGOTDASH2           (1 << 18)
00105 
/*
 * One rule of the parser's state table.
 * NOTE(review): the field meanings noted below are inferred from the field
 * names and from the SGML_STC_* / SGML_PARSER_STATE_* constants in this
 * header; the code that evaluates rules is not part of this file -- confirm
 * before relying on them.
 */
00106 typedef struct _sgml_state_table_rule {
00107 
              /* Presumably the stateIndexId of the SGML_STATE_TABLE this rule belongs to. */
00108         unsigned long stateIndexId;
00109 
              /* Presumably one of SGML_STC_LETTER_TYPE_* and the character it applies to. */
00110         unsigned char letterType;
00111         unsigned char letter;
00112 
              /* Presumably a combination of SGML_STC_FLAG_* bits. */
00113         unsigned long flags;
00114 
              /* Presumably the table to switch to when SGML_STC_FLAG_DIVERT is set. */
00115         unsigned long divertTableId;
00116 
              /* Presumably state bits that must / must not be set for the rule to match. */
00117         unsigned long isState;
00118         unsigned long notState;
00119 
              /* Presumably state bits to set / clear when the rule is applied. */
00120         unsigned long addState; 
00121         unsigned long remState;
00122 
00123 } SGML_STATE_TABLE_RULE;
00124 
/*
 * A state table: an index id plus a pointer to SGML_STATE_TABLE_RULE entries
 * and an associated size.
 * NOTE(review): ruleSize is presumably the number of entries reachable through
 * `rules` (not a byte count), and ownership of `rules` is not visible here --
 * confirm against the implementation.
 */
00125 typedef struct _sgml_state_table {
00126 
00127         unsigned long          stateIndexId;
00128 
00129         SGML_STATE_TABLE_RULE  *rules;
00130         unsigned long          ruleSize;
00131 
00132 } SGML_STATE_TABLE;
00133 
/*
 * Parser object. It holds the extension type, a by-value copy of the handler
 * table, pointers to the state tables and rules with their element counts,
 * and an `internal` sub-structure with the working state.
 * The sgmlParserGetExtensionContext / sgmlParserGetUserContext macros in this
 * header read internal.extensionContext and internal.userContext.
 * NOTE(review): which fields callers may touch directly, and who owns the
 * pointed-to memory, is not visible in this header -- confirm.
 */
00134 typedef struct _sgml_parser {
00135 
00136         enum SgmlExtensionType type;
00137 
00138         SGML_HANDLERS          handlers;        
00139 
              /* Table and rule arrays with their counts; presumably filled by the
               * _sgmlParserInitializeStateTable* functions declared below -- TODO confirm. */
00140         SGML_STATE_TABLE       *stateTable;
00141         unsigned long          stateTableElements;
00142         SGML_STATE_TABLE_RULE  *stateTableRules;
00143         unsigned long          stateTableRuleElements;
00144 
00145         struct {
00146 
00147                 SGML_STATE_TABLE   *currentState;
00148 
00149                 char               *lastElementName;
00150                 char               *lastAttributeName;
00151 
                      /* Presumably the accumulation buffer managed by _sgmlParserAppendBuffer /
                       * _sgmlParserResetBuffer -- TODO confirm. */
00152                 char               *currentBuffer;
00153                 unsigned long      currentBufferSize;
00154 
                      /* Presumably a combination of SGML_PARSER_STATE_* bits -- TODO confirm. */
00155                 unsigned long      state;
00156 
00157                 void               *extensionContext;
00158                 void               *userContext;
00159 
                      /* Same signatures as _sgmlOnStateChange / _sgmlOnDivert declared below. */
00160                 void               (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
00161                 void               (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00162 
                      /* Like sgmlParserExtensionSetParam / GetParam, plus an explicit extensionContext argument. */
00163                 void               (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00164                 void               (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00165 
00166         } internal;
00167 
00168 } SGML_PARSER;
00169 
/*
 * Returns a pointer to an SGML_PARSER and takes no arguments.
 * Declared with (void) so that this is a real prototype: in C an empty
 * parameter list leaves the arguments unspecified and turns off argument
 * checking at call sites.
 * NOTE(review): presumably allocates a new parser that is later released with
 * sgmlParserDestroy(), and may return NULL on failure -- confirm against the
 * implementation.
 */
00181 SGML_PARSER *sgmlParserNew(void);
/*
 * Takes a parser, an extension type, a handler table and an opaque
 * userContext pointer; returns an unsigned long.
 * NOTE(review): the meaning of the return value is not visible in this header
 * -- confirm.
 */
00200 unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);
/*
 * NOTE(review): destroyParser presumably selects whether the SGML_PARSER
 * object itself is released in addition to its contents -- confirm.
 */
00207 void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);
00208 
/*
 * Parse entry points: one takes a read-only character buffer with an explicit
 * length, the other a read-only `file` string. Both return an unsigned long.
 * The const on the by-value stringLength parameter has no effect for callers.
 * NOTE(review): `file` is presumably a path; the meaning of the return values
 * is not visible here -- confirm.
 */
00217 unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);
00225 unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);
00226 
/*
 * Set / get an extension parameter identified by `param`, passing `value` as
 * an untyped pointer.
 * NOTE(review): presumably forwarded to internal.setExtensionParam /
 * internal.getExtensionParam; the valid param ids and the type behind `value`
 * depend on the extension -- confirm.
 */
00239 void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
00252 void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);
00253 
/*
 * Accessors for the two context pointers stored in SGML_PARSER.internal.
 * `parser` must be an expression of type SGML_PARSER *; it is evaluated once.
 * The argument and the whole expansion are parenthesized so that compound
 * arguments (e.g. `p + 1` or `cond ? a : b`) and surrounding operators bind as
 * expected. The result is still an lvalue, so assigning through the macro
 * keeps working.
 */
00254 #define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
00255 #define sgmlParserGetUserContext(parser) ((parser)->internal.userContext)
00256 
/*
 * Declarations with a leading underscore.
 * NOTE(review): presumably internal helpers not meant for library users;
 * file-scope identifiers that start with an underscore are reserved in C, so
 * a rename may be worth considering -- confirm usage first.
 */
00261 void _sgmlParserInitializeStateTable(SGML_PARSER *parser);
00262 void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);
00263 
/*
 * Chunk and buffer helpers. _sgmlParseChunk takes a read-only chunk with its
 * size and returns an unsigned long; _sgmlParserAppendBuffer takes a chunk, a
 * start offset and a length.
 * NOTE(review): presumably these maintain internal.currentBuffer /
 * internal.currentBufferSize -- confirm.
 */
00264 unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
00265 void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
00266 void _sgmlParserResetBuffer(SGML_PARSER *parser);
00267 
/*
 * Signatures match the internal.onStateChange / internal.onDivert function
 * pointers of SGML_PARSER.
 * NOTE(review): presumably the default implementations of those hooks --
 * confirm.
 */
00268 void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
00269 void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00270 
00271 #endif

Generated on Sun Dec 1 04:59:01 2002 for libsgml by doxygen 1.3-rc1