00001
00002
00003
00004
00005
00006
00007
00008
00009
00019 #ifndef _LIBHTTP_SGMLPARSER_H
00020 #define _LIBHTTP_SGMLPARSER_H
00021
/**
 * Selects the extension a parser works with; this is the type taken by
 * sgmlParserInitialize() and stored in SGML_PARSER::type.
 *
 * The numeric values are fixed by the declaration: XML is 0, HTML follows
 * implicitly as 1, and CUSTOM is pinned to 255.
 */
enum SgmlExtensionType {
    SGML_EXTENSION_TYPE_XML = 0,
    SGML_EXTENSION_TYPE_HTML,

    SGML_EXTENSION_TYPE_CUSTOM = 255
};
00028
/*
 * Forward declaration: the handler signatures below take the parser by
 * pointer, and the full struct is only defined later in this header.
 */
struct _sgml_parser;

/**
 * Table of user-supplied callbacks.
 *
 * Every callback receives the parser and an opaque userContext pointer;
 * the remaining arguments are read-only strings.
 *
 * NOTE(review): when each callback fires is inferred from its name only.
 * Whether a NULL entry is tolerated is not visible in this header; confirm
 * against the parser implementation.
 */
typedef struct _sgml_handlers {

    /** Presumably called before parsing starts. */
    void (*preparse)(struct _sgml_parser *parser, void *userContext);
    /** Presumably called after parsing finishes. */
    void (*postparse)(struct _sgml_parser *parser, void *userContext);

    /** Receives the name of an element; presumably fired when it opens. */
    void (*elementBegin)(struct _sgml_parser *parser, void *userContext,
                         const char *elementName);
    /** Receives the name of an element; presumably fired when it closes. */
    void (*elementEnd)(struct _sgml_parser *parser, void *userContext,
                       const char *elementName);

    /** Receives an attribute name together with its value. */
    void (*attributeNew)(struct _sgml_parser *parser, void *userContext,
                         const char *attributeName, const char *attributeValue);

    /** Receives a run of text. */
    void (*textNew)(struct _sgml_parser *parser, void *userContext,
                    const char *text);

    /** Receives the body of a comment. */
    void (*commentNew)(struct _sgml_parser *parser, void *userContext,
                       const char *comment);

} SGML_HANDLERS;
00072
/*
 * Letter-match kinds for a state-table rule.
 * NOTE(review): presumably the values stored in
 * SGML_STATE_TABLE_RULE::letterType; the exact matching semantics of each
 * kind live in the implementation and are not visible here.
 */
#define SGML_STC_LETTER_TYPE_SPECIFIC   0x00
#define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
#define SGML_STC_LETTER_TYPE_NOT        0x02
#define SGML_STC_LETTER_TYPE_NOTWS      0x03
#define SGML_STC_LETTER_TYPE_ANY        0x04
00082
/*
 * Single-bit flags that can be OR-ed together.
 * NOTE(review): presumably stored in SGML_STATE_TABLE_RULE::flags; confirm
 * against the state-table setup code.
 */
#define SGML_STC_FLAG_DIVERT         (1 << 0)
#define SGML_STC_FLAG_UPDATE_STATE   (1 << 1)
#define SGML_STC_FLAG_INCL_IN_BUFFER (1 << 2)
00086
/*
 * Parser state bits. Each constant occupies its own bit, so several can be
 * combined in one unsigned long mask. The bit positions are not contiguous
 * (7-9, 11, 13 and 14 are unused here); keep the values as they are.
 *
 * NOTE(review): presumably these are the bits held in the parser's
 * internal.state and in the isState/notState/addState/remState rule masks.
 */
#define SGML_PARSER_STATE_INTEXT                  (1 << 0)

#define SGML_PARSER_STATE_INELEMENT               (1 << 1)
#define SGML_PARSER_STATE_INELEMENTNAME           (1 << 2)
#define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL    (1 << 3)
#define SGML_PARSER_STATE_INELEMENTCLOSURE        (1 << 4)

#define SGML_PARSER_STATE_INATTRIBUTENAME         (1 << 5)
#define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL  (1 << 6)

#define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS     (1 << 10)

#define SGML_PARSER_STATE_INATTRIBUTEVALUE_S      (1 << 12)

#define SGML_PARSER_STATE_INCOMMENT               (1 << 15)
#define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION (1 << 16)
#define SGML_PARSER_STATE_INCOMMENTGOTDASH1       (1 << 17)
#define SGML_PARSER_STATE_INCOMMENTGOTDASH2       (1 << 18)
00105
/**
 * One rule of the parser's state table.
 *
 * NOTE(review): the field roles below are inferred from the field names and
 * the neighbouring SGML_STC_* / SGML_PARSER_STATE_* constants; confirm them
 * against the code that builds and evaluates the rules.
 */
typedef struct _sgml_state_table_rule {

    /* Presumably the id of the state table this rule belongs to. */
    unsigned long stateIndexId;

    /* Presumably one of SGML_STC_LETTER_TYPE_*, and the letter it applies to. */
    unsigned char letterType;
    unsigned char letter;

    /* Presumably a combination of SGML_STC_FLAG_* bits. */
    unsigned long flags;

    /* Presumably the table to switch to when the rule diverts. */
    unsigned long divertTableId;

    /* Presumably state masks that must / must not be set for the rule to apply. */
    unsigned long isState;
    unsigned long notState;

    /* Presumably state masks to set / clear when the rule applies. */
    unsigned long addState;
    unsigned long remState;

} SGML_STATE_TABLE_RULE;
00124
/**
 * One state table: an id plus a pointer to a run of rules and its length.
 *
 * NOTE(review): whether `rules` owns its memory or points into a shared
 * rule array is not visible in this header; confirm in the implementation.
 */
typedef struct _sgml_state_table {

    unsigned long stateIndexId;

    /*
     * Spelled with the struct tag (the same type as SGML_STATE_TABLE_RULE *)
     * so this definition does not depend on the typedef being declared first.
     */
    struct _sgml_state_table_rule *rules;
    unsigned long ruleSize;

} SGML_STATE_TABLE;
00133
00134 typedef struct _sgml_parser {
00135
00136 enum SgmlExtensionType type;
00137
00138 SGML_HANDLERS handlers;
00139
00140 SGML_STATE_TABLE *stateTable;
00141 unsigned long stateTableElements;
00142 SGML_STATE_TABLE_RULE *stateTableRules;
00143 unsigned long stateTableRuleElements;
00144
00145 struct {
00146
00147 SGML_STATE_TABLE *currentState;
00148
00149 char *lastElementName;
00150 char *lastAttributeName;
00151
00152 char *currentBuffer;
00153 unsigned long currentBufferSize;
00154
00155 unsigned long state;
00156
00157 void *extensionContext;
00158 void *userContext;
00159
00160 void (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
00161 void (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00162
00163 void (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00164 void (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00165
00166 } internal;
00167
00168 } SGML_PARSER;
00169
00181 SGML_PARSER *sgmlParserNew();
00200 unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);
00207 void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);
00208
00217 unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);
00225 unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);
00226
00239 void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
00252 void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);
00253
/*
 * Accessors for the two context pointers kept in the parser's `internal`
 * group. `parser` is a pointer to the parser object.
 *
 * Both the argument and the whole expansion are parenthesized so that an
 * argument such as `&p` or `cond ? a : b`, and a surrounding cast or
 * operator, bind as intended. The result is still an lvalue.
 */
#define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
#define sgmlParserGetUserContext(parser)      ((parser)->internal.userContext)
00256
00261 void _sgmlParserInitializeStateTable(SGML_PARSER *parser);
00262 void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);
00263
00264 unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
00265 void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
00266 void _sgmlParserResetBuffer(SGML_PARSER *parser);
00267
00268 void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
00269 void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00270
00271 #endif