/*
 * Public interface of the SGML parser: extension types, callback handlers,
 * state-table types, the parser object and the functions operating on it.
 *
 * NOTE(review): the original top-of-file comment is missing from this copy
 * (presumably the license/copyright text) -- restore it from upstream.
 */
#ifndef _LIBHTTP_SGMLPARSER_H
#define _LIBHTTP_SGMLPARSER_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Extension type of a parser.  A value of this type is passed to
 * sgmlParserInitialize() and stored in SGML_PARSER::type.
 */
enum SgmlExtensionType {
    SGML_EXTENSION_TYPE_XML = 0,
    SGML_EXTENSION_TYPE_HTML,           /* == 1 */

    SGML_EXTENSION_TYPE_CUSTOM = 255
};

struct _sgml_parser;

/**
 * Set of callbacks attached to a parser (see SGML_PARSER::handlers).
 *
 * Every callback receives the parser and an opaque userContext pointer.
 * NOTE(review): when each callback fires, and whether a member may be left
 * NULL, is decided by the implementation, which is not visible here.
 */
typedef struct _sgml_handlers {

    /* Presumably invoked before and after a parse run -- confirm. */
    void (*preparse)(struct _sgml_parser *parser, void *userContext);
    void (*postparse)(struct _sgml_parser *parser, void *userContext);

    /* Element notifications; elementName is the name of the element. */
    void (*elementBegin)(struct _sgml_parser *parser, void *userContext, const char *elementName);
    void (*elementEnd)(struct _sgml_parser *parser, void *userContext, const char *elementName);

    /* Attribute notification carrying the attribute's name and value. */
    void (*attributeNew)(struct _sgml_parser *parser, void *userContext, const char *attributeName, const char *attributeValue);

    /* Text notification. */
    void (*textNew)(struct _sgml_parser *parser, void *userContext, const char *text);

    /* Comment notification. */
    void (*commentNew)(struct _sgml_parser *parser, void *userContext, const char *comment);

} SGML_HANDLERS;

/*
 * Letter-matching modes.
 * Presumably the values stored in SGML_STATE_TABLE_RULE::letterType -- confirm.
 */
#define SGML_STC_LETTER_TYPE_SPECIFIC   0x00
#define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
#define SGML_STC_LETTER_TYPE_NOT        0x02
#define SGML_STC_LETTER_TYPE_NOTWS      0x03
#define SGML_STC_LETTER_TYPE_ANY        0x04

/*
 * Rule flags (bit mask).
 * Presumably combined into SGML_STATE_TABLE_RULE::flags -- confirm.
 */
#define SGML_STC_FLAG_DIVERT         (1 << 0)
#define SGML_STC_FLAG_UPDATE_STATE   (1 << 1)
#define SGML_STC_FLAG_INCL_IN_BUFFER (1 << 2)

/*
 * Parser state bits.  Each constant is a distinct single bit so that several
 * can be OR-ed together; presumably used for SGML_PARSER::internal.state and
 * the isState / notState / addState / remState masks of a rule -- confirm.
 * The gaps in the bit numbering (7-9, 11, 13-14) are present in the original.
 */
#define SGML_PARSER_STATE_INTEXT                 (1 << 0)

#define SGML_PARSER_STATE_INELEMENT              (1 << 1)
#define SGML_PARSER_STATE_INELEMENTNAME          (1 << 2)
#define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL   (1 << 3)
#define SGML_PARSER_STATE_INELEMENTCLOSURE       (1 << 4)

#define SGML_PARSER_STATE_INATTRIBUTENAME        (1 << 5)
#define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL (1 << 6)

#define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS    (1 << 10)

#define SGML_PARSER_STATE_INATTRIBUTEVALUE_S     (1 << 12)

#define SGML_PARSER_STATE_INCOMMENT               (1 << 15)
#define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION (1 << 16)
#define SGML_PARSER_STATE_INCOMMENTGOTDASH1       (1 << 17)
#define SGML_PARSER_STATE_INCOMMENTGOTDASH2       (1 << 18)

/**
 * One rule of the parser's state table.
 *
 * NOTE(review): the field descriptions below are inferred from the field and
 * macro names only; verify them against the implementation.
 */
typedef struct _sgml_state_table_rule {

    unsigned long stateIndexId;     /* id of the state table this rule belongs to (presumed) */

    unsigned char letterType;       /* presumably one of SGML_STC_LETTER_TYPE_* */
    unsigned char letter;           /* character the rule refers to */

    unsigned long flags;            /* presumably a mask of SGML_STC_FLAG_* */

    unsigned long divertTableId;    /* presumably the target table when diverting */

    unsigned long isState;          /* state mask (presumably bits that must be set) */
    unsigned long notState;         /* state mask (presumably bits that must be clear) */

    unsigned long addState;         /* state mask (presumably bits to set) */
    unsigned long remState;         /* state mask (presumably bits to clear) */

} SGML_STATE_TABLE_RULE;

/**
 * A state table: an id plus a pointer to its rules and a rule count.
 */
typedef struct _sgml_state_table {

    unsigned long stateIndexId;

    SGML_STATE_TABLE_RULE *rules;
    unsigned long ruleSize;         /* presumably the number of entries in rules -- confirm */

} SGML_STATE_TABLE;

/**
 * The parser object.
 *
 * The members outside `internal` hold the extension type, the callback set
 * and the state tables; `internal` groups the working state and hook
 * pointers.  Ownership of the pointed-to buffers is not visible from this
 * header.
 */
typedef struct _sgml_parser {

    enum SgmlExtensionType type;

    SGML_HANDLERS handlers;

    SGML_STATE_TABLE *stateTable;
    unsigned long stateTableElements;
    SGML_STATE_TABLE_RULE *stateTableRules;
    unsigned long stateTableRuleElements;

    struct {

        SGML_STATE_TABLE *currentState;

        char *lastElementName;
        char *lastAttributeName;

        char *currentBuffer;
        unsigned long currentBufferSize;

        unsigned long state;        /* presumably a mask of SGML_PARSER_STATE_* */

        void *extensionContext;     /* read via sgmlParserGetExtensionContext() */
        void *userContext;          /* read via sgmlParserGetUserContext() */

        /* Hooks with the same signatures as _sgmlOnStateChange / _sgmlOnDivert. */
        void (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
        void (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);

        /* Extension parameter hooks; compare sgmlParserExtensionSetParam / GetParam. */
        void (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
        void (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);

    } internal;

} SGML_PARSER;

/**
 * Returns a pointer to a parser object.
 * NOTE(review): presumably a fresh allocation that may be NULL on failure;
 * confirm ownership and failure behavior against the implementation.
 */
SGML_PARSER *sgmlParserNew(void);

/**
 * Initializes `parser` for the given extension `type`, callback set and
 * opaque user context.
 * @return an unsigned long status; its meaning is defined by the
 *         implementation (not visible in this header).
 */
unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);

/**
 * Destroys `parser`.
 * NOTE(review): `destroyParser` presumably selects whether the SGML_PARSER
 * object itself is released as well -- confirm.
 */
void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);

/**
 * Parses `stringLength` bytes starting at `string`.
 * @return an unsigned long status defined by the implementation.
 */
unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);

/**
 * Parses the file named by `file`.
 * @return an unsigned long status defined by the implementation.
 */
unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);

/**
 * Set / get an extension parameter identified by `param`; `value` is an
 * opaque pointer whose interpretation depends on the extension.
 */
void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);

/*
 * Accessors for the two context pointers of a parser.  `parser` is a pointer
 * to SGML_PARSER.  Argument and expansion are parenthesized so that any
 * pointer expression (e.g. `p + 1`, `&obj`) works; the result is still an
 * lvalue.
 */
#define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
#define sgmlParserGetUserContext(parser)      ((parser)->internal.userContext)

/*
 * Internal helpers (leading underscore).  Declared here, but presumably not
 * meant to be called by users of the library -- confirm.
 */
void _sgmlParserInitializeStateTable(SGML_PARSER *parser);
void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);

unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
void _sgmlParserResetBuffer(SGML_PARSER *parser);

void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);

#ifdef __cplusplus
}
#endif

#endif