00001
00002
00003
00004
00005
00006
00007
00008
00009
/* Include guard: everything up to the final #endif is processed only once per translation unit. */
00020 #ifndef _LIBHTTP_SGMLPARSER_H
00021 #define _LIBHTTP_SGMLPARSER_H
00022
/* When compiled as C++, give every declaration in this header C linkage. */
00023 #ifdef __cplusplus
00024 extern "C" {
00025 #endif
00026
/**
 * Identifies the parser extension a parser instance works with.
 * A value of this type is passed to sgmlParserInitialize() and is kept in
 * SGML_PARSER.type.
 */
00027 enum SgmlExtensionType {
/* XML extension; explicitly 0. */
00028 SGML_EXTENSION_TYPE_XML = 0,
/* HTML extension; implicitly 1 (follows XML). */
00029 SGML_EXTENSION_TYPE_HTML,
00030
/* Presumably selects a caller-provided extension -- TODO confirm in the implementation. */
00031 SGML_EXTENSION_TYPE_CUSTOM = 255
00032 };
00033
/* Forward declaration so the handler signatures below can take a parser pointer
 * before the full struct _sgml_parser definition appears later in this header. */
00034 struct _sgml_parser;
00035
/**
 * Table of client callbacks.
 *
 * Every callback receives the parser and an opaque userContext pointer
 * (presumably the value given to sgmlParserInitialize() -- confirm in the
 * implementation). The lifetime of the const char * arguments beyond the
 * callback is not visible from this header; copy them if they must outlive
 * the call.
 * NOTE(review): whether a NULL entry is tolerated is not visible here.
 */
00041 typedef struct _sgml_handlers {
00042
/* Presumably invoked before and after a parse run, respectively -- confirm. */
00046 void (*preparse)(struct _sgml_parser *parser, void *userContext);
00050 void (*postparse)(struct _sgml_parser *parser, void *userContext);
00051
/* Element callbacks; elementName names the element concerned.
 * Presumably fired at the start and end of an element -- confirm. */
00055 void (*elementBegin)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00059 void (*elementEnd)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00060
/* Receives an attribute name together with its value. */
00064 void (*attributeNew)(struct _sgml_parser *parser, void *userContext, const char *attributeName, const char *attributeValue);
00065
/* Receives a run of text. */
00069 void (*textNew)(struct _sgml_parser *parser, void *userContext, const char *text);
00070
/* Receives the content of a comment. */
00074 void (*commentNew)(struct _sgml_parser *parser, void *userContext, const char *comment);
00075
00076 } SGML_HANDLERS;
00077
/*
 * Letter-type codes (consecutive values 0x00..0x04). They fit in an
 * unsigned char and are presumably the values stored in
 * SGML_STATE_TABLE_RULE.letterType -- confirm in the implementation.
 * NOTE(review): the exact matching semantics of each code (e.g. what the
 * "WS" variants add) cannot be determined from this header.
 */
00082 #define SGML_STC_LETTER_TYPE_SPECIFIC 0x00
00083 #define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
00084 #define SGML_STC_LETTER_TYPE_NOT 0x02
00085 #define SGML_STC_LETTER_TYPE_NOTWS 0x03
00086 #define SGML_STC_LETTER_TYPE_ANY 0x04
00087
/*
 * Single-bit flags (bits 0..2) that can be OR-ed together; presumably the
 * values held in SGML_STATE_TABLE_RULE.flags -- confirm in the implementation.
 */
00088 #define SGML_STC_FLAG_DIVERT (1 << 0)
00089 #define SGML_STC_FLAG_UPDATE_STATE (1 << 1)
00090 #define SGML_STC_FLAG_INCL_IN_BUFFER (1 << 2)
00091
/*
 * Parser state bits. Each macro is a distinct single-bit mask, so several
 * can be combined in one unsigned long (presumably SGML_PARSER.internal.state
 * and the isState/notState/addState/remState fields of
 * SGML_STATE_TABLE_RULE -- confirm in the implementation).
 * Bits in use: 0-6, 10, 12 and 15-19; bits 7-9, 11, 13 and 14 are not
 * assigned in this header.
 * NOTE(review): the masks have type int; 1 << 15 and above would overflow
 * on a platform with 16-bit int.
 */
00092 #define SGML_PARSER_STATE_INTEXT (1 << 0)
00093
/* Element-related states. */
00094 #define SGML_PARSER_STATE_INELEMENT (1 << 1)
00095 #define SGML_PARSER_STATE_INELEMENTNAME (1 << 2)
00096 #define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL (1 << 3)
00097 #define SGML_PARSER_STATE_INELEMENTCLOSURE (1 << 4)
00098
/* Attribute-name states. */
00099 #define SGML_PARSER_STATE_INATTRIBUTENAME (1 << 5)
00100 #define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL (1 << 6)
00101
/* Attribute-value states; the _NS / _S suffixes are not explained in this
 * header -- TODO confirm their meaning in the implementation. */
00102 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS (1 << 10)
00103
00104 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_S (1 << 12)
00105
/* Comment-related states. */
00106 #define SGML_PARSER_STATE_INCOMMENT (1 << 15)
00107 #define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION (1 << 16)
00108 #define SGML_PARSER_STATE_INCOMMENTGOTDASH1 (1 << 17)
00109 #define SGML_PARSER_STATE_INCOMMENTGOTDASH2 (1 << 18)
00110
/* Attribute-value state involving a double quote (defined apart from the
 * other attribute-value bits above). */
00111 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_DBLQUOTE (1 << 19)
00112
/**
 * One rule of the parser's state table.
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; confirm them against the code that builds and evaluates the
 * rules.
 */
00113 typedef struct _sgml_state_table_rule {
00114
/* Presumably the id of the state table this rule belongs to
 * (SGML_STATE_TABLE has a field of the same name). */
00115 unsigned long stateIndexId;
00116
/* Presumably one of SGML_STC_LETTER_TYPE_* plus the character it refers to. */
00117 unsigned char letterType;
00118 unsigned char letter;
00119
/* Presumably a combination of SGML_STC_FLAG_* bits. */
00120 unsigned long flags;
00121
/* Presumably the table to switch to when the rule diverts. */
00122 unsigned long divertTableId;
00123
/* Presumably SGML_PARSER_STATE_* masks that must / must not be set for
 * the rule to apply. */
00124 unsigned long isState;
00125 unsigned long notState;
00126
/* Presumably SGML_PARSER_STATE_* masks to set / clear when the rule fires. */
00127 unsigned long addState;
00128 unsigned long remState;
00129
00130 } SGML_STATE_TABLE_RULE;
00131
/**
 * A state table: an identifier plus a pointer to a run of rules.
 */
00132 typedef struct _sgml_state_table {
00133
/* Identifier of this table. */
00134 unsigned long stateIndexId;
00135
/* Rules of this table and, presumably, how many of them `rules` points to
 * -- confirm whether ruleSize is an element count or a byte size. */
00136 SGML_STATE_TABLE_RULE *rules;
00137 unsigned long ruleSize;
00138
00139 } SGML_STATE_TABLE;
00140
/**
 * Parser instance.
 * Holds the extension type, the client's handler table (embedded by value),
 * the state tables and an `internal` sub-structure with working state.
 */
00141 typedef struct _sgml_parser {
00142
/* Extension type of this parser. */
00143 enum SgmlExtensionType type;
00144
/* Handler table, stored by value rather than by pointer. */
00145 SGML_HANDLERS handlers;
00146
/* State tables and their rules, each pointer paired with what is
 * presumably its element count -- confirm in the implementation. */
00147 SGML_STATE_TABLE *stateTable;
00148 unsigned long stateTableElements;
00149 SGML_STATE_TABLE_RULE *stateTableRules;
00150 unsigned long stateTableRuleElements;
00151
/* Working state of the parser. */
00152 struct {
00153
/* Presumably the table currently being applied. */
00154 SGML_STATE_TABLE *currentState;
00155
/* Presumably the most recently seen element / attribute name;
 * ownership of these strings is not visible from this header. */
00156 char *lastElementName;
00157 char *lastAttributeName;
00158
/* Accumulation buffer and its size. */
00159 char *currentBuffer;
00160 unsigned long currentBufferSize;
00161
/* Presumably a combination of SGML_PARSER_STATE_* bits. */
00162 unsigned long state;
00163
/* Opaque pointers read by the sgmlParserGetExtensionContext() and
 * sgmlParserGetUserContext() macros. */
00164 void *extensionContext;
00165 void *userContext;
00166
/* Hooks whose signatures match _sgmlOnStateChange() and _sgmlOnDivert()
 * declared later in this header. */
00167 void (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
00168 void (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00169
/* Extension parameter hooks; compared with sgmlParserExtensionSetParam() /
 * sgmlParserExtensionGetParam() they additionally take extensionContext. */
00170 void (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00171 void (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00172
00173 } internal;
00174
00175 } SGML_PARSER;
00176
00188 SGML_PARSER *sgmlParserNew();
/**
 * Initializes `parser` for the given extension type with a handler table
 * and an opaque user context pointer.
 *
 * @param parser      Parser to initialize.
 * @param type        Extension type (see enum SgmlExtensionType).
 * @param handlers    Client callback table.
 * @param userContext Opaque pointer; presumably the value later handed to
 *                    every handler -- confirm in the implementation.
 * @return Status as unsigned long; the success/failure encoding is not
 *         visible from this header -- TODO confirm.
 */
00207 unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);
/**
 * Tears down `parser`.
 *
 * @param parser        Parser to destroy.
 * @param destroyParser Presumably non-zero to also free the SGML_PARSER
 *                      object itself (e.g. one obtained from
 *                      sgmlParserNew()) -- confirm in the implementation.
 */
00214 void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);
00215
/**
 * Parses an in-memory buffer.
 *
 * @param parser       Parser to use.
 * @param string       Input data; its length is passed explicitly.
 * @param stringLength Number of chars in `string`.
 * @return Status as unsigned long; encoding not visible here -- TODO confirm.
 */
00224 unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);
/**
 * Parses the contents of a file.
 *
 * @param parser Parser to use.
 * @param file   Presumably the path of the file to read -- confirm.
 * @return Status as unsigned long; encoding not visible here -- TODO confirm.
 */
00232 unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);
00233
/**
 * Sets an extension parameter.
 *
 * @param parser Parser whose extension is addressed.
 * @param param  Parameter identifier; its meaning is presumably defined by
 *               the extension in use -- confirm.
 * @param value  Untyped value pointer; interpretation depends on `param`.
 */
00246 void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
/**
 * Reads an extension parameter.
 *
 * @param parser Parser whose extension is addressed.
 * @param param  Parameter identifier.
 * @param value  Untyped pointer; presumably where the result is stored
 *               (the function returns void) -- confirm.
 */
00259 void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);
00260
/*
 * Accessors for the two opaque context pointers kept in parser->internal.
 *
 * `parser` must be a pointer to a parser object. The argument and the whole
 * expansion are parenthesized so that any pointer expression (for example
 * &parserObject or a conditional expression) can be passed, and so that the
 * result combines safely with surrounding operators. The argument is
 * evaluated exactly once, and the expansion is still an lvalue.
 */
#define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
#define sgmlParserGetUserContext(parser) ((parser)->internal.userContext)
00263
/*
 * The functions below carry a leading underscore; presumably they are
 * library-internal helpers not intended for client code -- confirm.
 */
/* State table set-up for `parser` (tables, then rules). */
00268 void _sgmlParserInitializeStateTable(SGML_PARSER *parser);
00269 void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);
00270
/* Parses one chunk of `chunkSize` chars; return encoding not visible here. */
00271 unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
/* Buffer helpers: append `length` chars of `chunk` starting at `startOffset`
 * (presumably to internal.currentBuffer), and reset that buffer. */
00272 void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
00273 void _sgmlParserResetBuffer(SGML_PARSER *parser);
00274
/* Signatures match the internal.onStateChange / internal.onDivert hooks of
 * SGML_PARSER; presumably these are the hooks' default targets -- confirm. */
00275 void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
00276 void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00277
/* Close the extern "C" block opened near the top of the header (C++ only). */
00278 #ifdef __cplusplus
00279 }
00280 #endif
00281
/* End of the _LIBHTTP_SGMLPARSER_H include guard. */
00282 #endif