Main Page   Modules   Namespace List   Data Structures   File List   Data Fields   Examples  

SgmlParser.h

00001 /*
00002  * libsgml -- SGML state machine parsing library.
00003  *                                                                  
00004  * Copyright (c) 2002 Uninformed Research (http://www.uninformed.org)
00005  * All rights reserved.
00006  *
00007  * skape
00008  * mmiller@hick.org
00009  */
00020 #ifndef _LIBHTTP_SGMLPARSER_H
00021 #define _LIBHTTP_SGMLPARSER_H
00022 
00023 #ifdef __cplusplus
00024 extern "C" {
00025 #endif
00026 
/**
 * Extension type selector. This is the type of SGML_PARSER.type and of the
 * `type` argument of sgmlParserInitialize().
 * NOTE(review): what each extension actually does is defined outside this
 * header -- confirm against the extension sources.
 */
00027 enum SgmlExtensionType {
00028         SGML_EXTENSION_TYPE_XML = 0,
00029         SGML_EXTENSION_TYPE_HTML,        /* implicitly 1 */
00030 
00031         SGML_EXTENSION_TYPE_CUSTOM = 255
00032 };
00033 
/* Forward declaration so the callback signatures below can take a parser pointer. */
00034 struct _sgml_parser;
00035 
/**
 * Table of callback function pointers. A pointer to one is taken by
 * sgmlParserInitialize() and SGML_PARSER embeds one by value.
 * Every callback receives the parser and an opaque `userContext` pointer;
 * the element/attribute/text/comment callbacks additionally receive
 * const string arguments.
 * NOTE(review): when each callback fires, whether NULL entries are allowed,
 * and the lifetime of the string arguments are not visible in this header --
 * confirm against the parser implementation.
 */
00041 typedef struct _sgml_handlers {
00042 
              /* Take no string argument; presumably run before/after a parse -- TODO confirm. */
00046         void (*preparse)(struct _sgml_parser *parser, void *userContext);
00050         void (*postparse)(struct _sgml_parser *parser, void *userContext);
00051 
              /* Receive an element name. */
00055         void (*elementBegin)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00059         void (*elementEnd)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00060 
              /* Receives an attribute name together with its value. */
00064         void (*attributeNew)(struct _sgml_parser *parser, void *userContext, const char *attributeName, const char *attributeValue);
00065 
              /* Receives a text string. */
00069         void (*textNew)(struct _sgml_parser *parser, void *userContext, const char *text);
00070 
              /* Receives a comment string. */
00074         void (*commentNew)(struct _sgml_parser *parser, void *userContext, const char *comment);
00075 
00076 } SGML_HANDLERS;
00077 
/*
 * Letter-type codes: consecutive values 0x00..0x04 (not a bit mask).
 * Presumably the legal values of SGML_STATE_TABLE_RULE.letterType, with the
 * "WS" variants relating to whitespace -- TODO confirm against the rule
 * matching code.
 */
00082 #define SGML_STC_LETTER_TYPE_SPECIFIC   0x00
00083 #define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
00084 #define SGML_STC_LETTER_TYPE_NOT        0x02
00085 #define SGML_STC_LETTER_TYPE_NOTWS      0x03
00086 #define SGML_STC_LETTER_TYPE_ANY        0x04
00087 
/*
 * Single-bit flags occupying bits 0..2, so they can be OR-ed together.
 * Presumably stored in SGML_STATE_TABLE_RULE.flags -- TODO confirm.
 */
00088 #define SGML_STC_FLAG_DIVERT           (1 << 0)
00089 #define SGML_STC_FLAG_UPDATE_STATE     (1 << 1)
00090 #define SGML_STC_FLAG_INCL_IN_BUFFER   (1 << 2)
00091 
/*
 * Parser state bits. Each macro is a distinct single bit, so several states
 * can be combined in one unsigned long.
 * Presumably held in SGML_PARSER.internal.state and tested/modified through
 * the isState/notState/addState/remState fields of SGML_STATE_TABLE_RULE --
 * TODO confirm.
 * Bits 7-9, 11 and 13-14 are not assigned in this header.
 * NOTE(review): the constants have type int (1 << n); the highest bit used
 * is 18, which assumes int is wider than 16 bits.
 */
00092 #define SGML_PARSER_STATE_INTEXT                      (1 << 0)
00093 
00094 #define SGML_PARSER_STATE_INELEMENT                   (1 << 1)
00095 #define SGML_PARSER_STATE_INELEMENTNAME               (1 << 2)
00096 #define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL        (1 << 3)
00097 #define SGML_PARSER_STATE_INELEMENTCLOSURE            (1 << 4)
00098         
00099 #define SGML_PARSER_STATE_INATTRIBUTENAME             (1 << 5)
00100 #define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL      (1 << 6)
00101 
00102 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS         (1 << 10)
00103 
00104 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_S          (1 << 12) 
00105 
00106 #define SGML_PARSER_STATE_INCOMMENT                   (1 << 15)
00107 #define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION     (1 << 16)
00108 #define SGML_PARSER_STATE_INCOMMENTGOTDASH1           (1 << 17)
00109 #define SGML_PARSER_STATE_INCOMMENTGOTDASH2           (1 << 18)
00110 
/**
 * One rule of the parser's state machine.
 * NOTE(review): the field semantics noted below are inferred from the field
 * names and the neighbouring SGML_STC_* / SGML_PARSER_STATE_* macros; the
 * code that evaluates rules is not part of this header -- confirm there.
 */
00111 typedef struct _sgml_state_table_rule {
00112 
              /* Same field name as SGML_STATE_TABLE.stateIndexId; presumably names the owning table. */
00113         unsigned long stateIndexId;
00114 
              /* Presumably one of SGML_STC_LETTER_TYPE_* and the character it applies to. */
00115         unsigned char letterType;
00116         unsigned char letter;
00117 
              /* Presumably a combination of SGML_STC_FLAG_* bits. */
00118         unsigned long flags;
00119 
              /* Presumably the table to switch to when SGML_STC_FLAG_DIVERT is set. */
00120         unsigned long divertTableId;
00121 
              /* Presumably SGML_PARSER_STATE_* masks that must / must not be set for the rule to apply. */
00122         unsigned long isState;
00123         unsigned long notState;
00124 
              /* Presumably SGML_PARSER_STATE_* masks to set / clear when the rule applies. */
00125         unsigned long addState; 
00126         unsigned long remState;
00127 
00128 } SGML_STATE_TABLE_RULE;
00129 
/**
 * A state table: an index id plus a pointer to SGML_STATE_TABLE_RULE entries
 * and a size for them.
 * NOTE(review): `ruleSize` is presumably the number of entries reachable
 * through `rules` (not a byte count), and `rules` presumably points into
 * SGML_PARSER.stateTableRules -- confirm against the table initialisation.
 */
00130 typedef struct _sgml_state_table {
00131 
00132         unsigned long          stateIndexId;
00133 
00134         SGML_STATE_TABLE_RULE  *rules;
00135         unsigned long          ruleSize;
00136 
00137 } SGML_STATE_TABLE;
00138 
/**
 * Parser instance. Holds the extension type, a by-value copy of the handler
 * table, pointers to the state tables and their rules, and an `internal`
 * sub-structure with the working state.
 * NOTE(review): ownership of the pointed-to memory is not visible in this
 * header; see sgmlParserNew() / sgmlParserDestroy() in the implementation.
 */
00139 typedef struct _sgml_parser {
00140 
00141         enum SgmlExtensionType type;
00142 
00143         SGML_HANDLERS          handlers;        
00144 
              /* State tables and rules, each with a count (presumably element counts -- TODO confirm). */
00145         SGML_STATE_TABLE       *stateTable;
00146         unsigned long          stateTableElements;
00147         SGML_STATE_TABLE_RULE  *stateTableRules;
00148         unsigned long          stateTableRuleElements;
00149 
00150         struct {
00151 
00152                 SGML_STATE_TABLE   *currentState;
00153 
00154                 char               *lastElementName;
00155                 char               *lastAttributeName;
00156 
                      /* Buffer pointer and its size; see _sgmlParserAppendBuffer() / _sgmlParserResetBuffer(). */
00157                 char               *currentBuffer;
00158                 unsigned long      currentBufferSize;
00159 
                      /* Presumably a combination of SGML_PARSER_STATE_* bits -- TODO confirm. */
00160                 unsigned long      state;
00161 
                      /* Read through sgmlParserGetExtensionContext() / sgmlParserGetUserContext(). */
00162                 void               *extensionContext;
00163                 void               *userContext;
00164 
                      /* Same signatures as _sgmlOnStateChange() / _sgmlOnDivert() declared later in this header. */
00165                 void               (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
00166                 void               (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00167 
                      /* Extension hooks; presumably what sgmlParserExtensionSetParam()/GetParam() forward to -- TODO confirm. */
00168                 void               (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00169                 void               (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00170 
00171         } internal;
00172 
00173 } SGML_PARSER;
00174 
00186 SGML_PARSER *sgmlParserNew();
/**
 * Initializes `parser` for the given extension `type`, taking a handler
 * table and an opaque `userContext` pointer.
 * NOTE(review): the meaning of the unsigned long return value and whether
 * `handlers` may be NULL are not visible in this header -- confirm.
 */
00205 unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);
/**
 * Tears down `parser`.
 * NOTE(review): `destroyParser` presumably selects whether the SGML_PARSER
 * structure itself is released in addition to its contents -- confirm.
 */
00212 void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);
00213 
/**
 * Parses `stringLength` characters starting at `string` with `parser`.
 * NOTE(review): meaning of the return value is not visible here -- confirm.
 */
00222 unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);
/**
 * Parses the input identified by `file` (presumably a file-system path --
 * TODO confirm) with `parser`.
 * NOTE(review): meaning of the return value is not visible here -- confirm.
 */
00230 unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);
00231 
/**
 * Sets extension parameter `param` from `value`.
 * NOTE(review): valid `param` ids and what `value` must point to are defined
 * by the active extension, not by this header -- confirm there.
 */
00244 void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
/**
 * Gets extension parameter `param`; `value` is presumably an out-pointer the
 * extension writes through -- TODO confirm.
 */
00257 void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);
00258 
/*
 * Accessors for the two context pointers kept in parser->internal.
 *
 * `parser` is any expression of type pointer-to-SGML_PARSER. Both the
 * argument and the whole expansion are parenthesized so the macros expand
 * correctly for arguments such as `p + 1`, `&array[0]` or `c ? a : b`, and
 * when used inside a larger expression. The result is still an lvalue, so
 * existing code that assigns through these macros keeps working.
 */
#define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
#define sgmlParserGetUserContext(parser) ((parser)->internal.userContext)
00261 
/*
 * Underscore-prefixed declarations: presumably internal helpers of the
 * parser implementation rather than public API -- TODO confirm.
 * NOTE(review): file-scope identifiers beginning with an underscore are
 * reserved by the C standard; renaming would be an interface change, so it
 * is only flagged here.
 */
/* Set up the parser's state tables / state table rules. */
00266 void _sgmlParserInitializeStateTable(SGML_PARSER *parser);
00267 void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);
00268 
/* Parses `chunkSize` characters of `chunk`; return value meaning not visible here. */
00269 unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
/* Buffer helpers; Append takes a `startOffset` into `chunk` and a `length`. */
00270 void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
00271 void _sgmlParserResetBuffer(SGML_PARSER *parser);
00272 
/* Same signatures as the onStateChange / onDivert pointers in SGML_PARSER.internal. */
00273 void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
00274 void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00275 
00276 #ifdef __cplusplus
00277 }
00278 #endif
00279 
00280 #endif

Generated on Mon Dec 9 01:20:58 2002 for libsgml by doxygen 1.2.15