Lexer.h (12074B)
1 /* 2 =========================================================================== 3 4 Doom 3 BFG Edition GPL Source Code 5 Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company. 6 7 This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code"). 8 9 Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation, either version 3 of the License, or 12 (at your option) any later version. 13 14 Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>. 21 22 In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below. 23 24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA. 25 26 =========================================================================== 27 */ 28 29 #ifndef __LEXER_H__ 30 #define __LEXER_H__ 31 32 /* 33 =============================================================================== 34 35 Lexicographical parser 36 37 Does not use memory allocation during parsing. The lexer uses no 38 memory allocation if a source is loaded with LoadMemory(). 39 However, idToken may still allocate memory for large strings. 40 41 A number directly following the escape character '\' in a string is 42 assumed to be in decimal format instead of octal. Binary numbers of 43 the form 0b.. or 0B.. can also be used. 44 45 =============================================================================== 46 */ 47 48 // lexer flags 49 typedef enum { 50 LEXFL_NOERRORS = BIT(0), // don't print any errors 51 LEXFL_NOWARNINGS = BIT(1), // don't print any warnings 52 LEXFL_NOFATALERRORS = BIT(2), // errors aren't fatal 53 LEXFL_NOSTRINGCONCAT = BIT(3), // multiple strings seperated by whitespaces are not concatenated 54 LEXFL_NOSTRINGESCAPECHARS = BIT(4), // no escape characters inside strings 55 LEXFL_NODOLLARPRECOMPILE = BIT(5), // don't use the $ sign for precompilation 56 LEXFL_NOBASEINCLUDES = BIT(6), // don't include files embraced with < > 57 LEXFL_ALLOWPATHNAMES = BIT(7), // allow path seperators in names 58 LEXFL_ALLOWNUMBERNAMES = BIT(8), // allow names to start with a number 59 LEXFL_ALLOWIPADDRESSES = BIT(9), // allow ip addresses to be parsed as numbers 60 LEXFL_ALLOWFLOATEXCEPTIONS = BIT(10), // allow float exceptions like 1.#INF or 1.#IND to be parsed 61 LEXFL_ALLOWMULTICHARLITERALS = BIT(11), // allow multi character literals 62 LEXFL_ALLOWBACKSLASHSTRINGCONCAT = BIT(12), // allow multiple strings seperated by '\' to be concatenated 63 LEXFL_ONLYSTRINGS = BIT(13) // parse as whitespace deliminated strings (quoted strings keep quotes) 64 } lexerFlags_t; 65 66 // punctuation ids 67 #define P_RSHIFT_ASSIGN 1 68 #define P_LSHIFT_ASSIGN 2 69 #define P_PARMS 3 70 #define P_PRECOMPMERGE 4 71 72 #define P_LOGIC_AND 5 73 #define P_LOGIC_OR 6 74 #define P_LOGIC_GEQ 7 75 #define P_LOGIC_LEQ 8 76 #define P_LOGIC_EQ 9 77 #define P_LOGIC_UNEQ 10 78 79 #define P_MUL_ASSIGN 11 80 #define P_DIV_ASSIGN 12 81 #define P_MOD_ASSIGN 13 82 #define P_ADD_ASSIGN 14 83 #define P_SUB_ASSIGN 15 84 #define P_INC 16 85 #define P_DEC 17 86 87 #define P_BIN_AND_ASSIGN 18 88 #define P_BIN_OR_ASSIGN 19 89 #define P_BIN_XOR_ASSIGN 20 90 #define P_RSHIFT 21 91 #define P_LSHIFT 22 92 93 #define P_POINTERREF 23 94 #define P_CPP1 24 95 #define P_CPP2 25 96 #define P_MUL 26 97 #define P_DIV 27 98 #define P_MOD 28 99 #define P_ADD 29 100 #define P_SUB 30 101 #define P_ASSIGN 31 102 103 #define P_BIN_AND 32 104 #define P_BIN_OR 33 105 #define P_BIN_XOR 34 106 #define P_BIN_NOT 35 107 108 #define P_LOGIC_NOT 36 109 #define P_LOGIC_GREATER 37 110 #define P_LOGIC_LESS 38 111 112 #define P_REF 39 113 #define P_COMMA 40 114 #define P_SEMICOLON 41 115 #define P_COLON 42 116 #define P_QUESTIONMARK 43 117 118 #define P_PARENTHESESOPEN 44 119 #define P_PARENTHESESCLOSE 45 120 #define P_BRACEOPEN 46 121 #define P_BRACECLOSE 47 122 #define P_SQBRACKETOPEN 48 123 #define P_SQBRACKETCLOSE 49 124 #define P_BACKSLASH 50 125 126 #define P_PRECOMP 51 127 #define P_DOLLAR 52 128 129 // punctuation 130 typedef struct punctuation_s 131 { 132 char *p; // punctuation character(s) 133 int n; // punctuation id 134 } punctuation_t; 135 136 137 class idLexer { 138 139 friend class idParser; 140 141 public: 142 // constructor 143 idLexer(); 144 idLexer( int flags ); 145 idLexer( const char *filename, int flags = 0, bool OSPath = false ); 146 idLexer( const char *ptr, int length, const char *name, int flags = 0 ); 147 // destructor 148 ~idLexer(); 149 // load a script from the given file at the given offset with the given length 150 int LoadFile( const char *filename, bool OSPath = false ); 151 // load a script from the given memory with the given length and a specified line offset, 152 // so source strings extracted from a file can still refer to proper line numbers in the file 153 // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0' 154 int LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 ); 155 // free the script 156 void FreeSource(); 157 // returns true if a script is loaded 158 int IsLoaded() { return idLexer::loaded; }; 159 // read a token 160 int ReadToken( idToken *token ); 161 // expect a certain token, reads the token when available 162 int ExpectTokenString( const char *string ); 163 // expect a certain token type 164 int ExpectTokenType( int type, int subtype, idToken *token ); 165 // expect a token 166 int ExpectAnyToken( idToken *token ); 167 // returns true when the token is available 168 int CheckTokenString( const char *string ); 169 // returns true an reads the token when a token with the given type is available 170 int CheckTokenType( int type, int subtype, idToken *token ); 171 // returns true if the next token equals the given string but does not remove the token from the source 172 int PeekTokenString( const char *string ); 173 // returns true if the next token equals the given type but does not remove the token from the source 174 int PeekTokenType( int type, int subtype, idToken *token ); 175 // skip tokens until the given token string is read 176 int SkipUntilString( const char *string ); 177 // skip the rest of the current line 178 int SkipRestOfLine(); 179 // skip the braced section 180 int SkipBracedSection( bool parseFirstBrace = true ); 181 // skips spaces, tabs, C-like comments etc. Returns false if there is no token left to read. 182 bool SkipWhiteSpace( bool currentLine ); 183 // unread the given token 184 void UnreadToken( const idToken *token ); 185 // read a token only if on the same line 186 int ReadTokenOnLine( idToken *token ); 187 188 //Returns the rest of the current line 189 const char* ReadRestOfLine(idStr& out); 190 191 // read a signed integer 192 int ParseInt(); 193 // read a boolean 194 bool ParseBool(); 195 // read a floating point number. If errorFlag is NULL, a non-numeric token will 196 // issue an Error(). If it isn't NULL, it will issue a Warning() and set *errorFlag = true 197 float ParseFloat( bool *errorFlag = NULL ); 198 // parse matrices with floats 199 int Parse1DMatrix( int x, float *m ); 200 int Parse2DMatrix( int y, int x, float *m ); 201 int Parse3DMatrix( int z, int y, int x, float *m ); 202 // parse a braced section into a string 203 const char * ParseBracedSection( idStr &out ); 204 // parse a braced section into a string, maintaining indents and newlines 205 const char * ParseBracedSectionExact ( idStr &out, int tabs = -1 ); 206 // parse the rest of the line 207 const char * ParseRestOfLine( idStr &out ); 208 // pulls the entire line, including the \n at the end 209 const char * ParseCompleteLine( idStr &out ); 210 // retrieves the white space characters before the last read token 211 int GetLastWhiteSpace( idStr &whiteSpace ) const; 212 // returns start index into text buffer of last white space 213 int GetLastWhiteSpaceStart() const; 214 // returns end index into text buffer of last white space 215 int GetLastWhiteSpaceEnd() const; 216 // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example 217 void SetPunctuations( const punctuation_t *p ); 218 // returns a pointer to the punctuation with the given id 219 const char * GetPunctuationFromId( int id ); 220 // get the id for the given punctuation 221 int GetPunctuationId( const char *p ); 222 // set lexer flags 223 void SetFlags( int flags ); 224 // get lexer flags 225 int GetFlags(); 226 // reset the lexer 227 void Reset(); 228 // returns true if at the end of the file 229 bool EndOfFile(); 230 // returns the current filename 231 const char * GetFileName(); 232 // get offset in script 233 const int GetFileOffset(); 234 // get file time 235 const ID_TIME_T GetFileTime(); 236 // returns the current line number 237 const int GetLineNum(); 238 // print an error message 239 void Error( VERIFY_FORMAT_STRING const char *str, ... ); 240 // print a warning message 241 void Warning( VERIFY_FORMAT_STRING const char *str, ... ); 242 // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set 243 bool HadError() const; 244 245 // set the base folder to load files from 246 static void SetBaseFolder( const char *path ); 247 248 private: 249 int loaded; // set when a script file is loaded from file or memory 250 idStr filename; // file name of the script 251 int allocated; // true if buffer memory was allocated 252 const char * buffer; // buffer containing the script 253 const char * script_p; // current pointer in the script 254 const char * end_p; // pointer to the end of the script 255 const char * lastScript_p; // script pointer before reading token 256 const char * whiteSpaceStart_p; // start of last white space 257 const char * whiteSpaceEnd_p; // end of last white space 258 ID_TIME_T fileTime; // file time 259 int length; // length of the script in bytes 260 int line; // current line in script 261 int lastline; // line before reading token 262 int tokenavailable; // set by unreadToken 263 int flags; // several script flags 264 const punctuation_t *punctuations; // the punctuations used in the script 265 int * punctuationtable; // ASCII table with punctuations 266 int * nextpunctuation; // next punctuation in chain 267 idToken token; // available token 268 idLexer * next; // next script in a chain 269 bool hadError; // set by idLexer::Error, even if the error is supressed 270 271 static char baseFolder[ 256 ]; // base folder to load files from 272 273 private: 274 void CreatePunctuationTable( const punctuation_t *punctuations ); 275 int ReadWhiteSpace(); 276 int ReadEscapeCharacter( char *ch ); 277 int ReadString( idToken *token, int quote ); 278 int ReadName( idToken *token ); 279 int ReadNumber( idToken *token ); 280 int ReadPunctuation( idToken *token ); 281 int ReadPrimitive( idToken *token ); 282 int CheckString( const char *str ) const; 283 int NumLinesCrossed(); 284 }; 285 286 ID_INLINE const char *idLexer::GetFileName() { 287 return idLexer::filename; 288 } 289 290 ID_INLINE const int idLexer::GetFileOffset() { 291 return idLexer::script_p - idLexer::buffer; 292 } 293 294 ID_INLINE const ID_TIME_T idLexer::GetFileTime() { 295 return idLexer::fileTime; 296 } 297 298 ID_INLINE const int idLexer::GetLineNum() { 299 return idLexer::line; 300 } 301 302 ID_INLINE void idLexer::SetFlags( int flags ) { 303 idLexer::flags = flags; 304 } 305 306 ID_INLINE int idLexer::GetFlags() { 307 return idLexer::flags; 308 } 309 310 #endif /* !__LEXER_H__ */ 311