Lexer.h - DOOM-3-BFG - DOOM 3 BFG Edition

Lexer.h (12074B)
      1 /*
      2 ===========================================================================
      3 
      4 Doom 3 BFG Edition GPL Source Code
      5 Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company. 
      6 
      7 This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").  
      8 
      9 Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
     10 it under the terms of the GNU General Public License as published by
     11 the Free Software Foundation, either version 3 of the License, or
     12 (at your option) any later version.
     13 
     14 Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
     15 but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 GNU General Public License for more details.
     18 
     19 You should have received a copy of the GNU General Public License
     20 along with Doom 3 BFG Edition Source Code.  If not, see <http://www.gnu.org/licenses/>.
     21 
     22 In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code.  If not, please request a copy in writing from id Software at the address below.
     23 
     24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
     25 
     26 ===========================================================================
     27 */
     28 
     29 #ifndef __LEXER_H__
     30 #define __LEXER_H__
     31 
     32 /*
     33 ===============================================================================
     34 
     35 	Lexicographical parser
     36 
     37 	Does not use memory allocation during parsing. The lexer uses no
     38 	memory allocation if a source is loaded with LoadMemory().
     39 	However, idToken may still allocate memory for large strings.
     40 	
     41 	A number directly following the escape character '\' in a string is
     42 	assumed to be in decimal format instead of octal. Binary numbers of
     43 	the form 0b.. or 0B.. can also be used.
     44 
     45 ===============================================================================
     46 */
     47 
     48 // lexer flags: one BIT(n) value per flag, for the int 'flags' taken by the idLexer constructors and SetFlags()
     49 typedef enum {
     50 	LEXFL_NOERRORS						= BIT(0),	// don't print any errors
     51 	LEXFL_NOWARNINGS					= BIT(1),	// don't print any warnings
     52 	LEXFL_NOFATALERRORS					= BIT(2),	// errors aren't fatal
     53 	LEXFL_NOSTRINGCONCAT				= BIT(3),	// multiple strings separated by whitespace are not concatenated
     54 	LEXFL_NOSTRINGESCAPECHARS			= BIT(4),	// no escape characters inside strings
     55 	LEXFL_NODOLLARPRECOMPILE			= BIT(5),	// don't use the $ sign for precompilation
     56 	LEXFL_NOBASEINCLUDES				= BIT(6),	// don't include files embraced with < >
     57 	LEXFL_ALLOWPATHNAMES				= BIT(7),	// allow path separators in names
     58 	LEXFL_ALLOWNUMBERNAMES				= BIT(8),	// allow names to start with a number
     59 	LEXFL_ALLOWIPADDRESSES				= BIT(9),	// allow ip addresses to be parsed as numbers
     60 	LEXFL_ALLOWFLOATEXCEPTIONS			= BIT(10),	// allow float exceptions like 1.#INF or 1.#IND to be parsed
     61 	LEXFL_ALLOWMULTICHARLITERALS		= BIT(11),	// allow multi character literals
     62 	LEXFL_ALLOWBACKSLASHSTRINGCONCAT	= BIT(12),	// allow multiple strings separated by '\' to be concatenated
     63 	LEXFL_ONLYSTRINGS					= BIT(13)	// parse as whitespace delimited strings (quoted strings keep quotes)
     64 } lexerFlags_t;
     65 
     66 // punctuation ids: the values stored in punctuation_t::n and used by idLexer::GetPunctuationFromId() / GetPunctuationId()
     67 #define P_RSHIFT_ASSIGN				1
     68 #define P_LSHIFT_ASSIGN				2
     69 #define P_PARMS						3
     70 #define P_PRECOMPMERGE				4
     71 
     72 #define P_LOGIC_AND					5
     73 #define P_LOGIC_OR					6
     74 #define P_LOGIC_GEQ					7
     75 #define P_LOGIC_LEQ					8
     76 #define P_LOGIC_EQ					9
     77 #define P_LOGIC_UNEQ				10
     78 
     79 #define P_MUL_ASSIGN				11
     80 #define P_DIV_ASSIGN				12
     81 #define P_MOD_ASSIGN				13
     82 #define P_ADD_ASSIGN				14
     83 #define P_SUB_ASSIGN				15
     84 #define P_INC						16
     85 #define P_DEC						17
     86 
     87 #define P_BIN_AND_ASSIGN			18
     88 #define P_BIN_OR_ASSIGN				19
     89 #define P_BIN_XOR_ASSIGN			20
     90 #define P_RSHIFT					21
     91 #define P_LSHIFT					22
     92 
     93 #define P_POINTERREF				23
     94 #define P_CPP1						24
     95 #define P_CPP2						25
     96 #define P_MUL						26
     97 #define P_DIV						27
     98 #define P_MOD						28
     99 #define P_ADD						29
    100 #define P_SUB						30
    101 #define P_ASSIGN					31
    102 
    103 #define P_BIN_AND					32
    104 #define P_BIN_OR					33
    105 #define P_BIN_XOR					34
    106 #define P_BIN_NOT					35
    107 
    108 #define P_LOGIC_NOT					36
    109 #define P_LOGIC_GREATER				37
    110 #define P_LOGIC_LESS				38
    111 
    112 #define P_REF						39
    113 #define P_COMMA						40
    114 #define P_SEMICOLON					41
    115 #define P_COLON						42
    116 #define P_QUESTIONMARK				43
    117 
    118 #define P_PARENTHESESOPEN			44
    119 #define P_PARENTHESESCLOSE			45
    120 #define P_BRACEOPEN					46
    121 #define P_BRACECLOSE				47
    122 #define P_SQBRACKETOPEN				48
    123 #define P_SQBRACKETCLOSE			49
    124 #define P_BACKSLASH					50
    125 
    126 #define P_PRECOMP					51
    127 #define P_DOLLAR					52
    128 
    129 // punctuation: pairs the text of a punctuation with its id; arrays of these are handed to idLexer::SetPunctuations()
    130 typedef struct punctuation_s
    131 {
    132 	char *p;						// punctuation character(s); NOTE(review): non-const pointer - confirm whether tables are built from string literals
    133 	int n;							// punctuation id (presumably one of the P_* values - confirm against the tables)
    134 } punctuation_t;
    135 
    136 
    137 class idLexer {
    138 	// tokenizer: reads idToken tokens from a script that was loaded from a file or from memory
    139 	friend class idParser;
    140 
    141 public:
    142 					// constructors
    143 					idLexer();
    144 					idLexer( int flags );
    145 					idLexer( const char *filename, int flags = 0, bool OSPath = false );
    146 					idLexer( const char *ptr, int length, const char *name, int flags = 0 );
    147 					// destructor
    148 					~idLexer();
    149 					// load a script from the given file; OSPath presumably marks filename as a native OS path (TODO confirm)
    150 	int				LoadFile( const char *filename, bool OSPath = false );
    151 					// load a script from the given memory with the given length and a specified line offset,
    152 					// so source strings extracted from a file can still refer to proper line numbers in the file
    153 					// NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0'
    154 	int				LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 );
    155 					// free the script
    156 	void			FreeSource();
    157 					// returns true if a script is loaded
    158 	int				IsLoaded() { return idLexer::loaded; };
    159 					// read a token
    160 	int				ReadToken( idToken *token );
    161 					// expect a certain token, reads the token when available
    162 	int				ExpectTokenString( const char *string );
    163 					// expect a certain token type
    164 	int				ExpectTokenType( int type, int subtype, idToken *token );
    165 					// expect a token
    166 	int				ExpectAnyToken( idToken *token );
    167 					// returns true when the token is available
    168 	int				CheckTokenString( const char *string );
    169 					// returns true and reads the token when a token with the given type is available
    170 	int				CheckTokenType( int type, int subtype, idToken *token );
    171 					// returns true if the next token equals the given string but does not remove the token from the source
    172 	int				PeekTokenString( const char *string );
    173 					// returns true if the next token equals the given type but does not remove the token from the source
    174 	int				PeekTokenType( int type, int subtype, idToken *token );
    175 					// skip tokens until the given token string is read
    176 	int				SkipUntilString( const char *string );
    177 					// skip the rest of the current line
    178 	int				SkipRestOfLine();
    179 					// skip the braced section
    180 	int				SkipBracedSection( bool parseFirstBrace = true );
    181 					// skips spaces, tabs, C-like comments etc. Returns false if there is no token left to read.
    182 	bool			SkipWhiteSpace( bool currentLine );
    183 					// unread the given token
    184 	void			UnreadToken( const idToken *token );
    185 					// read a token only if on the same line
    186 	int				ReadTokenOnLine( idToken *token );
    187 		
    188 					// returns the rest of the current line
    189 	const char*		ReadRestOfLine(idStr& out);
    190 
    191 					// read a signed integer
    192 	int				ParseInt();
    193 					// read a boolean
    194 	bool			ParseBool();
    195 					// read a floating point number.  If errorFlag is NULL, a non-numeric token will
    196 					// issue an Error().  If it isn't NULL, it will issue a Warning() and set *errorFlag = true
    197 	float			ParseFloat( bool *errorFlag = NULL );
    198 					// parse matrices with floats
    199 	int				Parse1DMatrix( int x, float *m );
    200 	int				Parse2DMatrix( int y, int x, float *m );
    201 	int				Parse3DMatrix( int z, int y, int x, float *m );
    202 					// parse a braced section into a string
    203 	const char *	ParseBracedSection( idStr &out );
    204 					// parse a braced section into a string, maintaining indents and newlines
    205 	const char *	ParseBracedSectionExact ( idStr &out, int tabs = -1 );
    206 					// parse the rest of the line
    207 	const char *	ParseRestOfLine( idStr &out );
    208 					// pulls the entire line, including the \n at the end
    209 	const char *	ParseCompleteLine( idStr &out );
    210 					// retrieves the white space characters before the last read token
    211 	int				GetLastWhiteSpace( idStr &whiteSpace ) const;
    212 					// returns start index into text buffer of last white space
    213 	int				GetLastWhiteSpaceStart() const;
    214 					// returns end index into text buffer of last white space
    215 	int				GetLastWhiteSpaceEnd() const;
    216 					// set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example
    217 	void			SetPunctuations( const punctuation_t *p );
    218 					// returns a pointer to the punctuation with the given id
    219 	const char *	GetPunctuationFromId( int id );
    220 					// get the id for the given punctuation
    221 	int				GetPunctuationId( const char *p );
    222 					// set lexer flags
    223 	void			SetFlags( int flags );
    224 					// get lexer flags
    225 	int				GetFlags();
    226 					// reset the lexer
    227 	void			Reset();
    228 					// returns true if at the end of the file
    229 	bool			EndOfFile();
    230 					// returns the current filename
    231 	const char *	GetFileName();
    232 					// get offset in script
    233 	const int		GetFileOffset();
    234 					// get file time
    235 	const ID_TIME_T	GetFileTime();
    236 					// returns the current line number
    237 	const int		GetLineNum();
    238 					// print an error message
    239 	void			Error( VERIFY_FORMAT_STRING const char *str, ... );
    240 					// print a warning message
    241 	void			Warning( VERIFY_FORMAT_STRING const char *str, ... );
    242 					// returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set
    243 	bool			HadError() const;
    244 
    245 					// set the base folder to load files from
    246 	static void		SetBaseFolder( const char *path );
    247 
    248 private:
    249 	int				loaded;					// set when a script file is loaded from file or memory
    250 	idStr			filename;				// file name of the script
    251 	int				allocated;				// true if buffer memory was allocated
    252 	const char *	buffer;					// buffer containing the script
    253 	const char *	script_p;				// current pointer in the script
    254 	const char *	end_p;					// pointer to the end of the script
    255 	const char *	lastScript_p;			// script pointer before reading token
    256 	const char *	whiteSpaceStart_p;		// start of last white space
    257 	const char *	whiteSpaceEnd_p;		// end of last white space
    258 	ID_TIME_T			fileTime;				// file time
    259 	int				length;					// length of the script in bytes
    260 	int				line;					// current line in script
    261 	int				lastline;				// line before reading token
    262 	int				tokenavailable;			// set by unreadToken
    263 	int				flags;					// several script flags
    264 	const punctuation_t *punctuations;		// the punctuations used in the script
    265 	int *			punctuationtable;		// ASCII table with punctuations
    266 	int *			nextpunctuation;		// next punctuation in chain
    267 	idToken			token;					// available token
    268 	idLexer *		next;					// next script in a chain
    269 	bool			hadError;				// set by idLexer::Error, even if the error is suppressed
    270 
    271 	static char		baseFolder[ 256 ];		// base folder to load files from
    272 
    273 private:
    274 	void			CreatePunctuationTable( const punctuation_t *punctuations );
    275 	int				ReadWhiteSpace();
    276 	int				ReadEscapeCharacter( char *ch );
    277 	int				ReadString( idToken *token, int quote );
    278 	int				ReadName( idToken *token );
    279 	int				ReadNumber( idToken *token );
    280 	int				ReadPunctuation( idToken *token );
    281 	int				ReadPrimitive( idToken *token );
    282 	int				CheckString( const char *str ) const;
    283 	int				NumLinesCrossed();
    284 };
    285 
    286 ID_INLINE const char *idLexer::GetFileName() {	// hands back the stored script file name
    287 	return this->filename;
    288 }
    289 
    290 ID_INLINE const int idLexer::GetFileOffset() {	// distance of the read pointer from the start of the buffer
    291 	return this->script_p - this->buffer;
    292 }
    293 
    294 ID_INLINE const ID_TIME_T idLexer::GetFileTime() {	// hands back the stored file time
    295 	return this->fileTime;
    296 }
    297 
    298 ID_INLINE const int idLexer::GetLineNum() {	// hands back the current line counter
    299 	return this->line;
    300 }
    301 
    302 ID_INLINE void idLexer::SetFlags( int flags ) {	// the parameter shadows the member, so the member is named via this->
    303 	this->flags = flags;
    304 }
    305 
    306 ID_INLINE int idLexer::GetFlags() {	// hands back the stored flag bits
    307 	return this->flags;
    308 }
    309 
    310 #endif /* !__LEXER_H__ */
    311