/* lex.c (21594B) */
1 #include "c.h" 2 #include <float.h> 3 #include <errno.h> 4 5 6 #define MAXTOKEN 32 7 8 enum { BLANK=01, NEWLINE=02, LETTER=04, 9 DIGIT=010, HEX=020, OTHER=040 }; 10 11 static unsigned char map[256] = { /* 000 nul */ 0, 12 /* 001 soh */ 0, 13 /* 002 stx */ 0, 14 /* 003 etx */ 0, 15 /* 004 eot */ 0, 16 /* 005 enq */ 0, 17 /* 006 ack */ 0, 18 /* 007 bel */ 0, 19 /* 010 bs */ 0, 20 /* 011 ht */ BLANK, 21 /* 012 nl */ NEWLINE, 22 /* 013 vt */ BLANK, 23 /* 014 ff */ BLANK, 24 /* 015 cr */ 0, 25 /* 016 so */ 0, 26 /* 017 si */ 0, 27 /* 020 dle */ 0, 28 /* 021 dc1 */ 0, 29 /* 022 dc2 */ 0, 30 /* 023 dc3 */ 0, 31 /* 024 dc4 */ 0, 32 /* 025 nak */ 0, 33 /* 026 syn */ 0, 34 /* 027 etb */ 0, 35 /* 030 can */ 0, 36 /* 031 em */ 0, 37 /* 032 sub */ 0, 38 /* 033 esc */ 0, 39 /* 034 fs */ 0, 40 /* 035 gs */ 0, 41 /* 036 rs */ 0, 42 /* 037 us */ 0, 43 /* 040 sp */ BLANK, 44 /* 041 ! */ OTHER, 45 /* 042 " */ OTHER, 46 /* 043 # */ OTHER, 47 /* 044 $ */ 0, 48 /* 045 % */ OTHER, 49 /* 046 & */ OTHER, 50 /* 047 ' */ OTHER, 51 /* 050 ( */ OTHER, 52 /* 051 ) */ OTHER, 53 /* 052 * */ OTHER, 54 /* 053 + */ OTHER, 55 /* 054 , */ OTHER, 56 /* 055 - */ OTHER, 57 /* 056 . */ OTHER, 58 /* 057 / */ OTHER, 59 /* 060 0 */ DIGIT, 60 /* 061 1 */ DIGIT, 61 /* 062 2 */ DIGIT, 62 /* 063 3 */ DIGIT, 63 /* 064 4 */ DIGIT, 64 /* 065 5 */ DIGIT, 65 /* 066 6 */ DIGIT, 66 /* 067 7 */ DIGIT, 67 /* 070 8 */ DIGIT, 68 /* 071 9 */ DIGIT, 69 /* 072 : */ OTHER, 70 /* 073 ; */ OTHER, 71 /* 074 < */ OTHER, 72 /* 075 = */ OTHER, 73 /* 076 > */ OTHER, 74 /* 077 ? 
*/ OTHER, 75 /* 100 @ */ 0, 76 /* 101 A */ LETTER|HEX, 77 /* 102 B */ LETTER|HEX, 78 /* 103 C */ LETTER|HEX, 79 /* 104 D */ LETTER|HEX, 80 /* 105 E */ LETTER|HEX, 81 /* 106 F */ LETTER|HEX, 82 /* 107 G */ LETTER, 83 /* 110 H */ LETTER, 84 /* 111 I */ LETTER, 85 /* 112 J */ LETTER, 86 /* 113 K */ LETTER, 87 /* 114 L */ LETTER, 88 /* 115 M */ LETTER, 89 /* 116 N */ LETTER, 90 /* 117 O */ LETTER, 91 /* 120 P */ LETTER, 92 /* 121 Q */ LETTER, 93 /* 122 R */ LETTER, 94 /* 123 S */ LETTER, 95 /* 124 T */ LETTER, 96 /* 125 U */ LETTER, 97 /* 126 V */ LETTER, 98 /* 127 W */ LETTER, 99 /* 130 X */ LETTER, 100 /* 131 Y */ LETTER, 101 /* 132 Z */ LETTER, 102 /* 133 [ */ OTHER, 103 /* 134 \ */ OTHER, 104 /* 135 ] */ OTHER, 105 /* 136 ^ */ OTHER, 106 /* 137 _ */ LETTER, 107 /* 140 ` */ 0, 108 /* 141 a */ LETTER|HEX, 109 /* 142 b */ LETTER|HEX, 110 /* 143 c */ LETTER|HEX, 111 /* 144 d */ LETTER|HEX, 112 /* 145 e */ LETTER|HEX, 113 /* 146 f */ LETTER|HEX, 114 /* 147 g */ LETTER, 115 /* 150 h */ LETTER, 116 /* 151 i */ LETTER, 117 /* 152 j */ LETTER, 118 /* 153 k */ LETTER, 119 /* 154 l */ LETTER, 120 /* 155 m */ LETTER, 121 /* 156 n */ LETTER, 122 /* 157 o */ LETTER, 123 /* 160 p */ LETTER, 124 /* 161 q */ LETTER, 125 /* 162 r */ LETTER, 126 /* 163 s */ LETTER, 127 /* 164 t */ LETTER, 128 /* 165 u */ LETTER, 129 /* 166 v */ LETTER, 130 /* 167 w */ LETTER, 131 /* 170 x */ LETTER, 132 /* 171 y */ LETTER, 133 /* 172 z */ LETTER, 134 /* 173 { */ OTHER, 135 /* 174 | */ OTHER, 136 /* 175 } */ OTHER, 137 /* 176 ~ */ OTHER, }; 138 static struct symbol tval; 139 static char cbuf[BUFSIZE+1]; 140 static unsigned int wcbuf[BUFSIZE+1]; 141 142 Coordinate src; /* current source coordinate */ 143 int t; 144 char *token; /* current token */ 145 Symbol tsym; /* symbol table entry for current token */ 146 147 static void *cput(int c, void *cl); 148 static void *wcput(int c, void *cl); 149 static void *scon(int q, void *put(int c, void *cl), void *cl); 150 static int backslash(int q); 151 static 
Symbol fcon(void); 152 static Symbol icon(unsigned long, int, int); 153 static void ppnumber(char *); 154 155 int gettok(void) { 156 for (;;) { 157 register unsigned char *rcp = cp; 158 while (map[*rcp]&BLANK) 159 rcp++; 160 if (limit - rcp < MAXTOKEN) { 161 cp = rcp; 162 fillbuf(); 163 rcp = cp; 164 } 165 src.file = file; 166 src.x = (char *)rcp - line; 167 src.y = lineno; 168 cp = rcp + 1; 169 switch (*rcp++) { 170 case '/': if (*rcp == '*') { 171 int c = 0; 172 for (rcp++; *rcp != '/' || c != '*'; ) 173 if (map[*rcp]&NEWLINE) { 174 if (rcp < limit) 175 c = *rcp; 176 cp = rcp + 1; 177 nextline(); 178 rcp = cp; 179 if (rcp == limit) 180 break; 181 } else 182 c = *rcp++; 183 if (rcp < limit) 184 rcp++; 185 else 186 error("unclosed comment\n"); 187 cp = rcp; 188 continue; 189 } 190 return '/'; 191 case '<': 192 if (*rcp == '=') return cp++, LEQ; 193 if (*rcp == '<') return cp++, LSHIFT; 194 return '<'; 195 case '>': 196 if (*rcp == '=') return cp++, GEQ; 197 if (*rcp == '>') return cp++, RSHIFT; 198 return '>'; 199 case '-': 200 if (*rcp == '>') return cp++, DEREF; 201 if (*rcp == '-') return cp++, DECR; 202 return '-'; 203 case '=': return *rcp == '=' ? cp++, EQL : '='; 204 case '!': return *rcp == '=' ? cp++, NEQ : '!'; 205 case '|': return *rcp == '|' ? cp++, OROR : '|'; 206 case '&': return *rcp == '&' ? cp++, ANDAND : '&'; 207 case '+': return *rcp == '+' ? 
cp++, INCR : '+'; 208 case ';': case ',': case ':': 209 case '*': case '~': case '%': case '^': case '?': 210 case '[': case ']': case '{': case '}': case '(': case ')': 211 return rcp[-1]; 212 case '\n': case '\v': case '\r': case '\f': 213 nextline(); 214 if (cp == limit) { 215 tsym = NULL; 216 return EOI; 217 } 218 continue; 219 220 case 'i': 221 if (rcp[0] == 'f' 222 && !(map[rcp[1]]&(DIGIT|LETTER))) { 223 cp = rcp + 1; 224 return IF; 225 } 226 if (rcp[0] == 'n' 227 && rcp[1] == 't' 228 && !(map[rcp[2]]&(DIGIT|LETTER))) { 229 cp = rcp + 2; 230 tsym = inttype->u.sym; 231 return INT; 232 } 233 goto id; 234 case 'h': case 'j': case 'k': case 'm': case 'n': case 'o': 235 case 'p': case 'q': case 'x': case 'y': case 'z': 236 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 237 case 'G': case 'H': case 'I': case 'J': case 'K': 238 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 239 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 240 case 'Y': case 'Z': 241 id: 242 if (limit - rcp < MAXLINE) { 243 cp = rcp - 1; 244 fillbuf(); 245 rcp = ++cp; 246 } 247 assert(cp == rcp); 248 token = (char *)rcp - 1; 249 while (map[*rcp]&(DIGIT|LETTER)) 250 rcp++; 251 token = stringn(token, (char *)rcp - token); 252 tsym = lookup(token, identifiers); 253 cp = rcp; 254 return ID; 255 case '0': case '1': case '2': case '3': case '4': 256 case '5': case '6': case '7': case '8': case '9': { 257 unsigned long n = 0; 258 if (limit - rcp < MAXLINE) { 259 cp = rcp - 1; 260 fillbuf(); 261 rcp = ++cp; 262 } 263 assert(cp == rcp); 264 token = (char *)rcp - 1; 265 if (*token == '0' && (*rcp == 'x' || *rcp == 'X')) { 266 int d, overflow = 0; 267 while (*++rcp) { 268 if (map[*rcp]&DIGIT) 269 d = *rcp - '0'; 270 else if (*rcp >= 'a' && *rcp <= 'f') 271 d = *rcp - 'a' + 10; 272 else if (*rcp >= 'A' && *rcp <= 'F') 273 d = *rcp - 'A' + 10; 274 else 275 break; 276 if (n&~(~0UL >> 4)) 277 overflow = 1; 278 else 279 n = (n<<4) + d; 280 } 281 if ((char *)rcp - token <= 2) 
282 error("invalid hexadecimal constant `%S'\n", token, (char *)rcp-token); 283 cp = rcp; 284 tsym = icon(n, overflow, 16); 285 } else if (*token == '0') { 286 int err = 0, overflow = 0; 287 for ( ; map[*rcp]&DIGIT; rcp++) { 288 if (*rcp == '8' || *rcp == '9') 289 err = 1; 290 if (n&~(~0UL >> 3)) 291 overflow = 1; 292 else 293 n = (n<<3) + (*rcp - '0'); 294 } 295 if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') { 296 cp = rcp; 297 tsym = fcon(); 298 return FCON; 299 } 300 cp = rcp; 301 tsym = icon(n, overflow, 8); 302 if (err) 303 error("invalid octal constant `%S'\n", token, (char*)cp-token); 304 } else { 305 int overflow = 0; 306 for (n = *token - '0'; map[*rcp]&DIGIT; ) { 307 int d = *rcp++ - '0'; 308 if (n > (ULONG_MAX - d)/10) 309 overflow = 1; 310 else 311 n = 10*n + d; 312 } 313 if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') { 314 cp = rcp; 315 tsym = fcon(); 316 return FCON; 317 } 318 cp = rcp; 319 tsym = icon(n, overflow, 10); 320 } 321 return ICON; 322 } 323 case '.': 324 if (rcp[0] == '.' 
&& rcp[1] == '.') { 325 cp += 2; 326 return ELLIPSIS; 327 } 328 if ((map[*rcp]&DIGIT) == 0) 329 return '.'; 330 if (limit - rcp < MAXLINE) { 331 cp = rcp - 1; 332 fillbuf(); 333 rcp = ++cp; 334 } 335 assert(cp == rcp); 336 cp = rcp - 1; 337 token = (char *)cp; 338 tsym = fcon(); 339 return FCON; 340 case 'L': 341 if (*rcp == '\'') { 342 unsigned int *s = scon(*cp, wcput, wcbuf); 343 if (s - wcbuf > 2) 344 warning("excess characters in wide-character literal ignored\n"); 345 tval.type = widechar; 346 tval.u.c.v.u = wcbuf[0]; 347 tsym = &tval; 348 return ICON; 349 } else if (*rcp == '"') { 350 unsigned int *s = scon(*cp, wcput, wcbuf); 351 tval.type = array(widechar, s - wcbuf, 0); 352 tval.u.c.v.p = wcbuf; 353 tsym = &tval; 354 return SCON; 355 } else 356 goto id; 357 case '\'': { 358 char *s = scon(*--cp, cput, cbuf); 359 if (s - cbuf > 2) 360 warning("excess characters in multibyte character literal ignored\n"); 361 tval.type = inttype; 362 if (chartype->op == INT) 363 tval.u.c.v.i = extend(cbuf[0], chartype); 364 else 365 tval.u.c.v.i = cbuf[0]&0xFF; 366 tsym = &tval; 367 return ICON; 368 } 369 case '"': { 370 char *s = scon(*--cp, cput, cbuf); 371 tval.type = array(chartype, s - cbuf, 0); 372 tval.u.c.v.p = cbuf; 373 tsym = &tval; 374 return SCON; 375 } 376 case 'a': 377 if (rcp[0] == 'u' 378 && rcp[1] == 't' 379 && rcp[2] == 'o' 380 && !(map[rcp[3]]&(DIGIT|LETTER))) { 381 cp = rcp + 3; 382 return AUTO; 383 } 384 goto id; 385 case 'b': 386 if (rcp[0] == 'r' 387 && rcp[1] == 'e' 388 && rcp[2] == 'a' 389 && rcp[3] == 'k' 390 && !(map[rcp[4]]&(DIGIT|LETTER))) { 391 cp = rcp + 4; 392 return BREAK; 393 } 394 goto id; 395 case 'c': 396 if (rcp[0] == 'a' 397 && rcp[1] == 's' 398 && rcp[2] == 'e' 399 && !(map[rcp[3]]&(DIGIT|LETTER))) { 400 cp = rcp + 3; 401 return CASE; 402 } 403 if (rcp[0] == 'h' 404 && rcp[1] == 'a' 405 && rcp[2] == 'r' 406 && !(map[rcp[3]]&(DIGIT|LETTER))) { 407 cp = rcp + 3; 408 tsym = chartype->u.sym; 409 return CHAR; 410 } 411 if (rcp[0] == 'o' 
412 && rcp[1] == 'n' 413 && rcp[2] == 's' 414 && rcp[3] == 't' 415 && !(map[rcp[4]]&(DIGIT|LETTER))) { 416 cp = rcp + 4; 417 return CONST; 418 } 419 if (rcp[0] == 'o' 420 && rcp[1] == 'n' 421 && rcp[2] == 't' 422 && rcp[3] == 'i' 423 && rcp[4] == 'n' 424 && rcp[5] == 'u' 425 && rcp[6] == 'e' 426 && !(map[rcp[7]]&(DIGIT|LETTER))) { 427 cp = rcp + 7; 428 return CONTINUE; 429 } 430 goto id; 431 case 'd': 432 if (rcp[0] == 'e' 433 && rcp[1] == 'f' 434 && rcp[2] == 'a' 435 && rcp[3] == 'u' 436 && rcp[4] == 'l' 437 && rcp[5] == 't' 438 && !(map[rcp[6]]&(DIGIT|LETTER))) { 439 cp = rcp + 6; 440 return DEFAULT; 441 } 442 if (rcp[0] == 'o' 443 && rcp[1] == 'u' 444 && rcp[2] == 'b' 445 && rcp[3] == 'l' 446 && rcp[4] == 'e' 447 && !(map[rcp[5]]&(DIGIT|LETTER))) { 448 cp = rcp + 5; 449 tsym = doubletype->u.sym; 450 return DOUBLE; 451 } 452 if (rcp[0] == 'o' 453 && !(map[rcp[1]]&(DIGIT|LETTER))) { 454 cp = rcp + 1; 455 return DO; 456 } 457 goto id; 458 case 'e': 459 if (rcp[0] == 'l' 460 && rcp[1] == 's' 461 && rcp[2] == 'e' 462 && !(map[rcp[3]]&(DIGIT|LETTER))) { 463 cp = rcp + 3; 464 return ELSE; 465 } 466 if (rcp[0] == 'n' 467 && rcp[1] == 'u' 468 && rcp[2] == 'm' 469 && !(map[rcp[3]]&(DIGIT|LETTER))) { 470 cp = rcp + 3; 471 return ENUM; 472 } 473 if (rcp[0] == 'x' 474 && rcp[1] == 't' 475 && rcp[2] == 'e' 476 && rcp[3] == 'r' 477 && rcp[4] == 'n' 478 && !(map[rcp[5]]&(DIGIT|LETTER))) { 479 cp = rcp + 5; 480 return EXTERN; 481 } 482 goto id; 483 case 'f': 484 if (rcp[0] == 'l' 485 && rcp[1] == 'o' 486 && rcp[2] == 'a' 487 && rcp[3] == 't' 488 && !(map[rcp[4]]&(DIGIT|LETTER))) { 489 cp = rcp + 4; 490 tsym = floattype->u.sym; 491 return FLOAT; 492 } 493 if (rcp[0] == 'o' 494 && rcp[1] == 'r' 495 && !(map[rcp[2]]&(DIGIT|LETTER))) { 496 cp = rcp + 2; 497 return FOR; 498 } 499 goto id; 500 case 'g': 501 if (rcp[0] == 'o' 502 && rcp[1] == 't' 503 && rcp[2] == 'o' 504 && !(map[rcp[3]]&(DIGIT|LETTER))) { 505 cp = rcp + 3; 506 return GOTO; 507 } 508 goto id; 509 case 'l': 510 if 
(rcp[0] == 'o' 511 && rcp[1] == 'n' 512 && rcp[2] == 'g' 513 && !(map[rcp[3]]&(DIGIT|LETTER))) { 514 cp = rcp + 3; 515 return LONG; 516 } 517 goto id; 518 case 'r': 519 if (rcp[0] == 'e' 520 && rcp[1] == 'g' 521 && rcp[2] == 'i' 522 && rcp[3] == 's' 523 && rcp[4] == 't' 524 && rcp[5] == 'e' 525 && rcp[6] == 'r' 526 && !(map[rcp[7]]&(DIGIT|LETTER))) { 527 cp = rcp + 7; 528 return REGISTER; 529 } 530 if (rcp[0] == 'e' 531 && rcp[1] == 't' 532 && rcp[2] == 'u' 533 && rcp[3] == 'r' 534 && rcp[4] == 'n' 535 && !(map[rcp[5]]&(DIGIT|LETTER))) { 536 cp = rcp + 5; 537 return RETURN; 538 } 539 goto id; 540 case 's': 541 if (rcp[0] == 'h' 542 && rcp[1] == 'o' 543 && rcp[2] == 'r' 544 && rcp[3] == 't' 545 && !(map[rcp[4]]&(DIGIT|LETTER))) { 546 cp = rcp + 4; 547 return SHORT; 548 } 549 if (rcp[0] == 'i' 550 && rcp[1] == 'g' 551 && rcp[2] == 'n' 552 && rcp[3] == 'e' 553 && rcp[4] == 'd' 554 && !(map[rcp[5]]&(DIGIT|LETTER))) { 555 cp = rcp + 5; 556 return SIGNED; 557 } 558 if (rcp[0] == 'i' 559 && rcp[1] == 'z' 560 && rcp[2] == 'e' 561 && rcp[3] == 'o' 562 && rcp[4] == 'f' 563 && !(map[rcp[5]]&(DIGIT|LETTER))) { 564 cp = rcp + 5; 565 return SIZEOF; 566 } 567 if (rcp[0] == 't' 568 && rcp[1] == 'a' 569 && rcp[2] == 't' 570 && rcp[3] == 'i' 571 && rcp[4] == 'c' 572 && !(map[rcp[5]]&(DIGIT|LETTER))) { 573 cp = rcp + 5; 574 return STATIC; 575 } 576 if (rcp[0] == 't' 577 && rcp[1] == 'r' 578 && rcp[2] == 'u' 579 && rcp[3] == 'c' 580 && rcp[4] == 't' 581 && !(map[rcp[5]]&(DIGIT|LETTER))) { 582 cp = rcp + 5; 583 return STRUCT; 584 } 585 if (rcp[0] == 'w' 586 && rcp[1] == 'i' 587 && rcp[2] == 't' 588 && rcp[3] == 'c' 589 && rcp[4] == 'h' 590 && !(map[rcp[5]]&(DIGIT|LETTER))) { 591 cp = rcp + 5; 592 return SWITCH; 593 } 594 goto id; 595 case 't': 596 if (rcp[0] == 'y' 597 && rcp[1] == 'p' 598 && rcp[2] == 'e' 599 && rcp[3] == 'd' 600 && rcp[4] == 'e' 601 && rcp[5] == 'f' 602 && !(map[rcp[6]]&(DIGIT|LETTER))) { 603 cp = rcp + 6; 604 return TYPEDEF; 605 } 606 goto id; 607 case 'u': 608 if 
(rcp[0] == 'n' 609 && rcp[1] == 'i' 610 && rcp[2] == 'o' 611 && rcp[3] == 'n' 612 && !(map[rcp[4]]&(DIGIT|LETTER))) { 613 cp = rcp + 4; 614 return UNION; 615 } 616 if (rcp[0] == 'n' 617 && rcp[1] == 's' 618 && rcp[2] == 'i' 619 && rcp[3] == 'g' 620 && rcp[4] == 'n' 621 && rcp[5] == 'e' 622 && rcp[6] == 'd' 623 && !(map[rcp[7]]&(DIGIT|LETTER))) { 624 cp = rcp + 7; 625 return UNSIGNED; 626 } 627 goto id; 628 case 'v': 629 if (rcp[0] == 'o' 630 && rcp[1] == 'i' 631 && rcp[2] == 'd' 632 && !(map[rcp[3]]&(DIGIT|LETTER))) { 633 cp = rcp + 3; 634 tsym = voidtype->u.sym; 635 return VOID; 636 } 637 if (rcp[0] == 'o' 638 && rcp[1] == 'l' 639 && rcp[2] == 'a' 640 && rcp[3] == 't' 641 && rcp[4] == 'i' 642 && rcp[5] == 'l' 643 && rcp[6] == 'e' 644 && !(map[rcp[7]]&(DIGIT|LETTER))) { 645 cp = rcp + 7; 646 return VOLATILE; 647 } 648 goto id; 649 case 'w': 650 if (rcp[0] == 'h' 651 && rcp[1] == 'i' 652 && rcp[2] == 'l' 653 && rcp[3] == 'e' 654 && !(map[rcp[4]]&(DIGIT|LETTER))) { 655 cp = rcp + 4; 656 return WHILE; 657 } 658 goto id; 659 case '_': 660 if (rcp[0] == '_' 661 && rcp[1] == 't' 662 && rcp[2] == 'y' 663 && rcp[3] == 'p' 664 && rcp[4] == 'e' 665 && rcp[5] == 'c' 666 && rcp[6] == 'o' 667 && rcp[7] == 'd' 668 && rcp[8] == 'e' 669 && !(map[rcp[9]]&(DIGIT|LETTER))) { 670 cp = rcp + 9; 671 return TYPECODE; 672 } 673 if (rcp[0] == '_' 674 && rcp[1] == 'f' 675 && rcp[2] == 'i' 676 && rcp[3] == 'r' 677 && rcp[4] == 's' 678 && rcp[5] == 't' 679 && rcp[6] == 'a' 680 && rcp[7] == 'r' 681 && rcp[8] == 'g' 682 && !(map[rcp[9]]&(DIGIT|LETTER))) { 683 cp = rcp + 9; 684 return FIRSTARG; 685 } 686 goto id; 687 default: 688 if ((map[cp[-1]]&BLANK) == 0) 689 if (cp[-1] < ' ' || cp[-1] >= 0177) 690 error("illegal character `\\0%o'\n", cp[-1]); 691 else 692 error("illegal character `%c'\n", cp[-1]); 693 } 694 } 695 } 696 static Symbol icon(unsigned long n, int overflow, int base) { 697 if ((*cp=='u'||*cp=='U') && (cp[1]=='l'||cp[1]=='L') 698 || (*cp=='l'||*cp=='L') && 
(cp[1]=='u'||cp[1]=='U')) { 699 tval.type = unsignedlong; 700 cp += 2; 701 } else if (*cp == 'u' || *cp == 'U') { 702 if (overflow || n > unsignedtype->u.sym->u.limits.max.i) 703 tval.type = unsignedlong; 704 else 705 tval.type = unsignedtype; 706 cp += 1; 707 } else if (*cp == 'l' || *cp == 'L') { 708 if (overflow || n > longtype->u.sym->u.limits.max.i) 709 tval.type = unsignedlong; 710 else 711 tval.type = longtype; 712 cp += 1; 713 } else if (overflow || n > longtype->u.sym->u.limits.max.i) 714 tval.type = unsignedlong; 715 else if (n > inttype->u.sym->u.limits.max.i) 716 tval.type = longtype; 717 else if (base != 10 && n > inttype->u.sym->u.limits.max.i) 718 tval.type = unsignedtype; 719 else 720 tval.type = inttype; 721 switch (tval.type->op) { 722 case INT: 723 if (overflow || n > tval.type->u.sym->u.limits.max.i) { 724 warning("overflow in constant `%S'\n", token, 725 (char*)cp - token); 726 tval.u.c.v.i = tval.type->u.sym->u.limits.max.i; 727 } else 728 tval.u.c.v.i = n; 729 break; 730 case UNSIGNED: 731 if (overflow || n > tval.type->u.sym->u.limits.max.u) { 732 warning("overflow in constant `%S'\n", token, 733 (char*)cp - token); 734 tval.u.c.v.u = tval.type->u.sym->u.limits.max.u; 735 } else 736 tval.u.c.v.u = n; 737 break; 738 default: assert(0); 739 } 740 ppnumber("integer"); 741 return &tval; 742 } 743 static void ppnumber(char *which) { 744 unsigned char *rcp = cp--; 745 746 for ( ; (map[*cp]&(DIGIT|LETTER)) || *cp == '.'; cp++) 747 if ((cp[0] == 'E' || cp[0] == 'e') 748 && (cp[1] == '-' || cp[1] == '+')) 749 cp++; 750 if (cp > rcp) 751 error("`%S' is a preprocessing number but an invalid %s constant\n", token, 752 753 (char*)cp-token, which); 754 } 755 static Symbol fcon(void) { 756 if (*cp == '.') 757 do 758 cp++; 759 while (map[*cp]&DIGIT); 760 if (*cp == 'e' || *cp == 'E') { 761 if (*++cp == '-' || *cp == '+') 762 cp++; 763 if (map[*cp]&DIGIT) 764 do 765 cp++; 766 while (map[*cp]&DIGIT); 767 else 768 error("invalid floating constant `%S'\n", 
token, 769 (char*)cp - token); 770 } 771 772 errno = 0; 773 tval.u.c.v.d = strtod(token, NULL); 774 if (errno == ERANGE) 775 warning("overflow in floating constant `%S'\n", token, 776 (char*)cp - token); 777 if (*cp == 'f' || *cp == 'F') { 778 ++cp; 779 if (tval.u.c.v.d > floattype->u.sym->u.limits.max.d) 780 warning("overflow in floating constant `%S'\n", token, 781 (char*)cp - token); 782 tval.type = floattype; 783 } else if (*cp == 'l' || *cp == 'L') { 784 cp++; 785 tval.type = longdouble; 786 } else { 787 if (tval.u.c.v.d > doubletype->u.sym->u.limits.max.d) 788 warning("overflow in floating constant `%S'\n", token, 789 (char*)cp - token); 790 tval.type = doubletype; 791 } 792 ppnumber("floating"); 793 return &tval; 794 } 795 796 static void *cput(int c, void *cl) { 797 char *s = cl; 798 799 if (c < 0 || c > 255) 800 warning("overflow in escape sequence with resulting value `%d'\n", c); 801 *s++ = c; 802 return s; 803 } 804 805 static void *wcput(int c, void *cl) { 806 unsigned int *s = cl; 807 808 *s++ = c; 809 return s; 810 } 811 812 static void *scon(int q, void *put(int c, void *cl), void *cl) { 813 int n = 0, nbad = 0; 814 815 do { 816 cp++; 817 while (*cp != q) { 818 int c; 819 if (map[*cp]&NEWLINE) { 820 if (cp < limit) 821 break; 822 cp++; 823 nextline(); 824 if (cp == limit) 825 break; 826 continue; 827 } 828 c = *cp++; 829 if (c == '\\') { 830 if (map[*cp]&NEWLINE) { 831 if (cp < limit) 832 break; 833 cp++; 834 nextline(); 835 } 836 if (limit - cp < MAXTOKEN) 837 fillbuf(); 838 c = backslash(q); 839 } else if (c < 0 || c > 255 || map[c] == 0) 840 nbad++; 841 if (n++ < BUFSIZE) 842 cl = put(c, cl); 843 } 844 if (*cp == q) 845 cp++; 846 else 847 error("missing %c\n", q); 848 } while (q == '"' && getchr() == '"'); 849 cl = put(0, cl); 850 if (n >= BUFSIZE) 851 error("%s literal too long\n", q == '"' ? 
"string" : "character"); 852 if (Aflag >= 2 && q == '"' && n > 509) 853 warning("more than 509 characters in a string literal\n"); 854 if (Aflag >= 2 && nbad > 0) 855 warning("%s literal contains non-portable characters\n", 856 q == '"' ? "string" : "character"); 857 return cl; 858 } 859 int getchr(void) { 860 for (;;) { 861 while (map[*cp]&BLANK) 862 cp++; 863 if (!(map[*cp]&NEWLINE)) 864 return *cp; 865 cp++; 866 nextline(); 867 if (cp == limit) 868 return EOI; 869 } 870 } 871 static int backslash(int q) { 872 unsigned int c; 873 874 switch (*cp++) { 875 case 'a': return 7; 876 case 'b': return '\b'; 877 case 'f': return '\f'; 878 case 'n': return '\n'; 879 case 'r': return '\r'; 880 case 't': return '\t'; 881 case 'v': return '\v'; 882 case '\'': case '"': case '\\': case '\?': break; 883 case 'x': { 884 int overflow = 0; 885 if ((map[*cp]&(DIGIT|HEX)) == 0) { 886 if (*cp < ' ' || *cp == 0177) 887 error("ill-formed hexadecimal escape sequence\n"); 888 else 889 error("ill-formed hexadecimal escape sequence `\\x%c'\n", *cp); 890 if (*cp != q) 891 cp++; 892 return 0; 893 } 894 for (c = 0; map[*cp]&(DIGIT|HEX); cp++) { 895 if (c >> (8*widechar->size - 4)) 896 overflow = 1; 897 if (map[*cp]&DIGIT) 898 c = (c<<4) + *cp - '0'; 899 else 900 c = (c<<4) + (*cp&~040) - 'A' + 10; 901 } 902 if (overflow) 903 warning("overflow in hexadecimal escape sequence\n"); 904 return c&ones(8*widechar->size); 905 } 906 case '0': case '1': case '2': case '3': 907 case '4': case '5': case '6': case '7': 908 c = *(cp-1) - '0'; 909 if (*cp >= '0' && *cp <= '7') { 910 c = (c<<3) + *cp++ - '0'; 911 if (*cp >= '0' && *cp <= '7') 912 c = (c<<3) + *cp++ - '0'; 913 } 914 return c; 915 default: 916 if (cp[-1] < ' ' || cp[-1] >= 0177) 917 warning("unrecognized character escape sequence\n"); 918 else 919 warning("unrecognized character escape sequence `\\%c'\n", cp[-1]); 920 } 921 return cp[-1]; 922 }