lex.c

   1 /*
   2  * Copyright (C) 2012
   3  *     Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include "gmqcc.h"
  24
  25 /*
  26  * Keywords are multichar, punctuation lexing is a bit more complicated
  27  * than keyword lexing.
  28  */
  29 static const char *const lex_keywords[] = {
  30     "do",    "else",     "if",     "while",
  31     "break", "continue", "return", "goto",
  32     "for",   "typedef"
  33 };
  34
  35 void lex_init(const char *file, lex_file **set) {
  36     lex_file *lex = mem_a(sizeof(lex_file));
  37     if (!lex)
  38         return;
  39
  40     lex->file = fopen(file, "r");
  41     if (!lex->file) {
  42         mem_d(lex);
  43         return;
  44     }
  45
  46     fseek(lex->file, 0, SEEK_END);
  47     lex->length = ftell(lex->file);
  48     lex->size   = lex->length; /* copy, this is never changed */
  49     fseek(lex->file, 0, SEEK_SET);
  50     lex->last = 0;
  51     lex->line = 1;
  52
  53     memset(lex->peek, 0, sizeof(lex->peek));
  54     *set = lex;
  55 }
  56
  57 void lex_close(lex_file *file) {
  58     if (!file) return;
  59
  60     fclose(file->file); /* may already be closed */
  61     mem_d (file);
  62 }
  63
  64 static void lex_addch(int ch, lex_file *file) {
  65     if (file->current <  sizeof(file->lastok)-1)
  66         file->lastok[file->current++] = (char)ch;
  67     if (file->current == sizeof(file->lastok)-1)
  68         file->lastok[file->current]   = (char)'\0';
  69 }
  70 static GMQCC_INLINE void lex_clear(lex_file *file) {
  71     file->current = 0;
  72 }
  73
  74 /*
  75  * read in inget/unget character from a lexer stream.
  76  * This doesn't play with file streams, the lexer has
  77  * it's own internal state for this.
  78  */
  79 static int lex_inget(lex_file *file) {
  80     char  get;
  81     file->length --;
  82
  83     if (file->last > 0) {
  84         if ((get = file->peek[--file->last]) == '\n')
  85             file->line ++;
  86         return get;
  87     }
  88     if ((get = fgetc(file->file)) == '\n')
  89         file->line++;
  90
  91     return get;
  92 }
  93 static void lex_unget(int ch, lex_file *file) {
  94     if (file->last < sizeof(file->peek)) {
  95         if (ch == '\n')
  96             file->line --;
  97         file->peek[file->last++] = ch;
  98     }
  99     file->length ++;
 100 }
 101
 102 /*
 103  * This is trigraph and digraph support, a feature not qc compiler
 104  * supports.  Moving up in this world!
 105  */
 106 static int lex_trigraph(lex_file *file) {
 107     int  ch;
 108     if ((ch = lex_inget(file)) != '?') {
 109         lex_unget(ch, file);
 110         return '?';
 111     }
 112
 113     ch = lex_inget(file);
 114     switch (ch) {
 115         case '(' : return '[' ;
 116         case ')' : return ']' ;
 117         case '/' : return '\\';
 118         case '\'': return '^' ;
 119         case '<' : return '{' ;
 120         case '>' : return '}' ;
 121         case '!' : return '|' ;
 122         case '-' : return '~' ;
 123         case '=' : return '#' ;
 124         default:
 125             lex_unget('?', file);
 126             lex_unget(ch , file);
 127     }
 128     return '?';
 129 }
 130 static int lex_digraph(lex_file *file, int first) {
 131     int ch = lex_inget(file);
 132     switch (first) {
 133         case '<':
 134             if (ch == '%') return '{';
 135             if (ch == ':') return '[';
 136             break;
 137         case '%':
 138             if (ch == '>') return '}';
 139             if (ch == ':') return '#';
 140             break;
 141         case ':':
 142             if (ch == '>') return ']';
 143             break;
 144     }
 145
 146     lex_unget(ch, file);
 147     return first;
 148 }
 149
 150 static int lex_getch(lex_file *file) {
 151     int ch = lex_inget(file);
 152     if (ch == '?')
 153         return lex_trigraph(file);
 154     if (ch == '<' || ch == ':' || ch == '%')
 155         return lex_digraph(file, ch);
 156     return ch;
 157 }
 158
 159 static int lex_get(lex_file *file) {
 160     int ch;
 161     if (!isspace(ch = lex_getch(file)))
 162         return ch;
 163
 164     /* skip over all spaces */
 165     while (isspace(ch) && ch != '\n')
 166         ch = lex_getch(file);
 167
 168     if (ch == '\n')
 169         return ch;
 170     lex_unget(ch, file);
 171     return ' ';
 172 }
 173
 174 static int lex_skipchr(lex_file *file) {
 175     int ch;
 176     int it;
 177
 178     lex_clear(file);
 179     lex_addch('\'', file);
 180
 181     for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) {
 182         lex_addch(ch, file);
 183
 184         if (ch == '\n')
 185             return ERROR_LEX;
 186         if (ch == '\\')
 187             lex_addch(lex_getch(file), file);
 188     }
 189     lex_addch('\'', file);
 190     lex_addch('\0', file);
 191
 192     if (it > 2)
 193         return ERROR_LEX;
 194
 195     return LEX_CHRLIT;
 196 }
 197
 198 static int lex_skipstr(lex_file *file) {
 199     int ch;
 200     lex_clear(file);
 201     lex_addch('"', file);
 202
 203     while ((ch = lex_getch(file)) != '"') {
 204         if (ch == '\n' || ch == EOF)
 205             return ERROR_LEX;
 206
 207         lex_addch(ch, file);
 208         if (ch == '\\')
 209             lex_addch(lex_inget(file), file);
 210     }
 211
 212     lex_addch('"', file);
 213     lex_addch('\0', file);
 214
 215     return LEX_STRLIT;
 216 }
 217 static int lex_skipcmt(lex_file *file) {
 218     int ch;
 219     lex_clear(file);
 220     ch = lex_getch(file);
 221
 222     if (ch == '/') {
 223         lex_addch('/', file);
 224         lex_addch('/', file);
 225
 226         while ((ch = lex_getch(file)) != '\n') {
 227             if (ch == '\\') {
 228                 lex_addch(ch, file);
 229                 lex_addch(lex_getch(file), file);
 230             } else {
 231                 lex_addch(ch, file);
 232             }
 233         }
 234         lex_addch('\0', file);
 235         return LEX_COMMENT;
 236     }
 237
 238     if (ch != '*') {
 239         lex_unget(ch, file);
 240         return '/';
 241     }
 242
 243     lex_addch('/', file);
 244
 245     /* hate this */
 246     do {
 247         lex_addch(ch, file);
 248         while ((ch = lex_getch(file)) != '*') {
 249             if (ch == EOF)
 250                 return error(file, ERROR_LEX, "malformatted comment");
 251             else
 252                 lex_addch(ch, file);
 253         }
 254         lex_addch(ch, file);
 255     } while ((ch = lex_getch(file)) != '/');
 256
 257     lex_addch('/',  file);
 258     lex_addch('\0', file);
 259
 260     return LEX_COMMENT;
 261 }
 262
 263 static int lex_getsource(lex_file *file) {
 264     int ch = lex_get(file);
 265
 266     /* skip char/string/comment */
 267     switch (ch) {
 268         case '\'': return lex_skipchr(file);
 269         case '"':  return lex_skipstr(file);
 270         case '/':  return lex_skipcmt(file);
 271         default:
 272             return ch;
 273     }
 274 }
 275
 276 int lex_token(lex_file *file) {
 277     int ch = lex_getsource(file);
 278     int it;
 279
 280     /* valid identifier */
 281     if (ch > 0 && (ch == '_' || isalpha(ch))) {
 282         lex_clear(file);
 283
 284         while (ch > 0 && (ch == '_' || isalpha(ch))) {
 285             lex_addch(ch, file);
 286             ch = lex_getsource(file);
 287         }
 288         lex_unget(ch,   file);
 289         lex_addch('\0', file);
 290
 291         /* look inside the table for a keyword .. */
 292         for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
 293             if (!strncmp(file->lastok, lex_keywords[it], strlen(lex_keywords[it])))
 294                 return it;
 295
 296         /* try a type? */
 297         #define TEST_TYPE(X)                                 \
 298             do {                                             \
 299                 if (!strncmp(X, "float",  sizeof("float")))  \
 300                     return TOKEN_FLOAT;                      \
 301                 if (!strncmp(X, "vector", sizeof("vector"))) \
 302                     return TOKEN_VECTOR;                     \
 303                 if (!strncmp(X, "string", sizeof("string"))) \
 304                     return TOKEN_STRING;                     \
 305                 if (!strncmp(X, "entity", sizeof("entity"))) \
 306                     return TOKEN_ENTITY;                     \
 307                 if (!strncmp(X, "void"  , sizeof("void")))   \
 308                     return TOKEN_VOID;                       \
 309             } while(0)
 310
 311         TEST_TYPE(file->lastok);
 312
 313         /* try the hashtable for typedefs? */
 314         if (typedef_find(file->lastok))
 315             TEST_TYPE(typedef_find(file->lastok)->name);
 316
 317         #undef TEST_TYPE
 318         return LEX_IDENT;
 319     }
 320     return (ch != ' ') ? ch : lex_token(file);
 321 }
 322
 323 void lex_reset(lex_file *file) {
 324     file->current = 0;
 325     file->last    = 0;
 326     file->length  = file->size;
 327     fseek(file->file, 0, SEEK_SET);
 328
 329     memset(file->peek,   0, sizeof(file->peek  ));
 330     memset(file->lastok, 0, sizeof(file->lastok));
 331 }
 332
 333 void lex_parse(lex_file *file) {
 334     if (!file) return;
 335     parse_gen(file); /* run parser */
 336 }
 337
 338 /*
 339  * Include a file into the lexer / parsing process:  This really
 340  * should check if names are the same to prevent endless include
 341  * recrusion.
 342  */
 343 lex_file *lex_include(lex_file *lex, const char *file) {
 344     lex_file *set = NULL;
 345
 346     util_strrq(file);
 347     if (strncmp(lex->name, file, strlen(lex->name)) == 0) {
 348         error(lex, ERROR_LEX, "Source file cannot include itself\n");
 349         exit (-1);
 350     }
 351     lex_init(file, &set);
 352
 353     return set;
 354 }