return lex;
}
+/*
+ * Create a lexer whose input is an in-memory buffer instead of a FILE.
+ *
+ *   str  - start of the source text; the pointer itself is stored, the
+ *          text is not copied here
+ *   len  - number of bytes readable at str
+ *   name - label recorded for this input; "<string-source>" is used when
+ *          NULL is passed
+ *
+ * Returns the new lexer, or NULL (after reporting "out of memory") when
+ * the allocation fails.
+ */
+lex_file* lex_open_string(const char *str, size_t len, const char *name)
+{
+    const char *label;
+    lex_file   *fresh;
+
+    fresh = (lex_file*)mem_a(sizeof(*fresh));
+    if (fresh == NULL) {
+        lexerror(NULL, "out of memory\n");
+        return NULL;
+    }
+    memset(fresh, 0, sizeof(*fresh));
+
+    /* no FILE behind this lexer: input comes from the caller's buffer */
+    fresh->file               = NULL;
+    fresh->open_string        = str;
+    fresh->open_string_length = len;
+    fresh->open_string_pos    = 0;
+
+    /* reading state: nothing peeked, not at EOF, lines are counted from 1 */
+    fresh->peekpos = 0;
+    fresh->eof     = false;
+    fresh->line    = 1;
+
+    /* the stored name is whatever util_strdup hands back for the label */
+    label = (name != NULL) ? name : "<string-source>";
+    fresh->name = util_strdup(label);
+    lex_filenames_add(fresh->name);
+
+    return fresh;
+}
+
void lex_cleanup(void)
{
size_t i;
mem_d(lex);
}
+/*
+ * Fetch the next raw character from whichever source backs this lexer.
+ *
+ * A FILE-backed lexer defers to fgetc(). A string-backed lexer hands out
+ * open_string[open_string_pos] and advances the position until
+ * open_string_length is reached. EOF is returned once the string is
+ * exhausted, or when neither source is set.
+ *
+ * The string byte is converted through unsigned char so both sources
+ * follow the fgetc() convention. Where plain char is signed, a byte
+ * >= 0x80 would otherwise come back negative (0xFF would be
+ * indistinguishable from EOF when EOF is -1), and the value is later
+ * handed to isspace(), which is only defined for unsigned char values
+ * and EOF.
+ */
+static int lex_fgetc(lex_file *lex)
+{
+    if (lex->file)
+        return fgetc(lex->file);
+    if (lex->open_string) {
+        if (lex->open_string_pos >= lex->open_string_length)
+            return EOF;
+        return (unsigned char)lex->open_string[lex->open_string_pos++];
+    }
+    return EOF;
+}
+
/* Get or put-back data
* The following two functions do NOT understand what kind of data they
* are working on.
static int lex_try_trigraph(lex_file *lex, int old)
{
int c2, c3;
- c2 = fgetc(lex->file);
+ c2 = lex_fgetc(lex);
if (c2 != '?') {
lex_ungetch(lex, c2);
return old;
}
- c3 = fgetc(lex->file);
+ c3 = lex_fgetc(lex);
switch (c3) {
case '=': return '#';
case '/': return '\\';
}
}
+/*
+ * Given the first character of a possible digraph, read one more
+ * character and translate the pair:
+ *
+ *   <:  ->  [      :>  ->  ]
+ *   <%  ->  {      %>  ->  }
+ *   %:  ->  #
+ *
+ * When the pair is not a digraph, the second character is handed back
+ * via lex_ungetch() and ch is returned unchanged.
+ */
+static int lex_try_digraph(lex_file *lex, int ch)
+{
+    int next = lex_fgetc(lex);
+
+    switch (ch) {
+        case '<':
+            if (next == ':') return '[';
+            if (next == '%') return '{';
+            break;
+        case ':':
+            if (next == '>') return ']';
+            break;
+        case '%':
+            if (next == '>') return '}';
+            if (next == ':') return '#';
+            break;
+        default:
+            break;
+    }
+
+    /* not a digraph: give the lookahead back and keep the original */
+    lex_ungetch(lex, next);
+    return ch;
+}
+
static int lex_getch(lex_file *lex)
{
int ch;
return lex->peek[lex->peekpos];
}
- ch = fgetc(lex->file);
+ ch = lex_fgetc(lex);
if (ch == '\n')
lex->line++;
else if (ch == '?')
return lex_try_trigraph(lex, ch);
+ else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
+ return lex_try_digraph(lex, ch);
return ch;
}
return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
}
+/*
+ * Push one character onto the value buffer of the current token.
+ * Returns true on success; when token_value_add() fails, "out of memory"
+ * is reported through lexerror() and false is returned.
+ */
+static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
+{
+    if (token_value_add(&lex->tok, ch))
+        return true;
+
+    lexerror(lex, "out of memory");
+    return false;
+}
+
+/*
+ * Terminate the current token's value buffer with a 0 byte.
+ * The byte is appended and value_count is then stepped back by one, so
+ * the terminator is stored but not included in the count.
+ * Returns true on success; when token_value_add() fails, "out of memory"
+ * is reported through lexerror() and false is returned.
+ */
+static bool GMQCC_WARN lex_endtoken(lex_file *lex)
+{
+    if (token_value_add(&lex->tok, 0)) {
+        /* keep the terminator out of the counted length */
+        lex->tok.value_count--;
+        return true;
+    }
+
+    lexerror(lex, "out of memory");
+    return false;
+}
+
/* Skip whitespace and comments and return the first
* non-white character.
* As this makes use of the above getch() ungetch() functions,
static int lex_skipwhite(lex_file *lex)
{
int ch = 0;
+ bool haswhite = false;
do
{
ch = lex_getch(lex);
- while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
+ while (ch != EOF && isspace(ch)) {
+ if (lex->flags.preprocessing) {
+ if (ch == '\n') {
+ /* end-of-line */
+ /* see if there was whitespace first */
+ if (haswhite) { /* (lex->tok.value_count) { */
+ lex_ungetch(lex, ch);
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ return TOKEN_WHITE;
+ }
+ /* otherwise return EOL */
+ return TOKEN_EOL;
+ }
+ haswhite = true;
+ if (!lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ }
+ ch = lex_getch(lex);
+ }
if (ch == '/') {
ch = lex_getch(lex);
/* one line comment */
ch = lex_getch(lex);
- /* check for special: '/', '/', '*', '/' */
- if (ch == '*') {
- ch = lex_getch(lex);
- if (ch == '/') {
- ch = ' ';
- continue;
+ if (lex->flags.preprocessing) {
+ haswhite = true;
+ if (!lex_tokench(lex, '/') ||
+ !lex_tokench(lex, '/'))
+ {
+ return TOKEN_FATAL;
}
}
while (ch != EOF && ch != '\n') {
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
ch = lex_getch(lex);
}
+ if (lex->flags.preprocessing) {
+ lex_ungetch(lex, '\n');
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ return TOKEN_WHITE;
+ }
continue;
}
if (ch == '*')
{
/* multiline comment */
+ if (lex->flags.preprocessing) {
+ haswhite = true;
+ if (!lex_tokench(lex, '/') ||
+ !lex_tokench(lex, '*'))
+ {
+ return TOKEN_FATAL;
+ }
+ }
+
while (ch != EOF)
{
ch = lex_getch(lex);
if (ch == '*') {
ch = lex_getch(lex);
if (ch == '/') {
- ch = lex_getch(lex);
+ if (lex->flags.preprocessing) {
+ if (!lex_tokench(lex, '*') ||
+ !lex_tokench(lex, '/'))
+ {
+ return TOKEN_FATAL;
+ }
+ }
break;
}
}
+ if (lex->flags.preprocessing) {
+ if (!lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ }
}
- if (ch == '/') /* allow *//* direct following comment */
- {
- lex_ungetch(lex, ch);
- ch = ' '; /* cause TRUE in the isspace check */
- }
+ ch = ' '; /* cause TRUE in the isspace check */
continue;
}
/* Otherwise roll back to the slash and break out of the loop */
}
} while (ch != EOF && isspace(ch));
- return ch;
-}
-
-/* Append a character to the token buffer */
-static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
-{
- if (!token_value_add(&lex->tok, ch)) {
- lexerror(lex, "out of memory");
- return false;
- }
- return true;
-}
-
-/* Append a trailing null-byte */
-static bool GMQCC_WARN lex_endtoken(lex_file *lex)
-{
- if (!token_value_add(&lex->tok, 0)) {
- lexerror(lex, "out of memory");
- return false;
+ if (haswhite) {
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ lex_ungetch(lex, ch);
+ return TOKEN_WHITE;
}
- lex->tok.value_count--;
- return true;
+ return ch;
}
/* Get a token */
if (ch == quote)
return TOKEN_STRINGCONST;
- if (ch == '\\') {
+ if (!lex->flags.preprocessing && ch == '\\') {
ch = lex_getch(lex);
if (ch == EOF) {
lexerror(lex, "unexpected end of file");
lex->tok.ctx.line = lex->sline;
lex->tok.ctx.file = lex->name;
+ if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
+ return (lex->tok.ttype = ch);
+ }
+
if (lex->eof)
return (lex->tok.ttype = TOKEN_FATAL);
!strcmp(v, "local") ||
!strcmp(v, "return") ||
!strcmp(v, "const"))
+ {
lex->tok.ttype = TOKEN_KEYWORD;
+ }
+ else if (opts_standard != COMPILER_QCC)
+ {
+ /* other standards reserve these keywords */
+ if (!strcmp(v, "switch") ||
+ !strcmp(v, "struct") ||
+ !strcmp(v, "union") ||
+ !strcmp(v, "break") ||
+ !strcmp(v, "continue"))
+ {
+ lex->tok.ttype = TOKEN_KEYWORD;
+ }
+ }
return lex->tok.ttype;
}
if (ch == '"')
{
+ lex->flags.nodigraphs = true;
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
lex->tok.ttype = lex_finish_string(lex, '"');
- while (lex->tok.ttype == TOKEN_STRINGCONST)
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
{
/* Allow c style "string" "continuation" */
ch = lex_skipwhite(lex);
lex->tok.ttype = lex_finish_string(lex, '"');
}
+ lex->flags.nodigraphs = false;
if (!lex_endtoken(lex))
return (lex->tok.ttype = TOKEN_FATAL);
return lex->tok.ttype;
* Likewise actual unescaping has to be done by the parser.
* The difference is we don't allow 'char' 'continuation'.
*/
- lex->tok.ttype = lex_finish_string(lex, '\'');
- if (!lex_endtoken(lex))
- return (lex->tok.ttype = TOKEN_FATAL);
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ lex->tok.ttype = lex_finish_string(lex, '\'');
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ if (!lex_endtoken(lex))
+ return (lex->tok.ttype = TOKEN_FATAL);
/* It's a vector if we can successfully scan 3 floats */
#ifdef WIN32
- if (sscanf_s(lex->tok.value, " %f %f %f ",
- &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+ if (sscanf_s(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
#else
- if (sscanf(lex->tok.value, " %f %f %f ",
- &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+ if (sscanf(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
#endif
- {
- lex->tok.ttype = TOKEN_VECTORCONST;
- }
- return lex->tok.ttype;
+ {
+ lex->tok.ttype = TOKEN_VECTORCONST;
+ }
+
+ return lex->tok.ttype;
}
if (isdigit(ch))