libs/script/scripttokeniser.h

   1 /*
   2 Copyright (C) 2001-2006, William Joseph.
   3 All Rights Reserved.
   4
   5 This file is part of GtkRadiant.
   6
   7 GtkRadiant is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GtkRadiant is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GtkRadiant; if not, write to the Free Software
  19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  20 */
  21
  22 #if !defined(INCLUDED_SCRIPT_SCRIPTTOKENISER_H)
  23 #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H
  24
  25 #include "iscriplib.h"
  26
  27 class ScriptTokeniser : public Tokeniser
  28 {
  29   enum CharType
  30   {
  31     eWhitespace,
  32     eCharToken,
  33     eNewline,
  34     eCharQuote,
  35     eCharSolidus,
  36     eCharStar,
  37     eCharSpecial,
  38   };
  39
  40   typedef bool (ScriptTokeniser::*Tokenise)(char c);
  41
  42   Tokenise m_stack[3];
  43   Tokenise* m_state;
  44   SingleCharacterInputStream<TextInputStream> m_istream;
  45   std::size_t m_scriptline;
  46   std::size_t m_scriptcolumn;
  47
  48   char m_token[MAXTOKEN];
  49   char* m_write;
  50
  51   char m_current;
  52   bool m_eof;
  53   bool m_crossline;
  54   bool m_unget;
  55   bool m_emit;
  56
  57   bool m_special;
  58
  59   CharType charType(const char c)
  60   {
  61     switch(c)
  62     {
  63     case '\n': return eNewline;
  64     case '"': return eCharQuote;
  65     case '/': return eCharSolidus;
  66     case '*': return eCharStar;
  67     case '{': case '(': case '}': case ')': case '[': case ']': case ',': case ':': return (m_special) ? eCharSpecial : eCharToken;
  68     }
  69
  70     if(c > 32)
  71     {
  72       return eCharToken;
  73     }
  74     return eWhitespace;
  75   }
  76
  77   Tokenise state()
  78   {
  79     return *m_state;
  80   }
  81   void push(Tokenise state)
  82   {
  83     ASSERT_MESSAGE(m_state != m_stack + 2, "token parser: illegal stack push");
  84     *(++m_state) = state;
  85   }
  86   void pop()
  87   {
  88     ASSERT_MESSAGE(m_state != m_stack, "token parser: illegal stack pop");
  89     --m_state;
  90   }
  91   void add(const char c)
  92   {
  93     if(m_write < m_token + MAXTOKEN - 1)
  94     {
  95       *m_write++ = c;
  96     }
  97   }
  98   void remove()
  99   {
 100     ASSERT_MESSAGE(m_write > m_token, "no char to remove");
 101     --m_write;
 102   }
 103
 104   bool tokeniseDefault(char c)
 105   {
 106     switch(charType(c))
 107     {
 108     case eNewline:
 109       if(!m_crossline)
 110       {
 111         globalErrorStream() << Unsigned(getLine()) << ":" << Unsigned(getColumn()) << ": unexpected end-of-line before token\n";
 112         return false;
 113       }
 114       break;
 115     case eCharToken:
 116     case eCharStar:
 117       push(Tokenise(&ScriptTokeniser::tokeniseToken));
 118       add(c);
 119       break;
 120     case eCharSpecial:
 121       push(Tokenise(&ScriptTokeniser::tokeniseSpecial));
 122       add(c);
 123       break;
 124     case eCharQuote:
 125       push(Tokenise(&ScriptTokeniser::tokeniseQuotedToken));
 126       break;
 127     case eCharSolidus:
 128       push(Tokenise(&ScriptTokeniser::tokeniseSolidus));
 129       break;
 130     default:
 131       break;
 132     }
 133     return true;
 134   }
 135   bool tokeniseToken(char c)
 136   {
 137     switch(charType(c))
 138     {
 139     case eNewline:
 140     case eWhitespace:
 141     case eCharQuote:
 142     case eCharSpecial:
 143       pop();
 144       m_emit = true; // emit token
 145       break;
 146     case eCharSolidus:
 147 #if 0 //SPoG: ignore comments in the middle of tokens.
 148       push(Tokenise(&ScriptTokeniser::tokeniseSolidus));
 149       break;
 150 #endif
 151     case eCharToken:
 152     case eCharStar:
 153       add(c);
 154       break;
 155     default:
 156       break;
 157     }
 158     return true;
 159   }
 160   bool tokeniseQuotedToken(char c)
 161   {
 162     switch(charType(c))
 163     {
 164     case eNewline:
 165       if(m_crossline)
 166       {
 167         globalErrorStream() << Unsigned(getLine()) << ":" << Unsigned(getColumn()) << ": unexpected end-of-line in quoted token\n";
 168         return false;
 169       }
 170       break;
 171     case eWhitespace:
 172     case eCharToken:
 173     case eCharSolidus:
 174     case eCharStar:
 175     case eCharSpecial:
 176       add(c);
 177       break;
 178     case eCharQuote:
 179       pop();
 180       push(Tokenise(&ScriptTokeniser::tokeniseEndQuote));
 181       break;
 182     default:
 183       break;
 184     }
 185     return true;
 186   }
 187   bool tokeniseSolidus(char c)
 188   {
 189     switch(charType(c))
 190     {
 191     case eNewline:
 192     case eWhitespace:
 193     case eCharQuote:
 194     case eCharSpecial:
 195       pop();
 196       add('/');
 197       m_emit = true; // emit single slash
 198       break;
 199     case eCharToken:
 200       pop();
 201       add('/');
 202       add(c);
 203       break;
 204     case eCharSolidus:
 205       pop();
 206       push(Tokenise(&ScriptTokeniser::tokeniseComment));
 207       break; // dont emit single slash
 208     case eCharStar:
 209       pop();
 210       push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
 211       break; // dont emit single slash
 212     default:
 213       break;
 214     }
 215     return true;
 216   }
 217   bool tokeniseComment(char c)
 218   {
 219     if(c == '\n')
 220     {
 221       pop();
 222       if(state() == Tokenise(&ScriptTokeniser::tokeniseToken))
 223       {
 224         pop();
 225         m_emit = true; // emit token immediatly preceding comment
 226       }
 227     }
 228     return true;
 229   }
 230   bool tokeniseBlockComment(char c)
 231   {
 232     if(c == '*')
 233     {
 234       pop();
 235       push(Tokenise(&ScriptTokeniser::tokeniseEndBlockComment));
 236     }
 237     return true;
 238   }
 239   bool tokeniseEndBlockComment(char c)
 240   {
 241     switch(c)
 242     {
 243     case '/':
 244       pop();
 245       if(state() == Tokenise(&ScriptTokeniser::tokeniseToken))
 246       {
 247         pop();
 248         m_emit = true; // emit token immediatly preceding comment
 249       }
 250       break; // dont emit comment
 251     case '*':
 252       break; // no state change
 253     default:
 254       pop();
 255       push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
 256       break;
 257     }
 258     return true;
 259   }
 260   bool tokeniseEndQuote(char c)
 261   {
 262     pop();
 263     m_emit = true; // emit quoted token
 264     return true;
 265   }
 266   bool tokeniseSpecial(char c)
 267   {
 268     pop();
 269     m_emit = true; // emit single-character token
 270     return true;
 271   }
 272
 273   /// Returns true if a token was successfully parsed.
 274   bool tokenise()
 275   {
 276     m_write = m_token;
 277     while(!eof())
 278     {
 279       char c = m_current;
 280
 281       if(!((*this).*state())(c))
 282       {
 283         // parse error
 284         m_eof = true;
 285         return false;
 286       }
 287       if(m_emit)
 288       {
 289         m_emit = false;
 290         return true;
 291       }
 292
 293       if(c == '\n')
 294       {
 295         ++m_scriptline;
 296         m_scriptcolumn = 1;
 297       }
 298       else
 299       {
 300         ++m_scriptcolumn;
 301       }
 302
 303       m_eof = !m_istream.readChar(m_current);
 304     }
 305     return m_write != m_token;
 306   }
 307
 308   const char* fillToken()
 309   {
 310     if(!tokenise())
 311     {
 312       return 0;
 313     }
 314
 315     add('\0');
 316     return m_token;
 317   }
 318
 319   bool eof()
 320   {
 321     return m_eof;
 322   }
 323
 324 public:
 325   ScriptTokeniser(TextInputStream& istream, bool special)
 326     : m_state(m_stack),
 327     m_istream(istream),
 328     m_scriptline(1),
 329     m_scriptcolumn(1),
 330     m_crossline(false),
 331     m_unget(false),
 332     m_emit(false),
 333     m_special(special)
 334   {
 335     m_stack[0] = Tokenise(&ScriptTokeniser::tokeniseDefault);
 336     m_eof = !m_istream.readChar(m_current);
 337     m_token[MAXTOKEN - 1] = '\0';
 338   }
 339   void release()
 340   {
 341     delete this;
 342   }
 343   void nextLine()
 344   {
 345     m_crossline = true;
 346   }
 347   const char* getToken()
 348   {
 349     if(m_unget)
 350     {
 351       m_unget = false;
 352       return m_token;
 353     }
 354
 355     return fillToken();
 356   }
 357   void ungetToken()
 358   {
 359     ASSERT_MESSAGE(!m_unget, "can't unget more than one token");
 360     m_unget = true;
 361   }
 362   std::size_t getLine() const
 363   {
 364     return m_scriptline;
 365   }
 366   std::size_t getColumn() const
 367   {
 368     return m_scriptcolumn;
 369   }
 370 };
 371
 372
 373 inline Tokeniser& NewScriptTokeniser(TextInputStream& istream)
 374 {
 375   return *(new ScriptTokeniser(istream, true));
 376 }
 377
 378 inline Tokeniser& NewSimpleTokeniser(TextInputStream& istream)
 379 {
 380   return *(new ScriptTokeniser(istream, false));
 381 }
 382
 383 #endif