Update specification.tex

[xonotic/gmqcc.git] / doc / specification.tex
diff --git a/doc/specification.tex b/doc/specification.tex

index 10584f612e688ec8f9a5c78c1473096c71dd5f84..2687946f1da77fe0e1572fe2d042e95f86f0b3f7 100644 (file)
--- a/doc/specification.tex
+++ b/doc/specification.tex
@@ -46,9 +46,6 @@
    otherkeywords={::=,|},
    morecomment=[s][\color{comment}]{(*}{*)},
    stringstyle=\color{string},
-  morestring=[b]",
-  morestring=[b]',
-  morestring=[b]/,
    showstringspaces=false,
    frame=none,
    rulecolor=\color{rule},
@@ -74,7 +71,7 @@ document). It specifies:
         \item the representation of QuakeC programs;
         \item the syntax and constraints of the QuakeC language;
         \item the semantic rules for interpreting QuakeC programs;
-       \item the representation of input data to be processes by QuakeC programs;
+       \item the representation of input data to be processed by QuakeC programs;
         \item the representation of output data produced by QuakeC programs;
         \item the restrictions and limits imposed by a conforming implementation of QuakeC.
  \end{itemize}
@@ -135,7 +132,7 @@ Region of data storage in the execution environment, the contents of which can r
  values.
  
  \subsection*{parameter}
-Object declare as part of a function declaration or definition that acquires a value on
+Object declared as part of a function declaration or definition that acquires a value on
  entry to the function, or an identifier from the comma-separated list bounded by the
  parentheses immediately following the macro name in a function-like macro definition.
  
@@ -146,6 +143,11 @@ document, but that may be impractical for some implementations.
  \subsection*{value}
  Precise meaning of the contents of an object when interpreted as having a specific type.
  
+\subsection*{implementation}
+Particular set of software, running in a particular translation environment under
+particular control options, that performs translation of programs for, and supports
+execution of functions in, a particular execution environment.
+
  \subsection*{implementation-defined value}
  Unspecified value where each implementation documents how the choice is made.
  
@@ -174,7 +176,7 @@ rules for conforming implementations.
  \subsubsection{Translation environment}
  \paragraph*{Translation steps}
  The precedence among the syntax rules of translation is specified by the following steps:
-\begin{itemize}
+\begin{enumerate}
         \item Physical source file characters are mapped, in an implementation-defined manner,
               to the source character set (introducing new-line characters for end-of-line
               indicators) if necessary.  Trigraph and digraph sequences are replaced by their
@@ -187,7 +189,7 @@ The precedence among the syntax rules of translation is specified by the followi
               by one space character is implementation-defined.
         \item Preprocessing directives are executed, macro invocations are expanded 
               recursively. A \#include preprocessing directive causes the named header or
-             source file to be processes from step one through step three, recursively. All
+             source file to be processed from step one through step three, recursively. All
               preprocessing directives are then deleted.
         \item Each source character set member and escape sequence in character constants and
               string literals is converted to the corresponding member of the execution
@@ -197,9 +199,9 @@ The precedence among the syntax rules of translation is specified by the followi
         \item White-space characters separating tokens are no longer significant. Each
               preprocessing token is converted into a token. The resulting tokens are then
               syntactically and semantically analyzed and translated.                         
-\end{itemize}
+\end{enumerate}
  \subparagraph*{Footnotes}
-Implementations shall behave as if these separate steps occur, even though many are likely
+Implementations shall behave as if these steps occur separately, even though many are likely
  to be folded together in practice. Source files need not be stored as files, nor need there
  be any one-to-one correspondence between these items and any external representation. The
  description is conceptual only, and does not specify any particular implementation.
@@ -211,9 +213,33 @@ constraint, even if the behavior is also explicitly specified as undefined or
  implementation-defined. Diagnostic messages need not be produced in other circumstances.
  
  %% ->-> Execution environments %%
-\subsubsection{Execution environments}
-Two execution environments are defined
+\subsubsection{Execution environment}
+A conforming execution environment shall provide at minimum the following 15 definitions
+for built-in functions, with an accompanying header or source file that defines them.
+\begin{enumerate}
+       \item entity ()                                spawn
+       \item void   (entity)                          remove
+       
+       \item string (float)                           ftos
+       \item string (vector)                          vtos
+       \item string (entity)                          etos
+       \item float  (string)                          stof
+       
+       \item void   (string, ...)                     dprint
+       \item void   (entity)                          eprint
+       
+       \item float  (float)                           rint
+       \item float  (float)                           floor
+       \item float  (float)                           ceil
+       \item float  (float)                           fabs
+       \item float  (float)                           sin
+       \item float  (float)                           cos
+       \item float  (float)                           sqrt
+\end{enumerate}
+The numbers to which these built-ins are assigned are implementation-defined;
+an implementation is allowed to use these built-ins however it sees fit.
  
+\pagebreak
  %% -> Language %%
  \section{Language}
  \subsection{Notation}
@@ -227,6 +253,7 @@ symbols; more sequences are separated by a vertical bar \textbar, indicating a c
  the whole being a possible substitution for the symbol on the left.  Symbols that never
  appear on the left side are terminals.
  \linebreak
+
  This document defines the language syntax incrementally, alongside the language
  constructs it describes. If you're interested in a summary of the language syntax, one
  is given in annex A.
@@ -236,8 +263,8 @@ annex A.
  %% ->-> Scopes of identifiers %%
  \subsubsection{Scopes of identifiers}
  An identifier can denote an object; a function, or enumeration; a label name; a macro
-name; or a macro parameter. The same identifier can denote difference items at different
-point in the program. A member of an enumeration is called an enumeration constant.
+name; or a macro parameter. The same identifier can denote different items at different
+points in the program. A member of an enumeration is called an enumeration constant.
  Macro names and macro parameters are not considered further here, because prior to the
  semantic phase of program translation any occurrences of macro names in the source file
  are replaced by the preprocessing token sequences that constitute their macro definitions.
@@ -253,11 +280,12 @@ parameters.)
  
  A label name is the only kind of identifier that has function scope.  It can be used (in 
  a goto statement) anywhere in the function in which it appears, and is declared
-implicitly by its syntactic appearance (prefixed by a : and a statement).
+implicitly by its syntactic appearance (prefixed by a colon :, and suffixed with a 
+statement).
  \linebreak
  
  Every other identifier has scope determined by the placement of its declaration (in a 
-declarator or type specifier).  If the declarator or types specifier that declares the 
+declarator or type specifier).  If the declarator or type specifier that declares the 
  identifier appears outside any block or list of parameters, the identifier has file 
  scope,  which terminates at the end of the file.  If the declarator or type specifier that
  declares the identifier appears inside a block or within the list of parameter
@@ -278,7 +306,7 @@ refer to some item (as opposed to the syntactic construct), it refers to the ite
  relevant name space whose declaration is visible at the point the identifier occurs.
  \linebreak
  
-Two identifiers have the same scope it and only if their scopes terminate at the same 
+Two identifiers have the same scope if and only if their scopes terminate at the same 
  point.
  \linebreak
  
@@ -293,7 +321,7 @@ source file, the syntactic context disambiguates uses that refer to different it
  Thus, there  are separate name spaces for various categories of identifiers, as follows:
  \linebreak
  \begin{itemize}
-       \item Label names (disambiguated by the syntax of the lbvel declaration and use);
+       \item Label names (disambiguated by the syntax of the label declaration and use);
         \item Enumerations (disambiguated by following the keyword enum);
         \item All other identifiers, called ordinary identifiers (declared in ordinary
               declarators or as enumeration constants).
@@ -335,12 +363,12 @@ types, as follows:
  
  \begin{itemize}
         \item An array type describes a contiguously allocated nonempty set of objects with a 
-             particular object types, called the element type. Array types are characterized 
+             particular object type, called the element type. Array types are characterized 
               by their element type and by the number of elements in the array.  An array type
               is said to be derived     from its element type, and if its element is type T, the 
               array type is sometimes called "array of T".  The construction of an array type
                   from an element type is called "array type derivation".
-       \item A function type described a function with a specified return type. A function
+       \item A function type describes a function with a specified return type. A function
                   type is characterized by its return type and the number and types of its
                   parameters. A function type is said to be derived from its return type, and if 
                   its return type is T, the function type is sometimes called "function returning
@@ -348,14 +376,12 @@ types, as follows:
                    type derivation".
  \end{itemize}
  
-\
-
  Arithmetic types are collectively called scalar types. Arrays and vectors are 
  collectively called aggregate types.
  \linebreak
  
  An array of unknown size is an incomplete type.  It is completed, for an identifier of 
-that byte, by specifying the size in a later declaration.  Arrays are required to have 
+that type, by specifying the size in a later declaration.  Arrays are required to have 
  known constant size.
  \linebreak
  
@@ -365,7 +391,7 @@ itself if the type consists of no derived types.
  \linebreak
  
  Any type so far mentioned is an unqualified type.  Each unqualified type has several 
-qualified version of its type, corresponding to the combinations of one, two, or all 
+qualified versions of its type, corresponding to the combinations of one or both of the
  const and volatile qualifiers.  The qualified or unqualified versions of a type 
  are distinct types that belong to the same type category and have the same representation.
  A derived type is not qualified by the qualifiers (if any) of the type from which it 
@@ -392,19 +418,21 @@ is compatible with both of the two types and satisfies the following conditions:
               parameter in the composite parameter type list is the composite type of the 
               corresponding parameters.
  \end{itemize}
-These rules apply recursively to types from which the twp types are derived.
+These rules apply recursively to types from which the two types are derived.
  \linebreak
  
  %% ->Conversions %%
  \subsection{Conversions}
  Several operators convert operand values from one type to another automatically. This 
-sub-clause specified the result required from such an implicit conversion.
+sub-clause specifies the result required from such an implicit conversion.
  \linebreak
  
  Conversion from an operand value to a compatible type causes no change to the value or
  the representation.
  \linebreak
  
+TODO: Specify all implicit conversions.
+
  %% ->->Arithmetic operands %%
  \subsubsection{Arithmetic operands}
  \paragraph*{Boolean type}
@@ -418,7 +446,7 @@ An lvalue is an expression with an object type or an incomplete type other than
  if an lvalue does not designate an object when it is evaluated, the behavior is undefined.
  When an object is said to have a particular type, the type is specified by the lvalue 
  used to designate the object.  A modifiable lvalue is an lvalue that does not have an 
-array type, does not have an incomplete type, does not have a const-qualified type.
+array type, does not have an incomplete type, and does not have a const-qualified type.
  \linebreak
  
  Except when it is the operand of the unary \& operator, the ++ operator, the -{}- operator,
@@ -430,48 +458,65 @@ the lvalue has an incomplete type and does not have array type, the behavior is
  \linebreak
  
  A function designator is an expression that has function type.
-\linebreak
  
  \paragraph*{void}
  The (nonexistent) value of a void expression (an expression that has type void) shall not 
  be used in any way, and implicit conversions (except to void) shall not be applied to 
-such an expression.  If an expression of any other type is evaluated as a void expression
-, its value or designator is discarded. (A void expression is only evaluated for its 
+such an expression.  If an expression of any other type is evaluated as a void expression,
+its value or designator is discarded. (A void expression is only evaluated for its 
  side effects.)
  \pagebreak
  
  \subsection{Lexical elements}
  \paragraph*{Syntax}
  \begin{lstlisting}[language=bnf]
-token ::= keyword    | identifier
-        | constant   | string-literal
+token ::= keyword    
+        | identifier
+        | constant   
+        | string-literal
          | punctuator
-       
-preprocessing-token := header-name | identifier
-                     | pp-number   | string-literal
-                     | punctuator
+preprocessing-token ::= header-name 
+                      | identifier
+                      | pp-number   
+                      | string-literal
+                      | punctuator
  \end{lstlisting}
  \paragraph*{Constraints}
  Each preprocessing token that is converted to a token shall have the lexical form of a 
  keyword, an identifier, a constant, a string literal, or a punctuator.
  
  \paragraph*{Semantics}
-TODO
+A token is the minimal lexical element of the language in translation steps six and seven.
+The categories of tokens are: keywords, identifiers, constants, string literals, and
+punctuators. A preprocessing token is the minimal lexical element of the language in
+translation steps three through five. The categories of preprocessing tokens are: header
+names, identifiers, preprocessing numbers, string literals, punctuators and other single
+non-white-space characters that do not lexically match the other preprocessing token
+categories. If a ' or a " character matches the last category, the behavior is undefined.
+Preprocessing tokens can be separated by white space; this consists of comments (described
+later), or white-space characters (space, horizontal tab, new-line, vertical tab, and
+form-feed), or both. In certain circumstances during translation step four, white space (or
+the absence thereof) serves as more than preprocessing token separation. White space may 
+appear within a preprocessing token only as part of a header name or between the quotation
+characters in a string literal.
+\linebreak
+
+If the input stream has been parsed into preprocessing tokens up to a given character, the
+next preprocessing token is the longest sequence of characters that could constitute a
+preprocessing token. There is one exception to this rule: header name preprocessing tokens
+are recognized only within \#include preprocessing directives and in implementation-defined
+locations within \#pragma directives. In such contexts, a sequence of characters that
+could be either a header name or string literal is recognized as the former.
  
  %% ->-> Keywords %%
  \subsubsection{Keywords}
  \paragraph*{Syntax}
  \begin{lstlisting}[language=bnf]
-keyword ::= enum     | break
-          | return   | void
-          | case     | float
-          | volatile | for
-          | while    | const
-          | goto     | bool
-          | continue | if
-          | static   | default
-          | inline   | do
-          | switch   | else
+keyword ::= enum     | break  | return   | void
+          | case     | float  | volatile | for
+          | while    | const  | goto     | bool
+          | continue | if     | static   | default
+          | inline   | do     | switch   | else
            | vector   | entity
  \end{lstlisting}
  \paragraph*{Semantics}
@@ -480,21 +525,34 @@ use as keywords, and shall not be used otherwise.
  
  %% ->->Identifiers %%
  \subsubsection{Identifiers}
-\paragraph*{Syntax}
  \begin{lstlisting}[language=bnf]
  identifier ::= nondigit
               | identifier nondigit
               | identifier digit
               
-nondigit  ::= _ | a | b | c | d | e | f | g | h | i 
-            | j | k | l | m | n | o | p | q | r | s
-            | t | u | v | w | x | y | z | A | B | C
-            | D | E | F | G | H | I | J | K | L | M
-            | N | P | Q | R | S | T | U | V | W | X
-            | Y | Z
-
-digit     ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
+nondigit ::= _ | a | b | c | d | e | f | g | h | i 
+           | j | k | l | m | n | o | p | q | r | s
+           | t | u | v | w | x | y | z | A | B | C
+           | D | E | F | G | H | I | J | K | L | M
+           | N | O | P | Q | R | S | T | U | V | W
+           | X | Y | Z
+
+digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
  \end{lstlisting}
+\paragraph*{Semantics}
+An identifier is a sequence of nondigit characters (including the underscore \_, the
+lowercase and uppercase Latin letters, and other characters) and digits, which designates one
+or more items. Lowercase and uppercase letters are distinct. There is a specific limit of
+65535 characters for an identifier.
+\linebreak
+
+When preprocessing tokens are converted to tokens during translation step six, if a 
+preprocessing token could be converted to either a keyword or an identifier, it is
+converted to a keyword.
+
+\paragraph*{Predefined identifiers}
+Any identifiers that begin with the prefix \_\_builtin, or are within the reserved name
+space, are reserved by the implementation.
  
  %% ->->Constants %%
  \subsubsection{Constants}
@@ -586,13 +644,13 @@ TODO
  \subsubsection{Comments}
  Except within a character constant, a string literal, or a comment, the characters /* 
  introduce a comment.  The contents of such a comment are examined only to identify 
-characters and to find the characters /* that terminate it.
+characters and to find the characters */ that terminate it.
  \linebreak
  
  Except within a character constant, a string literal, or a comment, the characters // 
  introduce a comment that includes all characters up to, but not including, the next 
  new-line character.  The contents of such a comment are examined only to identify 
-characters and to find the terminating new-line characters.
+characters and to find the terminating new-line character.
  \linebreak
  
  %% -> Expressions %%
@@ -627,14 +685,14 @@ its type), the behavior is undefined.
  \subsubsection{Primary expressions}
  \paragraph*{Syntax}
  \begin{lstlisting}[language=bnf]
-primary-expression := identifier
-                    | constant
-                    | string-literal
-                    ( expression )
+primary-expression ::= identifier
+                     | constant
+                     | string-literal
+                     | ( expression )
  \end{lstlisting}
  \paragraph*{Semantics}
  An identifier is a primary expression, provided it has been declared as designating an 
-object(in which case it is an lvalye) or a function(in which case it is a function 
+object (in which case it is an lvalue) or a function (in which case it is a function 
  designator).
  \linebreak