MINGW = $(findstring MINGW32, $(UNAME))
CC ?= clang
-CFLAGS += -Wall -Wextra -I. -pedantic-errors -fno-strict-aliasing
+CFLAGS += -Wall -Wextra -I. -fno-strict-aliasing -fsigned-char
+# The escaped inner quotes survive the shell, so GMQCC_GITINFO expands to a
+# C string literal (matching the "(no git info)" fallback) and not a bare token.
+CFLAGS += -DGMQCC_GITINFO="\"`git describe`\""
#turn on tons of warnings if clang is present
# but also turn off the STUPID ONES
ifeq ($(CC), clang)
-Wno-missing-prototypes \
-Wno-float-equal \
-Wno-cast-align
+else
+ #Tiny C Compiler doesn't know what -pedantic-errors is
+ # and instead of ignoring .. just errors.
+ ifneq ($(CC), tcc)
+ CFLAGS +=-pedantic-errors
+ else
+ CFLAGS += -Wno-pointer-sign -fno-common
+ endif
endif
+
ifeq ($(track), no)
CFLAGS += -DNOTRACK
endif
GMQCC = gmqcc.exe
TESTSUITE = testsuite.exe
else
+ #arm support for linux .. we need to allow unaligned accesses
+ #to memory otherwise we just segfault everywhere
+ ifneq (, $(findstring arm, $(shell uname -m)))
+ CFLAGS += -munaligned-access
+ endif
+
QCVM = qcvm
GMQCC = gmqcc
TESTSUITE = testsuite
mem_d(self);
}
-ast_breakcont* ast_breakcont_new(lex_ctx ctx, bool iscont)
+ast_breakcont* ast_breakcont_new(lex_ctx ctx, bool iscont, unsigned int levels)
{
ast_instantiate(ast_breakcont, ctx, ast_breakcont_delete);
ast_expression_init((ast_expression*)self, (ast_expression_codegen*)&ast_breakcont_codegen);
self->is_continue = iscont;
+ self->levels = levels;
return self;
}
self->ir_func = NULL;
self->curblock = NULL;
- self->breakblock = NULL;
- self->continueblock = NULL;
+ self->breakblocks = NULL;
+ self->continueblocks = NULL;
vtype->hasvalue = true;
vtype->constval.vfunc = self;
for (i = 0; i < vec_size(self->blocks); ++i)
ast_delete(self->blocks[i]);
vec_free(self->blocks);
+ vec_free(self->breakblocks);
+ vec_free(self->continueblocks);
mem_d(self);
}
{
(void)func;
(void)lvalue;
+ if (self->expression.vtype == TYPE_NIL) {
+ *out = func->ir_func->owner->nil;
+ return true;
+ }
/* NOTE: This is the codegen for a variable used in an expression.
* It is not the codegen to generate the value. For this purpose,
* ast_local_codegen and ast_global_codegen are to be used before this
{
ir_value *v = NULL;
+ if (self->expression.vtype == TYPE_NIL) {
+ compile_error(ast_ctx(self), "internal error: trying to generate a variable of TYPE_NIL");
+ return false;
+ }
+
if (self->hasvalue && self->expression.vtype == TYPE_FUNCTION)
{
ir_function *func = ir_builder_create_function(ir, self->name, self->expression.next->expression.vtype);
bool ast_local_codegen(ast_value *self, ir_function *func, bool param)
{
ir_value *v = NULL;
+
+ if (self->expression.vtype == TYPE_NIL) {
+ compile_error(ast_ctx(self), "internal error: trying to generate a variable of TYPE_NIL");
+ return false;
+ }
+
if (self->hasvalue && self->expression.vtype == TYPE_FUNCTION)
{
/* Do we allow local functions? I think not...
ir_block *bcontinue = NULL;
ir_block *bbreak = NULL;
- ir_block *old_bcontinue = NULL;
- ir_block *old_bbreak = NULL;
-
ir_block *tmpblock = NULL;
(void)lvalue;
/* enter */
func->curblock = bbody;
- old_bbreak = func->breakblock;
- old_bcontinue = func->continueblock;
- func->breakblock = bbreak;
- func->continueblock = bcontinue;
- if (!func->continueblock)
- func->continueblock = bbody;
+ vec_push(func->breakblocks, bbreak);
+ if (bcontinue)
+ vec_push(func->continueblocks, bcontinue);
+ else
+ vec_push(func->continueblocks, bbody);
/* generate */
if (self->body) {
}
end_bbody = func->curblock;
- func->breakblock = old_bbreak;
- func->continueblock = old_bcontinue;
+ vec_pop(func->breakblocks);
+ vec_pop(func->continueblocks);
}
/* post-loop-condition */
self->expression.outr = (ir_value*)1;
if (self->is_continue)
- target = func->continueblock;
+ target = func->continueblocks[vec_size(func->continueblocks)-1-self->levels];
else
- target = func->breakblock;
+ target = func->breakblocks[vec_size(func->breakblocks)-1-self->levels];
if (!target) {
compile_error(ast_ctx(self), "%s is lacking a target block", (self->is_continue ? "continue" : "break"));
ir_value *dummy = NULL;
ir_value *irop = NULL;
- ir_block *old_break = NULL;
ir_block *bout = NULL;
ir_block *bfall = NULL;
size_t bout_id;
return false;
/* setup the break block */
- old_break = func->breakblock;
- func->breakblock = bout;
+ vec_push(func->breakblocks, bout);
/* Now create all cases */
for (c = 0; c < vec_size(self->cases); ++c) {
func->curblock = bout;
/* restore the break block */
- func->breakblock = old_break;
+ vec_pop(func->breakblocks);
/* Move 'bout' to the end, it's nicer */
vec_remove(func->ir_func->blocks, bout_id, 1);
bool side_effects;
} ast_node_common;
-#define ast_delete(x) ( ( (ast_node*)(x) ) -> node.destroy )((ast_node*)(x))
+#define ast_delete(x) (*( ((ast_node*)(x))->node.destroy ))((ast_node*)(x))
#define ast_unref(x) do \
{ \
if (! (((ast_node*)(x))->node.keep) ) { \
} ast_expression_common;
#define AST_FLAG_VARIADIC (1<<0)
#define AST_FLAG_NORETURN (1<<1)
+#define AST_FLAG_INLINE (1<<2)
/* Value
*
struct ast_breakcont_s
{
ast_expression_common expression;
- bool is_continue;
+ bool is_continue;
+ unsigned int levels;
};
-ast_breakcont* ast_breakcont_new(lex_ctx ctx, bool iscont);
+ast_breakcont* ast_breakcont_new(lex_ctx ctx, bool iscont, unsigned int levels);
void ast_breakcont_delete(ast_breakcont*);
bool ast_breakcont_codegen(ast_breakcont*, ast_function*, bool lvalue, ir_value**);
ir_function *ir_func;
ir_block *curblock;
- ir_block *breakblock;
- ir_block *continueblock;
+ ir_block **breakblocks;
+ ir_block **continueblocks;
#if 0
/* In order for early-out logic not to go over
}
void code_init() {
- prog_section_function empty_function = {0,0,0,0,0,0,0,{0}};
+ prog_section_function empty_function = {0,0,0,0,0,0,0,{0,0,0,0,0,0,0,0}};
prog_section_statement empty_statement = {0,{0},{0},{0}};
prog_section_def empty_def = {0, 0, 0};
int i = 0;
vec_free(code_functions);
vec_free(code_globals);
vec_free(code_chars);
+ util_htdel(code_string_cache);
+
file_close(fp);
return true;
}
Warn on an unknown attribute. The warning will include only the first
token inside the enclosing attribute-brackets. This may change when
the actual attribute syntax is better defined.
+.TP
+.B -Wreserved-names
+Warn when using reserved names such as 'nil'.
+.TP
+.B -Wuninitialized-constant
+Warn about global constants (using the 'const' keyword) with no
+assigned value.
+.TP
+.B -Wuninitialized-global
+Warn about global variables with no initializing value. This is off by
+default, and is added mostly to help find null-values which are
+supposed to be replaced by the untyped 'nil' constant.
.SH COMPILE FLAGS
.TP
.B -fdarkplaces-string-table-bug
\-fno-bail-on-werror, compilation will continue until the end, but no
output is generated. Instead the first such error message's context is
shown.
+.TP
+.B -floop-labels
+Allow loops to be labeled, and allow 'break' and 'continue' to take an
+optional label to decide which loop to actually jump out of or
+continue.
+.sp
+.in +4
+.nf
+for :outer (i = 0; i < n; ++i) {
+ while (inner) {
+ ...;
+ if (something)
+ continue outer;
+ }
+}
+.fi
+.in
+.TP
+.B -funtyped-nil
+Adds a global named 'nil' which is of no type and can be assigned to
+anything. No typechecking will be performed on assignments. Assigning
+to it is forbidden, using it in any other kind of expression is also
+not allowed.
+.TP
+.B -fpermissive
+Various effects, usually to weaken some conditions.
+.RS
+.IP "with -funtyped-nil"
+Allow local variables named 'nil'. (This will not allow declaring a
+global of that name.)
.SH OPTIMIZATIONS
.TP
.B -Opeephole
done:
if (len < (int)sizeof(spaces)-1) {
spaces[sizeof(spaces)-1-len] = 0;
- printf(spaces);
+ file_puts(stdout, spaces);
spaces[sizeof(spaces)-1-len] = ' ';
}
}
FILE *handle = NULL;
file_init();
- return ((fopen_s(&handle, filename, mode) != 0) ? NULL : handle;
+ return (fopen_s(&handle, filename, mode) != 0) ? NULL : handle;
}
size_t file_read(void *buffer, size_t size, size_t count, FILE *fp) {
/* __LINE__ */
char *ftepp_predef_line(lex_file *context) {
- char *value = (char*)mem_a(128);
- sprintf(value, "%d", (int)context->line);
+ char *value;
+ util_asprintf(&value, "%d", (int)context->line);
return value;
}
/* __FILE__ */
}
/* __COUNTER_LAST__ */
char *ftepp_predef_counterlast(lex_file *context) {
- char *value = (char*)mem_a(128);
- sprintf(value, "%u", ftepp_predef_countval);
+ char *value;
+ util_asprintf(&value, "%u", ftepp_predef_countval);
(void)context;
return value;
}
/* __COUNTER__ */
char *ftepp_predef_counter(lex_file *context) {
- char *value = (char*)mem_a(128);
+ char *value;
ftepp_predef_countval ++;
- sprintf(value, "%u", ftepp_predef_countval);
+ util_asprintf(&value, "%u", ftepp_predef_countval);
(void)context;
return value;
}
/* __RANDOM__ */
char *ftepp_predef_random(lex_file *context) {
- char *value = (char*)mem_a(128);
+ char *value;
ftepp_predef_randval = (util_rand() % 0xFF) + 1;
- sprintf(value, "%u", ftepp_predef_randval);
+ util_asprintf(&value, "%u", ftepp_predef_randval);
(void)context;
return value;
}
/* __RANDOM_LAST__ */
char *ftepp_predef_randomlast(lex_file *context) {
- char *value = (char*)mem_a(128);
- sprintf(value, "%u", ftepp_predef_randval);
+ char *value;
+ util_asprintf(&value, "%u", ftepp_predef_randval);
(void)context;
return value;
#define GMQCC_VERSION \
GMQCC_VERSION_BUILD(GMQCC_VERSION_MAJOR, GMQCC_VERSION_MINOR, GMQCC_VERSION_PATCH)
+#ifndef GMQCC_GITINFO
+# define GMQCC_GITINFO "(no git info)"
+#endif
+
/*
- * We cannoy rely on C99 at all, since compilers like MSVC
+ * We cannot rely on C99 at all, since compilers like MSVC
* simply don't support it. We define our own boolean type
* as a result (since we cannot include <stdbool.h>). For
* compilers that are in 1999 mode (C99 compliant) we can use
# endif /* !__STDC_VERSION__ */
#endif /* !__cplusplus */
-
-
/*
* Of some functions which are generated we want to make sure
* that the result isn't ignored. To find such function calls,
void util_seed(uint32_t);
uint32_t util_rand();
+int util_vasprintf(char **ret, const char *fmt, va_list);
+int util_asprintf (char **ret, const char *fmt, ...);
+
+
#ifdef NOTRACK
# define mem_a(x) malloc (x)
# define mem_d(x) free ((void*)x)
TYPE_UNION ,
TYPE_ARRAY ,
+ TYPE_NIL , /* it's its own type / untyped */
+
TYPE_COUNT
};
# Enabling this corrects ternary precedence bugs present in fteqcc.
CORRECT_TERNARY = true
+ # Prevent the creation of _x, _y and _z progdefs for vectors
+ SINGLE_VECTOR_DEFS = false
+
+ # Cast vectors to real booleans when used in logic expressions.
+ # This is achieved by using NOT_V.
+ CORRECT_LOGIC = false
+
+ # Always treat empty strings as true. Usually !"" yields true, because
+ # the string-NOT instruction considers empty strings to be false, while
+ # an empty string as condition for 'if' will be considered true, since
+ # only the numerical value of the global is looked at.
+ TRUE_EMPTY_STRINGS = false
+
+ # Opposite of the above, empty strings are always false. Similar to
+ # CORRECT_LOGIC this will always use NOT_S to cast a string to a real
+ # boolean value.
+ FALSE_EMPTY_STRINGS = false
+
+ # Recognize utf-8 characters in character constants, and encode
+ # codepoint escape sequences in strings as utf-8. This essentially allows
+ # \{1234} escape sequences to be higher than 255.
+ UTF8 = true
+
+ # When a warning is printed and it is set to be treated as error via
+ # a -Werror switch, compilation will be stopped, unless this is false.
+ # When this is false, the rest of the code will be compiled, and at the end
+ # the file and line of the first warning will be shown.
+ BAIL_ON_WERROR = true
+
+ # Allow loops and switches to be labeled and break and continue to take an
+ # optional label to target a specific loop/switch.
+ LOOP_LABELS = false
+
+ # Enable the 'nil' null constant, which has no type. It can be used as the
+ # right hand of any assignment regardless of the required type.
+ UNTYPED_NIL = false
+
+ # Be "permissive". For instance, when -funtyped-nil is used, this allows local
+ # variables with the name 'nil' to be declared.
+ PERMISSIVE = false
+
# These are all the warnings, usually present via the -W prefix from
# the command line.
[warnings]
# Enables preprocessor "#warnings"
CPP = true
+ # With the [[attribute]] syntax enabled, warn when an unknown
+ # attribute is encountered. Its first token will be included in the
+ # message.
+ UNKNOWN_ATTRIBUTE = true
+
+ # Warn when declaring variables or fields with a reserved name like 'nil'
+ RESERVED_NAMES = true
+
+ # Warn about 'const'-qualified global variables with no initializing value.
+ UNINITIALIZED_CONSTANT = true
+
+ # Warn about non-constant global variables with no initializing value.
+ UNINITIALIZED_GLOBAL = true
+
# Finally these are all the optimizations, usually present via the -O
# prefix from the command line.
[optimizations]
# Enables tail recursion optimizations.
TAIL_RECURSION = true
- # Enables tail-call optimizations.
+ # Enables tail-call optimizations. (Not implemented)
TAIL_CALLS = true
+
+ # Every function where it is safe to do so will share its local data section
+ # with the others. The criteria are currently that the function must not have
+ # any possibly uninitialized locals, or local arrays regardless of how they
+ # are initialized.
+ OVERLAP_LOCALS = false
+
+ # Strip out the names of constants to save some space in the progs.dat
+ STRIP_CONSTANT_NAMES = true
+
+ # Aggressively reuse strings in the string-section
+ OVERLAP_STRINGS = true
+
+ # Have expressions which are used as function parameters evaluate directly
+ # into the parameter-globals if possible.
+ # This avoids a whole lot of copying.
+ CALL_STORES = true
+
+ # Do not create a RETURN instruction at the end of functions of return-type void.
+ VOID_RETURN = true
"variant",
"struct",
"union",
- "array"
+ "array",
+
+ "nil"
};
size_t type_sizeof_[TYPE_COUNT] = {
0, /* TYPE_STRUCT */
0, /* TYPE_UNION */
0, /* TYPE_ARRAY */
+ 0, /* TYPE_NIL */
};
uint16_t type_store_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
uint16_t field_store_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
uint16_t type_storep_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
uint16_t type_eq_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
uint16_t type_ne_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
uint16_t type_not_instr[TYPE_COUNT] = {
AINSTR_END, /* struct */
AINSTR_END, /* union */
AINSTR_END, /* array */
+ AINSTR_END, /* nil */
};
/* protos */
return NULL;
}
+ self->nil = ir_value_var("nil", store_value, TYPE_NIL);
+ self->nil->cvq = CV_CONST;
+
return self;
}
for (i = 0; i != vec_size(self->fields); ++i) {
ir_value_delete(self->fields[i]);
}
+ ir_value_delete(self->nil);
vec_free(self->fields);
vec_free(self->filenames);
vec_free(self->filestrings);
size_t i, o, p, mem;
/* bitmasks which operands are read from or written to */
size_t read, write;
- char dbg_ind[16] = { '#', '0' };
+ char dbg_ind[16];
+ dbg_ind[0] = '#';
+ dbg_ind[1] = '0';
(void)dbg_ind;
if (prev)
*/
#ifndef GMQCC_IR_HDR
#define GMQCC_IR_HDR
-
+#include "gmqcc.h"
/* ir_value */
typedef struct
const char **filenames;
qcint *filestrings;
/* we cache the #IMMEDIATE string here */
- qcint str_immediate;
+ qcint str_immediate;
+ /* there should just be this one nil */
+ ir_value *nil;
} ir_builder;
ir_builder* ir_builder_new(const char *modulename);
vec_shrinkto(lex->tok.value, 0);
vec_push(lex->frames, m);
} while (true);
+
+ return false;
}
static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
if (options_long_gcc("std", &argc, &argv, &argarg)) {
if (!strcmp(argarg, "gmqcc") || !strcmp(argarg, "default")) {
- opts_set(opts.flags, ADJUST_VECTOR_FIELDS, true);
+ opts_set(opts.flags, ADJUST_VECTOR_FIELDS, true);
+ opts_set(opts.flags, CORRECT_LOGIC, true);
+ opts_set(opts.flags, FALSE_EMPTY_STRINGS, false);
+ opts_set(opts.flags, TRUE_EMPTY_STRINGS, true);
+ opts_set(opts.flags, LOOP_LABELS, true);
opts.standard = COMPILER_GMQCC;
} else if (!strcmp(argarg, "qcc")) {
if (OPTS_FLAG(TRUE_EMPTY_STRINGS) && OPTS_FLAG(FALSE_EMPTY_STRINGS)) {
con_err("-ftrue-empty-strings and -ffalse-empty-strings are mutually exclusive");
- exit(1);
+ exit(EXIT_FAILURE);
}
/* the standard decides which set of operators to use */
operators[operator_count-1].id != opid2(':','?'))
{
con_err("internal error: operator precedence table wasn't updated correctly!\n");
- exit(1);
+ exit(EXIT_FAILURE);
}
operators_free = true;
newops = (oper_info*)mem_a(sizeof(operators[0]) * operator_count);
opts_set(opts.warn, WARN_UNREACHABLE_CODE, true);
opts_set(opts.warn, WARN_CPP, true);
opts_set(opts.warn, WARN_UNKNOWN_ATTRIBUTE, true);
+ opts_set(opts.warn, WARN_RESERVED_NAMES, true);
+ opts_set(opts.warn, WARN_UNINITIALIZED_CONSTANT, true);
/* flags */
opts_set(opts.flags, ADJUST_VECTOR_FIELDS, true);
opts_set(opts.flags, FTEPP, false);
/* codegen flags */
#ifdef GMQCC_TYPE_FLAGS
- GMQCC_DEFINE_FLAG(OVERLAP_LOCALS)
GMQCC_DEFINE_FLAG(DARKPLACES_STRING_TABLE_BUG)
GMQCC_DEFINE_FLAG(ADJUST_VECTOR_FIELDS)
GMQCC_DEFINE_FLAG(FTEPP)
GMQCC_DEFINE_FLAG(FALSE_EMPTY_STRINGS)
GMQCC_DEFINE_FLAG(UTF8)
GMQCC_DEFINE_FLAG(BAIL_ON_WERROR)
+ GMQCC_DEFINE_FLAG(LOOP_LABELS)
+ GMQCC_DEFINE_FLAG(UNTYPED_NIL)
+ GMQCC_DEFINE_FLAG(PERMISSIVE)
#endif
/* warning flags */
GMQCC_DEFINE_FLAG(UNREACHABLE_CODE)
GMQCC_DEFINE_FLAG(CPP)
GMQCC_DEFINE_FLAG(UNKNOWN_ATTRIBUTE)
+ GMQCC_DEFINE_FLAG(RESERVED_NAMES)
+ GMQCC_DEFINE_FLAG(UNINITIALIZED_CONSTANT)
+ GMQCC_DEFINE_FLAG(UNINITIALIZED_GLOBAL)
#endif
#ifdef GMQCC_TYPE_OPTIMIZATIONS
ast_value *imm_float_zero;
ast_value *imm_float_one;
ast_value *imm_vector_zero;
+ ast_value *nil;
size_t crc_globals;
size_t crc_fields;
/* All the labels the function defined...
* Should they be in ast_function instead?
*/
- ast_label **labels;
- ast_goto **gotos;
+ ast_label **labels;
+ ast_goto **gotos;
+ const char **breaks;
+ const char **continues;
/* A list of hashtables for each scope */
ht *variables;
static bool parser_leaveblock(parser_t *parser);
static void parser_addlocal(parser_t *parser, const char *name, ast_expression *e);
static bool parse_typedef(parser_t *parser);
-static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofields, int qualifier, ast_value *cached_typedef, bool noref, bool noreturn, bool is_static);
+static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofields, int qualifier, ast_value *cached_typedef, bool noref, bool is_static, uint32_t qflags);
static ast_block* parse_block(parser_t *parser);
static bool parse_block_into(parser_t *parser, ast_block *block);
static bool parse_statement_or_block(parser_t *parser, ast_expression **out);
break;
case opid1('*'):
if (exprs[0]->expression.vtype != exprs[1]->expression.vtype &&
- exprs[0]->expression.vtype != TYPE_VECTOR &&
- exprs[0]->expression.vtype != TYPE_FLOAT &&
- exprs[1]->expression.vtype != TYPE_VECTOR &&
- exprs[1]->expression.vtype != TYPE_FLOAT)
+ !(exprs[0]->expression.vtype == TYPE_VECTOR &&
+ exprs[1]->expression.vtype == TYPE_FLOAT) &&
+ !(exprs[1]->expression.vtype == TYPE_VECTOR &&
+ exprs[0]->expression.vtype == TYPE_FLOAT)
+ )
{
parseerror(parser, "invalid types used in expression: cannot multiply types %s and %s",
type_name[exprs[1]->expression.vtype],
ast_type_to_string(exprs[1], ty2, sizeof(ty2));
parseerror(parser, "invalid types in assignment: cannot assign %s to %s", ty2, ty1);
}
- else if (!ast_compare_type(exprs[0], exprs[1])) {
+ else if (exprs[1]->expression.vtype != TYPE_NIL &&
+ !ast_compare_type(exprs[0], exprs[1]))
+ {
ast_type_to_string(exprs[0], ty1, sizeof(ty1));
ast_type_to_string(exprs[1], ty2, sizeof(ty2));
if (OPTS_FLAG(ASSIGN_FUNCTION_TYPES) &&
const oper_info *olast = NULL;
size_t o;
for (o = 0; o < operator_count; ++o) {
- if ((!(operators[o].flags & OP_PREFIX) == wantop) &&
+ if (((!(operators[o].flags & OP_PREFIX) == !!wantop)) &&
/* !(operators[o].flags & OP_SUFFIX) && / * remove this */
!strcmp(parser_tokval(parser), operators[o].op))
{
return true;
}
+static bool parse_while_go(parser_t *parser, ast_block *block, ast_expression **out);
static bool parse_while(parser_t *parser, ast_block *block, ast_expression **out)
+{
+ bool rv;
+ char *label = NULL;
+
+ /* skip the 'while' and get the body */
+ if (!parser_next(parser)) {
+ if (OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "expected loop label or 'while' condition in parenthesis");
+ else
+ parseerror(parser, "expected 'while' condition in parenthesis");
+ return false;
+ }
+
+ if (parser->tok == ':') {
+ if (!OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "labeled loops not activated, try using -floop-labels");
+ if (!parser_next(parser) || parser->tok != TOKEN_IDENT) {
+ parseerror(parser, "expected loop label");
+ return false;
+ }
+ label = util_strdup(parser_tokval(parser));
+ if (!parser_next(parser)) {
+ mem_d(label);
+ parseerror(parser, "expected 'while' condition in parenthesis");
+ return false;
+ }
+ }
+
+ if (parser->tok != '(') {
+ parseerror(parser, "expected 'while' condition in parenthesis");
+ return false;
+ }
+
+ vec_push(parser->breaks, label);
+ vec_push(parser->continues, label);
+
+ rv = parse_while_go(parser, block, out);
+ if (label)
+ mem_d(label);
+ if (vec_last(parser->breaks) != label || vec_last(parser->continues) != label) {
+ parseerror(parser, "internal error: label stack corrupted");
+ rv = false;
+ ast_delete(*out);
+ *out = NULL;
+ }
+ else {
+ vec_pop(parser->breaks);
+ vec_pop(parser->continues);
+ }
+ return rv;
+}
+
+static bool parse_while_go(parser_t *parser, ast_block *block, ast_expression **out)
{
ast_loop *aloop;
ast_expression *cond, *ontrue;
(void)block; /* not touching */
- /* skip the 'while' and check for opening paren */
- if (!parser_next(parser) || parser->tok != '(') {
- parseerror(parser, "expected 'while' condition in parenthesis");
- return false;
- }
/* parse into the expression */
if (!parser_next(parser)) {
parseerror(parser, "expected 'while' condition after opening paren");
return true;
}
+static bool parse_dowhile_go(parser_t *parser, ast_block *block, ast_expression **out);
static bool parse_dowhile(parser_t *parser, ast_block *block, ast_expression **out)
+{
+ bool rv;
+ char *label = NULL;
+
+ /* skip the 'do' and get the body */
+ if (!parser_next(parser)) {
+ if (OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "expected loop label or body");
+ else
+ parseerror(parser, "expected loop body");
+ return false;
+ }
+
+ if (parser->tok == ':') {
+ if (!OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "labeled loops not activated, try using -floop-labels");
+ if (!parser_next(parser) || parser->tok != TOKEN_IDENT) {
+ parseerror(parser, "expected loop label");
+ return false;
+ }
+ label = util_strdup(parser_tokval(parser));
+ if (!parser_next(parser)) {
+ mem_d(label);
+ parseerror(parser, "expected loop body");
+ return false;
+ }
+ }
+
+ vec_push(parser->breaks, label);
+ vec_push(parser->continues, label);
+
+ rv = parse_dowhile_go(parser, block, out);
+ if (label)
+ mem_d(label);
+ if (vec_last(parser->breaks) != label || vec_last(parser->continues) != label) {
+ parseerror(parser, "internal error: label stack corrupted");
+ rv = false;
+ ast_delete(*out);
+ *out = NULL;
+ }
+ else {
+ vec_pop(parser->breaks);
+ vec_pop(parser->continues);
+ }
+ return rv;
+}
+
+static bool parse_dowhile_go(parser_t *parser, ast_block *block, ast_expression **out)
{
ast_loop *aloop;
ast_expression *cond, *ontrue;
(void)block; /* not touching */
- /* skip the 'do' and get the body */
- if (!parser_next(parser)) {
- parseerror(parser, "expected loop body");
- return false;
- }
if (!parse_statement_or_block(parser, &ontrue))
return false;
return true;
}
+static bool parse_for_go(parser_t *parser, ast_block *block, ast_expression **out);
static bool parse_for(parser_t *parser, ast_block *block, ast_expression **out)
+{
+ bool rv;
+ char *label = NULL;
+
+ /* skip the 'for' and check for opening paren */
+ if (!parser_next(parser)) {
+ if (OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "expected loop label or 'for' expressions in parenthesis");
+ else
+ parseerror(parser, "expected 'for' expressions in parenthesis");
+ return false;
+ }
+
+ if (parser->tok == ':') {
+ if (!OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "labeled loops not activated, try using -floop-labels");
+ if (!parser_next(parser) || parser->tok != TOKEN_IDENT) {
+ parseerror(parser, "expected loop label");
+ return false;
+ }
+ label = util_strdup(parser_tokval(parser));
+ if (!parser_next(parser)) {
+ mem_d(label);
+ parseerror(parser, "expected 'for' expressions in parenthesis");
+ return false;
+ }
+ }
+
+ if (parser->tok != '(') {
+ parseerror(parser, "expected 'for' expressions in parenthesis");
+ return false;
+ }
+
+ vec_push(parser->breaks, label);
+ vec_push(parser->continues, label);
+
+ rv = parse_for_go(parser, block, out);
+ if (label)
+ mem_d(label);
+ if (vec_last(parser->breaks) != label || vec_last(parser->continues) != label) {
+ parseerror(parser, "internal error: label stack corrupted");
+ rv = false;
+ ast_delete(*out);
+ *out = NULL;
+ }
+ else {
+ vec_pop(parser->breaks);
+ vec_pop(parser->continues);
+ }
+ return rv;
+}
+static bool parse_for_go(parser_t *parser, ast_block *block, ast_expression **out)
{
ast_loop *aloop;
ast_expression *initexpr, *cond, *increment, *ontrue;
increment = NULL;
ontrue = NULL;
- /* skip the 'while' and check for opening paren */
- if (!parser_next(parser) || parser->tok != '(') {
- parseerror(parser, "expected 'for' expressions in parenthesis");
- goto onerr;
- }
/* parse into the expression */
if (!parser_next(parser)) {
parseerror(parser, "expected 'for' initializer after opening paren");
"current standard does not allow variable declarations in for-loop initializers"))
goto onerr;
}
- if (!parse_variable(parser, block, true, CV_VAR, typevar, false, false, false))
+ if (!parse_variable(parser, block, true, CV_VAR, typevar, false, false, 0))
goto onerr;
}
else if (parser->tok != ';')
static bool parse_break_continue(parser_t *parser, ast_block *block, ast_expression **out, bool is_continue)
{
- lex_ctx ctx = parser_ctx(parser);
+ size_t i;
+ unsigned int levels = 0;
+ lex_ctx ctx = parser_ctx(parser);
+ const char **loops = (is_continue ? parser->continues : parser->breaks);
(void)block; /* not touching */
+ if (!parser_next(parser)) {
+ parseerror(parser, "expected semicolon or loop label");
+ return false;
+ }
- if (!parser_next(parser) || parser->tok != ';') {
+ if (parser->tok == TOKEN_IDENT) {
+ if (!OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "labeled loops not activated, try using -floop-labels");
+ i = vec_size(loops);
+ while (i--) {
+ if (loops[i] && !strcmp(loops[i], parser_tokval(parser)))
+ break;
+ if (!i) {
+ parseerror(parser, "no such loop to %s: `%s`",
+ (is_continue ? "continue" : "break out of"),
+ parser_tokval(parser));
+ return false;
+ }
+ ++levels;
+ }
+ if (!parser_next(parser)) {
+ parseerror(parser, "expected semicolon");
+ return false;
+ }
+ }
+
+ if (parser->tok != ';') {
parseerror(parser, "expected semicolon");
return false;
}
if (!parser_next(parser))
parseerror(parser, "parse error");
- *out = (ast_expression*)ast_breakcont_new(ctx, is_continue);
+ *out = (ast_expression*)ast_breakcont_new(ctx, is_continue, levels);
return true;
}
/* returns true when it was a variable qualifier, false otherwise!
* on error, cvq is set to CV_WRONG
*/
-static bool parse_var_qualifiers(parser_t *parser, bool with_local, int *cvq, bool *noref, bool *noreturn, bool *is_static)
+static bool parse_qualifiers(parser_t *parser, bool with_local, int *cvq, bool *noref, bool *is_static, uint32_t *_flags)
{
bool had_const = false;
bool had_var = false;
bool had_noref = false;
- bool had_noreturn = false;
bool had_attrib = false;
bool had_static = false;
+ uint32_t flags = 0;
*cvq = CV_NONE;
for (;;) {
return false;
}
if (!strcmp(parser_tokval(parser), "noreturn")) {
- had_noreturn = true;
+ flags |= AST_FLAG_NORETURN;
if (!parser_next(parser) || parser->tok != TOKEN_ATTRIBUTE_CLOSE) {
parseerror(parser, "`noreturn` attribute has no parameters, expected `]]`");
*cvq = CV_WRONG;
return false;
}
}
+            else if (!strcmp(parser_tokval(parser), "inline")) {
+                /* [[inline]] takes no parameters: the next token must close the attribute */
+                flags |= AST_FLAG_INLINE;
+                if (!parser_next(parser) || parser->tok != TOKEN_ATTRIBUTE_CLOSE) {
+                    parseerror(parser, "`inline` attribute has no parameters, expected `]]`");
+                    *cvq = CV_WRONG;
+                    return false;
+                }
+            }
else
{
/* Skip tokens until we hit a ]] */
had_var = true;
else if (!strcmp(parser_tokval(parser), "noref"))
had_noref = true;
- else if (!had_const && !had_var && !had_noref && !had_noreturn && !had_attrib && !had_static) {
+ else if (!had_const && !had_var && !had_noref && !had_attrib && !had_static && !flags) {
return false;
}
else
else
*cvq = CV_NONE;
*noref = had_noref;
- *noreturn = had_noreturn;
*is_static = had_static;
+ *_flags = flags;
return true;
onerr:
parseerror(parser, "parse error after variable qualifier");
return true;
}
+static bool parse_switch_go(parser_t *parser, ast_block *block, ast_expression **out);
static bool parse_switch(parser_t *parser, ast_block *block, ast_expression **out)
+{
+ bool rv;
+ char *label = NULL;
+
+ /* skip the 'while' and get the body */
+ if (!parser_next(parser)) {
+ if (OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "expected loop label or 'switch' operand in parenthesis");
+ else
+ parseerror(parser, "expected 'switch' operand in parenthesis");
+ return false;
+ }
+
+ if (parser->tok == ':') {
+ if (!OPTS_FLAG(LOOP_LABELS))
+ parseerror(parser, "labeled loops not activated, try using -floop-labels");
+ if (!parser_next(parser) || parser->tok != TOKEN_IDENT) {
+ parseerror(parser, "expected loop label");
+ return false;
+ }
+ label = util_strdup(parser_tokval(parser));
+ if (!parser_next(parser)) {
+ mem_d(label);
+ parseerror(parser, "expected 'switch' operand in parenthesis");
+ return false;
+ }
+ }
+
+ if (parser->tok != '(') {
+ parseerror(parser, "expected 'switch' operand in parenthesis");
+ return false;
+ }
+
+ vec_push(parser->breaks, label);
+
+ rv = parse_switch_go(parser, block, out);
+ if (label)
+ mem_d(label);
+ if (vec_last(parser->breaks) != label) {
+ parseerror(parser, "internal error: label stack corrupted");
+ rv = false;
+ ast_delete(*out);
+ *out = NULL;
+ }
+ else {
+ vec_pop(parser->breaks);
+ }
+ return rv;
+}
+
+static bool parse_switch_go(parser_t *parser, ast_block *block, ast_expression **out)
{
ast_expression *operand;
ast_value *opval;
ast_switch_case swcase;
int cvq;
- bool noref, noreturn, is_static;
+ bool noref, is_static;
+ uint32_t qflags = 0;
lex_ctx ctx = parser_ctx(parser);
(void)block; /* not touching */
(void)opval;
- /* parse over the opening paren */
- if (!parser_next(parser) || parser->tok != '(') {
- parseerror(parser, "expected switch operand in parenthesis");
- return false;
- }
-
/* parse into the expression */
if (!parser_next(parser)) {
parseerror(parser, "expected switch operand");
if (parser->tok == TOKEN_IDENT)
typevar = parser_find_typedef(parser, parser_tokval(parser), 0);
if (typevar || parser->tok == TOKEN_TYPENAME) {
- if (!parse_variable(parser, block, false, CV_NONE, typevar, false, false, false)) {
+ if (!parse_variable(parser, block, false, CV_NONE, typevar, false, false, 0)) {
ast_delete(switchnode);
return false;
}
continue;
}
- if (parse_var_qualifiers(parser, true, &cvq, &noref, &noreturn, &is_static))
+ if (parse_qualifiers(parser, true, &cvq, &noref, &is_static, &qflags))
{
if (cvq == CV_WRONG) {
ast_delete(switchnode);
return false;
}
- if (!parse_variable(parser, block, false, cvq, NULL, noref, noreturn, is_static)) {
+ if (!parse_variable(parser, block, false, cvq, NULL, noref, is_static, qflags)) {
ast_delete(switchnode);
return false;
}
static bool parse_statement(parser_t *parser, ast_block *block, ast_expression **out, bool allow_cases)
{
- bool noref, noreturn, is_static;
+ bool noref, is_static;
int cvq = CV_NONE;
+ uint32_t qflags = 0;
ast_value *typevar = NULL;
*out = NULL;
if (parsewarning(parser, WARN_EXTENSIONS, "missing 'local' keyword when declaring a local variable"))
return false;
}
- if (!parse_variable(parser, block, false, CV_NONE, typevar, false, false, false))
+ if (!parse_variable(parser, block, false, CV_NONE, typevar, false, false, 0))
return false;
return true;
}
- else if (parse_var_qualifiers(parser, !!block, &cvq, &noref, &noreturn, &is_static))
+ else if (parse_qualifiers(parser, !!block, &cvq, &noref, &is_static, &qflags))
{
if (cvq == CV_WRONG)
return false;
- return parse_variable(parser, block, true, cvq, NULL, noref, noreturn, is_static);
+ return parse_variable(parser, block, true, cvq, NULL, noref, is_static, qflags);
}
else if (parser->tok == TOKEN_KEYWORD)
{
return true;
}
-static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofields, int qualifier, ast_value *cached_typedef, bool noref, bool noreturn, bool is_static)
+static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofields, int qualifier, ast_value *cached_typedef, bool noref, bool is_static, uint32_t qflags)
{
ast_value *var;
ast_value *proto;
/* in a noref section we simply bump the usecount */
if (noref || parser->noref)
var->uses++;
- if (noreturn)
- var->expression.flags |= AST_FLAG_NORETURN;
+ var->expression.flags |= qflags;
/* Part 1:
* check for validity: (end_sys_..., multiple-definitions, prototypes, ...)
* Also: if there was a prototype, `var` will be deleted and set to `proto` which
* is then filled with the previous definition and the parameter-names replaced.
*/
+ if (!strcmp(var->name, "nil")) {
+ if (OPTS_FLAG(UNTYPED_NIL)) {
+ if (!localblock || !OPTS_FLAG(PERMISSIVE))
+ parseerror(parser, "name `nil` not allowed (try -fpermissive)");
+ } else
+ (void)!parsewarning(parser, WARN_RESERVED_NAMES, "variable name `nil` is reserved");
+ }
if (!localblock) {
/* Deal with end_sys_ vars */
was_end = false;
{
int cvq = CV_WRONG;
bool noref = false;
- bool noreturn = false;
bool is_static = false;
+ uint32_t qflags = 0;
ast_value *istype = NULL;
if (parser->tok == TOKEN_IDENT)
if (istype || parser->tok == TOKEN_TYPENAME || parser->tok == '.')
{
- return parse_variable(parser, NULL, false, CV_NONE, istype, false, false, false);
+ return parse_variable(parser, NULL, false, CV_NONE, istype, false, false, 0);
}
- else if (parse_var_qualifiers(parser, false, &cvq, &noref, &noreturn, &is_static))
+ else if (parse_qualifiers(parser, false, &cvq, &noref, &is_static, &qflags))
{
if (cvq == CV_WRONG)
return false;
- return parse_variable(parser, NULL, true, cvq, NULL, noref, noreturn, is_static);
+ return parse_variable(parser, NULL, true, cvq, NULL, noref, is_static, qflags);
}
else if (parser->tok == TOKEN_KEYWORD)
{
bool parser_init()
{
+ lex_ctx empty_ctx;
size_t i;
parser = (parser_t*)mem_a(sizeof(parser_t));
vec_push(parser->variables, parser->htglobals = util_htnew(PARSER_HT_SIZE));
vec_push(parser->typedefs, util_htnew(TYPEDEF_HT_SIZE));
vec_push(parser->_blocktypedefs, 0);
+
+ empty_ctx.file = "<internal>";
+ empty_ctx.line = 0;
+ parser->nil = ast_value_new(empty_ctx, "nil", TYPE_NIL);
+ if (OPTS_FLAG(UNTYPED_NIL))
+ util_htset(parser->htglobals, "nil", (void*)parser->nil);
return true;
}
vec_free(parser->labels);
vec_free(parser->gotos);
+ vec_free(parser->breaks);
+ vec_free(parser->continues);
mem_d(parser);
}
if (!ast_istype(parser->globals[i], ast_value))
continue;
asvalue = (ast_value*)(parser->globals[i]);
+ if (asvalue->cvq == CV_CONST && !asvalue->hasvalue)
+ (void)!compile_warning(ast_ctx(asvalue), WARN_UNINITIALIZED_CONSTANT,
+ "uninitialized constant: `%s`",
+ asvalue->name);
+ else if ((asvalue->cvq == CV_NONE || asvalue->cvq == CV_CONST) && !asvalue->hasvalue)
+ (void)!compile_warning(ast_ctx(asvalue), WARN_UNINITIALIZED_GLOBAL,
+ "uninitialized global: `%s`",
+ asvalue->name);
if (!ast_generate_accessors(asvalue, ir)) {
ir_builder_delete(ir);
return false;
--- /dev/null
+#!/bin/sh
+
+#these are stupid flags ... i.e to inhibit warnings that are just stupid
+FLAGS_STUPID="\
+ -redef \
+ -noeffect \
+ -nullderef \
+ -usedef \
+ -type \
+ -mustfreeonly \
+ -nullstate \
+ -varuse \
+ -mustfreefresh \
+ -compdestroy \
+ -compmempass \
+ -nullpass \
+ -onlytrans \
+ -predboolint \
+ -boolops \
+ -exportlocal \
+ -incondefs \
+ -macroredef \
+ -retvalint \
+ -nullret \
+ -predboolothers \
+ -globstate \
+ -dependenttrans \
+ -branchstate \
+ -compdef \
+ -temptrans \
+ -usereleased \
+ -warnposix"
+
+#flags that have no place anywhere else
+#mostly stupid
+FLAGS_OTHERS="\
+ -shiftimplementation \
+ +charindex \
+ -kepttrans \
+ -unqualifiedtrans \
+ +matchanyintegral \
+ -bufferoverflowhigh \
+ +voidabstract"
+
+#these are flags that MAYBE shouldn't be required
+# -nullassign should be suppressed in code with /*@null*/
+# (although that might be odd?)
+FLAGS_MAYBE="\
+ -nullassign \
+ -unrecog \
+ -casebreak \
+ -retvalbool \
+ -retvalother \
+ -mayaliasunique \
+ -realcompare \
+ -observertrans \
+ -shiftnegative \
+ -freshtrans \
+ -abstract \
+ -statictrans"
+
+#these are flags that shouldn't be required, i.e. to fix in the code so that
+#these don't need to be here to inhibit the warning
+# remove one flag from here at a time while fixing the code so that it is no longer needed
+FLAGS_TOFIX="\
+ -castfcnptr \
+ -evalorder"
+
+
+splint $FLAGS_STUPID $FLAGS_MAYBE $FLAGS_TOFIX $FLAGS_OTHERS *.c *.h
close(2), dup(errhandle[1]);
execvp(*argv, argv);
- exit(1);
+ exit(EXIT_FAILURE);
} else {
/* fork failed */
goto task_popen_error_3;
}
- /*
- * clang is stupid, it doesn't understand that yes, this code
- * is actually reachable.
- */
-# ifdef __clang__
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wunreachable-code"
-# endif
- if (argv)
- vec_free(argv);
-
-# ifdef __clang__
-# pragma clang diagnostic pop
-# endif
-
- return data->handles;
-
task_popen_error_3: close(errhandle[0]), close(errhandle[1]);
task_popen_error_2: close(outhandle[0]), close(outhandle[1]);
task_popen_error_1: close(inhandle [0]), close(inhandle [1]);
I: noref.qc
D: noref keyword and pragma
T: -compile
-C: -std=qcc -Wall -Werror
+C: -std=qcc -Wall -Werror -Wno-uninitialized-global
#include "gmqcc.h"
static unsigned char utf8_lengths[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* ascii characters */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0xBF are within multibyte sequences
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * they could be interpreted as 2-byte starts but
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * the codepoint would be < 127
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *
- 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, * C0 and C1 would also result in overlong encodings
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- /* with F5 the codepoint is above 0x10FFFF,
- * F8-FB would start 5-byte sequences
- * FC-FD would start 6-byte sequences
- * ...
- */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* ascii characters */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0xBF are within multibyte sequences */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* they could be interpreted as 2-byte starts but */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* the codepoint would be < 127 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0 and C1 would also result in overlong encodings */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* */
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ /* with F5 the codepoint is above 0x10FFFF,
+ * F8-FB would start 5-byte sequences
+ * FC-FD would start 6-byte sequences
+ * ...
+ */
};
static uchar_t utf8_range[5] = {
- 1, /* invalid - let's not allow the creation of 0-bytes :P
- 1, * ascii minimum
- 0x80, * 2-byte minimum
- 0x800, * 3-byte minimum
- 0x10000, * 4-byte minimum */
+ 1, /* invalid - let's not allow the creation of 0-bytes :P */
+ 1, /* ascii minimum */
+ 0x80, /* 2-byte minimum */
+ 0x800, /* 3-byte minimum */
+ 0x10000, /* 4-byte minimum */
};
/** Analyze the next character and return various information if requested.
*/
+/* Skips leading bytes that cannot start a sequence, then decodes one code
+ * point from _s, looking at most _maxlen bytes ahead for the lead byte.
+ * _start receives the offset of the lead byte, _len the sequence length in
+ * bytes and _ch the decoded code point; each of them may be NULL.
+ * Returns false when the input ends before a lead byte is found (then
+ * *_start and *_len = 0 are still written), on an invalid continuation byte
+ * (unless that byte is the terminating NUL, which falls through to `done`),
+ * and on overlong or out-of-range code points.
+ */
bool u8_analyze(const char *_s, size_t *_start, size_t *_len, uchar_t *_ch, size_t _maxlen)
{
- const unsigned char *s = (const unsigned char*)_s;
- size_t i, j;
- size_t bits = 0;
- uchar_t ch;
+ const unsigned char *s = (const unsigned char*)_s;
+ size_t i, j;
+ size_t bits = 0;
+ uchar_t ch;
- i = 0;
+ i = 0;
/* findchar: */
- while (i < _maxlen && s[i] && (bits = utf8_lengths[s[i]]) == 0)
- ++i;
+ while (i < _maxlen && s[i] && (bits = utf8_lengths[s[i]]) == 0)
+ ++i;
- if (i >= _maxlen || !s[i]) {
- if (_start) *_start = i;
- if (_len) *_len = 0;
- return false;
- }
+ if (i >= _maxlen || !s[i]) {
+ if (_start) *_start = i;
+ if (_len) *_len = 0;
+ return false;
+ }
- if (bits == 1) { /* ascii */
- if (_start) *_start = i;
- if (_len) *_len = 1;
- if (_ch) *_ch = (uchar_t)s[i];
- return true;
- }
+ if (bits == 1) { /* ascii */
+ if (_start) *_start = i;
+ if (_len) *_len = 1;
+ if (_ch) *_ch = (uchar_t)s[i];
+ return true;
+ }
- ch = (s[i] & (0xFF >> bits));
- for (j = 1; j < bits; ++j)
- {
- if ( (s[i+j] & 0xC0) != 0x80 )
- {
- i += j;
- /* in gmqcc, invalid / overlong encodings are considered an error
- * goto findchar;
- */
- if (!s[i]) goto done;
- return false;
- }
- ch = (ch << 6) | (s[i+j] & 0x3F);
- }
- if (ch < utf8_range[bits] || ch >= 0x10FFFF)
- {
- /* same: error
- * i += bits;
- * goto findchar;
- */
- return false;
- }
+ ch = (s[i] & (0xFF >> bits));
+ for (j = 1; j < bits; ++j)
+ {
+ if ( (s[i+j] & 0xC0) != 0x80 )
+ {
+ i += j;
+ /* in gmqcc, invalid / overlong encodings are considered an error
+ * goto findchar;
+ */
+ if (!s[i]) goto done;
+ return false;
+ }
+ ch = (ch << 6) | (s[i+j] & 0x3F);
+ }
+ /* U+10FFFF is the last valid code point (RFC 3629) and u8_fromchar
+ * encodes it, so only values above it are rejected here
+ */
+ if (ch < utf8_range[bits] || ch > 0x10FFFF)
+ {
+ /* same: error
+ * i += bits;
+ * goto findchar;
+ */
+ return false;
+ }
done:
- if (_start)
- *_start = i;
- if (_len)
- *_len = bits;
- if (_ch)
- *_ch = ch;
- return true;
+ if (_start)
+ *_start = i;
+ if (_len)
+ *_len = bits;
+ if (_ch)
+ *_ch = ch;
+ return true;
}
/* might come in handy */
size_t u8_strlen(const char *_s)
{
- size_t st, ln;
- size_t len = 0;
- const unsigned char *s = (const unsigned char*)_s;
+ size_t st, ln;
+ size_t len = 0;
+ const unsigned char *s = (const unsigned char*)_s;
- while (*s)
- {
- /* ascii char, skip u8_analyze */
- if (*s < 0x80)
- {
- ++len;
- ++s;
- continue;
- }
+ while (*s)
+ {
+ /* ascii char, skip u8_analyze */
+ if (*s < 0x80)
+ {
+ ++len;
+ ++s;
+ continue;
+ }
- /* invalid, skip u8_analyze */
- if (*s < 0xC2)
- {
- ++s;
- continue;
- }
+ /* invalid, skip u8_analyze */
+ if (*s < 0xC2)
+ {
+ ++s;
+ continue;
+ }
- if (!u8_analyze((const char*)s, &st, &ln, NULL, 0x10))
- break;
- /* valid character, skip after it */
- s += st + ln;
- ++len;
- }
- return len;
+ if (!u8_analyze((const char*)s, &st, &ln, NULL, 0x10))
+ break;
+ /* valid character, skip after it */
+ s += st + ln;
+ ++len;
+ }
+ return len;
}
size_t u8_strnlen(const char *_s, size_t n)
{
- size_t st, ln;
- size_t len = 0;
- const unsigned char *s = (const unsigned char*)_s;
+ size_t st, ln;
+ size_t len = 0;
+ const unsigned char *s = (const unsigned char*)_s;
- while (*s && n)
- {
- /* ascii char, skip u8_analyze */
- if (*s < 0x80)
- {
- ++len;
- ++s;
- --n;
- continue;
- }
+ while (*s && n)
+ {
+ /* ascii char, skip u8_analyze */
+ if (*s < 0x80)
+ {
+ ++len;
+ ++s;
+ --n;
+ continue;
+ }
- /* invalid, skip u8_analyze */
- if (*s < 0xC2)
- {
- ++s;
- --n;
- continue;
- }
+ /* invalid, skip u8_analyze */
+ if (*s < 0xC2)
+ {
+ ++s;
+ --n;
+ continue;
+ }
- if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
- break;
- /* valid character, see if it's still inside the range specified by n: */
- if (n < st + ln)
- return len;
- ++len;
- n -= st + ln;
- s += st + ln;
- }
- return len;
+ if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
+ break;
+ /* valid character, see if it's still inside the range specified by n: */
+ if (n < st + ln)
+ return len;
+ ++len;
+ n -= st + ln;
+ s += st + ln;
+ }
+ return len;
}
/* Required for character constants */
uchar_t u8_getchar(const char *_s, const char **_end)
{
- size_t st, ln;
- uchar_t ch;
+ size_t st, ln;
+ uchar_t ch;
- if (!u8_analyze(_s, &st, &ln, &ch, 0x10))
- ch = 0;
- if (_end)
- *_end = _s + st + ln;
- return ch;
+ if (!u8_analyze(_s, &st, &ln, &ch, 0x10))
+ ch = 0;
+ if (_end)
+ *_end = _s + st + ln;
+ return ch;
}
uchar_t u8_getnchar(const char *_s, const char **_end, size_t _maxlen)
{
- size_t st, ln;
- uchar_t ch;
+ size_t st, ln;
+ uchar_t ch;
- if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
- ch = 0;
- if (_end)
- *_end = _s + st + ln;
- return ch;
+ if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
+ ch = 0;
+ if (_end)
+ *_end = _s + st + ln;
+ return ch;
}
/* required for \x{asdf}-like string escape sequences */
int u8_fromchar(uchar_t w, char *to, size_t maxlen)
{
- if (maxlen < 1)
- return 0;
+ if (maxlen < 1)
+ return 0;
- if (!w)
- return 0;
+ if (!w)
+ return 0;
/* We may want an -f flag for this behaviour...
- if (w >= 0xE000)
- w -= 0xE000;
+ if (w >= 0xE000)
+ w -= 0xE000;
*/
- if (w < 0x80)
- {
- to[0] = (char)w;
- if (maxlen < 2)
- return -1;
- to[1] = 0;
- return 1;
- }
- /* for a little speedup */
- if (w < 0x800)
- {
- if (maxlen < 3)
- {
- to[0] = 0;
- return -1;
- }
- to[2] = 0;
- to[1] = 0x80 | (w & 0x3F); w >>= 6;
- to[0] = 0xC0 | w;
- return 2;
- }
- if (w < 0x10000)
- {
- if (maxlen < 4)
- {
- to[0] = 0;
- return -1;
- }
- to[3] = 0;
- to[2] = 0x80 | (w & 0x3F); w >>= 6;
- to[1] = 0x80 | (w & 0x3F); w >>= 6;
- to[0] = 0xE0 | w;
- return 3;
- }
+ if (w < 0x80)
+ {
+ to[0] = (char)w;
+ if (maxlen < 2)
+ return -1;
+ to[1] = 0;
+ return 1;
+ }
+ /* for a little speedup */
+ if (w < 0x800)
+ {
+ if (maxlen < 3)
+ {
+ to[0] = 0;
+ return -1;
+ }
+ to[2] = 0;
+ to[1] = 0x80 | (w & 0x3F); w >>= 6;
+ to[0] = 0xC0 | w;
+ return 2;
+ }
+ if (w < 0x10000)
+ {
+ if (maxlen < 4)
+ {
+ to[0] = 0;
+ return -1;
+ }
+ to[3] = 0;
+ to[2] = 0x80 | (w & 0x3F); w >>= 6;
+ to[1] = 0x80 | (w & 0x3F); w >>= 6;
+ to[0] = 0xE0 | w;
+ return 3;
+ }
- /* RFC 3629 */
- if (w <= 0x10FFFF)
- {
- if (maxlen < 5)
- {
- to[0] = 0;
- return -1;
- }
- to[4] = 0;
- to[3] = 0x80 | (w & 0x3F); w >>= 6;
- to[2] = 0x80 | (w & 0x3F); w >>= 6;
- to[1] = 0x80 | (w & 0x3F); w >>= 6;
- to[0] = 0xF0 | w;
- return 4;
- }
- return 0;
+ /* RFC 3629 */
+ if (w <= 0x10FFFF)
+ {
+ if (maxlen < 5)
+ {
+ to[0] = 0;
+ return -1;
+ }
+ to[4] = 0;
+ to[3] = 0x80 | (w & 0x3F); w >>= 6;
+ to[2] = 0x80 | (w & 0x3F); w >>= 6;
+ to[1] = 0x80 | (w & 0x3F); w >>= 6;
+ to[0] = 0xF0 | w;
+ return 4;
+ }
+ return 0;
}
mem_d(ht);
}
+/*
+ * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
+ * exists, otherwise compiler error.
+ *
+ * Formats `fmt` with `args` into a buffer obtained from mem_a() and stores
+ * that buffer in `*ret`. Returns the number of characters written, not
+ * counting the terminating NUL, or a negative value on failure (formatting
+ * error or allocation failure); on failure `*ret` is NULL.
+ */
+int util_vasprintf(char **ret, const char *fmt, va_list args) {
+    int     len;
+    char   *buffer;
+    va_list copy;
+
+    *ret = NULL;
+
+    /* the sizing pass consumes a va_list, so measure on a private copy */
+    va_copy(copy, args);
+    len = vsnprintf(NULL, 0, fmt, copy);
+    va_end(copy);
+    if (len < 0)
+        return len;
+
+    /* an allocation failure must not be reported as a successful length */
+    if (!(buffer = (char*)mem_a((size_t)len + 1)))
+        return -1;
+
+    if ((len = vsnprintf(buffer, (size_t)len + 1, fmt, args)) < 0) {
+        mem_d(buffer);
+        return len;
+    }
+
+    *ret = buffer;
+    return len;
+}
+/* Variadic front end: packs the arguments and defers to util_vasprintf. */
+int util_asprintf(char **ret, const char *fmt, ...) {
+    int     written;
+    va_list ap;
+
+    va_start(ap, fmt);
+    written = util_vasprintf(ret, fmt, ap);
+    va_end(ap);
+
+    return written;
+}
+
/*
* Implementation of the Mersenne twister PRNG (pseudo random numer
* generator). Implementation of MT19937. Has a period of 2^19937-1