From 2d4ac18bc2061b2330fe886691af75bf86ca21db Mon Sep 17 00:00:00 2001
From: divverent <divverent@d7cf8633-e32d-0410-b094-e92efae38249>
Date: Wed, 23 Dec 2009 12:15:47 +0000
Subject: [PATCH] make utf8 functions safer: give them a size limit in memory,
 to allow working with non-NUL-terminated strings

git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@9646 d7cf8633-e32d-0410-b094-e92efae38249
---
 console.c |  2 +-
 gl_draw.c | 11 +++++++----
 utf8lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++---------
 utf8lib.h |  1 +
 4 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/console.c b/console.c
index 4d279b96..6a59b970 100644
--- a/console.c
+++ b/console.c
@@ -1464,7 +1464,7 @@ int Con_DisplayLineFunc(void *passthrough, const char *line, size_t length, floa
 		if(isContinuation && *ti->continuationString)
 			x += (int) DrawQ_String_Font(x, ti->y, ti->continuationString, strlen(ti->continuationString), ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, NULL, false, ti->font);
 		if(length > 0)
-			DrawQ_String_Font(x, ti->y, line, u8_strnlen(line, length), ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, &(ti->colorindex), false, ti->font);
+			DrawQ_String_Font(x, ti->y, line, length, ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, &(ti->colorindex), false, ti->font);
 	}
 
 	ti->y += ti->fontsize;
diff --git a/gl_draw.c b/gl_draw.c
index 66b2b89e..7f33797d 100644
--- a/gl_draw.c
+++ b/gl_draw.c
@@ -1075,6 +1075,7 @@ static void DrawQ_GetTextColor(float color[4], int colorindex, float r, float g,
 
 float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w, float h, size_t *maxlen, int *outcolor, qboolean ignorecolorcodes, const dp_font_t *fnt, float maxwidth)
 {
+	const char *text_start = text;
 	int colorindex = STRING_COLOR_DEFAULT;
 	size_t i;
 	float x = 0;
@@ -1083,6 +1084,7 @@ float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w
 	int tempcolorindex;
 	float kx;
 	int map_index = 0;
+	size_t bytes_left;
 	ft2_font_map_t *fontmap = NULL;
 	ft2_font_map_t *map = NULL;
 	ft2_font_map_t *prevmap = NULL;
@@ -1120,9 +1122,9 @@ float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w
 	// maxwidth /= fnt->scale; // w and h are multiplied by it already
 	// ftbase_x = snap_to_pixel_x(0);
 
-	for (i = 0;i < *maxlen && *text;)
+	for (i = 0;((bytes_left = *maxlen - (text - text_start)) > 0) && *text;)
 	{
-		nextch = ch = u8_getchar(text, &text);
+		nextch = ch = u8_getnchar(text, &text, bytes_left);
 		//i = text - text_start;
 		if (!ch)
 			break;
@@ -1253,6 +1255,7 @@ float DrawQ_String_Font(float startx, float starty, const char *text, size_t max
 	ft2_font_t *ft2 = fnt->ft2;
 	qboolean snap = true;
 	float pix_x, pix_y;
+	size_t bytes_left;
 
 	int tw, th;
 	tw = R_TextureWidth(fnt->tex);
@@ -1324,9 +1327,9 @@ float DrawQ_String_Font(float startx, float starty, const char *text, size_t max
 			y += r_textshadow.value * vid.height / vid_conheight.value;
 		}
 		*/
-		for (i = 0;i < maxlen && *text;)
+		for (i = 0;((bytes_left = maxlen - (text - text_start)) > 0) && *text;)
 		{
-			nextch = ch = u8_getchar(text, &text);
+			nextch = ch = u8_getnchar(text, &text, bytes_left);
 			//i = text - text_start;
 			if (!ch)
 				break;
diff --git a/utf8lib.c b/utf8lib.c
index 8b50e753..bc3ce39a 100644
--- a/utf8lib.c
+++ b/utf8lib.c
@@ -25,9 +25,11 @@ UTF-8 encoding and decoding functions follow.
  * @param _start  Filled with the start byte-offset of the next valid character
  * @param _len    Fileed with the length of the next valid character
  * @param _ch     Filled with the unicode value of the next character
+ * @param _maxlen Maximum number of bytes to read from _s
  * @return        Whether or not another valid character is in the string
  */
-static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch)
+#define U8_ANALYZE_INFINITY 7
+static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, size_t _maxlen)
 {
 	const unsigned char *s = (const unsigned char*)_s;
 	unsigned char bt, bc;
@@ -39,10 +41,12 @@ static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *
 findchar:
 
 	// <0xC2 is always an overlong encoding, they're invalid, thus skipped
-	while (s[i] && s[i] >= 0x80 && s[i] <= 0xC2) {
+	while (i < _maxlen && s[i] && s[i] >= 0x80 && s[i] <= 0xC2) {
 		//fprintf(stderr, "skipping\n");
 		++i;
 	}
+	if(i >= _maxlen)
+		return false;
 	//fprintf(stderr, "checking\n");
 
 	// If we hit the end, well, we're out and invalid
@@ -72,6 +76,8 @@ findchar:
 		++i;
 		goto findchar;
 	}
+	if(i + bits > _maxlen)
+		return false;
 	// turn bt into a mask and give ch a starting value
 	--bt;
 	ch = (s[i] & bt);
@@ -144,7 +150,7 @@ size_t u8_strlen(const char *_s)
 			continue;
 		}
 
-		if (!u8_analyze((const char*)s, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 			break;
 		// valid character, skip after it
 		s += st + ln;
@@ -189,7 +195,7 @@ size_t u8_strnlen(const char *_s, size_t n)
 			continue;
 		}
 
-		if (!u8_analyze((const char*)s, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
 			break;
 		// valid character, see if it's still inside the range specified by n:
 		if (n < st + ln)
@@ -234,7 +240,7 @@ size_t u8_bytelen(const char *_s, size_t n)
 			continue;
 		}
 
-		if (!u8_analyze((const char*)s, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 			break;
 		--n;
 		s += st + ln;
@@ -265,7 +271,7 @@ int u8_byteofs(const char *_s, size_t i, size_t *len)
 	do
 	{
 		ofs += ln;
-		if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 			return -1;
 		ofs += st;
 	} while(i-- > 0);
@@ -312,7 +318,7 @@ int u8_charidx(const char *_s, size_t i, size_t *len)
 			continue;
 		}
 
-		if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 			return -1;
 		// see if next char is after the bytemark
 		if (ofs + st > i)
@@ -373,7 +379,7 @@ size_t u8_prevbyte(const char *_s, size_t i)
 			continue;
 		}
 
-		if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL))
+		if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 			return lastofs;
 		if (ofs + st > i)
 			return lastofs;
@@ -429,7 +435,37 @@ Uchar u8_getchar(const char *_s, const char **_end)
 		return (Uchar)*(const unsigned char*)_s;
 	}
 	
-	if (!u8_analyze(_s, &st, &ln, &ch))
+	if (!u8_analyze(_s, &st, &ln, &ch, U8_ANALYZE_INFINITY))
+		return 0;
+	if (_end)
+		*_end = _s + st + ln;
+	return ch;
+}
+
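+/** Fetch a character from a UTF-8 encoded string, bounded by a byte limit.
+ * @param _s      The start of a UTF-8 encoded multi-byte character; _maxlen is passed to u8_analyze as the maximum number of bytes to read from _s (when utf8_enable is off, exactly one byte is read and _maxlen is not consulted).
+ * @param _end    If non-NULL, receives a pointer to just after the fetched character.
+ * @return        The 32-bit integer representation of the first multi-byte character, or 0 for invalid characters or ones that do not fit within _maxlen bytes.
+ */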
+Uchar u8_getnchar(const char *_s, const char **_end, size_t _maxlen)
+{
+	size_t st, ln;
+	Uchar ch;
+
+	if (!utf8_enable.integer)
+	{
+		if (_end)
+			*_end = _s + 1;
+		/* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
+		 * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
+		 * rest:
+		 */
+		if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
+			return 0xE000 + (Uchar)*(const unsigned char*)_s;
+		return (Uchar)*(const unsigned char*)_s;
+	}
+	
+	if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
 		return 0;
 	if (_end)
 		*_end = _s + st + ln;
diff --git a/utf8lib.h b/utf8lib.h
index f435bbdf..4133908a 100644
--- a/utf8lib.h
+++ b/utf8lib.h
@@ -36,6 +36,7 @@ int    u8_charidx(const char*, size_t, size_t*);
 size_t u8_bytelen(const char*, size_t);
 size_t u8_prevbyte(const char*, size_t);
 Uchar  u8_getchar(const char*, const char**);
+Uchar  u8_getnchar(const char*, const char**, size_t);
 int    u8_fromchar(Uchar, char*, size_t);
 size_t u8_wcstombs(char*, const Uchar*, size_t);
 size_t u8_COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid);
-- 
2.39.2