utf8lib.c

   1 #include "quakedef.h"
   2 #include "utf8lib.h"
   3
   4 /*
   5 ================================================================================
   6 Initialization of UTF-8 support and new cvars.
   7 ================================================================================
   8 */
   9 // for compatibility this defaults to 0
  10 cvar_t    utf8_enable = {CVAR_SAVE, "utf8_enable", "0", "Enable UTF-8 support. For compatibility, this is disabled by default in most games."};
  11
  12 void   u8_Init(void)
  13 {
  14         Cvar_RegisterVariable(&utf8_enable);
  15 }
  16
  17 /*
  18 ================================================================================
  19 UTF-8 encoding and decoding functions follow.
  20 ================================================================================
  21 */
  22
  23 /** Analyze the next character and return various information if requested.
  24  * @param _s      An utf-8 string.
  25  * @param _start  Filled with the start byte-offset of the next valid character
  26  * @param _len    Fileed with the length of the next valid character
  27  * @param _ch     Filled with the unicode value of the next character
  28  * @param _maxlen Maximum number of bytes to read from _s
  29  * @return        Whether or not another valid character is in the string
  30  */
  31 #define U8_ANALYZE_INFINITY 7
  32 static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, size_t _maxlen)
  33 {
  34         const unsigned char *s = (const unsigned char*)_s;
  35         unsigned char bt, bc;
  36         size_t i;
  37         size_t bits, j;
  38         Uchar ch;
  39
  40         i = 0;
  41 findchar:
  42
  43         // <0xC2 is always an overlong encoding, they're invalid, thus skipped
  44         while (i < _maxlen && s[i] && s[i] >= 0x80 && s[i] < 0xC2) {
  45                 //fprintf(stderr, "skipping\n");
  46                 ++i;
  47         }
  48
  49         //fprintf(stderr, "checking\n");
  50         // If we hit the end, well, we're out and invalid
  51         if(i >= _maxlen || !s[i]) {
  52                 if (_start) *_start = i;
  53                 if (_len) *_len = 0;
  54                 return false;
  55         }
  56
  57         //fprintf(stderr, "checking ascii\n");
  58         // ascii characters
  59         if (s[i] < 0x80)
  60         {
  61                 if (_start) *_start = i;
  62                 if (_len) *_len = 1;
  63                 if (_ch) *_ch = (Uchar)s[i];
  64                 //fprintf(stderr, "valid ascii\n");
  65                 return true;
  66         }
  67         //fprintf(stderr, "checking length\n");
  68
  69         // Figure out the next char's length
  70         bc = s[i];
  71         bits = 1;
  72         // count the 1 bits, they're the # of bytes
  73         for (bt = 0x40; bt && (bc & bt); bt >>= 1, ++bits);
  74         if (!bt)
  75         {
  76                 //fprintf(stderr, "superlong\n");
  77                 ++i;
  78                 goto findchar;
  79         }
  80         if(i + bits > _maxlen) {
  81                 /*
  82                 if (_start) *_start = i;
  83                 if (_len) *_len = 0;
  84                 return false;
  85                 */
  86                 ++i;
  87                 goto findchar;
  88         }
  89         // turn bt into a mask and give ch a starting value
  90         --bt;
  91         ch = (s[i] & bt);
  92         // check the byte sequence for invalid bytes
  93         for (j = 1; j < bits; ++j)
  94         {
  95                 // valid bit value: 10xx xxxx
  96                 //if (s[i+j] < 0x80 || s[i+j] >= 0xC0)
  97                 if ( (s[i+j] & 0xC0) != 0x80 )
  98                 {
  99                         //fprintf(stderr, "sequence of %i f'd at %i by %x\n", bits, j, (unsigned int)s[i+j]);
 100                         // this byte sequence is invalid, skip it
 101                         i += j;
 102                         // find a character after it
 103                         goto findchar;
 104                 }
 105                 // at the same time, decode the character
 106                 ch = (ch << 6) | (s[i+j] & 0x3F);
 107         }
 108
 109         // Now check the decoded byte for an overlong encoding
 110         if ( (bits >= 2 && ch < 0x80) ||
 111              (bits >= 3 && ch < 0x800) ||
 112              (bits >= 4 && ch < 0x10000) ||
 113              ch >= 0x10FFFF // RFC 3629
 114                 )
 115         {
 116                 i += bits;
 117                 //fprintf(stderr, "overlong: %i bytes for %x\n", bits, ch);
 118                 goto findchar;
 119         }
 120
 121         if (_start)
 122                 *_start = i;
 123         if (_len)
 124                 *_len = bits;
 125         if (_ch)
 126                 *_ch = ch;
 127         //fprintf(stderr, "valid utf8\n");
 128         return true;
 129 }
 130
 131 /** Get the number of characters in an UTF-8 string.
 132  * @param _s    An utf-8 encoded null-terminated string.
 133  * @return      The number of unicode characters in the string.
 134  */
 135 size_t u8_strlen(const char *_s)
 136 {
 137         size_t st, ln;
 138         size_t len = 0;
 139         const unsigned char *s = (const unsigned char*)_s;
 140
 141         if (!utf8_enable.integer)
 142                 return strlen(_s);
 143
 144         while (*s)
 145         {
 146                 // ascii char, skip u8_analyze
 147                 if (*s < 0x80)
 148                 {
 149                         ++len;
 150                         ++s;
 151                         continue;
 152                 }
 153
 154                 // invalid, skip u8_analyze
 155                 if (*s < 0xC2)
 156                 {
 157                         ++s;
 158                         continue;
 159                 }
 160
 161                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 162                         break;
 163                 // valid character, skip after it
 164                 s += st + ln;
 165                 ++len;
 166         }
 167         return len;
 168 }
 169
 170 /** Get the number of characters in a part of an UTF-8 string.
 171  * @param _s    An utf-8 encoded null-terminated string.
 172  * @param n     The maximum number of bytes.
 173  * @return      The number of unicode characters in the string.
 174  */
 175 size_t u8_strnlen(const char *_s, size_t n)
 176 {
 177         size_t st, ln;
 178         size_t len = 0;
 179         const unsigned char *s = (const unsigned char*)_s;
 180
 181         if (!utf8_enable.integer)
 182         {
 183                 len = strlen(_s);
 184                 return (len < n) ? len : n;
 185         }
 186
 187         while (*s && n)
 188         {
 189                 // ascii char, skip u8_analyze
 190                 if (*s < 0x80)
 191                 {
 192                         ++len;
 193                         ++s;
 194                         --n;
 195                         continue;
 196                 }
 197
 198                 // invalid, skip u8_analyze
 199                 if (*s < 0xC2)
 200                 {
 201                         ++s;
 202                         --n;
 203                         continue;
 204                 }
 205
 206                 if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
 207                         break;
 208                 // valid character, see if it's still inside the range specified by n:
 209                 if (n < st + ln)
 210                         return len;
 211                 ++len;
 212                 n -= st + ln;
 213                 s += st + ln;
 214         }
 215         return len;
 216 }
 217
 218 /** Get the number of bytes used in a string to represent an amount of characters.
 219  * @param _s    An utf-8 encoded null-terminated string.
 220  * @param n     The number of characters we want to know the byte-size for.
 221  * @return      The number of bytes used to represent n characters.
 222  */
 223 size_t u8_bytelen(const char *_s, size_t n)
 224 {
 225         size_t st, ln;
 226         size_t len = 0;
 227         const unsigned char *s = (const unsigned char*)_s;
 228
 229         if (!utf8_enable.integer) {
 230                 len = strlen(_s);
 231                 return (len < n) ? len : n;
 232         }
 233
 234         while (*s && n)
 235         {
 236                 // ascii char, skip u8_analyze
 237                 if (*s < 0x80)
 238                 {
 239                         ++len;
 240                         ++s;
 241                         --n;
 242                         continue;
 243                 }
 244
 245                 // invalid, skip u8_analyze
 246                 if (*s < 0xC2)
 247                 {
 248                         ++s;
 249                         ++len;
 250                         continue;
 251                 }
 252
 253                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 254                         break;
 255                 --n;
 256                 s += st + ln;
 257                 len += st + ln;
 258         }
 259         return len;
 260 }
 261
 262 /** Get the byte-index for a character-index.
 263  * @param _s      An utf-8 encoded string.
 264  * @param i       The character-index for which you want the byte offset.
 265  * @param len     If not null, character's length will be stored in there.
 266  * @return        The byte-index at which the character begins, or -1 if the string is too short.
 267  */
 268 int u8_byteofs(const char *_s, size_t i, size_t *len)
 269 {
 270         size_t st, ln;
 271         size_t ofs = 0;
 272         const unsigned char *s = (const unsigned char*)_s;
 273
 274         if (!utf8_enable.integer)
 275         {
 276                 if (strlen(_s) < i)
 277                 {
 278                         if (len) *len = 0;
 279                         return -1;
 280                 }
 281
 282                 if (len) *len = 1;
 283                 return i;
 284         }
 285
 286         st = ln = 0;
 287         do
 288         {
 289                 ofs += ln;
 290                 if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 291                         return -1;
 292                 ofs += st;
 293         } while(i-- > 0);
 294         if (len)
 295                 *len = ln;
 296         return ofs;
 297 }
 298
 299 /** Get the char-index for a byte-index.
 300  * @param _s      An utf-8 encoded string.
 301  * @param i       The byte offset for which you want the character index.
 302  * @param len     If not null, the offset within the character is stored here.
 303  * @return        The character-index, or -1 if the string is too short.
 304  */
 305 int u8_charidx(const char *_s, size_t i, size_t *len)
 306 {
 307         size_t st, ln;
 308         size_t ofs = 0;
 309         size_t pofs = 0;
 310         int idx = 0;
 311         const unsigned char *s = (const unsigned char*)_s;
 312
 313         if (!utf8_enable.integer)
 314         {
 315                 if (len) *len = 0;
 316                 return i;
 317         }
 318
 319         while (ofs < i && s[ofs])
 320         {
 321                 // ascii character, skip u8_analyze
 322                 if (s[ofs] < 0x80)
 323                 {
 324                         pofs = ofs;
 325                         ++idx;
 326                         ++ofs;
 327                         continue;
 328                 }
 329
 330                 // invalid, skip u8_analyze
 331                 if (s[ofs] < 0xC2)
 332                 {
 333                         ++ofs;
 334                         continue;
 335                 }
 336
 337                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 338                         return -1;
 339                 // see if next char is after the bytemark
 340                 if (ofs + st > i)
 341                 {
 342                         if (len)
 343                                 *len = i - pofs;
 344                         return idx;
 345                 }
 346                 ++idx;
 347                 pofs = ofs + st;
 348                 ofs += st + ln;
 349                 // see if bytemark is within the char
 350                 if (ofs > i)
 351                 {
 352                         if (len)
 353                                 *len = i - pofs;
 354                         return idx;
 355                 }
 356         }
 357         if (len) *len = 0;
 358         return idx;
 359 }
 360
 361 /** Get the byte offset of the previous byte.
 362  * The result equals:
 363  * prevchar_pos = u8_byteofs(text, u8_charidx(text, thischar_pos, NULL) - 1, NULL)
 364  * @param _s      An utf-8 encoded string.
 365  * @param i       The current byte offset.
 366  * @return        The byte offset of the previous character
 367  */
 368 size_t u8_prevbyte(const char *_s, size_t i)
 369 {
 370         size_t st, ln;
 371         const unsigned char *s = (const unsigned char*)_s;
 372         size_t lastofs = 0;
 373         size_t ofs = 0;
 374
 375         if (!utf8_enable.integer)
 376         {
 377                 if (i > 0)
 378                         return i-1;
 379                 return 0;
 380         }
 381
 382         while (ofs < i && s[ofs])
 383         {
 384                 // ascii character, skip u8_analyze
 385                 if (s[ofs] < 0x80)
 386                 {
 387                         lastofs = ofs++;
 388                         continue;
 389                 }
 390
 391                 // invalid, skip u8_analyze
 392                 if (s[ofs] < 0xC2)
 393                 {
 394                         ++ofs;
 395                         continue;
 396                 }
 397
 398                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 399                         return lastofs;
 400                 if (ofs + st > i)
 401                         return lastofs;
 402                 if (ofs + st + ln >= i)
 403                         return ofs + st;
 404
 405                 lastofs = ofs;
 406                 ofs += st + ln;
 407         }
 408         return lastofs;
 409 }
 410
 /* Lookup table indexed by byte value, consulted by u8_getchar and u8_getnchar
  * when utf8_enable is off: bytes marked 1 (0x20..0x7F) are returned unchanged,
  * bytes marked 0 are returned as 0xE000 + byte.
  */
 static int char_usefont[256] = {
         /* 0x00..0x0F */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0x10..0x1F */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0x20..0x2F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x30..0x3F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x40..0x4F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x50..0x5F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x60..0x6F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x70..0x7F */ 1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
         /* 0x80..0x8F */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0x90..0x9F */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xA0..0xAF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xB0..0xBF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xC0..0xCF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xD0..0xDF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xE0..0xEF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
         /* 0xF0..0xFF */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0
 };
 429
 430
 431 /** Fetch a character from an utf-8 encoded string.
 432  * @param _s      The start of an utf-8 encoded multi-byte character.
 433  * @param _end    Will point to after the first multi-byte character.
 434  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 435  */
 436 Uchar u8_getchar(const char *_s, const char **_end)
 437 {
 438         size_t st, ln;
 439         Uchar ch;
 440
 441         if (!utf8_enable.integer)
 442         {
 443                 if (_end)
 444                         *_end = _s + 1;
 445                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 446                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 447                  * rest:
 448                  */
 449                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 450                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 451                 return (Uchar)*(const unsigned char*)_s;
 452         }
 453
 454         if (!u8_analyze(_s, &st, &ln, &ch, U8_ANALYZE_INFINITY))
 455                 ch = 0;
 456         if (_end)
 457                 *_end = _s + st + ln;
 458         return ch;
 459 }
 460
 461 /** Fetch a character from an utf-8 encoded string.
 462  * @param _s      The start of an utf-8 encoded multi-byte character.
 463  * @param _end    Will point to after the first multi-byte character.
 464  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 465  */
 466 Uchar u8_getnchar(const char *_s, const char **_end, size_t _maxlen)
 467 {
 468         size_t st, ln;
 469         Uchar ch;
 470
 471         if (!utf8_enable.integer)
 472         {
 473                 if (_end)
 474                         *_end = _s + 1;
 475                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 476                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 477                  * rest:
 478                  */
 479                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 480                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 481                 return (Uchar)*(const unsigned char*)_s;
 482         }
 483
 484         if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
 485                 ch = 0;
 486         if (_end)
 487                 *_end = _s + st + ln;
 488         return ch;
 489 }
 490
 491 /** Encode a wide-character into utf-8.
 492  * @param w        The wide character to encode.
 493  * @param to       The target buffer the utf-8 encoded string is stored to.
 494  * @param maxlen   The maximum number of bytes that fit into the target buffer.
 495  * @return         Number of bytes written to the buffer not including the terminating null.
 496  *                 Less or equal to 0 if the buffer is too small.
 497  */
 498 int u8_fromchar(Uchar w, char *to, size_t maxlen)
 499 {
 500         if (maxlen < 1)
 501                 return 0;
 502
 503         if (!w)
 504                 return 0;
 505
 506         if (w >= 0xE000 && !utf8_enable.integer)
 507                 w -= 0xE000;
 508
 509         if (w < 0x80 || !utf8_enable.integer)
 510         {
 511                 to[0] = (char)w;
 512                 if (maxlen < 2)
 513                         return -1;
 514                 to[1] = 0;
 515                 return 1;
 516         }
 517         // for a little speedup
 518         if (w < 0x800)
 519         {
 520                 if (maxlen < 3)
 521                 {
 522                         to[0] = 0;
 523                         return -1;
 524                 }
 525                 to[2] = 0;
 526                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 527                 to[0] = 0xC0 | w;
 528                 return 2;
 529         }
 530         if (w < 0x10000)
 531         {
 532                 if (maxlen < 4)
 533                 {
 534                         to[0] = 0;
 535                         return -1;
 536                 }
 537                 to[3] = 0;
 538                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 539                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 540                 to[0] = 0xE0 | w;
 541                 return 3;
 542         }
 543
 544         // RFC 3629
 545         if (w <= 0x10FFFF)
 546         {
 547                 if (maxlen < 5)
 548                 {
 549                         to[0] = 0;
 550                         return -1;
 551                 }
 552                 to[4] = 0;
 553                 to[3] = 0x80 | (w & 0x3F); w >>= 6;
 554                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 555                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 556                 to[0] = 0xE0 | w;
 557                 return 4;
 558         }
 559         return 0;
 560 }
 561
 562 /** uses u8_fromchar on a static buffer
 563  * @param ch        The unicode character to convert to encode
 564  * @param l         The number of bytes without the terminating null.
 565  * @return          A statically allocated buffer containing the character's utf8 representation, or NULL if it fails.
 566  */
 567 char *u8_encodech(Uchar ch, size_t *l)
 568 {
 569         static char buf[16];
 570         size_t len;
 571         len = u8_fromchar(ch, buf, sizeof(buf));
 572         if (len > 0)
 573         {
 574                 if (l) *l = len;
 575                 return buf;
 576         }
 577         return NULL;
 578 }
 579
 580 /** Convert a utf-8 multibyte string to a wide character string.
 581  * @param wcs       The target wide-character buffer.
 582  * @param mb        The utf-8 encoded multibyte string to convert.
 583  * @param maxlen    The maximum number of wide-characters that fit into the target buffer.
 584  * @return          The number of characters written to the target buffer.
 585  */
 586 size_t u8_mbstowcs(Uchar *wcs, const char *mb, size_t maxlen)
 587 {
 588         size_t i;
 589         Uchar ch;
 590         if (maxlen < 1)
 591                 return 0;
 592         for (i = 0; *mb && i < maxlen-1; ++i)
 593         {
 594                 ch = u8_getchar(mb, &mb);
 595                 if (!ch)
 596                         break;
 597                 wcs[i] = ch;
 598         }
 599         wcs[i] = 0;
 600         return i;
 601 }
 602
 603 /** Convert a wide-character string to a utf-8 multibyte string.
 604  * @param mb      The target buffer the utf-8 string is written to.
 605  * @param wcs     The wide-character string to convert.
 606  * @param maxlen  The number bytes that fit into the multibyte target buffer.
 607  * @return        The number of bytes written, not including the terminating \0
 608  */
 609 size_t u8_wcstombs(char *mb, const Uchar *wcs, size_t maxlen)
 610 {
 611         size_t i;
 612         const char *start = mb;
 613         if (maxlen < 2)
 614                 return 0;
 615         for (i = 0; wcs[i] && i < maxlen-1; ++i)
 616         {
 617                 /*
 618                 int len;
 619                 if ( (len = u8_fromchar(wcs[i], mb, maxlen - i)) < 0)
 620                         return (mb - start);
 621                 mb += len;
 622                 */
 623                 mb += u8_fromchar(wcs[i], mb, maxlen - i);
 624         }
 625         *mb = 0;
 626         return (mb - start);
 627 }
 628
 629 /*
 630 ============
 631 UTF-8 aware COM_StringLengthNoColors
 632
 633 calculates the visible width of a color coded string.
 634
 635 *valid is filled with TRUE if the string is a valid colored string (that is, if
 636 it does not end with an unfinished color code). If it gets filled with FALSE, a
 637 fix would be adding a STRING_COLOR_TAG at the end of the string.
 638
 639 valid can be set to NULL if the caller doesn't care.
 640
 641 For size_s, specify the maximum number of characters from s to use, or 0 to use
 642 all characters until the zero terminator.
 643 ============
 644 */
 645 size_t
 646 COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid);
 647 size_t
 648 u8_COM_StringLengthNoColors(const char *_s, size_t size_s, qboolean *valid)
 649 {
 650         const unsigned char *s = (const unsigned char*)_s;
 651         const unsigned char *end;
 652         size_t len = 0;
 653
 654         if (!utf8_enable.integer)
 655                 return COM_StringLengthNoColors(_s, size_s, valid);
 656
 657         end = size_s ? (s + size_s) : NULL;
 658
 659         for(;;)
 660         {
 661                 switch((s == end) ? 0 : *s)
 662                 {
 663                         case 0:
 664                                 if(valid)
 665                                         *valid = TRUE;
 666                                 return len;
 667                         case STRING_COLOR_TAG:
 668                                 ++s;
 669                                 switch((s == end) ? 0 : *s)
 670                                 {
 671                                         case STRING_COLOR_RGB_TAG_CHAR:
 672                                                 if (s+1 != end && isxdigit(s[1]) &&
 673                                                         s+2 != end && isxdigit(s[2]) &&
 674                                                         s+3 != end && isxdigit(s[3]) )
 675                                                 {
 676                                                         s+=3;
 677                                                         break;
 678                                                 }
 679                                                 ++len; // STRING_COLOR_TAG
 680                                                 ++len; // STRING_COLOR_RGB_TAG_CHAR
 681                                                 break;
 682                                         case 0: // ends with unfinished color code!
 683                                                 ++len;
 684                                                 if(valid)
 685                                                         *valid = FALSE;
 686                                                 return len;
 687                                         case STRING_COLOR_TAG: // escaped ^
 688                                                 ++len;
 689                                                 break;
 690                                         case '0': case '1': case '2': case '3': case '4':
 691                                         case '5': case '6': case '7': case '8': case '9': // color code
 692                                                 break;
 693                                         default: // not a color code
 694                                                 ++len; // STRING_COLOR_TAG
 695                                                 ++len; // the character
 696                                                 break;
 697                                 }
 698                                 break;
 699                         default:
 700                                 ++len;
 701                                 break;
 702                 }
 703
 704                 // start of a wide character
 705                 if (*s & 0xC0)
 706                 {
 707                         for (++s; *s >= 0x80 && *s <= 0xC0; ++s);
 708                         continue;
 709                 }
 710                 // part of a wide character, we ignore that one
 711                 if (*s <= 0xBF)
 712                         --len;
 713                 ++s;
 714         }
 715         // never get here
 716 }
 717
 718 /** Pads a utf-8 string
 719  * @param out     The target buffer the utf-8 string is written to.
 720  * @param outsize The size of the target buffer, including the final NUL
 721  * @param in      The input utf-8 buffer
 722  * @param leftalign Left align the output string (by default right alignment is done)
 723  * @param minwidth The minimum output width
 724  * @param maxwidth The maximum output width
 725  * @return        The number of bytes written, not including the terminating \0
 726  */
 727 size_t u8_strpad(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth)
 728 {
 729         if(!utf8_enable.integer)
 730         {
 731                 return dpsnprintf(out, outsize, "%*.*s", leftalign ? -(int) minwidth : (int) minwidth, (int) maxwidth, in);
 732         }
 733         else
 734         {
 735                 size_t l = u8_bytelen(in, maxwidth);
 736                 size_t actual_width = u8_strnlen(in, l);
 737                 int pad = (actual_width >= minwidth) ? 0 : (minwidth - actual_width);
 738                 int prec = l;
 739                 int lpad = leftalign ? 0 : pad;
 740                 int rpad = leftalign ? pad : 0;
 741                 return dpsnprintf(out, outsize, "%*s%.*s%*s", lpad, "", prec, in, rpad, "");
 742         }
 743 }