|
| 1 | +/* |
| 2 | + * UTF-8 decoder copyright © 2008–2009 Björn Höhrmann <[email protected]> |
| 3 | + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
| 4 | + * |
| 5 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 6 | + * of this software and associated documentation files (the "Software"), to |
| 7 | + * deal in the Software without restriction, including without limitation the |
| 8 | + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 9 | + * sell copies of the Software, and to permit persons to whom the Software is |
| 10 | + * furnished to do so, subject to the following conditions: |
| 11 | + * |
| 12 | + * The above copyright notice and this permission notice shall be included in |
| 13 | + * all copies or substantial portions of the Software. |
| 14 | + * |
| 15 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 21 | + * IN THE SOFTWARE. |
| 22 | + */ |
| 23 | + |
| 24 | +#include <stdint.h> |
| 25 | + |
/*
 * Transition table for the UTF-8 decoding automaton driven by decode().
 *
 * Layout (400 entries in total):
 *   [0, 256)    maps every byte value to a character class (0..11).
 *   [256, 400)  nine rows of 16 entries, one row per automaton state
 *               (s0..s8); a row is indexed by character class and yields
 *               the next state.
 *
 * State 0 is the accepting state (a complete character has been read).
 * Every entry of row s1 is 1, so state 1 is a sticky reject state.
 */
static const uint8_t utf8d[] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};

/*
 * Feed one byte into the UTF-8 decoding automaton.
 *
 * state: in/out automaton state. Must be 0 before the first byte of a
 *        character. On return it is 0 when a complete character has been
 *        decoded, 1 when the input is not valid UTF-8, and another value
 *        when more bytes are required.
 * codep: in/out code point accumulator. It holds the decoded code point
 *        once *state returns to 0.
 * byte:  the next input byte, in the range 0x00..0xFF.
 *
 * Returns the new value of *state.
 *
 * A byte above 0xFF or a state above 8 would index the table outside the
 * region intended for it (or past its end), so such input is rejected:
 * *state becomes 1 and *codep is left untouched.
 */
uint32_t
decode(uint32_t *state, uint32_t *codep, uint32_t byte)
{
	uint32_t type;

	if (byte > 0xffu || *state > 8u) {
		*state = 1;
		return *state;
	}

	type = utf8d[byte];

	if (*state != 0) {
		/* Continuation byte: append its low six payload bits. */
		*codep = (byte & 0x3fu) | (*codep << 6);
	} else {
		/* Lead byte: the character class tells how many high bits
		 * are length markers rather than payload. */
		*codep = (0xffu >> type) & byte;
	}

	/* 256 skips the byte-class section; each state row is 16 wide. */
	*state = utf8d[256 + *state * 16 + type];
	return *state;
}
| 54 | + |
1 | 55 | /*
|
2 | 56 | * Copyright © 2013 stag019 <[email protected]>
|
3 | 57 | *
|
@@ -28,30 +82,26 @@ struct Charmap globalCharmap = {0};
|
28 | 82 | extern struct Section *pCurrentSection;
|
29 | 83 |
|
/*
 * Copy the single UTF-8 encoded character at the start of src into dest,
 * NUL-terminate dest, and return the number of bytes that were copied
 * (not counting the terminator).
 *
 * dest: output buffer. It must have room for the encoded character plus
 *       the terminating NUL; the decoder accepts sequences of at most
 *       four bytes, so five bytes suffice for well-formed input.
 * src:  input text, read one byte at a time until the decoder reports a
 *       complete character.
 *
 * Input that is not valid UTF-8 is reported through fatalerror().
 * NOTE(review): this assumes fatalerror() does not return; if it can, the
 * loop would keep reading past the offending byte - confirm.
 *
 * NOTE(review): a NUL byte is a complete one-byte sequence for the decoder,
 * so an empty string yields 1 (with dest[0] == '\0'), not 0 - confirm this
 * is what callers expect.
 */
int
readUTF8Char(char *dest, char *src)
{
	uint32_t state = 0;
	uint32_t codep;
	int i = 0;

	for (;;) {
		/* Every byte must pass through the decoder exactly once, so
		 * the index advances a single step per iteration. */
		if (decode(&state, &codep, (uint8_t)src[i]) == 1) {
			fatalerror("invalid UTF-8 character");
		}

		dest[i] = src[i];
		i++;

		if (state == 0) {
			/* Back in the accepting state: character complete. */
			dest[i] = '\0';
			return i;
		}
	}
}
|
56 | 106 |
|
57 | 107 | int
|
|
0 commit comments