sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

utf8.c (2989B)


      1 #include <stdint.h>
      2 #include <stdlib.h>
      3 
      4 #include "utf8.h"
      5 
      6 /* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a copy
      9  * of this software and associated documentation files (the "Software"), to deal
     10  * in the Software without restriction, including without limitation the rights
     11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12  * copies of the Software, and to permit persons to whom the Software is
     13  * furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included in
     16  * all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     24  * SOFTWARE.
     25  */
     26 
     27 #define UTF8_ACCEPT 0
     28 #define UTF8_REJECT 1
     29 
     30 static const uint8_t utf8d[] = {
     31   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
     32   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
     33   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
     34   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
     35   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
     36   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
     37   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
     38   0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
     39   0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
     40   0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
     41   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
     42   1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
     43   1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
     44   1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
     45 };
     46 
     47 static uint32_t
     48 decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
     49   uint32_t type = utf8d[byte];
     50 
     51   *codep = (*state != UTF8_ACCEPT) ?
     52     (byte & 0x3fu) | (*codep << 6) :
     53     (0xff >> type) & (byte);
     54 
     55   *state = utf8d[256 + *state*16 + type];
     56   return *state;
     57 }
     58 
     59 char *utf8_decode(char *str, uint32_t *codep)
     60 {
     61     uint32_t state = UTF8_ACCEPT;
     62 
     63     *codep = 0;
     64     while (*str != 0)
     65     {
     66         int result = decode(&state, codep, (unsigned char)*str);
     67         str++;
     68         if (result == UTF8_ACCEPT)
     69             return str;
     70         if (result == UTF8_REJECT)
     71             return NULL;
     72     }
     73     return NULL;
     74 }