Native/C

UCS2UTF8 (unicode)

aucd29 2013. 10. 2. 18:54
출처: http://jinsuk.pe.kr/Unicode/public/UCS2UTF8.c

/* by Jinsuk Kim, http://www.jinsuk.pe.kr */

[code]
/* UCS2toUTF8
 * Converts an array of UCS-2 code units to a NUL-terminated UTF-8 string.
 *
 * ucs2  - input array of 16-bit code units (a null pointer is treated as empty)
 * count - number of code units to convert from ucs2
 * dst   - output buffer; every code unit expands to at most 3 bytes, so the
 *         caller must supply at least 3 * count + 1 bytes
 *
 * Returns the number of bytes written to dst, not counting the terminating
 * NUL. Returns 0 without writing anything when dst is a null pointer.
 *
 * Each code unit is encoded on its own; surrogate code units are not
 * combined into a 4-byte sequence.
 */
int UCS2toUTF8(unsigned short *ucs2, int count, char *dst)
{
    int len = 0;
    int i;

    if (!dst)
        return 0;
    if (!ucs2)
        count = 0;

    for (i = 0; i < count; i++) {
        unsigned int code = ucs2[i];

        if (code < 0x80) {
            /* 0xxxxxxx */
            dst[len++] = (char)code;
        } else if (code < 0x800) {
            /* 110xxxxx 10xxxxxx: the lead byte carries bits 6..10, all five
             * of which must be kept (U+0400..U+07FF have bit 10 set). */
            dst[len++] = (char)(0xc0 | (code >> 6));
            dst[len++] = (char)(0x80 | (code & 0x3f));
        } else {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            dst[len++] = (char)(0xe0 | ((code >> 12) & 0x0f));
            dst[len++] = (char)(0x80 | ((code >> 6) & 0x3f));
            dst[len++] = (char)(0x80 | (code & 0x3f));
        }
    }

    dst[len] = '\0';
    return len;
}

/* UTF8toUCS2
 * Converts a NUL-terminated UTF-8 string to an array of UCS-2 code units.
 *
 * src  - NUL-terminated UTF-8 input
 * ucs2 - output array; every code unit written consumes at least one input
 *        byte, so a capacity equal to the byte length of src is sufficient
 *
 * Returns the number of code units written to ucs2 (no terminator is
 * appended). Returns 0 when either pointer is null.
 *
 * Input that cannot be decoded into a single 16-bit unit is replaced by
 * U+FFFD: a stray continuation byte, an invalid lead byte, a sequence cut
 * short by another character or by the end of the string, and any 4-byte
 * sequence (code points above U+FFFF do not fit in UCS-2).
 * Overlong forms and encoded surrogates are passed through unchanged.
 */
int UTF8toUCS2(char *src, unsigned short *ucs2)
{
    const unsigned char *p;
    int count = 0;

    if (!src || !ucs2)
        return 0;

    /* Work on unsigned bytes so that values >= 0x80 compare as expected
     * regardless of whether plain char is signed. */
    p = (const unsigned char *)src;

    while (*p != '\0') {
        unsigned char lead = *p++;
        unsigned int code;
        int trailing;
        int valid = 1;

        if (lead < 0x80) {
            code = lead;
            trailing = 0;
        } else if ((lead & 0xe0) == 0xc0) {
            code = lead & 0x1f;
            trailing = 1;
        } else if ((lead & 0xf0) == 0xe0) {
            code = lead & 0x0f;
            trailing = 2;
        } else if ((lead & 0xf8) == 0xf0) {
            /* 4-byte sequence: consume it as one unit, but it cannot be
             * represented in UCS-2. */
            code = 0;
            trailing = 3;
            valid = 0;
        } else {
            /* Stray continuation byte or invalid lead byte. */
            code = 0;
            trailing = 0;
            valid = 0;
        }

        /* The terminating NUL is not a continuation byte, so this loop can
         * never step past the end of the string. */
        while (trailing > 0 && (*p & 0xc0) == 0x80) {
            code = (code << 6) | (unsigned int)(*p & 0x3f);
            p++;
            trailing--;
        }
        if (trailing > 0)
            valid = 0;

        ucs2[count++] = (unsigned short)(valid ? code : 0xfffd);
    }

    return count;
}
[/code]

[출처] UCS2UTF8.c|작성자 형기