Native/C
UCS2UTF8 (unicode)
aucd29
2013. 10. 2. 18:54
출처: http://jinsuk.pe.kr/Unicode/public/UCS2UTF8.c
/* by Jinsuk Kim, http://www.jinsuk.pe.kr */
[code]
/* UCS2toUTF8
 * Converts an array of UCS-2 code units to a NUL-terminated UTF-8 string.
 *
 * ucs2  - input code units; elements [0, count) are read
 * count - number of code units to convert
 * dst   - output buffer; at most 3 bytes are written per code unit, plus
 *         the terminating '\0', so it must hold at least 3 * count + 1 bytes
 *
 * Returns the number of bytes written to dst, not counting the '\0'.
 *
 * Every code unit is encoded on its own: surrogate values (0xD800-0xDFFF)
 * are not paired and come out as 3-byte sequences, and a code unit of 0 is
 * written as a single 0x00 byte inside the output.
 */
int UCS2toUTF8(unsigned short *ucs2, int count, char *dst)
{
    int len = 0;
    int i;

    for (i = 0; i < count; i++) {
        /* Widen first so the shifts below never operate on a promoted
         * signed int. */
        unsigned int unicode = ucs2[i];

        if (unicode < 0x80) {
            /* 0xxxxxxx */
            dst[len++] = (char)unicode;
        } else if (unicode < 0x800) {
            /* 110xxxxx 10xxxxxx: the lead byte carries all five high bits
             * (bits 6..10); dropping bit 10 here would corrupt every code
             * unit in U+0400..U+07FF. */
            dst[len++] = (char)(0xc0 | (unicode >> 6));
            dst[len++] = (char)(0x80 | (unicode & 0x3f));
        } else {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            dst[len++] = (char)(0xe0 | ((unicode >> 12) & 0x0f));
            dst[len++] = (char)(0x80 | ((unicode >> 6) & 0x3f));
            dst[len++] = (char)(0x80 | (unicode & 0x3f));
        }
    }

    dst[len] = '\0';
    return len;
}
/* UTF8toUCS2
 * Converts a NUL-terminated UTF-8 string to an array of UCS-2 code units.
 *
 * src  - NUL-terminated UTF-8 input
 * ucs2 - output array; each code unit written consumes at least one input
 *        byte, so room for strlen(src) elements is always enough.
 *        No terminator is appended.
 *
 * Returns the number of code units written to ucs2.
 *
 * Input that cannot be turned into a UCS-2 code unit produces U+FFFD
 * (REPLACEMENT CHARACTER):
 *   - a 4-byte sequence (code point above U+FFFF): one U+FFFD for the
 *     whole sequence;
 *   - a stray continuation byte or an invalid lead byte (0xF8-0xFF):
 *     one U+FFFD per byte;
 *   - a sequence cut short by a non-continuation byte or by the end of
 *     the string: one U+FFFD, and decoding resumes at the offending byte.
 * Overlong encodings and encoded surrogates are decoded as-is.
 */
int UTF8toUCS2(char *src, unsigned short *ucs2)
{
    /* Read the bytes as unsigned so values >= 0x80 never sign-extend. */
    const unsigned char *p = (const unsigned char *)src;
    int count = 0;

    while (*p != '\0') {
        unsigned char lead = *p++;
        unsigned int unicode;
        int trail;      /* continuation bytes still expected */
        int ok = 1;     /* cleared when the sequence cannot be represented */

        if (lead < 0x80) {                  /* 0xxxxxxx */
            unicode = lead;
            trail = 0;
        } else if ((lead & 0xe0) == 0xc0) { /* 110xxxxx */
            unicode = lead & 0x1f;
            trail = 1;
        } else if ((lead & 0xf0) == 0xe0) { /* 1110xxxx */
            unicode = lead & 0x0f;
            trail = 2;
        } else if ((lead & 0xf8) == 0xf0) { /* 11110xxx: beyond UCS-2 */
            unicode = 0;
            trail = 3;
            ok = 0;
        } else {                            /* 10xxxxxx or 11111xxx */
            unicode = 0;
            trail = 0;
            ok = 0;
        }

        /* The terminating NUL is not a continuation byte, so this loop
         * can never step past the end of the string. */
        while (trail > 0) {
            if ((*p & 0xc0) != 0x80) {
                ok = 0;
                break;
            }
            unicode = (unicode << 6) | (unsigned int)(*p & 0x3f);
            p++;
            trail--;
        }

        ucs2[count++] = ok ? (unsigned short)unicode : (unsigned short)0xfffd;
    }

    return count;
}
[/code]
[출처] UCS2UTF8.c|작성자 형기
/* by Jinsuk Kim, http://www.jinsuk.pe.kr */
[code]
/* UCS2toUTF8
 * Converts an array of UCS-2 code units to a NUL-terminated UTF-8 string.
 *
 * ucs2  - input code units; elements [0, count) are read
 * count - number of code units to convert
 * dst   - output buffer; at most 3 bytes are written per code unit, plus
 *         the terminating '\0', so it must hold at least 3 * count + 1 bytes
 *
 * Returns the number of bytes written to dst, not counting the '\0'.
 *
 * Every code unit is encoded on its own: surrogate values (0xD800-0xDFFF)
 * are not paired and come out as 3-byte sequences, and a code unit of 0 is
 * written as a single 0x00 byte inside the output.
 */
int UCS2toUTF8(unsigned short *ucs2, int count, char *dst)
{
    int len = 0;
    int i;

    for (i = 0; i < count; i++) {
        /* Widen first so the shifts below never operate on a promoted
         * signed int. */
        unsigned int unicode = ucs2[i];

        if (unicode < 0x80) {
            /* 0xxxxxxx */
            dst[len++] = (char)unicode;
        } else if (unicode < 0x800) {
            /* 110xxxxx 10xxxxxx: the lead byte carries all five high bits
             * (bits 6..10); dropping bit 10 here would corrupt every code
             * unit in U+0400..U+07FF. */
            dst[len++] = (char)(0xc0 | (unicode >> 6));
            dst[len++] = (char)(0x80 | (unicode & 0x3f));
        } else {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            dst[len++] = (char)(0xe0 | ((unicode >> 12) & 0x0f));
            dst[len++] = (char)(0x80 | ((unicode >> 6) & 0x3f));
            dst[len++] = (char)(0x80 | (unicode & 0x3f));
        }
    }

    dst[len] = '\0';
    return len;
}
/* UTF8toUCS2
 * Converts a NUL-terminated UTF-8 string to an array of UCS-2 code units.
 *
 * src  - NUL-terminated UTF-8 input
 * ucs2 - output array; each code unit written consumes at least one input
 *        byte, so room for strlen(src) elements is always enough.
 *        No terminator is appended.
 *
 * Returns the number of code units written to ucs2.
 *
 * Input that cannot be turned into a UCS-2 code unit produces U+FFFD
 * (REPLACEMENT CHARACTER):
 *   - a 4-byte sequence (code point above U+FFFF): one U+FFFD for the
 *     whole sequence;
 *   - a stray continuation byte or an invalid lead byte (0xF8-0xFF):
 *     one U+FFFD per byte;
 *   - a sequence cut short by a non-continuation byte or by the end of
 *     the string: one U+FFFD, and decoding resumes at the offending byte.
 * Overlong encodings and encoded surrogates are decoded as-is.
 */
int UTF8toUCS2(char *src, unsigned short *ucs2)
{
    /* Read the bytes as unsigned so values >= 0x80 never sign-extend. */
    const unsigned char *p = (const unsigned char *)src;
    int count = 0;

    while (*p != '\0') {
        unsigned char lead = *p++;
        unsigned int unicode;
        int trail;      /* continuation bytes still expected */
        int ok = 1;     /* cleared when the sequence cannot be represented */

        if (lead < 0x80) {                  /* 0xxxxxxx */
            unicode = lead;
            trail = 0;
        } else if ((lead & 0xe0) == 0xc0) { /* 110xxxxx */
            unicode = lead & 0x1f;
            trail = 1;
        } else if ((lead & 0xf0) == 0xe0) { /* 1110xxxx */
            unicode = lead & 0x0f;
            trail = 2;
        } else if ((lead & 0xf8) == 0xf0) { /* 11110xxx: beyond UCS-2 */
            unicode = 0;
            trail = 3;
            ok = 0;
        } else {                            /* 10xxxxxx or 11111xxx */
            unicode = 0;
            trail = 0;
            ok = 0;
        }

        /* The terminating NUL is not a continuation byte, so this loop
         * can never step past the end of the string. */
        while (trail > 0) {
            if ((*p & 0xc0) != 0x80) {
                ok = 0;
                break;
            }
            unicode = (unicode << 6) | (unsigned int)(*p & 0x3f);
            p++;
            trail--;
        }

        ucs2[count++] = ok ? (unsigned short)unicode : (unsigned short)0xfffd;
    }

    return count;
}
[/code]
[출처] UCS2UTF8.c|작성자 형기