UTF-8编码与Unicode UCS-2的转换

/* Convert a UTF-8 string into a UCS-2 array.
   `str' specifies the NUL-terminated UTF-8 string to decode.
   `ary' specifies the array into which the decoded 16-bit code units are written.  No bounds
   checking is done on it; at most one element is written per input byte, so a buffer with as
   many elements as `str' has bytes is always sufficient.
   `np' specifies the variable into which the number of elements written to `ary' is stored.
   Only one-, two- and three-byte sequences are decoded.  A byte of 0xf0 or above produces no
   output.  Validation is loose: any byte in 0x80..0xdf is treated as a two-byte lead, and a
   trailing byte is accepted whenever it is >= 0x80.  A lead byte whose trailing bytes fail that
   check is dropped and scanning resumes at the next byte. */
void tcstrutftoucs(const char *str, uint16_t *ary, int *np){
  assert(str && ary && np);
  const unsigned char *rp = (const unsigned char *)str;
  unsigned int wi = 0;
  while(*rp != '\0'){
    int c = *rp;
    if(c < 0x80){
      /* single byte: 0xxxxxxx */
      ary[wi++] = c;
    } else if(c < 0xe0){
      /* two bytes: 110xxxxx 10xxxxxx -> 11 payload bits */
      if(rp[1] >= 0x80){
        ary[wi++] = ((rp[0] & 0x1f) << 6) | (rp[1] & 0x3f);
        rp++;
      }
    } else if(c < 0xf0){
      /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx -> 16 payload bits.
         The && short-circuits on a NUL at rp[1], so rp[2] is never read past the terminator. */
      if(rp[1] >= 0x80 && rp[2] >= 0x80){
        ary[wi++] = ((rp[0] & 0xf) << 12) | ((rp[1] & 0x3f) << 6) | (rp[2] & 0x3f);
        rp += 2;
      }
    }
    /* step over the lead byte (trailing bytes were already skipped above) */
    rp++;
  }
  *np = wi;
}


/* Convert a UCS-2 array into a UTF-8 string. */
/* tcstrucstoutf: encode UCS-2 code units as a NUL-terminated UTF-8 string.
   `ary' specifies the array of 16-bit code units to encode.
   `num' specifies the number of elements of `ary' to encode; it must not be negative.
   `str' specifies the buffer into which the UTF-8 bytes and a terminating NUL are written.  No
   bounds checking is done on it; each element yields at most three bytes, so `num' * 3 + 1
   bytes is always sufficient.
   The return value is the number of bytes written, not counting the terminating NUL.
   Each element is encoded on its own: values below 0x80 take one byte, values below 0x800 take
   two, and every other value takes three.  A zero element is written as a single NUL byte, so
   the result can contain embedded NULs; use the return value as the length in that case. */
int tcstrucstoutf(const uint16_t *ary, int num, char *str){
  assert(ary && num >= 0 && str);
  unsigned char *wp = (unsigned char *)str;
  for(int i = 0; i < num; i++){
    unsigned int c = ary[i];
    if(c < 0x80){
      /* 0xxxxxxx */
      *(wp++) = c;
    } else if(c < 0x800){
      /* 110xxxxx 10xxxxxx */
      *(wp++) = 0xc0 | (c >> 6);
      *(wp++) = 0x80 | (c & 0x3f);
    } else {
      /* 1110xxxx 10xxxxxx 10xxxxxx */
      *(wp++) = 0xe0 | (c >> 12);
      *(wp++) = 0x80 | ((c & 0xfff) >> 6);
      *(wp++) = 0x80 | (c & 0x3f);
    }
  }
  *wp = '\0';
  return (int)((char *)wp - str);
}
上一篇:js每天进步一点点3


下一篇:percentiles of live data capture