Using wchar_t
didn't quite work out in my tests, so I'm handling it on my own:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// https://stackoverflow.com/a/44776334
int8_t utf8_length(char c) {
// 4-byte character (11110XXX)
if ((c & 0b11111000) == 0b11110000)
return 4;
// 3-byte character (1110XXXX)
if ((c & 0b11110000) == 0b11100000)
return 3;
// 2-byte character (110XXXXX)
if ((c & 0b11100000) == 0b11000000)
return 2;
// 1-byte ASCII character (0XXXXXXX)
if ((c & 0b10000000) == 0b00000000)
return 1;
// Probably a 10XXXXXXX continuation byte
return -1;
}
void main ()
{
const char* filepath = "example.txt";
FILE* file = fopen(filepath, "r");
if (!file) {
perror(filepath);
exit(1);
}
char c;
for(;;) {
c = getc(file);
if (c == EOF)
break;
putc(c, stdout);
int8_t length = utf8_length(c);
while (--length) {
c = getc(file);
putc(c, stdout);
}
getchar();
}
fclose (file);
}
And here's my test file:
Hello, World! 🌍🚀
Hello
¡Hola!
Ça va?
你好
こんにちは
안녕하세요
©®™✓✗
😀😢😎🔥✨
€🌍🎭
Top comments (0)