GUAC-584: Add support for CP1252 and ISO-8859-1.

This commit is contained in:
Michael Jumper 2014-04-02 14:08:41 -07:00
parent b203cf1fd2
commit da5404f612
3 changed files with 152 additions and 0 deletions

View File

@ -26,6 +26,44 @@
#include <guacamole/unicode.h> #include <guacamole/unicode.h>
#include <stdint.h> #include <stdint.h>
/**
 * Unicode code points for the CP-1252 bytes 0x80 through 0x9F, the range in
 * which CP-1252 is not identical to ISO-8859-1. The code point for byte B is
 * stored at index (B - 0x80). Bytes which have no character assigned are
 * represented by U+FFFD (REPLACEMENT CHARACTER).
 */
static const int __GUAC_RDP_CP1252_CODEPOINT[32] = {

    /* 0x80 - 0x87 */
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,

    /* 0x88 - 0x8F */
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,

    /* 0x90 - 0x97 */
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,

    /* 0x98 - 0x9F */
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178

};
int guac_iconv(guac_iconv_read* reader, char** input, int in_remaining, int guac_iconv(guac_iconv_read* reader, char** input, int in_remaining,
guac_iconv_write* writer, char** output, int out_remaining) { guac_iconv_write* writer, char** output, int out_remaining) {
@ -81,6 +119,28 @@ int GUAC_READ_UTF16(char** input, int remaining) {
} }
/**
 * Reads one CP-1252 byte from the input and advances the input pointer past
 * it. The remaining-length argument is not consulted.
 *
 * @param input      Pointer to the current read position; incremented by one.
 * @param remaining  Number of bytes left in the input buffer (unused here).
 * @return           The Unicode code point corresponding to the byte read.
 */
int GUAC_READ_CP1252(char** input, int remaining) {

    /* Consume exactly one byte, interpreting it as unsigned */
    const unsigned char byte = (unsigned char) **input;
    (*input)++;

    /* Outside 0x80-0x9F, the byte value is already the code point */
    if (byte < 0x80 || byte > 0x9F)
        return byte;

    /* Within 0x80-0x9F, the code point comes from the lookup table */
    return __GUAC_RDP_CP1252_CODEPOINT[byte - 0x80];

}
/**
 * Reads one ISO-8859-1 byte from the input and advances the input pointer
 * past it. The remaining-length argument is not consulted.
 *
 * @param input      Pointer to the current read position; incremented by one.
 * @param remaining  Number of bytes left in the input buffer (unused here).
 * @return           The value of the byte read, in the range 0x00 - 0xFF.
 */
int GUAC_READ_ISO8859_1(char** input, int remaining) {

    /* Remember where the character starts, viewing it as an unsigned byte */
    const unsigned char* current = (const unsigned char*) *input;

    /* Exactly one byte is consumed per character */
    *input += 1;

    /* The byte value is returned unchanged as the code point */
    return *current;

}
void GUAC_WRITE_UTF8(char** output, int remaining, int value) { void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
*output += guac_utf8_write(value, *output, remaining); *output += guac_utf8_write(value, *output, remaining);
} }
@ -97,3 +157,39 @@ void GUAC_WRITE_UTF16(char** output, int remaining, int value) {
} }
/**
 * Writes the given Unicode code point to the output as a single CP-1252 byte
 * and advances the output pointer by one. Code points which cannot be
 * represented are written as a question mark.
 *
 * The remaining-length argument is not consulted, so the caller must ensure
 * that at least one byte is available at the output position.
 *
 * @param output     Pointer to the current write position; incremented by one.
 * @param remaining  Number of bytes left in the output buffer (unused here).
 * @param value      The Unicode code point to write.
 */
void GUAC_WRITE_CP1252(char** output, int remaining, int value) {

    /* If not in ISO-8859-1 part of CP1252, check lookup table */
    if (value < 0 || (value >= 0x80 && value <= 0x9F) || value > 0xFF) {

        int i;
        int replacement_value = '?';

        /*
         * U+FFFD is the placeholder the table uses for unassigned bytes, so
         * it must never match: doing so would emit an undefined CP-1252 byte
         * (0x81) rather than a question mark.
         */
        if (value != 0xFFFD) {

            /* Search lookup table for value */
            for (i = 0x80; i <= 0x9F; i++) {
                if (__GUAC_RDP_CP1252_CODEPOINT[i - 0x80] == value) {
                    replacement_value = i;
                    break;
                }
            }

        }

        /* Replace value with discovered value (or question mark) */
        value = replacement_value;

    }

    *((unsigned char*) *output) = (unsigned char) value;
    (*output)++;

}
/**
 * Writes the given Unicode code point to the output as a single ISO-8859-1
 * byte and advances the output pointer by one. Code points outside the range
 * 0x00 - 0xFF (including negative values) are written as a question mark.
 *
 * The remaining-length argument is not consulted, so the caller must ensure
 * that at least one byte is available at the output position.
 *
 * @param output     Pointer to the current write position; incremented by one.
 * @param remaining  Number of bytes left in the output buffer (unused here).
 * @param value      The Unicode code point to write.
 */
void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {

    /*
     * Translate to question mark if out of range. Negative values must be
     * caught as well, as the cast below would otherwise silently truncate
     * them into an unrelated byte (-1 would become 0xFF).
     */
    if (value < 0 || value > 0xFF)
        value = '?';

    *((unsigned char*) *output) = (unsigned char) value;
    (*output)++;

}

View File

@ -69,6 +69,16 @@ guac_iconv_read GUAC_READ_UTF8;
*/ */
guac_iconv_read GUAC_READ_UTF16; guac_iconv_read GUAC_READ_UTF16;
/**
 * Read function for CP-1252. Consumes a single byte from the input, advancing
 * the input pointer by one, and yields the corresponding Unicode code point.
 * Bytes 0x80 through 0x9F are translated through a lookup table; every other
 * byte yields the code point having the same value as the byte.
 */
guac_iconv_read GUAC_READ_CP1252;
/**
 * Read function for ISO-8859-1. Consumes a single byte from the input,
 * advancing the input pointer by one, and yields the value of that byte
 * (0x00 - 0xFF) unchanged as the code point.
 */
guac_iconv_read GUAC_READ_ISO8859_1;
/** /**
* Write function for UTF8. * Write function for UTF8.
*/ */
@ -79,5 +89,15 @@ guac_iconv_write GUAC_WRITE_UTF8;
*/ */
guac_iconv_write GUAC_WRITE_UTF16; guac_iconv_write GUAC_WRITE_UTF16;
/**
 * Write function for CP-1252. Always writes exactly one byte and advances the
 * output pointer by one. Code points in the ranges 0x00 - 0x7F and
 * 0xA0 - 0xFF are written as the byte of the same value. Any other code
 * point is searched for in the CP-1252 lookup table, and is written as a
 * question mark if it is not found there.
 */
guac_iconv_write GUAC_WRITE_CP1252;
/**
 * Write function for ISO-8859-1. Always writes exactly one byte and advances
 * the output pointer by one. Code points greater than 0xFF are written as a
 * question mark.
 */
guac_iconv_write GUAC_WRITE_ISO8859_1;
#endif #endif

View File

@ -71,6 +71,22 @@ void test_guac_iconv() {
0x00, 0x00 0x00, 0x00
}; };
/* ISO-8859-1 for "papà è bello" */
unsigned char test_string_iso8859_1[] = {
'p', 'a', 'p', 0xE0, ' ',
0xE8, ' ',
'b', 'e', 'l', 'l', 'o',
0x00
};
/* CP1252 for "papà è bello" */
unsigned char test_string_cp1252[] = {
'p', 'a', 'p', 0xE0, ' ',
0xE8, ' ',
'b', 'e', 'l', 'l', 'o',
0x00
};
/* UTF8 identity */ /* UTF8 identity */
test_conversion( test_conversion(
GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8), GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8),
@ -91,5 +107,25 @@ void test_guac_iconv() {
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16), GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8)); GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
/* UTF16 to ISO-8859-1 */
test_conversion(
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
GUAC_WRITE_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1));
/* UTF16 to CP1252 */
test_conversion(
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
GUAC_WRITE_CP1252, test_string_cp1252, sizeof(test_string_cp1252));
/* CP1252 to UTF8 */
test_conversion(
GUAC_READ_CP1252, test_string_cp1252, sizeof(test_string_cp1252),
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
/* ISO-8859-1 to UTF8 */
test_conversion(
GUAC_READ_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1),
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
} }