From da5404f61273043f40b61ac3c1de9cc03bf441f5 Mon Sep 17 00:00:00 2001
From: Michael Jumper
Date: Wed, 2 Apr 2014 14:08:41 -0700
Subject: [PATCH] GUAC-584: Add support for CP1252 and ISO-8859-1.

---
Revised during review:
 * GUAC_WRITE_CP1252 now writes '?' for U+FFFD rather than 0x81. The lookup
   table uses U+FFFD only as a placeholder for bytes CP-1252 leaves undefined.
 * Both single-byte writers now write '?' for negative values instead of
   truncating them to an arbitrary byte.
 * The lookup table is declared "static const" (storage class first).
 * Tests now exercise the 0x80 - 0x9F range of CP-1252.

 src/common/guac_iconv.c   | 100 +++++++++++++++++++++++++++++++++++++++
 src/common/guac_iconv.h   |  20 ++++++++
 tests/common/guac_iconv.c |  76 ++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+)

diff --git a/src/common/guac_iconv.c b/src/common/guac_iconv.c
index 0dcab980..00f07661 100644
--- a/src/common/guac_iconv.c
+++ b/src/common/guac_iconv.c
@@ -26,6 +26,47 @@
 #include
 #include
 
+/**
+ * Lookup table for Unicode code points, indexed by CP-1252 codepoint. Only
+ * 0x80 through 0x9F are covered (index = CP-1252 byte - 0x80), as this is the
+ * range in which CP-1252 differs from ISO-8859-1. Bytes which CP-1252 leaves
+ * undefined are represented by U+FFFD (REPLACEMENT CHARACTER).
+ */
+static const int __GUAC_RDP_CP1252_CODEPOINT[32] = {
+    0x20AC, /* 0x80 */
+    0xFFFD, /* 0x81 */
+    0x201A, /* 0x82 */
+    0x0192, /* 0x83 */
+    0x201E, /* 0x84 */
+    0x2026, /* 0x85 */
+    0x2020, /* 0x86 */
+    0x2021, /* 0x87 */
+    0x02C6, /* 0x88 */
+    0x2030, /* 0x89 */
+    0x0160, /* 0x8A */
+    0x2039, /* 0x8B */
+    0x0152, /* 0x8C */
+    0xFFFD, /* 0x8D */
+    0x017D, /* 0x8E */
+    0xFFFD, /* 0x8F */
+    0xFFFD, /* 0x90 */
+    0x2018, /* 0x91 */
+    0x2019, /* 0x92 */
+    0x201C, /* 0x93 */
+    0x201D, /* 0x94 */
+    0x2022, /* 0x95 */
+    0x2013, /* 0x96 */
+    0x2014, /* 0x97 */
+    0x02DC, /* 0x98 */
+    0x2122, /* 0x99 */
+    0x0161, /* 0x9A */
+    0x203A, /* 0x9B */
+    0x0153, /* 0x9C */
+    0xFFFD, /* 0x9D */
+    0x017E, /* 0x9E */
+    0x0178, /* 0x9F */
+};
+
 int guac_iconv(guac_iconv_read* reader, char** input, int in_remaining,
         guac_iconv_write* writer, char** output, int out_remaining) {
 
@@ -81,6 +122,28 @@ int GUAC_READ_UTF16(char** input, int remaining) {
 
 }
 
+int GUAC_READ_CP1252(char** input, int remaining) {
+
+    int value = *((unsigned char*) *input);
+
+    /* Replace value with exception if not identical to ISO-8859-1 */
+    if (value >= 0x80 && value <= 0x9F)
+        value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80];
+
+    (*input)++;
+    return value;
+
+}
+
+int GUAC_READ_ISO8859_1(char** input, int remaining) {
+
+    int value = *((unsigned char*) *input);
+
+    (*input)++;
+    return value;
+
+}
+
 void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
     *output += guac_utf8_write(value, *output, remaining);
 }
@@ -97,3 +160,40 @@ void GUAC_WRITE_UTF16(char** output, int remaining, int value) {
 
 }
 
+void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
+
+    /* If not in ISO-8859-1 part of CP1252, check lookup table */
+    if ((value >= 0x80 && value <= 0x9F) || value > 0xFF || value < 0) {
+
+        int i;
+        int replacement_value = '?';
+        const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT;
+
+        /* Search lookup table for value, ignoring U+FFFD, which the table
+         * uses only to mark bytes that CP1252 leaves undefined */
+        for (i=0x80; i<=0x9F; i++, codepoint++) {
+            if (*codepoint == value && value != 0xFFFD) {
+                replacement_value = i;
+                break;
+            }
+        }
+
+        /* Replace value with discovered value (or question mark) */
+        value = replacement_value;
+
+    }
+
+    *((unsigned char*) *output) = (unsigned char) value;
+    (*output)++;
+}
+
+void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {
+
+    /* Translate to question mark if out of range */
+    if (value < 0 || value > 0xFF)
+        value = '?';
+
+    *((unsigned char*) *output) = (unsigned char) value;
+    (*output)++;
+}
+
diff --git a/src/common/guac_iconv.h b/src/common/guac_iconv.h
index 98cdce7c..8cc9788a 100644
--- a/src/common/guac_iconv.h
+++ b/src/common/guac_iconv.h
@@ -69,6 +69,16 @@ guac_iconv_read GUAC_READ_UTF8;
  */
 guac_iconv_read GUAC_READ_UTF16;
 
+/**
+ * Read function for CP-1252.
+ */
+guac_iconv_read GUAC_READ_CP1252;
+
+/**
+ * Read function for ISO-8859-1.
+ */
+guac_iconv_read GUAC_READ_ISO8859_1;
+
 /**
  * Write function for UTF8.
  */
@@ -79,5 +89,15 @@ guac_iconv_write GUAC_WRITE_UTF8;
  */
 guac_iconv_write GUAC_WRITE_UTF16;
 
+/**
+ * Write function for CP-1252.
+ */
+guac_iconv_write GUAC_WRITE_CP1252;
+
+/**
+ * Write function for ISO-8859-1.
+ */
+guac_iconv_write GUAC_WRITE_ISO8859_1;
+
 #endif
 
diff --git a/tests/common/guac_iconv.c b/tests/common/guac_iconv.c
index aca40e0e..cd7f548e 100644
--- a/tests/common/guac_iconv.c
+++ b/tests/common/guac_iconv.c
@@ -71,6 +71,47 @@ void test_guac_iconv() {
         0x00, 0x00
     };
 
+    /* ISO-8859-1 for "papà è bello" */
+    unsigned char test_string_iso8859_1[] = {
+        'p', 'a', 'p', 0xE0, ' ',
+        0xE8, ' ',
+        'b', 'e', 'l', 'l', 'o',
+        0x00
+    };
+
+    /* CP1252 for "papà è bello" */
+    unsigned char test_string_cp1252[] = {
+        'p', 'a', 'p', 0xE0, ' ',
+        0xE8, ' ',
+        'b', 'e', 'l', 'l', 'o',
+        0x00
+    };
+
+    /* UTF8 for the euro sign (U+20AC) */
+    unsigned char test_string_euro_utf8[] = {
+        0xE2, 0x82, 0xAC,
+        0x00
+    };
+
+    /* CP1252 for the euro sign, which lies within the 0x80 - 0x9F range
+     * where CP1252 differs from ISO-8859-1 */
+    unsigned char test_string_euro_cp1252[] = {
+        0x80,
+        0x00
+    };
+
+    /* UTF8 for U+FFFD (REPLACEMENT CHARACTER) */
+    unsigned char test_string_fffd_utf8[] = {
+        0xEF, 0xBF, 0xBD,
+        0x00
+    };
+
+    /* CP1252 cannot represent U+FFFD, thus a question mark is expected */
+    unsigned char test_string_fffd_cp1252[] = {
+        '?',
+        0x00
+    };
+
     /* UTF8 identity */
     test_conversion(
         GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8),
@@ -91,5 +132,40 @@ void test_guac_iconv() {
         GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
         GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
 
+    /* UTF16 to ISO-8859-1 */
+    test_conversion(
+        GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
+        GUAC_WRITE_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1));
+
+    /* UTF16 to CP1252 */
+    test_conversion(
+        GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
+        GUAC_WRITE_CP1252, test_string_cp1252, sizeof(test_string_cp1252));
+
+    /* CP1252 to UTF8 */
+    test_conversion(
+        GUAC_READ_CP1252, test_string_cp1252, sizeof(test_string_cp1252),
+        GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
+
+    /* ISO-8859-1 to UTF8 */
+    test_conversion(
+        GUAC_READ_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1),
+        GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
+
+    /* CP1252 to UTF8 (lookup table range) */
+    test_conversion(
+        GUAC_READ_CP1252, test_string_euro_cp1252, sizeof(test_string_euro_cp1252),
+        GUAC_WRITE_UTF8, test_string_euro_utf8, sizeof(test_string_euro_utf8));
+
+    /* UTF8 to CP1252 (lookup table range) */
+    test_conversion(
+        GUAC_READ_UTF8, test_string_euro_utf8, sizeof(test_string_euro_utf8),
+        GUAC_WRITE_CP1252, test_string_euro_cp1252, sizeof(test_string_euro_cp1252));
+
+    /* UTF8 to CP1252 (unrepresentable character) */
+    test_conversion(
+        GUAC_READ_UTF8, test_string_fffd_utf8, sizeof(test_string_fffd_utf8),
+        GUAC_WRITE_CP1252, test_string_fffd_cp1252, sizeof(test_string_fffd_cp1252));
+
 }
 