From 7472310a03b061dfe7f2e673b85edd21dcef4f63 Mon Sep 17 00:00:00 2001 From: Michael Jumper Date: Sat, 25 Dec 2021 00:07:47 -0800 Subject: [PATCH 1/2] GUACAMOLE-478: Implement encoding translation functions for normalizing newline sequences. --- src/common/common/iconv.h | 48 ++++++ src/common/iconv.c | 114 +++++++++++++ src/common/tests/Makefile.am | 4 + src/common/tests/iconv/convert-test-data.c | 153 ++++++++++++++++++ src/common/tests/iconv/convert-test-data.h | 121 ++++++++++++++ src/common/tests/iconv/convert.c | 176 +++++++-------------- 6 files changed, 500 insertions(+), 116 deletions(-) create mode 100644 src/common/tests/iconv/convert-test-data.c create mode 100644 src/common/tests/iconv/convert-test-data.h diff --git a/src/common/common/iconv.h b/src/common/common/iconv.h index 6381b0a5..16a2a634 100644 --- a/src/common/common/iconv.h +++ b/src/common/common/iconv.h @@ -76,6 +76,30 @@ guac_iconv_read GUAC_READ_CP1252; */ guac_iconv_read GUAC_READ_ISO8859_1; +/** + * Read function for UTF-8 which normalizes newline character sequences like + * "\r\n" to Unix-style newlines ('\n'). + */ +guac_iconv_read GUAC_READ_UTF8_NORMALIZED; + +/** + * Read function for UTF-16 which normalizes newline character sequences like + * "\r\n" to Unix-style newlines ('\n'). + */ +guac_iconv_read GUAC_READ_UTF16_NORMALIZED; + +/** + * Read function for CP-1252 which normalizes newline character sequences like + * "\r\n" to Unix-style newlines ('\n'). + */ +guac_iconv_read GUAC_READ_CP1252_NORMALIZED; + +/** + * Read function for ISO 8859-1 which normalizes newline character sequences + * like "\r\n" to Unix-style newlines ('\n'). + */ +guac_iconv_read GUAC_READ_ISO8859_1_NORMALIZED; + /** * Write function for UTF8. */ @@ -96,5 +120,29 @@ guac_iconv_write GUAC_WRITE_CP1252; */ guac_iconv_write GUAC_WRITE_ISO8859_1; +/** + * Write function for UTF-8 which writes newline characters ('\n') as + * Windows-style newlines ("\r\n"). 
+ */ +guac_iconv_write GUAC_WRITE_UTF8_CRLF; + +/** + * Write function for UTF-16 which writes newline characters ('\n') as + * Windows-style newlines ("\r\n"). + */ +guac_iconv_write GUAC_WRITE_UTF16_CRLF; + +/** + * Write function for CP-1252 which writes newline characters ('\n') as + * Windows-style newlines ("\r\n"). + */ +guac_iconv_write GUAC_WRITE_CP1252_CRLF; + +/** + * Write function for ISO 8859-1 which writes newline characters ('\n') as + * Windows-style newlines ("\r\n"). + */ +guac_iconv_write GUAC_WRITE_ISO8859_1_CRLF; + #endif diff --git a/src/common/iconv.c b/src/common/iconv.c index f4cc6c50..3bd9f5d7 100644 --- a/src/common/iconv.c +++ b/src/common/iconv.c @@ -138,6 +138,70 @@ int GUAC_READ_ISO8859_1(const char** input, int remaining) { } +/** + * Invokes the given reader function, automatically normalizing newline + * sequences as Unix-style newline characters ('\n'). All other characters are + * read verbatim. + * + * @param reader + * The reader to use to read the given character. + * + * @param input + * Pointer to the location within the input buffer that the next character + * should be read from. + * + * @param remaining + * The number of bytes remaining in the input buffer. + * + * @return + * The codepoint that was read, or zero if the end of the input string has + * been reached.
+ */
+static int guac_iconv_read_normalized(guac_iconv_read* reader,
+        const char** input, int remaining) {
+
+    /* Read requested character */
+    const char* input_start = *input;
+    int value = reader(input, remaining);
+
+    /* Automatically translate CRLF pairs to simple newlines */
+    if (value == '\r') {
+
+        /* Peek ahead by one character, but only if input actually remains */
+        int peek_remaining = remaining - (*input - input_start);
+        const char* peek_input = *input;
+        int peek_value = 0;
+        if (peek_remaining > 0)
+            peek_value = reader(&peek_input, peek_remaining);
+
+        /* A "\r\n" (CRLF) pair is consumed whole and read as one newline */
+        if (peek_value == '\n') {
+            value = '\n';
+            *input = peek_input;
+        }
+
+    }
+
+    return value;
+
+}
+
+int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) {
+    return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining);
+}
+
+int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) {
+    return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining);
+}
+
+int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) {
+    return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining);
+}
+
+int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
+    return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
+}
+
 void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
     *output += guac_utf8_write(value, *output, remaining);
 }
@@ -190,3 +254,53 @@ void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {
     (*output)++;
 }
 
+/**
+ * Invokes the given writer function, automatically writing newline characters
+ * ('\n') as CRLF ("\r\n"). All other characters are written verbatim.
+ *
+ * @param writer
+ *     The writer to use to write the given character.
+ *
+ * @param output
+ *     Pointer to the location within the output buffer that the next character
+ *     should be written.
+ *
+ * @param remaining
+ *     The number of bytes remaining in the output buffer.
+ *
+ * @param value
+ *     The codepoint of the character to write.
+ */
+static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output,
+        int remaining, int value) {
+    /* Anything other than a newline is passed to the writer unchanged */
+    if (value != '\n') {
+        writer(output, remaining, value);
+        return;
+    }
+    /* Newline: write the '\r' first, noting where that write began */
+    char* output_start = *output;
+    writer(output, remaining, '\r');
+    /* Write the '\n' only if space remains after the bytes used by '\r' */
+    remaining -= *output - output_start;
+    if (remaining > 0)
+        writer(output, remaining, '\n');
+
+}
+
+void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) {
+    guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value);
+}
+
+void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) {
+    guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value);
+}
+
+void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) {
+    guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value);
+}
+
+void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
+    guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
+}
+
diff --git a/src/common/tests/Makefile.am b/src/common/tests/Makefile.am index a9c1b559..526577b2 100644 --- a/src/common/tests/Makefile.am +++ b/src/common/tests/Makefile.am @@ -33,8 +33,12 @@ ACLOCAL_AMFLAGS = -I m4 check_PROGRAMS = test_common TESTS = $(check_PROGRAMS) +noinst_HEADERS = \ + iconv/convert-test-data.h + test_common_SOURCES = \ iconv/convert.c \ + iconv/convert-test-data.c \ rect/clip_and_split.c \ rect/constrain.c \ rect/expand_to_grid.c \ diff --git a/src/common/tests/iconv/convert-test-data.c b/src/common/tests/iconv/convert-test-data.c new file mode 100644 index 00000000..2032e9d4 --- /dev/null +++ b/src/common/tests/iconv/convert-test-data.c @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "common/iconv.h" +#include "convert-test-data.h" + +encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS] = { + + /* + * UTF-8 + */ + + { + "UTF-8", + GUAC_READ_UTF8, GUAC_READ_UTF8_NORMALIZED, + GUAC_WRITE_UTF8, GUAC_WRITE_UTF8_CRLF, + .test_mixed = TEST_STRING( + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello" + ), + .test_unix = TEST_STRING( + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello\n" + "pap\xC3\xA0 \xC3\xA8 bello" + ), + .test_windows = TEST_STRING( + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello\r\n" + "pap\xC3\xA0 \xC3\xA8 bello" + ) + }, + + /* + * UTF-16 + */ + + { + "UTF-16", + GUAC_READ_UTF16, GUAC_READ_UTF16_NORMALIZED, + GUAC_WRITE_UTF16, GUAC_WRITE_UTF16_CRLF, + .test_mixed = TEST_STRING( + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" 
"\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" + "\x00" + ), + .test_unix = TEST_STRING( + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" + "\x00" + ), + .test_windows = TEST_STRING( + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00" + "p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" + "\x00" + ) + }, + + /* + * ISO 8859-1 + */ + + { + "ISO 8859-1", + GUAC_READ_ISO8859_1, GUAC_READ_ISO8859_1_NORMALIZED, + GUAC_WRITE_ISO8859_1, GUAC_WRITE_ISO8859_1_CRLF, + .test_mixed = TEST_STRING( + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello" + ), + .test_unix = TEST_STRING( + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello" + ), + .test_windows = TEST_STRING( + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 
bello\r\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello" + ) + }, + + /* + * CP-1252 + */ + + { + "CP-1252", + GUAC_READ_CP1252, GUAC_READ_CP1252_NORMALIZED, + GUAC_WRITE_CP1252, GUAC_WRITE_CP1252_CRLF, + .test_mixed = TEST_STRING( + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello" + ), + .test_unix = TEST_STRING( + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello\n" + "pap\xE0 \xE8 bello" + ), + .test_windows = TEST_STRING( + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello\r\n" + "pap\xE0 \xE8 bello" + ) + } + +}; + diff --git a/src/common/tests/iconv/convert-test-data.h b/src/common/tests/iconv/convert-test-data.h new file mode 100644 index 00000000..f682cfb3 --- /dev/null +++ b/src/common/tests/iconv/convert-test-data.h @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "common/iconv.h" + +/** + * Representation of test string data and its length in bytes. + */ +typedef struct test_string { + + /** + * The raw content of the test string. 
+ */ + unsigned char* buffer; + + /** + * The number of bytes within the test string, including null terminator. + */ + int size; + +} test_string; + +/** + * Convenience macro which statically-initializes a test_string with the given + * string value, automatically calculating its size in bytes. + * + * @param value + * The string value. + */ +#define TEST_STRING(value) { \ + .buffer = (unsigned char*) (value), \ + .size = sizeof(value) \ +} + +/** + * The parameters applicable to a unit test for a particular encoding supported + * by guac_iconv(). + */ +typedef struct encoding_test_parameters { + + /** + * The human-readable name of this encoding. This will be logged to the + * test suite log to assist with debugging test failures. + */ + const char* name; + + /** + * Reader function which reads using this encoding and does not perform any + * transformation on newline characters. + */ + guac_iconv_read* reader; + + /** + * Reader function which reads using this encoding and automatically + * normalizes newline sequences to Unix-style newline characters. + */ + guac_iconv_read* reader_normalized; + + /** + * Writer function which writes using this encoding and does not perform + * any transformation on newline characters. + */ + guac_iconv_write* writer; + + /** + * Writer function which writes using this encoding, but writes newline + * characters as CRLF sequences. + */ + guac_iconv_write* writer_crlf; + + /** + * A test string having both Windows- and Unix-style line endings. Except + * for the line endings, the characters represented within this test string + * must be identical to all other test strings. + */ + test_string test_mixed; + + /** + * A test string having only Unix-style line endings. Except for the line + * endings, the characters represented within this test string must be + * identical to all other test strings. + */ + test_string test_unix; + + /** + * A test string having only Windows-style line endings. 
Except for the + * line endings, the characters represented within this test string must be + * identical to all other test strings. + */ + test_string test_windows; + +} encoding_test_parameters; + +/** + * The total number of encodings supported by guac_iconv(). + */ +#define NUM_SUPPORTED_ENCODINGS 4 + +/** + * Test parameters for each supported encoding. The test strings included each + * consist of five repeated lines of "papà è bello", omitting the line ending + * of the final line. + */ +extern encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS]; + diff --git a/src/common/tests/iconv/convert.c b/src/common/tests/iconv/convert.c index 1a00994f..d177ba01 100644 --- a/src/common/tests/iconv/convert.c +++ b/src/common/tests/iconv/convert.c @@ -18,48 +18,10 @@ */ #include "common/iconv.h" +#include "convert-test-data.h" #include <CUnit/CUnit.h> - -/** - * UTF8 for "papà è bello". - */ -unsigned char test_string_utf8[] = { - 'p', 'a', 'p', 0xC3, 0xA0, ' ', - 0xC3, 0xA8, ' ', - 'b', 'e', 'l', 'l', 'o', - 0x00 -}; - -/** - * UTF16 for "papà è bello". - */ -unsigned char test_string_utf16[] = { - 'p', 0x00, 'a', 0x00, 'p', 0x00, 0xE0, 0x00, ' ', 0x00, - 0xE8, 0x00, ' ', 0x00, - 'b', 0x00, 'e', 0x00, 'l', 0x00, 'l', 0x00, 'o', 0x00, - 0x00, 0x00 -}; - -/** - * ISO-8859-1 for "papà è bello". - */ -unsigned char test_string_iso8859_1[] = { - 'p', 'a', 'p', 0xE0, ' ', - 0xE8, ' ', - 'b', 'e', 'l', 'l', 'o', - 0x00 -}; - -/** - * CP1252 for "papà è bello". - */ -unsigned char test_string_cp1252[] = { - 'p', 'a', 'p', 0xE0, ' ', - 0xE8, ' ', - 'b', 'e', 'l', 'l', 'o', - 0x00 -}; +#include <stdio.h> /** * Tests that conversion between character sets using the given guac_iconv_read @@ -69,25 +31,20 @@ unsigned char test_string_cp1252[] = { * The guac_iconv_read implementation to use to read the input string. * * @param in_string - * A pointer to the beginning of the input string. - * - * @param in_length - * The size of the input string in bytes.
+ * A pointer to the test_string structure describing the input string being + * tested. * * @param writer * The guac_iconv_write implementation to use to write the output string * (the converted input string). * * @param out_string - * A pointer to the beginning of a string which contains the expected - * result of the conversion. - * - * @param out_length - * The size of the expected result in bytes. + * A pointer to the test_string structure describing the expected result of + * the conversion. */ static void verify_conversion( - guac_iconv_read* reader, unsigned char* in_string, int in_length, - guac_iconv_write* writer, unsigned char* out_string, int out_length) { + guac_iconv_read* reader, test_string* in_string, + guac_iconv_write* writer, test_string* out_string) { char output[4096]; char input[4096]; @@ -95,91 +52,78 @@ static void verify_conversion( const char* current_input = input; char* current_output = output; - memcpy(input, in_string, in_length); + memcpy(input, in_string->buffer, in_string->size); guac_iconv(reader, &current_input, sizeof(input), writer, &current_output, sizeof(output)); /* Verify output length */ - CU_ASSERT_EQUAL(out_length, current_output - output); + CU_ASSERT_EQUAL(out_string->size, current_output - output); /* Verify entire input read */ - CU_ASSERT_EQUAL(in_length, current_input - input); + CU_ASSERT_EQUAL(in_string->size, current_input - input); /* Verify output content */ - CU_ASSERT_EQUAL(0, memcmp(output, out_string, out_length)); + CU_ASSERT_EQUAL(0, memcmp(output, out_string->buffer, out_string->size)); } /** - * Tests which verifies conversion of UTF-8 to itself. + * Test which verifies that every supported encoding can be correctly converted + * to every other supported encoding, with all line endings preserved verbatim + * (not normalized).
*/ -void test_iconv__utf8_to_utf8() { - verify_conversion( - GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8), - GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8)); +void test_iconv__preserve() { + for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) { + for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) { + + encoding_test_parameters* from = &test_params[i]; + encoding_test_parameters* to = &test_params[j]; + + printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name); + verify_conversion(from->reader, &from->test_mixed, + to->writer, &to->test_mixed); + + } + } } /** - * Tests which verifies conversion of UTF-16 to UTF-8. + * Test which verifies that every supported encoding can be correctly converted + * to every other supported encoding, normalizing all line endings to + * Unix-style line endings. */ -void test_iconv__utf8_to_utf16() { - verify_conversion( - GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8), - GUAC_WRITE_UTF16, test_string_utf16, sizeof(test_string_utf16)); +void test_iconv__normalize_unix() { + for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) { + for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) { + + encoding_test_parameters* from = &test_params[i]; + encoding_test_parameters* to = &test_params[j]; + + printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name); + verify_conversion(from->reader_normalized, &from->test_mixed, + to->writer, &to->test_unix); + + } + } } /** - * Tests which verifies conversion of UTF-16 to itself. + * Test which verifies that every supported encoding can be correctly converted + * to every other supported encoding, normalizing all line endings to + * Windows-style line endings. */ -void test_iconv__utf16_to_utf16() { - verify_conversion( - GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16), - GUAC_WRITE_UTF16, test_string_utf16, sizeof(test_string_utf16)); -} - -/** - * Tests which verifies conversion of UTF-8 to UTF-16. 
- */ -void test_iconv__utf16_to_utf8() { - verify_conversion( - GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16), - GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8)); -} - -/** - * Tests which verifies conversion of UTF-16 to ISO 8859-1. - */ -void test_iconv__utf16_to_iso8859_1() { - verify_conversion( - GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16), - GUAC_WRITE_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1)); -} - -/** - * Tests which verifies conversion of UTF-16 to CP1252. - */ -void test_iconv__utf16_to_cp1252() { - verify_conversion( - GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16), - GUAC_WRITE_CP1252, test_string_cp1252, sizeof(test_string_cp1252)); -} - -/** - * Tests which verifies conversion of CP1252 to UTF-8. - */ -void test_iconv__cp1252_to_utf8() { - verify_conversion( - GUAC_READ_CP1252, test_string_cp1252, sizeof(test_string_cp1252), - GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8)); -} - -/** - * Tests which verifies conversion of ISO 8859-1 to UTF-8. - */ -void test_iconv__iso8859_1_to_utf8() { - verify_conversion( - GUAC_READ_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1), - GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8)); - +void test_iconv__normalize_crlf() { + for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) { + for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) { + + encoding_test_parameters* from = &test_params[i]; + encoding_test_parameters* to = &test_params[j]; + + printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name); + verify_conversion(from->reader_normalized, &from->test_mixed, + to->writer_crlf, &to->test_windows); + + } + } } From 09bd4af77ebca68f58a4ad5ec58aafe58e752c0c Mon Sep 17 00:00:00 2001 From: Michael Jumper Date: Sat, 25 Dec 2021 00:31:17 -0800 Subject: [PATCH 2/2] GUACAMOLE-478: Add optional clipboard line ending normalization for RDP. 
--- src/protocols/rdp/channels/cliprdr.c | 20 +++++++------ src/protocols/rdp/settings.c | 42 +++++++++++++++++++++++++++- src/protocols/rdp/settings.h | 13 +++++++++ 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/src/protocols/rdp/channels/cliprdr.c b/src/protocols/rdp/channels/cliprdr.c index 261f0c98..8f9d92a5 100644 --- a/src/protocols/rdp/channels/cliprdr.c +++ b/src/protocols/rdp/channels/cliprdr.c @@ -352,10 +352,11 @@ static UINT guac_rdp_cliprdr_format_data_request(CliprdrClientContext* cliprdr, guac_client* client = clipboard->client; guac_rdp_client* rdp_client = (guac_rdp_client*) client->data; + guac_rdp_settings* settings = rdp_client->settings; guac_client_log(client, GUAC_LOG_TRACE, "CLIPRDR: Received format data request."); - guac_iconv_write* writer; + guac_iconv_write* remote_writer; const char* input = clipboard->clipboard->buffer; char* output = malloc(GUAC_RDP_CLIPBOARD_MAX_LENGTH); @@ -363,11 +364,11 @@ static UINT guac_rdp_cliprdr_format_data_request(CliprdrClientContext* cliprdr, switch (format_data_request->requestedFormatId) { case CF_TEXT: - writer = GUAC_WRITE_CP1252; + remote_writer = settings->clipboard_crlf ? GUAC_WRITE_CP1252_CRLF : GUAC_WRITE_CP1252; break; case CF_UNICODETEXT: - writer = GUAC_WRITE_UTF16; + remote_writer = settings->clipboard_crlf ? GUAC_WRITE_UTF16_CRLF : GUAC_WRITE_UTF16; break; /* Warn if clipboard data cannot be sent as intended due to a violation @@ -386,8 +387,9 @@ static UINT guac_rdp_cliprdr_format_data_request(CliprdrClientContext* cliprdr, /* Send received clipboard data to the RDP server in the format * requested */ BYTE* start = (BYTE*) output; - guac_iconv(GUAC_READ_UTF8, &input, clipboard->clipboard->length, - writer, &output, GUAC_RDP_CLIPBOARD_MAX_LENGTH); + guac_iconv_read* local_reader = settings->normalize_clipboard ? 
GUAC_READ_UTF8_NORMALIZED : GUAC_READ_UTF8; + guac_iconv(local_reader, &input, clipboard->clipboard->length, + remote_writer, &output, GUAC_RDP_CLIPBOARD_MAX_LENGTH); CLIPRDR_FORMAT_DATA_RESPONSE data_response = { .requestedFormatData = (BYTE*) start, @@ -449,7 +451,7 @@ static UINT guac_rdp_cliprdr_format_data_response(CliprdrClientContext* cliprdr, char received_data[GUAC_RDP_CLIPBOARD_MAX_LENGTH]; - guac_iconv_read* reader; + guac_iconv_read* remote_reader; const char* input = (char*) format_data_response->requestedFormatData; char* output = received_data; @@ -458,12 +460,12 @@ static UINT guac_rdp_cliprdr_format_data_response(CliprdrClientContext* cliprdr, /* Non-Unicode (Windows CP-1252) */ case CF_TEXT: - reader = GUAC_READ_CP1252; + remote_reader = settings->normalize_clipboard ? GUAC_READ_CP1252_NORMALIZED : GUAC_READ_CP1252; break; /* Unicode (UTF-16) */ case CF_UNICODETEXT: - reader = GUAC_READ_UTF16; + remote_reader = settings->normalize_clipboard ? GUAC_READ_UTF16_NORMALIZED : GUAC_READ_UTF16; break; /* If the format ID stored within the guac_rdp_clipboard structure is actually @@ -481,7 +483,7 @@ static UINT guac_rdp_cliprdr_format_data_response(CliprdrClientContext* cliprdr, /* Convert, store, and forward the clipboard data received from RDP * server */ - if (guac_iconv(reader, &input, format_data_response->dataLen, + if (guac_iconv(remote_reader, &input, format_data_response->dataLen, GUAC_WRITE_UTF8, &output, sizeof(received_data))) { int length = strnlen(received_data, sizeof(received_data)); guac_common_clipboard_reset(clipboard->clipboard, "text/plain"); diff --git a/src/protocols/rdp/settings.c b/src/protocols/rdp/settings.c index 5a1d48e9..81dfedf9 100644 --- a/src/protocols/rdp/settings.c +++ b/src/protocols/rdp/settings.c @@ -130,6 +130,7 @@ const char* GUAC_RDP_CLIENT_ARGS[] = { "wol-wait-time", "force-lossless", + "normalize-clipboard", NULL }; @@ -647,6 +648,16 @@ enum RDP_ARGS_IDX { */ IDX_FORCE_LOSSLESS, + /** + * Controls whether the 
text content of the clipboard should be + * automatically normalized to use a particular line ending format. Valid + * values are "preserve", to preserve line endings verbatim, "windows" to + * transform all line endings to Windows-style CRLF sequences, or "unix" to + * transform all line endings to Unix-style newline characters ('\n'). By + * default, line endings within the clipboard are preserved. + */ + IDX_NORMALIZE_CLIPBOARD, + RDP_ARGS_COUNT }; @@ -1167,7 +1178,36 @@ guac_rdp_settings* guac_rdp_parse_args(guac_user* user, settings->disable_paste = guac_user_parse_args_boolean(user, GUAC_RDP_CLIENT_ARGS, argv, IDX_DISABLE_PASTE, 0); - + + /* Normalize clipboard line endings to Unix format */ + if (strcmp(argv[IDX_NORMALIZE_CLIPBOARD], "unix") == 0) { + guac_user_log(user, GUAC_LOG_INFO, "Clipboard line ending normalization: Unix (LF)"); + settings->normalize_clipboard = 1; + settings->clipboard_crlf = 0; + } + + /* Normalize clipboard line endings to Windows format */ + else if (strcmp(argv[IDX_NORMALIZE_CLIPBOARD], "windows") == 0) { + guac_user_log(user, GUAC_LOG_INFO, "Clipboard line ending normalization: Windows (CRLF)"); + settings->normalize_clipboard = 1; + settings->clipboard_crlf = 1; + } + + /* Preserve clipboard line ending format */ + else if (strcmp(argv[IDX_NORMALIZE_CLIPBOARD], "preserve") == 0) { + guac_user_log(user, GUAC_LOG_INFO, "Clipboard line ending normalization: Preserve (none)"); + settings->normalize_clipboard = 0; + settings->clipboard_crlf = 0; + } + + /* If nothing given, default to preserving line endings */ + else { + guac_user_log(user, GUAC_LOG_INFO, "No clipboard line-ending normalization specified. 
Defaulting to preserving the format of all line endings."); + settings->normalize_clipboard = 0; + settings->clipboard_crlf = 0; + } + + /* Parse Wake-on-LAN (WoL) settings */ settings->wol_send_packet = guac_user_parse_args_boolean(user, GUAC_RDP_CLIENT_ARGS, argv, diff --git a/src/protocols/rdp/settings.h b/src/protocols/rdp/settings.h index f80cde48..daaf3e9c 100644 --- a/src/protocols/rdp/settings.h +++ b/src/protocols/rdp/settings.h @@ -323,6 +323,19 @@ typedef struct guac_rdp_settings { */ int disable_paste; + /** + * Whether line endings within the clipboard should be automatically + * normalized to Unix-style newline characters. + */ + int normalize_clipboard; + + /** + * Whether Unix-style newline characters within the clipboard should be + * automatically translated to CRLF sequences before transmission to the + * RDP server. + */ + int clipboard_crlf; + /** * Whether the desktop wallpaper should be visible. If unset, the desktop * wallpaper will be hidden, reducing the amount of bandwidth required.