GUACAMOLE-478: Implement encoding translation functions for normalizing newline sequences.
This commit is contained in:
parent
b9cc76058b
commit
7472310a03
@ -76,6 +76,30 @@ guac_iconv_read GUAC_READ_CP1252;
|
|||||||
*/
|
*/
|
||||||
guac_iconv_read GUAC_READ_ISO8859_1;
|
guac_iconv_read GUAC_READ_ISO8859_1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read function for UTF-8 which normalizes newline character sequences like
|
||||||
|
* "\r\n" to Unix-style newlines ('\n').
|
||||||
|
*/
|
||||||
|
guac_iconv_read GUAC_READ_UTF8_NORMALIZED;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read function for UTF-16 which normalizes newline character sequences like
|
||||||
|
* "\r\n" to Unix-style newlines ('\n').
|
||||||
|
*/
|
||||||
|
guac_iconv_read GUAC_READ_UTF16_NORMALIZED;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read function for CP-1252 which normalizes newline character sequences like
|
||||||
|
* "\r\n" to Unix-style newlines ('\n').
|
||||||
|
*/
|
||||||
|
guac_iconv_read GUAC_READ_CP1252_NORMALIZED;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read function for ISO 8859-1 which normalizes newline character sequences
|
||||||
|
* like "\r\n" to Unix-style newlines ('\n').
|
||||||
|
*/
|
||||||
|
guac_iconv_read GUAC_READ_ISO8859_1_NORMALIZED;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write function for UTF8.
|
* Write function for UTF8.
|
||||||
*/
|
*/
|
||||||
@ -96,5 +120,29 @@ guac_iconv_write GUAC_WRITE_CP1252;
|
|||||||
*/
|
*/
|
||||||
guac_iconv_write GUAC_WRITE_ISO8859_1;
|
guac_iconv_write GUAC_WRITE_ISO8859_1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write function for UTF-8 which writes newline characters ('\n') as
|
||||||
|
* Windows-style newlines ("\r\n").
|
||||||
|
*/
|
||||||
|
guac_iconv_write GUAC_WRITE_UTF8_CRLF;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write function for UTF-16 which writes newline characters ('\n') as
|
||||||
|
* Windows-style newlines ("\r\n").
|
||||||
|
*/
|
||||||
|
guac_iconv_write GUAC_WRITE_UTF16_CRLF;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write function for CP-1252 which writes newline characters ('\n') as
|
||||||
|
* Windows-style newlines ("\r\n").
|
||||||
|
*/
|
||||||
|
guac_iconv_write GUAC_WRITE_CP1252_CRLF;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write function for ISO 8859-1 which writes newline characters ('\n') as
|
||||||
|
* Windows-style newlines ("\r\n").
|
||||||
|
*/
|
||||||
|
guac_iconv_write GUAC_WRITE_ISO8859_1_CRLF;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -138,6 +138,70 @@ int GUAC_READ_ISO8859_1(const char** input, int remaining) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Invokes the given reader function, automatically normalizing newline
|
||||||
|
* sequences as Unix-style newline characters ('\n'). All other charaters are
|
||||||
|
* read verbatim.
|
||||||
|
*
|
||||||
|
* @param reader
|
||||||
|
* The reader to use to read the given character.
|
||||||
|
*
|
||||||
|
* @param input
|
||||||
|
* Pointer to the location within the input buffer that the next character
|
||||||
|
* should be read from.
|
||||||
|
*
|
||||||
|
* @param remaining
|
||||||
|
* The number of bytes remaining in the input buffer.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* The codepoint that was read, or zero if the end of the input string has
|
||||||
|
* been reached.
|
||||||
|
*/
|
||||||
|
static int guac_iconv_read_normalized(guac_iconv_read* reader,
|
||||||
|
const char** input, int remaining) {
|
||||||
|
|
||||||
|
/* Read requested character */
|
||||||
|
const char* input_start = *input;
|
||||||
|
int value = reader(input, remaining);
|
||||||
|
|
||||||
|
/* Automatically translate CRLF pairs to simple newlines */
|
||||||
|
if (value == '\r') {
|
||||||
|
|
||||||
|
/* Peek ahead by one character, adjusting remaining bytes relative to
|
||||||
|
* last read */
|
||||||
|
int peek_remaining = remaining - (*input - input_start);
|
||||||
|
const char* peek_input = *input;
|
||||||
|
int peek_value = reader(&peek_input, peek_remaining);
|
||||||
|
|
||||||
|
/* Consider read value to be a newline if we have encountered a "\r\n"
|
||||||
|
* (CRLF) pair */
|
||||||
|
if (peek_value == '\n') {
|
||||||
|
value = '\n';
|
||||||
|
*input = peek_input;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Newline-normalizing UTF-8 reader. Delegates to guac_iconv_read_normalized()
 * using GUAC_READ_UTF8 as the underlying reader, such that a "\r\n" pair is
 * returned as a single '\n'.
 */
int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) {
|
||||||
|
return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Newline-normalizing UTF-16 reader. Delegates to guac_iconv_read_normalized()
 * using GUAC_READ_UTF16 as the underlying reader, such that a "\r\n" pair is
 * returned as a single '\n'.
 */
int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) {
|
||||||
|
return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Newline-normalizing CP-1252 reader. Delegates to
 * guac_iconv_read_normalized() using GUAC_READ_CP1252 as the underlying
 * reader, such that a "\r\n" pair is returned as a single '\n'.
 */
int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) {
|
||||||
|
return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Newline-normalizing ISO 8859-1 reader. Delegates to
 * guac_iconv_read_normalized() using GUAC_READ_ISO8859_1 as the underlying
 * reader, such that a "\r\n" pair is returned as a single '\n'.
 */
int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
|
||||||
|
return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
|
||||||
|
}
|
||||||
|
|
||||||
void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
|
void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
|
||||||
*output += guac_utf8_write(value, *output, remaining);
|
*output += guac_utf8_write(value, *output, remaining);
|
||||||
}
|
}
|
||||||
@ -190,3 +254,53 @@ void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {
|
|||||||
(*output)++;
|
(*output)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Invokes the given writer function, automatically writing newline characters
|
||||||
|
* ('\n') as CRLF ("\r\n"). All other charaters are written verbatim.
|
||||||
|
*
|
||||||
|
* @param writer
|
||||||
|
* The writer to use to write the given character.
|
||||||
|
*
|
||||||
|
* @param output
|
||||||
|
* Pointer to the location within the output buffer that the next character
|
||||||
|
* should be written.
|
||||||
|
*
|
||||||
|
* @param remaining
|
||||||
|
* The number of bytes remaining in the output buffer.
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* The codepoint of the character to write.
|
||||||
|
*/
|
||||||
|
static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output,
|
||||||
|
int remaining, int value) {
|
||||||
|
|
||||||
|
if (value != '\n') {
|
||||||
|
writer(output, remaining, value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* output_start = *output;
|
||||||
|
writer(output, remaining, '\r');
|
||||||
|
|
||||||
|
remaining -= *output - output_start;
|
||||||
|
if (remaining > 0)
|
||||||
|
writer(output, remaining, '\n');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * UTF-8 writer which emits Windows-style line endings. Delegates to
 * guac_iconv_write_crlf() using GUAC_WRITE_UTF8 as the underlying writer, such
 * that '\n' is written as "\r\n".
 */
void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) {
|
||||||
|
guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * UTF-16 writer which emits Windows-style line endings. Delegates to
 * guac_iconv_write_crlf() using GUAC_WRITE_UTF16 as the underlying writer,
 * such that '\n' is written as "\r\n".
 */
void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) {
|
||||||
|
guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * CP-1252 writer which emits Windows-style line endings. Delegates to
 * guac_iconv_write_crlf() using GUAC_WRITE_CP1252 as the underlying writer,
 * such that '\n' is written as "\r\n".
 */
void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) {
|
||||||
|
guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * ISO 8859-1 writer which emits Windows-style line endings. Delegates to
 * guac_iconv_write_crlf() using GUAC_WRITE_ISO8859_1 as the underlying writer,
 * such that '\n' is written as "\r\n".
 */
void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
|
||||||
|
guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -33,8 +33,12 @@ ACLOCAL_AMFLAGS = -I m4
|
|||||||
check_PROGRAMS = test_common
|
check_PROGRAMS = test_common
|
||||||
TESTS = $(check_PROGRAMS)
|
TESTS = $(check_PROGRAMS)
|
||||||
|
|
||||||
|
noinst_HEADERS = \
|
||||||
|
iconv/convert-test-data.h
|
||||||
|
|
||||||
test_common_SOURCES = \
|
test_common_SOURCES = \
|
||||||
iconv/convert.c \
|
iconv/convert.c \
|
||||||
|
iconv/convert-test-data.c \
|
||||||
rect/clip_and_split.c \
|
rect/clip_and_split.c \
|
||||||
rect/constrain.c \
|
rect/constrain.c \
|
||||||
rect/expand_to_grid.c \
|
rect/expand_to_grid.c \
|
||||||
|
153
src/common/tests/iconv/convert-test-data.c
Normal file
153
src/common/tests/iconv/convert-test-data.c
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/iconv.h"
|
||||||
|
#include "convert-test-data.h"
|
||||||
|
|
||||||
|
encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS] = {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UTF-8
|
||||||
|
*/
|
||||||
|
|
||||||
|
{
|
||||||
|
"UTF-8",
|
||||||
|
GUAC_READ_UTF8, GUAC_READ_UTF8_NORMALIZED,
|
||||||
|
GUAC_WRITE_UTF8, GUAC_WRITE_UTF8_CRLF,
|
||||||
|
.test_mixed = TEST_STRING(
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello"
|
||||||
|
),
|
||||||
|
.test_unix = TEST_STRING(
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello"
|
||||||
|
),
|
||||||
|
.test_windows = TEST_STRING(
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello\r\n"
|
||||||
|
"pap\xC3\xA0 \xC3\xA8 bello"
|
||||||
|
)
|
||||||
|
},
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UTF-16
|
||||||
|
*/
|
||||||
|
|
||||||
|
{
|
||||||
|
"UTF-16",
|
||||||
|
GUAC_READ_UTF16, GUAC_READ_UTF16_NORMALIZED,
|
||||||
|
GUAC_WRITE_UTF16, GUAC_WRITE_UTF16_CRLF,
|
||||||
|
.test_mixed = TEST_STRING(
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00"
|
||||||
|
"\x00"
|
||||||
|
),
|
||||||
|
.test_unix = TEST_STRING(
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00"
|
||||||
|
"\x00"
|
||||||
|
),
|
||||||
|
.test_windows = TEST_STRING(
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00" "\r\x00" "\n\x00"
|
||||||
|
"p\x00" "a\x00" "p\x00" "\xE0\x00" " \x00" "\xE8\x00" " \x00" "b\x00" "e\x00" "l\x00" "l\x00" "o\x00"
|
||||||
|
"\x00"
|
||||||
|
)
|
||||||
|
},
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ISO 8859-1
|
||||||
|
*/
|
||||||
|
|
||||||
|
{
|
||||||
|
"ISO 8859-1",
|
||||||
|
GUAC_READ_ISO8859_1, GUAC_READ_ISO8859_1_NORMALIZED,
|
||||||
|
GUAC_WRITE_ISO8859_1, GUAC_WRITE_ISO8859_1_CRLF,
|
||||||
|
.test_mixed = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
),
|
||||||
|
.test_unix = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
),
|
||||||
|
.test_windows = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
)
|
||||||
|
},
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CP-1252
|
||||||
|
*/
|
||||||
|
|
||||||
|
{
|
||||||
|
"CP-1252",
|
||||||
|
GUAC_READ_CP1252, GUAC_READ_CP1252_NORMALIZED,
|
||||||
|
GUAC_WRITE_CP1252, GUAC_WRITE_CP1252_CRLF,
|
||||||
|
.test_mixed = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
),
|
||||||
|
.test_unix = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
),
|
||||||
|
.test_windows = TEST_STRING(
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello\r\n"
|
||||||
|
"pap\xE0 \xE8 bello"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
121
src/common/tests/iconv/convert-test-data.h
Normal file
121
src/common/tests/iconv/convert-test-data.h
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Include guard: this header defines typedefs and must therefore tolerate
 * being included more than once within a single translation unit */
#ifndef GUAC_COMMON_TESTS_ICONV_CONVERT_TEST_DATA_H
#define GUAC_COMMON_TESTS_ICONV_CONVERT_TEST_DATA_H

#include "common/iconv.h"

/**
 * Representation of test string data and its length in bytes.
 */
typedef struct test_string {

    /**
     * The raw content of the test string.
     */
    unsigned char* buffer;

    /**
     * The number of bytes within the test string, including null terminator.
     */
    int size;

} test_string;

/**
 * Convenience macro which statically-initializes a test_string with the given
 * string value, automatically calculating its size in bytes. As the size is
 * determined with sizeof, the value must be a string literal (or array), not
 * a pointer.
 *
 * @param value
 *     The string value.
 */
#define TEST_STRING(value) {            \
    .buffer = (unsigned char*) (value), \
    .size = sizeof(value)               \
}

/**
 * The parameters applicable to a unit test for a particular encoding supported
 * by guac_iconv().
 */
typedef struct encoding_test_parameters {

    /**
     * The human-readable name of this encoding. This will be logged to the
     * test suite log to assist with debugging test failures.
     */
    const char* name;

    /**
     * Reader function which reads using this encoding and does not perform any
     * transformation on newline characters.
     */
    guac_iconv_read* reader;

    /**
     * Reader function which reads using this encoding and automatically
     * normalizes newline sequences to Unix-style newline characters.
     */
    guac_iconv_read* reader_normalized;

    /**
     * Writer function which writes using this encoding and does not perform
     * any transformation on newline characters.
     */
    guac_iconv_write* writer;

    /**
     * Writer function which writes using this encoding, but writes newline
     * characters as CRLF sequences.
     */
    guac_iconv_write* writer_crlf;

    /**
     * A test string having both Windows- and Unix-style line endings. Except
     * for the line endings, the characters represented within this test string
     * must be identical to all other test strings.
     */
    test_string test_mixed;

    /**
     * A test string having only Unix-style line endings. Except for the line
     * endings, the characters represented within this test string must be
     * identical to all other test strings.
     */
    test_string test_unix;

    /**
     * A test string having only Windows-style line endings. Except for the
     * line endings, the characters represented within this test string must be
     * identical to all other test strings.
     */
    test_string test_windows;

} encoding_test_parameters;

/**
 * The total number of encodings supported by guac_iconv().
 */
#define NUM_SUPPORTED_ENCODINGS 4

/**
 * Test parameters for each supported encoding. The test strings included each
 * consist of five repeated lines of "papà è bello", omitting the line ending
 * of the final line.
 */
extern encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS];

#endif
|
||||||
|
|
@ -18,48 +18,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "common/iconv.h"
|
#include "common/iconv.h"
|
||||||
|
#include "convert-test-data.h"
|
||||||
|
|
||||||
#include <CUnit/CUnit.h>
|
#include <CUnit/CUnit.h>
|
||||||
|
#include <stdio.h>
|
||||||
/**
|
|
||||||
* UTF8 for "papà è bello".
|
|
||||||
*/
|
|
||||||
unsigned char test_string_utf8[] = {
|
|
||||||
'p', 'a', 'p', 0xC3, 0xA0, ' ',
|
|
||||||
0xC3, 0xA8, ' ',
|
|
||||||
'b', 'e', 'l', 'l', 'o',
|
|
||||||
0x00
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* UTF16 for "papà è bello".
|
|
||||||
*/
|
|
||||||
unsigned char test_string_utf16[] = {
|
|
||||||
'p', 0x00, 'a', 0x00, 'p', 0x00, 0xE0, 0x00, ' ', 0x00,
|
|
||||||
0xE8, 0x00, ' ', 0x00,
|
|
||||||
'b', 0x00, 'e', 0x00, 'l', 0x00, 'l', 0x00, 'o', 0x00,
|
|
||||||
0x00, 0x00
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* ISO-8859-1 for "papà è bello".
|
|
||||||
*/
|
|
||||||
unsigned char test_string_iso8859_1[] = {
|
|
||||||
'p', 'a', 'p', 0xE0, ' ',
|
|
||||||
0xE8, ' ',
|
|
||||||
'b', 'e', 'l', 'l', 'o',
|
|
||||||
0x00
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* CP1252 for "papà è bello".
|
|
||||||
*/
|
|
||||||
unsigned char test_string_cp1252[] = {
|
|
||||||
'p', 'a', 'p', 0xE0, ' ',
|
|
||||||
0xE8, ' ',
|
|
||||||
'b', 'e', 'l', 'l', 'o',
|
|
||||||
0x00
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that conversion between character sets using the given guac_iconv_read
|
* Tests that conversion between character sets using the given guac_iconv_read
|
||||||
@ -69,25 +31,20 @@ unsigned char test_string_cp1252[] = {
|
|||||||
* The guac_iconv_read implementation to use to read the input string.
|
* The guac_iconv_read implementation to use to read the input string.
|
||||||
*
|
*
|
||||||
* @param in_string
|
* @param in_string
|
||||||
* A pointer to the beginning of the input string.
|
* A pointer to the test_string structure describing the input string being
|
||||||
*
|
* tested.
|
||||||
* @param in_length
|
|
||||||
* The size of the input string in bytes.
|
|
||||||
*
|
*
|
||||||
* @param writer
|
* @param writer
|
||||||
* The guac_iconv_write implementation to use to write the output string
|
* The guac_iconv_write implementation to use to write the output string
|
||||||
* (the converted input string).
|
* (the converted input string).
|
||||||
*
|
*
|
||||||
* @param out_string
|
* @param out_string
|
||||||
* A pointer to the beginning of a string which contains the expected
|
* A pointer to the test_string structure describing the expected result of
|
||||||
* result of the conversion.
|
* the conversion.
|
||||||
*
|
|
||||||
* @param out_length
|
|
||||||
* The size of the expected result in bytes.
|
|
||||||
*/
|
*/
|
||||||
static void verify_conversion(
|
static void verify_conversion(
|
||||||
guac_iconv_read* reader, unsigned char* in_string, int in_length,
|
guac_iconv_read* reader, test_string* in_string,
|
||||||
guac_iconv_write* writer, unsigned char* out_string, int out_length) {
|
guac_iconv_write* writer, test_string* out_string) {
|
||||||
|
|
||||||
char output[4096];
|
char output[4096];
|
||||||
char input[4096];
|
char input[4096];
|
||||||
@ -95,91 +52,78 @@ static void verify_conversion(
|
|||||||
const char* current_input = input;
|
const char* current_input = input;
|
||||||
char* current_output = output;
|
char* current_output = output;
|
||||||
|
|
||||||
memcpy(input, in_string, in_length);
|
memcpy(input, in_string->buffer, in_string->size);
|
||||||
guac_iconv(reader, ¤t_input, sizeof(input),
|
guac_iconv(reader, ¤t_input, sizeof(input),
|
||||||
writer, ¤t_output, sizeof(output));
|
writer, ¤t_output, sizeof(output));
|
||||||
|
|
||||||
/* Verify output length */
|
/* Verify output length */
|
||||||
CU_ASSERT_EQUAL(out_length, current_output - output);
|
CU_ASSERT_EQUAL(out_string->size, current_output - output);
|
||||||
|
|
||||||
/* Verify entire input read */
|
/* Verify entire input read */
|
||||||
CU_ASSERT_EQUAL(in_length, current_input - input);
|
CU_ASSERT_EQUAL(in_string->size, current_input - input);
|
||||||
|
|
||||||
/* Verify output content */
|
/* Verify output content */
|
||||||
CU_ASSERT_EQUAL(0, memcmp(output, out_string, out_length));
|
CU_ASSERT_EQUAL(0, memcmp(output, out_string->buffer, out_string->size));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests which verifies conversion of UTF-8 to itself.
|
* Test which verifies that every supported encoding can be correctly converted
|
||||||
|
* to every other supported encoding, with all line endings preserved verbatim
|
||||||
|
* (not normalized).
|
||||||
*/
|
*/
|
||||||
void test_iconv__utf8_to_utf8() {
|
void test_iconv__preserve() {
|
||||||
verify_conversion(
|
for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) {
|
||||||
GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8),
|
for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) {
|
||||||
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
|
|
||||||
|
encoding_test_parameters* from = &test_params[i];
|
||||||
|
encoding_test_parameters* to = &test_params[j];
|
||||||
|
|
||||||
|
printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name);
|
||||||
|
verify_conversion(from->reader, &from->test_mixed,
|
||||||
|
to->writer, &to->test_mixed);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests which verifies conversion of UTF-16 to UTF-8.
|
* Test which verifies that every supported encoding can be correctly converted
|
||||||
|
* to every other supported encoding, normalizing all line endings to
|
||||||
|
* Unix-style line endings.
|
||||||
*/
|
*/
|
||||||
void test_iconv__utf8_to_utf16() {
|
void test_iconv__normalize_unix() {
|
||||||
verify_conversion(
|
for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) {
|
||||||
GUAC_READ_UTF8, test_string_utf8, sizeof(test_string_utf8),
|
for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) {
|
||||||
GUAC_WRITE_UTF16, test_string_utf16, sizeof(test_string_utf16));
|
|
||||||
|
encoding_test_parameters* from = &test_params[i];
|
||||||
|
encoding_test_parameters* to = &test_params[j];
|
||||||
|
|
||||||
|
printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name);
|
||||||
|
verify_conversion(from->reader_normalized, &from->test_mixed,
|
||||||
|
to->writer, &to->test_unix);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests which verifies conversion of UTF-16 to itself.
|
* Test which verifies that every supported encoding can be correctly converted
|
||||||
|
* to every other supported encoding, normalizing all line endings to
|
||||||
|
* Windows-style line endings.
|
||||||
*/
|
*/
|
||||||
void test_iconv__utf16_to_utf16() {
|
void test_iconv__normalize_crlf() {
|
||||||
verify_conversion(
|
for (int i = 0; i < NUM_SUPPORTED_ENCODINGS; i++) {
|
||||||
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
|
for (int j = 0; j < NUM_SUPPORTED_ENCODINGS; j++) {
|
||||||
GUAC_WRITE_UTF16, test_string_utf16, sizeof(test_string_utf16));
|
|
||||||
}
|
encoding_test_parameters* from = &test_params[i];
|
||||||
|
encoding_test_parameters* to = &test_params[j];
|
||||||
/**
|
|
||||||
* Tests which verifies conversion of UTF-8 to UTF-16.
|
printf("# \"%s\" -> \"%s\" ...\n", from->name, to->name);
|
||||||
*/
|
verify_conversion(from->reader_normalized, &from->test_mixed,
|
||||||
void test_iconv__utf16_to_utf8() {
|
to->writer_crlf, &to->test_windows);
|
||||||
verify_conversion(
|
|
||||||
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
|
}
|
||||||
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests which verifies conversion of UTF-16 to ISO 8859-1.
|
|
||||||
*/
|
|
||||||
void test_iconv__utf16_to_iso8859_1() {
|
|
||||||
verify_conversion(
|
|
||||||
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
|
|
||||||
GUAC_WRITE_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests which verifies conversion of UTF-16 to CP1252.
|
|
||||||
*/
|
|
||||||
void test_iconv__utf16_to_cp1252() {
|
|
||||||
verify_conversion(
|
|
||||||
GUAC_READ_UTF16, test_string_utf16, sizeof(test_string_utf16),
|
|
||||||
GUAC_WRITE_CP1252, test_string_cp1252, sizeof(test_string_cp1252));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests which verifies conversion of CP1252 to UTF-8.
|
|
||||||
*/
|
|
||||||
void test_iconv__cp1252_to_utf8() {
|
|
||||||
verify_conversion(
|
|
||||||
GUAC_READ_CP1252, test_string_cp1252, sizeof(test_string_cp1252),
|
|
||||||
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests which verifies conversion of ISO 8859-1 to UTF-8.
|
|
||||||
*/
|
|
||||||
void test_iconv__iso8859_1_to_utf8() {
|
|
||||||
verify_conversion(
|
|
||||||
GUAC_READ_ISO8859_1, test_string_iso8859_1, sizeof(test_string_iso8859_1),
|
|
||||||
GUAC_WRITE_UTF8, test_string_utf8, sizeof(test_string_utf8));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user