307 lines
8.0 KiB
C
307 lines
8.0 KiB
C
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one
|
|
* or more contributor license agreements. See the NOTICE file
|
|
* distributed with this work for additional information
|
|
* regarding copyright ownership. The ASF licenses this file
|
|
* to you under the Apache License, Version 2.0 (the
|
|
* "License"); you may not use this file except in compliance
|
|
* with the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing,
|
|
* software distributed under the License is distributed on an
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
* KIND, either express or implied. See the License for the
|
|
* specific language governing permissions and limitations
|
|
* under the License.
|
|
*/
|
|
|
|
#include "config.h"
#include "common/iconv.h"

#include <guacamole/unicode.h>

#include <stdint.h>
#include <string.h>
|
|
|
|
/**
 * Unicode code points for the CP-1252 bytes 0x80 through 0x9F, the only range
 * in which the CP-1252 reader and writer in this file deviate from a plain
 * one-to-one byte/code point mapping. Entry N describes byte (0x80 + N).
 * Slots holding 0xFFFD (the Unicode replacement character) are bytes for
 * which the table provides no dedicated character.
 */
static const int __GUAC_RDP_CP1252_CODEPOINT[32] = {

    /* 0x80 - 0x87 */
    0x20AC, /* 0x80: EURO SIGN */
    0xFFFD, /* 0x81: (no mapping) */
    0x201A, /* 0x82: SINGLE LOW-9 QUOTATION MARK */
    0x0192, /* 0x83: LATIN SMALL LETTER F WITH HOOK */
    0x201E, /* 0x84: DOUBLE LOW-9 QUOTATION MARK */
    0x2026, /* 0x85: HORIZONTAL ELLIPSIS */
    0x2020, /* 0x86: DAGGER */
    0x2021, /* 0x87: DOUBLE DAGGER */

    /* 0x88 - 0x8F */
    0x02C6, /* 0x88: MODIFIER LETTER CIRCUMFLEX ACCENT */
    0x2030, /* 0x89: PER MILLE SIGN */
    0x0160, /* 0x8A: LATIN CAPITAL LETTER S WITH CARON */
    0x2039, /* 0x8B: SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
    0x0152, /* 0x8C: LATIN CAPITAL LIGATURE OE */
    0xFFFD, /* 0x8D: (no mapping) */
    0x017D, /* 0x8E: LATIN CAPITAL LETTER Z WITH CARON */
    0xFFFD, /* 0x8F: (no mapping) */

    /* 0x90 - 0x97 */
    0xFFFD, /* 0x90: (no mapping) */
    0x2018, /* 0x91: LEFT SINGLE QUOTATION MARK */
    0x2019, /* 0x92: RIGHT SINGLE QUOTATION MARK */
    0x201C, /* 0x93: LEFT DOUBLE QUOTATION MARK */
    0x201D, /* 0x94: RIGHT DOUBLE QUOTATION MARK */
    0x2022, /* 0x95: BULLET */
    0x2013, /* 0x96: EN DASH */
    0x2014, /* 0x97: EM DASH */

    /* 0x98 - 0x9F */
    0x02DC, /* 0x98: SMALL TILDE */
    0x2122, /* 0x99: TRADE MARK SIGN */
    0x0161, /* 0x9A: LATIN SMALL LETTER S WITH CARON */
    0x203A, /* 0x9B: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
    0x0153, /* 0x9C: LATIN SMALL LIGATURE OE */
    0xFFFD, /* 0x9D: (no mapping) */
    0x017E, /* 0x9E: LATIN SMALL LETTER Z WITH CARON */
    0x0178  /* 0x9F: LATIN CAPITAL LETTER Y WITH DIAERESIS */

};
|
|
|
|
int guac_iconv(guac_iconv_read* reader, const char** input, int in_remaining,
|
|
guac_iconv_write* writer, char** output, int out_remaining) {
|
|
|
|
while (in_remaining > 0 && out_remaining > 0) {
|
|
|
|
int value;
|
|
const char* read_start;
|
|
char* write_start;
|
|
|
|
/* Read character */
|
|
read_start = *input;
|
|
value = reader(input, in_remaining);
|
|
in_remaining -= *input - read_start;
|
|
|
|
/* Write character */
|
|
write_start = *output;
|
|
writer(output, out_remaining, value);
|
|
out_remaining -= *output - write_start;
|
|
|
|
/* Stop if null terminator reached */
|
|
if (value == 0)
|
|
return 1;
|
|
|
|
}
|
|
|
|
/* Null terminator not reached */
|
|
return 0;
|
|
|
|
}
|
|
|
|
/**
 * Reads one UTF-8 encoded character from the input buffer via
 * guac_utf8_read(), advancing *input by the number of bytes that function
 * reports.
 *
 * @param input
 *     Pointer to the current read position; advanced past the bytes read.
 *
 * @param remaining
 *     The number of bytes available at *input.
 *
 * @return
 *     The code point stored by guac_utf8_read(), or zero if it stored
 *     nothing.
 */
int GUAC_READ_UTF8(const char** input, int remaining) {

    /* guac_utf8_read() lives outside this file. Should it return without
     * storing a code point (presumably when no complete character is
     * available - TODO confirm against libguac), this default keeps the
     * result well-defined and reads as end-of-string to the caller, instead
     * of returning an uninitialized value. */
    int value = 0;

    *input += guac_utf8_read(*input, remaining, &value);
    return value;

}
|
|
|
|
/**
 * Reads one 16-bit code unit, in the byte order of the host, from the input
 * buffer. No surrogate pair decoding is performed; each code unit is
 * returned as-is.
 *
 * @param input
 *     Pointer to the current read position; advanced by two bytes when a
 *     code unit is read, left untouched otherwise.
 *
 * @param remaining
 *     The number of bytes available at *input.
 *
 * @return
 *     The code unit that was read, or zero if fewer than two bytes remain.
 */
int GUAC_READ_UTF16(const char** input, int remaining) {

    uint16_t value;

    /* Bail if not enough data */
    if (remaining < 2)
        return 0;

    /* Copy the two bytes out rather than dereferencing a uint16_t pointer:
     * the buffer is a char array with no alignment guarantee, and accessing
     * it through a uint16_t lvalue is undefined behavior. */
    memcpy(&value, *input, sizeof(value));
    *input += sizeof(value);

    return value;

}
|
|
|
|
int GUAC_READ_CP1252(const char** input, int remaining) {
|
|
|
|
int value = *((unsigned char*) *input);
|
|
|
|
/* Replace value with exception if not identical to ISO-8859-1 */
|
|
if (value >= 0x80 && value <= 0x9F)
|
|
value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80];
|
|
|
|
(*input)++;
|
|
return value;
|
|
|
|
}
|
|
|
|
/**
 * Reads one ISO-8859-1 encoded byte from the input buffer. The byte value is
 * returned directly as the code point.
 *
 * @param input
 *     Pointer to the current read position; advanced by one byte when a
 *     character is read, left untouched otherwise.
 *
 * @param remaining
 *     The number of bytes available at *input.
 *
 * @return
 *     The value of the byte that was read (0x00 - 0xFF), or zero if no bytes
 *     remain.
 */
int GUAC_READ_ISO8859_1(const char** input, int remaining) {

    /* Bail if not enough data. A caller may legitimately probe with nothing
     * left (for example when peeking past the final character), and reading
     * anyway would touch memory beyond the buffer. */
    if (remaining < 1)
        return 0;

    int value = *((const unsigned char*) *input);

    (*input)++;
    return value;

}
|
|
|
|
/**
|
|
* Invokes the given reader function, automatically normalizing newline
|
|
* sequences as Unix-style newline characters ('\n'). All other charaters are
|
|
* read verbatim.
|
|
*
|
|
* @param reader
|
|
* The reader to use to read the given character.
|
|
*
|
|
* @param input
|
|
* Pointer to the location within the input buffer that the next character
|
|
* should be read from.
|
|
*
|
|
* @param remaining
|
|
* The number of bytes remaining in the input buffer.
|
|
*
|
|
* @return
|
|
* The codepoint that was read, or zero if the end of the input string has
|
|
* been reached.
|
|
*/
|
|
static int guac_iconv_read_normalized(guac_iconv_read* reader,
|
|
const char** input, int remaining) {
|
|
|
|
/* Read requested character */
|
|
const char* input_start = *input;
|
|
int value = reader(input, remaining);
|
|
|
|
/* Automatically translate CRLF pairs to simple newlines */
|
|
if (value == '\r') {
|
|
|
|
/* Peek ahead by one character, adjusting remaining bytes relative to
|
|
* last read */
|
|
int peek_remaining = remaining - (*input - input_start);
|
|
const char* peek_input = *input;
|
|
int peek_value = reader(&peek_input, peek_remaining);
|
|
|
|
/* Consider read value to be a newline if we have encountered a "\r\n"
|
|
* (CRLF) pair */
|
|
if (peek_value == '\n') {
|
|
value = '\n';
|
|
*input = peek_input;
|
|
}
|
|
|
|
}
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) {
|
|
return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining);
|
|
}
|
|
|
|
int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) {
|
|
return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining);
|
|
}
|
|
|
|
int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) {
|
|
return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining);
|
|
}
|
|
|
|
int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
|
|
return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
|
|
}
|
|
|
|
/**
 * Writes the given code point to the output buffer via guac_utf8_write(),
 * advancing *output by the number of bytes that function reports.
 *
 * @param output
 *     Pointer to the current write position; advanced past the bytes
 *     written.
 *
 * @param remaining
 *     The number of bytes available at *output.
 *
 * @param value
 *     The code point to write.
 */
void GUAC_WRITE_UTF8(char** output, int remaining, int value) {

    /* Encode at the current position, then step past the encoded bytes */
    int written = guac_utf8_write(value, *output, remaining);
    *output += written;

}
|
|
|
|
/**
 * Writes the given value to the output buffer as one 16-bit code unit in the
 * byte order of the host. Only the low 16 bits of the value are stored; no
 * surrogate pairs are generated for larger code points.
 *
 * @param output
 *     Pointer to the current write position; advanced by two bytes when the
 *     code unit is written, left untouched otherwise.
 *
 * @param remaining
 *     The number of bytes available at *output.
 *
 * @param value
 *     The code point to write.
 */
void GUAC_WRITE_UTF16(char** output, int remaining, int value) {

    uint16_t unit = (uint16_t) value;

    /* Bail if not enough data */
    if (remaining < 2)
        return;

    /* Copy the two bytes in rather than storing through a uint16_t pointer:
     * the buffer is a char array with no alignment guarantee, and accessing
     * it through a uint16_t lvalue is undefined behavior. */
    memcpy(*output, &unit, sizeof(unit));
    *output += sizeof(unit);

}
|
|
|
|
void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
|
|
|
|
/* If not in ISO-8859-1 part of CP1252, check lookup table */
|
|
if ((value >= 0x80 && value <= 0x9F) || value > 0xFF) {
|
|
|
|
int i;
|
|
int replacement_value = '?';
|
|
const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT;
|
|
|
|
/* Search lookup table for value */
|
|
for (i=0x80; i<=0x9F; i++, codepoint++) {
|
|
if (*codepoint == value) {
|
|
replacement_value = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Replace value with discovered value (or question mark) */
|
|
value = replacement_value;
|
|
|
|
}
|
|
|
|
*((unsigned char*) *output) = (unsigned char) value;
|
|
(*output)++;
|
|
}
|
|
|
|
/**
 * Writes the given code point to the output buffer as a single ISO-8859-1
 * byte. Values outside 0x00 - 0xFF cannot be represented and are written as
 * a question mark.
 *
 * @param output
 *     Pointer to the current write position; advanced by one byte when the
 *     character is written, left untouched otherwise.
 *
 * @param remaining
 *     The number of bytes available at *output.
 *
 * @param value
 *     The code point to write.
 */
void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {

    /* Bail if not enough space, as the UTF-16 writer does */
    if (remaining < 1)
        return;

    /* Translate to question mark if out of range. Negative values are
     * rejected too; they would otherwise be truncated to an arbitrary
     * byte by the cast below. */
    if (value < 0 || value > 0xFF)
        value = '?';

    *((unsigned char*) *output) = (unsigned char) value;
    (*output)++;

}
|
|
|
|
/**
|
|
* Invokes the given writer function, automatically writing newline characters
|
|
* ('\n') as CRLF ("\r\n"). All other charaters are written verbatim.
|
|
*
|
|
* @param writer
|
|
* The writer to use to write the given character.
|
|
*
|
|
* @param output
|
|
* Pointer to the location within the output buffer that the next character
|
|
* should be written.
|
|
*
|
|
* @param remaining
|
|
* The number of bytes remaining in the output buffer.
|
|
*
|
|
* @param value
|
|
* The codepoint of the character to write.
|
|
*/
|
|
static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output,
|
|
int remaining, int value) {
|
|
|
|
if (value != '\n') {
|
|
writer(output, remaining, value);
|
|
return;
|
|
}
|
|
|
|
char* output_start = *output;
|
|
writer(output, remaining, '\r');
|
|
|
|
remaining -= *output - output_start;
|
|
if (remaining > 0)
|
|
writer(output, remaining, '\n');
|
|
|
|
}
|
|
|
|
void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) {
|
|
guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value);
|
|
}
|
|
|
|
void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) {
|
|
guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value);
|
|
}
|
|
|
|
void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) {
|
|
guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value);
|
|
}
|
|
|
|
void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
|
|
guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
|
|
}
|
|
|