Wireshark 4.5.0
The Wireshark network protocol analyzer
Loading...
Searching...
No Matches
charsets.h
Go to the documentation of this file.
1
10#ifndef __CHARSETS_H__
11#define __CHARSETS_H__
12
13#include "ws_symbol_export.h"
14
15#ifdef __cplusplus
16extern "C" {
17#endif /* __cplusplus */
18
19/*
20 * Translation tables that map the upper 128 code points in single-byte
21 * "extended ASCII" character encodings to Unicode code points in the
22 * Basic Multilingual Plane.
23 */
24
25/* Table for windows-1250 */
26extern const gunichar2 charset_table_cp1250[0x80];
27/* Table for windows-1251 */
28extern const gunichar2 charset_table_cp1251[0x80];
29/* Table for windows-1252 */
30extern const gunichar2 charset_table_cp1252[0x80];
31
32/* Tables for ISO-8859-X */
33extern const gunichar2 charset_table_iso_8859_2[0x80];
34extern const gunichar2 charset_table_iso_8859_3[0x80];
35extern const gunichar2 charset_table_iso_8859_4[0x80];
36extern const gunichar2 charset_table_iso_8859_5[0x80];
37extern const gunichar2 charset_table_iso_8859_6[0x80];
38extern const gunichar2 charset_table_iso_8859_7[0x80];
39extern const gunichar2 charset_table_iso_8859_8[0x80];
40extern const gunichar2 charset_table_iso_8859_9[0x80];
41extern const gunichar2 charset_table_iso_8859_10[0x80];
42extern const gunichar2 charset_table_iso_8859_11[0x80];
43extern const gunichar2 charset_table_iso_8859_13[0x80];
44extern const gunichar2 charset_table_iso_8859_14[0x80];
45extern const gunichar2 charset_table_iso_8859_15[0x80];
46extern const gunichar2 charset_table_iso_8859_16[0x80];
47
48/* Tables for Mac character sets */
49extern const gunichar2 charset_table_mac_roman[0x80];
50
51/* Tables for DOS code pages */
52extern const gunichar2 charset_table_cp437[0x80];
53extern const gunichar2 charset_table_cp855[0x80];
54extern const gunichar2 charset_table_cp866[0x80];
55
56/*
57 * Translation tables that map the lower 128 code points in single-byte
58 * ISO 646-based character encodings to Unicode code points in the
59 * Basic Multilingual Plane.
60 */
61extern const gunichar2 charset_table_iso_646_basic[0x80];
62
63/* Tables for EBCDIC code pages */
64extern const gunichar2 charset_table_ebcdic[256];
65extern const gunichar2 charset_table_ebcdic_cp037[256];
66extern const gunichar2 charset_table_ebcdic_cp500[256];
67
68/*
69 * Given a wmem scope, a pointer, and a length, treat the string of bytes
70 * referred to by the pointer and length as an ASCII string, with all bytes
71 * with the high-order bit set being invalid, and return a pointer to a
72 * UTF-8 string, allocated using the wmem scope.
73 *
74 * Octets with the highest bit set will be converted to the Unicode
75 * REPLACEMENT CHARACTER.
76 */
77WS_DLL_PUBLIC uint8_t *
78get_ascii_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
79
80/*
81 * Given a wmem scope, a pointer, and a length, treat the string of bytes
82 * referred to by the pointer and length as a UTF-8 string, and return a
83 * pointer to a UTF-8 string, allocated using the wmem scope, with all
84 * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER
85 * according to the recommended "best practices" given in the Unicode
86 * Standard and specified by W3C/WHATWG.
87 */
88WS_DLL_PUBLIC uint8_t *
89get_utf_8_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
90
91/*
92 * Given a wmem scope, a pointer, a length, and a translation table,
93 * treat the string of bytes referred to by the pointer and length as a
94 * string encoded using one octet per character, with octets with the
95 * high-order bit clear being mapped by the translation table to 2-byte
96 * Unicode Basic Multilingual Plane characters (including REPLACEMENT
97 * CHARACTER) and octets with the high-order bit set being mapped to
98 * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string,
99 * allocated using the wmem scope.
100 */
101WS_DLL_PUBLIC uint8_t *
102get_iso_646_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, const gunichar2 table[0x80]);
103
104/*
105 * Given a wmem scope, a pointer, and a length, treat the string of bytes
106 * referred to by the pointer and length as an ISO 8859-1 (Latin-1) string, and
107 * return a pointer to a UTF-8 string, allocated using the wmem scope.
108 */
109WS_DLL_PUBLIC uint8_t *
110get_8859_1_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
111
112/*
113 * Given a wmem scope, a pointer, a length, and a translation table with
114 * 128 entries, treat the string of bytes referred to by the pointer and
115 * length as a string encoded using one octet per character, with octets
116 * with the high-order bit clear being ASCII and octets with the high-order
117 * bit set being mapped by the translation table to 2-byte Unicode Basic
118 * Multilingual Plane characters (including REPLACEMENT CHARACTER), and
119 * return a pointer to a UTF-8 string, allocated using the wmem scope.
120 */
121WS_DLL_PUBLIC uint8_t *
122get_unichar2_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, const gunichar2 table[0x80]);
123
124/*
125 * Given a wmem scope, a pointer, and a length, treat the string of bytes
126 * referred to by the pointer and length as a UCS-2 encoded string
127 * containing characters from the Basic Multilingual Plane (plane 0) of
128 * Unicode, and return a pointer to a UTF-8 string, allocated with the
129 * wmem scope.
130 *
131 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
132 * possibly ORed with ENC_BOM.
133 *
134 * Specify length in bytes.
135 */
136WS_DLL_PUBLIC uint8_t *
137get_ucs_2_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, unsigned encoding);
138
139/*
140 * Given a wmem scope, a pointer, and a length, treat the string of bytes
141 * referred to by the pointer and length as a UTF-16 encoded string, and
142 * return a pointer to a UTF-8 string, allocated with the wmem scope.
143 *
144 * See RFC 2781 section 2.2.
145 *
146 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
147 * possibly ORed with ENC_BOM.
148 *
149 * Specify length in bytes.
150 */
151WS_DLL_PUBLIC uint8_t *
152get_utf_16_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, unsigned encoding);
153
154/*
155 * Given a wmem scope, a pointer, and a length, treat the string of bytes
156 * referred to by the pointer and length as a UCS-4 encoded string, and
157 * return a pointer to a UTF-8 string, allocated with the wmem scope.
158 *
159 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
160 * possibly ORed with ENC_BOM.
161 *
162 * Specify length in bytes.
163 */
164WS_DLL_PUBLIC uint8_t *
165get_ucs_4_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, unsigned encoding);
166
/*
 * Declares get_ts_23_038_7bits_string_packed(), which takes a wmem scope,
 * a pointer to bytes, a bit offset, and a count of characters, and
 * returns a uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably decodes characters
 * of the 3GPP TS 23.038 7-bit default alphabet packed 7 bits per
 * character, starting bit_offset bits into the data, and returns a UTF-8
 * string allocated using the wmem scope - confirm against the
 * implementation.
 */
167WS_DLL_PUBLIC uint8_t *
168get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const uint8_t *ptr,
169 const int bit_offset, int no_of_chars);
170
/*
 * Declares get_ts_23_038_7bits_string_unpacked(), which takes a wmem
 * scope, a pointer to bytes, and a length, and returns a uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably decodes characters
 * of the 3GPP TS 23.038 7-bit default alphabet stored one character per
 * octet, and returns a UTF-8 string allocated using the wmem scope -
 * confirm against the implementation.
 */
171WS_DLL_PUBLIC uint8_t *
172get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const uint8_t *ptr,
173 int length);
174
/*
 * Declares get_etsi_ts_102_221_annex_a_string(), which takes a wmem
 * scope, a pointer to bytes, and a length, and returns a uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably decodes a string
 * coded as described in Annex A of ETSI TS 102 221 and returns a UTF-8
 * string allocated using the wmem scope - confirm against the
 * implementation.
 */
175WS_DLL_PUBLIC uint8_t *
176get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const uint8_t *ptr,
177 int length);
178
/*
 * Declares get_ascii_7bits_string(), which takes a wmem scope, a pointer
 * to bytes, a bit offset, and a count of characters, and returns a
 * uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably decodes ASCII
 * characters packed 7 bits per character, starting bit_offset bits into
 * the data, and returns a UTF-8 string allocated using the wmem scope -
 * confirm against the implementation.
 */
179WS_DLL_PUBLIC uint8_t *
180get_ascii_7bits_string(wmem_allocator_t *scope, const uint8_t *ptr,
181 const int bit_offset, int no_of_chars);
182
183/*
184 * Given a wmem scope, a pointer, a length, and a translation table with
185 * 256 entries, treat the string of bytes referred to by the pointer and
186 * length as a string encoded using one octet per character, with octets
187 * being mapped by the translation table to 2-byte Unicode Basic Multilingual
188 * Plane characters (including REPLACEMENT CHARACTER), and return a
189 * pointer to a UTF-8 string, allocated using the wmem scope.
190 */
191WS_DLL_PUBLIC uint8_t *
192get_nonascii_unichar2_string(wmem_allocator_t *scope, const uint8_t *ptr, int length, const gunichar2 table[256]);
193
194/*
195 * Given a wmem scope, a pointer, and a length, treat the bytes referred to
196 * by the pointer and length as a GB18030 encoded string, and return a pointer
197 * to a UTF-8 string, allocated using the wmem scope, with REPLACEMENT
198 * CHARACTER substituted for undecodable sequences as described in the
199 * Unicode Standard, section 5.22 "U+FFFD Substitution for Conversion".
200 * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
201 *
202 * As expected, this will also decode GBK and GB2312 strings.
203 */
204WS_DLL_PUBLIC uint8_t *
205get_gb18030_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
206
207/*
208 * Given a wmem scope, a pointer, and a length, treat the bytes referred to
209 * by the pointer and length as an EUC-KR encoded string, and return a pointer
210 * to a UTF-8 string, allocated using the wmem scope, with REPLACEMENT
211 * CHARACTER substituted for undecodable sequences as described in the
212 * Unicode Standard, section 5.22 "U+FFFD Substitution for Conversion".
213 * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
214 */
215WS_DLL_PUBLIC uint8_t *
216get_euc_kr_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
217
/*
 * Declares get_t61_string(), which takes a wmem scope, a pointer to
 * bytes, and a length, and returns a uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably treats the bytes as
 * a T.61 (Teletex) encoded string and returns a UTF-8 string allocated
 * using the wmem scope - confirm against the implementation.
 */
218WS_DLL_PUBLIC uint8_t *
219get_t61_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
220
/*
 * Declares get_dect_standard_8bits_string(), which takes a wmem scope, a
 * pointer to bytes, and a length, and returns a uint8_t pointer.
 *
 * NOTE(review): judging by the name, this presumably treats the bytes as
 * a string in the DECT standard 8-bit character set and returns a UTF-8
 * string allocated using the wmem scope - confirm against the
 * implementation.
 */
221WS_DLL_PUBLIC uint8_t *
222get_dect_standard_8bits_string(wmem_allocator_t *scope, const uint8_t *ptr, int length);
223#ifdef __cplusplus
224}
225#endif /* __cplusplus */
226
227#endif /* __CHARSETS_H__ */
228
229/*
230 * Editor modelines - https://www.wireshark.org/tools/modelines.html
231 *
232 * Local variables:
233 * c-basic-offset: 4
234 * tab-width: 8
235 * indent-tabs-mode: nil
236 * End:
237 *
238 * vi: set shiftwidth=4 tabstop=8 expandtab:
239 * :indentSize=4:tabSize=8:noTabs=true:
240 */
Definition wmem_allocator.h:27