Wireshark 4.5.0
The Wireshark network protocol analyzer
Loading...
Searching...
No Matches
unicode-utils.h
Go to the documentation of this file.
/* unicode-utils.h
 * Unicode utility definitions
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 2006 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
10
11#ifndef __UNICODEUTIL_H__
12#define __UNICODEUTIL_H__
13
14#include <wireshark.h>
15
16#ifdef _WIN32
17#include <windows.h>
18#include <tchar.h>
19#include <wchar.h>
20#endif
21
27#ifdef __cplusplus
28extern "C" {
29#endif
30
/*
 * DEBUG_UTF_8_ENABLED expands to true when the build defines
 * WS_DEBUG_UTF_8 and to false otherwise, so the setting can be tested in
 * an ordinary C expression instead of with the preprocessor.
 */
#ifndef WS_DEBUG_UTF_8
#define DEBUG_UTF_8_ENABLED false
#else
#define DEBUG_UTF_8_ENABLED true
#endif
36
/*
 * Validate a string as UTF-8 and log it when validation fails.
 *
 * Expands to a single statement. None of the arguments are evaluated
 * unless DEBUG_UTF_8_ENABLED is true, and a NULL str is skipped. Otherwise
 * the string is handed to g_utf8_validate(); if that reports failure the
 * string, its length and the end pointer passed to g_utf8_validate() are
 * forwarded to ws_log_utf8().
 *
 * level: accepted from the wrapper macros but not referenced by the
 *        expansion.
 * str:   string to check, may be NULL. May be evaluated more than once.
 * len:   length passed to g_utf8_validate() and ws_log_utf8(). May be
 *        evaluated more than once.
 */
#define _CHECK_UTF_8(level, str, len) \
    do { \
        if (DEBUG_UTF_8_ENABLED) { \
            if ((str) != NULL) { \
                const char *utf8_bad_at_; \
                if (!g_utf8_validate(str, len, &utf8_bad_at_)) { \
                    ws_log_utf8(str, len, utf8_bad_at_); \
                } \
            } \
        } \
    } while (0)
45
/*
 * Check that a string is valid UTF-8 by delegating to _CHECK_UTF_8() with
 * LOG_LEVEL_DEBUG as the level argument.
 *
 * str: string to check.
 * len: length forwarded unchanged to _CHECK_UTF_8().
 */
#define WS_UTF_8_CHECK(str, len) _CHECK_UTF_8(LOG_LEVEL_DEBUG, str, len)
48
/*
 * Check that a string is valid UTF-8 by delegating to _CHECK_UTF_8() with
 * LOG_LEVEL_ECHO as the level argument.
 *
 * str: string to check.
 * len: length forwarded unchanged to _CHECK_UTF_8().
 */
#define WS_UTF_8_DEBUG_HERE(str, len) _CHECK_UTF_8(LOG_LEVEL_ECHO, str, len)
51
52WSUTIL_EXPORT
53const int ws_utf8_seqlen[256];
54
/*
 * Look up the ws_utf8_seqlen[] entry for a byte.
 *
 * ch: the byte to look up. It is converted to unsigned char before it is
 *     used as an index, so a plain char holding a value >= 0x80 (which is
 *     negative where char is signed) selects entries 128..255 instead of
 *     reading before the start of the 256-entry table.
 *
 * Evaluates to the int stored in ws_utf8_seqlen[] for that byte.
 */
#define ws_utf8_char_len(ch) (ws_utf8_seqlen[(unsigned char)(ch)])
61
62/*
63 * Given a wmem scope, a pointer, and a length, treat the string of bytes
64 * referred to by the pointer and length as a UTF-8 string, and return a
65 * pointer to a UTF-8 string, allocated using the wmem scope, with all
66 * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER
67 * according to the recommended "best practices" given in the Unicode
68 * Standard and specified by W3C/WHATWG.
69 */
70WS_DLL_PUBLIC uint8_t *
71ws_utf8_make_valid(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
72
73/*
74 * Same as ws_utf8_make_valid() but returns a wmem_strbuf_t.
75 */
76WS_DLL_PUBLIC wmem_strbuf_t *
77ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
78
79#ifdef _WIN32
80
88WS_DLL_PUBLIC
89const wchar_t * utf_8to16(const char *utf8str);
90
97WS_DLL_PUBLIC
98void utf_8to16_snprintf(TCHAR *utf16buf, int utf16buf_len, const char* fmt, ...)
99G_GNUC_PRINTF(3, 4);
100
108WS_DLL_PUBLIC
109char * utf_16to8(const wchar_t *utf16str);
110
118WS_DLL_PUBLIC
119char **arg_list_utf_16to8(int argc, wchar_t *wc_argv[]);
120
121#endif /* _WIN32 */
122
123/*
124 * defines for helping with UTF-16 surrogate pairs
125 */
126
/*
 * Non-zero when uchar2 lies in the lead (high) surrogate range
 * 0xd800..0xdbff. The argument may be evaluated twice.
 */
#define IS_LEAD_SURROGATE(uchar2) (0xd800 <= (uchar2) && (uchar2) < 0xdc00)
/*
 * Non-zero when uchar2 lies in the trail (low) surrogate range
 * 0xdc00..0xdfff. The argument may be evaluated twice.
 */
#define IS_TRAIL_SURROGATE(uchar2) (0xdc00 <= (uchar2) && (uchar2) < 0xe000)
/*
 * Combine a surrogate pair into one value: the offset of lead from 0xd800
 * supplies the upper ten bits, the offset of trail from 0xdc00 the lower
 * ten, and 0x10000 is added to the result.
 *
 * The arguments are not range-checked here; callers are expected to have
 * tested them with IS_LEAD_SURROGATE() / IS_TRAIL_SURROGATE() first.
 */
#define SURROGATE_VALUE(lead, trail) \
    (0x10000 + ((((lead) - 0xd800) << 10) | ((trail) - 0xdc00)))
133
134#ifdef __cplusplus
135}
136#endif
137
138#endif /* __UNICODEUTIL_H__ */
Definition wmem_allocator.h:27
Definition wmem_strbuf.h:42