Asterisk - The Open Source Telephony Project  18.5.0
utf8.h
Go to the documentation of this file.
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2020, Sean Bright
5  *
6  * Sean Bright <[email protected]>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18 
19 /*! \file
20  *
21  * \brief UTF-8 information and validation functions
22  */
23 
24 #ifndef ASTERISK_UTF8_H
25 #define ASTERISK_UTF8_H
26 
27 /*!
28  * \brief Check if a zero-terminated string is valid UTF-8
29  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
30  *
31  * \param str The zero-terminated string to check
32  *
33  * \retval 0 if the string is not valid UTF-8
34  * \retval Non-zero if the string is valid UTF-8
35  */
36 int ast_utf8_is_valid(const char *str);
37 
38 /*!
39  * \brief Check if the first \a size bytes of a string are valid UTF-8
40  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
41  *
42  * Similar to \a ast_utf8_is_valid() but checks the first \a size bytes or until
43  * a zero byte is reached, whichever comes first.
44  *
45  * \param str The string to check
46  * \param size The number of bytes to evaluate
47  *
48  * \retval 0 if the string is not valid UTF-8
49  * \retval Non-zero if the string is valid UTF-8
50  */
51 int ast_utf8_is_validn(const char *str, size_t size);
52 
53 /*!
54  * \brief Copy a string safely ensuring valid UTF-8
55  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
56  *
57  * This is similar to \a ast_copy_string, but it will only copy valid UTF-8
58  * sequences from the source string into the destination buffer. If an invalid
59  * UTF-8 sequence is encountered, or the available space in the destination
60  * buffer is exhausted in the middle of an otherwise valid UTF-8 sequence, the
61  * destination buffer will be truncated to ensure that it only contains valid
62  * UTF-8.
63  *
64  * \param dst The destination buffer.
65  * \param src The source string
66  * \param size The size of the destination buffer
67  * \return Nothing.
68  */
69 void ast_utf8_copy_string(char *dst, const char *src, size_t size);
70 
71 enum ast_utf8_validation_result {
72  /*! \brief The consumed sequence is valid UTF-8
73  *
74  * The bytes consumed thus far by the validator represent a valid sequence of
75  * UTF-8 bytes. If additional bytes are fed into the validator, it can
76  * transition into either \a AST_UTF8_INVALID or \a AST_UTF8_UNKNOWN.
77  */
78  AST_UTF8_VALID,
79 
80  /*! \brief The consumed sequence is invalid UTF-8
81  *
82  * The bytes consumed thus far by the validator represent an invalid sequence
83  * of UTF-8 bytes. Feeding additional bytes into the validator will not
84  * change its state.
85  */
86  AST_UTF8_INVALID,
87 
88  /*! \brief The validator is in an intermediate state
89  *
90  * The validator is in the process of validating a multibyte UTF-8 sequence
91  * and requires additional data to be fed into it to determine validity. If
92  * additional bytes are fed into the validator, it can transition into either
93  * \a AST_UTF8_VALID or \a AST_UTF8_INVALID. If you have no additional data
94  * to feed into the validator the UTF-8 sequence is invalid.
95  */
96  AST_UTF8_UNKNOWN,
97 };
98 
99 /*!
100  * \brief Opaque type for UTF-8 validator state.
101  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
102  */
103 struct ast_utf8_validator;
104 
105 /*!
106  * \brief Create a new UTF-8 validator
107  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
108  *
109  * \param[out] validator The validator instance
110  *
111  * \retval 0 on success
112  * \retval -1 on failure
113  */
114 int ast_utf8_validator_new(struct ast_utf8_validator **validator);
115 
116 /*!
117  * \brief Feed a zero-terminated string into the UTF-8 validator
118  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
119  *
120  * \param validator The validator instance
121  * \param data The zero-terminated string to feed into the validator
122  *
123  * \return The \ref ast_utf8_validation_result indicating the current state of
124  * the validator.
125  */
126 enum ast_utf8_validation_result ast_utf8_validator_feed(
127  struct ast_utf8_validator *validator, const char *data);
128 
129 /*!
130  * \brief Feed a string into the UTF-8 validator
131  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
132  *
133  * Similar to \a ast_utf8_validator_feed but will stop feeding in data if a zero
134  * byte is encountered or \a size bytes have been read.
135  *
136  * \param validator The validator instance
137  * \param data The string to feed into the validator
138  * \param size The number of bytes to feed into the validator
139  *
140  * \return The \ref ast_utf8_validation_result indicating the current state of
141  * the validator.
142  */
143 enum ast_utf8_validation_result ast_utf8_validator_feedn(
144  struct ast_utf8_validator *validator, const char *data, size_t size);
145 
146 /*!
147  * \brief Get the current UTF-8 validator state
148  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
149  *
150  * \param validator The validator instance
151  *
152  * \return The \ref ast_utf8_validation_result indicating the current state of
153  * the validator.
154  */
155 enum ast_utf8_validation_result ast_utf8_validator_state(
156  struct ast_utf8_validator *validator);
157 
158 /*!
159  * \brief Reset the state of a UTF-8 validator
160  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
161  *
162  * Resets the provided UTF-8 validator to its initial state so that it can be
163  * reused.
164  *
165  * \param validator The validator instance to reset
166  */
167 void ast_utf8_validator_reset(
168  struct ast_utf8_validator *validator);
169 
170 /*!
171  * \brief Destroy a UTF-8 validator
172  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
173  *
174  * \param validator The validator instance to destroy
175  */
176 void ast_utf8_validator_destroy(struct ast_utf8_validator *validator);
177 
178 /*!
179  * \brief Register UTF-8 tests
180  * \since 13.36.0, 16.13.0, 17.7.0, 18.0.0
181  *
182  * Does nothing unless TEST_FRAMEWORK is defined.
183  *
184  * \return Always returns 0
185  */
186 int ast_utf8_init(void);
187 
188 #endif /* ASTERISK_UTF8_H */
enum ast_utf8_validation_result ast_utf8_validator_feed(struct ast_utf8_validator *validator, const char *data)
Feed a zero-terminated string into the UTF-8 validator.
Definition: utf8.c:189
AST_UTF8_INVALID
The consumed sequence is invalid UTF-8.
Definition: utf8.h:86
void ast_utf8_validator_destroy(struct ast_utf8_validator *validator)
Destroy a UTF-8 validator.
Definition: utf8.c:215
enum ast_utf8_validation_result ast_utf8_validator_state(struct ast_utf8_validator *validator)
Get the current UTF-8 validator state.
Definition: utf8.c:176
int ast_utf8_is_valid(const char *str)
Check if a zero-terminated string is valid UTF-8.
Definition: utf8.c:110
int ast_utf8_validator_new(struct ast_utf8_validator **validator)
Create a new UTF-8 validator.
Definition: utf8.c:163
const char * str
Definition: app_jack.c:147
int ast_utf8_init(void)
Register UTF-8 tests.
Definition: utf8.c:362
void ast_utf8_validator_reset(struct ast_utf8_validator *validator)
Reset the state of a UTF-8 validator.
Definition: utf8.c:210
AST_UTF8_VALID
The consumed sequence is valid UTF-8.
Definition: utf8.h:78
ast_utf8_validation_result
Definition: utf8.h:71
int ast_utf8_is_validn(const char *str, size_t size)
Check if the first size bytes of a string are valid UTF-8.
Definition: utf8.c:121
AST_UTF8_UNKNOWN
The validator is in an intermediate state.
Definition: utf8.h:96
enum ast_utf8_validation_result ast_utf8_validator_feedn(struct ast_utf8_validator *validator, const char *data, size_t size)
Feed a string into the UTF-8 validator.
Definition: utf8.c:199
void ast_utf8_copy_string(char *dst, const char *src, size_t size)
Copy a string safely ensuring valid UTF-8.
Definition: utf8.c:133