9 #ifndef UTF8_VALIDATOR_HPP 10 #define UTF8_VALIDATOR_HPP 16 static const unsigned int UTF8_ACCEPT = 0;
17 static const unsigned int UTF8_REJECT = 1;
19 static const uint8_t utf8d[] = {
20 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
21 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
23 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
24 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
25 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
26 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
27 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3,
28 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,
29 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1,
30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,
31 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,
32 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1,
33 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
37 decode(uint32_t* state, uint32_t* codep, uint8_t byte) {
38 uint32_t type = utf8d[byte];
40 *codep = (*state != UTF8_ACCEPT) ?
41 (byte & 0x3fu) | (*codep << 6) :
42 (0xff >> type) & (byte);
44 *state = utf8d[256 + *state*16 + type];
54 validator() : m_state(UTF8_ACCEPT),m_codepoint(0) {}
62 if (utf8_validator::decode(&m_state,&m_codepoint,byte) == UTF8_REJECT) {
68 template <
typename iterator_type>
75 bool decode (iterator_type b, iterator_type e) {
76 for (iterator_type i = b; i != e; i++) {
77 if (utf8_validator::decode(&m_state,&m_codepoint,*i) == UTF8_REJECT) {
89 return m_state == UTF8_ACCEPT;
94 m_state = UTF8_ACCEPT;
105 inline bool validate(
const std::string& s) {
107 if (!v.
decode(s.begin(),s.end())) {
115 #endif // UTF8_VALIDATOR_HPP A validator for UTF-8 strings.
void reset()
Reset the validator state.
bool consume(uint32_t byte)
Advance the validator with a single input byte (one byte of a UTF-8 sequence, not necessarily a whole character).
bool decode(iterator_type b, iterator_type e)
Advance the validator with every byte in the iterator range [b, e).
bool complete()
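Report whether the validator is currently in the accepting state, i.e. the input consumed so far does not end in the middle of a multi-byte sequence.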