| /* |
| * Copyright © 2011,2012 Google, Inc. |
| * |
| * This is part of HarfBuzz, a text shaping library. |
| * |
| * Permission is hereby granted, without written agreement and without |
| * license or royalty fees, to use, copy, modify, and distribute this |
| * software and its documentation for any purpose, provided that the |
| * above copyright notice and the following two paragraphs appear in |
| * all copies of this software. |
| * |
| * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| * |
| * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| * |
| * Google Author(s): Behdad Esfahbod |
| */ |
| |
| #ifndef HB_UTF_PRIVATE_HH |
| #define HB_UTF_PRIVATE_HH |
| |
| #include "hb-private.hh" |
| |
| |
| /* UTF-8 */ |
| |
| #define HB_UTF8_COMPUTE(Char, Mask, Len) \ |
| if (Char < 128) { Len = 1; Mask = 0x7f; } \ |
| else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ |
| else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ |
| else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ |
| else Len = 0; |
| |
| static inline const uint8_t * |
| hb_utf_next (const uint8_t *text, |
| const uint8_t *end, |
| hb_codepoint_t *unicode) |
| { |
| hb_codepoint_t c = *text, mask; |
| unsigned int len; |
| |
| /* TODO check for overlong sequences? */ |
| |
| HB_UTF8_COMPUTE (c, mask, len); |
| if (unlikely (!len || (unsigned int) (end - text) < len)) { |
| *unicode = -1; |
| return text + 1; |
| } else { |
| hb_codepoint_t result; |
| unsigned int i; |
| result = c & mask; |
| for (i = 1; i < len; i++) |
| { |
| if (unlikely ((text[i] & 0xc0) != 0x80)) |
| { |
| *unicode = -1; |
| return text + 1; |
| } |
| result <<= 6; |
| result |= (text[i] & 0x3f); |
| } |
| *unicode = result; |
| return text + len; |
| } |
| } |
| |
| static inline const uint8_t * |
| hb_utf_prev (const uint8_t *text, |
| const uint8_t *start, |
| hb_codepoint_t *unicode) |
| { |
| const uint8_t *end = text--; |
| while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
| text--; |
| |
| hb_codepoint_t c = *text, mask; |
| unsigned int len; |
| |
| /* TODO check for overlong sequences? */ |
| |
| HB_UTF8_COMPUTE (c, mask, len); |
| if (unlikely (!len || (unsigned int) (end - text) != len)) { |
| *unicode = -1; |
| return end - 1; |
| } else { |
| hb_codepoint_t result; |
| unsigned int i; |
| result = c & mask; |
| for (i = 1; i < len; i++) |
| { |
| result <<= 6; |
| result |= (text[i] & 0x3f); |
| } |
| *unicode = result; |
| return text; |
| } |
| } |
| |
| |
| static inline unsigned int |
| hb_utf_strlen (const uint8_t *text) |
| { |
| return strlen ((const char *) text); |
| } |
| |
| |
| /* UTF-16 */ |
| |
| static inline const uint16_t * |
| hb_utf_next (const uint16_t *text, |
| const uint16_t *end, |
| hb_codepoint_t *unicode) |
| { |
| hb_codepoint_t c = *text++; |
| |
| if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff))) |
| { |
| /* high surrogate */ |
| hb_codepoint_t l; |
| if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff)))) |
| { |
| /* low surrogate */ |
| *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); |
| text++; |
| } else |
| *unicode = -1; |
| } else |
| *unicode = c; |
| |
| return text; |
| } |
| |
| static inline const uint16_t * |
| hb_utf_prev (const uint16_t *text, |
| const uint16_t *start, |
| hb_codepoint_t *unicode) |
| { |
| hb_codepoint_t c = *--text; |
| |
| if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff))) |
| { |
| /* low surrogate */ |
| hb_codepoint_t h; |
| if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff)))) |
| { |
| /* high surrogate */ |
| *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); |
| text--; |
| } else |
| *unicode = -1; |
| } else |
| *unicode = c; |
| |
| return text; |
| } |
| |
| |
| static inline unsigned int |
| hb_utf_strlen (const uint16_t *text) |
| { |
| unsigned int l = 0; |
| while (*text++) l++; |
| return l; |
| } |
| |
| |
| /* UTF-32 */ |
| |
| static inline const uint32_t * |
| hb_utf_next (const uint32_t *text, |
| const uint32_t *end HB_UNUSED, |
| hb_codepoint_t *unicode) |
| { |
| *unicode = *text++; |
| return text; |
| } |
| |
| static inline const uint32_t * |
| hb_utf_prev (const uint32_t *text, |
| const uint32_t *start HB_UNUSED, |
| hb_codepoint_t *unicode) |
| { |
| *unicode = *--text; |
| return text; |
| } |
| |
| static inline unsigned int |
| hb_utf_strlen (const uint32_t *text) |
| { |
| unsigned int l = 0; |
| while (*text++) l++; |
| return l; |
| } |
| |
| |
| #endif /* HB_UTF_PRIVATE_HH */ |