| /* |
| * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) |
| * |
| * This is part of HarfBuzz, an OpenType Layout engine library. |
| * |
| * Permission is hereby granted, without written agreement and without |
| * license or royalty fees, to use, copy, modify, and distribute this |
| * software and its documentation for any purpose, provided that the |
| * above copyright notice and the following two paragraphs appear in |
| * all copies of this software. |
| * |
| * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| * |
| * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| */ |
| |
| #include "harfbuzz-shaper.h" |
| #include "harfbuzz-shaper-private.h" |
| |
| #include <assert.h> |
| #include <stdio.h> |
| |
/* FLAG(x) yields an int with only bit number x set. It is used below to turn the
   value returned by HB_GetUnicodeCharCategory() into a mask that can be tested
   against a set of categories. */
| #define FLAG(x) (1 << (x)) |
| |
| static HB_Bool isLetter(HB_UChar16 ucs) |
| { |
| const int test = FLAG(HB_Letter_Uppercase) | |
| FLAG(HB_Letter_Lowercase) | |
| FLAG(HB_Letter_Titlecase) | |
| FLAG(HB_Letter_Modifier) | |
| FLAG(HB_Letter_Other); |
| return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test); |
| } |
| |
| static HB_Bool isMark(HB_UChar16 ucs) |
| { |
| const int test = FLAG(HB_Mark_NonSpacing) | |
| FLAG(HB_Mark_SpacingCombining) | |
| FLAG(HB_Mark_Enclosing); |
| return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test); |
| } |
| |
/* Classification of a character for Indic syllable analysis. The numeric values
   are spelled out because indicForms stores them as unsigned char and form()
   casts them back. */
enum Form {
    Invalid          = 0,
    UnknownForm      = Invalid,   /* alias: same value as Invalid */
    Consonant        = 1,
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,
    Other            = 10
};
| |
// Form value of every code point in U+0900..U+0DFF, indexed by (code point - 0x900);
// form() performs the lookup. The table consists of ten consecutive script blocks
// of 128 entries each, laid out in rows of four and groups of sixteen.
| static const unsigned char indicForms[0xe00-0x900] = { |
| // Devanagari, U+0900..U+097F |
| Invalid, VowelMark, VowelMark, VowelMark, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Matra, Matra, Matra, |
| Matra, Matra, Matra, Matra, |
| Matra, Halant, UnknownForm, UnknownForm, |
| |
| Other, StressMark, StressMark, StressMark, |
| StressMark, UnknownForm, UnknownForm, UnknownForm, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| IndependentVowel, IndependentVowel, VowelMark, VowelMark, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Consonant, |
| Consonant, Consonant /* ??? */, Consonant, Consonant, |
| |
| // Bengali, U+0980..U+09FF |
| Invalid, VowelMark, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Invalid, Invalid, IndependentVowel, |
| |
| IndependentVowel, Invalid, Invalid, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Invalid, Consonant, Invalid, |
| Invalid, Invalid, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Invalid, Invalid, Matra, |
| Matra, Invalid, Invalid, Matra, |
| Matra, Halant, Consonant, UnknownForm, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, VowelMark, |
| Invalid, Invalid, Invalid, Invalid, |
| Consonant, Consonant, Invalid, Consonant, |
| |
| IndependentVowel, IndependentVowel, VowelMark, VowelMark, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Consonant, Consonant, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Gurmukhi, U+0A00..U+0A7F |
| Invalid, VowelMark, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, Invalid, |
| Invalid, Invalid, Invalid, IndependentVowel, |
| |
| IndependentVowel, Invalid, Invalid, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Invalid, Consonant, Consonant, |
| Invalid, Consonant, Consonant, Invalid, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Invalid, |
| Invalid, Invalid, Invalid, Matra, |
| Matra, Invalid, Invalid, Matra, |
| Matra, Halant, UnknownForm, UnknownForm, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, UnknownForm, UnknownForm, UnknownForm, |
| Invalid, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Invalid, |
| |
| Other, Other, Invalid, Invalid, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| StressMark, StressMark, Consonant, Consonant, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Gujarati, U+0A80..U+0AFF |
| Invalid, VowelMark, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, Invalid, IndependentVowel, |
| |
| IndependentVowel, IndependentVowel, Invalid, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Invalid, Consonant, Consonant, |
| Invalid, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Matra, Invalid, Matra, |
| Matra, Matra, Invalid, Matra, |
| Matra, Halant, UnknownForm, UnknownForm, |
| |
| Other, UnknownForm, UnknownForm, UnknownForm, |
| UnknownForm, UnknownForm, UnknownForm, UnknownForm, |
| UnknownForm, UnknownForm, UnknownForm, UnknownForm, |
| UnknownForm, UnknownForm, UnknownForm, UnknownForm, |
| |
| IndependentVowel, IndependentVowel, VowelMark, VowelMark, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Oriya, U+0B00..U+0B7F |
| Invalid, VowelMark, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Invalid, Invalid, IndependentVowel, |
| |
| IndependentVowel, Invalid, Invalid, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Invalid, Consonant, Consonant, |
| Invalid, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Invalid, Invalid, Invalid, Matra, |
| Matra, Invalid, Invalid, Matra, |
| Matra, Halant, UnknownForm, UnknownForm, |
| |
| Other, Invalid, Invalid, Invalid, |
| Invalid, UnknownForm, LengthMark, LengthMark, |
| Invalid, Invalid, Invalid, Invalid, |
| Consonant, Consonant, Invalid, Consonant, |
| |
| IndependentVowel, IndependentVowel, Invalid, Invalid, |
| Invalid, Invalid, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Consonant, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Tamil, U+0B80..U+0BFF |
| Invalid, Invalid, VowelMark, Other, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, Invalid, |
| Invalid, Invalid, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| IndependentVowel, Consonant, Invalid, Invalid, |
| Invalid, Consonant, Consonant, Invalid, |
| Consonant, Invalid, Consonant, Consonant, |
| |
| Invalid, Invalid, Invalid, Consonant, |
| Consonant, Invalid, Invalid, Invalid, |
| Consonant, Consonant, Consonant, Invalid, |
| Invalid, Invalid, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Invalid, Invalid, Matra, Matra, |
| |
| Matra, Matra, Matra, Invalid, |
| Invalid, Invalid, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Halant, Invalid, Invalid, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, LengthMark, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Telugu, U+0C00..U+0C7F |
| Invalid, VowelMark, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Invalid, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Invalid, Invalid, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Halant, Invalid, Invalid, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, LengthMark, Matra, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| |
| IndependentVowel, IndependentVowel, Invalid, Invalid, |
| Invalid, Invalid, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Kannada, U+0C80..U+0CFF |
| Invalid, Invalid, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Invalid, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Nukta, Other, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Halant, Invalid, Invalid, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, LengthMark, LengthMark, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Consonant, Invalid, |
| |
| IndependentVowel, IndependentVowel, VowelMark, VowelMark, |
| Invalid, Invalid, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Malayalam, U+0D00..U+0D7F |
| Invalid, Invalid, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, Invalid, IndependentVowel, IndependentVowel, |
| IndependentVowel, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, UnknownForm, UnknownForm, |
| Invalid, Invalid, Matra, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Invalid, Invalid, Matra, Matra, |
| Matra, Invalid, Matra, Matra, |
| Matra, Halant, Invalid, Invalid, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Matra, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| |
| IndependentVowel, IndependentVowel, Invalid, Invalid, |
| Invalid, Invalid, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| |
| // Sinhala, U+0D80..U+0DFF |
| Invalid, Invalid, VowelMark, VowelMark, |
| Invalid, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| |
| IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, |
| IndependentVowel, IndependentVowel, IndependentVowel, Invalid, |
| Invalid, Invalid, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| |
| Consonant, Consonant, Invalid, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Consonant, |
| Invalid, Consonant, Invalid, Invalid, |
| |
| Consonant, Consonant, Consonant, Consonant, |
| Consonant, Consonant, Consonant, Invalid, |
| Invalid, Invalid, Halant, Invalid, |
| Invalid, Invalid, Invalid, Matra, |
| |
| Matra, Matra, Matra, Matra, |
| Matra, Invalid, Matra, Invalid, |
| Matra, Matra, Matra, Matra, |
| Matra, Matra, Matra, Matra, |
| |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| Invalid, Invalid, Invalid, Invalid, |
| |
| Invalid, Invalid, Matra, Matra, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| Other, Other, Other, Other, |
| }; |
| |
/* Placement class of a character relative to its base. The numeric values are
   spelled out because indicPosition stores them as unsigned char and
   indic_position() casts them back. */
enum Position {
    None    = 0,
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
| |
// Position value of every code point in U+0900..U+0DFF, indexed by (code point - 0x900);
// indic_position() performs the lookup. Same layout as indicForms: ten script blocks
// of 128 entries each. Every entry marked Split has a matching row in split_matras.
| static const unsigned char indicPosition[0xe00-0x900] = { |
| // Devanagari, U+0900..U+097F |
| None, Above, Above, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| Below, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, Post, Pre, |
| |
| Post, Below, Below, Below, |
| Below, Above, Above, Above, |
| Above, Post, Post, Post, |
| Post, None, None, None, |
| |
| None, Above, Below, Above, |
| Above, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, Below, Below, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Bengali, U+0980..U+09FF |
| None, Above, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| Below, None, None, Post, |
| |
| Below, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| Below, None, Post, Pre, |
| |
| Post, Below, Below, Below, |
| Below, None, None, Pre, |
| Pre, None, None, Split, |
| Split, Below, None, None, |
| |
| None, None, None, None, |
| None, None, None, Post, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, Below, Below, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| Below, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Gurmukhi, U+0A00..U+0A7F |
| None, Above, Above, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, Post, |
| |
| Below, None, None, None, |
| None, Below, None, None, |
| None, Below, None, None, |
| Below, None, Post, Pre, |
| |
| Post, Below, Below, None, |
| None, None, None, Above, |
| Above, None, None, Above, |
| Above, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| Above, Above, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Gujarati, U+0A80..U+0AFF |
| None, Above, Above, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| Below, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, Post, Pre, |
| |
| Post, Below, Below, Below, |
| Below, Above, None, Above, |
| Above, Post, None, Post, |
| Post, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, Below, Below, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Oriya, U+0B00..U+0B7F |
| None, Above, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| Below, None, None, None, |
| Below, None, None, None, |
| Below, Below, Below, Post, |
| |
| Below, None, Below, Below, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, Post, Above, |
| |
| Post, Below, Below, Below, |
| None, None, None, Pre, |
| Split, None, None, Split, |
| Split, None, None, None, |
| |
| None, None, None, None, |
| None, None, Above, Post, |
| None, None, None, None, |
| None, None, None, Post, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, Below, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Tamil, U+0B80..U+0BFF |
| None, None, Above, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, Post, Post, |
| |
| Above, Below, Below, None, |
| None, None, Pre, Pre, |
| Pre, None, Split, Split, |
// NOTE(review): Halant on the next line is a Form enumerator (value 3, numerically equal
// to Below), not a Position - confirm the intended position for U+0BCD.
| Split, Halant, None, None, |
| |
| None, None, None, None, |
| None, None, None, Post, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Telugu, U+0C00..U+0C7F |
| None, Post, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, Below, Below, Below, |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| Below, None, Below, Below, |
| Below, Below, Below, Below, |
| |
| Below, None, Below, Below, |
| None, Below, Below, Below, |
| Below, Below, None, None, |
| None, None, Post, Above, |
| |
| Above, Post, Post, Post, |
| Post, None, Above, Above, |
| Split, None, Post, Above, |
// NOTE(review): Halant on the next line is a Form enumerator (value 3, numerically equal
// to Below), not a Position - confirm the intended position for U+0C4D.
| Above, Halant, None, None, |
| |
| None, None, None, None, |
| None, Above, Below, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Kannada, U+0C80..U+0CFF |
| None, None, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, Below, Below, Below, |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| Below, Below, Below, Below, |
| |
| Below, None, Below, Below, |
| None, Below, Below, Below, |
| Below, Below, None, None, |
| None, None, Post, Above, |
| |
| Split, Post, Post, Post, |
| Post, None, Above, Split, |
| Split, None, Split, Split, |
// NOTE(review): Halant on the next line is a Form enumerator (value 3, numerically equal
// to Below), not a Position - confirm the intended position for U+0CCD.
| Above, Halant, None, None, |
| |
| None, None, None, None, |
| None, Post, Post, None, |
| None, None, None, None, |
| None, None, Below, None, |
| |
| None, None, Below, Below, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Malayalam, U+0D00..U+0D7F |
| None, None, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, Post, |
| |
| Post, None, Below, None, |
| None, Post, None, None, |
| None, None, None, None, |
| None, None, Post, Post, |
| |
| Post, Post, Post, Post, |
| None, None, Pre, Pre, |
| Pre, None, Split, Split, |
// NOTE(review): Halant on the next line is a Form enumerator (value 3, numerically equal
// to Below), not a Position - confirm the intended position for U+0D4D.
| Split, Halant, None, None, |
| |
| None, None, None, None, |
| None, None, None, Post, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| // Sinhala, U+0D80..U+0DFF |
| None, None, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, Post, |
| |
| Post, Post, Above, Above, |
| Below, None, Below, None, |
| Post, Pre, Split, Pre, |
| Split, Split, Split, Post, |
| |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None, |
| |
| None, None, Post, Post, |
| None, None, None, None, |
| None, None, None, None, |
| None, None, None, None |
| }; |
| |
| static inline Form form(unsigned short uc) { |
| if (uc < 0x900 || uc > 0xdff) { |
| if (uc == 0x25cc) |
| return Consonant; |
| if (uc == 0x200c || uc == 0x200d) |
| return Control; |
| return Other; |
| } |
| return (Form)indicForms[uc-0x900]; |
| } |
| |
| static inline Position indic_position(unsigned short uc) { |
| if (uc < 0x900 || uc > 0xdff) |
| return None; |
| return (Position) indicPosition[uc-0x900]; |
| } |
| |
| |
// Bit flags combined per script in scriptProperties.
enum IndicScriptProperties {
    HasReph  = 1 << 0,   // 0x01
    HasSplit = 1 << 1    // 0x02
};
| |
| const hb_uint8 scriptProperties[10] = { |
| // Devanagari, |
| HasReph, |
| // Bengali, |
| HasReph|HasSplit, |
| // Gurmukhi, |
| 0, |
| // Gujarati, |
| HasReph, |
| // Oriya, |
| HasReph|HasSplit, |
| // Tamil, |
| HasSplit, |
| // Telugu, |
| HasSplit, |
| // Kannada, |
| HasSplit|HasReph, |
| // Malayalam, |
| HasSplit, |
| // Sinhala, |
| HasSplit |
| }; |
| |
| struct IndicOrdering { |
| Form form; |
| Position position; |
| }; |
| |
| static const IndicOrdering devanagari_order [] = { |
| { Consonant, Below }, |
| { Matra, Below }, |
| { VowelMark, Below }, |
| { StressMark, Below }, |
| { Matra, Above }, |
| { Matra, Post }, |
| { Consonant, Reph }, |
| { VowelMark, Above }, |
| { StressMark, Above }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering bengali_order [] = { |
| { Consonant, Below }, |
| { Matra, Below }, |
| { Matra, Above }, |
| { Consonant, Reph }, |
| { VowelMark, Above }, |
| { Consonant, Post }, |
| { Matra, Post }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering gurmukhi_order [] = { |
| { Consonant, Below }, |
| { Matra, Below }, |
| { Matra, Above }, |
| { Consonant, Post }, |
| { Matra, Post }, |
| { VowelMark, Above }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering tamil_order [] = { |
| { Matra, Above }, |
| { Matra, Post }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering telugu_order [] = { |
| { Matra, Above }, |
| { Matra, Below }, |
| { Matra, Post }, |
| { Consonant, Below }, |
| { Consonant, Post }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering kannada_order [] = { |
| { Matra, Above }, |
| { Matra, Post }, |
| { Consonant, Below }, |
| { Consonant, Post }, |
| { LengthMark, Post }, |
| { Consonant, Reph }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering malayalam_order [] = { |
| { Consonant, Below }, |
| { Matra, Below }, |
| { Consonant, Reph }, |
| { Consonant, Post }, |
| { Matra, Post }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering sinhala_order [] = { |
| { Matra, Below }, |
| { Matra, Above }, |
| { Matra, Post }, |
| { VowelMark, Post }, |
| { (Form)0, None } |
| }; |
| |
| static const IndicOrdering * const indic_order[] = { |
| devanagari_order, // Devanagari |
| bengali_order, // Bengali |
| gurmukhi_order, // Gurmukhi |
| devanagari_order, // Gujarati |
| bengali_order, // Oriya |
| tamil_order, // Tamil |
| telugu_order, // Telugu |
| kannada_order, // Kannada |
| malayalam_order, // Malayalam |
| sinhala_order // Sinhala |
| }; |
| |
| |
| |
// Vowel matras that have to be split into two or three parts.
// Each row is { matra, part1, part2, part3 }; part3 is 0x0 when the matra has
// only two parts. Rows are sorted by ascending matra value and the list is closed
// by a single 0xffff, which is what lets splitMatra() find a row with a simple
// forward scan.
static const unsigned short split_matras[] = {
    // Bengali
    0x09cb,  0x09c7, 0x09be, 0x0000,
    0x09cc,  0x09c7, 0x09d7, 0x0000,
    // Oriya
    0x0b48,  0x0b47, 0x0b56, 0x0000,
    0x0b4b,  0x0b47, 0x0b3e, 0x0000,
    0x0b4c,  0x0b47, 0x0b57, 0x0000,
    // Tamil
    0x0bca,  0x0bc6, 0x0bbe, 0x0000,
    0x0bcb,  0x0bc7, 0x0bbe, 0x0000,
    0x0bcc,  0x0bc6, 0x0bd7, 0x0000,
    // Telugu
    0x0c48,  0x0c46, 0x0c56, 0x0000,
    // Kannada
    0x0cc0,  0x0cbf, 0x0cd5, 0x0000,
    0x0cc7,  0x0cc6, 0x0cd5, 0x0000,
    0x0cc8,  0x0cc6, 0x0cd6, 0x0000,
    0x0cca,  0x0cc6, 0x0cc2, 0x0000,
    0x0ccb,  0x0cc6, 0x0cc2, 0x0cd5,
    // Malayalam
    0x0d4a,  0x0d46, 0x0d3e, 0x0000,
    0x0d4b,  0x0d47, 0x0d3e, 0x0000,
    0x0d4c,  0x0d46, 0x0d57, 0x0000,
    // Sinhala
    0x0dda,  0x0dd9, 0x0dca, 0x0000,
    0x0ddc,  0x0dd9, 0x0dcf, 0x0000,
    0x0ddd,  0x0dd9, 0x0dcf, 0x0dca,
    0x0dde,  0x0dd9, 0x0ddf, 0x0000,
    // end of table
    0xffff
};
| |
| static inline void splitMatra(unsigned short *reordered, int matra, int &len) |
| { |
| unsigned short matra_uc = reordered[matra]; |
| //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]); |
| |
| const unsigned short *split = split_matras; |
| while (split[0] < matra_uc) |
| split += 4; |
| |
| assert(*split == matra_uc); |
| ++split; |
| |
| int added_chars = split[2] == 0x0 ? 1 : 2; |
| |
| memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short)); |
| reordered[matra] = split[0]; |
| reordered[matra+1] = split[1]; |
| if(added_chars == 2) |
| reordered[matra+2] = split[2]; |
| len += added_chars; |
| } |
| |
| #ifndef NO_OPENTYPE |
| static const HB_OpenTypeFeature indic_features[] = { |
| { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty }, |
| { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, |
| { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, |
| { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty }, |
| { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty }, |
| { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty }, |
| { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty }, |
| { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty }, |
| { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty }, |
| { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty }, |
| { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty }, |
| { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty }, |
| { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty }, |
| { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty }, |
| { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty }, |
| { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty }, |
| { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty }, |
| { 0, 0 } |
| }; |
| #endif |
| |
// Define INDIC_DEBUG to get a trace of the shaping steps on stderr.
// #define INDIC_DEBUG
#ifdef INDIC_DEBUG
#define IDEBUG hb_debug
#include <stdarg.h>

// printf-style trace helper: writes the formatted message followed by a
// newline to stderr.
static void hb_debug(const char *msg, ...)
{
    va_list args;
    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);
    fprintf(stderr, "\n");
}

#else
// Disabled tracing.  IDEBUG(...) expands to a single void expression whose
// printf branch is never evaluated, while the arguments are still
// type-checked.  Being an expression rather than an `if`, it cannot capture
// an `else` that follows it:
//
//     if (cond) IDEBUG("..."); else other();   // `else` pairs with `if (cond)`
#define IDEBUG 1 ? (void)0 : (void)printf
#endif
| |
#if 0 //def INDIC_DEBUG
// Debug helper (currently compiled out): returns a space separated list of
// the features enabled in an OpenType property mask.  The mask is inverted
// first because a cleared bit means "apply this feature".
static QString propertiesToString(int properties)
{
    QString res;
    properties = ~properties;
    if (properties & LocaProperty)
        res += "Loca ";
    if (properties & CcmpProperty)
        res += "Ccmp ";
    if (properties & InitProperty)
        res += "Init ";
    if (properties & NuktaProperty)
        res += "Nukta ";
    if (properties & AkhantProperty)
        res += "Akhant ";
    if (properties & RephProperty)
        res += "Reph ";
    if (properties & PreFormProperty)
        res += "PreForm ";
    if (properties & BelowFormProperty)
        res += "BelowForm ";
    if (properties & AboveFormProperty)
        res += "AboveForm ";
    if (properties & HalfFormProperty)
        res += "HalfForm ";
    if (properties & PostFormProperty)
        res += "PostForm ";
    // This label used to read "PostForm " (copy-paste slip), which made the
    // two features indistinguishable in the trace.
    if (properties & ConjunctFormProperty)
        res += "ConjunctForm ";
    if (properties & VattuProperty)
        res += "Vattu ";
    if (properties & PreSubstProperty)
        res += "PreSubst ";
    if (properties & BelowSubstProperty)
        res += "BelowSubst ";
    if (properties & AboveSubstProperty)
        res += "AboveSubst ";
    if (properties & PostSubstProperty)
        res += "PostSubst ";
    if (properties & HalantProperty)
        res += "Halant ";
    if (properties & CligProperty)
        res += "Clig ";
    if (properties & IndicCaltProperty)
        res += "Calt ";
    return res;
}
#endif
| |
| static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid) |
| { |
| HB_Script script = item->item.script; |
| assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala); |
| const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari); |
| const unsigned short ra = script_base + 0x30; |
| const unsigned short halant = script_base + 0x4d; |
| const unsigned short nukta = script_base + 0x3c; |
| bool control = false; |
| |
| int len = (int)item->item.length; |
| IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid); |
| |
| if ((int)item->num_glyphs < len+4) { |
| item->num_glyphs = len+4; |
| return false; |
| } |
| |
| HB_STACKARRAY(HB_UChar16, reordered, len + 4); |
| HB_STACKARRAY(hb_uint8, position, len + 4); |
| |
| unsigned char properties = scriptProperties[script-HB_Script_Devanagari]; |
| |
| if (invalid) { |
| *reordered = 0x25cc; |
| memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16)); |
| len++; |
| } else { |
| memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16)); |
| } |
| if (reordered[len-1] == 0x200c) // zero width non joiner |
| len--; |
| |
| int i; |
| int base = 0; |
| int reph = -1; |
| |
| #ifdef INDIC_DEBUG |
| IDEBUG("original:"); |
| for (i = 0; i < len; i++) { |
| IDEBUG(" %d: %4x", i, reordered[i]); |
| } |
| #endif |
| |
| if (len != 1) { |
| HB_UChar16 *uc = reordered; |
| bool beginsWithRa = false; |
| |
| // Rule 1: find base consonant |
| // |
| // The shaping engine finds the base consonant of the |
| // syllable, using the following algorithm: starting from the |
| // end of the syllable, move backwards until a consonant is |
| // found that does not have a below-base or post-base form |
| // (post-base forms have to follow below-base forms), or |
| // arrive at the first consonant. The consonant stopped at |
| // will be the base. |
| // |
| // * If the syllable starts with Ra + H (in a script that has |
| // 'Reph'), Ra is excluded from candidates for base |
| // consonants. |
| // |
| // * In Kannada and Telugu, the base consonant cannot be |
| // farther than 3 consonants from the end of the syllable. |
| // #### replace the HasReph property by testing if the feature exists in the font! |
| if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) { |
| if ((properties & HasReph) && (len > 2) && |
| (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant) |
| beginsWithRa = true; |
| |
| if (beginsWithRa && form(*(uc+2)) == Control) |
| beginsWithRa = false; |
| |
| base = (beginsWithRa ? 2 : 0); |
| IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base); |
| |
| int lastConsonant = 0; |
| int matra = -1; |
| // we remember: |
| // * the last consonant since we need it for rule 2 |
| // * the matras position for rule 3 and 4 |
| |
| // figure out possible base glyphs |
| memset(position, 0, len); |
| if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { |
| bool vattu = false; |
| for (i = base; i < len; ++i) { |
| position[i] = form(uc[i]); |
| if (position[i] == Consonant) { |
| lastConsonant = i; |
| vattu = (!vattu && uc[i] == ra); |
| if (vattu) { |
| IDEBUG("excluding vattu glyph at %d from base candidates", i); |
| position[i] = Vattu; |
| } |
| } else if (position[i] == Matra) { |
| matra = i; |
| } |
| } |
| } else { |
| for (i = base; i < len; ++i) { |
| position[i] = form(uc[i]); |
| if (position[i] == Consonant) |
| lastConsonant = i; |
| else if (matra < 0 && position[i] == Matra) |
| matra = i; |
| } |
| } |
| int skipped = 0; |
| Position pos = Post; |
| for (i = len-1; i >= base; i--) { |
| if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada)) |
| continue; |
| |
| if (i < len-1 && position[i] == Control && position[i+1] == Consonant) { |
| base = i+1; |
| break; |
| } |
| |
| Position charPosition = indic_position(uc[i]); |
| if (pos == Post && charPosition == Post) { |
| pos = Post; |
| } else if ((pos == Post || pos == Below) && charPosition == Below) { |
| if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) |
| base = i; |
| pos = Below; |
| } else { |
| base = i; |
| break; |
| } |
| if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) { |
| base = i; |
| break; |
| } |
| ++skipped; |
| } |
| |
| IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant); |
| |
| // Rule 2: |
| // |
| // If the base consonant is not the last one, Uniscribe |
| // moves the halant from the base consonant to the last |
| // one. |
| if (lastConsonant > base) { |
| int halantPos = 0; |
| if (uc[base+1] == halant) |
| halantPos = base + 1; |
| else if (uc[base+1] == nukta && uc[base+2] == halant) |
| halantPos = base + 2; |
| if (halantPos > 0) { |
| IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant); |
| for (i = halantPos; i < lastConsonant; i++) |
| uc[i] = uc[i+1]; |
| uc[lastConsonant] = halant; |
| } |
| } |
| |
| // Rule 3: |
| // |
| // If the syllable starts with Ra + H, Uniscribe moves |
| // this combination so that it follows either: |
| |
| // * the post-base 'matra' (if any) or the base consonant |
| // (in scripts that show similarity to Devanagari, i.e., |
| // Devanagari, Gujarati, Bengali) |
| // * the base consonant (other scripts) |
| // * the end of the syllable (Kannada) |
| |
| Position matra_position = None; |
| if (matra > 0) |
| matra_position = indic_position(uc[matra]); |
| IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base); |
| |
| if (beginsWithRa && base != 0) { |
| int toPos = base+1; |
| if (toPos < len && uc[toPos] == nukta) |
| toPos++; |
| if (toPos < len && uc[toPos] == halant) |
| toPos++; |
| if (toPos < len && uc[toPos] == 0x200d) |
| toPos++; |
| if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant) |
| toPos += 2; |
| if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) { |
| if (matra_position == Post || matra_position == Split) { |
| toPos = matra+1; |
| matra -= 2; |
| } |
| } else if (script == HB_Script_Kannada) { |
| toPos = len; |
| matra -= 2; |
| } |
| |
| IDEBUG("moving leading ra+halant to position %d", toPos); |
| for (i = 2; i < toPos; i++) |
| uc[i-2] = uc[i]; |
| uc[toPos-2] = ra; |
| uc[toPos-1] = halant; |
| base -= 2; |
| if (properties & HasReph) |
| reph = toPos-2; |
| } |
| |
| // Rule 4: |
| |
| // Uniscribe splits two- or three-part matras into their |
| // parts. This splitting is a character-to-character |
| // operation). |
| // |
| // Uniscribe describes some moving operations for these |
| // matras here. For shaping however all pre matras need |
| // to be at the beginning of the syllable, so we just move |
| // them there now. |
| if (matra_position == Split) { |
| splitMatra(uc, matra, len); |
| // Handle three-part matras (0xccb in Kannada) |
| matra_position = indic_position(uc[matra]); |
| } |
| |
| if (matra_position == Pre) { |
| unsigned short m = uc[matra]; |
| while (matra--) |
| uc[matra+1] = uc[matra]; |
| uc[0] = m; |
| base++; |
| } |
| } |
| |
| // Rule 5: |
| // |
| // Uniscribe classifies consonants and 'matra' parts as |
| // pre-base, above-base (Reph), below-base or post-base. This |
| // classification exists on the character code level and is |
| // language-dependent, not font-dependent. |
| for (i = 0; i < base; ++i) |
| position[i] = Pre; |
| position[base] = Base; |
| for (i = base+1; i < len; ++i) { |
| position[i] = indic_position(uc[i]); |
| // #### replace by adjusting table |
| if (uc[i] == nukta || uc[i] == halant) |
| position[i] = Inherit; |
| } |
| if (reph > 0) { |
| // recalculate reph, it might have changed. |
| for (i = base+1; i < len; ++i) |
| if (uc[i] == ra) |
| reph = i; |
| position[reph] = Reph; |
| position[reph+1] = Inherit; |
| } |
| |
| // all reordering happens now to the chars after the base |
| int fixed = base+1; |
| if (fixed < len && uc[fixed] == nukta) |
| fixed++; |
| if (fixed < len && uc[fixed] == halant) |
| fixed++; |
| if (fixed < len && uc[fixed] == 0x200d) |
| fixed++; |
| |
| #ifdef INDIC_DEBUG |
| for (i = fixed; i < len; ++i) |
| IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]); |
| #endif |
| // we continuosly position the matras and vowel marks and increase the fixed |
| // until we reached the end. |
| const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari]; |
| |
| IDEBUG(" reordering pass:"); |
| IDEBUG(" base=%d fixed=%d", base, fixed); |
| int toMove = 0; |
| while (finalOrder[toMove].form && fixed < len-1) { |
| IDEBUG(" fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position); |
| for (i = fixed; i < len; i++) { |
| // IDEBUG() << " i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i]) |
| // << "position=" << position[i]; |
| if (form(uc[i]) == finalOrder[toMove].form && |
| position[i] == finalOrder[toMove].position) { |
| // need to move this glyph |
| int to = fixed; |
| if (i < len-1 && position[i+1] == Inherit) { |
| IDEBUG(" moving two chars from %d to %d", i, to); |
| unsigned short ch = uc[i]; |
| unsigned short ch2 = uc[i+1]; |
| unsigned char pos = position[i]; |
| for (int j = i+1; j > to+1; j--) { |
| uc[j] = uc[j-2]; |
| position[j] = position[j-2]; |
| } |
| uc[to] = ch; |
| uc[to+1] = ch2; |
| position[to] = pos; |
| position[to+1] = pos; |
| fixed += 2; |
| } else { |
| IDEBUG(" moving one char from %d to %d", i, to); |
| unsigned short ch = uc[i]; |
| unsigned char pos = position[i]; |
| for (int j = i; j > to; j--) { |
| uc[j] = uc[j-1]; |
| position[j] = position[j-1]; |
| } |
| uc[to] = ch; |
| position[to] = pos; |
| fixed++; |
| } |
| } |
| } |
| toMove++; |
| } |
| |
| } |
| |
| if (reph > 0) { |
| // recalculate reph, it might have changed. |
| for (i = base+1; i < len; ++i) |
| if (reordered[i] == ra) |
| reph = i; |
| } |
| |
| #ifndef NO_OPENTYPE |
| const int availableGlyphs = item->num_glyphs; |
| #endif |
| if (!item->font->klass->convertStringToGlyphIndices(item->font, |
| reordered, len, |
| item->glyphs, &item->num_glyphs, |
| item->item.bidiLevel % 2)) |
| goto error; |
| |
| |
| IDEBUG(" base=%d, reph=%d", base, reph); |
| IDEBUG("reordered:"); |
| for (i = 0; i < len; i++) { |
| item->attributes[i].mark = false; |
| item->attributes[i].clusterStart = false; |
| item->attributes[i].justification = 0; |
| item->attributes[i].zeroWidth = false; |
| IDEBUG(" %d: %4x", i, reordered[i]); |
| } |
| |
| // now we have the syllable in the right order, and can start running it through open type. |
| |
| for (i = 0; i < len; ++i) |
| control |= (form(reordered[i]) == Control); |
| |
| #ifndef NO_OPENTYPE |
| if (openType) { |
| |
| // we need to keep track of where the base glyph is for some |
| // scripts and use the cluster feature for this. This |
| // also means we have to correct the logCluster output from |
| // the open type engine manually afterwards. for indic this |
| // is rather simple, as all chars just point to the first |
| // glyph in the syllable. |
| HB_STACKARRAY(unsigned short, clusters, len); |
| HB_STACKARRAY(unsigned int, properties, len); |
| |
| for (i = 0; i < len; ++i) |
| clusters[i] = i; |
| |
| // features we should always apply |
| for (i = 0; i < len; ++i) |
| properties[i] = ~(LocaProperty |
| | CcmpProperty |
| | NuktaProperty |
| | VattuProperty |
| | ConjunctFormProperty |
| | PreSubstProperty |
| | BelowSubstProperty |
| | AboveSubstProperty |
| | PostSubstProperty |
| | HalantProperty |
| | IndicCaltProperty |
| | PositioningProperties); |
| |
| // Loca always applies |
| // Ccmp always applies |
| // Init |
| if (item->item.pos == 0 |
| || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1]))) |
| properties[0] &= ~InitProperty; |
| |
| // Nukta always applies |
| // Akhant |
| for (i = 0; i <= base; ++i) |
| properties[i] &= ~AkhantProperty; |
| // Reph |
| if (reph >= 0) { |
| properties[reph] &= ~RephProperty; |
| properties[reph+1] &= ~RephProperty; |
| } |
| // BelowForm |
| for (i = base+1; i < len; ++i) |
| properties[i] &= ~BelowFormProperty; |
| |
| if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { |
| // vattu glyphs need this aswell |
| bool vattu = false; |
| for (i = base-2; i > 1; --i) { |
| if (form(reordered[i]) == Consonant) { |
| vattu = (!vattu && reordered[i] == ra); |
| if (vattu) { |
| IDEBUG("forming vattu ligature at %d", i); |
| properties[i] &= ~BelowFormProperty; |
| properties[i+1] &= ~BelowFormProperty; |
| } |
| } |
| } |
| } |
| // HalfFormProperty |
| for (i = 0; i < base; ++i) |
| properties[i] &= ~HalfFormProperty; |
| if (control) { |
| for (i = 2; i < len; ++i) { |
| if (reordered[i] == 0x200d /* ZWJ */) { |
| properties[i-1] &= ~HalfFormProperty; |
| properties[i-2] &= ~HalfFormProperty; |
| } else if (reordered[i] == 0x200c /* ZWNJ */) { |
| properties[i-1] &= ~HalfFormProperty; |
| properties[i-2] &= ~HalfFormProperty; |
| } |
| } |
| } |
| // PostFormProperty |
| for (i = base+1; i < len; ++i) |
| properties[i] &= ~PostFormProperty; |
| // vattu always applies |
| // pres always applies |
| // blws always applies |
| // abvs always applies |
| // psts always applies |
| // halant always applies |
| // calt always applies |
| |
| #ifdef INDIC_DEBUG |
| // { |
| // IDEBUG("OT properties:"); |
| // for (int i = 0; i < len; ++i) |
| // qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data()); |
| // } |
| #endif |
| |
| // initialize |
| item->log_clusters = clusters; |
| HB_OpenTypeShape(item, properties); |
| |
| int newLen = item->face->buffer->in_length; |
| HB_GlyphItem otl_glyphs = item->face->buffer->in_string; |
| |
| // move the left matra back to its correct position in malayalam and tamil |
| if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) { |
| // qDebug("reordering matra, len=%d", newLen); |
| // need to find the base in the shaped string and move the matra there |
| int basePos = 0; |
| while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base) |
| basePos++; |
| --basePos; |
| if (basePos < newLen && basePos > 1) { |
| // qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen); |
| HB_GlyphItemRec m = otl_glyphs[0]; |
| --basePos; |
| for (i = 0; i < basePos; ++i) |
| otl_glyphs[i] = otl_glyphs[i+1]; |
| otl_glyphs[basePos] = m; |
| } |
| } |
| |
| HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false); |
| |
| HB_FREE_STACKARRAY(clusters); |
| HB_FREE_STACKARRAY(properties); |
| |
| if (!positioned) |
| goto error; |
| |
| if (control) { |
| IDEBUG("found a control char in the syllable"); |
| hb_uint32 i = 0, j = 0; |
| while (i < item->num_glyphs) { |
| if (form(reordered[otl_glyphs[i].cluster]) == Control) { |
| ++i; |
| if (i >= item->num_glyphs) |
| break; |
| } |
| item->glyphs[j] = item->glyphs[i]; |
| item->attributes[j] = item->attributes[i]; |
| item->offsets[j] = item->offsets[i]; |
| item->advances[j] = item->advances[i]; |
| ++i; |
| ++j; |
| } |
| item->num_glyphs = j; |
| } |
| |
| } else { |
| HB_HeuristicPosition(item); |
| } |
| #endif // NO_OPENTYPE |
| item->attributes[0].clusterStart = true; |
| |
| HB_FREE_STACKARRAY(reordered); |
| HB_FREE_STACKARRAY(position); |
| |
| IDEBUG("<<<<<<"); |
| return true; |
| |
| error: |
| HB_FREE_STACKARRAY(reordered); |
| HB_FREE_STACKARRAY(position); |
| return false; |
| } |
| |
| /* syllables are of the form: |
| |
| (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark? |
| (Consonant Nukta? Halant)* Consonant Halant |
| IndependentVowel VowelMark? StressMark? |
| |
| We return syllable boundaries on invalid combinations aswell |
| */ |
| static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid) |
| { |
| *invalid = false; |
| IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end); |
| const HB_UChar16 *uc = s+start; |
| |
| int pos = 0; |
| Form state = form(uc[pos]); |
| IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]); |
| pos++; |
| |
| if (state != Consonant && state != IndependentVowel) { |
| if (state != Other) |
| *invalid = true; |
| goto finish; |
| } |
| |
| while (pos < end - start) { |
| Form newState = form(uc[pos]); |
| IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]); |
| switch(newState) { |
| case Control: |
| newState = state; |
| if (state == Halant && uc[pos] == 0x200d /* ZWJ */) |
| break; |
| // the control character should be the last char in the item |
| ++pos; |
| goto finish; |
| case Consonant: |
| if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */)) |
| break; |
| goto finish; |
| case Halant: |
| if (state == Nukta || state == Consonant) |
| break; |
| // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya |
| if (script == HB_Script_Bengali && pos == 1 && |
| (uc[0] == 0x0985 || uc[0] == 0x098f)) |
| break; |
| // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra. |
| if (script == HB_Script_Sinhala && state == Matra) { |
| ++pos; |
| continue; |
| } |
| if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) { |
| ++pos; |
| continue; |
| } |
| goto finish; |
| case Nukta: |
| if (state == Consonant) |
| break; |
| goto finish; |
| case StressMark: |
| if (state == VowelMark) |
| break; |
| // fall through |
| case VowelMark: |
| if (state == Matra || state == LengthMark || state == IndependentVowel) |
| break; |
| // fall through |
| case Matra: |
| if (state == Consonant || state == Nukta) |
| break; |
| if (state == Matra) { |
| // ### needs proper testing for correct two/three part matras |
| break; |
| } |
| // ### not sure if this is correct. If it is, does it apply only to Bengali or should |
| // it work for all Indic languages? |
| // the combination Independent_A + Vowel Sign AA is allowed. |
| if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985) |
| break; |
| if (script == HB_Script_Tamil && state == Matra) { |
| if (uc[pos-1] == 0x0bc6 && |
| (uc[pos] == 0xbbe || uc[pos] == 0xbd7)) |
| break; |
| if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe) |
| break; |
| } |
| goto finish; |
| |
| case LengthMark: |
| if (state == Matra) { |
| // ### needs proper testing for correct two/three part matras |
| break; |
| } |
| case IndependentVowel: |
| case Invalid: |
| case Other: |
| goto finish; |
| } |
| state = newState; |
| pos++; |
| } |
| finish: |
| return pos+start; |
| } |
| |
| HB_Bool HB_IndicShape(HB_ShaperItem *item) |
| { |
| assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala); |
| |
| HB_Bool openType = false; |
| #ifndef NO_OPENTYPE |
| openType = HB_SelectScript(item, indic_features); |
| #endif |
| unsigned short *logClusters = item->log_clusters; |
| |
| HB_ShaperItem syllable = *item; |
| int first_glyph = 0; |
| |
| int sstart = item->item.pos; |
| int end = sstart + item->item.length; |
| IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length); |
| while (sstart < end) { |
| bool invalid; |
| int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid); |
| IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, |
| invalid ? "true" : "false"); |
| syllable.item.pos = sstart; |
| syllable.item.length = send-sstart; |
| syllable.glyphs = item->glyphs + first_glyph; |
| syllable.attributes = item->attributes + first_glyph; |
| syllable.offsets = item->offsets + first_glyph; |
| syllable.advances = item->advances + first_glyph; |
| syllable.num_glyphs = item->num_glyphs - first_glyph; |
| if (!indic_shape_syllable(openType, &syllable, invalid)) { |
| IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); |
| item->num_glyphs += syllable.num_glyphs; |
| return false; |
| } |
| // fix logcluster array |
| IDEBUG("syllable:"); |
| hb_uint32 g; |
| for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g) |
| IDEBUG(" %d -> glyph %x", g, item->glyphs[g]); |
| IDEBUG(" logclusters:"); |
| int i; |
| for (i = sstart; i < send; ++i) { |
| IDEBUG(" %d -> glyph %d", i, first_glyph); |
| logClusters[i-item->item.pos] = first_glyph; |
| } |
| sstart = send; |
| first_glyph += syllable.num_glyphs; |
| } |
| item->num_glyphs = first_glyph; |
| return true; |
| } |