| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/i18n/rtl.h" |
| |
| #include "base/file_path.h" |
| #include "base/logging.h" |
| #include "base/string_util.h" |
| #include "base/utf_string_conversions.h" |
| #include "base/sys_string_conversions.h" |
| #include "unicode/coll.h" |
| #include "unicode/locid.h" |
| #include "unicode/uchar.h" |
| #include "unicode/uscript.h" |
| |
| #if defined(TOOLKIT_USES_GTK) |
| #include <gtk/gtk.h> |
| #endif |
| |
| namespace { |
| |
| // Extract language and country, ignore keywords, concatenate using dash. |
| std::string GetLocaleString(const icu::Locale& locale) { |
| const char* language = locale.getLanguage(); |
| const char* country = locale.getCountry(); |
| |
| std::string result = |
| (language != NULL && *language != '\0') ? language : "und"; |
| |
| if (country != NULL && *country != '\0') { |
| result += '-'; |
| result += country; |
| } |
| |
| return result; |
| } |
| |
| } // namespace |
| |
| namespace base { |
| namespace i18n { |
| |
| // Represents the locale-specific ICU text direction. |
| static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; |
| |
| // Convert the ICU default locale to a string. |
| std::string GetConfiguredLocale() { |
| return GetLocaleString(icu::Locale::getDefault()); |
| } |
| |
| // Convert the ICU canonicalized locale to a string. |
| std::string GetCanonicalLocale(const char* locale) { |
| return GetLocaleString(icu::Locale::createCanonical(locale)); |
| } |
| |
| // Convert Chrome locale name to ICU locale name |
| std::string ICULocaleName(const std::string& locale_string) { |
| // If not Spanish, just return it. |
| if (locale_string.substr(0, 2) != "es") |
| return locale_string; |
| // Expand es to es-ES. |
| if (LowerCaseEqualsASCII(locale_string, "es")) |
| return "es-ES"; |
| // Map es-419 (Latin American Spanish) to es-FOO depending on the system |
| // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map |
| // to es-MX (the most populous in Spanish-speaking Latin America). |
| if (LowerCaseEqualsASCII(locale_string, "es-419")) { |
| const icu::Locale& locale = icu::Locale::getDefault(); |
| std::string language = locale.getLanguage(); |
| const char* country = locale.getCountry(); |
| if (LowerCaseEqualsASCII(language, "es") && |
| !LowerCaseEqualsASCII(country, "es")) { |
| language += '-'; |
| language += country; |
| return language; |
| } |
| return "es-MX"; |
| } |
| // Currently, Chrome has only "es" and "es-419", but later we may have |
| // more specific "es-RR". |
| return locale_string; |
| } |
| |
| void SetICUDefaultLocale(const std::string& locale_string) { |
| icu::Locale locale(ICULocaleName(locale_string).c_str()); |
| UErrorCode error_code = U_ZERO_ERROR; |
| icu::Locale::setDefault(locale, error_code); |
| // This return value is actually bogus because Locale object is |
| // an ID and setDefault seems to always succeed (regardless of the |
| // presence of actual locale data). However, |
| // it does not hurt to have it as a sanity check. |
| DCHECK(U_SUCCESS(error_code)); |
| g_icu_text_direction = UNKNOWN_DIRECTION; |
| |
| // If we use Views toolkit on top of GtkWidget, then we need to keep |
| // GtkWidget's default text direction consistent with ICU's text direction. |
| // Because in this case ICU's text direction will be used instead. |
| // See IsRTL() function below. |
| #if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) |
| gtk_widget_set_default_direction( |
| ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); |
| #endif |
| } |
| |
| bool IsRTL() { |
| #if defined(TOOLKIT_GTK) |
| GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); |
| return (gtk_dir == GTK_TEXT_DIR_RTL); |
| #else |
| return ICUIsRTL(); |
| #endif |
| } |
| |
| bool ICUIsRTL() { |
| if (g_icu_text_direction == UNKNOWN_DIRECTION) { |
| const icu::Locale& locale = icu::Locale::getDefault(); |
| g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); |
| } |
| return g_icu_text_direction == RIGHT_TO_LEFT; |
| } |
| |
| TextDirection GetTextDirectionForLocale(const char* locale_name) { |
| UErrorCode status = U_ZERO_ERROR; |
| ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); |
| DCHECK(U_SUCCESS(status)); |
| // Treat anything other than RTL as LTR. |
| return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; |
| } |
| |
| TextDirection GetFirstStrongCharacterDirection(const string16& text) { |
| const UChar* string = text.c_str(); |
| size_t length = text.length(); |
| size_t position = 0; |
| while (position < length) { |
| UChar32 character; |
| size_t next_position = position; |
| U16_NEXT(string, next_position, length, character); |
| |
| // Now that we have the character, we use ICU in order to query for the |
| // appropriate Unicode BiDi character type. |
| int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); |
| if ((property == U_RIGHT_TO_LEFT) || |
| (property == U_RIGHT_TO_LEFT_ARABIC) || |
| (property == U_RIGHT_TO_LEFT_EMBEDDING) || |
| (property == U_RIGHT_TO_LEFT_OVERRIDE)) { |
| return RIGHT_TO_LEFT; |
| } else if ((property == U_LEFT_TO_RIGHT) || |
| (property == U_LEFT_TO_RIGHT_EMBEDDING) || |
| (property == U_LEFT_TO_RIGHT_OVERRIDE)) { |
| return LEFT_TO_RIGHT; |
| } |
| |
| position = next_position; |
| } |
| |
| return LEFT_TO_RIGHT; |
| } |
| |
| #if defined(WCHAR_T_IS_UTF32) |
| TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { |
| return GetFirstStrongCharacterDirection(WideToUTF16(text)); |
| } |
| #endif |
| |
| #if defined(OS_WIN) |
| bool AdjustStringForLocaleDirection(string16* text) { |
| if (!IsRTL() || text->empty()) |
| return false; |
| |
| // Marking the string as LTR if the locale is RTL and the string does not |
| // contain strong RTL characters. Otherwise, mark the string as RTL. |
| bool has_rtl_chars = StringContainsStrongRTLChars(*text); |
| if (!has_rtl_chars) |
| WrapStringWithLTRFormatting(text); |
| else |
| WrapStringWithRTLFormatting(text); |
| |
| return true; |
| } |
| #else |
| bool AdjustStringForLocaleDirection(string16* text) { |
| // On OS X & GTK the directionality of a label is determined by the first |
| // strongly directional character. |
| // However, we want to make sure that in an LTR-language-UI all strings are |
| // left aligned and vice versa. |
| // A problem can arise if we display a string which starts with user input. |
| // User input may be of the opposite directionality to the UI. So the whole |
| // string will be displayed in the opposite directionality, e.g. if we want to |
| // display in an LTR UI [such as US English]: |
| // |
| // EMAN_NOISNETXE is now installed. |
| // |
| // Since EXTENSION_NAME begins with a strong RTL char, the label's |
| // directionality will be set to RTL and the string will be displayed visually |
| // as: |
| // |
| // .is now installed EMAN_NOISNETXE |
| // |
| // In order to solve this issue, we prepend an LRM to the string. An LRM is a |
| // strongly directional LTR char. |
| // We also append an LRM at the end, which ensures that we're in an LTR |
| // context. |
| |
| // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the |
| // box so there is no issue with displaying zero-width bidi control characters |
| // on any system. Thus no need for the !IsRTL() check here. |
| if (text->empty()) |
| return false; |
| |
| bool ui_direction_is_rtl = IsRTL(); |
| |
| bool has_rtl_chars = StringContainsStrongRTLChars(*text); |
| if (!ui_direction_is_rtl && has_rtl_chars) { |
| WrapStringWithRTLFormatting(text); |
| text->insert(0, 1, kLeftToRightMark); |
| text->push_back(kLeftToRightMark); |
| } else if (ui_direction_is_rtl && has_rtl_chars) { |
| WrapStringWithRTLFormatting(text); |
| text->insert(0, 1, kRightToLeftMark); |
| text->push_back(kRightToLeftMark); |
| } else if (ui_direction_is_rtl) { |
| WrapStringWithLTRFormatting(text); |
| text->insert(0, 1, kRightToLeftMark); |
| text->push_back(kRightToLeftMark); |
| } |
| |
| return true; |
| } |
| |
| #endif // !OS_WIN |
| |
| #if defined(WCHAR_T_IS_UTF32) |
| bool AdjustStringForLocaleDirection(std::wstring* text) { |
| string16 temp = WideToUTF16(*text); |
| if (AdjustStringForLocaleDirection(&temp)) { |
| // We should only touch the output on success. |
| *text = UTF16ToWide(temp); |
| return true; |
| } |
| return false; |
| } |
| #endif |
| |
| bool StringContainsStrongRTLChars(const string16& text) { |
| const UChar* string = text.c_str(); |
| size_t length = text.length(); |
| size_t position = 0; |
| while (position < length) { |
| UChar32 character; |
| size_t next_position = position; |
| U16_NEXT(string, next_position, length, character); |
| |
| // Now that we have the character, we use ICU in order to query for the |
| // appropriate Unicode BiDi character type. |
| int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); |
| if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) |
| return true; |
| |
| position = next_position; |
| } |
| |
| return false; |
| } |
| |
| #if defined(WCHAR_T_IS_UTF32) |
| bool StringContainsStrongRTLChars(const std::wstring& text) { |
| return StringContainsStrongRTLChars(WideToUTF16(text)); |
| } |
| #endif |
| |
| void WrapStringWithLTRFormatting(string16* text) { |
| if (text->empty()) |
| return; |
| |
| // Inserting an LRE (Left-To-Right Embedding) mark as the first character. |
| text->insert(0, 1, kLeftToRightEmbeddingMark); |
| |
| // Inserting a PDF (Pop Directional Formatting) mark as the last character. |
| text->push_back(kPopDirectionalFormatting); |
| } |
| |
| #if defined(WCHAR_T_IS_UTF32) |
| void WrapStringWithLTRFormatting(std::wstring* text) { |
| if (text->empty()) |
| return; |
| |
| // Inserting an LRE (Left-To-Right Embedding) mark as the first character. |
| text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); |
| |
| // Inserting a PDF (Pop Directional Formatting) mark as the last character. |
| text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); |
| } |
| #endif |
| |
| void WrapStringWithRTLFormatting(string16* text) { |
| if (text->empty()) |
| return; |
| |
| // Inserting an RLE (Right-To-Left Embedding) mark as the first character. |
| text->insert(0, 1, kRightToLeftEmbeddingMark); |
| |
| // Inserting a PDF (Pop Directional Formatting) mark as the last character. |
| text->push_back(kPopDirectionalFormatting); |
| } |
| |
| #if defined(WCHAR_T_IS_UTF32) |
| void WrapStringWithRTLFormatting(std::wstring* text) { |
| if (text->empty()) |
| return; |
| |
| // Inserting an RLE (Right-To-Left Embedding) mark as the first character. |
| text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); |
| |
| // Inserting a PDF (Pop Directional Formatting) mark as the last character. |
| text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); |
| } |
| #endif |
| |
| void WrapPathWithLTRFormatting(const FilePath& path, |
| string16* rtl_safe_path) { |
| // Wrap the overall path with LRE-PDF pair which essentialy marks the |
| // string as a Left-To-Right string. |
| // Inserting an LRE (Left-To-Right Embedding) mark as the first character. |
| rtl_safe_path->push_back(kLeftToRightEmbeddingMark); |
| #if defined(OS_MACOSX) |
| rtl_safe_path->append(UTF8ToUTF16(path.value())); |
| #elif defined(OS_WIN) |
| rtl_safe_path->append(path.value()); |
| #else // defined(OS_POSIX) && !defined(OS_MACOSX) |
| std::wstring wide_path = base::SysNativeMBToWide(path.value()); |
| rtl_safe_path->append(WideToUTF16(wide_path)); |
| #endif |
| // Inserting a PDF (Pop Directional Formatting) mark as the last character. |
| rtl_safe_path->push_back(kPopDirectionalFormatting); |
| } |
| |
| string16 GetDisplayStringInLTRDirectionality(const string16& text) { |
| if (!IsRTL()) |
| return text; |
| string16 text_mutable(text); |
| WrapStringWithLTRFormatting(&text_mutable); |
| return text_mutable; |
| } |
| |
| const string16 StripWrappingBidiControlCharacters(const string16& text) { |
| if (text.empty()) |
| return text; |
| size_t begin_index = 0; |
| char16 begin = text[begin_index]; |
| if (begin == kLeftToRightEmbeddingMark || |
| begin == kRightToLeftEmbeddingMark || |
| begin == kLeftToRightOverride || |
| begin == kRightToLeftOverride) |
| ++begin_index; |
| size_t end_index = text.length() - 1; |
| if (text[end_index] == kPopDirectionalFormatting) |
| --end_index; |
| return text.substr(begin_index, end_index - begin_index + 1); |
| } |
| |
| } // namespace i18n |
| } // namespace base |