// blob: 9da7d298a45796c88befd114418e5691ddca39a9 [file] [log] [blame]
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _ANDROID_PHONETIC_STRING_UTILS_H
#define _ANDROID_PHONETIC_STRING_UTILS_H
#include <string.h> // For size_t.
#include <utils/String8.h>
namespace android {
// Returns the Unicode code point read from the UTF-8 string "src", and sets
// "*next" to the next index. Returns a negative value when the input is
// invalid.
// NOTE(review): presumably "len" is the length of "src" and "index" is the
// position at which decoding starts -- confirm against the implementation.
// NOTE(review): "next" is an int* although "index" is a size_t; callers that
// feed "*next" back in as "index" rely on an implicit conversion.
int GetCodePointFromUtf8(const char *src, size_t len, size_t index, int *next);
// Returns the "phonetically sortable" code point for "codepoint". What
// "phonetically sortable" means depends on the locale; currently only
// Japanese is considered.
//
// "*next_is_consumed" is set to true when "next_codepoint" is "consumed"
// together with "codepoint" (e.g. the voiced mark of Japanese half-width
// katakana is consumed when the preceding "codepoint" is appropriate).
// NOTE(review): whether "*next_is_consumed" is explicitly set to false
// otherwise is not stated here -- confirm against the implementation.
//
// Returns -1 if the code point should not be considered when sorting
// (e.g. whitespace).
int GetPhoneticallySortableCodePoint(char32_t codepoint,
char32_t next_codepoint,
bool *next_is_consumed);
// Returns the "normalized" code point for "codepoint". What "normalized"
// means depends on the locale; currently only Japanese is normalized, and
// all other characters are left as is.
//
// "*next_is_consumed" is set to true when "next_codepoint" is "consumed"
// together with "codepoint" (e.g. the voiced mark of Japanese half-width
// katakana is consumed when the preceding "codepoint" is appropriate, like
// half-width "ka").
//
// In Japanese, "normalized" means that half-width and full-width katakana
// are appropriately converted to hiragana.
int GetNormalizedCodePoint(char32_t codepoint,
char32_t next_codepoint,
bool *next_is_consumed);
// Writes the UTF-8 encoding of "codepoint" into "dst". Returns true on
// success. Returns false if the input is invalid or the destination is not
// long enough.
// NOTE(review): presumably "len" is the capacity of "dst" and "*index" is the
// write position, advanced past the bytes written -- confirm against the
// implementation.
bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index);
// Creates a "phonetically sortable" UTF-8 string from "src" and stores it in
// "*dst". The caller owns "*dst" and must free it after use.
// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
//
// Note that currently this function considers only Japanese.
// NOTE(review): the matching deallocator (presumably free()), the meaning of
// the bool result (presumably true on success) and of "*len" (presumably the
// length of the produced string) are not stated here -- confirm against the
// implementation.
bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len);
// Creates a "normalized" UTF-8 string from "src" and stores it in "*dst".
// The caller owns "*dst" and must free it after use.
// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
//
// Note that currently this function considers only Japanese.
// NOTE(review): the matching deallocator (presumably free()), the meaning of
// the bool result (presumably true on success) and of "*len" (presumably the
// length of the produced string) are not stated here -- confirm against the
// implementation.
bool GetNormalizedString(const char *src, char **dst, size_t *len);
} // namespace android
#endif