/* blob: 1c384e1e172b5798abc08b365d2966a26c633807 [file] [log] [blame] */
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picobase.h
*
* base functionality
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
#ifndef PICOBASE_H_
#define PICOBASE_H_
#include "picoos.h"
/* When compiled as C++, give everything declared after this point C linkage;
* the matching closing brace is expected near the end of the header. */
#ifdef __cplusplus
extern "C" {
#endif
/* Never-compiled closing brace that balances the one opened above.
* NOTE(review): presumably only there to keep brace-matching editors and
* indenters in sync -- confirm before removing. */
#if 0
}
#endif
/* maximum number of bytes of an UTF8 character */
#define PICOBASE_UTF8_MAXLEN 4
/* buffer for exactly one UTF8 character: up to PICOBASE_UTF8_MAXLEN bytes
* followed by a terminating zero byte. As a function parameter this array
* type decays to a pointer, so callers must supply a buffer of this size. */
typedef picoos_uint8 picobase_utf8char[PICOBASE_UTF8_MAXLEN+1]; /* always zero terminated */
/* element types for UTF8, UTF16 and UTF32 encoded data.
* NOTE(review): presumably one code unit each; the underlying picoos_uintN
* types are not defined in this header (presumably in picoos.h) -- confirm. */
typedef picoos_uint8 picobase_utf8;
typedef picoos_uint16 picobase_utf16;
typedef picoos_uint32 picobase_utf32;
/* ***************************************************************/
/* Unicode UTF8 functions */
/* ***************************************************************/
/**
* Determines the number of UTF8 characters contained in
* the UTF8 string 'utf8str' of maximum length 'maxlen' (in bytes)
* @param utf8str : a string encoded in UTF8
* @param maxlen : max length (in bytes) accessible in utf8str
* @return >=0 : length of the UTF8 string in number of UTF8 characters,
* counted up to the first '\0' or up to maxlen
* @return <0 : not starting with a valid UTF8 character
* @remarks strict implementation, not allowing invalid utf8
* @remarks NOTE(review): 'maxlen' is a picoos_uint16 whereas the other
* functions in this header take lengths and positions as picoos_uint32 --
* confirm that the smaller type is intended here
*/
picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
const picoos_uint16 maxlen);
/**
* Determines the number of bytes an UTF8 character uses, based
* on the first byte of the UTF8 character
* @param x : the first (and maybe only) byte of an UTF8 character; it is
* converted to picoos_uint8 before being classified, so a plain or signed
* 'char' holding a byte >= 0x80 is treated like the same unsigned byte
* @return 1 : x < 0x80 (single byte character)
* @return 2 : 0xC0 <= x <= 0xDF
* @return 3 : 0xE0 <= x <= 0xEF
* @return 4 : 0xF0 <= x <= 0xF7
* @return 0 : not a valid UTF8 character start (0x80..0xBF or x >= 0xF8)
* @remarks implemented as a macro: 'x' is evaluated several times, so never
* pass an expression with side effects (such as a post-incremented pointer)
* @remarks only the ranges listed above are checked; 0xC0, 0xC1 and
* 0xF5..0xF7 are accepted as lead bytes although they cannot occur in
* well-formed UTF8 (RFC 3629)
*/
/* equivalent prototype:
* picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar); */
#define picobase_det_utf8_length(x) \
    ( ((picoos_uint8)(x) <  0x80) ? 1 : \
     (((picoos_uint8)(x) >= 0xF8) ? 0 : \
     (((picoos_uint8)(x) >= 0xF0) ? 4 : \
     (((picoos_uint8)(x) >= 0xE0) ? 3 : \
     (((picoos_uint8)(x) >= 0xC0) ? 2 : 0)))) )
/**
* Converts the content of 'utf8str' to lowercase and stores it in 'lowercase'
* @param utf8str : utf8 string to convert
* @param lowercase : string converted to lowercase (output)
* @param lowercaseMaxLen : maximal number of bytes available in 'lowercase'
* @param done : flag to report success/failure of the operation (output)
* @return TRUE if successful, FALSE otherwise
* @remarks NOTE(review): the declared return type is picoos_int32 rather
* than a boolean type; what exactly is returned besides 'done' cannot be
* told from this header -- confirm against the definition
*/
picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done);
/**
* Converts the content of 'utf8str' to uppercase and stores it in 'uppercase'
* @param utf8str : utf8 string to convert
* @param uppercase : string converted to uppercase (output)
* @param uppercaseMaxLen : maximal number of bytes available in 'uppercase'
* @param done : flag to report success/failure of the operation (output)
* @return TRUE if successful, FALSE otherwise
* @remarks NOTE(review): 'uppercaseMaxLen' is declared as plain int while
* picobase_lowercase_utf8_str uses picoos_int32 for the same purpose, and
* the declared return type is picoos_int32 rather than a boolean type --
* confirm both against the definition
*/
picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done);
/**
* Gets the next UTF8 character 'utf8char' from the UTF8 string
* 'utf8s' starting at position 'pos'
* @param utf8s : UTF8 string
* @param utf8slenmax : max length accessible in utf8s
* @param pos : position from where the UTF8 character is checked and copied
* (set also as output to the position directly following the UTF8 char)
* @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes
* (output); the caller provides the buffer, which has to be a
* picobase_utf8char, i.e. PICOBASE_UTF8_MAXLEN+1 bytes
* @return TRUE if okay
* @return FALSE if there is no valid UTF8 char or no more UTF8 char
* available within utf8slenmax
*/
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
const picoos_uint32 utf8slenmax,
picoos_uint32 *pos,
picobase_utf8char utf8char);
/**
* Same as picobase_get_next_utf8char
* without copying the char to utf8char, i.e. only 'pos' is advanced
* @param utf8s : UTF8 string
* @param utf8slenmax : max length accessible in utf8s
* @param pos : position from where the UTF8 character is checked
* (set also as output to the position directly following the UTF8 char)
* @return TRUE / FALSE as for picobase_get_next_utf8char
*/
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
const picoos_uint32 utf8slenmax,
picoos_uint32 *pos);
/**
* Gets the previous UTF8 character 'utf8char' from the UTF8 string
* 'utf8s' starting the backward search at position 'pos-1'
* @param utf8s : UTF8 string
* @param utf8slenmin : min length accessible in utf8s
* (NOTE(review): presumably the lowest position the backward search may
* reach -- confirm against the definition)
* @param pos : the search for the prev UTF8 char starts at [pos-1]
* (set also as output to the start position of the prev UTF8 character)
* @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes
* (output); the caller provides the buffer, which has to be a
* picobase_utf8char, i.e. PICOBASE_UTF8_MAXLEN+1 bytes
* @return TRUE if okay
* @return FALSE if there is no valid UTF8 char preceding pos or no more
* UTF8 char available within utf8slenmin
*/
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
const picoos_uint32 utf8slenmin,
picoos_uint32 *pos,
picobase_utf8char utf8char);
/**
* Same as picobase_get_prev_utf8char
* without copying the char to utf8char, i.e. only 'pos' is moved back
* @param utf8s : UTF8 string
* @param utf8slenmin : min length accessible in utf8s
* @param pos : the search for the prev UTF8 char starts at [pos-1]
* (set also as output to the start position of the prev UTF8 character)
* @return TRUE / FALSE as for picobase_get_prev_utf8char
*/
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
const picoos_uint32 utf8slenmin,
picoos_uint32 *pos);
/**
* Returns TRUE if the input string is UTF8 and uppercase
* @param str : UTF8 string (declared without const although this is a
* query by name -- NOTE(review): confirm that it is not modified)
* @param strmaxlen : max length for the input string
* @return TRUE if string is UTF8 and uppercase
* @return FALSE otherwise
*/
extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen);
/**
* Returns TRUE if the input string is UTF8 and lowercase
* @param str : UTF8 string (declared without const although this is a
* query by name -- NOTE(review): confirm that it is not modified)
* @param strmaxlen : max length for the input string
* @return TRUE if string is UTF8 and lowercase
* @return FALSE otherwise
*/
extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen);
#ifdef __cplusplus
}
#endif
#endif /*PICOBASE_H_*/