jni/include/userdict.h - platform/packages/inputmethods/PinyinIME - Git at Google

 /*
  * Copyright (C) 2009 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef PINYINIME_INCLUDE_USERDICT_H__
 #define PINYINIME_INCLUDE_USERDICT_H__

 #define ___CACHE_ENABLED___
 #define ___SYNC_ENABLED___
 #define ___PREDICT_ENABLED___

 // Debug performance for operations
 // #define ___DEBUG_PERF___

 #include <pthread.h>
 #include "atomdictbase.h"

 namespace ime_pinyin {

 class UserDict : public AtomDictBase {
  public:
   UserDict();
   ~UserDict();

   bool load_dict(const char *file_name, LemmaIdType start_id,
                  LemmaIdType end_id);

   bool close_dict();

   size_t number_of_lemmas();

   void reset_milestones(uint16 from_step, MileStoneHandle from_handle);

   MileStoneHandle extend_dict(MileStoneHandle from_handle,
                               const DictExtPara *dep, LmaPsbItem *lpi_items,
                               size_t lpi_max, size_t *lpi_num);

   size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
                   LmaPsbItem *lpi_items, size_t lpi_max);

   uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
                        uint16 str_max);

   uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                           uint16 splids_max, bool arg_valid);

   size_t predict(const char16 last_hzs[], uint16 hzs_len,
                  NPredictItem *npre_items, size_t npre_max,
                  size_t b4_used);

   // Full spelling ids are required
   LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
                         uint16 lemma_len, uint16 count);

   LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
                            bool selected);

   LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
                            uint16 lemma_len);

   LmaScoreType get_lemma_score(LemmaIdType lemma_id);

   LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
                         uint16 lemma_len);

   bool remove_lemma(LemmaIdType lemma_id);

   size_t get_total_lemma_count();
   void set_total_lemma_count_of_others(size_t count);

   void flush_cache();

   void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
                  uint32 reclaim_ratio);

   void reclaim();

   void defragment();

 #ifdef ___SYNC_ENABLED___
   void clear_sync_lemmas(unsigned int start, unsigned int end);

   int get_sync_count();

   LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
                         uint16 lemma_len, uint16 count, uint64 lmt);
    /**
     * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
     *
     * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
     * @param len length of lemmas string in UTF-16LE
     * @return newly added lemma count
     */
   int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);

   /**
    * Get lemmas need sync to a UTF-16LE string of above format.
    * Note: input buffer (str) must not be too small. If str is too small to
    *       contain single one lemma, there might be a dead loop.
    *
    * @param str buffer to write lemmas
    * @param size buffer size in UTF-16LE
    * @param count output value of lemma returned
    * @return UTF-16LE string length
    */
   int get_sync_lemmas_in_utf16le_string_from_beginning(
       char16 * str, int size, int * count);

 #endif

   struct UserDictStat {
     uint32 version;
     const char * file_name;
     struct timeval load_time;
     struct timeval last_update;
     uint32 disk_size;
     uint32 lemma_count;
     uint32 lemma_size;
     uint32 delete_count;
     uint32 delete_size;
 #ifdef ___SYNC_ENABLED___
     uint32 sync_count;
 #endif
     uint32 reclaim_ratio;
     uint32 limit_lemma_count;
     uint32 limit_lemma_size;
   };

   bool state(UserDictStat * stat);

  private:
   uint32 total_other_nfreq_;
   struct timeval load_time_;
   LemmaIdType start_id_;
   uint32 version_;
   uint8 * lemmas_;

   // In-Memory-Only flag for each lemma
   static const uint8 kUserDictLemmaFlagRemove = 1;
   // Inuse lemmas' offset
   uint32 * offsets_;
   // Highest bit in offset tells whether corresponding lemma is removed
   static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
   // Maximum possible for the offset
   static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
   // Bit width for last modified time, from 1 to 16
   static const uint32 kUserDictLMTBitWidth = 16;
   // Granularity for last modified time in second
   static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
   // Maximum frequency count
   static const uint16 kUserDictMaxFrequency = 0xFFFF;

 #define COARSE_UTC(year, month, day, hour, minute, second) \
   ( \
     (year - 1970) * 365 * 24 * 60 * 60 + \
     (month - 1) * 30 * 24 * 60 * 60 + \
     (day - 1) * 24 * 60 * 60 + \
     (hour - 0) * 60 * 60 + \
     (minute - 0) * 60 + \
     (second - 0) \
   )
   static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);

   // Correspond to offsets_
   uint32 * scores_;
   // Following two fields are only valid in memory
   uint32 * ids_;
 #ifdef ___PREDICT_ENABLED___
   uint32 * predicts_;
 #endif
 #ifdef ___SYNC_ENABLED___
   uint32 * syncs_;
   size_t sync_count_size_;
 #endif
   uint32 * offsets_by_id_;

   size_t lemma_count_left_;
   size_t lemma_size_left_;

   const char * dict_file_;

   // Be sure size is 4xN
   struct UserDictInfo {
     // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
     uint32 reclaim_ratio;
     // maximum lemma count, 0 means no limitation
     uint32 limit_lemma_count;
     // Maximum lemma size, it's different from
     // whole disk file size or in-mem dict size
     // 0 means no limitation
     uint32 limit_lemma_size;
     // Total lemma count including deleted and inuse
     // Also indicate offsets_ size
     uint32 lemma_count;
     // Total size of lemmas including used and freed
     uint32 lemma_size;
     // Freed lemma count
     uint32 free_count;
     // Freed lemma size in byte
     uint32 free_size;
 #ifdef ___SYNC_ENABLED___
     uint32 sync_count;
 #endif
     int32 total_nfreq;
   } dict_info_;

   static const uint32 kUserDictVersion = 0x0ABCDEF0;

   static const uint32 kUserDictPreAlloc = 32;
   static const uint32 kUserDictAverageNchar = 8;

   enum UserDictState {
     // Keep in order
     USER_DICT_NONE = 0,
     USER_DICT_SYNC,
 #ifdef ___SYNC_ENABLED___
     USER_DICT_SYNC_DIRTY,
 #endif
     USER_DICT_SCORE_DIRTY,
     USER_DICT_OFFSET_DIRTY,
     USER_DICT_LEMMA_DIRTY,

     USER_DICT_DEFRAGMENTED,
   } state_;

   struct UserDictSearchable {
     uint16 splids_len;
     uint16 splid_start[kMaxLemmaSize];
     uint16 splid_count[kMaxLemmaSize];
     // Compact inital letters for both FuzzyCompareSpellId and cache system
     uint32 signature[kMaxLemmaSize / 4];
   };

 #ifdef ___CACHE_ENABLED___
   enum UserDictCacheType {
     USER_DICT_CACHE,
     USER_DICT_MISS_CACHE,
   };

   static const int kUserDictCacheSize = 4;
   static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;

   struct UserDictMissCache {
     uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
     uint16 head, tail;
   } miss_caches_[kMaxLemmaSize];

   struct UserDictCache {
     uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
     uint32 offsets[kUserDictCacheSize];
     uint32 lengths[kUserDictCacheSize];
     // Ring buffer
     uint16 head, tail;
   } caches_[kMaxLemmaSize];

   void cache_init();

   void cache_push(UserDictCacheType type,
                  UserDictSearchable *searchable,
                  uint32 offset, uint32 length);

   bool cache_hit(UserDictSearchable *searchable,
                  uint32 *offset, uint32 *length);

   bool load_cache(UserDictSearchable *searchable,
                   uint32 *offset, uint32 *length);

   void save_cache(UserDictSearchable *searchable,
                   uint32 offset, uint32 length);

   void reset_cache();

   bool load_miss_cache(UserDictSearchable *searchable);

   void save_miss_cache(UserDictSearchable *searchable);

   void reset_miss_cache();
 #endif

   LmaScoreType translate_score(int f);

   int extract_score_freq(int raw_score);

   uint64 extract_score_lmt(int raw_score);

   inline int build_score(uint64 lmt, int freq);

   inline int64 utf16le_atoll(uint16 *s, int len);

   inline int utf16le_lltoa(int64 v, uint16 *s, int size);

   LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
                         uint16 lemma_len, uint16 count, uint64 lmt);

   size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
                    LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);

   int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);

   int _get_lemma_score(LemmaIdType lemma_id);

   int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
                                const UserDictSearchable *searchable);

   bool is_prefix_spell_id(const uint16 * fullids,
                           uint16 fulllen, const UserDictSearchable *searchable);

   uint32 get_dict_file_size(UserDictInfo * info);

   bool reset(const char *file);

   bool validate(const char *file);

   bool load(const char *file, LemmaIdType start_id);

   bool is_valid_state();

   bool is_valid_lemma_id(LemmaIdType id);

   LemmaIdType get_max_lemma_id();

   void set_lemma_flag(uint32 offset, uint8 flag);

   char get_lemma_flag(uint32 offset);

   char get_lemma_nchar(uint32 offset);

   uint16 * get_lemma_spell_ids(uint32 offset);

   uint16 * get_lemma_word(uint32 offset);

   // Prepare searchable to fasten locate process
   void prepare_locate(UserDictSearchable *searchable,
                       const uint16 * splids, uint16 len);

   // Compare initial letters only
   int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
                                const UserDictSearchable *searchable);

   // Compare exactly two spell ids
   // First argument must be a full id spell id
   bool equal_spell_id(const uint16 * fullids,
                       uint16 fulllen, const UserDictSearchable *searchable);

   // Find first item by initial letters
   int32 locate_first_in_offsets(const UserDictSearchable *searchable);

   LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
                            uint16 lemma_len, uint16 count, uint64 lmt);

   // Check if a lemma is in dictionary
   int32 locate_in_offsets(char16 lemma_str[],
                           uint16 splid_str[], uint16 lemma_len);

   bool remove_lemma_by_offset_index(int offset_index);
 #ifdef ___PREDICT_ENABLED___
   uint32 locate_where_to_insert_in_predicts(const uint16 * words,
                                             int lemma_len);

   int32 locate_first_in_predicts(const uint16 * words, int lemma_len);

   void remove_lemma_from_predict_list(uint32 offset);
 #endif
 #ifdef ___SYNC_ENABLED___
   void queue_lemma_for_sync(LemmaIdType id);

   void remove_lemma_from_sync_list(uint32 offset);

   void write_back_sync(int fd);
 #endif
   void write_back_score(int fd);
   void write_back_offset(int fd);
   void write_back_lemma(int fd);
   void write_back_all(int fd);
   void write_back();

   struct UserDictScoreOffsetPair {
     int score;
     uint32 offset_index;
   };

   inline void swap(UserDictScoreOffsetPair * sop, int i, int j);

   void shift_down(UserDictScoreOffsetPair * sop, int i, int n);

   // On-disk format for each lemma
   // +-------------+
   // | Version (4) |
   // +-------------+
   // +-----------+-----------+--------------------+-------------------+
   // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
   // +-----------+-----------+--------------------+-------------------+
   // ...
   // +-----------------------+     +-------------+      <---Offset of offset
   // | Offset1 by_splids (4) | ... | OffsetN (4) |
   // +-----------------------+     +-------------+
 #ifdef ___PREDICT_ENABLED___
   // +----------------------+     +-------------+
   // | Offset1 by_lemma (4) | ... | OffsetN (4) |
   // +----------------------+     +-------------+
 #endif
   // +------------+     +------------+
   // | Score1 (4) | ... | ScoreN (4) |
   // +------------+     +------------+
 #ifdef ___SYNC_ENABLED___
   // +-------------+     +-------------+
   // | NewAdd1 (4) | ... | NewAddN (4) |
   // +-------------+     +-------------+
 #endif
   // +----------------+
   // | Dict Info (4x) |
   // +----------------+
 };
 }

 #endif
	/*
	* Copyright (C) 2009 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef PINYINIME_INCLUDE_USERDICT_H__
	#define PINYINIME_INCLUDE_USERDICT_H__

	#define ___CACHE_ENABLED___
	#define ___SYNC_ENABLED___
	#define ___PREDICT_ENABLED___

	// Debug performance for operations
	// #define ___DEBUG_PERF___

	#include <pthread.h>
	#include "atomdictbase.h"

	namespace ime_pinyin {

	class UserDict : public AtomDictBase {
	public:
	UserDict();
	~UserDict();

	bool load_dict(const char *file_name, LemmaIdType start_id,
	LemmaIdType end_id);

	bool close_dict();

	size_t number_of_lemmas();

	void reset_milestones(uint16 from_step, MileStoneHandle from_handle);

	MileStoneHandle extend_dict(MileStoneHandle from_handle,
	const DictExtPara dep, LmaPsbItem lpi_items,
	size_t lpi_max, size_t *lpi_num);

	size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
	LmaPsbItem *lpi_items, size_t lpi_max);

	uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
	uint16 str_max);

	uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
	uint16 splids_max, bool arg_valid);

	size_t predict(const char16 last_hzs[], uint16 hzs_len,
	NPredictItem *npre_items, size_t npre_max,
	size_t b4_used);

	// Full spelling ids are required
	LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len, uint16 count);

	LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
	bool selected);

	LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len);

	LmaScoreType get_lemma_score(LemmaIdType lemma_id);

	LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len);

	bool remove_lemma(LemmaIdType lemma_id);

	size_t get_total_lemma_count();
	void set_total_lemma_count_of_others(size_t count);

	void flush_cache();

	void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
	uint32 reclaim_ratio);

	void reclaim();

	void defragment();

	#ifdef ___SYNC_ENABLED___
	void clear_sync_lemmas(unsigned int start, unsigned int end);

	int get_sync_count();

	LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len, uint16 count, uint64 lmt);
	/**
	* Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
	*
	* @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
	* @param len length of lemmas string in UTF-16LE
	* @return newly added lemma count
	*/
	int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);

	/**
	* Get lemmas need sync to a UTF-16LE string of above format.
	* Note: input buffer (str) must not be too small. If str is too small to
	* contain single one lemma, there might be a dead loop.
	*
	* @param str buffer to write lemmas
	* @param size buffer size in UTF-16LE
	* @param count output value of lemma returned
	* @return UTF-16LE string length
	*/
	int get_sync_lemmas_in_utf16le_string_from_beginning(
	char16 * str, int size, int * count);

	#endif

	struct UserDictStat {
	uint32 version;
	const char * file_name;
	struct timeval load_time;
	struct timeval last_update;
	uint32 disk_size;
	uint32 lemma_count;
	uint32 lemma_size;
	uint32 delete_count;
	uint32 delete_size;
	#ifdef ___SYNC_ENABLED___
	uint32 sync_count;
	#endif
	uint32 reclaim_ratio;
	uint32 limit_lemma_count;
	uint32 limit_lemma_size;
	};

	bool state(UserDictStat * stat);

	private:
	uint32 total_other_nfreq_;
	struct timeval load_time_;
	LemmaIdType start_id_;
	uint32 version_;
	uint8 * lemmas_;

	// In-Memory-Only flag for each lemma
	static const uint8 kUserDictLemmaFlagRemove = 1;
	// Inuse lemmas' offset
	uint32 * offsets_;
	// Highest bit in offset tells whether corresponding lemma is removed
	static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
	// Maximum possible for the offset
	static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
	// Bit width for last modified time, from 1 to 16
	static const uint32 kUserDictLMTBitWidth = 16;
	// Granularity for last modified time in second
	static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
	// Maximum frequency count
	static const uint16 kUserDictMaxFrequency = 0xFFFF;

	#define COARSE_UTC(year, month, day, hour, minute, second) \
	( \
	(year - 1970) * 365 * 24 * 60 * 60 + \
	(month - 1) * 30 * 24 * 60 * 60 + \
	(day - 1) * 24 * 60 * 60 + \
	(hour - 0) * 60 * 60 + \
	(minute - 0) * 60 + \
	(second - 0) \
	)
	static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);

	// Correspond to offsets_
	uint32 * scores_;
	// Following two fields are only valid in memory
	uint32 * ids_;
	#ifdef ___PREDICT_ENABLED___
	uint32 * predicts_;
	#endif
	#ifdef ___SYNC_ENABLED___
	uint32 * syncs_;
	size_t sync_count_size_;
	#endif
	uint32 * offsets_by_id_;

	size_t lemma_count_left_;
	size_t lemma_size_left_;

	const char * dict_file_;

	// Be sure size is 4xN
	struct UserDictInfo {
	// When limitation reached, how much percentage will be reclaimed (1 ~ 100)
	uint32 reclaim_ratio;
	// maximum lemma count, 0 means no limitation
	uint32 limit_lemma_count;
	// Maximum lemma size, it's different from
	// whole disk file size or in-mem dict size
	// 0 means no limitation
	uint32 limit_lemma_size;
	// Total lemma count including deleted and inuse
	// Also indicate offsets_ size
	uint32 lemma_count;
	// Total size of lemmas including used and freed
	uint32 lemma_size;
	// Freed lemma count
	uint32 free_count;
	// Freed lemma size in byte
	uint32 free_size;
	#ifdef ___SYNC_ENABLED___
	uint32 sync_count;
	#endif
	int32 total_nfreq;
	} dict_info_;

	static const uint32 kUserDictVersion = 0x0ABCDEF0;

	static const uint32 kUserDictPreAlloc = 32;
	static const uint32 kUserDictAverageNchar = 8;

	enum UserDictState {
	// Keep in order
	USER_DICT_NONE = 0,
	USER_DICT_SYNC,
	#ifdef ___SYNC_ENABLED___
	USER_DICT_SYNC_DIRTY,
	#endif
	USER_DICT_SCORE_DIRTY,
	USER_DICT_OFFSET_DIRTY,
	USER_DICT_LEMMA_DIRTY,

	USER_DICT_DEFRAGMENTED,
	} state_;

	struct UserDictSearchable {
	uint16 splids_len;
	uint16 splid_start[kMaxLemmaSize];
	uint16 splid_count[kMaxLemmaSize];
	// Compact inital letters for both FuzzyCompareSpellId and cache system
	uint32 signature[kMaxLemmaSize / 4];
	};

	#ifdef ___CACHE_ENABLED___
	enum UserDictCacheType {
	USER_DICT_CACHE,
	USER_DICT_MISS_CACHE,
	};

	static const int kUserDictCacheSize = 4;
	static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;

	struct UserDictMissCache {
	uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
	uint16 head, tail;
	} miss_caches_[kMaxLemmaSize];

	struct UserDictCache {
	uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
	uint32 offsets[kUserDictCacheSize];
	uint32 lengths[kUserDictCacheSize];
	// Ring buffer
	uint16 head, tail;
	} caches_[kMaxLemmaSize];

	void cache_init();

	void cache_push(UserDictCacheType type,
	UserDictSearchable *searchable,
	uint32 offset, uint32 length);

	bool cache_hit(UserDictSearchable *searchable,
	uint32 offset, uint32 length);

	bool load_cache(UserDictSearchable *searchable,
	uint32 offset, uint32 length);

	void save_cache(UserDictSearchable *searchable,
	uint32 offset, uint32 length);

	void reset_cache();

	bool load_miss_cache(UserDictSearchable *searchable);

	void save_miss_cache(UserDictSearchable *searchable);

	void reset_miss_cache();
	#endif

	LmaScoreType translate_score(int f);

	int extract_score_freq(int raw_score);

	uint64 extract_score_lmt(int raw_score);

	inline int build_score(uint64 lmt, int freq);

	inline int64 utf16le_atoll(uint16 *s, int len);

	inline int utf16le_lltoa(int64 v, uint16 *s, int size);

	LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len, uint16 count, uint64 lmt);

	size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
	LmaPsbItem lpi_items, size_t lpi_max, bool need_extend);

	int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);

	int _get_lemma_score(LemmaIdType lemma_id);

	int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
	const UserDictSearchable *searchable);

	bool is_prefix_spell_id(const uint16 * fullids,
	uint16 fulllen, const UserDictSearchable *searchable);

	uint32 get_dict_file_size(UserDictInfo * info);

	bool reset(const char *file);

	bool validate(const char *file);

	bool load(const char *file, LemmaIdType start_id);

	bool is_valid_state();

	bool is_valid_lemma_id(LemmaIdType id);

	LemmaIdType get_max_lemma_id();

	void set_lemma_flag(uint32 offset, uint8 flag);

	char get_lemma_flag(uint32 offset);

	char get_lemma_nchar(uint32 offset);

	uint16 * get_lemma_spell_ids(uint32 offset);

	uint16 * get_lemma_word(uint32 offset);

	// Prepare searchable to fasten locate process
	void prepare_locate(UserDictSearchable *searchable,
	const uint16 * splids, uint16 len);

	// Compare initial letters only
	int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
	const UserDictSearchable *searchable);

	// Compare exactly two spell ids
	// First argument must be a full id spell id
	bool equal_spell_id(const uint16 * fullids,
	uint16 fulllen, const UserDictSearchable *searchable);

	// Find first item by initial letters
	int32 locate_first_in_offsets(const UserDictSearchable *searchable);

	LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
	uint16 lemma_len, uint16 count, uint64 lmt);

	// Check if a lemma is in dictionary
	int32 locate_in_offsets(char16 lemma_str[],
	uint16 splid_str[], uint16 lemma_len);

	bool remove_lemma_by_offset_index(int offset_index);
	#ifdef ___PREDICT_ENABLED___
	uint32 locate_where_to_insert_in_predicts(const uint16 * words,
	int lemma_len);

	int32 locate_first_in_predicts(const uint16 * words, int lemma_len);

	void remove_lemma_from_predict_list(uint32 offset);
	#endif
	#ifdef ___SYNC_ENABLED___
	void queue_lemma_for_sync(LemmaIdType id);

	void remove_lemma_from_sync_list(uint32 offset);

	void write_back_sync(int fd);
	#endif
	void write_back_score(int fd);
	void write_back_offset(int fd);
	void write_back_lemma(int fd);
	void write_back_all(int fd);
	void write_back();

	struct UserDictScoreOffsetPair {
	int score;
	uint32 offset_index;
	};

	inline void swap(UserDictScoreOffsetPair * sop, int i, int j);

	void shift_down(UserDictScoreOffsetPair * sop, int i, int n);

	// On-disk format for each lemma
	// +-------------+
	// \| Version (4) \|
	// +-------------+
	// +-----------+-----------+--------------------+-------------------+
	// \| Spare (1) \| Nchar (1) \| Splids (2 x Nchar) \| Lemma (2 x Nchar) \|
	// +-----------+-----------+--------------------+-------------------+
	// ...
	// +-----------------------+ +-------------+ <---Offset of offset
	// \| Offset1 by_splids (4) \| ... \| OffsetN (4) \|
	// +-----------------------+ +-------------+
	#ifdef ___PREDICT_ENABLED___
	// +----------------------+ +-------------+
	// \| Offset1 by_lemma (4) \| ... \| OffsetN (4) \|
	// +----------------------+ +-------------+
	#endif
	// +------------+ +------------+
	// \| Score1 (4) \| ... \| ScoreN (4) \|
	// +------------+ +------------+
	#ifdef ___SYNC_ENABLED___
	// +-------------+ +-------------+
	// \| NewAdd1 (4) \| ... \| NewAddN (4) \|
	// +-------------+ +-------------+
	#endif
	// +----------------+
	// \| Dict Info (4x) \|
	// +----------------+
	};
	}

	#endif