blob: 10931555e56e3581a8ad3ceedf3d3a3922efa7ae [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin;
import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
* Reads and writes Binary files for a UserHistoryDictionary.
*
* All the methods in this class are static.
*/
public final class UserHistoryDictIOUtils {
private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
private static final boolean DEBUG = false;
/**
 * Receiver for the words produced while a dictionary is being read.
 *
 * Within this file, {@code addWordsFromWordMap} reports every unigram and every resolvable
 * bigram to an implementation of this interface.
 */
public interface OnAddWordListener {
    /**
     * Reports a single word.
     *
     * @param word the word being added
     * @param shortcutTarget the shortcut target; the only caller visible in this file passes
     *        {@code null}
     * @param frequency the frequency of the word
     */
    void setUnigram(String word, String shortcutTarget, int frequency);

    /**
     * Reports a pair of words.
     *
     * @param word1 the first word of the pair
     * @param word2 the second word of the pair
     * @param frequency the frequency of the pair
     */
    void setBigram(String word1, String word2, int frequency);
}
/**
 * Source of the frequencies used when the dictionary is built for writing.
 *
 * {@code constructFusionDictionary} queries it once per (word1, word2) pair; a returned value
 * of {@code -1} makes that pair be skipped, and {@code word1} is {@code null} when the entry
 * is a unigram.
 */
@UsedForTesting
public interface BigramDictionaryInterface {
    /**
     * Looks up the frequency of a word pair.
     *
     * @param word1 the first word, or {@code null} for a unigram entry
     * @param word2 the second word
     * @return the frequency; {@code -1} is treated by the caller in this file as
     *         "do not write this entry"
     */
    int getFrequency(String word1, String word2);
}
/**
 * A {@link FusionDictionaryBufferInterface} backed by a plain byte array and a cursor.
 *
 * Multi-byte reads are big-endian: the first byte read ends up in the most significant
 * position. No explicit bounds checks are made; an access past the end of the array surfaces
 * as the exception raised by the array access itself, after the cursor has already advanced.
 */
public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
    /** Backing storage; wrapped directly, not copied. */
    private final byte[] mBuffer;
    /** Index of the next byte to read or write. */
    private int mPosition;

    /**
     * Wraps the given array with the cursor at index 0.
     *
     * @param buffer the array to read from and write to
     */
    public ByteArrayWrapper(final byte[] buffer) {
        mBuffer = buffer;
        mPosition = 0;
    }

    /** Reads one byte as a value in [0, 255] and advances the cursor by one. */
    @Override
    public int readUnsignedByte() {
        // Advance first, then access: the cursor moves even if the access throws.
        final int index = mPosition++;
        return mBuffer[index] & 0xFF;
    }

    /** Reads two bytes, most significant first, as a value in [0, 65535]. */
    @Override
    public int readUnsignedShort() {
        final int high = readUnsignedByte();
        final int low = readUnsignedByte();
        return (high << 8) | low;
    }

    /** Reads three bytes, most significant first. */
    @Override
    public int readUnsignedInt24() {
        final int upperTwoBytes = readUnsignedShort();
        final int lowestByte = readUnsignedByte();
        return (upperTwoBytes << 8) | lowestByte;
    }

    /** Reads four bytes, most significant first, as a (possibly negative) int. */
    @Override
    public int readInt() {
        final int upperHalf = readUnsignedShort();
        final int lowerHalf = readUnsignedShort();
        return (upperHalf << 16) | lowerHalf;
    }

    /** Returns the current cursor index. */
    @Override
    public int position() {
        return mPosition;
    }

    /** Moves the cursor to the given index; the value is not validated. */
    @Override
    public void position(int position) {
        mPosition = position;
    }

    /** Stores one byte at the cursor and advances the cursor by one. */
    @Override
    public void put(final byte b) {
        // Same ordering as the reads: the cursor advances before the store is attempted.
        final int index = mPosition++;
        mBuffer[index] = b;
    }

    /**
     * Returns the index of the last element of the backing array.
     *
     * NOTE(review): this is one less than {@link #capacity()}; confirm that callers of
     * FusionDictionaryBufferInterface expect the last valid index here and not the length.
     */
    @Override
    public int limit() {
        return mBuffer.length - 1;
    }

    /** Returns the length of the backing array. */
    @Override
    public int capacity() {
        return mBuffer.length;
    }
}
/**
 * Builds a dictionary from the given bigrams and writes it to a stream in binary form.
 *
 * Failures reported by the writer are logged and not propagated to the caller. This method
 * does not itself close {@code destination}.
 *
 * @param destination the stream that receives the binary dictionary
 * @param dict the source of the frequency for each word pair
 * @param bigrams the word pairs to write
 * @param formatOptions the options handed to the binary writer
 */
public static void writeDictionaryBinary(final OutputStream destination,
        final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
        final FormatOptions formatOptions) {
    // The in-memory dictionary is assembled before any output is attempted.
    final FusionDictionary dictionaryToWrite = constructFusionDictionary(dict, bigrams);
    try {
        BinaryDictInputOutput.writeDictionaryBinary(
                destination, dictionaryToWrite, formatOptions);
        Log.d(TAG, "end writing");
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "Unsupported format", e);
    } catch (IOException e) {
        Log.e(TAG, "IO exception while writing file", e);
    }
}
/**
 * Builds a new FusionDictionary out of the entries listed in {@code bigrams}.
 *
 * For every (word1, word2) pair the frequency is fetched from {@code dict}; pairs whose
 * frequency is {@code -1} are left out. A {@code null} word1 marks a unigram, which is added
 * as a plain word. For a real bigram, word1 is first added with frequency 2 if it is not in
 * the tree yet, and then the bigram itself is registered. Each processed pair is also written
 * back to {@code bigrams} with its frequency narrowed to a byte.
 *
 * @param dict the source of the frequency for each pair
 * @param bigrams the pairs to add; updated as a side effect
 * @return the newly built dictionary
 */
@UsedForTesting
static FusionDictionary constructFusionDictionary(
        final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
    final Node root = new Node();
    final FusionDictionary.DictionaryOptions options = new FusionDictionary.DictionaryOptions(
            new HashMap<String, String>(), false, false);
    final FusionDictionary result = new FusionDictionary(root, options);

    // Only maintained when DEBUG is on; feeds the summary log at the end.
    int addedCount = 0;
    for (final String firstWord : bigrams.keySet()) {
        final boolean isUnigram = (firstWord == null);
        final HashMap<String, Byte> followers = bigrams.getBigrams(firstWord);
        // NOTE(review): updateBigram() below runs while this key set is being iterated;
        // presumably it only overwrites the existing entry - confirm in
        // UserHistoryDictionaryBigramList.
        for (final String secondWord : followers.keySet()) {
            final int frequency = dict.getFrequency(firstWord, secondWord);
            if (frequency == -1) {
                // The dictionary asked for this entry to be left out.
                continue;
            }
            if (DEBUG) {
                final String message = isUnigram
                        ? "add unigram: " + secondWord + "," + frequency
                        : "add bigram: " + firstWord + "," + secondWord + "," + frequency;
                Log.d(TAG, message);
                addedCount++;
            }
            if (isUnigram) {
                result.add(secondWord, frequency, null, false /* isNotAWord */);
            } else {
                // A bigram can only hang off a word that already exists in the tree.
                final boolean firstWordMissing =
                        FusionDictionary.findWordInTree(result.mRoot, firstWord) == null;
                if (firstWordMissing) {
                    result.add(firstWord, 2, null, false /* isNotAWord */);
                }
                result.setBigram(firstWord, secondWord, frequency);
            }
            bigrams.updateBigram(firstWord, secondWord, (byte) frequency);
        }
    }
    if (DEBUG) {
        Log.d(TAG, "add " + addedCount + "words");
    }
    return result;
}
/**
 * Reads a binary dictionary from a buffer and reports its content to a listener.
 *
 * The buffer is decoded into three maps keyed by address (words, frequencies and pending
 * bigram attributes). Decoding failures are logged and not propagated; whatever the maps hold
 * at that point is still forwarded to {@code dict}.
 *
 * @param buffer the buffer holding the binary dictionary
 * @param dict the listener that receives the unigrams and bigrams
 */
public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
        final OnAddWordListener dict) {
    final Map<Integer, String> wordsByAddress = CollectionUtils.newTreeMap();
    final Map<Integer, Integer> frequenciesByAddress = CollectionUtils.newTreeMap();
    final Map<Integer, ArrayList<PendingAttribute>> bigramsByAddress =
            CollectionUtils.newTreeMap();
    try {
        BinaryDictIOUtils.readUnigramsAndBigramsBinary(
                buffer, wordsByAddress, frequenciesByAddress, bigramsByAddress);
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "Unsupported format", e);
    } catch (IOException e) {
        Log.e(TAG, "IO exception while reading file", e);
    } catch (ArrayIndexOutOfBoundsException e) {
        // Caught explicitly so that an out-of-range read is logged, not propagated.
        Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
    }
    // Reached on success and after a logged failure alike.
    addWordsFromWordMap(wordsByAddress, frequenciesByAddress, bigramsByAddress, dict);
}
/**
 * Reports every unigram and bigram held in the given maps to an OnAddWordListener.
 *
 * All three maps are keyed by the same address. For each entry of {@code unigrams} the word
 * is reported with its frequency, then each of its pending bigram attributes is resolved to
 * a second word through {@code unigrams} and reported with a frequency reconstructed from
 * the unigram frequency and the attribute frequency.
 *
 * Entries that cannot be reported are logged and skipped: a unigram without an entry in
 * {@code frequencies} (together with its bigrams), and a bigram for which either word is
 * {@code null}.
 *
 * @param unigrams address to word
 * @param frequencies address to unigram frequency
 * @param bigrams address to the pending bigram attributes of the word at that address
 * @param to the listener that receives the words
 */
@UsedForTesting
static void addWordsFromWordMap(final Map<Integer, String> unigrams,
        final Map<Integer, Integer> frequencies,
        final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
    for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
        final Integer address = entry.getKey();
        final String word1 = entry.getValue();
        final Integer storedFrequency = frequencies.get(address);
        if (storedFrequency == null) {
            // Unboxing a missing value would throw NullPointerException. The maps can be
            // inconsistent because readDictionaryBinary() forwards them even after a
            // failed read, so skip the entry as is done for invalid bigram pairs below.
            Log.e(TAG, "Missing frequency for unigram at address " + address);
            continue;
        }
        final int unigramFrequency = storedFrequency;
        to.setUnigram(word1, null, unigramFrequency);
        final ArrayList<PendingAttribute> attrList = bigrams.get(address);
        if (attrList != null) {
            for (final PendingAttribute attr : attrList) {
                // The attribute stores the address of the second word, not the word itself.
                final String word2 = unigrams.get(attr.mAddress);
                if (word1 == null || word2 == null) {
                    Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
                    continue;
                }
                to.setBigram(word1, word2,
                        BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
                                attr.mFrequency));
            }
        }
    }
}
}