blob: 4d60bda53507f1e9a7fcf4788fd83dba873a9989 [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.research;
import android.os.SystemClock;
import android.text.TextUtils;
import android.util.JsonWriter;
import android.util.Log;
import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
/**
* A group of log statements related to each other.
*
* A LogUnit is a collection of LogStatements, each of which is generated at a particular point
* in the code. (Each LogStatement is stored together with its values and its timestamp in the
* parallel lists held by this class.) A single LogUnit's statements can correspond to all the
* calls made while in the same composing region, or all the calls between committing the last
* composing region, and the first character of the next composing region.
*
* Individual statements in a log may be marked as potentially private. If so, then they are only
* published to a ResearchLog if the ResearchLogger determines that publishing the entire LogUnit
* will not violate the user's privacy. Checks for this may include whether other LogUnits have
* been published recently, or whether the LogUnit contains numbers, etc.
*/
public class LogUnit {
// Tag passed to android.util.Log by this class.
private static final String TAG = LogUnit.class.getSimpleName();
// Debug-logging switch. As written it is always false: the leading "false" short-circuits the
// production flag.
private static final boolean DEBUG = false
&& ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
// Matches a run of whitespace; setWords() uses it to split mWords into mWordArray.
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
// Shared value for mWordArray when there are no words.
private static final String[] EMPTY_STRING_ARRAY = new String[0];
// mLogStatementList, mValuesList and mTimeList are parallel lists: element i of each describes
// the i-th statement added (the statement, its values, and its time, respectively).
private final ArrayList<LogStatement> mLogStatementList;
private final ArrayList<Object[]> mValuesList;
// Assume that mTimeList is sorted in increasing order. Do not insert null values into
// mTimeList.
private final ArrayList<Long> mTimeList;
// Words that this LogUnit generates. Should be null if the data in the LogUnit does not
// generate a genuine word (i.e. separators alone do not count as a word). Should never be
// empty. Note that if the user types spaces explicitly, then normally mWords should contain
// only a single word; it will only contain space-separated multiple words if the user does not
// enter a space, and the system enters one automatically.
private String mWords;
// mWords split on whitespace. Rebuilt by setWords(); note that append() clears mWords without
// touching this array.
private String[] mWordArray = EMPTY_STRING_ARRAY;
// Set by setMayContainDigit(); propagated by splitByTime() and OR-ed together by append().
private boolean mMayContainDigit;
// Set on both halves by splitByTime() and cleared by append(). While set, publishTo() skips
// statements that are potentially revealing.
private boolean mIsPartOfMegaword;
// Set by setContainsCorrection(); propagated by splitByTime() and OR-ed together by append().
private boolean mContainsCorrection;
// mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
// correction. Holds one of the CORRECTIONTYPE_* constants.
private int mCorrectionType;
// LogUnits start in this state. If a word is entered without being corrected, it will have
// this correction type.
public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
// The LogUnit was corrected manually by the user in an unspecified way.
public static final int CORRECTIONTYPE_CORRECTION = 1;
// The LogUnit was corrected manually by the user to a word not in the list of suggestions of
// the first word typed here. (Note: this is a heuristic value, it may be incorrect, for
// example, if the user repositions the cursor).
public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
// The LogUnit was corrected manually by the user to a word that was in the list of suggestions
// of the first word typed here. (Again, a heuristic). It is probably a typo correction.
public static final int CORRECTIONTYPE_TYPO = 3;
// TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
// state and statistics. This should include how many times it has been corrected, whether
// other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit
// previously contained a word, but was corrected to empty (because it was deleted, and there is
// no known replacement).
// Suggestions recorded by initializeSuggestions(); only the first non-null value is kept.
// setWords() consults them to classify a correction as a typo or a different word.
private SuggestedWords mSuggestedWords;
/**
 * Creates an empty LogUnit: no statements, not part of a megaword, no correction recorded and
 * no suggestions.
 */
public LogUnit() {
    // The private constructor sets the correction type and suggestions to the same initial
    // values this constructor used to assign directly.
    this(new ArrayList<LogStatement>(), new ArrayList<Object[]>(), new ArrayList<Long>(),
            false /* isPartOfMegaword */);
}
/**
 * Creates a LogUnit backed directly by the supplied lists; the lists are stored as given, not
 * copied. The correction type starts as {@code CORRECTIONTYPE_NO_CORRECTION} and there are no
 * suggestions.
 *
 * @param logStatementList the statements of the new LogUnit
 * @param valuesList the values of each statement, parallel to {@code logStatementList}
 * @param timeList the time of each statement, parallel to {@code logStatementList}
 * @param isPartOfMegaword initial value of the megaword flag
 */
private LogUnit(final ArrayList<LogStatement> logStatementList,
        final ArrayList<Object[]> valuesList,
        final ArrayList<Long> timeList,
        final boolean isPartOfMegaword) {
    this.mSuggestedWords = null;
    this.mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
    this.mIsPartOfMegaword = isPartOfMegaword;
    this.mTimeList = timeList;
    this.mValuesList = valuesList;
    this.mLogStatementList = logStatementList;
}
// Stored in place of a null values array so that mValuesList never holds null.
private static final Object[] NULL_VALUES = new Object[0];

/**
 * Appends a log statement, its values and its time to this LogUnit.
 *
 * The time passed in successive calls must be monotonically increasing, or splitByTime() will
 * not work.
 *
 * @param logStatement the statement to record
 * @param time the time associated with the statement
 * @param values the statement's values; a null array is stored as an empty array
 */
public void addLogStatement(final LogStatement logStatement, final long time,
        Object... values) {
    final Object[] storedValues = (values != null) ? values : NULL_VALUES;
    mLogStatementList.add(logStatement);
    mValuesList.add(storedValues);
    mTimeList.add(time);
}
/**
 * Publish the contents of this LogUnit to {@code researchLog}.
 *
 * Every statement that passes the privacy filter is written with
 * {@link LogStatement#outputToLocked}. The output is bracketed by a logUnitStart and a
 * logUnitEnd statement; if no statement passes the filter, nothing at all is written and the
 * JsonWriter is never requested from {@code researchLog}.
 *
 * @param researchLog where to publish the contents of this {@code LogUnit}
 * @param canIncludePrivateData whether the private data in this {@code LogUnit} should be
 * included
 */
public synchronized void publishTo(final ResearchLog researchLog,
        final boolean canIncludePrivateData) {
    final int statementCount = mLogStatementList.size();
    if (statementCount == 0) {
        return;
    }
    // Stays null until the first statement is actually output, so that the log file is not
    // opened for writing when everything is filtered out.
    JsonWriter writer = null;
    for (int position = 0; position < statementCount; position++) {
        final LogStatement statement = mLogStatementList.get(position);
        final boolean withheldAsPrivate =
                !canIncludePrivateData && statement.isPotentiallyPrivate();
        if (withheldAsPrivate || (mIsPartOfMegaword && statement.isPotentiallyRevealing())) {
            continue;
        }
        if (writer == null) {
            writer = researchLog.getInitializedJsonWriterLocked();
            outputLogUnitStart(writer, canIncludePrivateData);
        }
        statement.outputToLocked(writer, mTimeList.get(position), mValuesList.get(position));
    }
    if (writer != null) {
        // A logUnitStart was emitted above, so close it with a logUnitEnd.
        outputLogUnitStop(writer);
    }
}
// Strings handed to the LogStatement constructors below. The first two are presumably the keys
// under which the words and the correction type are written -- confirm against LogStatement.
private static final String WORD_KEY = "_wo";
private static final String CORRECTION_TYPE_KEY = "_corType";
private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
private static final String LOG_UNIT_END_KEY = "logUnitEnd";
// Start marker used by outputLogUnitStart() when private data may be included; it is output
// together with the words and the correction type.
final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA =
new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
false /* isPotentiallyRevealing */, WORD_KEY, CORRECTION_TYPE_KEY);
// Start marker used by outputLogUnitStart() when private data must be withheld; it is output
// with no values.
final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA =
new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
false /* isPotentiallyRevealing */);
/**
 * Writes the logUnitStart marker to {@code jsonWriter}, timestamped with the current uptime.
 *
 * @param jsonWriter the writer to output the marker to
 * @param canIncludePrivateData if true, the marker carries this LogUnit's words and correction
 * type; if false, it carries no values
 */
private void outputLogUnitStart(final JsonWriter jsonWriter,
        final boolean canIncludePrivateData) {
    if (canIncludePrivateData) {
        LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
                SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
    } else {
        LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
                SystemClock.uptimeMillis());
    }
}
// End marker written by outputLogUnitStop(); it is output with no values.
final LogStatement LOGSTATEMENT_LOG_UNIT_END =
new LogStatement(LOG_UNIT_END_KEY, false /* isPotentiallyPrivate */,
false /* isPotentiallyRevealing */);
/**
* Writes the logUnitEnd marker to {@code jsonWriter}, timestamped with the current uptime.
*
* @param jsonWriter the writer to output the marker to
*/
private void outputLogUnitStop(final JsonWriter jsonWriter) {
LOGSTATEMENT_LOG_UNIT_END.outputToLocked(jsonWriter, SystemClock.uptimeMillis());
}
/**
 * Mark the current logUnit as containing data to generate {@code newWords}.
 *
 * If this LogUnit already has one or more words (i.e. {@code setWords()} was called before),
 * the call is treated as a correction and the correction type is updated: a typo if the new
 * words appear in the recorded suggestions, a different word if they do not, and a generic
 * correction if no suggestions were recorded. Otherwise the correction type is reset to
 * {@code CORRECTIONTYPE_NO_CORRECTION}.
 *
 * The word array is rebuilt by splitting {@code newWords} on whitespace; an empty string at
 * the start of the array (caused by leading whitespace) is dropped.
 *
 * @param newWords The words this LogUnit generates. Caller should not pass null or the empty
 * string.
 */
public void setWords(final String newWords) {
    if (!hasOneOrMoreWords()) {
        // First time a word is set: nothing has been corrected yet.
        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
    } else if (mSuggestedWords == null) {
        // No suggested words, so it's not clear whether it's a typo or a different word.
        mCorrectionType = CORRECTIONTYPE_CORRECTION;
    } else {
        // A new word found among the original suggestions is probably a typo correction;
        // otherwise the user may have decided to enter a different word.
        mCorrectionType = isInSuggestedWords(newWords, mSuggestedWords)
                ? CORRECTIONTYPE_TYPO
                : CORRECTIONTYPE_DIFFERENT_WORD;
    }

    mWords = newWords;

    String[] splitWords = EMPTY_STRING_ARRAY;
    if (!TextUtils.isEmpty(newWords)) {
        splitWords = WHITESPACE_PATTERN.split(newWords);
        if (splitWords.length > 0 && TextUtils.isEmpty(splitWords[0])) {
            // Whitespace at the start of the words yields a leading empty string; remove it.
            splitWords = Arrays.copyOfRange(splitWords, 1, splitWords.length);
        }
    }
    mWordArray = splitWords;
}
/**
* Returns the words of this LogUnit as a single string.
*
* @return the string last stored by {@code setWords()}; null if none was stored, or if
* {@code append()} was last called with a LogUnit that had no words.
*/
public String getWordsAsString() {
return mWords;
}
/**
* Returns the words generated by the data in this LogUnit.
*
* The array is the one built by {@code setWords()}, which splits the words on whitespace and
* removes an empty string at the start of the array. The internal array itself is returned,
* not a copy.
*
* @return the array of words; an empty array if there are no words associated with this
* LogUnit.
*/
public String[] getWordsAsStringArray() {
return mWordArray;
}
/**
* Returns whether the word array of this LogUnit holds at least one word.
*
* @return true if the word array is non-empty
*/
public boolean hasOneOrMoreWords() {
return mWordArray.length >= 1;
}
/**
* Returns the number of words in the word array of this LogUnit.
*
* @return the length of the word array; 0 if there are no words
*/
public int getNumWords() {
return mWordArray.length;
}
// TODO: Refactor to eliminate getter/setters
/** Flags this LogUnit as possibly containing a digit. */
public void setMayContainDigit() {
mMayContainDigit = true;
}
// TODO: Refactor to eliminate getter/setters
/**
* Returns whether this LogUnit has been flagged as possibly containing a digit.
*
* @return true if the digit flag is set
*/
public boolean mayContainDigit() {
return mMayContainDigit;
}
// TODO: Refactor to eliminate getter/setters
/** Flags this LogUnit as containing a correction. */
public void setContainsCorrection() {
mContainsCorrection = true;
}
// TODO: Refactor to eliminate getter/setters
/**
* Returns whether this LogUnit has been flagged as containing a correction.
*
* @return true if the correction flag is set
*/
public boolean containsCorrection() {
return mContainsCorrection;
}
// TODO: Refactor to eliminate getter/setters
/**
* Overrides the correction type of this LogUnit. The value is stored without validation.
*
* @param correctionType the new correction type; expected to be one of the
* {@code CORRECTIONTYPE_*} constants
*/
public void setCorrectionType(final int correctionType) {
mCorrectionType = correctionType;
}
// TODO: Refactor to eliminate getter/setters
/**
* Returns the current correction type of this LogUnit.
*
* @return the value last assigned by the constructor, {@code setWords()} or
* {@code setCorrectionType()}
*/
public int getCorrectionType() {
return mCorrectionType;
}
/**
* Returns whether this LogUnit holds no log statements. Words and flags are not considered.
*
* @return true if no log statement is stored
*/
public boolean isEmpty() {
return mLogStatementList.isEmpty();
}
/**
 * Split this logUnit by time.
 *
 * All events whose time is at or before {@code maxTime} stay in the current logUnit; the first
 * event with a later time and everything following it move into a new LogUnit that is
 * returned. When a split happens, both this LogUnit and the returned one are marked as part of
 * a megaword, and the returned LogUnit inherits the digit and correction flags but has no
 * words. When no event is later than {@code maxTime}, this LogUnit is left untouched and a
 * fresh, empty LogUnit is returned.
 *
 * @param maxTime the latest time that stays in this LogUnit
 * @return the LogUnit holding the later events; never null
 */
public LogUnit splitByTime(final long maxTime) {
    // Assume that mTimeList is in sorted order.
    final int eventCount = mTimeList.size();
    // TODO: find time by binary search, e.g. using Collections#binarySearch()
    int splitIndex = 0;
    while (splitIndex < eventCount && mTimeList.get(splitIndex) <= maxTime) {
        splitIndex++;
    }
    if (splitIndex == eventCount) {
        // Nothing happened after maxTime.
        return new LogUnit();
    }

    // Views onto the tails of the three parallel lists.
    final List<LogStatement> tailStatements = mLogStatementList.subList(splitIndex, eventCount);
    final List<Object[]> tailValues = mValuesList.subList(splitIndex, eventCount);
    final List<Long> tailTimes = mTimeList.subList(splitIndex, eventCount);

    // Copy the tails into the new LogUnit before they are removed from this one.
    final LogUnit laterLogUnit = new LogUnit(
            new ArrayList<LogStatement>(tailStatements),
            new ArrayList<Object[]>(tailValues),
            new ArrayList<Long>(tailTimes),
            true /* isPartOfMegaword */);
    laterLogUnit.mWords = null;
    laterLogUnit.mMayContainDigit = mMayContainDigit;
    laterLogUnit.mContainsCorrection = mContainsCorrection;

    // Clearing the views purges the moved entries from this LogUnit's lists.
    tailStatements.clear();
    tailValues.clear();
    tailTimes.clear();
    mIsPartOfMegaword = true;
    return laterLogUnit;
}
/**
 * Appends the statements, values and times of {@code logUnit} to this LogUnit.
 *
 * The words of this LogUnit are replaced by those of {@code logUnit} via {@code setWords()},
 * or cleared if it has none. The digit and correction flags of both LogUnits are OR-ed
 * together, and this LogUnit is no longer marked as part of a megaword.
 *
 * @param logUnit the LogUnit whose contents are appended; it is not modified
 */
public void append(final LogUnit logUnit) {
    mLogStatementList.addAll(logUnit.mLogStatementList);
    mValuesList.addAll(logUnit.mValuesList);
    mTimeList.addAll(logUnit.mTimeList);

    // NOTE(review): only mWords is cleared here; mWordArray keeps the previous words. As a
    // result hasOneOrMoreWords() can stay true while getWordsAsString() returns null, and the
    // setWords() call below (or a later one) treats the new words as a correction. Confirm
    // this is intended before changing it.
    mWords = null;
    final String appendedWords = logUnit.mWords;
    if (appendedWords != null) {
        setWords(appendedWords);
    }

    if (logUnit.mMayContainDigit) {
        mMayContainDigit = true;
    }
    if (logUnit.mContainsCorrection) {
        mContainsCorrection = true;
    }
    mIsPartOfMegaword = false;
}
/**
* Returns the suggestions recorded for this LogUnit.
*
* @return the suggestions stored by {@code initializeSuggestions()}; null if none were stored
*/
public SuggestedWords getSuggestions() {
return mSuggestedWords;
}
/**
 * Initialize the suggestions.
 *
 * Once set to a non-null value, the suggestions may not be changed again; later calls are
 * ignored. This is to keep track of the list of words that are close to the user's initial
 * effort to type the word. Only words that are close to the initial effort are considered
 * typo corrections.
 *
 * @param suggestedWords the suggestions to record if none have been recorded yet
 */
public void initializeSuggestions(final SuggestedWords suggestedWords) {
    if (mSuggestedWords != null) {
        // Already initialized; keep the first set of suggestions.
        return;
    }
    mSuggestedWords = suggestedWords;
}
/**
 * Returns whether {@code queryWord} exactly equals the word of one of the suggestions.
 *
 * @param queryWord the word to look for; null or empty never matches
 * @param suggestedWords the suggestions to search; must not be null
 * @return true if some suggestion's word equals {@code queryWord}
 */
private static boolean isInSuggestedWords(final String queryWord,
        final SuggestedWords suggestedWords) {
    if (!TextUtils.isEmpty(queryWord)) {
        final int suggestionCount = suggestedWords.size();
        for (int position = 0; position < suggestionCount; position++) {
            final SuggestedWordInfo candidate = suggestedWords.getInfo(position);
            if (queryWord.equals(candidate.mWord)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Flag data associated with selecting the Research button.
 *
 * A LogUnit will capture all user interactions with the IME, including the "meta-interactions"
 * of using the Research button to control the logging (e.g. by starting and stopping recording
 * of a test case). Because meta-interactions should not be part of the normal log, calling
 * this method will set a field in the LogStatements of the motion events to indicate that
 * they should be disregarded. No statement is deleted from this LogUnit.
 *
 * This implementation assumes that the data recorded by the meta-interaction takes the
 * form of all events following the first MotionEvent.ACTION_DOWN before the first long-press
 * before the last onCodeEvent containing a code matching {@code LogStatement.VALUE_RESEARCH}.
 * If no such DOWN event is found, flagging starts from the first statement.
 *
 * @return true if a research-key code input was found (and the motion events leading up to it
 * were flagged), false if none was found and the log was left as is
 */
public boolean removeResearchButtonInvocation() {
    // This method is designed to be idempotent.
    // Locate the last invocation of the "research" key.
    final int researchKeyIndex = findLastIndexContainingKeyValue(
            LogStatement.TYPE_POINTER_TRACKER_CALL_LISTENER_ON_CODE_INPUT,
            LogStatement.KEY_CODE, LogStatement.VALUE_RESEARCH);
    if (researchKeyIndex < 0) {
        // Could not find invocation of "research" key. Leave log as is.
        if (DEBUG) {
            Log.d(TAG, "Could not find research key");
        }
        return false;
    }

    // The long press that started the invocation of the research key code input.
    final int longPressIndex = findLastIndexBefore(
            LogStatement.TYPE_MAIN_KEYBOARD_VIEW_ON_LONG_PRESS, researchKeyIndex);
    // The DOWN event preceding that long press.
    final int downEventIndex = findLastIndexContainingKeyValueBefore(
            LogStatement.TYPE_MOTION_EVENT, LogStatement.ACTION, LogStatement.VALUE_DOWN,
            longPressIndex);

    // The lookups above return -1 when nothing is found; start from the beginning then.
    int cursor = Math.max(downEventIndex, 0);
    // Flag every motion event from the DOWN event up to (not including) the research key as
    // logging-related.
    while (cursor < researchKeyIndex) {
        final LogStatement statement = mLogStatementList.get(cursor);
        if (statement.getType().equals(LogStatement.TYPE_MOTION_EVENT)) {
            statement.setValue(LogStatement.KEY_IS_LOGGING_RELATED, mValuesList.get(cursor),
                    true);
        }
        cursor++;
    }
    return true;
}
/**
* Find the index of the last LogStatement of type {@code queryType} at or before
* {@code startingIndex}.
*
* Delegates to {@code findLastIndexContainingKeyValueBefore} with a null key, so only the type
* is matched.
*
* @param queryType a String that must be {@code String.equals()} to the LogStatement type
* @param startingIndex the index to start the backward search from. Must be less than the
* length of mLogStatementList, or an IndexOutOfBoundsException is thrown. Can be negative,
* in which case -1 is returned.
*
* @return The index of the last LogStatement, -1 if none exists.
*/
private int findLastIndexBefore(final String queryType, final int startingIndex) {
return findLastIndexContainingKeyValueBefore(queryType, null, null, startingIndex);
}
/**
* Find the index of the last LogStatement in this LogUnit of type {@code queryType}
* containing the given key-value pair.
*
* The backward search starts from the last statement of mLogStatementList.
*
* @param queryType a String that must be {@code String.equals()} to the LogStatement type
* @param queryKey a String naming the key to look for in the LogStatement; if null, only the
* type is matched
* @param queryValue the value that the key must be associated with
*
* @return The index of the last LogStatement, -1 if none exists.
*/
private int findLastIndexContainingKeyValue(final String queryType, final String queryKey,
final Object queryValue) {
return findLastIndexContainingKeyValueBefore(queryType, queryKey, queryValue,
mLogStatementList.size() - 1);
}
/**
 * Find the index of the last LogStatement at or before {@code startingIndex} of type
 * {@code queryType} containing the given key-value pair.
 *
 * The search runs backward from {@code startingIndex} (inclusive) down to index 0.
 *
 * @param queryType a String that must be {@code String.equals()} to the LogStatement type
 * @param queryKey a String naming the key to look for in the LogStatement; if null, only the
 * type is matched
 * @param queryValue the value that the key must be associated with, as decided by
 * {@code LogStatement.containsKeyValuePair}
 * @param startingIndex the index to start the backward search from. Must be less than the
 * length of mLogStatementList, or an IndexOutOfBoundsException is thrown. Can be negative,
 * in which case -1 is returned.
 *
 * @return The index of the last LogStatement, -1 if none exists.
 */
private int findLastIndexContainingKeyValueBefore(final String queryType, final String queryKey,
        final Object queryValue, final int startingIndex) {
    final boolean matchTypeOnly = (queryKey == null);
    // A negative starting index skips the loop entirely and yields -1.
    int candidate = startingIndex;
    while (candidate >= 0) {
        final LogStatement statement = mLogStatementList.get(candidate);
        if (statement.getType().equals(queryType)) {
            if (matchTypeOnly || statement.containsKeyValuePair(queryKey, queryValue,
                    mValuesList.get(candidate))) {
                return candidate;
            }
        }
        candidate--;
    }
    return -1;
}
}