| /* |
| * Copyright (C) 2010 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.quicksearchbox.util; |
| |
| /** |
| * This class represents the matrix used in the Levenshtein distance algorithm, together |
| * with the algorithm itself which operates on the matrix. |
| * |
| * We also track of the individual operations applied to transform the source string into the |
| * target string so we can trace the path taken through the matrix afterwards, in order to |
| * perform the formatting as required. |
| */ |
| public class LevenshteinDistance { |
| public static final int EDIT_DELETE = 0; |
| public static final int EDIT_INSERT = 1; |
| public static final int EDIT_REPLACE = 2; |
| public static final int EDIT_UNCHANGED = 3; |
| |
| private final Token[] mSource; |
| private final Token[] mTarget; |
| private final int[][] mEditTypeTable; |
| private final int[][] mDistanceTable; |
| |
| public LevenshteinDistance(Token[] source, Token[] target) { |
| final int sourceSize = source.length; |
| final int targetSize = target.length; |
| final int[][] editTab = new int[sourceSize+1][targetSize+1]; |
| final int[][] distTab = new int[sourceSize+1][targetSize+1]; |
| editTab[0][0] = EDIT_UNCHANGED; |
| distTab[0][0] = 0; |
| for (int i = 1; i <= sourceSize; ++i) { |
| editTab[i][0] = EDIT_DELETE; |
| distTab[i][0] = i; |
| } |
| for (int i = 1; i <= targetSize; ++i) { |
| editTab[0][i] = EDIT_INSERT; |
| distTab[0][i] = i; |
| } |
| mEditTypeTable = editTab; |
| mDistanceTable = distTab; |
| mSource = source; |
| mTarget = target; |
| } |
| |
| /** |
| * Implementation of Levenshtein distance algorithm. |
| * |
| * @return The Levenshtein distance. |
| */ |
| public int calculate() { |
| final Token[] src = mSource; |
| final Token[] trg = mTarget; |
| final int sourceLen = src.length; |
| final int targetLen = trg.length; |
| final int[][] distTab = mDistanceTable; |
| final int[][] editTab = mEditTypeTable; |
| for (int s = 1; s <= sourceLen; ++s) { |
| Token sourceToken = src[s-1]; |
| for (int t = 1; t <= targetLen; ++t) { |
| Token targetToken = trg[t-1]; |
| int cost = sourceToken.prefixOf(targetToken) ? 0 : 1; |
| |
| int distance = distTab[s-1][t] + 1; |
| int type = EDIT_DELETE; |
| |
| int d = distTab[s][t - 1]; |
| if (d + 1 < distance ) { |
| distance = d + 1; |
| type = EDIT_INSERT; |
| } |
| |
| d = distTab[s - 1][t - 1]; |
| if (d + cost < distance) { |
| distance = d + cost; |
| type = cost == 0 ? EDIT_UNCHANGED : EDIT_REPLACE; |
| } |
| distTab[s][t] = distance; |
| editTab[s][t] = type; |
| } |
| } |
| return distTab[sourceLen][targetLen]; |
| } |
| |
| /** |
| * Gets the list of operations which were applied to each target token; {@link #calculate} must |
| * have been called on this object before using this method. |
| * @return A list of {@link EditOperation}s indicating the origin of each token in the target |
| * string. The position of the token indicates the position in the source string of the |
| * token that was unchanged/replaced, or the position in the source after which a target |
| * token was inserted. |
| */ |
| public EditOperation[] getTargetOperations() { |
| final int trgLen = mTarget.length; |
| final EditOperation[] ops = new EditOperation[trgLen]; |
| int targetPos = trgLen; |
| int sourcePos = mSource.length; |
| final int[][] editTab = mEditTypeTable; |
| while (targetPos > 0) { |
| int editType = editTab[sourcePos][targetPos]; |
| switch (editType) { |
| case LevenshteinDistance.EDIT_DELETE: |
| sourcePos--; |
| break; |
| case LevenshteinDistance.EDIT_INSERT: |
| targetPos--; |
| ops[targetPos] = new EditOperation(editType, sourcePos); |
| break; |
| case LevenshteinDistance.EDIT_UNCHANGED: |
| case LevenshteinDistance.EDIT_REPLACE: |
| targetPos--; |
| sourcePos--; |
| ops[targetPos] = new EditOperation(editType, sourcePos); |
| break; |
| } |
| } |
| |
| return ops; |
| } |
| |
| public static final class EditOperation { |
| private final int mType; |
| private final int mPosition; |
| public EditOperation(int type, int position) { |
| mType = type; |
| mPosition = position; |
| } |
| public int getType() { |
| return mType; |
| } |
| public int getPosition() { |
| return mPosition; |
| } |
| } |
| |
| public static final class Token implements CharSequence { |
| private final char[] mContainer; |
| public final int mStart; |
| public final int mEnd; |
| |
| public Token(char[] container, int start, int end) { |
| mContainer = container; |
| mStart = start; |
| mEnd = end; |
| } |
| |
| public int length() { |
| return mEnd - mStart; |
| } |
| |
| @Override |
| public String toString() { |
| // used in tests only. |
| return subSequence(0, length()); |
| } |
| |
| public boolean prefixOf(final Token that) { |
| final int len = length(); |
| if (len > that.length()) return false; |
| final int thisStart = mStart; |
| final int thatStart = that.mStart; |
| final char[] thisContainer = mContainer; |
| final char[] thatContainer = that.mContainer; |
| for (int i = 0; i < len; ++i) { |
| if (thisContainer[thisStart + i] != thatContainer[thatStart + i]) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| public char charAt(int index) { |
| return mContainer[index + mStart]; |
| } |
| |
| public String subSequence(int start, int end) { |
| return new String(mContainer, mStart + start, length()); |
| } |
| |
| } |
| } |