java/src/com/android/inputmethod/latin/CapsModeUtils.java - platform/packages/inputmethods/LatinIME - Git at Google

 /*
  * Copyright (C) 2013 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.android.inputmethod.latin;

 import android.text.InputType;
 import android.text.TextUtils;

 import java.util.Locale;

 public final class CapsModeUtils {
     private CapsModeUtils() {
         // This utility class is not publicly instantiable.
     }

     /**
      * Apply an auto-caps mode to a string.
      *
      * This intentionally does NOT apply manual caps mode. It only changes the capitalization if
      * the mode is one of the auto-caps modes.
      * @param s The string to capitalize.
      * @param capitalizeMode The mode in which to capitalize.
      * @param locale The locale for capitalizing.
      * @return The capitalized string.
      */
     public static String applyAutoCapsMode(final String s, final int capitalizeMode,
             final Locale locale) {
         if (WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == capitalizeMode) {
             return s.toUpperCase(locale);
         } else if (WordComposer.CAPS_MODE_AUTO_SHIFTED == capitalizeMode) {
             return StringUtils.capitalizeFirstCodePoint(s, locale);
         } else {
             return s;
         }
     }

     /**
      * Return whether a constant represents an auto-caps mode (either auto-shift or auto-shift-lock)
      * @param mode The mode to test for
      * @return true if this represents an auto-caps mode, false otherwise
      */
     public static boolean isAutoCapsMode(final int mode) {
         return WordComposer.CAPS_MODE_AUTO_SHIFTED == mode
                 || WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == mode;
     }

     /**
      * Determine what caps mode should be in effect at the current offset in
      * the text. Only the mode bits set in <var>reqModes</var> will be
      * checked. Note that the caps mode flags here are explicitly defined
      * to match those in {@link InputType}.
      *
      * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
      * issues). This will change in the future as we simplify the code for our use and fix bugs.
      *
      * @param cs The text that should be checked for caps modes.
      * @param reqModes The modes to be checked: may be any combination of
      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
      * {@link TextUtils#CAP_MODE_SENTENCES}.
      * @param locale The locale to consider for capitalization rules
      * @param hasSpaceBefore Whether we should consider there is a space inserted at the end of cs
      *
      * @return Returns the actual capitalization modes that can be in effect
      * at the current position, which is any combination of
      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
      * {@link TextUtils#CAP_MODE_SENTENCES}.
      */
     public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale,
             final boolean hasSpaceBefore) {
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
         // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
         //   of a sentence just before that.
         // We ignore opening parentheses and the like just before the cursor for purposes of
         // finding whitespace for WORDS and SENTENCES modes.
         // The end of a sentence ends with a period, question mark or exclamation mark. If it's
         // a period, it also needs not to be an abbreviation, which means it also needs to either
         // be immediately preceded by punctuation, or by a string of only letters with single
         // periods interleaved.

         // Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
         if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
             // Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
             // evaluated MODE_CHARACTERS, we can return.
             return TextUtils.CAP_MODE_CHARACTERS & reqModes;
         }

         // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
         // opening parentheses, brackets, opening quotes, everything that *opens* a span of
         // text in the linguistic sense. In RTL languages, this is still an opening sign, although
         // it may look like a right parenthesis for example. We also include double quote and
         // single quote since they aren't start punctuation in the unicode sense, but should still
         // be skipped for English. TODO: does this depend on the language?
         int i;
         if (hasSpaceBefore) {
             i = cs.length() + 1;
         } else {
             for (i = cs.length(); i > 0; i--) {
                 final char c = cs.charAt(i - 1);
                 if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
                         && Character.getType(c) != Character.START_PUNCTUATION) {
                     break;
                 }
             }
         }

         // We are now on the character that precedes any starting punctuation, so in the most
         // frequent case this will be whitespace or a letter, although it may occasionally be a
         // start of line, or some symbol.

         // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
         // we go back over any space or tab char sitting there. We find the start of a paragraph
         // if the first char that's not a space or tab is a start of line (as in \n, start of text,
         // or some other similar characters).
         int j = i;
         char prevChar = Constants.CODE_SPACE;
         if (hasSpaceBefore) --j;
         while (j > 0) {
             prevChar = cs.charAt(j - 1);
             if (!Character.isSpaceChar(prevChar) && prevChar != Constants.CODE_TAB) break;
             j--;
         }
         if (j <= 0 || Character.isWhitespace(prevChar)) {
             // There are only spacing chars between the start of the paragraph and the cursor,
             // defined as a isWhitespace() char that is neither a isSpaceChar() nor a tab. Both
             // MODE_WORDS and MODE_SENTENCES should be active.
             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
                     | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         }
         if (i == j) {
             // If we don't have whitespace before index i, it means neither MODE_WORDS
             // nor mode sentences should be on so we can return right away.
             return TextUtils.CAP_MODE_CHARACTERS & reqModes;
         }
         if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
             // Here we know we have whitespace before the cursor (if not, we returned in the above
             // if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
             // MODE_SENTENCES so we can return right away.
             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
         }
         // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
         // we know that MODE_SENTENCES is being requested.

         // Step 4 : Search for MODE_SENTENCES.
         // English is a special case in that "American typography" rules, which are the most common
         // in English, state that a sentence terminator immediately following a quotation mark
         // should be swapped with it and de-duplicated (included in the quotation mark),
         // e.g. <<Did he say, "let's go home?">>
         // No other language has such a rule as far as I know, instead putting inside the quotation
         // mark as the exact thing quoted and handling the surrounding punctuation independently,
         // e.g. <<Did he say, "let's go home"?>>
         // Hence, specifically for English, we treat this special case here.
         if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
             for (; j > 0; j--) {
                 // Here we look to go over any closing punctuation. This is because in dominant
                 // variants of English, the final period is placed within double quotes and maybe
                 // other closing punctuation signs. This is generally not true in other languages.
                 final char c = cs.charAt(j - 1);
                 if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
                         && Character.getType(c) != Character.END_PUNCTUATION) {
                     break;
                 }
             }
         }

         if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
         char c = cs.charAt(--j);

         // We found the next interesting chunk of text ; next we need to determine if it's the
         // end of a sentence. If we have a question mark or an exclamation mark, it's the end of
         // a sentence. If it's neither, the only remaining case is the period so we get the opposite
         // case out of the way.
         if (c == Constants.CODE_QUESTION_MARK || c == Constants.CODE_EXCLAMATION_MARK) {
             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         }
         if (c != Constants.CODE_PERIOD || j <= 0) {
             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
         }

         // We found out that we have a period. We need to determine if this is a full stop or
         // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
         // looks like (\w\.){2,}
         // To find out, we will have a simple state machine with the following states :
         // START, WORD, PERIOD, ABBREVIATION
         // On START : (just before the first period)
         //           letter => WORD
         //           whitespace => end with no caps (it was a stand-alone period)
         //           otherwise => end with caps (several periods/symbols in a row)
         // On WORD : (within the word just before the first period)
         //           letter => WORD
         //           period => PERIOD
         //           otherwise => end with caps (it was a word with a full stop at the end)
         // On PERIOD : (period within a potential abbreviation)
         //           letter => LETTER
         //           otherwise => end with caps (it was not an abbreviation)
         // On LETTER : (letter within a potential abbreviation)
         //           letter => LETTER
         //           period => PERIOD
         //           otherwise => end with no caps (it was an abbreviation)
         // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
         // should capitalize.

         final int START = 0;
         final int WORD = 1;
         final int PERIOD = 2;
         final int LETTER = 3;
         final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
                 | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
         int state = START;
         while (j > 0) {
             c = cs.charAt(--j);
             switch (state) {
             case START:
                 if (Character.isLetter(c)) {
                     state = WORD;
                 } else if (Character.isWhitespace(c)) {
                     return noCaps;
                 } else {
                     return caps;
                 }
                 break;
             case WORD:
                 if (Character.isLetter(c)) {
                     state = WORD;
                 } else if (c == Constants.CODE_PERIOD) {
                     state = PERIOD;
                 } else {
                     return caps;
                 }
                 break;
             case PERIOD:
                 if (Character.isLetter(c)) {
                     state = LETTER;
                 } else {
                     return caps;
                 }
                 break;
             case LETTER:
                 if (Character.isLetter(c)) {
                     state = LETTER;
                 } else if (c == Constants.CODE_PERIOD) {
                     state = PERIOD;
                 } else {
                     return noCaps;
                 }
             }
         }
         // Here we arrived at the start of the line. This should behave exactly like whitespace.
         return (START == state || LETTER == state) ? noCaps : caps;
     }
 }
	/*
	* Copyright (C) 2013 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.android.inputmethod.latin;

	import android.text.InputType;
	import android.text.TextUtils;

	import java.util.Locale;

	public final class CapsModeUtils {
	private CapsModeUtils() {
	// This utility class is not publicly instantiable.
	}

	/**
	* Apply an auto-caps mode to a string.
	*
	* This intentionally does NOT apply manual caps mode. It only changes the capitalization if
	* the mode is one of the auto-caps modes.
	* @param s The string to capitalize.
	* @param capitalizeMode The mode in which to capitalize.
	* @param locale The locale for capitalizing.
	* @return The capitalized string.
	*/
	public static String applyAutoCapsMode(final String s, final int capitalizeMode,
	final Locale locale) {
	if (WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == capitalizeMode) {
	return s.toUpperCase(locale);
	} else if (WordComposer.CAPS_MODE_AUTO_SHIFTED == capitalizeMode) {
	return StringUtils.capitalizeFirstCodePoint(s, locale);
	} else {
	return s;
	}
	}

	/**
	* Return whether a constant represents an auto-caps mode (either auto-shift or auto-shift-lock)
	* @param mode The mode to test for
	* @return true if this represents an auto-caps mode, false otherwise
	*/
	public static boolean isAutoCapsMode(final int mode) {
	return WordComposer.CAPS_MODE_AUTO_SHIFTED == mode
	\|\| WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == mode;
	}

	/**
	* Determine what caps mode should be in effect at the current offset in
	* the text. Only the mode bits set in <var>reqModes</var> will be
	* checked. Note that the caps mode flags here are explicitly defined
	* to match those in {@link InputType}.
	*
	* This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
	* issues). This will change in the future as we simplify the code for our use and fix bugs.
	*
	* @param cs The text that should be checked for caps modes.
	* @param reqModes The modes to be checked: may be any combination of
	* {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
	* {@link TextUtils#CAP_MODE_SENTENCES}.
	* @param locale The locale to consider for capitalization rules
	* @param hasSpaceBefore Whether we should consider there is a space inserted at the end of cs
	*
	* @return Returns the actual capitalization modes that can be in effect
	* at the current position, which is any combination of
	* {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
	* {@link TextUtils#CAP_MODE_SENTENCES}.
	*/
	public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale,
	final boolean hasSpaceBefore) {
	// Quick description of what we want to do:
	// CAP_MODE_CHARACTERS is always on.
	// CAP_MODE_WORDS is on if there is some whitespace before the cursor.
	// CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
	// of a sentence just before that.
	// We ignore opening parentheses and the like just before the cursor for purposes of
	// finding whitespace for WORDS and SENTENCES modes.
	// The end of a sentence ends with a period, question mark or exclamation mark. If it's
	// a period, it also needs not to be an abbreviation, which means it also needs to either
	// be immediately preceded by punctuation, or by a string of only letters with single
	// periods interleaved.

	// Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
	if ((reqModes & (TextUtils.CAP_MODE_WORDS \| TextUtils.CAP_MODE_SENTENCES)) == 0) {
	// Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
	// evaluated MODE_CHARACTERS, we can return.
	return TextUtils.CAP_MODE_CHARACTERS & reqModes;
	}

	// Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
	// opening parentheses, brackets, opening quotes, everything that opens a span of
	// text in the linguistic sense. In RTL languages, this is still an opening sign, although
	// it may look like a right parenthesis for example. We also include double quote and
	// single quote since they aren't start punctuation in the unicode sense, but should still
	// be skipped for English. TODO: does this depend on the language?
	int i;
	if (hasSpaceBefore) {
	i = cs.length() + 1;
	} else {
	for (i = cs.length(); i > 0; i--) {
	final char c = cs.charAt(i - 1);
	if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
	&& Character.getType(c) != Character.START_PUNCTUATION) {
	break;
	}
	}
	}

	// We are now on the character that precedes any starting punctuation, so in the most
	// frequent case this will be whitespace or a letter, although it may occasionally be a
	// start of line, or some symbol.

	// Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
	// we go back over any space or tab char sitting there. We find the start of a paragraph
	// if the first char that's not a space or tab is a start of line (as in \n, start of text,
	// or some other similar characters).
	int j = i;
	char prevChar = Constants.CODE_SPACE;
	if (hasSpaceBefore) --j;
	while (j > 0) {
	prevChar = cs.charAt(j - 1);
	if (!Character.isSpaceChar(prevChar) && prevChar != Constants.CODE_TAB) break;
	j--;
	}
	if (j <= 0 \|\| Character.isWhitespace(prevChar)) {
	// There are only spacing chars between the start of the paragraph and the cursor,
	// defined as a isWhitespace() char that is neither a isSpaceChar() nor a tab. Both
	// MODE_WORDS and MODE_SENTENCES should be active.
	return (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_WORDS
	\| TextUtils.CAP_MODE_SENTENCES) & reqModes;
	}
	if (i == j) {
	// If we don't have whitespace before index i, it means neither MODE_WORDS
	// nor mode sentences should be on so we can return right away.
	return TextUtils.CAP_MODE_CHARACTERS & reqModes;
	}
	if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
	// Here we know we have whitespace before the cursor (if not, we returned in the above
	// if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
	// MODE_SENTENCES so we can return right away.
	return (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_WORDS) & reqModes;
	}
	// Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
	// we know that MODE_SENTENCES is being requested.

	// Step 4 : Search for MODE_SENTENCES.
	// English is a special case in that "American typography" rules, which are the most common
	// in English, state that a sentence terminator immediately following a quotation mark
	// should be swapped with it and de-duplicated (included in the quotation mark),
	// e.g. <<Did he say, "let's go home?">>
	// No other language has such a rule as far as I know, instead putting inside the quotation
	// mark as the exact thing quoted and handling the surrounding punctuation independently,
	// e.g. <<Did he say, "let's go home"?>>
	// Hence, specifically for English, we treat this special case here.
	if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
	for (; j > 0; j--) {
	// Here we look to go over any closing punctuation. This is because in dominant
	// variants of English, the final period is placed within double quotes and maybe
	// other closing punctuation signs. This is generally not true in other languages.
	final char c = cs.charAt(j - 1);
	if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
	&& Character.getType(c) != Character.END_PUNCTUATION) {
	break;
	}
	}
	}

	if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
	char c = cs.charAt(--j);

	// We found the next interesting chunk of text ; next we need to determine if it's the
	// end of a sentence. If we have a question mark or an exclamation mark, it's the end of
	// a sentence. If it's neither, the only remaining case is the period so we get the opposite
	// case out of the way.
	if (c == Constants.CODE_QUESTION_MARK \|\| c == Constants.CODE_EXCLAMATION_MARK) {
	return (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_SENTENCES) & reqModes;
	}
	if (c != Constants.CODE_PERIOD \|\| j <= 0) {
	return (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_WORDS) & reqModes;
	}

	// We found out that we have a period. We need to determine if this is a full stop or
	// otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
	// looks like (\w\.){2,}
	// To find out, we will have a simple state machine with the following states :
	// START, WORD, PERIOD, ABBREVIATION
	// On START : (just before the first period)
	// letter => WORD
	// whitespace => end with no caps (it was a stand-alone period)
	// otherwise => end with caps (several periods/symbols in a row)
	// On WORD : (within the word just before the first period)
	// letter => WORD
	// period => PERIOD
	// otherwise => end with caps (it was a word with a full stop at the end)
	// On PERIOD : (period within a potential abbreviation)
	// letter => LETTER
	// otherwise => end with caps (it was not an abbreviation)
	// On LETTER : (letter within a potential abbreviation)
	// letter => LETTER
	// period => PERIOD
	// otherwise => end with no caps (it was an abbreviation)
	// "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
	// should capitalize.

	final int START = 0;
	final int WORD = 1;
	final int PERIOD = 2;
	final int LETTER = 3;
	final int caps = (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_WORDS
	\| TextUtils.CAP_MODE_SENTENCES) & reqModes;
	final int noCaps = (TextUtils.CAP_MODE_CHARACTERS \| TextUtils.CAP_MODE_WORDS) & reqModes;
	int state = START;
	while (j > 0) {
	c = cs.charAt(--j);
	switch (state) {
	case START:
	if (Character.isLetter(c)) {
	state = WORD;
	} else if (Character.isWhitespace(c)) {
	return noCaps;
	} else {
	return caps;
	}
	break;
	case WORD:
	if (Character.isLetter(c)) {
	state = WORD;
	} else if (c == Constants.CODE_PERIOD) {
	state = PERIOD;
	} else {
	return caps;
	}
	break;
	case PERIOD:
	if (Character.isLetter(c)) {
	state = LETTER;
	} else {
	return caps;
	}
	break;
	case LETTER:
	if (Character.isLetter(c)) {
	state = LETTER;
	} else if (c == Constants.CODE_PERIOD) {
	state = PERIOD;
	} else {
	return noCaps;
	}
	}
	}
	// Here we arrived at the start of the line. This should behave exactly like whitespace.
	return (START == state \|\| LETTER == state) ? noCaps : caps;
	}
	}