src/com/google/common/base/CharMatcher.java - platform/external/guava - Git at Google

 /*
  * Copyright (C) 2008 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.common.base;

 import com.google.common.annotations.GwtCompatible;
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;

 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;

 /**
  * Determines a true or false value for any Java {@code char} value, just as
  * {@link Predicate} does for any {@link Object}. Also offers basic text
  * processing methods based on this function. Implementations are strongly
  * encouraged to be side-effect-free and immutable.
  *
  * <p>Throughout the documentation of this class, the phrase "matching
  * character" is used to mean "any character {@code c} for which {@code
  * this.matches(c)} returns {@code true}".
  *
  * <p><b>Note:</b> This class deals only with {@code char} values; it does not
  * understand supplementary Unicode code points in the range {@code 0x10000} to
  * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String}
  * using surrogate pairs, and a {@code CharMatcher} treats these just as two
  * separate characters.
  *
  * @author Kevin Bourrillion
  * @since 2009.09.15 <b>tentative</b>
  */
 @GwtCompatible
 public abstract class CharMatcher implements Predicate<Character> {

   // Constants

   // Excludes 2000-2000a, which is handled as a range
   private static final String BREAKING_WHITESPACE_CHARS =
       "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";

   // Excludes 2007, which is handled as a gap in a pair of ranges
   private static final String NON_BREAKING_WHITESPACE_CHARS =
       "\u00a0\u180e\u202f";

   /**
    * Determines whether a character is whitespace according to the latest
    * Unicode standard, as illustrated
    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
    * This is not the same definition used by other Java APIs. See a comparison
    * of several definitions of "whitespace" at
    * <a href="TODO">(TODO)</a>.
    *
    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this
    * constant to keep it up to date.
    */
   public static final CharMatcher WHITESPACE =
       anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
           .or(inRange('\u2000', '\u200a'));

   /**
    * Determines whether a character is a breaking whitespace (that is,
    * a whitespace which can be interpreted as a break between words
    * for formatting purposes).  See {@link #WHITESPACE} for a discussion
    * of that term.
    *
    * @since 2010.01.04 <b>tentative</b>
    */
   public static final CharMatcher BREAKING_WHITESPACE =
       anyOf(BREAKING_WHITESPACE_CHARS)
           .or(inRange('\u2000', '\u2006'))
           .or(inRange('\u2008', '\u200a'));

   /**
    * Determines whether a character is ASCII, meaning that its code point is
    * less than 128.
    */
   public static final CharMatcher ASCII = inRange('\0', '\u007f');

   /**
    * Determines whether a character is a digit according to
    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
    */
   public static final CharMatcher DIGIT;

   static {
     CharMatcher digit = inRange('0', '9');
     String zeroes =
         "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
             + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
             + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
     for (char base : zeroes.toCharArray()) {
       digit = digit.or(inRange(base, (char) (base + 9)));
     }
     DIGIT = digit;
   }

   /**
    * Determines whether a character is whitespace according to {@link
    * Character#isWhitespace(char) Java's definition}; it is usually preferable
    * to use {@link #WHITESPACE}. See a comparison of several definitions of
    * "whitespace" at <a href="http://go/white+space">go/white+space</a>.
    */
   public static final CharMatcher JAVA_WHITESPACE
       = inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
       .or(inRange('\u001c', '\u0020'))
       .or(is('\u1680'))
       .or(is('\u180e'))
       .or(inRange('\u2000', '\u2006'))
       .or(inRange('\u2008', '\u200b'))
       .or(inRange('\u2028', '\u2029'))
       .or(is('\u205f'))
       .or(is('\u3000'));

   /**
    * Determines whether a character is a digit according to {@link
    * Character#isDigit(char) Java's definition}. If you only care to match
    * ASCII digits, you can use {@code inRange('0', '9')}.
    */
   public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
     @Override public boolean matches(char c) {
       return Character.isDigit(c);
     }
   };

   /**
    * Determines whether a character is a letter according to {@link
    * Character#isLetter(char) Java's definition}. If you only care to match
    * letters of the Latin alphabet, you can use {@code
    * inRange('a', 'z').or(inRange('A', 'Z'))}.
    */
   public static final CharMatcher JAVA_LETTER = new CharMatcher() {
     @Override public boolean matches(char c) {
       return Character.isLetter(c);
     }
   };

   /**
    * Determines whether a character is a letter or digit according to {@link
    * Character#isLetterOrDigit(char) Java's definition}.
    */
   public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
     @Override public boolean matches(char c) {
       return Character.isLetterOrDigit(c);
     }
   };

   /**
    * Determines whether a character is upper case according to {@link
    * Character#isUpperCase(char) Java's definition}.
    */
   public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
     @Override public boolean matches(char c) {
       return Character.isUpperCase(c);
     }
   };

   /**
    * Determines whether a character is lower case according to {@link
    * Character#isLowerCase(char) Java's definition}.
    */
   public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
     @Override public boolean matches(char c) {
       return Character.isLowerCase(c);
     }
   };

   /**
    * Determines whether a character is an ISO control character according to
    * {@link Character#isISOControl(char)}.
    */
   public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f')
       .or(inRange('\u007f', '\u009f'));

   /**
    * Determines whether a character is invisible; that is, if its Unicode
    * category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
    * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according
    * to ICU4J.
    */
   public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
       .or(inRange('\u007f', '\u00a0'))
       .or(is('\u00ad'))
       .or(inRange('\u0600', '\u0603'))
       .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
       .or(inRange('\u2000', '\u200f'))
       .or(inRange('\u2028', '\u202f'))
       .or(inRange('\u205f', '\u2064'))
       .or(inRange('\u206a', '\u206f'))
       .or(is('\u3000'))
       .or(inRange('\ud800', '\uf8ff'))
       .or(anyOf("\ufeff\ufff9\ufffa\ufffb"));

   /**
    * Determines whether a character is single-width (not double-width).  When
    * in doubt, this matcher errs on the side of returning {@code false} (that
    * is, it tends to assume a character is double-width).
    *
    * <b>Note:</b> as the reference file evolves, we will modify this constant
    * to keep it up to date.
    */
   public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
       .or(is('\u05be'))
       .or(inRange('\u05d0', '\u05ea'))
       .or(is('\u05f3'))
       .or(is('\u05f4'))
       .or(inRange('\u0600', '\u06ff'))
       .or(inRange('\u0750', '\u077f'))
       .or(inRange('\u0e00', '\u0e7f'))
       .or(inRange('\u1e00', '\u20af'))
       .or(inRange('\u2100', '\u213a'))
       .or(inRange('\ufb50', '\ufdff'))
       .or(inRange('\ufe70', '\ufeff'))
       .or(inRange('\uff61', '\uffdc'));

   /** Matches any character. */
   public static final CharMatcher ANY = new CharMatcher() {
     @Override public boolean matches(char c) {
       return true;
     }

     @Override public int indexIn(CharSequence sequence) {
       return (sequence.length() == 0) ? -1 : 0;
     }
     @Override public int indexIn(CharSequence sequence, int start) {
       int length = sequence.length();
       Preconditions.checkPositionIndex(start, length);
       return (start == length) ? -1 : start;
     }
     @Override public int lastIndexIn(CharSequence sequence) {
       return sequence.length() - 1;
     }
     @Override public boolean matchesAllOf(CharSequence sequence) {
       checkNotNull(sequence);
       return true;
     }
     @Override public boolean matchesNoneOf(CharSequence sequence) {
       return sequence.length() == 0;
     }
     @Override public String removeFrom(CharSequence sequence) {
       checkNotNull(sequence);
       return "";
     }
     @Override public String replaceFrom(
         CharSequence sequence, char replacement) {
       char[] array = new char[sequence.length()];
       Arrays.fill(array, replacement);
       return new String(array);
     }
     @Override public String replaceFrom(
         CharSequence sequence, CharSequence replacement) {
       StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
       for (int i = 0; i < sequence.length(); i++) {
         retval.append(replacement);
       }
       return retval.toString();
     }
     @Override public String collapseFrom(CharSequence sequence, char replacement) {
       return (sequence.length() == 0) ? "" : String.valueOf(replacement);
     }
     @Override public String trimFrom(CharSequence sequence) {
       checkNotNull(sequence);
       return "";
     }
     @Override public int countIn(CharSequence sequence) {
       return sequence.length();
     }
     @Override public CharMatcher and(CharMatcher other) {
       return checkNotNull(other);
     }
     @Override public CharMatcher or(CharMatcher other) {
       checkNotNull(other);
       return this;
     }
     @Override public CharMatcher negate() {
       return NONE;
     }
     @Override public CharMatcher precomputed() {
       return this;
     }
   };

   /** Matches no characters. */
   public static final CharMatcher NONE = new CharMatcher() {
     @Override public boolean matches(char c) {
       return false;
     }

     @Override public int indexIn(CharSequence sequence) {
       checkNotNull(sequence);
       return -1;
     }
     @Override public int indexIn(CharSequence sequence, int start) {
       int length = sequence.length();
       Preconditions.checkPositionIndex(start, length);
       return -1;
     }
     @Override public int lastIndexIn(CharSequence sequence) {
       checkNotNull(sequence);
       return -1;
     }
     @Override public boolean matchesAllOf(CharSequence sequence) {
       return sequence.length() == 0;
     }
     @Override public boolean matchesNoneOf(CharSequence sequence) {
       checkNotNull(sequence);
       return true;
     }
     @Override public String removeFrom(CharSequence sequence) {
       return sequence.toString();
     }
     @Override public String replaceFrom(
         CharSequence sequence, char replacement) {
       return sequence.toString();
     }
     @Override public String replaceFrom(
         CharSequence sequence, CharSequence replacement) {
       checkNotNull(replacement);
       return sequence.toString();
     }
     @Override public String collapseFrom(
         CharSequence sequence, char replacement) {
       return sequence.toString();
     }
     @Override public String trimFrom(CharSequence sequence) {
       return sequence.toString();
     }
     @Override public int countIn(CharSequence sequence) {
       checkNotNull(sequence);
       return 0;
     }
     @Override public CharMatcher and(CharMatcher other) {
       checkNotNull(other);
       return this;
     }
     @Override public CharMatcher or(CharMatcher other) {
       return checkNotNull(other);
     }
     @Override public CharMatcher negate() {
       return ANY;
     }
     @Override protected void setBits(LookupTable table) {
     }
     @Override public CharMatcher precomputed() {
       return this;
     }
   };

   // Static factories

   /**
    * Returns a {@code char} matcher that matches only one specified character.
    */
   public static CharMatcher is(final char match) {
     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return c == match;
       }

       @Override public String replaceFrom(
           CharSequence sequence, char replacement) {
         return sequence.toString().replace(match, replacement);
       }
       @Override public CharMatcher and(CharMatcher other) {
         return other.matches(match) ? this : NONE;
       }
       @Override public CharMatcher or(CharMatcher other) {
         return other.matches(match) ? other : super.or(other);
       }
       @Override public CharMatcher negate() {
         return isNot(match);
       }
       @Override protected void setBits(LookupTable table) {
         table.set(match);
       }
       @Override public CharMatcher precomputed() {
         return this;
       }
     };
   }

   /**
    * Returns a {@code char} matcher that matches any character except the one
    * specified.
    *
    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
    */
   public static CharMatcher isNot(final char match) {
     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return c != match;
       }

       @Override public CharMatcher and(CharMatcher other) {
         return other.matches(match) ? super.and(other) : other;
       }
       @Override public CharMatcher or(CharMatcher other) {
         return other.matches(match) ? ANY : this;
       }
       @Override public CharMatcher negate() {
         return is(match);
       }
     };
   }

   /**
    * Returns a {@code char} matcher that matches any character present in the
    * given character sequence.
    */
   public static CharMatcher anyOf(final CharSequence sequence) {
     switch (sequence.length()) {
       case 0:
         return NONE;
       case 1:
         return is(sequence.charAt(0));
       case 2:
         final char match1 = sequence.charAt(0);
         final char match2 = sequence.charAt(1);
         return new CharMatcher() {
           @Override public boolean matches(char c) {
             return c == match1 || c == match2;
           }
           @Override protected void setBits(LookupTable table) {
             table.set(match1);
             table.set(match2);
           }
           @Override public CharMatcher precomputed() {
             return this;
           }
         };
     }

     final char[] chars = sequence.toString().toCharArray();
     Arrays.sort(chars); // not worth collapsing duplicates

     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return Arrays.binarySearch(chars, c) >= 0;
       }
       @Override protected void setBits(LookupTable table) {
         for (char c : chars) {
           table.set(c);
         }
       }
     };
   }

   /**
    * Returns a {@code char} matcher that matches any character not present in
    * the given character sequence.
    */
   public static CharMatcher noneOf(CharSequence sequence) {
     return anyOf(sequence).negate();
   }

   /**
    * Returns a {@code char} matcher that matches any character in a given range
    * (both endpoints are inclusive). For example, to match any lowercase letter
    * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}.
    *
    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
    */
   public static CharMatcher inRange(
       final char startInclusive, final char endInclusive) {
     checkArgument(endInclusive >= startInclusive);
     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return startInclusive <= c && c <= endInclusive;
       }
       @Override protected void setBits(LookupTable table) {
         char c = startInclusive;
         while (true) {
           table.set(c);
           if (c++ == endInclusive) {
             break;
           }
         }
       }
       @Override public CharMatcher precomputed() {
         return this;
       }
     };
   }

   /**
    * Returns a matcher with identical behavior to the given {@link
    * Character}-based predicate, but which operates on primitive {@code char}
    * instances instead.
    */
   public static CharMatcher forPredicate(
       final Predicate<? super Character> predicate) {
     checkNotNull(predicate);
     if (predicate instanceof CharMatcher) {
       return (CharMatcher) predicate;
     }
     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return predicate.apply(c);
       }
       @Override public boolean apply(Character character) {
         return predicate.apply(checkNotNull(character));
       }
     };
   }

   // Abstract methods

   /** Determines a true or false value for the given character. */
   public abstract boolean matches(char c);

   // Non-static factories

   /**
    * Returns a matcher that matches any character not matched by this matcher.
    */
   public CharMatcher negate() {
     final CharMatcher original = this;
     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return !original.matches(c);
       }

       @Override public boolean matchesAllOf(CharSequence sequence) {
         return original.matchesNoneOf(sequence);
       }
       @Override public boolean matchesNoneOf(CharSequence sequence) {
         return original.matchesAllOf(sequence);
       }
       @Override public int countIn(CharSequence sequence) {
         return sequence.length() - original.countIn(sequence);
       }
       @Override public CharMatcher negate() {
         return original;
       }
     };
   }

   /**
    * Returns a matcher that matches any character matched by both this matcher
    * and {@code other}.
    */
   public CharMatcher and(CharMatcher other) {
     return new And(Arrays.asList(this, checkNotNull(other)));
   }

   private static class And extends CharMatcher {
     List<CharMatcher> components;

     And(List<CharMatcher> components) {
       this.components = components; // Skip defensive copy (private)
     }

     @Override public boolean matches(char c) {
       for (CharMatcher matcher : components) {
         if (!matcher.matches(c)) {
           return false;
         }
       }
       return true;
     }

     @Override public CharMatcher and(CharMatcher other) {
       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
       newComponents.add(checkNotNull(other));
       return new And(newComponents);
     }
   }

   /**
    * Returns a matcher that matches any character matched by either this matcher
    * or {@code other}.
    */
   public CharMatcher or(CharMatcher other) {
     return new Or(Arrays.asList(this, checkNotNull(other)));
   }

   private static class Or extends CharMatcher {
     List<CharMatcher> components;

     Or(List<CharMatcher> components) {
       this.components = components; // Skip defensive copy (private)
     }

     @Override public boolean matches(char c) {
       for (CharMatcher matcher : components) {
         if (matcher.matches(c)) {
           return true;
         }
       }
       return false;
     }

     @Override public CharMatcher or(CharMatcher other) {
       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
       newComponents.add(checkNotNull(other));
       return new Or(newComponents);
     }

     @Override protected void setBits(LookupTable table) {
       for (CharMatcher matcher : components) {
         matcher.setBits(table);
       }
     }
   }

   /**
    * Returns a {@code char} matcher functionally equivalent to this one, but
    * which may be faster to query than the original; your mileage may vary.
    * Precomputation takes time and is likely to be worthwhile only if the
    * precomputed matcher is queried many thousands of times.
    *
    * <p>This method has no effect (returns {@code this}) when called in GWT:
    * it's unclear whether a precomputed matcher is faster, but it certainly
    * consumes more memory, which doesn't seem like a worthwhile tradeoff in a
    * browser.
    */
   public CharMatcher precomputed() {
     return Platform.precomputeCharMatcher(this);
   }

   /**
    * This is the actual implementation of {@link #precomputed}, but we bounce
    * calls through a method on {@link Platform} so that we can have different
    * behavior in GWT.
    *
    * <p>The default precomputation is to cache the configuration of the original
    * matcher in an eight-kilobyte bit array. In some situations this produces a
    * matcher which is faster to query than the original.
    *
    * <p>The default implementation creates a new bit array and passes it to
    * {@link #setBits(LookupTable)}.
    */
   CharMatcher precomputedInternal() {
     final LookupTable table = new LookupTable();
     setBits(table);

     return new CharMatcher() {
       @Override public boolean matches(char c) {
         return table.get(c);
       }

       // TODO: make methods like negate() smart

       @Override public CharMatcher precomputed() {
         return this;
       }
     };
   }

   /**
    * For use by implementors; sets the bit corresponding to each character ('\0'
    * to '{@literal \}uFFFF') that matches this matcher in the given bit array,
    * leaving all other bits untouched.
    *
    * <p>The default implementation loops over every possible character value,
    * invoking {@link #matches} for each one.
    */
   protected void setBits(LookupTable table) {
     char c = Character.MIN_VALUE;
     while (true) {
       if (matches(c)) {
         table.set(c);
       }
       if (c++ == Character.MAX_VALUE) {
         break;
       }
     }
   }

   /**
    * A bit array with one bit per {@code char} value, used by {@link
    * CharMatcher#precomputed}.
    *
    * <p>TODO: possibly share a common BitArray class with BloomFilter
    * and others... a simpler java.util.BitSet.
    */
   protected static class LookupTable {
     int[] data = new int[2048];

     void set(char index) {
       data[index >> 5] |= (1 << index);
     }
     boolean get(char index) {
       return (data[index >> 5] & (1 << index)) != 0;
     }
   }

   // Text processing routines

   /**
    * Returns {@code true} if a character sequence contains only matching
    * characters.
    *
    * <p>The default implementation iterates over the sequence, invoking {@link
    * #matches} for each character, until this returns {@code false} or the end
    * is reached.
    *
    * @param sequence the character sequence to examine, possibly empty
    * @return {@code true} if this matcher matches every character in the
    *     sequence, including when the sequence is empty
    */
   public boolean matchesAllOf(CharSequence sequence) {
     for (int i = sequence.length() - 1; i >= 0; i--) {
       if (!matches(sequence.charAt(i))) {
         return false;
       }
     }
     return true;
   }

   /**
    * Returns {@code true} if a character sequence contains no matching
    * characters.
    *
    * <p>The default implementation iterates over the sequence, invoking {@link
    * #matches} for each character, until this returns {@code false} or the end is
    * reached.
    *
    * @param sequence the character sequence to examine, possibly empty
    * @return {@code true} if this matcher matches every character in the
    *     sequence, including when the sequence is empty
    */
   public boolean matchesNoneOf(CharSequence sequence) {
     return indexIn(sequence) == -1;
   }

   // TODO: perhaps add matchesAnyOf()

   /**
    * Returns the index of the first matching character in a character sequence,
    * or {@code -1} if no matching character is present.
    *
    * <p>The default implementation iterates over the sequence in forward order
    * calling {@link #matches} for each character.
    *
    * @param sequence the character sequence to examine from the beginning
    * @return an index, or {@code -1} if no character matches
    */
   public int indexIn(CharSequence sequence) {
     int length = sequence.length();
     for (int i = 0; i < length; i++) {
       if (matches(sequence.charAt(i))) {
         return i;
       }
     }
     return -1;
   }

   /**
    * Returns the index of the first matching character in a character sequence,
    * starting from a given position, or {@code -1} if no character matches after
    * that position.
    *
    * <p>The default implementation iterates over the sequence in forward order,
    * beginning at {@code start}, calling {@link #matches} for each character.
    *
    * @param sequence the character sequence to examine
    * @param start the first index to examine; must be nonnegative and no
    *     greater than {@code sequence.length()}
    * @return the index of the first matching character, guaranteed to be no less
    *     than {@code start}, or {@code -1} if no character matches
    * @throws IndexOutOfBoundsException if start is negative or greater than
    *     {@code sequence.length()}
    */
   public int indexIn(CharSequence sequence, int start) {
     int length = sequence.length();
     Preconditions.checkPositionIndex(start, length);
     for (int i = start; i < length; i++) {
       if (matches(sequence.charAt(i))) {
         return i;
       }
     }
     return -1;
   }

   /**
    * Returns the index of the last matching character in a character sequence,
    * or {@code -1} if no matching character is present.
    *
    * <p>The default implementation iterates over the sequence in reverse order
    * calling {@link #matches} for each character.
    *
    * @param sequence the character sequence to examine from the end
    * @return an index, or {@code -1} if no character matches
    */
   public int lastIndexIn(CharSequence sequence) {
     for (int i = sequence.length() - 1; i >= 0; i--) {
       if (matches(sequence.charAt(i))) {
         return i;
       }
     }
     return -1;
   }

   /**
    * Returns the number of matching characters found in a character sequence.
    */
   public int countIn(CharSequence sequence) {
     int count = 0;
     for (int i = 0; i < sequence.length(); i++) {
       if (matches(sequence.charAt(i))) {
         count++;
       }
     }
     return count;
   }

   /**
    * Returns a string containing all non-matching characters of a character
    * sequence, in order. For example: <pre>   {@code
    *
    *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
    *
    * ... returns {@code "bzr"}.
    */
   public String removeFrom(CharSequence sequence) {
     String string = sequence.toString();
     int pos = indexIn(string);
     if (pos == -1) {
       return string;
     }

     char[] chars = string.toCharArray();
     int spread = 1;

     // This unusual loop comes from extensive benchmarking
     OUT:
     while (true) {
       pos++;
       while (true) {
         if (pos == chars.length) {
           break OUT;
         }
         if (matches(chars[pos])) {
           break;
         }
         chars[pos - spread] = chars[pos];
         pos++;
       }
       spread++;
     }
     return new String(chars, 0, pos - spread);
   }

   /**
    * Returns a string containing all matching characters of a character
    * sequence, in order. For example: <pre>   {@code
    *
    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
    *
    * ... returns {@code "aaa"}.
    */
   public String retainFrom(CharSequence sequence) {
     return negate().removeFrom(sequence);
   }

   /**
    * Returns a string copy of the input character sequence, with each character
    * that matches this matcher replaced by a given replacement character. For
    * example: <pre>   {@code
    *
    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
    *
    * ... returns {@code "rodor"}.
    *
    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
    * the first matching character, then iterates the remainder of the sequence
    * calling {@link #matches(char)} for each character.
    *
    * @param sequence the character sequence to replace matching characters in
    * @param replacement the character to append to the result string in place of
    *     each matching character in {@code sequence}
    * @return the new string
    */
   public String replaceFrom(CharSequence sequence, char replacement) {
     String string = sequence.toString();
     int pos = indexIn(string);
     if (pos == -1) {
       return string;
     }
     char[] chars = string.toCharArray();
     chars[pos] = replacement;
     for (int i = pos + 1; i < chars.length; i++) {
       if (matches(chars[i])) {
         chars[i] = replacement;
       }
     }
     return new String(chars);
   }

   /**
    * Returns a string copy of the input character sequence, with each character
    * that matches this matcher replaced by a given replacement sequence. For
    * example: <pre>   {@code
    *
    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
    *
    * ... returns {@code "yoohoo"}.
    *
    * <p><b>Note:</b> If the replacement is a fixed string with only one character,
    * you are better off calling {@link #replaceFrom(CharSequence, char)} directly.
    *
    * @param sequence the character sequence to replace matching characters in
    * @param replacement the characters to append to the result string in place
    *     of each matching character in {@code sequence}
    * @return the new string
    */
   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
     int replacementLen = replacement.length();
     if (replacementLen == 0) {
       return removeFrom(sequence);
     }
     if (replacementLen == 1) {
       return replaceFrom(sequence, replacement.charAt(0));
     }

     String string = sequence.toString();
     int pos = indexIn(string);
     if (pos == -1) {
       return string;
     }

     int len = string.length();
     StringBuilder buf = new StringBuilder((int) (len * 1.5) + 16);

     int oldpos = 0;
     do {
       buf.append(string, oldpos, pos);
       buf.append(replacement);
       oldpos = pos + 1;
       pos = indexIn(string, oldpos);
     } while (pos != -1);

     buf.append(string, oldpos, len);
     return buf.toString();
   }

   /**
    * Returns a substring of the input character sequence that omits all
    * characters this matcher matches from the beginning and from the end of the
    * string. For example: <pre> {@code
    *
    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
    *
    * ... returns {@code "cat"}.
    *
    * <p>Note that<pre>   {@code
    *
    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
    *
    * ... is equivalent to {@link String#trim()}.
    */
   public String trimFrom(CharSequence sequence) {
     int len = sequence.length();
     int first;
     int last;

     for (first = 0; first < len; first++) {
       if (!matches(sequence.charAt(first))) {
         break;
       }
     }
     for (last = len - 1; last > first; last--) {
       if (!matches(sequence.charAt(last))) {
         break;
       }
     }

     return sequence.subSequence(first, last + 1).toString();
   }

   /**
    * Returns a substring of the input character sequence that omits all
    * characters this matcher matches from the beginning of the
    * string. For example: <pre> {@code
    *
    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
    *
    * ... returns {@code "catbab"}.
    */
   public String trimLeadingFrom(CharSequence sequence) {
     int len = sequence.length();
     int first;

     for (first = 0; first < len; first++) {
       if (!matches(sequence.charAt(first))) {
         break;
       }
     }

     return sequence.subSequence(first, len).toString();
   }

   /**
    * Returns a substring of the input character sequence that omits all
    * characters this matcher matches from the end of the
    * string. For example: <pre> {@code
    *
    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
    *
    * ... returns {@code "abacat"}.
    */
   public String trimTrailingFrom(CharSequence sequence) {
     int len = sequence.length();
     int last;

     for (last = len - 1; last >= 0; last--) {
       if (!matches(sequence.charAt(last))) {
         break;
       }
     }

     return sequence.subSequence(0, last + 1).toString();
   }

   /**
    * Returns a string copy of the input character sequence, with each group of
    * consecutive characters that match this matcher replaced by a single
    * replacement character. For example: <pre>   {@code
    *
    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
    *
    * ... returns {@code "b-p-r"}.
    *
    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
    * the first matching character, then iterates the remainder of the sequence
    * calling {@link #matches(char)} for each character.
    *
    * @param sequence the character sequence to replace matching groups of
    *     characters in
    * @param replacement the character to append to the result string in place of
    *     each group of matching characters in {@code sequence}
    * @return the new string
    */
   public String collapseFrom(CharSequence sequence, char replacement) {
     int first = indexIn(sequence);
     if (first == -1) {
       return sequence.toString();
     }

     // TODO: this implementation can probably be made faster.

     StringBuilder builder = new StringBuilder(sequence.length())
         .append(sequence.subSequence(0, first))
         .append(replacement);
     boolean in = true;
     for (int i = first + 1; i < sequence.length(); i++) {
       char c = sequence.charAt(i);
       if (apply(c)) {
         if (!in) {
           builder.append(replacement);
           in = true;
         }
       } else {
         builder.append(c);
         in = false;
       }
     }
     return builder.toString();
   }

   /**
    * Collapses groups of matching characters exactly as {@link #collapseFrom}
    * does, except that groups of matching characters at the start or end of the
    * sequence are removed without replacement.
    */
   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
     int first = negate().indexIn(sequence);
     if (first == -1) {
       return ""; // everything matches. nothing's left.
     }
     StringBuilder builder = new StringBuilder(sequence.length());
     boolean inMatchingGroup = false;
     for (int i = first; i < sequence.length(); i++) {
       char c = sequence.charAt(i);
       if (apply(c)) {
         inMatchingGroup = true;
       } else {
         if (inMatchingGroup) {
           builder.append(replacement);
           inMatchingGroup = false;
         }
         builder.append(c);
       }
     }
     return builder.toString();
   }

   // Predicate interface

   /**
    * Returns {@code true} if this matcher matches the given character.
    *
    * @throws NullPointerException if {@code character} is null
    */
   /*@Override*/ public boolean apply(Character character) {
     return matches(character);
   }
 }