src/string-search.h - platform/external/v8 - Git at Google

 // Copyright 2010 the V8 project authors. All rights reserved.
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 //       notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 //       copyright notice, this list of conditions and the following
 //       disclaimer in the documentation and/or other materials provided
 //       with the distribution.
 //     * Neither the name of Google Inc. nor the names of its
 //       contributors may be used to endorse or promote products derived
 //       from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #ifndef V8_STRING_SEARCH_H_
 #define V8_STRING_SEARCH_H_

 namespace v8 {
 namespace internal {


 //---------------------------------------------------------------------
 // String Search object.
 //---------------------------------------------------------------------

 // Class holding constants and methods that apply to all string search variants,
 // independently of subject and pattern char size.
 class StringSearchBase {
  protected:
   // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
   // limit, we can fix the size of tables. For a needle longer than this limit,
   // search will not be optimal, since we only build tables for a suffix
   // of the string, but it is a safe approximation.
   static const int kBMMaxShift = 250;

   // Reduce alphabet to this size.
   // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
   // proportional to the input alphabet. We reduce the alphabet size by
   // equating input characters modulo a smaller alphabet size. This gives
   // a potentially less efficient searching, but is a safe approximation.
   // For needles using only characters in the same Unicode 256-code point page,
   // there is no search speed degradation.
   static const int kAsciiAlphabetSize = 128;
   static const int kUC16AlphabetSize = 256;

   // Bad-char shift table stored in the state. It's length is the alphabet size.
   // For patterns below this length, the skip length of Boyer-Moore is too short
   // to compensate for the algorithmic overhead compared to simple brute force.
   static const int kBMMinPatternLength = 7;

   static inline bool IsAsciiString(Vector<const char>) {
     return true;
   }

   static inline bool IsAsciiString(Vector<const uc16> string) {
     return String::IsAscii(string.start(), string.length());
   }

   // The following tables are shared by all searches.
   // TODO(lrn): Introduce a way for a pattern to keep its tables
   // between searches (e.g., for an Atom RegExp).

   // Store for the BoyerMoore(Horspool) bad char shift table.
   static int kBadCharShiftTable[kUC16AlphabetSize];
   // Store for the BoyerMoore good suffix shift table.
   static int kGoodSuffixShiftTable[kBMMaxShift + 1];
   // Table used temporarily while building the BoyerMoore good suffix
   // shift table.
   static int kSuffixTable[kBMMaxShift + 1];
 };


 template <typename PatternChar, typename SubjectChar>
 class StringSearch : private StringSearchBase {
  public:
   explicit StringSearch(Vector<const PatternChar> pattern)
       : pattern_(pattern),
         start_(Max(0, pattern.length() - kBMMaxShift)) {
     if (sizeof(PatternChar) > sizeof(SubjectChar)) {
       if (!IsAsciiString(pattern_)) {
         strategy_ = &FailSearch;
         return;
       }
     }
     int pattern_length = pattern_.length();
     if (pattern_length < kBMMinPatternLength) {
       if (pattern_length == 1) {
         strategy_ = &SingleCharSearch;
         return;
       }
       strategy_ = &LinearSearch;
       return;
     }
     strategy_ = &InitialSearch;
   }

   int Search(Vector<const SubjectChar> subject, int index) {
     return strategy_(this, subject, index);
   }

   static inline int AlphabetSize() {
     if (sizeof(PatternChar) == 1) {
       // ASCII needle.
       return kAsciiAlphabetSize;
     } else {
       ASSERT(sizeof(PatternChar) == 2);
       // UC16 needle.
       return kUC16AlphabetSize;
     }
   }

  private:
   typedef int (*SearchFunction)(  // NOLINT - it's not a cast!
       StringSearch<PatternChar, SubjectChar>*,
       Vector<const SubjectChar>,
       int);

   static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
                         Vector<const SubjectChar>,
                         int) {
     return -1;
   }

   static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
                               Vector<const SubjectChar> subject,
                               int start_index);

   static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
                           Vector<const SubjectChar> subject,
                           int start_index);

   static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
                            Vector<const SubjectChar> subject,
                            int start_index);

   static int BoyerMooreHorspoolSearch(
       StringSearch<PatternChar, SubjectChar>* search,
       Vector<const SubjectChar> subject,
       int start_index);

   static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
                               Vector<const SubjectChar> subject,
                               int start_index);

   void PopulateBoyerMooreHorspoolTable();

   void PopulateBoyerMooreTable();

   static inline int CharOccurrence(int* bad_char_occurrence,
                                    SubjectChar char_code) {
     if (sizeof(SubjectChar) == 1) {
       return bad_char_occurrence[static_cast<int>(char_code)];
     }
     if (sizeof(PatternChar) == 1) {
       if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) {
         return -1;
       }
       return bad_char_occurrence[static_cast<unsigned int>(char_code)];
     }
     // Both pattern and subject are UC16. Reduce character to equivalence class.
     int equiv_class = char_code % kUC16AlphabetSize;
     return bad_char_occurrence[equiv_class];
   }

   // Return a table covering the last kBMMaxShift+1 positions of
   // pattern.
   int* bad_char_table() {
     return kBadCharShiftTable;
   }

   int* good_suffix_shift_table() {
     // Return biased pointer that maps the range  [start_..pattern_.length()
     // to the kGoodSuffixShiftTable array.
     return kGoodSuffixShiftTable - start_;
   }

   int* suffix_table() {
     // Return biased pointer that maps the range  [start_..pattern_.length()
     // to the kSuffixTable array.
     return kSuffixTable - start_;
   }

   // The pattern to search for.
   Vector<const PatternChar> pattern_;
   // Pointer to implementation of the search.
   SearchFunction strategy_;
   // Cache value of Max(0, pattern_length() - kBMMaxShift)
   int start_;
 };


 //---------------------------------------------------------------------
 // Single Character Pattern Search Strategy
 //---------------------------------------------------------------------

 template <typename PatternChar, typename SubjectChar>
 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
     StringSearch<PatternChar, SubjectChar>* search,
     Vector<const SubjectChar> subject,
     int index) {
   ASSERT_EQ(1, search->pattern_.length());
   PatternChar pattern_first_char = search->pattern_[0];
   int i = index;
   if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
     const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
         memchr(subject.start() + i,
                pattern_first_char,
                subject.length() - i));
     if (pos == NULL) return -1;
     return static_cast<int>(pos - subject.start());
   } else {
     if (sizeof(PatternChar) > sizeof(SubjectChar)) {
       if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) {
         return -1;
       }
     }
     SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
     int n = subject.length();
     while (i < n) {
       if (subject[i++] == search_char) return i - 1;
     }
     return -1;
   }
 }

 //---------------------------------------------------------------------
 // Linear Search Strategy
 //---------------------------------------------------------------------


 template <typename PatternChar, typename SubjectChar>
 static inline bool CharCompare(const PatternChar* pattern,
                                const SubjectChar* subject,
                                int length) {
   ASSERT(length > 0);
   int pos = 0;
   do {
     if (pattern[pos] != subject[pos]) {
       return false;
     }
     pos++;
   } while (pos < length);
   return true;
 }


 // Simple linear search for short patterns. Never bails out.
 template <typename PatternChar, typename SubjectChar>
 int StringSearch<PatternChar, SubjectChar>::LinearSearch(
     StringSearch<PatternChar, SubjectChar>* search,
     Vector<const SubjectChar> subject,
     int index) {
   Vector<const PatternChar> pattern = search->pattern_;
   ASSERT(pattern.length() > 1);
   int pattern_length = pattern.length();
   PatternChar pattern_first_char = pattern[0];
   int i = index;
   int n = subject.length() - pattern_length;
   while (i <= n) {
     if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
       const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
           memchr(subject.start() + i,
                  pattern_first_char,
                  n - i + 1));
       if (pos == NULL) return -1;
       i = static_cast<int>(pos - subject.start()) + 1;
     } else {
       if (subject[i++] != pattern_first_char) continue;
     }
     // Loop extracted to separate function to allow using return to do
     // a deeper break.
     if (CharCompare(pattern.start() + 1,
                     subject.start() + i,
                     pattern_length - 1)) {
       return i - 1;
     }
   }
   return -1;
 }

 //---------------------------------------------------------------------
 // Boyer-Moore string search
 //---------------------------------------------------------------------

 template <typename PatternChar, typename SubjectChar>
 int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
     StringSearch<PatternChar, SubjectChar>* search,
     Vector<const SubjectChar> subject,
     int start_index) {
   Vector<const PatternChar> pattern = search->pattern_;
   int subject_length = subject.length();
   int pattern_length = pattern.length();
   // Only preprocess at most kBMMaxShift last characters of pattern.
   int start = search->start_;

   int* bad_char_occurence = search->bad_char_table();
   int* good_suffix_shift = search->good_suffix_shift_table();

   PatternChar last_char = pattern[pattern_length - 1];
   int index = start_index;
   // Continue search from i.
   while (index <= subject_length - pattern_length) {
     int j = pattern_length - 1;
     int c;
     while (last_char != (c = subject[index + j])) {
       int shift =
           j - CharOccurrence(bad_char_occurence, c);
       index += shift;
       if (index > subject_length - pattern_length) {
         return -1;
       }
     }
     while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
     if (j < 0) {
       return index;
     } else if (j < start) {
       // we have matched more than our tables allow us to be smart about.
       // Fall back on BMH shift.
       index += pattern_length - 1
           - CharOccurrence(bad_char_occurence,
                            static_cast<SubjectChar>(last_char));
     } else {
       int gs_shift = good_suffix_shift[j + 1];
       int bc_occ =
           CharOccurrence(bad_char_occurence, c);
       int shift = j - bc_occ;
       if (gs_shift > shift) {
         shift = gs_shift;
       }
       index += shift;
     }
   }

   return -1;
 }


 template <typename PatternChar, typename SubjectChar>
 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
   int pattern_length = pattern_.length();
   const PatternChar* pattern = pattern_.start();
   // Only look at the last kBMMaxShift characters of pattern (from start_
   // to pattern_length).
   int start = start_;
   int length = pattern_length - start;

   // Biased tables so that we can use pattern indices as table indices,
   // even if we only cover the part of the pattern from offset start.
   int* shift_table = good_suffix_shift_table();
   int* suffix_table = this->suffix_table();

   // Initialize table.
   for (int i = start; i < pattern_length; i++) {
     shift_table[i] = length;
   }
   shift_table[pattern_length] = 1;
   suffix_table[pattern_length] = pattern_length + 1;

   // Find suffixes.
   PatternChar last_char = pattern[pattern_length - 1];
   int suffix = pattern_length + 1;
   {
     int i = pattern_length;
     while (i > start) {
       PatternChar c = pattern[i - 1];
       while (suffix <= pattern_length && c != pattern[suffix - 1]) {
         if (shift_table[suffix] == length) {
           shift_table[suffix] = suffix - i;
         }
         suffix = suffix_table[suffix];
       }
       suffix_table[--i] = --suffix;
       if (suffix == pattern_length) {
         // No suffix to extend, so we check against last_char only.
         while ((i > start) && (pattern[i - 1] != last_char)) {
           if (shift_table[pattern_length] == length) {
             shift_table[pattern_length] = pattern_length - i;
           }
           suffix_table[--i] = pattern_length;
         }
         if (i > start) {
           suffix_table[--i] = --suffix;
         }
       }
     }
   }
   // Build shift table using suffixes.
   if (suffix < pattern_length) {
     for (int i = start; i <= pattern_length; i++) {
       if (shift_table[i] == length) {
         shift_table[i] = suffix - start;
       }
       if (i == suffix) {
         suffix = suffix_table[suffix];
       }
     }
   }
 }

 //---------------------------------------------------------------------
 // Boyer-Moore-Horspool string search.
 //---------------------------------------------------------------------

 template <typename PatternChar, typename SubjectChar>
 int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
     StringSearch<PatternChar, SubjectChar>* search,
     Vector<const SubjectChar> subject,
     int start_index) {
   Vector<const PatternChar> pattern = search->pattern_;
   int subject_length = subject.length();
   int pattern_length = pattern.length();
   int* char_occurrences = search->bad_char_table();
   int badness = -pattern_length;

   // How bad we are doing without a good-suffix table.
   PatternChar last_char = pattern[pattern_length - 1];
   int last_char_shift = pattern_length - 1 -
       CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
   // Perform search
   int index = start_index;  // No matches found prior to this index.
   while (index <= subject_length - pattern_length) {
     int j = pattern_length - 1;
     int subject_char;
     while (last_char != (subject_char = subject[index + j])) {
       int bc_occ = CharOccurrence(char_occurrences, subject_char);
       int shift = j - bc_occ;
       index += shift;
       badness += 1 - shift;  // at most zero, so badness cannot increase.
       if (index > subject_length - pattern_length) {
         return -1;
       }
     }
     j--;
     while (j >= 0 && pattern[j] == (subject[index + j])) j--;
     if (j < 0) {
       return index;
     } else {
       index += last_char_shift;
       // Badness increases by the number of characters we have
       // checked, and decreases by the number of characters we
       // can skip by shifting. It's a measure of how we are doing
       // compared to reading each character exactly once.
       badness += (pattern_length - j) - last_char_shift;
       if (badness > 0) {
         search->PopulateBoyerMooreTable();
         search->strategy_ = &BoyerMooreSearch;
         return BoyerMooreSearch(search, subject, index);
       }
     }
   }
   return -1;
 }


 template <typename PatternChar, typename SubjectChar>
 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
   int pattern_length = pattern_.length();

   int* bad_char_occurrence = bad_char_table();

   // Only preprocess at most kBMMaxShift last characters of pattern.
   int start = start_;
   // Run forwards to populate bad_char_table, so that *last* instance
   // of character equivalence class is the one registered.
   // Notice: Doesn't include the last character.
   int table_size = AlphabetSize();
   if (start == 0) {  // All patterns less than kBMMaxShift in length.
     memset(bad_char_occurrence,
            -1,
            table_size * sizeof(*bad_char_occurrence));
   } else {
     for (int i = 0; i < table_size; i++) {
       bad_char_occurrence[i] = start - 1;
     }
   }
   for (int i = start; i < pattern_length - 1; i++) {
     PatternChar c = pattern_[i];
     int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
     bad_char_occurrence[bucket] = i;
   }
 }

 //---------------------------------------------------------------------
 // Linear string search with bailout to BMH.
 //---------------------------------------------------------------------

 // Simple linear search for short patterns, which bails out if the string
 // isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
 template <typename PatternChar, typename SubjectChar>
 int StringSearch<PatternChar, SubjectChar>::InitialSearch(
     StringSearch<PatternChar, SubjectChar>* search,
     Vector<const SubjectChar> subject,
     int index) {
   Vector<const PatternChar> pattern = search->pattern_;
   int pattern_length = pattern.length();
   // Badness is a count of how much work we have done.  When we have
   // done enough work we decide it's probably worth switching to a better
   // algorithm.
   int badness = -10 - (pattern_length << 2);

   // We know our pattern is at least 2 characters, we cache the first so
   // the common case of the first character not matching is faster.
   PatternChar pattern_first_char = pattern[0];
   for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
     badness++;
     if (badness <= 0) {
       if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
         const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
             memchr(subject.start() + i,
                    pattern_first_char,
                    n - i + 1));
         if (pos == NULL) {
           return -1;
         }
         i = static_cast<int>(pos - subject.start());
       } else {
         if (subject[i] != pattern_first_char) continue;
       }
       int j = 1;
       do {
         if (pattern[j] != subject[i + j]) {
           break;
         }
         j++;
       } while (j < pattern_length);
       if (j == pattern_length) {
         return i;
       }
       badness += j;
     } else {
       search->PopulateBoyerMooreHorspoolTable();
       search->strategy_ = &BoyerMooreHorspoolSearch;
       return BoyerMooreHorspoolSearch(search, subject, i);
     }
   }
   return -1;
 }


 // Perform a a single stand-alone search.
 // If searching multiple times for the same pattern, a search
 // object should be constructed once and the Search function then called
 // for each search.
 template <typename SubjectChar, typename PatternChar>
 static int SearchString(Vector<const SubjectChar> subject,
                         Vector<const PatternChar> pattern,
                         int start_index) {
   StringSearch<PatternChar, SubjectChar> search(pattern);
   return search.Search(subject, start_index);
 }

 }}  // namespace v8::internal

 #endif  // V8_STRING_SEARCH_H_
	// Copyright 2010 the V8 project authors. All rights reserved.
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following
	// disclaimer in the documentation and/or other materials provided
	// with the distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived
	// from this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#ifndef V8_STRING_SEARCH_H_
	#define V8_STRING_SEARCH_H_

	namespace v8 {
	namespace internal {


	//---------------------------------------------------------------------
	// String Search object.
	//---------------------------------------------------------------------

	// Class holding constants and methods that apply to all string search variants,
	// independently of subject and pattern char size.
	class StringSearchBase {
	protected:
	// Cap on the maximal shift in the Boyer-Moore implementation. By setting a
	// limit, we can fix the size of tables. For a needle longer than this limit,
	// search will not be optimal, since we only build tables for a suffix
	// of the string, but it is a safe approximation.
	static const int kBMMaxShift = 250;

	// Reduce alphabet to this size.
	// One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
	// proportional to the input alphabet. We reduce the alphabet size by
	// equating input characters modulo a smaller alphabet size. This gives
	// a potentially less efficient searching, but is a safe approximation.
	// For needles using only characters in the same Unicode 256-code point page,
	// there is no search speed degradation.
	static const int kAsciiAlphabetSize = 128;
	static const int kUC16AlphabetSize = 256;

	// Bad-char shift table stored in the state. It's length is the alphabet size.
	// For patterns below this length, the skip length of Boyer-Moore is too short
	// to compensate for the algorithmic overhead compared to simple brute force.
	static const int kBMMinPatternLength = 7;

	static inline bool IsAsciiString(Vector<const char>) {
	return true;
	}

	static inline bool IsAsciiString(Vector<const uc16> string) {
	return String::IsAscii(string.start(), string.length());
	}

	// The following tables are shared by all searches.
	// TODO(lrn): Introduce a way for a pattern to keep its tables
	// between searches (e.g., for an Atom RegExp).

	// Store for the BoyerMoore(Horspool) bad char shift table.
	static int kBadCharShiftTable[kUC16AlphabetSize];
	// Store for the BoyerMoore good suffix shift table.
	static int kGoodSuffixShiftTable[kBMMaxShift + 1];
	// Table used temporarily while building the BoyerMoore good suffix
	// shift table.
	static int kSuffixTable[kBMMaxShift + 1];
	};


	template <typename PatternChar, typename SubjectChar>
	class StringSearch : private StringSearchBase {
	public:
	explicit StringSearch(Vector<const PatternChar> pattern)
	: pattern_(pattern),
	start_(Max(0, pattern.length() - kBMMaxShift)) {
	if (sizeof(PatternChar) > sizeof(SubjectChar)) {
	if (!IsAsciiString(pattern_)) {
	strategy_ = &FailSearch;
	return;
	}
	}
	int pattern_length = pattern_.length();
	if (pattern_length < kBMMinPatternLength) {
	if (pattern_length == 1) {
	strategy_ = &SingleCharSearch;
	return;
	}
	strategy_ = &LinearSearch;
	return;
	}
	strategy_ = &InitialSearch;
	}

	int Search(Vector<const SubjectChar> subject, int index) {
	return strategy_(this, subject, index);
	}

	static inline int AlphabetSize() {
	if (sizeof(PatternChar) == 1) {
	// ASCII needle.
	return kAsciiAlphabetSize;
	} else {
	ASSERT(sizeof(PatternChar) == 2);
	// UC16 needle.
	return kUC16AlphabetSize;
	}
	}

	private:
	typedef int (*SearchFunction)( // NOLINT - it's not a cast!
	StringSearch<PatternChar, SubjectChar>*,
	Vector<const SubjectChar>,
	int);

	static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
	Vector<const SubjectChar>,
	int) {
	return -1;
	}

	static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index);

	static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index);

	static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index);

	static int BoyerMooreHorspoolSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index);

	static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index);

	void PopulateBoyerMooreHorspoolTable();

	void PopulateBoyerMooreTable();

	static inline int CharOccurrence(int* bad_char_occurrence,
	SubjectChar char_code) {
	if (sizeof(SubjectChar) == 1) {
	return bad_char_occurrence[static_cast<int>(char_code)];
	}
	if (sizeof(PatternChar) == 1) {
	if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) {
	return -1;
	}
	return bad_char_occurrence[static_cast<unsigned int>(char_code)];
	}
	// Both pattern and subject are UC16. Reduce character to equivalence class.
	int equiv_class = char_code % kUC16AlphabetSize;
	return bad_char_occurrence[equiv_class];
	}

	// Return a table covering the last kBMMaxShift+1 positions of
	// pattern.
	int* bad_char_table() {
	return kBadCharShiftTable;
	}

	int* good_suffix_shift_table() {
	// Return biased pointer that maps the range [start_..pattern_.length()
	// to the kGoodSuffixShiftTable array.
	return kGoodSuffixShiftTable - start_;
	}

	int* suffix_table() {
	// Return biased pointer that maps the range [start_..pattern_.length()
	// to the kSuffixTable array.
	return kSuffixTable - start_;
	}

	// The pattern to search for.
	Vector<const PatternChar> pattern_;
	// Pointer to implementation of the search.
	SearchFunction strategy_;
	// Cache value of Max(0, pattern_length() - kBMMaxShift)
	int start_;
	};


	//---------------------------------------------------------------------
	// Single Character Pattern Search Strategy
	//---------------------------------------------------------------------

	template <typename PatternChar, typename SubjectChar>
	int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int index) {
	ASSERT_EQ(1, search->pattern_.length());
	PatternChar pattern_first_char = search->pattern_[0];
	int i = index;
	if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
	const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
	memchr(subject.start() + i,
	pattern_first_char,
	subject.length() - i));
	if (pos == NULL) return -1;
	return static_cast<int>(pos - subject.start());
	} else {
	if (sizeof(PatternChar) > sizeof(SubjectChar)) {
	if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) {
	return -1;
	}
	}
	SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
	int n = subject.length();
	while (i < n) {
	if (subject[i++] == search_char) return i - 1;
	}
	return -1;
	}
	}

	//---------------------------------------------------------------------
	// Linear Search Strategy
	//---------------------------------------------------------------------


	template <typename PatternChar, typename SubjectChar>
	static inline bool CharCompare(const PatternChar* pattern,
	const SubjectChar* subject,
	int length) {
	ASSERT(length > 0);
	int pos = 0;
	do {
	if (pattern[pos] != subject[pos]) {
	return false;
	}
	pos++;
	} while (pos < length);
	return true;
	}


	// Simple linear search for short patterns. Never bails out.
	template <typename PatternChar, typename SubjectChar>
	int StringSearch<PatternChar, SubjectChar>::LinearSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int index) {
	Vector<const PatternChar> pattern = search->pattern_;
	ASSERT(pattern.length() > 1);
	int pattern_length = pattern.length();
	PatternChar pattern_first_char = pattern[0];
	int i = index;
	int n = subject.length() - pattern_length;
	while (i <= n) {
	if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
	const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
	memchr(subject.start() + i,
	pattern_first_char,
	n - i + 1));
	if (pos == NULL) return -1;
	i = static_cast<int>(pos - subject.start()) + 1;
	} else {
	if (subject[i++] != pattern_first_char) continue;
	}
	// Loop extracted to separate function to allow using return to do
	// a deeper break.
	if (CharCompare(pattern.start() + 1,
	subject.start() + i,
	pattern_length - 1)) {
	return i - 1;
	}
	}
	return -1;
	}

	//---------------------------------------------------------------------
	// Boyer-Moore string search
	//---------------------------------------------------------------------

	template <typename PatternChar, typename SubjectChar>
	int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index) {
	Vector<const PatternChar> pattern = search->pattern_;
	int subject_length = subject.length();
	int pattern_length = pattern.length();
	// Only preprocess at most kBMMaxShift last characters of pattern.
	int start = search->start_;

	int* bad_char_occurence = search->bad_char_table();
	int* good_suffix_shift = search->good_suffix_shift_table();

	PatternChar last_char = pattern[pattern_length - 1];
	int index = start_index;
	// Continue search from i.
	while (index <= subject_length - pattern_length) {
	int j = pattern_length - 1;
	int c;
	while (last_char != (c = subject[index + j])) {
	int shift =
	j - CharOccurrence(bad_char_occurence, c);
	index += shift;
	if (index > subject_length - pattern_length) {
	return -1;
	}
	}
	while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
	if (j < 0) {
	return index;
	} else if (j < start) {
	// we have matched more than our tables allow us to be smart about.
	// Fall back on BMH shift.
	index += pattern_length - 1
	- CharOccurrence(bad_char_occurence,
	static_cast<SubjectChar>(last_char));
	} else {
	int gs_shift = good_suffix_shift[j + 1];
	int bc_occ =
	CharOccurrence(bad_char_occurence, c);
	int shift = j - bc_occ;
	if (gs_shift > shift) {
	shift = gs_shift;
	}
	index += shift;
	}
	}

	return -1;
	}


	template <typename PatternChar, typename SubjectChar>
	void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
	int pattern_length = pattern_.length();
	const PatternChar* pattern = pattern_.start();
	// Only look at the last kBMMaxShift characters of pattern (from start_
	// to pattern_length).
	int start = start_;
	int length = pattern_length - start;

	// Biased tables so that we can use pattern indices as table indices,
	// even if we only cover the part of the pattern from offset start.
	int* shift_table = good_suffix_shift_table();
	int* suffix_table = this->suffix_table();

	// Initialize table.
	for (int i = start; i < pattern_length; i++) {
	shift_table[i] = length;
	}
	shift_table[pattern_length] = 1;
	suffix_table[pattern_length] = pattern_length + 1;

	// Find suffixes.
	PatternChar last_char = pattern[pattern_length - 1];
	int suffix = pattern_length + 1;
	{
	int i = pattern_length;
	while (i > start) {
	PatternChar c = pattern[i - 1];
	while (suffix <= pattern_length && c != pattern[suffix - 1]) {
	if (shift_table[suffix] == length) {
	shift_table[suffix] = suffix - i;
	}
	suffix = suffix_table[suffix];
	}
	suffix_table[--i] = --suffix;
	if (suffix == pattern_length) {
	// No suffix to extend, so we check against last_char only.
	while ((i > start) && (pattern[i - 1] != last_char)) {
	if (shift_table[pattern_length] == length) {
	shift_table[pattern_length] = pattern_length - i;
	}
	suffix_table[--i] = pattern_length;
	}
	if (i > start) {
	suffix_table[--i] = --suffix;
	}
	}
	}
	}
	// Build shift table using suffixes.
	if (suffix < pattern_length) {
	for (int i = start; i <= pattern_length; i++) {
	if (shift_table[i] == length) {
	shift_table[i] = suffix - start;
	}
	if (i == suffix) {
	suffix = suffix_table[suffix];
	}
	}
	}
	}

	//---------------------------------------------------------------------
	// Boyer-Moore-Horspool string search.
	//---------------------------------------------------------------------

	template <typename PatternChar, typename SubjectChar>
	int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int start_index) {
	Vector<const PatternChar> pattern = search->pattern_;
	int subject_length = subject.length();
	int pattern_length = pattern.length();
	int* char_occurrences = search->bad_char_table();
	int badness = -pattern_length;

	// How bad we are doing without a good-suffix table.
	PatternChar last_char = pattern[pattern_length - 1];
	int last_char_shift = pattern_length - 1 -
	CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
	// Perform search
	int index = start_index; // No matches found prior to this index.
	while (index <= subject_length - pattern_length) {
	int j = pattern_length - 1;
	int subject_char;
	while (last_char != (subject_char = subject[index + j])) {
	int bc_occ = CharOccurrence(char_occurrences, subject_char);
	int shift = j - bc_occ;
	index += shift;
	badness += 1 - shift; // at most zero, so badness cannot increase.
	if (index > subject_length - pattern_length) {
	return -1;
	}
	}
	j--;
	while (j >= 0 && pattern[j] == (subject[index + j])) j--;
	if (j < 0) {
	return index;
	} else {
	index += last_char_shift;
	// Badness increases by the number of characters we have
	// checked, and decreases by the number of characters we
	// can skip by shifting. It's a measure of how we are doing
	// compared to reading each character exactly once.
	badness += (pattern_length - j) - last_char_shift;
	if (badness > 0) {
	search->PopulateBoyerMooreTable();
	search->strategy_ = &BoyerMooreSearch;
	return BoyerMooreSearch(search, subject, index);
	}
	}
	}
	return -1;
	}


	template <typename PatternChar, typename SubjectChar>
	void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
	int pattern_length = pattern_.length();

	int* bad_char_occurrence = bad_char_table();

	// Only preprocess at most kBMMaxShift last characters of pattern.
	int start = start_;
	// Run forwards to populate bad_char_table, so that last instance
	// of character equivalence class is the one registered.
	// Notice: Doesn't include the last character.
	int table_size = AlphabetSize();
	if (start == 0) { // All patterns less than kBMMaxShift in length.
	memset(bad_char_occurrence,
	-1,
	table_size * sizeof(*bad_char_occurrence));
	} else {
	for (int i = 0; i < table_size; i++) {
	bad_char_occurrence[i] = start - 1;
	}
	}
	for (int i = start; i < pattern_length - 1; i++) {
	PatternChar c = pattern_[i];
	int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
	bad_char_occurrence[bucket] = i;
	}
	}

	//---------------------------------------------------------------------
	// Linear string search with bailout to BMH.
	//---------------------------------------------------------------------

	// Simple linear search for short patterns, which bails out if the string
	// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
	template <typename PatternChar, typename SubjectChar>
	int StringSearch<PatternChar, SubjectChar>::InitialSearch(
	StringSearch<PatternChar, SubjectChar>* search,
	Vector<const SubjectChar> subject,
	int index) {
	Vector<const PatternChar> pattern = search->pattern_;
	int pattern_length = pattern.length();
	// Badness is a count of how much work we have done. When we have
	// done enough work we decide it's probably worth switching to a better
	// algorithm.
	int badness = -10 - (pattern_length << 2);

	// We know our pattern is at least 2 characters, we cache the first so
	// the common case of the first character not matching is faster.
	PatternChar pattern_first_char = pattern[0];
	for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
	badness++;
	if (badness <= 0) {
	if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
	const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
	memchr(subject.start() + i,
	pattern_first_char,
	n - i + 1));
	if (pos == NULL) {
	return -1;
	}
	i = static_cast<int>(pos - subject.start());
	} else {
	if (subject[i] != pattern_first_char) continue;
	}
	int j = 1;
	do {
	if (pattern[j] != subject[i + j]) {
	break;
	}
	j++;
	} while (j < pattern_length);
	if (j == pattern_length) {
	return i;
	}
	badness += j;
	} else {
	search->PopulateBoyerMooreHorspoolTable();
	search->strategy_ = &BoyerMooreHorspoolSearch;
	return BoyerMooreHorspoolSearch(search, subject, i);
	}
	}
	return -1;
	}


	// Perform a a single stand-alone search.
	// If searching multiple times for the same pattern, a search
	// object should be constructed once and the Search function then called
	// for each search.
	template <typename SubjectChar, typename PatternChar>
	static int SearchString(Vector<const SubjectChar> subject,
	Vector<const PatternChar> pattern,
	int start_index) {
	StringSearch<PatternChar, SubjectChar> search(pattern);
	return search.Search(subject, start_index);
	}

	}} // namespace v8::internal

	#endif // V8_STRING_SEARCH_H_