| /* |
| * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) |
| * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. |
| * Copyright (C) 2009 Torch Mobile, Inc. |
| * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| * |
| */ |
| |
| #include "config.h" |
| #include "RegExp.h" |
| |
| #include "Lexer.h" |
| #include "yarr/Yarr.h" |
| #include "yarr/YarrJIT.h" |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <wtf/Assertions.h> |
| #include <wtf/OwnArrayPtr.h> |
| |
| namespace JSC { |
| |
| RegExpFlags regExpFlags(const UString& string) |
| { |
| RegExpFlags flags = NoFlags; |
| |
| for (unsigned i = 0; i < string.length(); ++i) { |
| switch (string.characters()[i]) { |
| case 'g': |
| if (flags & FlagGlobal) |
| return InvalidFlags; |
| flags = static_cast<RegExpFlags>(flags | FlagGlobal); |
| break; |
| |
| case 'i': |
| if (flags & FlagIgnoreCase) |
| return InvalidFlags; |
| flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase); |
| break; |
| |
| case 'm': |
| if (flags & FlagMultiline) |
| return InvalidFlags; |
| flags = static_cast<RegExpFlags>(flags | FlagMultiline); |
| break; |
| |
| default: |
| return InvalidFlags; |
| } |
| } |
| |
| return flags; |
| } |
| |
| struct RegExpRepresentation { |
| #if ENABLE(YARR_JIT) |
| Yarr::YarrCodeBlock m_regExpJITCode; |
| #endif |
| OwnPtr<Yarr::BytecodePattern> m_regExpBytecode; |
| }; |
| |
| inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) |
| : m_patternString(patternString) |
| , m_flags(flags) |
| , m_constructionError(0) |
| , m_numSubpatterns(0) |
| #if ENABLE(REGEXP_TRACING) |
| , m_rtMatchCallCount(0) |
| , m_rtMatchFoundCount(0) |
| #endif |
| , m_representation(adoptPtr(new RegExpRepresentation)) |
| { |
| m_state = compile(globalData); |
| } |
| |
| RegExp::~RegExp() |
| { |
| } |
| |
| PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) |
| { |
| RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags)); |
| #if ENABLE(REGEXP_TRACING) |
| globalData->addRegExpToTrace(res); |
| #endif |
| return res.release(); |
| } |
| |
| RegExp::RegExpState RegExp::compile(JSGlobalData* globalData) |
| { |
| Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); |
| if (m_constructionError) |
| return ParseError; |
| |
| m_numSubpatterns = pattern.m_numSubpatterns; |
| |
| RegExpState res = ByteCode; |
| |
| #if ENABLE(YARR_JIT) |
| if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { |
| Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode); |
| #if ENABLE(YARR_JIT_DEBUG) |
| if (!m_representation->m_regExpJITCode.isFallBack()) |
| res = JITCode; |
| else |
| res = ByteCode; |
| #else |
| if (!m_representation->m_regExpJITCode.isFallBack()) |
| return JITCode; |
| #endif |
| } |
| #endif |
| |
| m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); |
| |
| return res; |
| } |
| |
| int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
| { |
| if (startOffset < 0) |
| startOffset = 0; |
| |
| #if ENABLE(REGEXP_TRACING) |
| m_rtMatchCallCount++; |
| #endif |
| |
| if (static_cast<unsigned>(startOffset) > s.length() || s.isNull()) |
| return -1; |
| |
| if (m_state != ParseError) { |
| int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
| int* offsetVector; |
| Vector<int, 32> nonReturnedOvector; |
| if (ovector) { |
| ovector->resize(offsetVectorSize); |
| offsetVector = ovector->data(); |
| } else { |
| nonReturnedOvector.resize(offsetVectorSize); |
| offsetVector = nonReturnedOvector.data(); |
| } |
| |
| ASSERT(offsetVector); |
| // Initialize offsetVector with the return value (index 0) and the |
| // first subpattern start indicies (even index values) set to -1. |
| // No need to init the subpattern end indicies. |
| for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) |
| offsetVector[j] = -1; |
| |
| int result; |
| #if ENABLE(YARR_JIT) |
| if (m_state == JITCode) { |
| result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector); |
| #if ENABLE(YARR_JIT_DEBUG) |
| matchCompareWithInterpreter(s, startOffset, offsetVector, result); |
| #endif |
| } else |
| #endif |
| result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector); |
| ASSERT(result >= -1); |
| |
| #if ENABLE(REGEXP_TRACING) |
| if (result != -1) |
| m_rtMatchFoundCount++; |
| #endif |
| |
| return result; |
| } |
| |
| return -1; |
| } |
| |
| |
| #if ENABLE(YARR_JIT_DEBUG) |
| void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult) |
| { |
| int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
| Vector<int, 32> interpreterOvector; |
| interpreterOvector.resize(offsetVectorSize); |
| int* interpreterOffsetVector = interpreterOvector.data(); |
| int interpreterResult = 0; |
| int differences = 0; |
| |
| // Initialize interpreterOffsetVector with the return value (index 0) and the |
| // first subpattern start indicies (even index values) set to -1. |
| // No need to init the subpattern end indicies. |
| for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) |
| interpreterOffsetVector[j] = -1; |
| |
| interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector); |
| |
| if (jitResult != interpreterResult) |
| differences++; |
| |
| for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) |
| if ((offsetVector[j] != interpreterOffsetVector[j]) |
| || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))) |
| differences++; |
| |
| if (differences) { |
| fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); |
| unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset); |
| |
| fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); |
| |
| if (jitResult != interpreterResult) { |
| fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); |
| differences--; |
| } else { |
| fprintf(stderr, " Correct result = %d\n", jitResult); |
| } |
| |
| if (differences) { |
| for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { |
| if (offsetVector[j] != interpreterOffsetVector[j]) |
| fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); |
| if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) |
| fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); |
| } |
| } |
| } |
| } |
| #endif |
| |
| #if ENABLE(REGEXP_TRACING) |
| void RegExp::printTraceData() |
| { |
| char formattedPattern[41]; |
| char rawPattern[41]; |
| |
| strncpy(rawPattern, pattern().utf8().data(), 40); |
| rawPattern[40]= '\0'; |
| |
| int pattLen = strlen(rawPattern); |
| |
| snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); |
| |
| #if ENABLE(YARR_JIT) |
| Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode; |
| |
| const size_t jitAddrSize = 20; |
| char jitAddr[jitAddrSize]; |
| if (m_state == JITCode) |
| snprintf(jitAddr, jitAddrSize, "fallback"); |
| else |
| snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr())); |
| #else |
| const char* jitAddr = "JIT Off"; |
| #endif |
| |
| printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); |
| } |
| #endif |
| |
| } // namespace JSC |