[SCM] WebKit Debian packaging branch, debian/experimental, updated. debian/1.3.8-1-1049-g2e11a8e
barraclough at apple.com
barraclough at apple.com
Fri Jan 21 15:17:30 UTC 2011
The following commit has been merged in the debian/experimental branch:
commit 7dd3056d4d93d6c0ef0caf5e1a82b6d12351c32f
Author: barraclough at apple.com <barraclough at apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Mon Jan 10 21:08:28 2011 +0000
Rename the existing YARR files and classes
https://bugs.webkit.org/show_bug.cgi?id=51872
Patch by Peter Varga <pvarga at inf.u-szeged.hu> on 2011-01-10
Reviewed by Gavin Barraclough.
Replace the "Regex" prefix with "Yarr" in the name of YARR files and classes.
* Android.mk:
* CMakeLists.txt:
* GNUmakefile.am:
* JavaScriptCore.gypi:
* JavaScriptCore.pro:
* JavaScriptCore.xcodeproj/project.pbxproj:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
(JSC::RegExp::match):
(JSC::RegExp::printTraceData):
* yarr/YarrInterpreter.cpp: Renamed from Source/JavaScriptCore/yarr/RegexInterpreter.cpp.
(JSC::Yarr::Interpreter::appendParenthesesDisjunctionContext):
(JSC::Yarr::Interpreter::popParenthesesDisjunctionContext):
(JSC::Yarr::Interpreter::DisjunctionContext::DisjunctionContext):
(JSC::Yarr::Interpreter::DisjunctionContext::operator new):
(JSC::Yarr::Interpreter::allocDisjunctionContext):
(JSC::Yarr::Interpreter::freeDisjunctionContext):
(JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::ParenthesesDisjunctionContext):
(JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::operator new):
(JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::restoreOutput):
(JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::getDisjunctionContext):
(JSC::Yarr::Interpreter::allocParenthesesDisjunctionContext):
(JSC::Yarr::Interpreter::freeParenthesesDisjunctionContext):
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::next):
(JSC::Yarr::Interpreter::InputStream::rewind):
(JSC::Yarr::Interpreter::InputStream::read):
(JSC::Yarr::Interpreter::InputStream::readPair):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::InputStream::getPos):
(JSC::Yarr::Interpreter::InputStream::setPos):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::InputStream::checkInput):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::checkCharacterClass):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::matchAssertionBOL):
(JSC::Yarr::Interpreter::matchAssertionEOL):
(JSC::Yarr::Interpreter::matchAssertionWordBoundary):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::backtrackPatternCasedCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchBackReference):
(JSC::Yarr::Interpreter::backtrackBackReference):
(JSC::Yarr::Interpreter::recordParenthesesMatch):
(JSC::Yarr::Interpreter::resetMatches):
(JSC::Yarr::Interpreter::parenthesesDoBacktrack):
(JSC::Yarr::Interpreter::matchParenthesesOnceBegin):
(JSC::Yarr::Interpreter::matchParenthesesOnceEnd):
(JSC::Yarr::Interpreter::backtrackParenthesesOnceBegin):
(JSC::Yarr::Interpreter::backtrackParenthesesOnceEnd):
(JSC::Yarr::Interpreter::matchParenthesesTerminalBegin):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::backtrackParenthesesTerminalBegin):
(JSC::Yarr::Interpreter::backtrackParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchParentheticalAssertionBegin):
(JSC::Yarr::Interpreter::matchParentheticalAssertionEnd):
(JSC::Yarr::Interpreter::backtrackParentheticalAssertionBegin):
(JSC::Yarr::Interpreter::backtrackParentheticalAssertionEnd):
(JSC::Yarr::Interpreter::matchParentheses):
(JSC::Yarr::Interpreter::backtrackParentheses):
(JSC::Yarr::Interpreter::lookupForBeginChars):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::matchNonZeroDisjunction):
(JSC::Yarr::Interpreter::interpret):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::ParenthesesStackEntry::ParenthesesStackEntry):
(JSC::Yarr::ByteCompiler::ByteCompiler):
(JSC::Yarr::ByteCompiler::compile):
(JSC::Yarr::ByteCompiler::checkInput):
(JSC::Yarr::ByteCompiler::assertionBOL):
(JSC::Yarr::ByteCompiler::assertionEOL):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
(JSC::Yarr::ByteCompiler::atomCharacterClass):
(JSC::Yarr::ByteCompiler::atomBackReference):
(JSC::Yarr::ByteCompiler::atomParenthesesOnceBegin):
(JSC::Yarr::ByteCompiler::atomParenthesesTerminalBegin):
(JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin):
(JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin):
(JSC::Yarr::ByteCompiler::atomParentheticalAssertionEnd):
(JSC::Yarr::ByteCompiler::popParenthesesStack):
(JSC::Yarr::ByteCompiler::dumpDisjunction):
(JSC::Yarr::ByteCompiler::closeAlternative):
(JSC::Yarr::ByteCompiler::closeBodyAlternative):
(JSC::Yarr::ByteCompiler::atomParenthesesSubpatternEnd):
(JSC::Yarr::ByteCompiler::atomParenthesesOnceEnd):
(JSC::Yarr::ByteCompiler::atomParenthesesTerminalEnd):
(JSC::Yarr::ByteCompiler::regexBegin):
(JSC::Yarr::ByteCompiler::regexEnd):
(JSC::Yarr::ByteCompiler::alternativeBodyDisjunction):
(JSC::Yarr::ByteCompiler::alternativeDisjunction):
(JSC::Yarr::ByteCompiler::emitDisjunction):
(JSC::Yarr::byteCompile):
(JSC::Yarr::interpret):
* yarr/YarrInterpreter.h: Renamed from Source/JavaScriptCore/yarr/RegexInterpreter.h.
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::ByteTerm::BOL):
(JSC::Yarr::ByteTerm::CheckInput):
(JSC::Yarr::ByteTerm::EOL):
(JSC::Yarr::ByteTerm::WordBoundary):
(JSC::Yarr::ByteTerm::BackReference):
(JSC::Yarr::ByteTerm::BodyAlternativeBegin):
(JSC::Yarr::ByteTerm::BodyAlternativeDisjunction):
(JSC::Yarr::ByteTerm::BodyAlternativeEnd):
(JSC::Yarr::ByteTerm::AlternativeBegin):
(JSC::Yarr::ByteTerm::AlternativeDisjunction):
(JSC::Yarr::ByteTerm::AlternativeEnd):
(JSC::Yarr::ByteTerm::SubpatternBegin):
(JSC::Yarr::ByteTerm::SubpatternEnd):
(JSC::Yarr::ByteTerm::invert):
(JSC::Yarr::ByteTerm::capture):
(JSC::Yarr::ByteDisjunction::ByteDisjunction):
(JSC::Yarr::BytecodePattern::BytecodePattern):
(JSC::Yarr::BytecodePattern::~BytecodePattern):
* yarr/YarrJIT.cpp: Renamed from Source/JavaScriptCore/yarr/RegexJIT.cpp.
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::jumpIfNoAvailableInput):
(JSC::Yarr::YarrGenerator::jumpIfAvailableInput):
(JSC::Yarr::YarrGenerator::checkInput):
(JSC::Yarr::YarrGenerator::atEndOfInput):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharEquals):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::storeToFrame):
(JSC::Yarr::YarrGenerator::storeToFrameWithPatch):
(JSC::Yarr::YarrGenerator::loadFromFrame):
(JSC::Yarr::YarrGenerator::loadFromFrameAndJump):
(JSC::Yarr::YarrGenerator::IndirectJumpEntry::IndirectJumpEntry):
(JSC::Yarr::YarrGenerator::IndirectJumpEntry::addJump):
(JSC::Yarr::YarrGenerator::AlternativeBacktrackRecord::AlternativeBacktrackRecord):
(JSC::Yarr::YarrGenerator::GenerationState::GenerationState):
(JSC::Yarr::YarrGenerator::GenerationState::addIndirectJumpEntry):
(JSC::Yarr::YarrGenerator::GenerationState::emitIndirectJumpTable):
(JSC::Yarr::YarrGenerator::GenerationState::incrementParenNestingLevel):
(JSC::Yarr::YarrGenerator::GenerationState::decrementParenNestingLevel):
(JSC::Yarr::YarrGenerator::GenerationState::addParenthesesTail):
(JSC::Yarr::YarrGenerator::GenerationState::emitParenthesesTail):
(JSC::Yarr::YarrGenerator::GenerationState::addJumpToNextInteration):
(JSC::Yarr::YarrGenerator::GenerationState::addJumpsToNextInteration):
(JSC::Yarr::YarrGenerator::GenerationState::addDataLabelToNextIteration):
(JSC::Yarr::YarrGenerator::GenerationState::linkToNextIteration):
(JSC::Yarr::YarrGenerator::BacktrackDestination::BacktrackDestination):
(JSC::Yarr::YarrGenerator::BacktrackDestination::clear):
(JSC::Yarr::YarrGenerator::BacktrackDestination::clearDataLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::hasDestination):
(JSC::Yarr::YarrGenerator::BacktrackDestination::isStackOffset):
(JSC::Yarr::YarrGenerator::BacktrackDestination::isLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::isJumpList):
(JSC::Yarr::YarrGenerator::BacktrackDestination::hasDataLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::copyTarget):
(JSC::Yarr::YarrGenerator::BacktrackDestination::copyTo):
(JSC::Yarr::YarrGenerator::BacktrackDestination::addBacktrackJump):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setStackOffset):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setNextBacktrackLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::copyBacktrackToLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackToLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackJumpList):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackSourceLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setDataLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::setSubDataLabelPtr):
(JSC::Yarr::YarrGenerator::BacktrackDestination::linkToNextBacktrack):
(JSC::Yarr::YarrGenerator::BacktrackDestination::getStackOffset):
(JSC::Yarr::YarrGenerator::BacktrackDestination::getLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::getBacktrackJumps):
(JSC::Yarr::YarrGenerator::BacktrackDestination::getDataLabel):
(JSC::Yarr::YarrGenerator::BacktrackDestination::jumpToBacktrack):
(JSC::Yarr::YarrGenerator::BacktrackDestination::linkDataLabelToHereIfExists):
(JSC::Yarr::YarrGenerator::BacktrackDestination::plantJumpToBacktrackIfExists):
(JSC::Yarr::YarrGenerator::BacktrackDestination::linkAlternativeBacktracks):
(JSC::Yarr::YarrGenerator::BacktrackDestination::linkAlternativeBacktracksTo):
(JSC::Yarr::YarrGenerator::TermGenerationState::TermGenerationState):
(JSC::Yarr::YarrGenerator::TermGenerationState::resetAlternative):
(JSC::Yarr::YarrGenerator::TermGenerationState::alternativeValid):
(JSC::Yarr::YarrGenerator::TermGenerationState::nextAlternative):
(JSC::Yarr::YarrGenerator::TermGenerationState::alternative):
(JSC::Yarr::YarrGenerator::TermGenerationState::isLastAlternative):
(JSC::Yarr::YarrGenerator::TermGenerationState::resetTerm):
(JSC::Yarr::YarrGenerator::TermGenerationState::termValid):
(JSC::Yarr::YarrGenerator::TermGenerationState::nextTerm):
(JSC::Yarr::YarrGenerator::TermGenerationState::term):
(JSC::Yarr::YarrGenerator::TermGenerationState::isLastTerm):
(JSC::Yarr::YarrGenerator::TermGenerationState::getSubParenNum):
(JSC::Yarr::YarrGenerator::TermGenerationState::isMainDisjunction):
(JSC::Yarr::YarrGenerator::TermGenerationState::setParenthesesTail):
(JSC::Yarr::YarrGenerator::TermGenerationState::getParenthesesTail):
(JSC::Yarr::YarrGenerator::TermGenerationState::lookaheadTerm):
(JSC::Yarr::YarrGenerator::TermGenerationState::isSinglePatternCharacterLookaheadTerm):
(JSC::Yarr::YarrGenerator::TermGenerationState::inputOffset):
(JSC::Yarr::YarrGenerator::TermGenerationState::clearBacktrack):
(JSC::Yarr::YarrGenerator::TermGenerationState::jumpToBacktrack):
(JSC::Yarr::YarrGenerator::TermGenerationState::plantJumpToBacktrackIfExists):
(JSC::Yarr::YarrGenerator::TermGenerationState::linkDataLabelToBacktrackIfExists):
(JSC::Yarr::YarrGenerator::TermGenerationState::addBacktrackJump):
(JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackDataLabel):
(JSC::Yarr::YarrGenerator::TermGenerationState::setBackTrackStackOffset):
(JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackLabel):
(JSC::Yarr::YarrGenerator::TermGenerationState::linkAlternativeBacktracks):
(JSC::Yarr::YarrGenerator::TermGenerationState::linkAlternativeBacktracksTo):
(JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackLink):
(JSC::Yarr::YarrGenerator::TermGenerationState::chainBacktracks):
(JSC::Yarr::YarrGenerator::TermGenerationState::chainBacktrackJumps):
(JSC::Yarr::YarrGenerator::TermGenerationState::getBacktrackDestination):
(JSC::Yarr::YarrGenerator::TermGenerationState::propagateBacktrackingFrom):
(JSC::Yarr::YarrGenerator::ParenthesesTail::ParenthesesTail):
(JSC::Yarr::YarrGenerator::ParenthesesTail::processBacktracks):
(JSC::Yarr::YarrGenerator::ParenthesesTail::setNextIteration):
(JSC::Yarr::YarrGenerator::ParenthesesTail::addAfterParenJump):
(JSC::Yarr::YarrGenerator::ParenthesesTail::generateCode):
(JSC::Yarr::YarrGenerator::generateAssertionBOL):
(JSC::Yarr::YarrGenerator::generateAssertionEOL):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterSingle):
(JSC::Yarr::YarrGenerator::generatePatternCharacterPair):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassSingle):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generateParenthesesDisjunction):
(JSC::Yarr::YarrGenerator::generateParenthesesSingle):
(JSC::Yarr::YarrGenerator::generateParenthesesGreedyNoBacktrack):
(JSC::Yarr::YarrGenerator::generateParentheticalAssertion):
(JSC::Yarr::YarrGenerator::generateTerm):
(JSC::Yarr::YarrGenerator::generateDisjunction):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::jitCompile):
* yarr/YarrJIT.h: Renamed from Source/JavaScriptCore/yarr/RegexJIT.h.
(JSC::Yarr::YarrCodeBlock::YarrCodeBlock):
(JSC::Yarr::YarrCodeBlock::~YarrCodeBlock):
(JSC::Yarr::YarrCodeBlock::setFallBack):
(JSC::Yarr::YarrCodeBlock::isFallBack):
(JSC::Yarr::YarrCodeBlock::set):
(JSC::Yarr::YarrCodeBlock::execute):
(JSC::Yarr::YarrCodeBlock::getAddr):
(JSC::Yarr::execute):
* yarr/YarrParser.h: Renamed from Source/JavaScriptCore/yarr/RegexParser.h.
(JSC::Yarr::Parser::CharacterClassParserDelegate::CharacterClassParserDelegate):
(JSC::Yarr::Parser::CharacterClassParserDelegate::begin):
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
(JSC::Yarr::Parser::CharacterClassParserDelegate::end):
(JSC::Yarr::Parser::CharacterClassParserDelegate::assertionWordBoundary):
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomBackReference):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parseAtomEscape):
(JSC::Yarr::Parser::parseCharacterClassEscape):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseParenthesesBegin):
(JSC::Yarr::Parser::parseParenthesesEnd):
(JSC::Yarr::Parser::parseQuantifier):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::saveState):
(JSC::Yarr::Parser::restoreState):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::Parser::peekIsDigit):
(JSC::Yarr::Parser::peekDigit):
(JSC::Yarr::Parser::consume):
(JSC::Yarr::Parser::consumeDigit):
(JSC::Yarr::Parser::consumeNumber):
(JSC::Yarr::Parser::consumeOctal):
(JSC::Yarr::Parser::tryConsume):
(JSC::Yarr::Parser::tryConsumeHex):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp: Renamed from Source/JavaScriptCore/yarr/RegexPattern.cpp.
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::isUnicodeUpper):
(JSC::Yarr::CharacterClassConstructor::isUnicodeLower):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::BeginCharHelper::BeginCharHelper):
(JSC::Yarr::BeginCharHelper::addBeginChar):
(JSC::Yarr::BeginCharHelper::merge):
(JSC::Yarr::BeginCharHelper::addCharacter):
(JSC::Yarr::BeginCharHelper::linkHotTerms):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::~YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::reset):
(JSC::Yarr::YarrPatternConstructor::assertionBOL):
(JSC::Yarr::YarrPatternConstructor::assertionEOL):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassEnd):
(JSC::Yarr::YarrPatternConstructor::atomParenthesesSubpatternBegin):
(JSC::Yarr::YarrPatternConstructor::atomParentheticalAssertionBegin):
(JSC::Yarr::YarrPatternConstructor::atomParenthesesEnd):
(JSC::Yarr::YarrPatternConstructor::atomBackReference):
(JSC::Yarr::YarrPatternConstructor::copyDisjunction):
(JSC::Yarr::YarrPatternConstructor::copyTerm):
(JSC::Yarr::YarrPatternConstructor::quantifyAtom):
(JSC::Yarr::YarrPatternConstructor::disjunction):
(JSC::Yarr::YarrPatternConstructor::regexBegin):
(JSC::Yarr::YarrPatternConstructor::regexEnd):
(JSC::Yarr::YarrPatternConstructor::regexError):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPatternConstructor::setupDisjunctionOffsets):
(JSC::Yarr::YarrPatternConstructor::setupOffsets):
(JSC::Yarr::YarrPatternConstructor::checkForTerminalParentheses):
(JSC::Yarr::YarrPatternConstructor::optimizeBOL):
(JSC::Yarr::YarrPatternConstructor::addBeginTerm):
(JSC::Yarr::YarrPatternConstructor::setupDisjunctionBeginTerms):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeBeginTerms):
(JSC::Yarr::YarrPatternConstructor::setupBeginChars):
(JSC::Yarr::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h: Renamed from Source/JavaScriptCore/yarr/RegexPattern.h.
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClassTable::create):
(JSC::Yarr::CharacterClassTable::CharacterClassTable):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::PatternTerm::ForwardReference):
(JSC::Yarr::PatternTerm::BOL):
(JSC::Yarr::PatternTerm::EOL):
(JSC::Yarr::PatternTerm::WordBoundary):
(JSC::Yarr::PatternTerm::invert):
(JSC::Yarr::PatternTerm::capture):
(JSC::Yarr::PatternTerm::quantify):
(JSC::Yarr::PatternAlternative::PatternAlternative):
(JSC::Yarr::PatternAlternative::lastTerm):
(JSC::Yarr::PatternAlternative::removeLastTerm):
(JSC::Yarr::PatternAlternative::setOnceThrough):
(JSC::Yarr::PatternAlternative::onceThrough):
(JSC::Yarr::PatternDisjunction::PatternDisjunction):
(JSC::Yarr::PatternDisjunction::~PatternDisjunction):
(JSC::Yarr::PatternDisjunction::addNewAlternative):
(JSC::Yarr::TermChain::TermChain):
(JSC::Yarr::BeginChar::BeginChar):
(JSC::Yarr::YarrPattern::~YarrPattern):
(JSC::Yarr::YarrPattern::reset):
(JSC::Yarr::YarrPattern::containsIllegalBackReference):
(JSC::Yarr::YarrPattern::newlineCharacterClass):
(JSC::Yarr::YarrPattern::digitsCharacterClass):
(JSC::Yarr::YarrPattern::spacesCharacterClass):
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::nondigitsCharacterClass):
(JSC::Yarr::YarrPattern::nonspacesCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@75421 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/Android.mk b/Source/JavaScriptCore/Android.mk
index be8a9b4..865a751 100644
--- a/Source/JavaScriptCore/Android.mk
+++ b/Source/JavaScriptCore/Android.mk
@@ -190,9 +190,9 @@ LOCAL_SRC_FILES := \
\
wtf/unicode/icu/CollatorICU.cpp \
\
- yarr/RegexPattern.cpp \
- yarr/RegexInterpreter.cpp \
- yarr/RegexJIT.cpp \
+ yarr/YarrPattern.cpp \
+ yarr/YarrInterpreter.cpp \
+ yarr/YarrJIT.cpp \
yarr/YarrSyntaxChecker.cpp
# generated headers
diff --git a/Source/JavaScriptCore/CMakeLists.txt b/Source/JavaScriptCore/CMakeLists.txt
index 31dc4e1..ef53adb 100644
--- a/Source/JavaScriptCore/CMakeLists.txt
+++ b/Source/JavaScriptCore/CMakeLists.txt
@@ -167,9 +167,9 @@ SET(JavaScriptCore_SOURCES
runtime/TimeoutChecker.cpp
runtime/UString.cpp
- yarr/RegexPattern.cpp
- yarr/RegexInterpreter.cpp
- yarr/RegexJIT.cpp
+ yarr/YarrPattern.cpp
+ yarr/YarrInterpreter.cpp
+ yarr/YarrJIT.cpp
yarr/YarrSyntaxChecker.cpp
wtf/DateMath.cpp
@@ -221,7 +221,7 @@ ADD_CUSTOM_COMMAND(
MAIN_DEPENDENCY ${JAVASCRIPTCORE_DIR}/create_regex_tables
COMMAND ${PYTHON_EXECUTABLE} ${JAVASCRIPTCORE_DIR}/create_regex_tables > ${DERIVED_SOURCES_DIR}/RegExpJitTables.h
VERBATIM)
-ADD_SOURCE_DEPENDENCIES(${JAVASCRIPTCORE_DIR}/yarr/RegexPattern.cpp ${DERIVED_SOURCES_DIR}/RegExpJitTables.h)
+ADD_SOURCE_DEPENDENCIES(${JAVASCRIPTCORE_DIR}/yarr/YarrPattern.cpp ${DERIVED_SOURCES_DIR}/RegExpJitTables.h)
diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog
index e1905af..6986b71 100644
--- a/Source/JavaScriptCore/ChangeLog
+++ b/Source/JavaScriptCore/ChangeLog
@@ -1,3 +1,383 @@
+2011-01-10 Peter Varga <pvarga at inf.u-szeged.hu>
+
+ Reviewed by Gavin Barraclough.
+
+ Rename the existing YARR files and classes
+ https://bugs.webkit.org/show_bug.cgi?id=51872
+
+ Replace the "Regex" prefix with "Yarr" in the name of YARR files and classes.
+
+ * Android.mk:
+ * CMakeLists.txt:
+ * GNUmakefile.am:
+ * JavaScriptCore.gypi:
+ * JavaScriptCore.pro:
+ * JavaScriptCore.xcodeproj/project.pbxproj:
+ * runtime/RegExp.cpp:
+ (JSC::RegExp::compile):
+ (JSC::RegExp::match):
+ (JSC::RegExp::printTraceData):
+ * yarr/YarrInterpreter.cpp: Renamed from Source/JavaScriptCore/yarr/RegexInterpreter.cpp.
+ (JSC::Yarr::Interpreter::appendParenthesesDisjunctionContext):
+ (JSC::Yarr::Interpreter::popParenthesesDisjunctionContext):
+ (JSC::Yarr::Interpreter::DisjunctionContext::DisjunctionContext):
+ (JSC::Yarr::Interpreter::DisjunctionContext::operator new):
+ (JSC::Yarr::Interpreter::allocDisjunctionContext):
+ (JSC::Yarr::Interpreter::freeDisjunctionContext):
+ (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::ParenthesesDisjunctionContext):
+ (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::operator new):
+ (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::restoreOutput):
+ (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::getDisjunctionContext):
+ (JSC::Yarr::Interpreter::allocParenthesesDisjunctionContext):
+ (JSC::Yarr::Interpreter::freeParenthesesDisjunctionContext):
+ (JSC::Yarr::Interpreter::InputStream::InputStream):
+ (JSC::Yarr::Interpreter::InputStream::next):
+ (JSC::Yarr::Interpreter::InputStream::rewind):
+ (JSC::Yarr::Interpreter::InputStream::read):
+ (JSC::Yarr::Interpreter::InputStream::readPair):
+ (JSC::Yarr::Interpreter::InputStream::readChecked):
+ (JSC::Yarr::Interpreter::InputStream::reread):
+ (JSC::Yarr::Interpreter::InputStream::prev):
+ (JSC::Yarr::Interpreter::InputStream::getPos):
+ (JSC::Yarr::Interpreter::InputStream::setPos):
+ (JSC::Yarr::Interpreter::InputStream::atStart):
+ (JSC::Yarr::Interpreter::InputStream::atEnd):
+ (JSC::Yarr::Interpreter::InputStream::checkInput):
+ (JSC::Yarr::Interpreter::InputStream::uncheckInput):
+ (JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
+ (JSC::Yarr::Interpreter::testCharacterClass):
+ (JSC::Yarr::Interpreter::checkCharacter):
+ (JSC::Yarr::Interpreter::checkCasedCharacter):
+ (JSC::Yarr::Interpreter::checkCharacterClass):
+ (JSC::Yarr::Interpreter::tryConsumeBackReference):
+ (JSC::Yarr::Interpreter::matchAssertionBOL):
+ (JSC::Yarr::Interpreter::matchAssertionEOL):
+ (JSC::Yarr::Interpreter::matchAssertionWordBoundary):
+ (JSC::Yarr::Interpreter::backtrackPatternCharacter):
+ (JSC::Yarr::Interpreter::backtrackPatternCasedCharacter):
+ (JSC::Yarr::Interpreter::matchCharacterClass):
+ (JSC::Yarr::Interpreter::backtrackCharacterClass):
+ (JSC::Yarr::Interpreter::matchBackReference):
+ (JSC::Yarr::Interpreter::backtrackBackReference):
+ (JSC::Yarr::Interpreter::recordParenthesesMatch):
+ (JSC::Yarr::Interpreter::resetMatches):
+ (JSC::Yarr::Interpreter::parenthesesDoBacktrack):
+ (JSC::Yarr::Interpreter::matchParenthesesOnceBegin):
+ (JSC::Yarr::Interpreter::matchParenthesesOnceEnd):
+ (JSC::Yarr::Interpreter::backtrackParenthesesOnceBegin):
+ (JSC::Yarr::Interpreter::backtrackParenthesesOnceEnd):
+ (JSC::Yarr::Interpreter::matchParenthesesTerminalBegin):
+ (JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
+ (JSC::Yarr::Interpreter::backtrackParenthesesTerminalBegin):
+ (JSC::Yarr::Interpreter::backtrackParenthesesTerminalEnd):
+ (JSC::Yarr::Interpreter::matchParentheticalAssertionBegin):
+ (JSC::Yarr::Interpreter::matchParentheticalAssertionEnd):
+ (JSC::Yarr::Interpreter::backtrackParentheticalAssertionBegin):
+ (JSC::Yarr::Interpreter::backtrackParentheticalAssertionEnd):
+ (JSC::Yarr::Interpreter::matchParentheses):
+ (JSC::Yarr::Interpreter::backtrackParentheses):
+ (JSC::Yarr::Interpreter::lookupForBeginChars):
+ (JSC::Yarr::Interpreter::matchDisjunction):
+ (JSC::Yarr::Interpreter::matchNonZeroDisjunction):
+ (JSC::Yarr::Interpreter::interpret):
+ (JSC::Yarr::Interpreter::Interpreter):
+ (JSC::Yarr::ByteCompiler::ParenthesesStackEntry::ParenthesesStackEntry):
+ (JSC::Yarr::ByteCompiler::ByteCompiler):
+ (JSC::Yarr::ByteCompiler::compile):
+ (JSC::Yarr::ByteCompiler::checkInput):
+ (JSC::Yarr::ByteCompiler::assertionBOL):
+ (JSC::Yarr::ByteCompiler::assertionEOL):
+ (JSC::Yarr::ByteCompiler::assertionWordBoundary):
+ (JSC::Yarr::ByteCompiler::atomPatternCharacter):
+ (JSC::Yarr::ByteCompiler::atomCharacterClass):
+ (JSC::Yarr::ByteCompiler::atomBackReference):
+ (JSC::Yarr::ByteCompiler::atomParenthesesOnceBegin):
+ (JSC::Yarr::ByteCompiler::atomParenthesesTerminalBegin):
+ (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin):
+ (JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin):
+ (JSC::Yarr::ByteCompiler::atomParentheticalAssertionEnd):
+ (JSC::Yarr::ByteCompiler::popParenthesesStack):
+ (JSC::Yarr::ByteCompiler::dumpDisjunction):
+ (JSC::Yarr::ByteCompiler::closeAlternative):
+ (JSC::Yarr::ByteCompiler::closeBodyAlternative):
+ (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternEnd):
+ (JSC::Yarr::ByteCompiler::atomParenthesesOnceEnd):
+ (JSC::Yarr::ByteCompiler::atomParenthesesTerminalEnd):
+ (JSC::Yarr::ByteCompiler::regexBegin):
+ (JSC::Yarr::ByteCompiler::regexEnd):
+ (JSC::Yarr::ByteCompiler::alternativeBodyDisjunction):
+ (JSC::Yarr::ByteCompiler::alternativeDisjunction):
+ (JSC::Yarr::ByteCompiler::emitDisjunction):
+ (JSC::Yarr::byteCompile):
+ (JSC::Yarr::interpret):
+ * yarr/YarrInterpreter.h: Renamed from Source/JavaScriptCore/yarr/RegexInterpreter.h.
+ (JSC::Yarr::ByteTerm::ByteTerm):
+ (JSC::Yarr::ByteTerm::BOL):
+ (JSC::Yarr::ByteTerm::CheckInput):
+ (JSC::Yarr::ByteTerm::EOL):
+ (JSC::Yarr::ByteTerm::WordBoundary):
+ (JSC::Yarr::ByteTerm::BackReference):
+ (JSC::Yarr::ByteTerm::BodyAlternativeBegin):
+ (JSC::Yarr::ByteTerm::BodyAlternativeDisjunction):
+ (JSC::Yarr::ByteTerm::BodyAlternativeEnd):
+ (JSC::Yarr::ByteTerm::AlternativeBegin):
+ (JSC::Yarr::ByteTerm::AlternativeDisjunction):
+ (JSC::Yarr::ByteTerm::AlternativeEnd):
+ (JSC::Yarr::ByteTerm::SubpatternBegin):
+ (JSC::Yarr::ByteTerm::SubpatternEnd):
+ (JSC::Yarr::ByteTerm::invert):
+ (JSC::Yarr::ByteTerm::capture):
+ (JSC::Yarr::ByteDisjunction::ByteDisjunction):
+ (JSC::Yarr::BytecodePattern::BytecodePattern):
+ (JSC::Yarr::BytecodePattern::~BytecodePattern):
+ * yarr/YarrJIT.cpp: Renamed from Source/JavaScriptCore/yarr/RegexJIT.cpp.
+ (JSC::Yarr::YarrGenerator::optimizeAlternative):
+ (JSC::Yarr::YarrGenerator::matchCharacterClassRange):
+ (JSC::Yarr::YarrGenerator::matchCharacterClass):
+ (JSC::Yarr::YarrGenerator::jumpIfNoAvailableInput):
+ (JSC::Yarr::YarrGenerator::jumpIfAvailableInput):
+ (JSC::Yarr::YarrGenerator::checkInput):
+ (JSC::Yarr::YarrGenerator::atEndOfInput):
+ (JSC::Yarr::YarrGenerator::notAtEndOfInput):
+ (JSC::Yarr::YarrGenerator::jumpIfCharEquals):
+ (JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
+ (JSC::Yarr::YarrGenerator::readCharacter):
+ (JSC::Yarr::YarrGenerator::storeToFrame):
+ (JSC::Yarr::YarrGenerator::storeToFrameWithPatch):
+ (JSC::Yarr::YarrGenerator::loadFromFrame):
+ (JSC::Yarr::YarrGenerator::loadFromFrameAndJump):
+ (JSC::Yarr::YarrGenerator::IndirectJumpEntry::IndirectJumpEntry):
+ (JSC::Yarr::YarrGenerator::IndirectJumpEntry::addJump):
+ (JSC::Yarr::YarrGenerator::AlternativeBacktrackRecord::AlternativeBacktrackRecord):
+ (JSC::Yarr::YarrGenerator::GenerationState::GenerationState):
+ (JSC::Yarr::YarrGenerator::GenerationState::addIndirectJumpEntry):
+ (JSC::Yarr::YarrGenerator::GenerationState::emitIndirectJumpTable):
+ (JSC::Yarr::YarrGenerator::GenerationState::incrementParenNestingLevel):
+ (JSC::Yarr::YarrGenerator::GenerationState::decrementParenNestingLevel):
+ (JSC::Yarr::YarrGenerator::GenerationState::addParenthesesTail):
+ (JSC::Yarr::YarrGenerator::GenerationState::emitParenthesesTail):
+ (JSC::Yarr::YarrGenerator::GenerationState::addJumpToNextInteration):
+ (JSC::Yarr::YarrGenerator::GenerationState::addJumpsToNextInteration):
+ (JSC::Yarr::YarrGenerator::GenerationState::addDataLabelToNextIteration):
+ (JSC::Yarr::YarrGenerator::GenerationState::linkToNextIteration):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::BacktrackDestination):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::clear):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::clearDataLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::hasDestination):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::isStackOffset):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::isLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::isJumpList):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::hasDataLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::copyTarget):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::copyTo):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::addBacktrackJump):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setStackOffset):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setNextBacktrackLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::copyBacktrackToLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackToLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackJumpList):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setBacktrackSourceLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setDataLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::setSubDataLabelPtr):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::linkToNextBacktrack):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::getStackOffset):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::getLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::getBacktrackJumps):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::getDataLabel):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::jumpToBacktrack):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::linkDataLabelToHereIfExists):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::plantJumpToBacktrackIfExists):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::linkAlternativeBacktracks):
+ (JSC::Yarr::YarrGenerator::BacktrackDestination::linkAlternativeBacktracksTo):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::TermGenerationState):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::resetAlternative):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::alternativeValid):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::nextAlternative):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::alternative):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::isLastAlternative):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::resetTerm):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::termValid):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::nextTerm):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::term):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::isLastTerm):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::getSubParenNum):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::isMainDisjunction):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::setParenthesesTail):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::getParenthesesTail):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::lookaheadTerm):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::isSinglePatternCharacterLookaheadTerm):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::inputOffset):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::clearBacktrack):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::jumpToBacktrack):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::plantJumpToBacktrackIfExists):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::linkDataLabelToBacktrackIfExists):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::addBacktrackJump):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackDataLabel):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::setBackTrackStackOffset):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackLabel):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::linkAlternativeBacktracks):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::linkAlternativeBacktracksTo):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::setBacktrackLink):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::chainBacktracks):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::chainBacktrackJumps):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::getBacktrackDestination):
+ (JSC::Yarr::YarrGenerator::TermGenerationState::propagateBacktrackingFrom):
+ (JSC::Yarr::YarrGenerator::ParenthesesTail::ParenthesesTail):
+ (JSC::Yarr::YarrGenerator::ParenthesesTail::processBacktracks):
+ (JSC::Yarr::YarrGenerator::ParenthesesTail::setNextIteration):
+ (JSC::Yarr::YarrGenerator::ParenthesesTail::addAfterParenJump):
+ (JSC::Yarr::YarrGenerator::ParenthesesTail::generateCode):
+ (JSC::Yarr::YarrGenerator::generateAssertionBOL):
+ (JSC::Yarr::YarrGenerator::generateAssertionEOL):
+ (JSC::Yarr::YarrGenerator::matchAssertionWordchar):
+ (JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterSingle):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterPair):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassSingle):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
+ (JSC::Yarr::YarrGenerator::generateParenthesesDisjunction):
+ (JSC::Yarr::YarrGenerator::generateParenthesesSingle):
+ (JSC::Yarr::YarrGenerator::generateParenthesesGreedyNoBacktrack):
+ (JSC::Yarr::YarrGenerator::generateParentheticalAssertion):
+ (JSC::Yarr::YarrGenerator::generateTerm):
+ (JSC::Yarr::YarrGenerator::generateDisjunction):
+ (JSC::Yarr::YarrGenerator::generateEnter):
+ (JSC::Yarr::YarrGenerator::generateReturn):
+ (JSC::Yarr::YarrGenerator::YarrGenerator):
+ (JSC::Yarr::YarrGenerator::generate):
+ (JSC::Yarr::YarrGenerator::compile):
+ (JSC::Yarr::jitCompile):
+ * yarr/YarrJIT.h: Renamed from Source/JavaScriptCore/yarr/RegexJIT.h.
+ (JSC::Yarr::YarrCodeBlock::YarrCodeBlock):
+ (JSC::Yarr::YarrCodeBlock::~YarrCodeBlock):
+ (JSC::Yarr::YarrCodeBlock::setFallBack):
+ (JSC::Yarr::YarrCodeBlock::isFallBack):
+ (JSC::Yarr::YarrCodeBlock::set):
+ (JSC::Yarr::YarrCodeBlock::execute):
+ (JSC::Yarr::YarrCodeBlock::getAddr):
+ (JSC::Yarr::execute):
+ * yarr/YarrParser.h: Renamed from Source/JavaScriptCore/yarr/RegexParser.h.
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::CharacterClassParserDelegate):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::begin):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::end):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::assertionWordBoundary):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBackReference):
+ (JSC::Yarr::Parser::Parser):
+ (JSC::Yarr::Parser::parseEscape):
+ (JSC::Yarr::Parser::parseAtomEscape):
+ (JSC::Yarr::Parser::parseCharacterClassEscape):
+ (JSC::Yarr::Parser::parseCharacterClass):
+ (JSC::Yarr::Parser::parseParenthesesBegin):
+ (JSC::Yarr::Parser::parseParenthesesEnd):
+ (JSC::Yarr::Parser::parseQuantifier):
+ (JSC::Yarr::Parser::parseTokens):
+ (JSC::Yarr::Parser::parse):
+ (JSC::Yarr::Parser::saveState):
+ (JSC::Yarr::Parser::restoreState):
+ (JSC::Yarr::Parser::atEndOfPattern):
+ (JSC::Yarr::Parser::peek):
+ (JSC::Yarr::Parser::peekIsDigit):
+ (JSC::Yarr::Parser::peekDigit):
+ (JSC::Yarr::Parser::consume):
+ (JSC::Yarr::Parser::consumeDigit):
+ (JSC::Yarr::Parser::consumeNumber):
+ (JSC::Yarr::Parser::consumeOctal):
+ (JSC::Yarr::Parser::tryConsume):
+ (JSC::Yarr::Parser::tryConsumeHex):
+ (JSC::Yarr::parse):
+ * yarr/YarrPattern.cpp: Renamed from Source/JavaScriptCore/yarr/RegexPattern.cpp.
+ (JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
+ (JSC::Yarr::CharacterClassConstructor::reset):
+ (JSC::Yarr::CharacterClassConstructor::append):
+ (JSC::Yarr::CharacterClassConstructor::putChar):
+ (JSC::Yarr::CharacterClassConstructor::isUnicodeUpper):
+ (JSC::Yarr::CharacterClassConstructor::isUnicodeLower):
+ (JSC::Yarr::CharacterClassConstructor::putRange):
+ (JSC::Yarr::CharacterClassConstructor::charClass):
+ (JSC::Yarr::CharacterClassConstructor::addSorted):
+ (JSC::Yarr::CharacterClassConstructor::addSortedRange):
+ (JSC::Yarr::BeginCharHelper::BeginCharHelper):
+ (JSC::Yarr::BeginCharHelper::addBeginChar):
+ (JSC::Yarr::BeginCharHelper::merge):
+ (JSC::Yarr::BeginCharHelper::addCharacter):
+ (JSC::Yarr::BeginCharHelper::linkHotTerms):
+ (JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
+ (JSC::Yarr::YarrPatternConstructor::~YarrPatternConstructor):
+ (JSC::Yarr::YarrPatternConstructor::reset):
+ (JSC::Yarr::YarrPatternConstructor::assertionBOL):
+ (JSC::Yarr::YarrPatternConstructor::assertionEOL):
+ (JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
+ (JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
+ (JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassEnd):
+ (JSC::Yarr::YarrPatternConstructor::atomParenthesesSubpatternBegin):
+ (JSC::Yarr::YarrPatternConstructor::atomParentheticalAssertionBegin):
+ (JSC::Yarr::YarrPatternConstructor::atomParenthesesEnd):
+ (JSC::Yarr::YarrPatternConstructor::atomBackReference):
+ (JSC::Yarr::YarrPatternConstructor::copyDisjunction):
+ (JSC::Yarr::YarrPatternConstructor::copyTerm):
+ (JSC::Yarr::YarrPatternConstructor::quantifyAtom):
+ (JSC::Yarr::YarrPatternConstructor::disjunction):
+ (JSC::Yarr::YarrPatternConstructor::regexBegin):
+ (JSC::Yarr::YarrPatternConstructor::regexEnd):
+ (JSC::Yarr::YarrPatternConstructor::regexError):
+ (JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
+ (JSC::Yarr::YarrPatternConstructor::setupDisjunctionOffsets):
+ (JSC::Yarr::YarrPatternConstructor::setupOffsets):
+ (JSC::Yarr::YarrPatternConstructor::checkForTerminalParentheses):
+ (JSC::Yarr::YarrPatternConstructor::optimizeBOL):
+ (JSC::Yarr::YarrPatternConstructor::addBeginTerm):
+ (JSC::Yarr::YarrPatternConstructor::setupDisjunctionBeginTerms):
+ (JSC::Yarr::YarrPatternConstructor::setupAlternativeBeginTerms):
+ (JSC::Yarr::YarrPatternConstructor::setupBeginChars):
+ (JSC::Yarr::compile):
+ (JSC::Yarr::YarrPattern::YarrPattern):
+ * yarr/YarrPattern.h: Renamed from Source/JavaScriptCore/yarr/RegexPattern.h.
+ (JSC::Yarr::CharacterRange::CharacterRange):
+ (JSC::Yarr::CharacterClassTable::create):
+ (JSC::Yarr::CharacterClassTable::CharacterClassTable):
+ (JSC::Yarr::CharacterClass::CharacterClass):
+ (JSC::Yarr::PatternTerm::PatternTerm):
+ (JSC::Yarr::PatternTerm::ForwardReference):
+ (JSC::Yarr::PatternTerm::BOL):
+ (JSC::Yarr::PatternTerm::EOL):
+ (JSC::Yarr::PatternTerm::WordBoundary):
+ (JSC::Yarr::PatternTerm::invert):
+ (JSC::Yarr::PatternTerm::capture):
+ (JSC::Yarr::PatternTerm::quantify):
+ (JSC::Yarr::PatternAlternative::PatternAlternative):
+ (JSC::Yarr::PatternAlternative::lastTerm):
+ (JSC::Yarr::PatternAlternative::removeLastTerm):
+ (JSC::Yarr::PatternAlternative::setOnceThrough):
+ (JSC::Yarr::PatternAlternative::onceThrough):
+ (JSC::Yarr::PatternDisjunction::PatternDisjunction):
+ (JSC::Yarr::PatternDisjunction::~PatternDisjunction):
+ (JSC::Yarr::PatternDisjunction::addNewAlternative):
+ (JSC::Yarr::TermChain::TermChain):
+ (JSC::Yarr::BeginChar::BeginChar):
+ (JSC::Yarr::YarrPattern::~YarrPattern):
+ (JSC::Yarr::YarrPattern::reset):
+ (JSC::Yarr::YarrPattern::containsIllegalBackReference):
+ (JSC::Yarr::YarrPattern::newlineCharacterClass):
+ (JSC::Yarr::YarrPattern::digitsCharacterClass):
+ (JSC::Yarr::YarrPattern::spacesCharacterClass):
+ (JSC::Yarr::YarrPattern::wordcharCharacterClass):
+ (JSC::Yarr::YarrPattern::nondigitsCharacterClass):
+ (JSC::Yarr::YarrPattern::nonspacesCharacterClass):
+ (JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
+
2011-01-10 Gavin Barraclough <barraclough at apple.com>
Windows build fix.
diff --git a/Source/JavaScriptCore/GNUmakefile.am b/Source/JavaScriptCore/GNUmakefile.am
index 62ae602..bee8ed4 100644
--- a/Source/JavaScriptCore/GNUmakefile.am
+++ b/Source/JavaScriptCore/GNUmakefile.am
@@ -552,13 +552,13 @@ javascriptcore_sources += \
Source/JavaScriptCore/wtf/VMTags.h \
Source/JavaScriptCore/wtf/WTFThreadData.cpp \
Source/JavaScriptCore/wtf/WTFThreadData.h \
- Source/JavaScriptCore/yarr/RegexInterpreter.cpp \
- Source/JavaScriptCore/yarr/RegexInterpreter.h \
- Source/JavaScriptCore/yarr/RegexJIT.cpp \
- Source/JavaScriptCore/yarr/RegexJIT.h \
- Source/JavaScriptCore/yarr/RegexParser.h \
- Source/JavaScriptCore/yarr/RegexPattern.cpp \
- Source/JavaScriptCore/yarr/RegexPattern.h \
+ Source/JavaScriptCore/yarr/YarrInterpreter.cpp \
+ Source/JavaScriptCore/yarr/YarrInterpreter.h \
+ Source/JavaScriptCore/yarr/YarrJIT.cpp \
+ Source/JavaScriptCore/yarr/YarrJIT.h \
+ Source/JavaScriptCore/yarr/YarrParser.h \
+ Source/JavaScriptCore/yarr/YarrPattern.cpp \
+ Source/JavaScriptCore/yarr/YarrPattern.h \
Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp \
Source/JavaScriptCore/yarr/YarrSyntaxChecker.h
diff --git a/Source/JavaScriptCore/JavaScriptCore.gypi b/Source/JavaScriptCore/JavaScriptCore.gypi
index 3d8ae87..8442884 100644
--- a/Source/JavaScriptCore/JavaScriptCore.gypi
+++ b/Source/JavaScriptCore/JavaScriptCore.gypi
@@ -494,13 +494,13 @@
'wtf/win/MainThreadWin.cpp',
'wtf/win/OwnPtrWin.cpp',
'wtf/wx/MainThreadWx.cpp',
- 'yarr/RegexInterpreter.cpp',
- 'yarr/RegexInterpreter.h',
- 'yarr/RegexJIT.cpp',
- 'yarr/RegexJIT.h',
- 'yarr/RegexParser.h',
- 'yarr/RegexPattern.cpp',
- 'yarr/RegexPattern.h',
+ 'yarr/YarrInterpreter.cpp',
+ 'yarr/YarrInterpreter.h',
+ 'yarr/YarrJIT.cpp',
+ 'yarr/YarrJIT.h',
+ 'yarr/YarrParser.h',
+ 'yarr/YarrPattern.cpp',
+ 'yarr/YarrPattern.h',
'yarr/YarrSyntaxChecker.cpp',
'yarr/YarrSyntaxChecker.h',
]
diff --git a/Source/JavaScriptCore/JavaScriptCore.pro b/Source/JavaScriptCore/JavaScriptCore.pro
index 179ef01..ab475b2 100644
--- a/Source/JavaScriptCore/JavaScriptCore.pro
+++ b/Source/JavaScriptCore/JavaScriptCore.pro
@@ -208,9 +208,9 @@ SOURCES += \
runtime/Structure.cpp \
runtime/TimeoutChecker.cpp \
runtime/UString.cpp \
- yarr/RegexPattern.cpp \
- yarr/RegexInterpreter.cpp \
- yarr/RegexJIT.cpp \
+ yarr/YarrPattern.cpp \
+ yarr/YarrInterpreter.cpp \
+ yarr/YarrJIT.cpp \
yarr/YarrSyntaxChecker.cpp
# Generated files, simply list them for JavaScriptCore
diff --git a/Source/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj b/Source/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj
index 2478ea5..deaba10 100644
--- a/Source/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj
+++ b/Source/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj
@@ -1658,31 +1658,31 @@
Name="yarr"
>
<File
- RelativePath="..\..\yarr\RegexInterpreter.cpp"
+ RelativePath="..\..\yarr\YarrInterpreter.cpp"
>
</File>
<File
- RelativePath="..\..\yarr\RegexInterpreter.h"
+ RelativePath="..\..\yarr\YarrInterpreter.h"
>
</File>
<File
- RelativePath="..\..\yarr\RegexJIT.cpp"
+ RelativePath="..\..\yarr\YarrJIT.cpp"
>
</File>
<File
- RelativePath="..\..\yarr\RegexJIT.h"
+ RelativePath="..\..\yarr\YarrJIT.h"
>
</File>
<File
- RelativePath="..\..\yarr\RegexParser.h"
+ RelativePath="..\..\yarr\YarrParser.h"
>
</File>
<File
- RelativePath="..\..\yarr\RegexPattern.cpp"
+ RelativePath="..\..\yarr\YarrPattern.cpp"
>
</File>
<File
- RelativePath="..\..\yarr\RegexPattern.h"
+ RelativePath="..\..\yarr\YarrPattern.h"
>
</File>
<File
diff --git a/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj b/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
index 7a96e45..f2e6d66 100644
--- a/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
+++ b/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
@@ -223,6 +223,13 @@
86676D5211FED9BC004B6863 /* BumpPointerAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 86676D4D11FED55D004B6863 /* BumpPointerAllocator.h */; settings = {ATTRIBUTES = (Private, ); }; };
86704B4212DB8A8100A9FE7B /* YarrSyntaxChecker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86704B4012DB8A8100A9FE7B /* YarrSyntaxChecker.cpp */; };
86704B4312DB8A8100A9FE7B /* YarrSyntaxChecker.h in Headers */ = {isa = PBXBuildFile; fileRef = 86704B4112DB8A8100A9FE7B /* YarrSyntaxChecker.h */; };
+ 86704B8412DBA33700A9FE7B /* YarrInterpreter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86704B7D12DBA33700A9FE7B /* YarrInterpreter.cpp */; };
+ 86704B8512DBA33700A9FE7B /* YarrInterpreter.h in Headers */ = {isa = PBXBuildFile; fileRef = 86704B7E12DBA33700A9FE7B /* YarrInterpreter.h */; };
+ 86704B8612DBA33700A9FE7B /* YarrJIT.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86704B7F12DBA33700A9FE7B /* YarrJIT.cpp */; };
+ 86704B8712DBA33700A9FE7B /* YarrJIT.h in Headers */ = {isa = PBXBuildFile; fileRef = 86704B8012DBA33700A9FE7B /* YarrJIT.h */; };
+ 86704B8812DBA33700A9FE7B /* YarrParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 86704B8112DBA33700A9FE7B /* YarrParser.h */; };
+ 86704B8912DBA33700A9FE7B /* YarrPattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86704B8212DBA33700A9FE7B /* YarrPattern.cpp */; };
+ 86704B8A12DBA33700A9FE7B /* YarrPattern.h in Headers */ = {isa = PBXBuildFile; fileRef = 86704B8312DBA33700A9FE7B /* YarrPattern.h */; };
868BFA08117CEFD100B908B1 /* AtomicString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 868BFA00117CEFD100B908B1 /* AtomicString.cpp */; };
868BFA09117CEFD100B908B1 /* AtomicString.h in Headers */ = {isa = PBXBuildFile; fileRef = 868BFA01117CEFD100B908B1 /* AtomicString.h */; settings = {ATTRIBUTES = (Private, ); }; };
868BFA0A117CEFD100B908B1 /* AtomicStringImpl.h in Headers */ = {isa = PBXBuildFile; fileRef = 868BFA02117CEFD100B908B1 /* AtomicStringImpl.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -232,7 +239,6 @@
868BFA17117CF19900B908B1 /* WTFString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 868BFA15117CF19900B908B1 /* WTFString.cpp */; };
868BFA18117CF19900B908B1 /* WTFString.h in Headers */ = {isa = PBXBuildFile; fileRef = 868BFA16117CF19900B908B1 /* WTFString.h */; settings = {ATTRIBUTES = (Private, ); }; };
868BFA60117D048200B908B1 /* StaticConstructors.h in Headers */ = {isa = PBXBuildFile; fileRef = 868BFA5F117D048200B908B1 /* StaticConstructors.h */; settings = {ATTRIBUTES = (Private, ); }; };
- 868D81A212C428CE0093DC84 /* RegexPattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 868D81A112C428CE0093DC84 /* RegexPattern.cpp */; };
8690231512092D5C00630AF9 /* PageReservation.h in Headers */ = {isa = PBXBuildFile; fileRef = 8690231412092D5C00630AF9 /* PageReservation.h */; settings = {ATTRIBUTES = (Private, ); }; };
869D04AF1193B54D00803475 /* CachedTranscendentalFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 869D04AE1193B54D00803475 /* CachedTranscendentalFunction.h */; settings = {ATTRIBUTES = (Private, ); }; };
869EBCB70E8C6D4A008722CC /* ResultType.h in Headers */ = {isa = PBXBuildFile; fileRef = 869EBCB60E8C6D4A008722CC /* ResultType.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -266,12 +272,6 @@
86DB64640F95C6FC00D7D921 /* ExecutableAllocatorFixedVMPool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86DB64630F95C6FC00D7D921 /* ExecutableAllocatorFixedVMPool.cpp */; };
86E116B10FE75AC800B512BC /* CodeLocation.h in Headers */ = {isa = PBXBuildFile; fileRef = 86E116B00FE75AC800B512BC /* CodeLocation.h */; };
86E85539111B9968001AF51E /* JSStringBuilder.h in Headers */ = {isa = PBXBuildFile; fileRef = 86E85538111B9968001AF51E /* JSStringBuilder.h */; };
- 86EAC4970F93E8D1008EC948 /* RegexInterpreter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86EAC48F0F93E8D1008EC948 /* RegexInterpreter.cpp */; };
- 86EAC4980F93E8D1008EC948 /* RegexInterpreter.h in Headers */ = {isa = PBXBuildFile; fileRef = 86EAC4900F93E8D1008EC948 /* RegexInterpreter.h */; };
- 86EAC4990F93E8D1008EC948 /* RegexJIT.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86EAC4910F93E8D1008EC948 /* RegexJIT.cpp */; };
- 86EAC49A0F93E8D1008EC948 /* RegexJIT.h in Headers */ = {isa = PBXBuildFile; fileRef = 86EAC4920F93E8D1008EC948 /* RegexJIT.h */; };
- 86EAC49B0F93E8D1008EC948 /* RegexParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 86EAC4930F93E8D1008EC948 /* RegexParser.h */; };
- 86EAC49C0F93E8D1008EC948 /* RegexPattern.h in Headers */ = {isa = PBXBuildFile; fileRef = 86EAC4940F93E8D1008EC948 /* RegexPattern.h */; };
86F38859121130CA007A7CE3 /* AtomicStringHash.h in Headers */ = {isa = PBXBuildFile; fileRef = 86F38858121130CA007A7CE3 /* AtomicStringHash.h */; settings = {ATTRIBUTES = (Private, ); }; };
90213E3D123A40C200D422F3 /* MemoryStatistics.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 90213E3B123A40C200D422F3 /* MemoryStatistics.cpp */; };
90213E3E123A40C200D422F3 /* MemoryStatistics.h in Headers */ = {isa = PBXBuildFile; fileRef = 90213E3C123A40C200D422F3 /* MemoryStatistics.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -845,6 +845,13 @@
86676D4D11FED55D004B6863 /* BumpPointerAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BumpPointerAllocator.h; sourceTree = "<group>"; };
86704B4012DB8A8100A9FE7B /* YarrSyntaxChecker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrSyntaxChecker.cpp; path = yarr/YarrSyntaxChecker.cpp; sourceTree = "<group>"; };
86704B4112DB8A8100A9FE7B /* YarrSyntaxChecker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrSyntaxChecker.h; path = yarr/YarrSyntaxChecker.h; sourceTree = "<group>"; };
+ 86704B7D12DBA33700A9FE7B /* YarrInterpreter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrInterpreter.cpp; path = yarr/YarrInterpreter.cpp; sourceTree = "<group>"; };
+ 86704B7E12DBA33700A9FE7B /* YarrInterpreter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrInterpreter.h; path = yarr/YarrInterpreter.h; sourceTree = "<group>"; };
+ 86704B7F12DBA33700A9FE7B /* YarrJIT.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrJIT.cpp; path = yarr/YarrJIT.cpp; sourceTree = "<group>"; };
+ 86704B8012DBA33700A9FE7B /* YarrJIT.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrJIT.h; path = yarr/YarrJIT.h; sourceTree = "<group>"; };
+ 86704B8112DBA33700A9FE7B /* YarrParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrParser.h; path = yarr/YarrParser.h; sourceTree = "<group>"; };
+ 86704B8212DBA33700A9FE7B /* YarrPattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrPattern.cpp; path = yarr/YarrPattern.cpp; sourceTree = "<group>"; };
+ 86704B8312DBA33700A9FE7B /* YarrPattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrPattern.h; path = yarr/YarrPattern.h; sourceTree = "<group>"; };
867FC35F11B763950025105E /* JavaScriptCore.JSVALUE32_64only.exp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.exports; path = JavaScriptCore.JSVALUE32_64only.exp; sourceTree = "<group>"; };
867FC36011B763950025105E /* JavaScriptCore.JSVALUE32only.exp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.exports; path = JavaScriptCore.JSVALUE32only.exp; sourceTree = "<group>"; };
867FC36111B763950025105E /* JavaScriptCore.JSVALUE64only.exp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.exports; path = JavaScriptCore.JSVALUE64only.exp; sourceTree = "<group>"; };
@@ -857,7 +864,6 @@
868BFA15117CF19900B908B1 /* WTFString.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = WTFString.cpp; path = text/WTFString.cpp; sourceTree = "<group>"; };
868BFA16117CF19900B908B1 /* WTFString.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = WTFString.h; path = text/WTFString.h; sourceTree = "<group>"; };
868BFA5F117D048200B908B1 /* StaticConstructors.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StaticConstructors.h; sourceTree = "<group>"; };
- 868D81A112C428CE0093DC84 /* RegexPattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RegexPattern.cpp; path = yarr/RegexPattern.cpp; sourceTree = "<group>"; };
8690231412092D5C00630AF9 /* PageReservation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PageReservation.h; sourceTree = "<group>"; };
869D04AE1193B54D00803475 /* CachedTranscendentalFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CachedTranscendentalFunction.h; sourceTree = "<group>"; };
869EBCB60E8C6D4A008722CC /* ResultType.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ResultType.h; sourceTree = "<group>"; };
@@ -891,12 +897,6 @@
86DB64630F95C6FC00D7D921 /* ExecutableAllocatorFixedVMPool.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ExecutableAllocatorFixedVMPool.cpp; sourceTree = "<group>"; };
86E116B00FE75AC800B512BC /* CodeLocation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CodeLocation.h; sourceTree = "<group>"; };
86E85538111B9968001AF51E /* JSStringBuilder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSStringBuilder.h; sourceTree = "<group>"; };
- 86EAC48F0F93E8D1008EC948 /* RegexInterpreter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RegexInterpreter.cpp; path = yarr/RegexInterpreter.cpp; sourceTree = "<group>"; };
- 86EAC4900F93E8D1008EC948 /* RegexInterpreter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RegexInterpreter.h; path = yarr/RegexInterpreter.h; sourceTree = "<group>"; };
- 86EAC4910F93E8D1008EC948 /* RegexJIT.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RegexJIT.cpp; path = yarr/RegexJIT.cpp; sourceTree = "<group>"; };
- 86EAC4920F93E8D1008EC948 /* RegexJIT.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RegexJIT.h; path = yarr/RegexJIT.h; sourceTree = "<group>"; };
- 86EAC4930F93E8D1008EC948 /* RegexParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RegexParser.h; path = yarr/RegexParser.h; sourceTree = "<group>"; };
- 86EAC4940F93E8D1008EC948 /* RegexPattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RegexPattern.h; path = yarr/RegexPattern.h; sourceTree = "<group>"; };
86F38858121130CA007A7CE3 /* AtomicStringHash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AtomicStringHash.h; path = text/AtomicStringHash.h; sourceTree = "<group>"; };
90213E3B123A40C200D422F3 /* MemoryStatistics.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MemoryStatistics.cpp; sourceTree = "<group>"; };
90213E3C123A40C200D422F3 /* MemoryStatistics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MemoryStatistics.h; sourceTree = "<group>"; };
@@ -1909,15 +1909,15 @@
86EAC48C0F93E8B9008EC948 /* yarr */ = {
isa = PBXGroup;
children = (
+ 86704B7D12DBA33700A9FE7B /* YarrInterpreter.cpp */,
+ 86704B7E12DBA33700A9FE7B /* YarrInterpreter.h */,
+ 86704B7F12DBA33700A9FE7B /* YarrJIT.cpp */,
+ 86704B8012DBA33700A9FE7B /* YarrJIT.h */,
+ 86704B8112DBA33700A9FE7B /* YarrParser.h */,
+ 86704B8212DBA33700A9FE7B /* YarrPattern.cpp */,
+ 86704B8312DBA33700A9FE7B /* YarrPattern.h */,
86704B4012DB8A8100A9FE7B /* YarrSyntaxChecker.cpp */,
86704B4112DB8A8100A9FE7B /* YarrSyntaxChecker.h */,
- 86EAC48F0F93E8D1008EC948 /* RegexInterpreter.cpp */,
- 86EAC4900F93E8D1008EC948 /* RegexInterpreter.h */,
- 86EAC4910F93E8D1008EC948 /* RegexJIT.cpp */,
- 86EAC4920F93E8D1008EC948 /* RegexJIT.h */,
- 86EAC4930F93E8D1008EC948 /* RegexParser.h */,
- 86EAC4940F93E8D1008EC948 /* RegexPattern.h */,
- 868D81A112C428CE0093DC84 /* RegexPattern.cpp */,
);
name = yarr;
sourceTree = "<group>";
@@ -2294,11 +2294,7 @@
90D3469C0E285280009492EE /* RefCountedLeakCounter.h in Headers */,
BC18C4580E16F5CD00B34460 /* RefPtr.h in Headers */,
BC18C4590E16F5CD00B34460 /* RefPtrHashMap.h in Headers */,
- 86EAC4980F93E8D1008EC948 /* RegexInterpreter.h in Headers */,
- 86EAC49A0F93E8D1008EC948 /* RegexJIT.h in Headers */,
BC18C45A0E16F5CD00B34460 /* RegExp.h in Headers */,
- 86EAC49B0F93E8D1008EC948 /* RegexParser.h in Headers */,
- 86EAC49C0F93E8D1008EC948 /* RegexPattern.h in Headers */,
A1712B3F11C7B228007A5315 /* RegExpCache.h in Headers */,
BCD202C20E1706A7002C7E82 /* RegExpConstructor.h in Headers */,
BCD202D60E170708002C7E82 /* RegExpConstructor.lut.h in Headers */,
@@ -2397,6 +2393,10 @@
5135FAF212D26ACE003C083B /* Decoder.h in Headers */,
5135FAF312D26AD1003C083B /* Encoder.h in Headers */,
86704B4312DB8A8100A9FE7B /* YarrSyntaxChecker.h in Headers */,
+ 86704B8512DBA33700A9FE7B /* YarrInterpreter.h in Headers */,
+ 86704B8712DBA33700A9FE7B /* YarrJIT.h in Headers */,
+ 86704B8812DBA33700A9FE7B /* YarrParser.h in Headers */,
+ 86704B8A12DBA33700A9FE7B /* YarrPattern.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2823,10 +2823,7 @@
14469DE9107EC7E700650446 /* PrototypeFunction.cpp in Sources */,
088FA5BB0EF76D4300578E6F /* RandomNumber.cpp in Sources */,
905B02AE0E28640F006DF882 /* RefCountedLeakCounter.cpp in Sources */,
- 86EAC4970F93E8D1008EC948 /* RegexInterpreter.cpp in Sources */,
- 86EAC4990F93E8D1008EC948 /* RegexJIT.cpp in Sources */,
14280841107EC0930013E7B2 /* RegExp.cpp in Sources */,
- 868D81A212C428CE0093DC84 /* RegexPattern.cpp in Sources */,
A1712B3B11C7B212007A5315 /* RegExpCache.cpp in Sources */,
14280842107EC0930013E7B2 /* RegExpConstructor.cpp in Sources */,
14280843107EC0930013E7B2 /* RegExpObject.cpp in Sources */,
@@ -2861,6 +2858,9 @@
868BFA17117CF19900B908B1 /* WTFString.cpp in Sources */,
86D08D5311793613006E5ED0 /* WTFThreadData.cpp in Sources */,
86704B4212DB8A8100A9FE7B /* YarrSyntaxChecker.cpp in Sources */,
+ 86704B8412DBA33700A9FE7B /* YarrInterpreter.cpp in Sources */,
+ 86704B8612DBA33700A9FE7B /* YarrJIT.cpp in Sources */,
+ 86704B8912DBA33700A9FE7B /* YarrPattern.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
diff --git a/Source/JavaScriptCore/runtime/JSGlobalData.h b/Source/JavaScriptCore/runtime/JSGlobalData.h
index b4a6440..6fb202f 100644
--- a/Source/JavaScriptCore/runtime/JSGlobalData.h
+++ b/Source/JavaScriptCore/runtime/JSGlobalData.h
@@ -229,8 +229,7 @@ namespace JSC {
int maxReentryDepth;
RegExpCache* m_regExpCache;
-
- BumpPointerAllocator m_regexAllocator;
+ BumpPointerAllocator m_regExpAllocator;
#if ENABLE(REGEXP_TRACING)
typedef ListHashSet<RefPtr<RegExp> > RTTraceList;
diff --git a/Source/JavaScriptCore/runtime/RegExp.cpp b/Source/JavaScriptCore/runtime/RegExp.cpp
index 664a7fb..4ab59bd 100644
--- a/Source/JavaScriptCore/runtime/RegExp.cpp
+++ b/Source/JavaScriptCore/runtime/RegExp.cpp
@@ -22,22 +22,23 @@
#include "config.h"
#include "RegExp.h"
+
#include "Lexer.h"
+#include "yarr/YarrInterpreter.h"
+#include "yarr/YarrJIT.h"
+#include "yarr/YarrPattern.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/OwnArrayPtr.h>
-#include "yarr/RegexJIT.h"
-#include "yarr/RegexInterpreter.h"
-#include "yarr/RegexPattern.h"
namespace JSC {
struct RegExpRepresentation {
#if ENABLE(YARR_JIT)
- Yarr::RegexCodeBlock m_regExpJITCode;
+ Yarr::YarrCodeBlock m_regExpJITCode;
#endif
OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
};
@@ -82,7 +83,7 @@ PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patte
RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
{
- Yarr::RegexPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
if (m_constructionError)
return ParseError;
@@ -92,7 +93,7 @@ RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
#if ENABLE(YARR_JIT)
if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
- Yarr::jitCompileRegex(pattern, globalData, m_representation->m_regExpJITCode);
+ Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
#if ENABLE(YARR_JIT_DEBUG)
if (!m_representation->m_regExpJITCode.isFallBack())
res = JITCode;
@@ -105,7 +106,7 @@ RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
}
#endif
- m_representation->m_regExpBytecode = Yarr::byteCompileRegex(pattern, &globalData->m_regexAllocator);
+ m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
return res;
}
@@ -144,13 +145,13 @@ int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
int result;
#if ENABLE(YARR_JIT)
if (m_state == JITCode) {
- result = Yarr::executeRegex(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
#if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(s, startOffset, offsetVector, result);
#endif
} else
#endif
- result = Yarr::interpretRegex(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
+ result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
ASSERT(result >= -1);
#if ENABLE(REGEXP_TRACING)
@@ -181,7 +182,7 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int*
for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
interpreterOffsetVector[j] = -1;
- interpreterResult = Yarr::interpretRegex(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+ interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
if (jitResult != interpreterResult)
differences++;
@@ -230,7 +231,7 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int*
snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
#if ENABLE(YARR_JIT)
- Yarr::RegexCodeBlock& codeBlock = m_representation->m_regExpJITCode;
+ Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
char jitAddr[20];
if (m_state == JITCode)
diff --git a/Source/JavaScriptCore/yarr/RegexInterpreter.cpp b/Source/JavaScriptCore/yarr/RegexInterpreter.cpp
deleted file mode 100644
index 7769922..0000000
--- a/Source/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ /dev/null
@@ -1,1891 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "RegexInterpreter.h"
-
-#include "RegexPattern.h"
-#include <wtf/BumpPointerAllocator.h>
-
-#ifndef NDEBUG
-#include <stdio.h>
-#endif
-
-using namespace WTF;
-
-namespace JSC { namespace Yarr {
-
-class Interpreter {
-public:
- struct ParenthesesDisjunctionContext;
-
- struct BackTrackInfoPatternCharacter {
- uintptr_t matchAmount;
- };
- struct BackTrackInfoCharacterClass {
- uintptr_t matchAmount;
- };
- struct BackTrackInfoBackReference {
- uintptr_t begin; // Not really needed for greedy quantifiers.
- uintptr_t matchAmount; // Not really needed for fixed quantifiers.
- };
- struct BackTrackInfoAlternative {
- uintptr_t offset;
- };
- struct BackTrackInfoParentheticalAssertion {
- uintptr_t begin;
- };
- struct BackTrackInfoParenthesesOnce {
- uintptr_t begin;
- };
- struct BackTrackInfoParenthesesTerminal {
- uintptr_t begin;
- };
- struct BackTrackInfoParentheses {
- uintptr_t matchAmount;
- ParenthesesDisjunctionContext* lastContext;
- };
-
- static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
- {
- context->next = backTrack->lastContext;
- backTrack->lastContext = context;
- ++backTrack->matchAmount;
- }
-
- static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
- {
- ASSERT(backTrack->matchAmount);
- ASSERT(backTrack->lastContext);
- backTrack->lastContext = backTrack->lastContext->next;
- --backTrack->matchAmount;
- }
-
- struct DisjunctionContext
- {
- DisjunctionContext()
- : term(0)
- {
- }
-
- void* operator new(size_t, void* where)
- {
- return where;
- }
-
- int term;
- unsigned matchBegin;
- unsigned matchEnd;
- uintptr_t frame[1];
- };
-
- DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
- {
- size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
- allocatorPool = allocatorPool->ensureCapacity(size);
- if (!allocatorPool)
- CRASH();
- return new(allocatorPool->alloc(size)) DisjunctionContext();
- }
-
- void freeDisjunctionContext(DisjunctionContext* context)
- {
- allocatorPool = allocatorPool->dealloc(context);
- }
-
- struct ParenthesesDisjunctionContext
- {
- ParenthesesDisjunctionContext(int* output, ByteTerm& term)
- : next(0)
- {
- unsigned firstSubpatternId = term.atom.subpatternId;
- unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
-
- for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
- subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
- output[(firstSubpatternId << 1) + i] = -1;
- }
-
- new(getDisjunctionContext(term)) DisjunctionContext();
- }
-
- void* operator new(size_t, void* where)
- {
- return where;
- }
-
- void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
- {
- for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
- output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
- }
-
- DisjunctionContext* getDisjunctionContext(ByteTerm& term)
- {
- return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
- }
-
- ParenthesesDisjunctionContext* next;
- int subpatternBackup[1];
- };
-
- ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
- {
- size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
- allocatorPool = allocatorPool->ensureCapacity(size);
- if (!allocatorPool)
- CRASH();
- return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
- }
-
- void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
- {
- allocatorPool = allocatorPool->dealloc(context);
- }
-
- class InputStream {
- public:
- InputStream(const UChar* input, unsigned start, unsigned length)
- : input(input)
- , pos(start)
- , length(length)
- {
- }
-
- void next()
- {
- ++pos;
- }
-
- void rewind(unsigned amount)
- {
- ASSERT(pos >= amount);
- pos -= amount;
- }
-
- int read()
- {
- ASSERT(pos < length);
- if (pos < length)
- return input[pos];
- return -1;
- }
-
- int readPair()
- {
- ASSERT(pos + 1 < length);
- return input[pos] | input[pos + 1] << 16;
- }
-
- int readChecked(int position)
- {
- ASSERT(position < 0);
- ASSERT((unsigned)-position <= pos);
- unsigned p = pos + position;
- ASSERT(p < length);
- return input[p];
- }
-
- int reread(unsigned from)
- {
- ASSERT(from < length);
- return input[from];
- }
-
- int prev()
- {
- ASSERT(!(pos > length));
- if (pos && length)
- return input[pos - 1];
- return -1;
- }
-
- unsigned getPos()
- {
- return pos;
- }
-
- void setPos(unsigned p)
- {
- pos = p;
- }
-
- bool atStart()
- {
- return pos == 0;
- }
-
- bool atEnd()
- {
- return pos == length;
- }
-
- bool checkInput(int count)
- {
- if ((pos + count) <= length) {
- pos += count;
- return true;
- } else
- return false;
- }
-
- void uncheckInput(int count)
- {
- pos -= count;
- }
-
- bool atStart(int position)
- {
- return (pos + position) == 0;
- }
-
- bool atEnd(int position)
- {
- return (pos + position) == length;
- }
-
- bool isNotAvailableInput(int position)
- {
- return (pos + position) > length;
- }
-
- private:
- const UChar* input;
- unsigned pos;
- unsigned length;
- };
-
- bool testCharacterClass(CharacterClass* characterClass, int ch)
- {
- if (ch & 0xFF80) {
- for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
- if (ch == characterClass->m_matchesUnicode[i])
- return true;
- for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
- if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
- return true;
- } else {
- for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
- if (ch == characterClass->m_matches[i])
- return true;
- for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
- if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
- return true;
- }
-
- return false;
- }
-
- bool checkCharacter(int testChar, int inputPosition)
- {
- return testChar == input.readChecked(inputPosition);
- }
-
- bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
- {
- int ch = input.readChecked(inputPosition);
- return (loChar == ch) || (hiChar == ch);
- }
-
- bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
- {
- bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
- return invert ? !match : match;
- }
-
- bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
- {
- int matchSize = matchEnd - matchBegin;
-
- if (!input.checkInput(matchSize))
- return false;
-
- if (pattern->m_ignoreCase) {
- for (int i = 0; i < matchSize; ++i) {
- int ch = input.reread(matchBegin + i);
-
- int lo = Unicode::toLower(ch);
- int hi = Unicode::toUpper(ch);
-
- if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
- input.uncheckInput(matchSize);
- return false;
- }
- }
- } else {
- for (int i = 0; i < matchSize; ++i) {
- if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
- input.uncheckInput(matchSize);
- return false;
- }
- }
- }
-
- return true;
- }
-
- bool matchAssertionBOL(ByteTerm& term)
- {
- return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
- }
-
- bool matchAssertionEOL(ByteTerm& term)
- {
- if (term.inputPosition)
- return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
- else
- return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
- }
-
- bool matchAssertionWordBoundary(ByteTerm& term)
- {
- bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
- bool readIsWordchar;
- if (term.inputPosition)
- readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
- else
- readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
-
- bool wordBoundary = prevIsWordchar != readIsWordchar;
- return term.invert() ? !wordBoundary : wordBoundary;
- }
-
- bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
- {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
- {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
- if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
- return false;
- }
- return true;
- }
-
- case QuantifierGreedy: {
- unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- return true;
- }
-
- case QuantifierNonGreedy:
- backTrack->matchAmount = 0;
- return true;
- }
-
- ASSERT_NOT_REACHED();
- return false;
- }
-
- bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeBackReference);
- BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
-
- int matchBegin = output[(term.atom.subpatternId << 1)];
- int matchEnd = output[(term.atom.subpatternId << 1) + 1];
-
- // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
- // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
- // Eg.: /(a\1)/
- if (matchEnd == -1)
- return true;
-
- ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
-
- if (matchBegin == matchEnd)
- return true;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- backTrack->begin = input.getPos();
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
- if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
- input.setPos(backTrack->begin);
- return false;
- }
- }
- return true;
- }
-
- case QuantifierGreedy: {
- unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
- ++matchAmount;
- backTrack->matchAmount = matchAmount;
- return true;
- }
-
- case QuantifierNonGreedy:
- backTrack->begin = input.getPos();
- backTrack->matchAmount = 0;
- return true;
- }
-
- ASSERT_NOT_REACHED();
- return false;
- }
-
- bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeBackReference);
- BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
-
- int matchBegin = output[(term.atom.subpatternId << 1)];
- int matchEnd = output[(term.atom.subpatternId << 1) + 1];
- ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
-
- if (matchBegin == matchEnd)
- return false;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- // for quantityCount == 1, could rewind.
- input.setPos(backTrack->begin);
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.rewind(matchEnd - matchBegin);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
- ++backTrack->matchAmount;
- return true;
- } else
- input.setPos(backTrack->begin);
- break;
- }
-
- return false;
- }
-
- void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
- {
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
- output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
- }
- }
- void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
- {
- unsigned firstSubpatternId = term.atom.subpatternId;
- unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
- context->restoreOutput(output, firstSubpatternId, count);
- }
- JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
- {
- while (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
-
- JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch)
- return JSRegExpMatch;
-
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- return JSRegExpNoMatch;
- }
-
- bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy: {
- // set this speculatively; if we get to the parens end this will be true.
- backTrack->begin = input.getPos();
- break;
- }
- case QuantifierNonGreedy: {
- backTrack->begin = notFound;
- context->term += term.atom.parenthesesWidth;
- return true;
- }
- case QuantifierFixedCount:
- break;
- }
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
- }
-
- return true;
- }
-
- bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
- }
-
- if (term.atom.quantityType == QuantifierFixedCount)
- return true;
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
- return backTrack->begin != input.getPos();
- }
-
- bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = -1;
- output[(subpatternId << 1) + 1] = -1;
- }
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy:
- // if we backtrack to this point, there is another chance - try matching nothing.
- ASSERT(backTrack->begin != notFound);
- backTrack->begin = notFound;
- context->term += term.atom.parenthesesWidth;
- return true;
- case QuantifierNonGreedy:
- ASSERT(backTrack->begin != notFound);
- case QuantifierFixedCount:
- break;
- }
-
- return false;
- }
-
- bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy:
- if (backTrack->begin == notFound) {
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
- case QuantifierNonGreedy:
- if (backTrack->begin == notFound) {
- backTrack->begin = input.getPos();
- if (term.capture()) {
- // Technically this access to inputPosition should be accessing the begin term's
- // inputPosition, but for repeats other than fixed these values should be
- // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
- ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
- unsigned subpatternId = term.atom.subpatternId;
- output[subpatternId << 1] = input.getPos() + term.inputPosition;
- }
- context->term -= term.atom.parenthesesWidth;
- return true;
- }
- case QuantifierFixedCount:
- break;
- }
-
- return false;
- }
-
- bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
- ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
- ASSERT(!term.capture());
-
- BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
- backTrack->begin = input.getPos();
- return true;
- }
-
- bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
-
- BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
- // Empty match is a failed match.
- if (backTrack->begin == input.getPos())
- return false;
-
- // Successful match! Okay, what's next? - loop around and try to match moar!
- context->term -= (term.atom.parenthesesWidth + 1);
- return true;
- }
-
- bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
- ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
- ASSERT(!term.capture());
-
- // If we backtrack to this point, we have failed to match this iteration of the parens.
- // Since this is greedy / zero minimum a failed is also accepted as a match!
- context->term += term.atom.parenthesesWidth;
- return true;
- }
-
- bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
- {
- // 'Terminal' parentheses are at the end of the regex, and as such a match past end
- // should always be returned as a successful match - we should never backtrack to here.
- ASSERT_NOT_REACHED();
- return false;
- }
-
- bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- backTrack->begin = input.getPos();
- return true;
- }
-
- bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- input.setPos(backTrack->begin);
-
- // We've reached the end of the parens; if they are inverted, this is failure.
- if (term.invert()) {
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
-
- return true;
- }
-
- bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- // We've failed to match parens; if they are inverted, this is win!
- if (term.invert()) {
- context->term += term.atom.parenthesesWidth;
- return true;
- }
-
- return false;
- }
-
- bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- input.setPos(backTrack->begin);
-
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
-
- JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-
- BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
- ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
-
- backTrack->matchAmount = 0;
- backTrack->lastContext = 0;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- // While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
- // Try to do a match, and it it succeeds, add it to the list.
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- // The match failed; try to find an alternate point to carry on from.
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result == JSRegExpNoMatch) {
- JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
- if (backtrackResult != JSRegExpMatch)
- return backtrackResult;
- } else
- return result;
- }
- }
-
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- case QuantifierGreedy: {
- while (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
-
- break;
- }
- }
-
- if (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- case QuantifierNonGreedy:
- return JSRegExpMatch;
- }
-
- ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- // Rules for backtracking differ depending on whether this is greedy or non-greedy.
- //
- // Greedy matches never should try just adding more - you should already have done
- // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
- // you backtrack an item off the list needs checking, since we'll never have matched
- // the one less case. Tracking forwards, still add as much as possible.
- //
- // Non-greedy, we've already done the one less case, so don't match on popping.
- // We haven't done the one more case, so always try to add that.
- //
- JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-
- BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
- ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
-
- ParenthesesDisjunctionContext* context = 0;
- JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
-
- if (result != JSRegExpMatch)
- return result;
-
- // While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
- // Try to do a match, and it it succeeds, add it to the list.
- context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
-
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- // The match failed; try to find an alternate point to carry on from.
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result == JSRegExpNoMatch) {
- JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
- if (backtrackResult != JSRegExpMatch)
- return backtrackResult;
- } else
- return result;
- }
- }
-
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
- context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- case QuantifierGreedy: {
- if (!backTrack->matchAmount)
- return JSRegExpNoMatch;
-
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch) {
- while (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (parenthesesResult == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (parenthesesResult != JSRegExpNoMatch)
- return parenthesesResult;
-
- break;
- }
- }
- } else {
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- if (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- case QuantifierNonGreedy: {
- // If we've not reached the limit, try to add one more match.
- if (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch) {
- appendParenthesesDisjunctionContext(backTrack, context);
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- // Nope - okay backtrack looking for an alternative.
- while (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch) {
- // successful backtrack! we're back in the game!
- if (backTrack->matchAmount) {
- context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- // pop a match off the stack
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- return result;
- }
-
- return JSRegExpNoMatch;
- }
- }
-
- ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- void lookupForBeginChars()
- {
- int character;
- bool firstSingleCharFound;
-
- while (true) {
- if (input.isNotAvailableInput(2))
- return;
-
- firstSingleCharFound = false;
-
- character = input.readPair();
-
- for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
- BeginChar bc = pattern->m_beginChars[i];
-
- if (!firstSingleCharFound && bc.value <= 0xFFFF) {
- firstSingleCharFound = true;
- character &= 0xFFFF;
- }
-
- if ((character | bc.mask) == bc.value)
- return;
- }
-
- input.next();
- }
- }
-
-#define MATCH_NEXT() { ++context->term; goto matchAgain; }
-#define BACKTRACK() { --context->term; goto backtrack; }
-#define currentTerm() (disjunction->terms[context->term])
- JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
- {
- if (!--remainingMatchCount)
- return JSRegExpErrorHitLimit;
-
- if (btrack)
- BACKTRACK();
-
- if (pattern->m_containsBeginChars && isBody)
- lookupForBeginChars();
-
- context->matchBegin = input.getPos();
- context->term = 0;
-
- matchAgain:
- ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
-
- switch (currentTerm().type) {
- case ByteTerm::TypeSubpatternBegin:
- MATCH_NEXT();
- case ByteTerm::TypeSubpatternEnd:
- context->matchEnd = input.getPos();
- return JSRegExpMatch;
-
- case ByteTerm::TypeBodyAlternativeBegin:
- MATCH_NEXT();
- case ByteTerm::TypeBodyAlternativeDisjunction:
- case ByteTerm::TypeBodyAlternativeEnd:
- context->matchEnd = input.getPos();
- return JSRegExpMatch;
-
- case ByteTerm::TypeAlternativeBegin:
- MATCH_NEXT();
- case ByteTerm::TypeAlternativeDisjunction:
- case ByteTerm::TypeAlternativeEnd: {
- int offset = currentTerm().alternative.end;
- BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
- backTrack->offset = offset;
- context->term += offset;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypeAssertionBOL:
- if (matchAssertionBOL(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeAssertionEOL:
- if (matchAssertionEOL(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeAssertionWordBoundary:
- if (matchAssertionWordBoundary(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypePatternCharacterOnce:
- case ByteTerm::TypePatternCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
- if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
- BACKTRACK();
- }
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCharacterGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
- if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCharacterNonGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- backTrack->matchAmount = 0;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypePatternCasedCharacterOnce:
- case ByteTerm::TypePatternCasedCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
- if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
- BACKTRACK();
- }
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCasedCharacterGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
- if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCasedCharacterNonGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- backTrack->matchAmount = 0;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypeCharacterClass:
- if (matchCharacterClass(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeBackReference:
- if (matchBackReference(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpattern: {
- JSRegExpResult result = matchParentheses(currentTerm(), context);
-
- if (result == JSRegExpMatch) {
- MATCH_NEXT();
- } else if (result != JSRegExpNoMatch)
- return result;
-
- BACKTRACK();
- }
- case ByteTerm::TypeParenthesesSubpatternOnceBegin:
- if (matchParenthesesOnceBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternOnceEnd:
- if (matchParenthesesOnceEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
- if (matchParenthesesTerminalBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
- if (matchParenthesesTerminalEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionBegin:
- if (matchParentheticalAssertionBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionEnd:
- if (matchParentheticalAssertionEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypeCheckInput:
- if (input.checkInput(currentTerm().checkInputCount))
- MATCH_NEXT();
- BACKTRACK();
- }
-
- // We should never fall-through to here.
- ASSERT_NOT_REACHED();
-
- backtrack:
- ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
-
- switch (currentTerm().type) {
- case ByteTerm::TypeSubpatternBegin:
- return JSRegExpNoMatch;
- case ByteTerm::TypeSubpatternEnd:
- ASSERT_NOT_REACHED();
-
- case ByteTerm::TypeBodyAlternativeBegin:
- case ByteTerm::TypeBodyAlternativeDisjunction: {
- int offset = currentTerm().alternative.next;
- context->term += offset;
- if (offset > 0)
- MATCH_NEXT();
-
- if (input.atEnd())
- return JSRegExpNoMatch;
-
- input.next();
-
- if (pattern->m_containsBeginChars && isBody)
- lookupForBeginChars();
-
- context->matchBegin = input.getPos();
-
- if (currentTerm().alternative.onceThrough)
- context->term += currentTerm().alternative.next;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypeBodyAlternativeEnd:
- ASSERT_NOT_REACHED();
-
- case ByteTerm::TypeAlternativeBegin:
- case ByteTerm::TypeAlternativeDisjunction: {
- int offset = currentTerm().alternative.next;
- context->term += offset;
- if (offset > 0)
- MATCH_NEXT();
- BACKTRACK();
- }
- case ByteTerm::TypeAlternativeEnd: {
- // We should never backtrack back into an alternative of the main body of the regex.
- BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
- unsigned offset = backTrack->offset;
- context->term -= offset;
- BACKTRACK();
- }
-
- case ByteTerm::TypeAssertionBOL:
- case ByteTerm::TypeAssertionEOL:
- case ByteTerm::TypeAssertionWordBoundary:
- BACKTRACK();
-
- case ByteTerm::TypePatternCharacterOnce:
- case ByteTerm::TypePatternCharacterFixed:
- case ByteTerm::TypePatternCharacterGreedy:
- case ByteTerm::TypePatternCharacterNonGreedy:
- if (backtrackPatternCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypePatternCasedCharacterOnce:
- case ByteTerm::TypePatternCasedCharacterFixed:
- case ByteTerm::TypePatternCasedCharacterGreedy:
- case ByteTerm::TypePatternCasedCharacterNonGreedy:
- if (backtrackPatternCasedCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeCharacterClass:
- if (backtrackCharacterClass(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeBackReference:
- if (backtrackBackReference(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpattern: {
- JSRegExpResult result = backtrackParentheses(currentTerm(), context);
-
- if (result == JSRegExpMatch) {
- MATCH_NEXT();
- } else if (result != JSRegExpNoMatch)
- return result;
-
- BACKTRACK();
- }
- case ByteTerm::TypeParenthesesSubpatternOnceBegin:
- if (backtrackParenthesesOnceBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternOnceEnd:
- if (backtrackParenthesesOnceEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
- if (backtrackParenthesesTerminalBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
- if (backtrackParenthesesTerminalEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionBegin:
- if (backtrackParentheticalAssertionBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionEnd:
- if (backtrackParentheticalAssertionEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypeCheckInput:
- input.uncheckInput(currentTerm().checkInputCount);
- BACKTRACK();
- }
-
- ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
- {
- JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
-
- if (result == JSRegExpMatch) {
- while (context->matchBegin == context->matchEnd) {
- result = matchDisjunction(disjunction, context, true);
- if (result != JSRegExpMatch)
- return result;
- }
- return JSRegExpMatch;
- }
-
- return result;
- }
-
- int interpret()
- {
- allocatorPool = pattern->m_allocator->startAllocator();
- if (!allocatorPool)
- CRASH();
-
- for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
- output[i] = -1;
-
- DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
-
- JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
- if (result == JSRegExpMatch) {
- output[0] = context->matchBegin;
- output[1] = context->matchEnd;
- }
-
- freeDisjunctionContext(context);
-
- pattern->m_allocator->stopAllocator();
-
- // RegExp.cpp currently expects all error to be converted to -1.
- ASSERT((result == JSRegExpMatch) == (output[0] != -1));
- return output[0];
- }
-
- Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
- : pattern(pattern)
- , output(output)
- , input(inputChar, start, length)
- , allocatorPool(0)
- , remainingMatchCount(matchLimit)
- {
- }
-
-private:
- BytecodePattern *pattern;
- int* output;
- InputStream input;
- BumpPointerPool* allocatorPool;
- unsigned remainingMatchCount;
-};
-
-
-
-class ByteCompiler {
- struct ParenthesesStackEntry {
- unsigned beginTerm;
- unsigned savedAlternativeIndex;
- ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
- : beginTerm(beginTerm)
- , savedAlternativeIndex(savedAlternativeIndex)
- {
- }
- };
-
-public:
- ByteCompiler(RegexPattern& pattern)
- : m_pattern(pattern)
- {
- m_currentAlternativeIndex = 0;
- }
-
- PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
- {
- regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
- emitDisjunction(m_pattern.m_body);
- regexEnd();
-
- return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
- }
-
- void checkInput(unsigned count)
- {
- m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
- }
-
- void assertionBOL(int inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
- }
-
- void assertionEOL(int inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
- }
-
- void assertionWordBoundary(bool invert, int inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
- }
-
- void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- if (m_pattern.m_ignoreCase) {
- UChar lo = Unicode::toLower(ch);
- UChar hi = Unicode::toUpper(ch);
-
- if (lo != hi) {
- m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
- return;
- }
- }
-
- m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
- }
-
- void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
-
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- }
-
- void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- ASSERT(subpatternId);
-
- m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
-
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- }
-
- void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
- // then fix this up at the end! - simplifying this should make it much clearer.
- // https://bugs.webkit.org/show_bug.cgi?id=50136
-
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
-
- bool invert = m_bodyDisjunction->terms[beginTerm].invert();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- unsigned popParenthesesStack()
- {
- ASSERT(m_parenthesesStack.size());
- int stackEnd = m_parenthesesStack.size() - 1;
- unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
- m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
- m_parenthesesStack.shrink(stackEnd);
-
- ASSERT(beginTerm < m_bodyDisjunction->terms.size());
- ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
-
- return beginTerm;
- }
-
-#ifndef NDEBUG
- void dumpDisjunction(ByteDisjunction* disjunction)
- {
- printf("ByteDisjunction(%p):\n\t", disjunction);
- for (unsigned i = 0; i < disjunction->terms.size(); ++i)
- printf("{ %d } ", disjunction->terms[i].type);
- printf("\n");
- }
-#endif
-
- void closeAlternative(int beginTerm)
- {
- int origBeginTerm = beginTerm;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
- int endIndex = m_bodyDisjunction->terms.size();
-
- unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
-
- if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
- m_bodyDisjunction->terms.remove(beginTerm);
- else {
- while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
- beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
- m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
-
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
- m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
- }
- }
-
- void closeBodyAlternative()
- {
- int beginTerm = 0;
- int origBeginTerm = 0;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
- int endIndex = m_bodyDisjunction->terms.size();
-
- unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
-
- while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
- beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
- m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
-
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
- m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
- }
-
- void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
-
- ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
-
- bool capture = parenthesesBegin.capture();
- unsigned subpatternId = parenthesesBegin.atom.subpatternId;
-
- unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
- ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
-
- parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
- for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
- parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
- parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
-
- m_bodyDisjunction->terms.shrink(beginTerm);
-
- m_allParenthesesInfo.append(parenthesesDisjunction);
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
-
- bool capture = m_bodyDisjunction->terms[beginTerm].capture();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
-
- bool capture = m_bodyDisjunction->terms[beginTerm].capture();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
- {
- m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
- m_bodyDisjunction->terms[0].frameLocation = 0;
- m_currentAlternativeIndex = 0;
- }
-
- void regexEnd()
- {
- closeBodyAlternative();
- }
-
- void alternativeBodyDisjunction(bool onceThrough)
- {
- int newAlternativeIndex = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
-
- m_currentAlternativeIndex = newAlternativeIndex;
- }
-
- void alternativeDisjunction()
- {
- int newAlternativeIndex = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
-
- m_currentAlternativeIndex = newAlternativeIndex;
- }
-
- void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0, bool isParentheticalAssertion = false)
- {
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
-
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
-
- if (alt) {
- if (disjunction == m_pattern.m_body)
- alternativeBodyDisjunction(alternative->onceThrough());
- else
- alternativeDisjunction();
- }
-
- unsigned minimumSize = alternative->m_minimumSize;
- int countToCheck;
-
- if (isParentheticalAssertion && parenthesesInputCountAlreadyChecked > minimumSize)
- countToCheck = 0;
- else
- countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
-
- ASSERT(countToCheck >= 0);
- if (countToCheck) {
- checkInput(countToCheck);
- currentCountAlreadyChecked += countToCheck;
- }
-
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
- PatternTerm& term = alternative->m_terms[i];
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- assertionBOL(term.inputPosition - currentCountAlreadyChecked);
- break;
-
- case PatternTerm::TypeAssertionEOL:
- assertionEOL(term.inputPosition - currentCountAlreadyChecked);
- break;
-
- case PatternTerm::TypeAssertionWordBoundary:
- assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
- break;
-
- case PatternTerm::TypePatternCharacter:
- atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeCharacterClass:
- atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeBackReference:
- atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypeParenthesesSubpattern: {
- unsigned disjunctionAlreadyCheckedCount = 0;
- if (term.quantityCount == 1 && !term.parentheses.isCopy) {
- unsigned alternativeFrameLocation = term.frameLocation;
- // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
- if (term.quantityType == QuantifierFixedCount)
- disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
- else
- alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
- } else if (term.parentheses.isTerminal) {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
- } else {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
- atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
- }
- break;
- }
-
- case PatternTerm::TypeParentheticalAssertion: {
- unsigned alternativeFrameLocation = term.frameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
-
- ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition);
- int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
-
- atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
- atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
- break;
- }
- }
- }
- }
- }
-
-private:
- RegexPattern& m_pattern;
- OwnPtr<ByteDisjunction> m_bodyDisjunction;
- unsigned m_currentAlternativeIndex;
- Vector<ParenthesesStackEntry> m_parenthesesStack;
- Vector<ByteDisjunction*> m_allParenthesesInfo;
-};
-
-PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator* allocator)
-{
- return ByteCompiler(pattern).compile(allocator);
-}
-
-int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
-{
- return Interpreter(regex, output, input, start, length).interpret();
-}
-
-
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
-
-
-} }
diff --git a/Source/JavaScriptCore/yarr/RegexInterpreter.h b/Source/JavaScriptCore/yarr/RegexInterpreter.h
deleted file mode 100644
index 0fd8a57..0000000
--- a/Source/JavaScriptCore/yarr/RegexInterpreter.h
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RegexInterpreter_h
-#define RegexInterpreter_h
-
-#include "RegexParser.h"
-#include "RegexPattern.h"
-#include <wtf/PassOwnPtr.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WTF {
-class BumpPointerAllocator;
-}
-using WTF::BumpPointerAllocator;
-
-namespace JSC { namespace Yarr {
-
-// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed.
-
-// The below limit restricts the number of "recursive" match calls in order to
-// avoid spending exponential time on complex regular expressions.
-static const unsigned matchLimit = 1000000;
-
-enum JSRegExpResult {
- JSRegExpMatch = 1,
- JSRegExpNoMatch = 0,
- JSRegExpErrorNoMatch = -1,
- JSRegExpErrorHitLimit = -2,
- JSRegExpErrorNoMemory = -3,
- JSRegExpErrorInternal = -4
-};
-
-class ByteDisjunction;
-
-struct ByteTerm {
- enum Type {
- TypeBodyAlternativeBegin,
- TypeBodyAlternativeDisjunction,
- TypeBodyAlternativeEnd,
- TypeAlternativeBegin,
- TypeAlternativeDisjunction,
- TypeAlternativeEnd,
- TypeSubpatternBegin,
- TypeSubpatternEnd,
- TypeAssertionBOL,
- TypeAssertionEOL,
- TypeAssertionWordBoundary,
- TypePatternCharacterOnce,
- TypePatternCharacterFixed,
- TypePatternCharacterGreedy,
- TypePatternCharacterNonGreedy,
- TypePatternCasedCharacterOnce,
- TypePatternCasedCharacterFixed,
- TypePatternCasedCharacterGreedy,
- TypePatternCasedCharacterNonGreedy,
- TypeCharacterClass,
- TypeBackReference,
- TypeParenthesesSubpattern,
- TypeParenthesesSubpatternOnceBegin,
- TypeParenthesesSubpatternOnceEnd,
- TypeParenthesesSubpatternTerminalBegin,
- TypeParenthesesSubpatternTerminalEnd,
- TypeParentheticalAssertionBegin,
- TypeParentheticalAssertionEnd,
- TypeCheckInput,
- } type;
- union {
- struct {
- union {
- UChar patternCharacter;
- struct {
- UChar lo;
- UChar hi;
- } casedCharacter;
- CharacterClass* characterClass;
- unsigned subpatternId;
- };
- union {
- ByteDisjunction* parenthesesDisjunction;
- unsigned parenthesesWidth;
- };
- QuantifierType quantityType;
- unsigned quantityCount;
- } atom;
- struct {
- int next;
- int end;
- bool onceThrough;
- } alternative;
- unsigned checkInputCount;
- };
- unsigned frameLocation;
- bool m_capture : 1;
- bool m_invert : 1;
- int inputPosition;
-
- ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- : frameLocation(frameLocation)
- , m_capture(false)
- , m_invert(false)
- {
- switch (quantityType) {
- case QuantifierFixedCount:
- type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
- break;
- case QuantifierGreedy:
- type = ByteTerm::TypePatternCharacterGreedy;
- break;
- case QuantifierNonGreedy:
- type = ByteTerm::TypePatternCharacterNonGreedy;
- break;
- }
-
- atom.patternCharacter = ch;
- atom.quantityType = quantityType;
- atom.quantityCount = quantityCount;
- inputPosition = inputPos;
- }
-
- ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
- : frameLocation(frameLocation)
- , m_capture(false)
- , m_invert(false)
- {
- switch (quantityType) {
- case QuantifierFixedCount:
- type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
- break;
- case QuantifierGreedy:
- type = ByteTerm::TypePatternCasedCharacterGreedy;
- break;
- case QuantifierNonGreedy:
- type = ByteTerm::TypePatternCasedCharacterNonGreedy;
- break;
- }
-
- atom.casedCharacter.lo = lo;
- atom.casedCharacter.hi = hi;
- atom.quantityType = quantityType;
- atom.quantityCount = quantityCount;
- inputPosition = inputPos;
- }
-
- ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
- : type(ByteTerm::TypeCharacterClass)
- , m_capture(false)
- , m_invert(invert)
- {
- atom.characterClass = characterClass;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
- : type(type)
- , m_capture(capture)
- , m_invert(false)
- {
- atom.subpatternId = subpatternId;
- atom.parenthesesDisjunction = parenthesesInfo;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- ByteTerm(Type type, bool invert = false)
- : type(type)
- , m_capture(false)
- , m_invert(invert)
- {
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- }
-
- ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
- : type(type)
- , m_capture(capture)
- , m_invert(invert)
- {
- atom.subpatternId = subpatternId;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- static ByteTerm BOL(int inputPos)
- {
- ByteTerm term(TypeAssertionBOL);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm CheckInput(unsigned count)
- {
- ByteTerm term(TypeCheckInput);
- term.checkInputCount = count;
- return term;
- }
-
- static ByteTerm EOL(int inputPos)
- {
- ByteTerm term(TypeAssertionEOL);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm WordBoundary(bool invert, int inputPos)
- {
- ByteTerm term(TypeAssertionWordBoundary, invert);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm BackReference(unsigned subpatternId, int inputPos)
- {
- return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
- }
-
- static ByteTerm BodyAlternativeBegin(bool onceThrough)
- {
- ByteTerm term(TypeBodyAlternativeBegin);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = onceThrough;
- return term;
- }
-
- static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
- {
- ByteTerm term(TypeBodyAlternativeDisjunction);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = onceThrough;
- return term;
- }
-
- static ByteTerm BodyAlternativeEnd()
- {
- ByteTerm term(TypeBodyAlternativeEnd);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeBegin()
- {
- ByteTerm term(TypeAlternativeBegin);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeDisjunction()
- {
- ByteTerm term(TypeAlternativeDisjunction);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeEnd()
- {
- ByteTerm term(TypeAlternativeEnd);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm SubpatternBegin()
- {
- return ByteTerm(TypeSubpatternBegin);
- }
-
- static ByteTerm SubpatternEnd()
- {
- return ByteTerm(TypeSubpatternEnd);
- }
-
- bool invert()
- {
- return m_invert;
- }
-
- bool capture()
- {
- return m_capture;
- }
-};
-
-class ByteDisjunction : public FastAllocBase {
-public:
- ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
- : m_numSubpatterns(numSubpatterns)
- , m_frameSize(frameSize)
- {
- }
-
- Vector<ByteTerm> terms;
- unsigned m_numSubpatterns;
- unsigned m_frameSize;
-};
-
-struct BytecodePattern : FastAllocBase {
- BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator)
- : m_body(body)
- , m_ignoreCase(pattern.m_ignoreCase)
- , m_multiline(pattern.m_multiline)
- , m_containsBeginChars(pattern.m_containsBeginChars)
- , m_allocator(allocator)
- {
- newlineCharacterClass = pattern.newlineCharacterClass();
- wordcharCharacterClass = pattern.wordcharCharacterClass();
-
- m_allParenthesesInfo.append(allParenthesesInfo);
- m_userCharacterClasses.append(pattern.m_userCharacterClasses);
- // 'Steal' the RegexPattern's CharacterClasses! We clear its
- // array, so that it won't delete them on destruction. We'll
- // take responsibility for that.
- pattern.m_userCharacterClasses.clear();
-
- m_beginChars.append(pattern.m_beginChars);
- }
-
- ~BytecodePattern()
- {
- deleteAllValues(m_allParenthesesInfo);
- deleteAllValues(m_userCharacterClasses);
- }
-
- OwnPtr<ByteDisjunction> m_body;
- bool m_ignoreCase;
- bool m_multiline;
- bool m_containsBeginChars;
- // Each BytecodePattern is associated with a RegExp, each RegExp is associated
- // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regexAllocator.
- BumpPointerAllocator* m_allocator;
-
- CharacterClass* newlineCharacterClass;
- CharacterClass* wordcharCharacterClass;
-
- Vector<BeginChar> m_beginChars;
-
-private:
- Vector<ByteDisjunction*> m_allParenthesesInfo;
- Vector<CharacterClass*> m_userCharacterClasses;
-};
-
-PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*);
-int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
-
-} } // namespace JSC::Yarr
-
-#endif // RegexInterpreter_h
diff --git a/Source/JavaScriptCore/yarr/RegexJIT.cpp b/Source/JavaScriptCore/yarr/RegexJIT.cpp
deleted file mode 100644
index 50fe6db..0000000
--- a/Source/JavaScriptCore/yarr/RegexJIT.cpp
+++ /dev/null
@@ -1,2213 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "RegexJIT.h"
-
-#include "ASCIICType.h"
-#include "JSGlobalData.h"
-#include "LinkBuffer.h"
-#include "MacroAssembler.h"
-#include "RegexParser.h"
-
-#if ENABLE(YARR_JIT)
-
-using namespace WTF;
-
-namespace JSC { namespace Yarr {
-
-class RegexGenerator : private MacroAssembler {
- friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
-
-#if CPU(ARM)
- static const RegisterID input = ARMRegisters::r0;
- static const RegisterID index = ARMRegisters::r1;
- static const RegisterID length = ARMRegisters::r2;
- static const RegisterID output = ARMRegisters::r4;
-
- static const RegisterID regT0 = ARMRegisters::r5;
- static const RegisterID regT1 = ARMRegisters::r6;
-
- static const RegisterID returnRegister = ARMRegisters::r0;
-#elif CPU(MIPS)
- static const RegisterID input = MIPSRegisters::a0;
- static const RegisterID index = MIPSRegisters::a1;
- static const RegisterID length = MIPSRegisters::a2;
- static const RegisterID output = MIPSRegisters::a3;
-
- static const RegisterID regT0 = MIPSRegisters::t4;
- static const RegisterID regT1 = MIPSRegisters::t5;
-
- static const RegisterID returnRegister = MIPSRegisters::v0;
-#elif CPU(X86)
- static const RegisterID input = X86Registers::eax;
- static const RegisterID index = X86Registers::edx;
- static const RegisterID length = X86Registers::ecx;
- static const RegisterID output = X86Registers::edi;
-
- static const RegisterID regT0 = X86Registers::ebx;
- static const RegisterID regT1 = X86Registers::esi;
-
- static const RegisterID returnRegister = X86Registers::eax;
-#elif CPU(X86_64)
- static const RegisterID input = X86Registers::edi;
- static const RegisterID index = X86Registers::esi;
- static const RegisterID length = X86Registers::edx;
- static const RegisterID output = X86Registers::ecx;
-
- static const RegisterID regT0 = X86Registers::eax;
- static const RegisterID regT1 = X86Registers::ebx;
-
- static const RegisterID returnRegister = X86Registers::eax;
-#endif
-
- void optimizeAlternative(PatternAlternative* alternative)
- {
- if (!alternative->m_terms.size())
- return;
-
- for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
- PatternTerm& term = alternative->m_terms[i];
- PatternTerm& nextTerm = alternative->m_terms[i + 1];
-
- if ((term.type == PatternTerm::TypeCharacterClass)
- && (term.quantityType == QuantifierFixedCount)
- && (nextTerm.type == PatternTerm::TypePatternCharacter)
- && (nextTerm.quantityType == QuantifierFixedCount)) {
- PatternTerm termCopy = term;
- alternative->m_terms[i] = nextTerm;
- alternative->m_terms[i + 1] = termCopy;
- }
- }
- }
-
- void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
- {
- do {
- // pick which range we're going to generate
- int which = count >> 1;
- char lo = ranges[which].begin;
- char hi = ranges[which].end;
-
- // check if there are any ranges or matches below lo. If not, just jl to failure -
- // if there is anything else to check, check that first, if it falls through jmp to failure.
- if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
- Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
-
- // generate code for all ranges before this one
- if (which)
- matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
-
- while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
- matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
- ++*matchIndex;
- }
- failures.append(jump());
-
- loOrAbove.link(this);
- } else if (which) {
- Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
-
- matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
- failures.append(jump());
-
- loOrAbove.link(this);
- } else
- failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
-
- while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
- ++*matchIndex;
-
- matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
- // fall through to here, the value is above hi.
-
- // shuffle along & loop around if there are any more matches to handle.
- unsigned next = which + 1;
- ranges += next;
- count -= next;
- } while (count);
- }
-
- void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
- {
- if (charClass->m_table) {
- ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
- matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
- return;
- }
- Jump unicodeFail;
- if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
- Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
-
- if (charClass->m_matchesUnicode.size()) {
- for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
- UChar ch = charClass->m_matchesUnicode[i];
- matchDest.append(branch32(Equal, character, Imm32(ch)));
- }
- }
-
- if (charClass->m_rangesUnicode.size()) {
- for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
- UChar lo = charClass->m_rangesUnicode[i].begin;
- UChar hi = charClass->m_rangesUnicode[i].end;
-
- Jump below = branch32(LessThan, character, Imm32(lo));
- matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
- below.link(this);
- }
- }
-
- unicodeFail = jump();
- isAscii.link(this);
- }
-
- if (charClass->m_ranges.size()) {
- unsigned matchIndex = 0;
- JumpList failures;
- matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
- while (matchIndex < charClass->m_matches.size())
- matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
-
- failures.link(this);
- } else if (charClass->m_matches.size()) {
- // optimization: gather 'a','A' etc back together, can mask & test once.
- Vector<char> matchesAZaz;
-
- for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
- char ch = charClass->m_matches[i];
- if (m_pattern.m_ignoreCase) {
- if (isASCIILower(ch)) {
- matchesAZaz.append(ch);
- continue;
- }
- if (isASCIIUpper(ch))
- continue;
- }
- matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
- }
-
- if (unsigned countAZaz = matchesAZaz.size()) {
- or32(Imm32(32), character);
- for (unsigned i = 0; i < countAZaz; ++i)
- matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
- }
- }
-
- if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
- unicodeFail.link(this);
- }
-
- // Jumps if input not available; will have (incorrectly) incremented already!
- Jump jumpIfNoAvailableInput(unsigned countToCheck)
- {
- add32(Imm32(countToCheck), index);
- return branch32(Above, index, length);
- }
-
- Jump jumpIfAvailableInput(unsigned countToCheck)
- {
- add32(Imm32(countToCheck), index);
- return branch32(BelowOrEqual, index, length);
- }
-
- Jump checkInput()
- {
- return branch32(BelowOrEqual, index, length);
- }
-
- Jump atEndOfInput()
- {
- return branch32(Equal, index, length);
- }
-
- Jump notAtEndOfInput()
- {
- return branch32(NotEqual, index, length);
- }
-
- Jump jumpIfCharEquals(UChar ch, int inputPosition)
- {
- return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
- }
-
- Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
- {
- return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
- }
-
- void readCharacter(int inputPosition, RegisterID reg)
- {
- load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
- }
-
- void storeToFrame(RegisterID reg, unsigned frameLocation)
- {
- poke(reg, frameLocation);
- }
-
- void storeToFrame(Imm32 imm, unsigned frameLocation)
- {
- poke(imm, frameLocation);
- }
-
- DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
- {
- return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
- }
-
- void loadFromFrame(unsigned frameLocation, RegisterID reg)
- {
- peek(reg, frameLocation);
- }
-
- void loadFromFrameAndJump(unsigned frameLocation)
- {
- jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
- }
-
- struct IndirectJumpEntry {
- IndirectJumpEntry(int32_t stackOffset)
- : m_stackOffset(stackOffset)
- {
- }
-
- IndirectJumpEntry(int32_t stackOffset, Jump jump)
- : m_stackOffset(stackOffset)
- {
- addJump(jump);
- }
-
- void addJump(Jump jump)
- {
- m_relJumps.append(jump);
- }
-
- int32_t m_stackOffset;
- JumpList m_relJumps;
- };
-
- struct AlternativeBacktrackRecord {
- DataLabelPtr dataLabel;
- Label backtrackLocation;
-
- AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
- : dataLabel(dataLabel)
- , backtrackLocation(backtrackLocation)
- {
- }
- };
-
- struct ParenthesesTail;
- struct TermGenerationState;
-
- struct GenerationState {
- typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap;
-
- GenerationState()
- : m_parenNestingLevel(0)
- {
- }
-
- void addIndirectJumpEntry(int32_t stackOffset, Jump jump)
- {
- IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset);
-
- ASSERT(stackOffset >= 0);
-
- uint32_t offset = static_cast<uint32_t>(stackOffset);
-
- if (result == m_indirectJumpMap.end())
- m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump));
- else
- result->second->addJump(jump);
- }
-
- void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps)
- {
- JumpList::JumpVector jumpVector = jumps.jumps();
- size_t size = jumpVector.size();
- for (size_t i = 0; i < size; ++i)
- addIndirectJumpEntry(stackOffset, jumpVector[i]);
-
- jumps.empty();
- }
-
- void emitIndirectJumpTable(MacroAssembler* masm)
- {
- for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) {
- IndirectJumpEntry* indJumpEntry = iter->second;
- indJumpEntry->m_relJumps.link(masm);
- masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset));
- delete indJumpEntry;
- }
- }
-
- void incrementParenNestingLevel()
- {
- ++m_parenNestingLevel;
- }
-
- void decrementParenNestingLevel()
- {
- --m_parenNestingLevel;
- }
-
- ParenthesesTail* addParenthesesTail(PatternTerm& term, ParenthesesTail* nextOuterParenTail)
- {
- ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, nextOuterParenTail);
- m_parenTails.append(parenthesesTail);
- m_parenTailsForIteration.append(parenthesesTail);
-
- return parenthesesTail;
- }
-
- void emitParenthesesTail(RegexGenerator* generator)
- {
- unsigned vectorSize = m_parenTails.size();
- bool priorBacktrackFallThrough = false;
-
- // Emit in reverse order so parentTail N can fall through to N-1
- for (unsigned index = vectorSize; index > 0; --index) {
- JumpList jumpsToNext;
- priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1);
- if (index > 1)
- jumpsToNext.linkTo(generator->label(), generator);
- else
- addJumpsToNextInteration(jumpsToNext);
- }
- m_parenTails.clear();
- }
-
- void addJumpToNextInteration(Jump jump)
- {
- m_jumpsToNextInteration.append(jump);
- }
-
- void addJumpsToNextInteration(JumpList jumps)
- {
- m_jumpsToNextInteration.append(jumps);
- }
-
- void addDataLabelToNextIteration(DataLabelPtr dataLabel)
- {
- m_dataPtrsToNextIteration.append(dataLabel);
- }
-
- void linkToNextIteration(Label label)
- {
- m_nextIteration = label;
-
- for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i)
- m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration));
-
- m_dataPtrsToNextIteration.clear();
-
- for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i)
- m_parenTailsForIteration[i]->setNextIteration(m_nextIteration);
-
- m_parenTailsForIteration.clear();
- }
-
- void linkToNextIteration(RegexGenerator* generator)
- {
- m_jumpsToNextInteration.linkTo(m_nextIteration, generator);
- }
-
- int m_parenNestingLevel;
- Vector<AlternativeBacktrackRecord> m_backtrackRecords;
- IndirectJumpHashMap m_indirectJumpMap;
- Label m_nextIteration;
- Vector<OwnPtr<ParenthesesTail> > m_parenTails;
- JumpList m_jumpsToNextInteration;
- Vector<DataLabelPtr> m_dataPtrsToNextIteration;
- Vector<ParenthesesTail*> m_parenTailsForIteration;
- };
-
- struct BacktrackDestination {
- typedef enum {
- NoBacktrack,
- BacktrackLabel,
- BacktrackStackOffset,
- BacktrackJumpList,
- BacktrackLinked
- } BacktrackType;
-
- BacktrackDestination()
- : m_backtrackType(NoBacktrack)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
- {
- }
-
- BacktrackDestination(int32_t stackOffset)
- : m_backtrackType(BacktrackStackOffset)
- , m_backtrackStackOffset(stackOffset)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
- {
- }
-
- BacktrackDestination(Label label)
- : m_backtrackType(BacktrackLabel)
- , m_backtrackLabel(label)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
- {
- }
-
- void clear(bool doDataLabelClear = true)
- {
- m_backtrackType = NoBacktrack;
- if (doDataLabelClear)
- clearDataLabel();
- m_nextBacktrack = 0;
- }
-
- void clearDataLabel()
- {
- m_dataLabelPtr = DataLabelPtr();
- }
-
- bool hasDestination()
- {
- return (m_backtrackType != NoBacktrack);
- }
-
- bool isStackOffset()
- {
- return (m_backtrackType == BacktrackStackOffset);
- }
-
- bool isLabel()
- {
- return (m_backtrackType == BacktrackLabel);
- }
-
- bool isJumpList()
- {
- return (m_backtrackType == BacktrackJumpList);
- }
-
- bool hasDataLabel()
- {
- return m_dataLabelPtr.isSet();
- }
-
- void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true)
- {
- m_backtrackType = rhs.m_backtrackType;
- if (m_backtrackType == BacktrackStackOffset)
- m_backtrackStackOffset = rhs.m_backtrackStackOffset;
- else if (m_backtrackType == BacktrackLabel)
- m_backtrackLabel = rhs.m_backtrackLabel;
- if (copyDataLabel)
- m_dataLabelPtr = rhs.m_dataLabelPtr;
- m_backtrackSourceJumps = rhs.m_backtrackSourceJumps;
- m_backtrackSourceLabel = rhs.m_backtrackSourceLabel;
- }
-
- void copyTo(BacktrackDestination& lhs)
- {
- lhs.m_backtrackType = m_backtrackType;
- if (m_backtrackType == BacktrackStackOffset)
- lhs.m_backtrackStackOffset = m_backtrackStackOffset;
- else if (m_backtrackType == BacktrackLabel)
- lhs.m_backtrackLabel = m_backtrackLabel;
- lhs.m_backtrackSourceJumps = m_backtrackSourceJumps;
- lhs.m_backtrackSourceLabel = m_backtrackSourceLabel;
- lhs.m_dataLabelPtr = m_dataLabelPtr;
- lhs.m_backTrackJumps = m_backTrackJumps;
- }
-
- void addBacktrackJump(Jump jump)
- {
- m_backTrackJumps.append(jump);
- }
-
- void setStackOffset(int32_t stackOffset)
- {
- m_backtrackType = BacktrackStackOffset;
- m_backtrackStackOffset = stackOffset;
- }
-
- void setLabel(Label label)
- {
- m_backtrackType = BacktrackLabel;
- m_backtrackLabel = label;
- }
-
- void setNextBacktrackLabel(Label label)
- {
- if (m_nextBacktrack)
- m_nextBacktrack->setLabel(label);
- }
-
- void copyBacktrackToLabel(BacktrackDestination& rhs)
- {
- if (rhs.m_backtrackToLabel)
- m_backtrackToLabel = rhs.m_backtrackToLabel;
- }
-
- void setBacktrackToLabel(Label* backtrackToLabel)
- {
- if (!m_backtrackToLabel)
- m_backtrackToLabel = backtrackToLabel;
- }
-
- void setBacktrackJumpList(JumpList* jumpList)
- {
- m_backtrackType = BacktrackJumpList;
- m_backtrackSourceJumps = jumpList;
- }
-
- void setBacktrackSourceLabel(Label* backtrackSourceLabel)
- {
- m_backtrackSourceLabel = backtrackSourceLabel;
- }
-
- void setDataLabel(DataLabelPtr dp)
- {
- if (m_subDataLabelPtr) {
- *m_subDataLabelPtr = dp;
- m_subDataLabelPtr = 0;
- } else
- m_dataLabelPtr = dp;
- }
-
- void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr)
- {
- m_subDataLabelPtr = subDataLabelPtr;
- }
-
- void linkToNextBacktrack(BacktrackDestination* nextBacktrack)
- {
- m_nextBacktrack = nextBacktrack;
- }
-
- int32_t getStackOffset()
- {
- ASSERT(m_backtrackType == BacktrackStackOffset);
- return m_backtrackStackOffset;
- }
-
- Label getLabel()
- {
- ASSERT(m_backtrackType == BacktrackLabel);
- return m_backtrackLabel;
- }
-
- JumpList& getBacktrackJumps()
- {
- return m_backTrackJumps;
- }
-
- DataLabelPtr& getDataLabel()
- {
- return m_dataLabelPtr;
- }
-
- void jumpToBacktrack(MacroAssembler* masm)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- masm->jump().linkTo(*m_backtrackSourceLabel, masm);
- else
- m_backtrackSourceJumps->append(masm->jump());
- } else if (isStackOffset())
- masm->jump(Address(stackPointerRegister, m_backtrackStackOffset));
- else if (isLabel())
- masm->jump().linkTo(m_backtrackLabel, masm);
- else
- m_backTrackJumps.append(masm->jump());
- }
-
- void jumpToBacktrack(RegexGenerator* generator, Jump jump)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- jump.linkTo(*m_backtrackSourceLabel, generator);
- else
- m_backtrackSourceJumps->append(jump);
- } else if (isStackOffset())
- generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump);
- else if (isLabel())
- jump.linkTo(getLabel(), generator);
- else
- m_backTrackJumps.append(jump);
- }
-
- void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- jumps.linkTo(*m_backtrackSourceLabel, generator);
- else
- m_backtrackSourceJumps->append(jumps);
- } else if (isStackOffset())
- generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps);
- else if (isLabel())
- jumps.linkTo(getLabel(), generator);
- else
- m_backTrackJumps.append(jumps);
- }
-
- bool linkDataLabelToHereIfExists(RegexGenerator* generator)
- {
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), generator->label()));
- clearDataLabel();
- return true;
- }
-
- return false;
- }
-
- bool plantJumpToBacktrackIfExists(RegexGenerator* generator)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- generator->jump(*m_backtrackSourceLabel);
- else
- m_backtrackSourceJumps->append(generator->jump());
-
- return true;
- }
-
- if (isStackOffset()) {
- generator->jump(Address(stackPointerRegister, getStackOffset()));
- return true;
- }
-
- if (isLabel()) {
- generator->jump(getLabel());
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel()));
- clearDataLabel();
- }
- return true;
- }
-
- return false;
- }
-
- void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false)
- {
- Label hereLabel = generator->label();
-
- if (m_backtrackToLabel) {
- *m_backtrackToLabel = hereLabel;
- m_backtrackToLabel = 0;
- }
-
- m_backTrackJumps.link(generator);
-
- if (nextIteration)
- generator->m_expressionState.linkToNextIteration(hereLabel);
-
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel));
- // data label cleared as a result of the clear() below
- }
-
- clear();
- }
-
- void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false)
- {
- m_backTrackJumps.linkTo(label, generator);
-
- if (nextIteration)
- generator->m_expressionState.linkToNextIteration(label);
-
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label));
- clearDataLabel();
- }
- }
-
- private:
- BacktrackType m_backtrackType;
- int32_t m_backtrackStackOffset;
- Label m_backtrackLabel;
- DataLabelPtr m_dataLabelPtr;
- Label* m_backtrackToLabel;
- DataLabelPtr* m_subDataLabelPtr;
- BacktrackDestination* m_nextBacktrack;
- Label* m_backtrackSourceLabel;
- JumpList* m_backtrackSourceJumps;
- JumpList m_backTrackJumps;
- };
-
- struct TermGenerationState {
- TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
- : disjunction(disjunction)
- , checkedTotal(checkedTotal)
- , m_subParenNum(0)
- , m_linkedBacktrack(0)
- , m_parenthesesTail(0)
- {
- }
-
- void resetAlternative()
- {
- m_backtrack.clear();
- alt = 0;
- }
- bool alternativeValid()
- {
- return alt < disjunction->m_alternatives.size();
- }
- void nextAlternative()
- {
- ++alt;
- }
- PatternAlternative* alternative()
- {
- return disjunction->m_alternatives[alt];
- }
- bool isLastAlternative()
- {
- return (alt + 1) == disjunction->m_alternatives.size();
- }
-
- void resetTerm()
- {
- ASSERT(alternativeValid());
- t = 0;
- m_subParenNum = 0;
- }
- bool termValid()
- {
- ASSERT(alternativeValid());
- return t < alternative()->m_terms.size();
- }
- void nextTerm()
- {
- ASSERT(alternativeValid());
- ++t;
- }
- PatternTerm& term()
- {
- ASSERT(alternativeValid());
- return alternative()->m_terms[t];
- }
- bool isLastTerm()
- {
- ASSERT(alternativeValid());
- return (t + 1) == alternative()->m_terms.size();
- }
- unsigned getSubParenNum()
- {
- return m_subParenNum++;
- }
- bool isMainDisjunction()
- {
- return !disjunction->m_parent;
- }
-
- void setParenthesesTail(ParenthesesTail* parenthesesTail)
- {
- m_parenthesesTail = parenthesesTail;
- }
-
- ParenthesesTail* getParenthesesTail()
- {
- return m_parenthesesTail;
- }
-
- PatternTerm& lookaheadTerm()
- {
- ASSERT(alternativeValid());
- ASSERT((t + 1) < alternative()->m_terms.size());
- return alternative()->m_terms[t + 1];
- }
- bool isSinglePatternCharacterLookaheadTerm()
- {
- ASSERT(alternativeValid());
- return ((t + 1) < alternative()->m_terms.size())
- && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
- && (lookaheadTerm().quantityType == QuantifierFixedCount)
- && (lookaheadTerm().quantityCount == 1);
- }
-
- int inputOffset()
- {
- return term().inputPosition - checkedTotal;
- }
-
- void clearBacktrack()
- {
- m_backtrack.clear(false);
- m_linkedBacktrack = 0;
- }
-
- void jumpToBacktrack(MacroAssembler* masm)
- {
- m_backtrack.jumpToBacktrack(masm);
- }
-
- void jumpToBacktrack(RegexGenerator* generator, Jump jump)
- {
- m_backtrack.jumpToBacktrack(generator, jump);
- }
-
- void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps)
- {
- m_backtrack.jumpToBacktrack(generator, jumps);
- }
-
- bool plantJumpToBacktrackIfExists(RegexGenerator* generator)
- {
- return m_backtrack.plantJumpToBacktrackIfExists(generator);
- }
-
- bool linkDataLabelToBacktrackIfExists(RegexGenerator* generator)
- {
- if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel()));
- m_backtrack.clearDataLabel();
- return true;
- }
-
- return false;
- }
-
- void addBacktrackJump(Jump jump)
- {
- m_backtrack.addBacktrackJump(jump);
- }
-
- void setBacktrackDataLabel(DataLabelPtr dp)
- {
- m_backtrack.setDataLabel(dp);
- }
-
- void setBackTrackStackOffset(int32_t stackOffset)
- {
- m_backtrack.setStackOffset(stackOffset);
- }
-
- void setBacktrackLabel(Label label)
- {
- m_backtrack.setLabel(label);
- }
-
- void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false)
- {
- m_backtrack.linkAlternativeBacktracks(generator, nextIteration);
- m_linkedBacktrack = 0;
- }
-
- void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false)
- {
- m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration);
- }
-
- void setBacktrackLink(BacktrackDestination* linkedBacktrack)
- {
- m_linkedBacktrack = linkedBacktrack;
- }
-
- void chainBacktracks(BacktrackDestination* followonBacktrack)
- {
- if (m_linkedBacktrack)
- m_linkedBacktrack->linkToNextBacktrack(followonBacktrack);
- }
-
- void chainBacktrackJumps(JumpList* jumpList)
- {
- if (m_linkedBacktrack && !(m_linkedBacktrack->hasDestination()))
- m_linkedBacktrack->setBacktrackJumpList(jumpList);
- }
-
- BacktrackDestination& getBacktrackDestination()
- {
- return m_backtrack;
- }
-
- void propagateBacktrackingFrom(RegexGenerator* generator, BacktrackDestination& backtrack, bool doJump = true)
- {
- if (doJump)
- m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps());
- if (backtrack.hasDestination()) {
- if (m_backtrack.hasDataLabel())
- generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel());
-
- m_backtrack.copyTarget(backtrack, doJump);
- }
- }
-
- PatternDisjunction* disjunction;
- int checkedTotal;
- private:
- unsigned alt;
- unsigned t;
- unsigned m_subParenNum;
- BacktrackDestination m_backtrack;
- BacktrackDestination* m_linkedBacktrack;
- ParenthesesTail* m_parenthesesTail;
-
- };
-
- struct ParenthesesTail {
- ParenthesesTail(PatternTerm& term, int nestingLevel, ParenthesesTail* nextOuterParenTail)
- : m_term(term)
- , m_nestingLevel(nestingLevel)
- , m_subParenIndex(0)
- , m_nextOuterParenTail(nextOuterParenTail)
- {
- }
-
- void processBacktracks(RegexGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough)
- {
- m_nonGreedyTryParentheses = nonGreedyTryParentheses;
- m_fallThrough = fallThrough;
-
- m_subParenIndex = state.getSubParenNum();
- parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack);
- state.chainBacktracks(&m_backtrack);
- BacktrackDestination& stateBacktrack = state.getBacktrackDestination();
- stateBacktrack.copyTo(m_backtrack);
- stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel);
- state.setBacktrackLink(&m_backtrack);
- stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr);
-
- m_doDirectBacktrack = m_parenBacktrack.hasDestination();
-
- if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy))
- m_doDirectBacktrack = false;
-
- if (m_doDirectBacktrack)
- state.propagateBacktrackingFrom(generator, m_parenBacktrack, false);
- else {
- stateBacktrack.setBacktrackJumpList(&m_pattBacktrackJumps);
- stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens);
- }
-
- parenthesesState.chainBacktrackJumps(&m_pattBacktrackJumps);
- }
-
- void setNextIteration(Label nextIteration)
- {
- if (!m_nestingLevel && !m_backtrackToLabel.isSet())
- m_backtrackToLabel = nextIteration;
- }
-
- void addAfterParenJump(Jump jump)
- {
- m_pattBacktrackJumps.append(jump);
- }
-
- bool generateCode(RegexGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough)
- {
- const RegisterID indexTemporary = regT0;
- unsigned parenthesesFrameLocation = m_term.frameLocation;
- Jump fromPriorBacktrack;
- bool needJumpForPriorParenTail = false;
-
- if (priorBackTrackFallThrough
- && ((m_term.quantityType == QuantifierGreedy)
- || (m_term.quantityType == QuantifierNonGreedy)
- || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) {
- // If the prior paren tail code assumed that it could fall through,
- // but we need to generate after paren backtrack code, then provide
- // a jump around that code for the prior paren tail code.
- // A regular expressing like ((xxx)...)? needs this.
- fromPriorBacktrack = generator->jump();
- needJumpForPriorParenTail = true;
- }
-
- if (!m_backtrack.hasDestination()) {
- if (m_backtrackToLabel.isSet()) {
- m_backtrack.setLabel(m_backtrackToLabel);
- nextBacktrackFallThrough = false;
- } else if (!m_subParenIndex && m_nextOuterParenTail) {
- // If we don't have a destination and we are the first term of a nested paren, go
- // back to the outer paren.
- // There is an optimization if the next outer paren is the next paren to be emitted.
- // In that case we really want the else clause.
- m_backtrack.setBacktrackJumpList(&m_nextOuterParenTail->m_withinBacktrackJumps);
- nextBacktrackFallThrough = false;
- } else
- m_backtrack.setBacktrackJumpList(&jumpsToNext);
- } else
- nextBacktrackFallThrough = false;
-
- // A failure AFTER the parens jumps here - Backtrack to this paren
- m_backtrackFromAfterParens = generator->label();
-
- if (m_dataAfterLabelPtr.isSet())
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens));
-
- m_pattBacktrackJumps.link(generator);
-
- if (m_term.quantityType == QuantifierGreedy) {
- // If this is -1 we have now tested with both with and without the parens.
- generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
- m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, Imm32(-1)));
- } else if (m_term.quantityType == QuantifierNonGreedy) {
- // If this is -1 we have now tested with both with and without the parens.
- generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
- generator->branch32(Equal, indexTemporary, Imm32(-1)).linkTo(m_nonGreedyTryParentheses, generator);
- }
-
- if (!m_doDirectBacktrack)
- m_parenBacktrack.plantJumpToBacktrackIfExists(generator);
-
- // A failure WITHIN the parens jumps here
- if (needJumpForPriorParenTail)
- fromPriorBacktrack.link(generator);
- m_parenBacktrack.linkAlternativeBacktracks(generator);
- m_withinBacktrackJumps.link(generator);
-
- if (m_term.capture())
- generator->store32(Imm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int)));
-
- if (m_term.quantityType == QuantifierGreedy) {
- generator->storeToFrame(Imm32(-1), parenthesesFrameLocation);
- generator->jump().linkTo(m_fallThrough, generator);
- nextBacktrackFallThrough = false;
- } else if (!nextBacktrackFallThrough)
- m_backtrack.jumpToBacktrack(generator);
-
- if (!m_doDirectBacktrack)
- m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens);
-
- return nextBacktrackFallThrough;
- }
-
- PatternTerm& m_term;
- int m_nestingLevel;
- unsigned m_subParenIndex;
- ParenthesesTail* m_nextOuterParenTail;
- Label m_nonGreedyTryParentheses;
- Label m_fallThrough;
- Label m_backtrackToLabel;
- Label m_backtrackFromAfterParens;
- DataLabelPtr m_dataAfterLabelPtr;
- JumpList m_pattBacktrackJumps;
- JumpList m_withinBacktrackJumps;
- BacktrackDestination m_parenBacktrack;
- BacktrackDestination m_backtrack;
- bool m_doDirectBacktrack;
- };
-
- void generateAssertionBOL(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
-
- if (m_pattern.m_multiline) {
- const RegisterID character = regT0;
-
- JumpList matchDest;
- if (!term.inputPosition)
- matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
-
- readCharacter(state.inputOffset() - 1, character);
- matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(this);
-
- matchDest.link(this);
- } else {
- // Erk, really should poison out these alternatives early. :-/
- if (term.inputPosition)
- state.jumpToBacktrack(this);
- else
- state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal)));
- }
- }
-
- void generateAssertionEOL(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
-
- if (m_pattern.m_multiline) {
- const RegisterID character = regT0;
-
- JumpList matchDest;
- if (term.inputPosition == state.checkedTotal)
- matchDest.append(atEndOfInput());
-
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(this);
-
- matchDest.link(this);
- } else {
- if (term.inputPosition == state.checkedTotal)
- state.jumpToBacktrack(this, notAtEndOfInput());
- // Erk, really should poison out these alternatives early. :-/
- else
- state.jumpToBacktrack(this);
- }
- }
-
- // Also falls though on nextIsNotWordChar.
- void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
- {
- const RegisterID character = regT0;
- PatternTerm& term = state.term();
-
- if (term.inputPosition == state.checkedTotal)
- nextIsNotWordChar.append(atEndOfInput());
-
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
- }
-
- void generateAssertionWordBoundary(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- PatternTerm& term = state.term();
-
- Jump atBegin;
- JumpList matchDest;
- if (!term.inputPosition)
- atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
- readCharacter(state.inputOffset() - 1, character);
- matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
- if (!term.inputPosition)
- atBegin.link(this);
-
- // We fall through to here if the last character was not a wordchar.
- JumpList nonWordCharThenWordChar;
- JumpList nonWordCharThenNonWordChar;
- if (term.invert()) {
- matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
- nonWordCharThenWordChar.append(jump());
- } else {
- matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
- nonWordCharThenNonWordChar.append(jump());
- }
- state.jumpToBacktrack(this, nonWordCharThenNonWordChar);
-
- // We jump here if the last character was a wordchar.
- matchDest.link(this);
- JumpList wordCharThenWordChar;
- JumpList wordCharThenNonWordChar;
- if (term.invert()) {
- matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
- wordCharThenWordChar.append(jump());
- } else {
- matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
- // This can fall-though!
- }
-
- state.jumpToBacktrack(this, wordCharThenWordChar);
-
- nonWordCharThenWordChar.link(this);
- wordCharThenNonWordChar.link(this);
- }
-
- void generatePatternCharacterSingle(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- UChar ch = state.term().patternCharacter;
-
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
- or32(Imm32(32), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset()));
- }
- }
-
- void generatePatternCharacterPair(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- UChar ch1 = state.term().patternCharacter;
- UChar ch2 = state.lookaheadTerm().patternCharacter;
-
- int mask = 0;
- int chPair = ch1 | (ch2 << 16);
-
- if (m_pattern.m_ignoreCase) {
- if (isASCIIAlpha(ch1))
- mask |= 32;
- if (isASCIIAlpha(ch2))
- mask |= 32 << 16;
- }
-
- if (mask) {
- load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
- or32(Imm32(mask), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask)));
- } else
- state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)));
- }
-
- void generatePatternCharacterFixed(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
-
- move(index, countRegister);
- sub32(Imm32(term.quantityCount), countRegister);
-
- Label loop(this);
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
- or32(Imm32(32), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)));
- }
- add32(Imm32(1), countRegister);
- branch32(NotEqual, countRegister, index).linkTo(loop, this);
- }
-
- void generatePatternCharacterGreedy(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
-
- move(Imm32(0), countRegister);
-
- JumpList failures;
- Label loop(this);
- failures.append(atEndOfInput());
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
- or32(Imm32(32), character);
- failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
- }
-
- add32(Imm32(1), countRegister);
- add32(Imm32(1), index);
- if (term.quantityCount != quantifyInfinite) {
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
- failures.append(jump());
- } else
- jump(loop);
-
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
- sub32(Imm32(1), countRegister);
- sub32(Imm32(1), index);
-
- failures.link(this);
-
- storeToFrame(countRegister, term.frameLocation);
-
- state.setBacktrackLabel(backtrackBegin);
- }
-
- void generatePatternCharacterNonGreedy(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
-
- move(Imm32(0), countRegister);
-
- Jump firstTimeDoNothing = jump();
-
- Label hardFail(this);
- sub32(countRegister, index);
- state.jumpToBacktrack(this);
-
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
-
- atEndOfInput().linkTo(hardFail, this);
- if (term.quantityCount != quantifyInfinite)
- branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
- or32(Imm32(32), character);
- branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
- }
-
- add32(Imm32(1), countRegister);
- add32(Imm32(1), index);
-
- firstTimeDoNothing.link(this);
- storeToFrame(countRegister, term.frameLocation);
-
- state.setBacktrackLabel(backtrackBegin);
- }
-
- void generateCharacterClassSingle(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- PatternTerm& term = state.term();
-
- JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
-
- if (term.invert())
- state.jumpToBacktrack(this, matchDest);
- else {
- state.jumpToBacktrack(this);
- matchDest.link(this);
- }
- }
-
- void generateCharacterClassFixed(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
-
- move(index, countRegister);
- sub32(Imm32(term.quantityCount), countRegister);
-
- Label loop(this);
- JumpList matchDest;
- load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
- matchCharacterClass(character, matchDest, term.characterClass);
-
- if (term.invert())
- state.jumpToBacktrack(this, matchDest);
- else {
- state.jumpToBacktrack(this);
- matchDest.link(this);
- }
-
- add32(Imm32(1), countRegister);
- branch32(NotEqual, countRegister, index).linkTo(loop, this);
- }
-
- void generateCharacterClassGreedy(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
-
- move(Imm32(0), countRegister);
-
- JumpList failures;
- Label loop(this);
- failures.append(atEndOfInput());
-
- if (term.invert()) {
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, failures, term.characterClass);
- } else {
- JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
- failures.append(jump());
- matchDest.link(this);
- }
-
- add32(Imm32(1), countRegister);
- add32(Imm32(1), index);
- if (term.quantityCount != quantifyInfinite) {
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
- failures.append(jump());
- } else
- jump(loop);
-
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
- sub32(Imm32(1), countRegister);
- sub32(Imm32(1), index);
-
- failures.link(this);
-
- storeToFrame(countRegister, term.frameLocation);
-
- state.setBacktrackLabel(backtrackBegin);
- }
-
- void generateCharacterClassNonGreedy(TermGenerationState& state)
- {
- const RegisterID character = regT0;
- const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
-
- move(Imm32(0), countRegister);
-
- Jump firstTimeDoNothing = jump();
-
- Label hardFail(this);
- sub32(countRegister, index);
- state.jumpToBacktrack(this);
-
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
-
- atEndOfInput().linkTo(hardFail, this);
- branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
-
- JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
-
- if (term.invert())
- matchDest.linkTo(hardFail, this);
- else {
- jump(hardFail);
- matchDest.link(this);
- }
-
- add32(Imm32(1), countRegister);
- add32(Imm32(1), index);
-
- firstTimeDoNothing.link(this);
- storeToFrame(countRegister, term.frameLocation);
-
- state.setBacktrackLabel(backtrackBegin);
- }
-
- void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
- {
- ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
- ASSERT(parenthesesTerm.quantityCount == 1);
-
- PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
- unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
-
- if (disjunction->m_alternatives.size() == 1) {
- state.resetAlternative();
- ASSERT(state.alternativeValid());
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
-
- int countToCheck = alternative->m_minimumSize - preCheckedCount;
- if (countToCheck) {
- ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
-
- // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
- // will be forced to always trampoline into here, just to decrement the index.
- // Ick.
- Jump skip = jump();
-
- Label backtrackBegin(this);
- sub32(Imm32(countToCheck), index);
- state.addBacktrackJump(jump());
-
- skip.link(this);
-
- state.setBacktrackLabel(backtrackBegin);
-
- state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck));
- state.checkedTotal += countToCheck;
- }
-
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
-
- state.checkedTotal -= countToCheck;
- } else {
- JumpList successes;
- bool propogateBacktrack = false;
-
- for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
-
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
-
- ASSERT(alternative->m_minimumSize >= preCheckedCount);
- int countToCheck = alternative->m_minimumSize - preCheckedCount;
- if (countToCheck) {
- state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
- state.checkedTotal += countToCheck;
- }
-
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
-
- // Matched an alternative.
- DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
-
- if (!state.isLastAlternative() || countToCheck)
- successes.append(jump());
-
- // Alternative did not match.
-
- state.setBacktrackDataLabel(dataLabel);
-
- // Do we have a backtrack destination?
- // if so, link the data label to it.
- state.linkDataLabelToBacktrackIfExists(this);
-
- if (!state.isLastAlternative() || countToCheck)
- state.linkAlternativeBacktracks(this);
-
- if (countToCheck) {
- sub32(Imm32(countToCheck), index);
- state.checkedTotal -= countToCheck;
- } else if (state.isLastAlternative())
- propogateBacktrack = true;
- }
- // We fall through to here when the last alternative fails.
- // Add a backtrack out of here for the parenthese handling code to link up.
- if (!propogateBacktrack)
- state.addBacktrackJump(jump());
-
- // Save address on stack for the parens code to backtrack to, to retry the
- // next alternative.
- state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*));
-
- successes.link(this);
- }
- }
-
- void generateParenthesesSingle(TermGenerationState& state)
- {
- const RegisterID indexTemporary = regT0;
- PatternTerm& term = state.term();
- PatternDisjunction* disjunction = term.parentheses.disjunction;
- ASSERT(term.quantityCount == 1);
-
- unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0;
-
- unsigned parenthesesFrameLocation = term.frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation;
- if (term.quantityType != QuantifierFixedCount)
- alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
-
- // optimized case - no capture & no quantifier can be handled in a light-weight manner.
- if (!term.capture() && (term.quantityType == QuantifierFixedCount)) {
- m_expressionState.incrementParenNestingLevel();
-
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // this expects that any backtracks back out of the parentheses will be in the
- // parenthesesState's m_backTrackJumps vector, and that if they need backtracking
- // they will have set an entry point on the parenthesesState's m_backtrackLabel.
- BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination();
- state.propagateBacktrackingFrom(this, parenthesesBacktrack);
- state.getBacktrackDestination().copyBacktrackToLabel(parenthesesBacktrack);
-
- m_expressionState.decrementParenNestingLevel();
- } else {
- Jump nonGreedySkipParentheses;
- Label nonGreedyTryParentheses;
- if (term.quantityType == QuantifierGreedy)
- storeToFrame(index, parenthesesFrameLocation);
- else if (term.quantityType == QuantifierNonGreedy) {
- storeToFrame(Imm32(-1), parenthesesFrameLocation);
- nonGreedySkipParentheses = jump();
- nonGreedyTryParentheses = label();
- storeToFrame(index, parenthesesFrameLocation);
- }
-
- // store the match start index
- if (term.capture()) {
- int inputOffset = state.inputOffset() - preCheckedCount;
- if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(inputOffset), indexTemporary);
- store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
- } else
- store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
- }
-
- ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getParenthesesTail());
-
- m_expressionState.incrementParenNestingLevel();
-
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
-
- // Save the parenthesesTail for backtracking from nested parens to this one.
- parenthesesState.setParenthesesTail(parenthesesTail);
-
- // generate the body of the parentheses
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
-
- // For non-fixed counts, backtrack if we didn't match anything.
- if (term.quantityType != QuantifierFixedCount)
- parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))));
-
- // store the match end index
- if (term.capture()) {
- int inputOffset = state.inputOffset();
- if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(state.inputOffset()), indexTemporary);
- store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
- } else
- store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
- }
-
- m_expressionState.decrementParenNestingLevel();
-
- parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label());
-
- parenthesesState.getBacktrackDestination().clear();
-
- if (term.quantityType == QuantifierNonGreedy)
- nonGreedySkipParentheses.link(this);
- }
- }
-
- void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
- {
- PatternTerm& parenthesesTerm = state.term();
- PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
- ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
- ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
-
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
-
- Label matchAgain(this);
-
- storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later.
-
- for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
-
- PatternAlternative* alternative = parenthesesState.alternative();
- optimizeAlternative(alternative);
-
- int countToCheck = alternative->m_minimumSize;
- if (countToCheck) {
- parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
- parenthesesState.checkedTotal += countToCheck;
- }
-
- for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
- generateTerm(parenthesesState);
-
- // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet.
- branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
-
- // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack,
- // or fall through to try the next alternative if no backtrack is available.
- parenthesesState.plantJumpToBacktrackIfExists(this);
-
- parenthesesState.linkAlternativeBacktracks(this);
-
- // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
-
- if (countToCheck) {
- sub32(Imm32(countToCheck), index);
- parenthesesState.checkedTotal -= countToCheck;
- }
- }
-
- // If the last alternative falls through to here, we have a failed match...
- // Which means that we match whatever we have matched up to this point (even if nothing).
- }
-
- void generateParentheticalAssertion(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
- PatternDisjunction* disjunction = term.parentheses.disjunction;
- ASSERT(term.quantityCount == 1);
- ASSERT(term.quantityType == QuantifierFixedCount);
-
- unsigned parenthesesFrameLocation = term.frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
-
- int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
-
- if (term.invert()) {
- // Inverted case
- storeToFrame(index, parenthesesFrameLocation);
-
- state.checkedTotal -= countCheckedAfterAssertion;
- if (countCheckedAfterAssertion)
- sub32(Imm32(countCheckedAfterAssertion), index);
-
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // Success! - which means - Fail!
- loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(this);
-
- // And fail means success.
- parenthesesState.linkAlternativeBacktracks(this);
-
- loadFromFrame(parenthesesFrameLocation, index);
-
- state.checkedTotal += countCheckedAfterAssertion;
- } else {
- // Normal case
- storeToFrame(index, parenthesesFrameLocation);
-
- state.checkedTotal -= countCheckedAfterAssertion;
- if (countCheckedAfterAssertion)
- sub32(Imm32(countCheckedAfterAssertion), index);
-
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // Success! - which means - Success!
- loadFromFrame(parenthesesFrameLocation, index);
- Jump success = jump();
-
- parenthesesState.linkAlternativeBacktracks(this);
-
- loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(this);
-
- success.link(this);
-
- state.checkedTotal += countCheckedAfterAssertion;
- }
- }
-
- void generateTerm(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- generateAssertionBOL(state);
- break;
-
- case PatternTerm::TypeAssertionEOL:
- generateAssertionEOL(state);
- break;
-
- case PatternTerm::TypeAssertionWordBoundary:
- generateAssertionWordBoundary(state);
- break;
-
- case PatternTerm::TypePatternCharacter:
- switch (term.quantityType) {
- case QuantifierFixedCount:
- if (term.quantityCount == 1) {
- if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
- generatePatternCharacterPair(state);
- state.nextTerm();
- } else
- generatePatternCharacterSingle(state);
- } else
- generatePatternCharacterFixed(state);
- break;
- case QuantifierGreedy:
- generatePatternCharacterGreedy(state);
- break;
- case QuantifierNonGreedy:
- generatePatternCharacterNonGreedy(state);
- break;
- }
- break;
-
- case PatternTerm::TypeCharacterClass:
- switch (term.quantityType) {
- case QuantifierFixedCount:
- if (term.quantityCount == 1)
- generateCharacterClassSingle(state);
- else
- generateCharacterClassFixed(state);
- break;
- case QuantifierGreedy:
- generateCharacterClassGreedy(state);
- break;
- case QuantifierNonGreedy:
- generateCharacterClassNonGreedy(state);
- break;
- }
- break;
-
- case PatternTerm::TypeBackReference:
- m_shouldFallBack = true;
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypeParenthesesSubpattern:
- if (term.quantityCount == 1 && !term.parentheses.isCopy)
- generateParenthesesSingle(state);
- else if (term.parentheses.isTerminal)
- generateParenthesesGreedyNoBacktrack(state);
- else
- m_shouldFallBack = true;
- break;
-
- case PatternTerm::TypeParentheticalAssertion:
- generateParentheticalAssertion(state);
- break;
- }
- }
-
- void generateDisjunction(PatternDisjunction* disjunction)
- {
- TermGenerationState state(disjunction, 0);
- state.resetAlternative();
-
- // check availability for the next alternative
- int countCheckedForCurrentAlternative = 0;
- int countToCheckForFirstAlternative = 0;
- bool hasShorterAlternatives = false;
- bool setRepeatAlternativeLabels = false;
- JumpList notEnoughInputForPreviousAlternative;
- Label firstAlternative;
- Label firstAlternativeInputChecked;
-
- // The label 'firstAlternative' is used to plant a check to see if there is
- // sufficient input available to run the first repeating alternative.
- // The label 'firstAlternativeInputChecked' will jump directly to matching
- // the first repeating alternative having skipped this check.
-
- if (state.alternativeValid()) {
- PatternAlternative* alternative = state.alternative();
- if (!alternative->onceThrough()) {
- firstAlternative = Label(this);
- setRepeatAlternativeLabels = true;
- }
- countToCheckForFirstAlternative = alternative->m_minimumSize;
- state.checkedTotal += countToCheckForFirstAlternative;
- if (countToCheckForFirstAlternative)
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
- countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
- }
-
- if (setRepeatAlternativeLabels)
- firstAlternativeInputChecked = Label(this);
-
- while (state.alternativeValid()) {
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
-
- // Track whether any alternatives are shorter than the first one.
- if (!alternative->onceThrough())
- hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
-
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
-
- // If we get here, the alternative matched.
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
-
- ASSERT(index != returnRegister);
- if (m_pattern.m_body->m_hasFixedSize) {
- move(index, returnRegister);
- if (alternative->m_minimumSize)
- sub32(Imm32(alternative->m_minimumSize), returnRegister);
-
- store32(returnRegister, output);
- } else
- load32(Address(output), returnRegister);
-
- store32(index, Address(output, 4));
-
- generateReturn();
-
- state.nextAlternative();
- if (alternative->onceThrough() && state.alternativeValid())
- state.clearBacktrack();
-
- // if there are any more alternatives, plant the check for input before looping.
- if (state.alternativeValid()) {
- PatternAlternative* nextAlternative = state.alternative();
- if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) {
- // We have handled non-repeating alternatives, jump to next iteration
- // and loop over repeating alternatives.
- state.jumpToBacktrack(this);
-
- countToCheckForFirstAlternative = nextAlternative->m_minimumSize;
-
- // If we get here, there the last input checked failed.
- notEnoughInputForPreviousAlternative.link(this);
-
- state.linkAlternativeBacktracks(this);
-
- // Back up to start the looping alternatives.
- if (countCheckedForCurrentAlternative)
- sub32(Imm32(countCheckedForCurrentAlternative), index);
-
- firstAlternative = Label(this);
-
- state.checkedTotal = countToCheckForFirstAlternative;
- if (countToCheckForFirstAlternative)
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
-
- countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
-
- firstAlternativeInputChecked = Label(this);
-
- setRepeatAlternativeLabels = true;
- } else {
- int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
-
- if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
- // If we get here, then the last input checked failed.
- notEnoughInputForPreviousAlternative.link(this);
-
- // Check if sufficent input available to run the next alternative
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
- // We are now in the correct state to enter the next alternative; this add is only required
- // to mirror and revert operation of the sub32, just below.
- add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
-
- // If we get here, then the last input checked passed.
- state.linkAlternativeBacktracks(this);
-
- // No need to check if we can run the next alternative, since it is shorter -
- // just update index.
- sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
- } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
- // If we get here, then the last input checked failed.
- // If there is insufficient input to run the current alternative, and the next alternative is longer,
- // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
- // we had checked.
- notEnoughInputForPreviousAlternative.link(this);
- add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
- notEnoughInputForPreviousAlternative.append(jump());
-
- // The next alternative is longer than the current one; check the difference.
- state.linkAlternativeBacktracks(this);
-
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
- } else { // CASE 3: Both alternatives are the same length.
- ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
-
- // If the next alterative is the same length as this one, then no need to check the input -
- // if there was sufficent input to run the current alternative then there is sufficient
- // input to run the next one; if not, there isn't.
- state.linkAlternativeBacktracks(this);
- }
- state.checkedTotal -= countCheckedForCurrentAlternative;
- countCheckedForCurrentAlternative = countToCheckForNextAlternative;
- state.checkedTotal += countCheckedForCurrentAlternative;
- }
- }
- }
-
- // If we get here, all Alternatives failed...
-
- state.checkedTotal -= countCheckedForCurrentAlternative;
-
- if (!setRepeatAlternativeLabels) {
- // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link
- // the match failures to this point, and fall through to the return below.
- state.linkAlternativeBacktracks(this, true);
-
- notEnoughInputForPreviousAlternative.link(this);
- } else {
- // How much more input need there be to be able to retry from the first alternative?
- // examples:
- // /yarr_jit/ or /wrec|pcre/
- // In these examples we need check for one more input before looping.
- // /yarr_jit|pcre/
- // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
- // being four longer than the last alternative checked, and another +1 to effectively move
- // the start position along by one).
- // /yarr|rules/ or /wrec|notsomuch/
- // In these examples, provided that there was sufficient input to have just been matching for
- // the second alternative we can loop without checking for available input (since the second
- // alternative is longer than the first). In the latter example we need to decrement index
- // (by 4) so the start position is only progressed by 1 from the last iteration.
- int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
-
- // First, deal with the cases where there was sufficient input to try the last alternative.
- if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
- state.linkAlternativeBacktracks(this, true);
- else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
- state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true);
- else { // no need to check the input, but we do have some bookkeeping to do first.
- state.linkAlternativeBacktracks(this, true);
-
- // Where necessary update our preserved start position.
- if (!m_pattern.m_body->m_hasFixedSize) {
- move(index, regT0);
- sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
- store32(regT0, Address(output));
- }
-
- // Update index if necessary, and loop (without checking).
- if (incrementForNextIter)
- add32(Imm32(incrementForNextIter), index);
- jump().linkTo(firstAlternativeInputChecked, this);
- }
-
- notEnoughInputForPreviousAlternative.link(this);
- // Update our idea of the start position, if we're tracking this.
- if (!m_pattern.m_body->m_hasFixedSize) {
- if (countCheckedForCurrentAlternative - 1) {
- move(index, regT0);
- sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
- store32(regT0, Address(output));
- } else
- store32(index, Address(output));
- }
-
- // Check if there is sufficent input to run the first alternative again.
- jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
- // No - insufficent input to run the first alteranative, are there any other alternatives we
- // might need to check? If so, the last check will have left the index incremented by
- // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
- // LESS input is available, to have the effect of just progressing the start position by 1
- // from the last iteration. If this check passes we can just jump up to the check associated
- // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
- // first alternative again, and this check will fail (otherwise the check planted just above
- // here would have passed). This is a bit sad, however it saves trying to do something more
- // complex here in compilation, and in the common case we should end up coallescing the checks.
- //
- // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
- // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
- // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
- // is sufficient input to run either alternative (constantly failing). If there had been only
- // one alternative, or if the shorter alternative had come first, we would have terminated
- // immediately. :-/
- if (hasShorterAlternatives)
- jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
- // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
- // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
- // but since we're about to return a failure this doesn't really matter!)
- }
-
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
-
- move(Imm32(-1), returnRegister);
-
- generateReturn();
-
- m_expressionState.emitParenthesesTail(this);
- m_expressionState.emitIndirectJumpTable(this);
- m_expressionState.linkToNextIteration(this);
- }
-
- void generateEnter()
- {
-#if CPU(X86_64)
- push(X86Registers::ebp);
- move(stackPointerRegister, X86Registers::ebp);
- push(X86Registers::ebx);
-#elif CPU(X86)
- push(X86Registers::ebp);
- move(stackPointerRegister, X86Registers::ebp);
- // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
- push(X86Registers::ebx);
- push(X86Registers::edi);
- push(X86Registers::esi);
- // load output into edi (2 = saved ebp + return address).
- #if COMPILER(MSVC)
- loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
- loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
- loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
- loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
- #else
- loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
- #endif
-#elif CPU(ARM)
- push(ARMRegisters::r4);
- push(ARMRegisters::r5);
- push(ARMRegisters::r6);
-#if CPU(ARM_TRADITIONAL)
- push(ARMRegisters::r8); // scratch register
-#endif
- move(ARMRegisters::r3, output);
-#elif CPU(MIPS)
- // Do nothing.
-#endif
- }
-
- void generateReturn()
- {
-#if CPU(X86_64)
- pop(X86Registers::ebx);
- pop(X86Registers::ebp);
-#elif CPU(X86)
- pop(X86Registers::esi);
- pop(X86Registers::edi);
- pop(X86Registers::ebx);
- pop(X86Registers::ebp);
-#elif CPU(ARM)
-#if CPU(ARM_TRADITIONAL)
- pop(ARMRegisters::r8); // scratch register
-#endif
- pop(ARMRegisters::r6);
- pop(ARMRegisters::r5);
- pop(ARMRegisters::r4);
-#elif CPU(MIPS)
- // Do nothing
-#endif
- ret();
- }
-
-public:
- RegexGenerator(RegexPattern& pattern)
- : m_pattern(pattern)
- , m_shouldFallBack(false)
- {
- }
-
- void generate()
- {
- generateEnter();
-
- if (!m_pattern.m_body->m_hasFixedSize)
- store32(index, Address(output));
-
- if (m_pattern.m_body->m_callFrameSize)
- subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
-
- generateDisjunction(m_pattern.m_body);
- }
-
- void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
- {
- generate();
-
- LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0);
-
- for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i)
- patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation));
-
- jitObject.set(patchBuffer.finalizeCode());
- jitObject.setFallBack(m_shouldFallBack);
- }
-
-private:
- RegexPattern& m_pattern;
- bool m_shouldFallBack;
- GenerationState m_expressionState;
-};
-
-void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject)
-{
- RegexGenerator generator(pattern);
- generator.compile(globalData, jitObject);
-}
-
-
-}}
-
-#endif
diff --git a/Source/JavaScriptCore/yarr/RegexJIT.h b/Source/JavaScriptCore/yarr/RegexJIT.h
deleted file mode 100644
index 5e3dca1..0000000
--- a/Source/JavaScriptCore/yarr/RegexJIT.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RegexJIT_h
-#define RegexJIT_h
-
-#if ENABLE(YARR_JIT)
-
-#include "MacroAssembler.h"
-#include "RegexPattern.h"
-#include "UString.h"
-
-#if CPU(X86) && !COMPILER(MSVC)
-#define YARR_CALL __attribute__ ((regparm (3)))
-#else
-#define YARR_CALL
-#endif
-
-namespace JSC {
-
-class JSGlobalData;
-class ExecutablePool;
-
-namespace Yarr {
-
-class RegexCodeBlock {
- typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
-
-public:
- RegexCodeBlock()
- : m_needFallBack(false)
- {
- }
-
- ~RegexCodeBlock()
- {
- }
-
- void setFallBack(bool fallback) { m_needFallBack = fallback; }
- bool isFallBack() { return m_needFallBack; }
- void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
-
- int execute(const UChar* input, unsigned start, unsigned length, int* output)
- {
- return reinterpret_cast<RegexJITCode>(m_ref.m_code.executableAddress())(input, start, length, output);
- }
-
-#if ENABLE(REGEXP_TRACING)
- void *getAddr() { return m_ref.m_code.executableAddress(); }
-#endif
-
-private:
- MacroAssembler::CodeRef m_ref;
- bool m_needFallBack;
-};
-
-void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject);
-
-inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
-{
- return jitObject.execute(input, start, length, output);
-}
-
-} } // namespace JSC::Yarr
-
-#endif
-
-#endif // RegexJIT_h
diff --git a/Source/JavaScriptCore/yarr/RegexParser.h b/Source/JavaScriptCore/yarr/RegexParser.h
deleted file mode 100644
index 2e42f8a..0000000
--- a/Source/JavaScriptCore/yarr/RegexParser.h
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RegexParser_h
-#define RegexParser_h
-
-#include "UString.h"
-#include <limits.h>
-#include <wtf/ASCIICType.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace JSC { namespace Yarr {
-
-static const unsigned quantifyInfinite = UINT_MAX;
-#define REGEXP_ERROR_PREFIX "Invalid regular expression: "
-
-enum BuiltInCharacterClassID {
- DigitClassID,
- SpaceClassID,
- WordClassID,
- NewlineClassID,
-};
-
-// The Parser class should not be used directly - only via the Yarr::parse() method.
-template<class Delegate>
-class Parser {
-private:
- template<class FriendDelegate>
- friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
-
- enum ErrorCode {
- NoError,
- PatternTooLarge,
- QuantifierOutOfOrder,
- QuantifierWithoutAtom,
- MissingParentheses,
- ParenthesesUnmatched,
- ParenthesesTypeInvalid,
- CharacterClassUnmatched,
- CharacterClassOutOfOrder,
- EscapeUnterminated,
- NumberOfErrorCodes
- };
-
- /*
- * CharacterClassParserDelegate:
- *
- * The class CharacterClassParserDelegate is used in the parsing of character
- * classes. This class handles detection of character ranges. This class
- * implements enough of the delegate interface such that it can be passed to
- * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
- * to perform the parsing of escape characters in character sets.
- */
- class CharacterClassParserDelegate {
- public:
- CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
- : m_delegate(delegate)
- , m_err(err)
- , m_state(Empty)
- {
- }
-
- /*
- * begin():
- *
- * Called at beginning of construction.
- */
- void begin(bool invert)
- {
- m_delegate.atomCharacterClassBegin(invert);
- }
-
- /*
- * atomPatternCharacter():
- *
- * This method is called either from parseCharacterClass() (for an unescaped
- * character in a character class), or from parseEscape(). In the former case
- * the value true will be passed for the argument 'hyphenIsRange', and in this
- * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
- * is different to /[a\-z]/).
- */
- void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
- {
- switch (m_state) {
- case AfterCharacterClass:
- // Following a builtin character class we need look out for a hyphen.
- // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
- // If we see a hyphen following a charater class then unlike usual
- // we'll report it to the delegate immediately, and put ourself into
- // a poisoned state. Any following calls to add another character or
- // character class will result in an error. (A hypen following a
- // character-class is itself valid, but only at the end of a regex).
- if (hyphenIsRange && ch == '-') {
- m_delegate.atomCharacterClassAtom('-');
- m_state = AfterCharacterClassHyphen;
- return;
- }
- // Otherwise just fall through - cached character so treat this as Empty.
-
- case Empty:
- m_character = ch;
- m_state = CachedCharacter;
- return;
-
- case CachedCharacter:
- if (hyphenIsRange && ch == '-')
- m_state = CachedCharacterHyphen;
- else {
- m_delegate.atomCharacterClassAtom(m_character);
- m_character = ch;
- }
- return;
-
- case CachedCharacterHyphen:
- if (ch < m_character) {
- m_err = CharacterClassOutOfOrder;
- return;
- }
- m_delegate.atomCharacterClassRange(m_character, ch);
- m_state = Empty;
- return;
-
- // See coment in atomBuiltInCharacterClass below.
- // This too is technically an error, per ECMA-262, and again we
- // we chose to allow this. Note a subtlely here that while we
- // diverge from the spec's definition of CharacterRange we do
- // remain in compliance with the grammar. For example, consider
- // the expression /[\d-a-z]/. We comply with the grammar in
- // this case by not allowing a-z to be matched as a range.
- case AfterCharacterClassHyphen:
- m_delegate.atomCharacterClassAtom(ch);
- m_state = Empty;
- return;
- }
- }
-
- /*
- * atomBuiltInCharacterClass():
- *
- * Adds a built-in character class, called by parseEscape().
- */
- void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
- {
- switch (m_state) {
- case CachedCharacter:
- // Flush the currently cached character, then fall through.
- m_delegate.atomCharacterClassAtom(m_character);
-
- case Empty:
- case AfterCharacterClass:
- m_state = AfterCharacterClass;
- m_delegate.atomCharacterClassBuiltIn(classID, invert);
- return;
-
- // If we hit either of these cases, we have an invalid range that
- // looks something like /[x-\d]/ or /[\d-\d]/.
- // According to ECMA-262 this should be a syntax error, but
- // empirical testing shows this to break teh webz. Instead we
- // comply with to the ECMA-262 grammar, and assume the grammar to
- // have matched the range correctly, but tweak our interpretation
- // of CharacterRange. Effectively we implicitly handle the hyphen
- // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
- case CachedCharacterHyphen:
- m_delegate.atomCharacterClassAtom(m_character);
- m_delegate.atomCharacterClassAtom('-');
- // fall through
- case AfterCharacterClassHyphen:
- m_delegate.atomCharacterClassBuiltIn(classID, invert);
- m_state = Empty;
- return;
- }
- }
-
- /*
- * end():
- *
- * Called at end of construction.
- */
- void end()
- {
- if (m_state == CachedCharacter)
- m_delegate.atomCharacterClassAtom(m_character);
- else if (m_state == CachedCharacterHyphen) {
- m_delegate.atomCharacterClassAtom(m_character);
- m_delegate.atomCharacterClassAtom('-');
- }
- m_delegate.atomCharacterClassEnd();
- }
-
- // parseEscape() should never call these delegate methods when
- // invoked with inCharacterClass set.
- void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
- void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
-
- private:
- Delegate& m_delegate;
- ErrorCode& m_err;
- enum CharacterClassConstructionState {
- Empty,
- CachedCharacter,
- CachedCharacterHyphen,
- AfterCharacterClass,
- AfterCharacterClassHyphen,
- } m_state;
- UChar m_character;
- };
-
- Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
- : m_delegate(delegate)
- , m_backReferenceLimit(backReferenceLimit)
- , m_err(NoError)
- , m_data(pattern.characters())
- , m_size(pattern.length())
- , m_index(0)
- , m_parenthesesNestingDepth(0)
- {
- }
-
- /*
- * parseEscape():
- *
- * Helper for parseTokens() AND parseCharacterClass().
- * Unlike the other parser methods, this function does not report tokens
- * directly to the member delegate (m_delegate), instead tokens are
- * emitted to the delegate provided as an argument. In the case of atom
- * escapes, parseTokens() will call parseEscape() passing m_delegate as
- * an argument, and as such the escape will be reported to the delegate.
- *
- * However this method may also be used by parseCharacterClass(), in which
- * case a CharacterClassParserDelegate will be passed as the delegate that
- * tokens should be added to. A boolean flag is also provided to indicate
- * whether that an escape in a CharacterClass is being parsed (some parsing
- * rules change in this context).
- *
- * The boolean value returned by this method indicates whether the token
- * parsed was an atom (outside of a characted class \b and \B will be
- * interpreted as assertions).
- */
- template<bool inCharacterClass, class EscapeDelegate>
- bool parseEscape(EscapeDelegate& delegate)
- {
- ASSERT(!m_err);
- ASSERT(peek() == '\\');
- consume();
-
- if (atEndOfPattern()) {
- m_err = EscapeUnterminated;
- return false;
- }
-
- switch (peek()) {
- // Assertions
- case 'b':
- consume();
- if (inCharacterClass)
- delegate.atomPatternCharacter('\b');
- else {
- delegate.assertionWordBoundary(false);
- return false;
- }
- break;
- case 'B':
- consume();
- if (inCharacterClass)
- delegate.atomPatternCharacter('B');
- else {
- delegate.assertionWordBoundary(true);
- return false;
- }
- break;
-
- // CharacterClassEscape
- case 'd':
- consume();
- delegate.atomBuiltInCharacterClass(DigitClassID, false);
- break;
- case 's':
- consume();
- delegate.atomBuiltInCharacterClass(SpaceClassID, false);
- break;
- case 'w':
- consume();
- delegate.atomBuiltInCharacterClass(WordClassID, false);
- break;
- case 'D':
- consume();
- delegate.atomBuiltInCharacterClass(DigitClassID, true);
- break;
- case 'S':
- consume();
- delegate.atomBuiltInCharacterClass(SpaceClassID, true);
- break;
- case 'W':
- consume();
- delegate.atomBuiltInCharacterClass(WordClassID, true);
- break;
-
- // DecimalEscape
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9': {
- // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
- // First, try to parse this as backreference.
- if (!inCharacterClass) {
- ParseState state = saveState();
-
- unsigned backReference = consumeNumber();
- if (backReference <= m_backReferenceLimit) {
- delegate.atomBackReference(backReference);
- break;
- }
-
- restoreState(state);
- }
-
- // Not a backreference, and not octal.
- if (peek() >= '8') {
- delegate.atomPatternCharacter('\\');
- break;
- }
-
- // Fall-through to handle this as an octal escape.
- }
-
- // Octal escape
- case '0':
- delegate.atomPatternCharacter(consumeOctal());
- break;
-
- // ControlEscape
- case 'f':
- consume();
- delegate.atomPatternCharacter('\f');
- break;
- case 'n':
- consume();
- delegate.atomPatternCharacter('\n');
- break;
- case 'r':
- consume();
- delegate.atomPatternCharacter('\r');
- break;
- case 't':
- consume();
- delegate.atomPatternCharacter('\t');
- break;
- case 'v':
- consume();
- delegate.atomPatternCharacter('\v');
- break;
-
- // ControlLetter
- case 'c': {
- ParseState state = saveState();
- consume();
- if (!atEndOfPattern()) {
- int control = consume();
-
- // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
- if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
- delegate.atomPatternCharacter(control & 0x1f);
- break;
- }
- }
- restoreState(state);
- delegate.atomPatternCharacter('\\');
- break;
- }
-
- // HexEscape
- case 'x': {
- consume();
- int x = tryConsumeHex(2);
- if (x == -1)
- delegate.atomPatternCharacter('x');
- else
- delegate.atomPatternCharacter(x);
- break;
- }
-
- // UnicodeEscape
- case 'u': {
- consume();
- int u = tryConsumeHex(4);
- if (u == -1)
- delegate.atomPatternCharacter('u');
- else
- delegate.atomPatternCharacter(u);
- break;
- }
-
- // IdentityEscape
- default:
- delegate.atomPatternCharacter(consume());
- }
-
- return true;
- }
-
- /*
- * parseAtomEscape(), parseCharacterClassEscape():
- *
- * These methods alias to parseEscape().
- */
- bool parseAtomEscape()
- {
- return parseEscape<false>(m_delegate);
- }
- void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
- {
- parseEscape<true>(delegate);
- }
-
- /*
- * parseCharacterClass():
- *
- * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
- * to an instance of CharacterClassParserDelegate, to describe the character class to the
- * delegate.
- */
- void parseCharacterClass()
- {
- ASSERT(!m_err);
- ASSERT(peek() == '[');
- consume();
-
- CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
-
- characterClassConstructor.begin(tryConsume('^'));
-
- while (!atEndOfPattern()) {
- switch (peek()) {
- case ']':
- consume();
- characterClassConstructor.end();
- return;
-
- case '\\':
- parseCharacterClassEscape(characterClassConstructor);
- break;
-
- default:
- characterClassConstructor.atomPatternCharacter(consume(), true);
- }
-
- if (m_err)
- return;
- }
-
- m_err = CharacterClassUnmatched;
- }
-
- /*
- * parseParenthesesBegin():
- *
- * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
- */
- void parseParenthesesBegin()
- {
- ASSERT(!m_err);
- ASSERT(peek() == '(');
- consume();
-
- if (tryConsume('?')) {
- if (atEndOfPattern()) {
- m_err = ParenthesesTypeInvalid;
- return;
- }
-
- switch (consume()) {
- case ':':
- m_delegate.atomParenthesesSubpatternBegin(false);
- break;
-
- case '=':
- m_delegate.atomParentheticalAssertionBegin();
- break;
-
- case '!':
- m_delegate.atomParentheticalAssertionBegin(true);
- break;
-
- default:
- m_err = ParenthesesTypeInvalid;
- }
- } else
- m_delegate.atomParenthesesSubpatternBegin();
-
- ++m_parenthesesNestingDepth;
- }
-
- /*
- * parseParenthesesEnd():
- *
- * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
- */
- void parseParenthesesEnd()
- {
- ASSERT(!m_err);
- ASSERT(peek() == ')');
- consume();
-
- if (m_parenthesesNestingDepth > 0)
- m_delegate.atomParenthesesEnd();
- else
- m_err = ParenthesesUnmatched;
-
- --m_parenthesesNestingDepth;
- }
-
- /*
- * parseQuantifier():
- *
- * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
- */
- void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
- {
- ASSERT(!m_err);
- ASSERT(min <= max);
-
- if (lastTokenWasAnAtom)
- m_delegate.quantifyAtom(min, max, !tryConsume('?'));
- else
- m_err = QuantifierWithoutAtom;
- }
-
- /*
- * parseTokens():
- *
- * This method loops over the input pattern reporting tokens to the delegate.
- * The method returns when a parse error is detected, or the end of the pattern
- * is reached. One piece of state is tracked around the loop, which is whether
- * the last token passed to the delegate was an atom (this is necessary to detect
- * a parse error when a quantifier provided without an atom to quantify).
- */
- void parseTokens()
- {
- bool lastTokenWasAnAtom = false;
-
- while (!atEndOfPattern()) {
- switch (peek()) {
- case '|':
- consume();
- m_delegate.disjunction();
- lastTokenWasAnAtom = false;
- break;
-
- case '(':
- parseParenthesesBegin();
- lastTokenWasAnAtom = false;
- break;
-
- case ')':
- parseParenthesesEnd();
- lastTokenWasAnAtom = true;
- break;
-
- case '^':
- consume();
- m_delegate.assertionBOL();
- lastTokenWasAnAtom = false;
- break;
-
- case '$':
- consume();
- m_delegate.assertionEOL();
- lastTokenWasAnAtom = false;
- break;
-
- case '.':
- consume();
- m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
- lastTokenWasAnAtom = true;
- break;
-
- case '[':
- parseCharacterClass();
- lastTokenWasAnAtom = true;
- break;
-
- case '\\':
- lastTokenWasAnAtom = parseAtomEscape();
- break;
-
- case '*':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
- lastTokenWasAnAtom = false;
- break;
-
- case '+':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
- lastTokenWasAnAtom = false;
- break;
-
- case '?':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 0, 1);
- lastTokenWasAnAtom = false;
- break;
-
- case '{': {
- ParseState state = saveState();
-
- consume();
- if (peekIsDigit()) {
- unsigned min = consumeNumber();
- unsigned max = min;
-
- if (tryConsume(','))
- max = peekIsDigit() ? consumeNumber() : quantifyInfinite;
-
- if (tryConsume('}')) {
- if (min <= max)
- parseQuantifier(lastTokenWasAnAtom, min, max);
- else
- m_err = QuantifierOutOfOrder;
- lastTokenWasAnAtom = false;
- break;
- }
- }
-
- restoreState(state);
- } // if we did not find a complete quantifer, fall through to the default case.
-
- default:
- m_delegate.atomPatternCharacter(consume());
- lastTokenWasAnAtom = true;
- }
-
- if (m_err)
- return;
- }
-
- if (m_parenthesesNestingDepth > 0)
- m_err = MissingParentheses;
- }
-
- /*
- * parse():
- *
- * This method calls regexBegin(), calls parseTokens() to parse over the input
- * patterns, calls regexEnd() or regexError() as appropriate, and converts any
- * error code to a const char* for a result.
- */
- const char* parse()
- {
- if (m_size > MAX_PATTERN_SIZE)
- m_err = PatternTooLarge;
- else
- parseTokens();
- ASSERT(atEndOfPattern() || m_err);
-
- // The order of this array must match the ErrorCode enum.
- static const char* errorMessages[NumberOfErrorCodes] = {
- 0, // NoError
- REGEXP_ERROR_PREFIX "regular expression too large",
- REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier",
- REGEXP_ERROR_PREFIX "nothing to repeat",
- REGEXP_ERROR_PREFIX "missing )",
- REGEXP_ERROR_PREFIX "unmatched parentheses",
- REGEXP_ERROR_PREFIX "unrecognized character after (?",
- REGEXP_ERROR_PREFIX "missing terminating ] for character class",
- REGEXP_ERROR_PREFIX "range out of order in character class",
- REGEXP_ERROR_PREFIX "\\ at end of pattern"
- };
-
- return errorMessages[m_err];
- }
-
-
- // Misc helper functions:
-
- typedef unsigned ParseState;
-
- ParseState saveState()
- {
- return m_index;
- }
-
- void restoreState(ParseState state)
- {
- m_index = state;
- }
-
- bool atEndOfPattern()
- {
- ASSERT(m_index <= m_size);
- return m_index == m_size;
- }
-
- int peek()
- {
- ASSERT(m_index < m_size);
- return m_data[m_index];
- }
-
- bool peekIsDigit()
- {
- return !atEndOfPattern() && WTF::isASCIIDigit(peek());
- }
-
- unsigned peekDigit()
- {
- ASSERT(peekIsDigit());
- return peek() - '0';
- }
-
- int consume()
- {
- ASSERT(m_index < m_size);
- return m_data[m_index++];
- }
-
- unsigned consumeDigit()
- {
- ASSERT(peekIsDigit());
- return consume() - '0';
- }
-
- unsigned consumeNumber()
- {
- unsigned n = consumeDigit();
- // check for overflow.
- for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
- n = newValue;
- consume();
- }
- return n;
- }
-
- unsigned consumeOctal()
- {
- ASSERT(WTF::isASCIIOctalDigit(peek()));
-
- unsigned n = consumeDigit();
- while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
- n = n * 8 + consumeDigit();
- return n;
- }
-
- bool tryConsume(UChar ch)
- {
- if (atEndOfPattern() || (m_data[m_index] != ch))
- return false;
- ++m_index;
- return true;
- }
-
- int tryConsumeHex(int count)
- {
- ParseState state = saveState();
-
- int n = 0;
- while (count--) {
- if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
- restoreState(state);
- return -1;
- }
- n = (n << 4) | WTF::toASCIIHexValue(consume());
- }
- return n;
- }
-
- Delegate& m_delegate;
- unsigned m_backReferenceLimit;
- ErrorCode m_err;
- const UChar* m_data;
- unsigned m_size;
- unsigned m_index;
- unsigned m_parenthesesNestingDepth;
-
- // Derived by empirical testing of compile time in PCRE and WREC.
- static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
-};
-
-/*
- * Yarr::parse():
- *
- * The parse method is passed a pattern to be parsed and a delegate upon which
- * callbacks will be made to record the parsed tokens forming the regex.
- * Yarr::parse() returns null on success, or a const C string providing an error
- * message where a parse error occurs.
- *
- * The Delegate must implement the following interface:
- *
- * void assertionBOL();
- * void assertionEOL();
- * void assertionWordBoundary(bool invert);
- *
- * void atomPatternCharacter(UChar ch);
- * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
- * void atomCharacterClassBegin(bool invert)
- * void atomCharacterClassAtom(UChar ch)
- * void atomCharacterClassRange(UChar begin, UChar end)
- * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
- * void atomCharacterClassEnd()
- * void atomParenthesesSubpatternBegin(bool capture = true);
- * void atomParentheticalAssertionBegin(bool invert = false);
- * void atomParenthesesEnd();
- * void atomBackReference(unsigned subpatternId);
- *
- * void quantifyAtom(unsigned min, unsigned max, bool greedy);
- *
- * void disjunction();
- *
- * The regular expression is described by a sequence of assertion*() and atom*()
- * callbacks to the delegate, describing the terms in the regular expression.
- * Following an atom a quantifyAtom() call may occur to indicate that the previous
- * atom should be quantified. In the case of atoms described across multiple
- * calls (parentheses and character classes) the call to quantifyAtom() will come
- * after the call to the atom*End() method, never after atom*Begin().
- *
- * Character classes may either be described by a single call to
- * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
- * In the latter case, ...Begin() will be called, followed by a sequence of
- * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
- *
- * Sequences of atoms and assertions are broken into alternatives via calls to
- * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
- * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
- * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
- * capturing subpattern, this will be the subpatternId associated with these
- * parentheses, and will also by definition be the lowest subpatternId of these
- * parentheses and of any nested paretheses. The atomParenthesesEnd() method
- * is passed the subpatternId of the last capturing subexpression nested within
- * these paretheses. In the case of a capturing subpattern with no nested
- * capturing subpatterns, the same subpatternId will be passed to the begin and
- * end functions. In the case of non-capturing subpatterns the subpatternId
- * passed to the begin method is also the first possible subpatternId that might
- * be nested within these paretheses. If a set of non-capturing parentheses does
- * not contain any capturing subpatterns, then the subpatternId passed to begin
- * will be greater than the subpatternId passed to end.
- */
-
-template<class Delegate>
-const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite)
-{
- return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
-}
-
-} } // namespace JSC::Yarr
-
-#endif // RegexParser_h
diff --git a/Source/JavaScriptCore/yarr/RegexPattern.cpp b/Source/JavaScriptCore/yarr/RegexPattern.cpp
deleted file mode 100644
index f9aa92d..0000000
--- a/Source/JavaScriptCore/yarr/RegexPattern.cpp
+++ /dev/null
@@ -1,987 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "RegexInterpreter.h"
-#include "RegexPattern.h"
-#include <wtf/Vector.h>
-
-using namespace WTF;
-
-namespace JSC { namespace Yarr {
-
-#include "RegExpJitTables.h"
-
-class CharacterClassConstructor {
-public:
- CharacterClassConstructor(bool isCaseInsensitive = false)
- : m_isCaseInsensitive(isCaseInsensitive)
- {
- }
-
- void reset()
- {
- m_matches.clear();
- m_ranges.clear();
- m_matchesUnicode.clear();
- m_rangesUnicode.clear();
- }
-
- void append(const CharacterClass* other)
- {
- for (size_t i = 0; i < other->m_matches.size(); ++i)
- addSorted(m_matches, other->m_matches[i]);
- for (size_t i = 0; i < other->m_ranges.size(); ++i)
- addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
- for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
- addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
- for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
- addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
- }
-
- void putChar(UChar ch)
- {
- if (ch <= 0x7f) {
- if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
- addSorted(m_matches, toASCIIUpper(ch));
- addSorted(m_matches, toASCIILower(ch));
- } else
- addSorted(m_matches, ch);
- } else {
- UChar upper, lower;
- if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
- addSorted(m_matchesUnicode, upper);
- addSorted(m_matchesUnicode, lower);
- } else
- addSorted(m_matchesUnicode, ch);
- }
- }
-
- // returns true if this character has another case, and 'ch' is the upper case form.
- static inline bool isUnicodeUpper(UChar ch)
- {
- return ch != Unicode::toLower(ch);
- }
-
- // returns true if this character has another case, and 'ch' is the lower case form.
- static inline bool isUnicodeLower(UChar ch)
- {
- return ch != Unicode::toUpper(ch);
- }
-
- void putRange(UChar lo, UChar hi)
- {
- if (lo <= 0x7f) {
- char asciiLo = lo;
- char asciiHi = std::min(hi, (UChar)0x7f);
- addSortedRange(m_ranges, lo, asciiHi);
-
- if (m_isCaseInsensitive) {
- if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
- addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
- if ((asciiLo <= 'z') && (asciiHi >= 'a'))
- addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
- }
- }
- if (hi >= 0x80) {
- uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
- addSortedRange(m_rangesUnicode, unicodeCurr, hi);
-
- if (m_isCaseInsensitive) {
- while (unicodeCurr <= hi) {
- // If the upper bound of the range (hi) is 0xffff, the increments to
- // unicodeCurr in this loop may take it to 0x10000. This is fine
- // (if so we won't re-enter the loop, since the loop condition above
- // will definitely fail) - but this does mean we cannot use a UChar
- // to represent unicodeCurr, we must use a 32-bit value instead.
- ASSERT(unicodeCurr <= 0xffff);
-
- if (isUnicodeUpper(unicodeCurr)) {
- UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
- UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
- while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
- lowerCaseRangeEnd++;
- addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
- } else if (isUnicodeLower(unicodeCurr)) {
- UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
- UChar upperCaseRangeEnd = upperCaseRangeBegin;
- while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
- upperCaseRangeEnd++;
- addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
- } else
- ++unicodeCurr;
- }
- }
- }
- }
-
- CharacterClass* charClass()
- {
- CharacterClass* characterClass = new CharacterClass(0);
-
- characterClass->m_matches.append(m_matches);
- characterClass->m_ranges.append(m_ranges);
- characterClass->m_matchesUnicode.append(m_matchesUnicode);
- characterClass->m_rangesUnicode.append(m_rangesUnicode);
-
- reset();
-
- return characterClass;
- }
-
-private:
- void addSorted(Vector<UChar>& matches, UChar ch)
- {
- unsigned pos = 0;
- unsigned range = matches.size();
-
- // binary chop, find position to insert char.
- while (range) {
- unsigned index = range >> 1;
-
- int val = matches[pos+index] - ch;
- if (!val)
- return;
- else if (val > 0)
- range = index;
- else {
- pos += (index+1);
- range -= (index+1);
- }
- }
-
- if (pos == matches.size())
- matches.append(ch);
- else
- matches.insert(pos, ch);
- }
-
- void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
- {
- unsigned end = ranges.size();
-
- // Simple linear scan - I doubt there are that many ranges anyway...
- // feel free to fix this with something faster (eg binary chop).
- for (unsigned i = 0; i < end; ++i) {
- // does the new range fall before the current position in the array
- if (hi < ranges[i].begin) {
- // optional optimization: concatenate appending ranges? - may not be worthwhile.
- if (hi == (ranges[i].begin - 1)) {
- ranges[i].begin = lo;
- return;
- }
- ranges.insert(i, CharacterRange(lo, hi));
- return;
- }
- // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
- // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
- // end of the last range they concatenate, which is just as good.
- if (lo <= (ranges[i].end + 1)) {
- // found an intersect! we'll replace this entry in the array.
- ranges[i].begin = std::min(ranges[i].begin, lo);
- ranges[i].end = std::max(ranges[i].end, hi);
-
- // now check if the new range can subsume any subsequent ranges.
- unsigned next = i+1;
- // each iteration of the loop we will either remove something from the list, or break the loop.
- while (next < ranges.size()) {
- if (ranges[next].begin <= (ranges[i].end + 1)) {
- // the next entry now overlaps / concatenates this one.
- ranges[i].end = std::max(ranges[i].end, ranges[next].end);
- ranges.remove(next);
- } else
- break;
- }
-
- return;
- }
- }
-
- // CharacterRange comes after all existing ranges.
- ranges.append(CharacterRange(lo, hi));
- }
-
- bool m_isCaseInsensitive;
-
- Vector<UChar> m_matches;
- Vector<CharacterRange> m_ranges;
- Vector<UChar> m_matchesUnicode;
- Vector<CharacterRange> m_rangesUnicode;
-};
-
-struct BeginCharHelper {
- BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
- : m_beginChars(beginChars)
- , m_isCaseInsensitive(isCaseInsensitive)
- {}
-
- void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
- {
- if (quantityType == QuantifierFixedCount && quantityCount > 1) {
- // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/
- beginChar.value |= beginChar.value << 16;
- beginChar.mask |= beginChar.mask << 16;
- addCharacter(beginChar);
- } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())
- // In case of characters with fixed quantifier we should check the next character as well.
- linkHotTerms(beginChar, hotTerms);
- else
- // In case of greedy matching the next character checking is unnecessary therefore we just store
- // the first character.
- addCharacter(beginChar);
- }
-
- // Merge two following BeginChars in the vector to reduce the number of character checks.
- void merge(unsigned size)
- {
- for (unsigned i = 0; i < size; i++) {
- BeginChar* curr = &m_beginChars->at(i);
- BeginChar* next = &m_beginChars->at(i + 1);
-
- // If the current and the next size of value is different we should skip the merge process
- // because the 16bit and 32bit values are unmergable.
- if (curr->value <= 0xFFFF && next->value > 0xFFFF)
- continue;
-
- unsigned diff = curr->value ^ next->value;
-
- curr->mask |= diff;
- curr->value |= curr->mask;
-
- m_beginChars->remove(i + 1);
- size--;
- }
- }
-
-private:
- void addCharacter(BeginChar beginChar)
- {
- unsigned pos = 0;
- unsigned range = m_beginChars->size();
-
- // binary chop, find position to insert char.
- while (range) {
- unsigned index = range >> 1;
-
- int val = m_beginChars->at(pos+index).value - beginChar.value;
- if (!val)
- return;
- if (val < 0)
- range = index;
- else {
- pos += (index+1);
- range -= (index+1);
- }
- }
-
- if (pos == m_beginChars->size())
- m_beginChars->append(beginChar);
- else
- m_beginChars->insert(pos, beginChar);
- }
-
- // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.
- void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
- {
- for (unsigned i = 0; i < hotTerms->size(); i++) {
- PatternTerm hotTerm = hotTerms->at(i).term;
- ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);
-
- UChar characterNext = hotTerm.patternCharacter;
-
- // Append a character to an existing BeginChar object.
- if (characterNext <= 0x7f) {
- unsigned mask = 0;
-
- if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
- mask = 32;
- characterNext = toASCIILower(characterNext);
- }
-
- addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
- } else {
- UChar upper, lower;
- if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
- addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
- addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
- } else
- addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
- }
- }
- }
-
- Vector<BeginChar>* m_beginChars;
- bool m_isCaseInsensitive;
-};
-
-class RegexPatternConstructor {
-public:
- RegexPatternConstructor(RegexPattern& pattern)
- : m_pattern(pattern)
- , m_characterClassConstructor(pattern.m_ignoreCase)
- , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)
- , m_invertParentheticalAssertion(false)
- {
- }
-
- ~RegexPatternConstructor()
- {
- }
-
- void reset()
- {
- m_pattern.reset();
- m_characterClassConstructor.reset();
- }
-
- void assertionBOL()
- {
- if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
- m_alternative->m_startsWithBOL = true;
- m_alternative->m_containsBOL = true;
- m_pattern.m_containsBOL = true;
- }
- m_alternative->m_terms.append(PatternTerm::BOL());
- }
- void assertionEOL()
- {
- m_alternative->m_terms.append(PatternTerm::EOL());
- }
- void assertionWordBoundary(bool invert)
- {
- m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
- }
-
- void atomPatternCharacter(UChar ch)
- {
- // We handle case-insensitive checking of unicode characters which do have both
- // cases by handling them as if they were defined using a CharacterClass.
- if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
- atomCharacterClassBegin();
- atomCharacterClassAtom(ch);
- atomCharacterClassEnd();
- } else
- m_alternative->m_terms.append(PatternTerm(ch));
- }
-
- void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
- {
- switch (classID) {
- case DigitClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
- break;
- case SpaceClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
- break;
- case WordClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
- break;
- case NewlineClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
- break;
- }
- }
-
- void atomCharacterClassBegin(bool invert = false)
- {
- m_invertCharacterClass = invert;
- }
-
- void atomCharacterClassAtom(UChar ch)
- {
- m_characterClassConstructor.putChar(ch);
- }
-
- void atomCharacterClassRange(UChar begin, UChar end)
- {
- m_characterClassConstructor.putRange(begin, end);
- }
-
- void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
- {
- ASSERT(classID != NewlineClassID);
-
- switch (classID) {
- case DigitClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
- break;
-
- case SpaceClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
- break;
-
- case WordClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
- break;
-
- default:
- ASSERT_NOT_REACHED();
- }
- }
-
- void atomCharacterClassEnd()
- {
- CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
- m_pattern.m_userCharacterClasses.append(newCharacterClass);
- m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
- }
-
- void atomParenthesesSubpatternBegin(bool capture = true)
- {
- unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
- if (capture)
- m_pattern.m_numSubpatterns++;
-
- PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
- m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
- m_alternative = parenthesesDisjunction->addNewAlternative();
- }
-
- void atomParentheticalAssertionBegin(bool invert = false)
- {
- PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
- m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
- m_alternative = parenthesesDisjunction->addNewAlternative();
- m_invertParentheticalAssertion = invert;
- }
-
- void atomParenthesesEnd()
- {
- ASSERT(m_alternative->m_parent);
- ASSERT(m_alternative->m_parent->m_parent);
-
- PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
- m_alternative = m_alternative->m_parent->m_parent;
-
- PatternTerm& lastTerm = m_alternative->lastTerm();
-
- unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
- unsigned numBOLAnchoredAlts = 0;
- // Bubble up BOL flags
- for (unsigned i = 0; i < numParenAlternatives; i++) {
- if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
- numBOLAnchoredAlts++;
- }
-
- if (numBOLAnchoredAlts) {
- m_alternative->m_containsBOL = true;
- // If all the alternatives in parens start with BOL, then so does this one
- if (numBOLAnchoredAlts == numParenAlternatives)
- m_alternative->m_startsWithBOL = true;
- }
-
- lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
- m_invertParentheticalAssertion = false;
- }
-
- void atomBackReference(unsigned subpatternId)
- {
- ASSERT(subpatternId);
- m_pattern.m_containsBackreferences = true;
- m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
-
- if (subpatternId > m_pattern.m_numSubpatterns) {
- m_alternative->m_terms.append(PatternTerm::ForwardReference());
- return;
- }
-
- PatternAlternative* currentAlternative = m_alternative;
- ASSERT(currentAlternative);
-
- // Note to self: if we waited until the AST was baked, we could also remove forwards refs
- while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
- PatternTerm& term = currentAlternative->lastTerm();
- ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
-
- if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
- m_alternative->m_terms.append(PatternTerm::ForwardReference());
- return;
- }
- }
-
- m_alternative->m_terms.append(PatternTerm(subpatternId));
- }
-
- // deep copy the argument disjunction. If filterStartsWithBOL is true,
- // skip alternatives with m_startsWithBOL set true.
- PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
- {
- PatternDisjunction* newDisjunction = 0;
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
- if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
- if (!newDisjunction) {
- newDisjunction = new PatternDisjunction();
- newDisjunction->m_parent = disjunction->m_parent;
- }
- PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
- newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
- }
- }
-
- if (newDisjunction)
- m_pattern.m_disjunctions.append(newDisjunction);
- return newDisjunction;
- }
-
- PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
- {
- if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
- return PatternTerm(term);
-
- PatternTerm termCopy = term;
- termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
- return termCopy;
- }
-
- void quantifyAtom(unsigned min, unsigned max, bool greedy)
- {
- ASSERT(min <= max);
- ASSERT(m_alternative->m_terms.size());
-
- if (!max) {
- m_alternative->removeLastTerm();
- return;
- }
-
- PatternTerm& term = m_alternative->lastTerm();
- ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
- ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
-
- // For any assertion with a zero minimum, not matching is valid and has no effect,
- // remove it. Otherwise, we need to match as least once, but there is no point
- // matching more than once, so remove the quantifier. It is not entirely clear
- // from the spec whether or not this behavior is correct, but I believe this
- // matches Firefox. :-/
- if (term.type == PatternTerm::TypeParentheticalAssertion) {
- if (!min)
- m_alternative->removeLastTerm();
- return;
- }
-
- if (min == 0)
- term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
- else if (min == max)
- term.quantify(min, QuantifierFixedCount);
- else {
- term.quantify(min, QuantifierFixedCount);
- m_alternative->m_terms.append(copyTerm(term));
- // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
- m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
- if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
- m_alternative->lastTerm().parentheses.isCopy = true;
- }
- }
-
- void disjunction()
- {
- m_alternative = m_alternative->m_parent->addNewAlternative();
- }
-
- void regexBegin()
- {
- m_pattern.m_body = new PatternDisjunction();
- m_alternative = m_pattern.m_body->addNewAlternative();
- m_pattern.m_disjunctions.append(m_pattern.m_body);
- }
-
- unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
- {
- alternative->m_hasFixedSize = true;
- unsigned currentInputPosition = initialInputPosition;
-
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
- PatternTerm& term = alternative->m_terms[i];
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- case PatternTerm::TypeAssertionEOL:
- case PatternTerm::TypeAssertionWordBoundary:
- term.inputPosition = currentInputPosition;
- break;
-
- case PatternTerm::TypeBackReference:
- term.inputPosition = currentInputPosition;
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference;
- alternative->m_hasFixedSize = false;
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypePatternCharacter:
- term.inputPosition = currentInputPosition;
- if (term.quantityType != QuantifierFixedCount) {
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter;
- alternative->m_hasFixedSize = false;
- } else
- currentInputPosition += term.quantityCount;
- break;
-
- case PatternTerm::TypeCharacterClass:
- term.inputPosition = currentInputPosition;
- if (term.quantityType != QuantifierFixedCount) {
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass;
- alternative->m_hasFixedSize = false;
- } else
- currentInputPosition += term.quantityCount;
- break;
-
- case PatternTerm::TypeParenthesesSubpattern:
- // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
- term.frameLocation = currentCallFrameSize;
- if (term.quantityCount == 1 && !term.parentheses.isCopy) {
- if (term.quantityType != QuantifierFixedCount)
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
- // If quantity is fixed, then pre-check its minimum size.
- if (term.quantityType == QuantifierFixedCount)
- currentInputPosition += term.parentheses.disjunction->m_minimumSize;
- term.inputPosition = currentInputPosition;
- } else if (term.parentheses.isTerminal) {
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
- term.inputPosition = currentInputPosition;
- } else {
- term.inputPosition = currentInputPosition;
- setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition);
- currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses;
- }
- // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
- alternative->m_hasFixedSize = false;
- break;
-
- case PatternTerm::TypeParentheticalAssertion:
- term.inputPosition = currentInputPosition;
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
- break;
- }
- }
-
- alternative->m_minimumSize = currentInputPosition - initialInputPosition;
- return currentCallFrameSize;
- }
-
- unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
- {
- if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
- initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative;
-
- unsigned minimumInputSize = UINT_MAX;
- unsigned maximumCallFrameSize = 0;
- bool hasFixedSize = true;
-
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
- unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
- minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
- maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
- hasFixedSize &= alternative->m_hasFixedSize;
- }
-
- ASSERT(minimumInputSize != UINT_MAX);
- ASSERT(maximumCallFrameSize >= initialCallFrameSize);
-
- disjunction->m_hasFixedSize = hasFixedSize;
- disjunction->m_minimumSize = minimumInputSize;
- disjunction->m_callFrameSize = maximumCallFrameSize;
- return maximumCallFrameSize;
- }
-
- void setupOffsets()
- {
- setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
- }
-
- // This optimization identifies sets of parentheses that we will never need to backtrack.
- // In these cases we do not need to store state from prior iterations.
- // We can presently avoid backtracking for:
- // * where the parens are at the end of the regular expression (last term in any of the
- // alternatives of the main body disjunction).
- // * where the parens are non-capturing, and quantified unbounded greedy (*).
- // * where the parens do not contain any capturing subpatterns.
- void checkForTerminalParentheses()
- {
- // This check is much too crude; should be just checking whether the candidate
- // node contains nested capturing subpatterns, not the whole expression!
- if (m_pattern.m_numSubpatterns)
- return;
-
- Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
- for (size_t i = 0; i < alternatives.size(); ++i) {
- Vector<PatternTerm>& terms = alternatives[i]->m_terms;
- if (terms.size()) {
- PatternTerm& term = terms.last();
- if (term.type == PatternTerm::TypeParenthesesSubpattern
- && term.quantityType == QuantifierGreedy
- && term.quantityCount == quantifyInfinite
- && !term.capture())
- term.parentheses.isTerminal = true;
- }
- }
- }
-
- void optimizeBOL()
- {
- // Look for expressions containing beginning of line (^) anchoring and unroll them.
- // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
- // This code relies on the parsing code tagging alternatives with m_containsBOL and
- // m_startsWithBOL and rolling those up to containing alternatives.
- // At this point, this is only valid for non-multiline expressions.
- PatternDisjunction* disjunction = m_pattern.m_body;
-
- if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
- return;
-
- PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
-
- // Set alternatives in disjunction to "onceThrough"
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
- disjunction->m_alternatives[alt]->setOnceThrough();
-
- if (loopDisjunction) {
- // Move alternatives from loopDisjunction to disjunction
- for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
- disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
-
- loopDisjunction->m_alternatives.clear();
- }
- }
-
- bool addBeginTerm(PatternTerm term, Vector<TermChain>* beginTerms, PatternAlternative* alternative, unsigned numTerms, unsigned termIndex, unsigned depth)
- {
- if (term.quantityType == QuantifierFixedCount) {
- beginTerms->append(TermChain(term));
- if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
- setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1);
- } else if (termIndex != numTerms - 1) {
- beginTerms->append(TermChain(term));
- return true;
- }
-
- return false;
- }
-
- // This function collects the terms which are potentially matching the first number of depth characters in the result.
- // If this function returns false then it found at least one term which makes the beginning character
- // look-up optimization inefficient.
- bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
- {
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
-
- if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))
- return false;
- }
-
- return true;
- }
-
- bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
- {
- bool checkNext = true;
- unsigned numTerms = alternative->m_terms.size();
-
- while (checkNext && termIndex < numTerms) {
- PatternTerm term = alternative->m_terms[termIndex];
- checkNext = false;
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- case PatternTerm::TypeAssertionEOL:
- case PatternTerm::TypeAssertionWordBoundary:
- return false;
-
- case PatternTerm::TypeBackReference:
- case PatternTerm::TypeForwardReference:
- return false;
-
- case PatternTerm::TypePatternCharacter:
- if (addBeginTerm(term, beginTerms, alternative, numTerms, termIndex, depth)) {
- termIndex++;
- checkNext = true;
- }
- break;
-
- case PatternTerm::TypeCharacterClass:
- return false;
-
- case PatternTerm::TypeParentheticalAssertion:
- if (term.invert())
- return false;
-
- case PatternTerm::TypeParenthesesSubpattern:
- if (term.quantityType != QuantifierFixedCount) {
- if (termIndex == numTerms - 1)
- break;
-
- termIndex++;
- checkNext = true;
-
- }
-
- if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
- return false;
-
- break;
- }
- }
-
- return true;
- }
-
- void setupBeginChars()
- {
- Vector<TermChain> beginTerms;
- bool containsFixedCharacter = false;
-
- if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
- && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
- unsigned size = beginTerms.size();
-
- // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
- if (!size)
- return;
-
- m_pattern.m_containsBeginChars = true;
-
- for (unsigned i = 0; i < size; i++) {
- PatternTerm term = beginTerms[i].term;
-
- // We have just collected PatternCharacter terms, other terms are not allowed.
- ASSERT(term.type == PatternTerm::TypePatternCharacter);
-
- if (term.quantityType == QuantifierFixedCount)
- containsFixedCharacter = true;
-
- UChar character = term.patternCharacter;
- unsigned mask = 0;
-
- if (character <= 0x7f) {
- if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
- mask = 32;
- character = toASCIILower(character);
- }
-
- m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- } else {
- UChar upper, lower;
- if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
- m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- } else
- m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- }
- }
-
- // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
- if (!containsFixedCharacter) {
- m_pattern.m_containsBeginChars = false;
- return;
- }
-
- size = m_pattern.m_beginChars.size();
-
- if (size > 2)
- m_beginCharHelper.merge(size - 1);
- else if (size <= 1)
- m_pattern.m_containsBeginChars = false;
- }
- }
-
-private:
- RegexPattern& m_pattern;
- PatternAlternative* m_alternative;
- CharacterClassConstructor m_characterClassConstructor;
- BeginCharHelper m_beginCharHelper;
- bool m_invertCharacterClass;
- bool m_invertParentheticalAssertion;
-};
-
-
-static const char* compileRegex(const UString& patternString, RegexPattern& pattern)
-{
- RegexPatternConstructor constructor(pattern);
-
- constructor.regexBegin();
- if (const char* error = parse(constructor, patternString))
- return error;
-
- // If the pattern contains illegal backreferences reset & reparse.
- // Quoting Netscape's "What's new in JavaScript 1.2",
- // "Note: if the number of left parentheses is less than the number specified
- // in \#, the \# is taken as an octal escape as described in the next row."
- if (pattern.containsIllegalBackReference()) {
- unsigned numSubpatterns = pattern.m_numSubpatterns;
-
- constructor.reset();
- constructor.regexBegin();
-#if !ASSERT_DISABLED
- const char* error =
-#endif
- parse(constructor, patternString, numSubpatterns);
-
- ASSERT(!error);
- ASSERT(numSubpatterns == pattern.m_numSubpatterns);
- }
-
- constructor.checkForTerminalParentheses();
- constructor.optimizeBOL();
-
- constructor.setupOffsets();
- constructor.setupBeginChars();
-
- return 0;
-};
-
-RegexPattern::RegexPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error)
- : m_ignoreCase(ignoreCase)
- , m_multiline(multiline)
- , m_containsBackreferences(false)
- , m_containsBeginChars(false)
- , m_containsBOL(false)
- , m_numSubpatterns(0)
- , m_maxBackReference(0)
- , newlineCached(0)
- , digitsCached(0)
- , spacesCached(0)
- , wordcharCached(0)
- , nondigitsCached(0)
- , nonspacesCached(0)
- , nonwordcharCached(0)
-{
- *error = compileRegex(pattern, *this);
-}
-
-} }
diff --git a/Source/JavaScriptCore/yarr/RegexPattern.h b/Source/JavaScriptCore/yarr/RegexPattern.h
deleted file mode 100644
index 6833dd6..0000000
--- a/Source/JavaScriptCore/yarr/RegexPattern.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RegexPattern_h
-#define RegexPattern_h
-
-#include <wtf/Vector.h>
-#include <wtf/unicode/Unicode.h>
-
-#include <UString.h>
-
-namespace JSC { namespace Yarr {
-
-#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
-#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
-#define RegexStackSpaceForBackTrackInfoBackReference 2
-#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
-#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
-#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
-#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
-#define RegexStackSpaceForBackTrackInfoParentheses 2
-
-struct PatternDisjunction;
-
-struct CharacterRange {
- UChar begin;
- UChar end;
-
- CharacterRange(UChar begin, UChar end)
- : begin(begin)
- , end(end)
- {
- }
-};
-
-struct CharacterClassTable : RefCounted<CharacterClassTable> {
- const char* m_table;
- bool m_inverted;
- static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
- {
- return adoptRef(new CharacterClassTable(table, inverted));
- }
-
-private:
- CharacterClassTable(const char* table, bool inverted)
- : m_table(table)
- , m_inverted(inverted)
- {
- }
-};
-
-struct CharacterClass : FastAllocBase {
- // All CharacterClass instances have to have the full set of matches and ranges,
- // they may have an optional table for faster lookups (which must match the
- // specified matches and ranges)
- CharacterClass(PassRefPtr<CharacterClassTable> table)
- : m_table(table)
- {
- }
- Vector<UChar> m_matches;
- Vector<CharacterRange> m_ranges;
- Vector<UChar> m_matchesUnicode;
- Vector<CharacterRange> m_rangesUnicode;
- RefPtr<CharacterClassTable> m_table;
-};
-
-enum QuantifierType {
- QuantifierFixedCount,
- QuantifierGreedy,
- QuantifierNonGreedy,
-};
-
-struct PatternTerm {
- enum Type {
- TypeAssertionBOL,
- TypeAssertionEOL,
- TypeAssertionWordBoundary,
- TypePatternCharacter,
- TypeCharacterClass,
- TypeBackReference,
- TypeForwardReference,
- TypeParenthesesSubpattern,
- TypeParentheticalAssertion,
- } type;
- bool m_capture :1;
- bool m_invert :1;
- union {
- UChar patternCharacter;
- CharacterClass* characterClass;
- unsigned backReferenceSubpatternId;
- struct {
- PatternDisjunction* disjunction;
- unsigned subpatternId;
- unsigned lastSubpatternId;
- bool isCopy;
- bool isTerminal;
- } parentheses;
- };
- QuantifierType quantityType;
- unsigned quantityCount;
- int inputPosition;
- unsigned frameLocation;
-
- PatternTerm(UChar ch)
- : type(PatternTerm::TypePatternCharacter)
- , m_capture(false)
- , m_invert(false)
- {
- patternCharacter = ch;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(CharacterClass* charClass, bool invert)
- : type(PatternTerm::TypeCharacterClass)
- , m_capture(false)
- , m_invert(invert)
- {
- characterClass = charClass;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
- : type(type)
- , m_capture(capture)
- , m_invert(invert)
- {
- parentheses.disjunction = disjunction;
- parentheses.subpatternId = subpatternId;
- parentheses.isCopy = false;
- parentheses.isTerminal = false;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(Type type, bool invert = false)
- : type(type)
- , m_capture(false)
- , m_invert(invert)
- {
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(unsigned spatternId)
- : type(TypeBackReference)
- , m_capture(false)
- , m_invert(false)
- {
- backReferenceSubpatternId = spatternId;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- static PatternTerm ForwardReference()
- {
- return PatternTerm(TypeForwardReference);
- }
-
- static PatternTerm BOL()
- {
- return PatternTerm(TypeAssertionBOL);
- }
-
- static PatternTerm EOL()
- {
- return PatternTerm(TypeAssertionEOL);
- }
-
- static PatternTerm WordBoundary(bool invert)
- {
- return PatternTerm(TypeAssertionWordBoundary, invert);
- }
-
- bool invert()
- {
- return m_invert;
- }
-
- bool capture()
- {
- return m_capture;
- }
-
- void quantify(unsigned count, QuantifierType type)
- {
- quantityCount = count;
- quantityType = type;
- }
-};
-
-struct PatternAlternative : FastAllocBase {
- PatternAlternative(PatternDisjunction* disjunction)
- : m_parent(disjunction)
- , m_onceThrough(false)
- , m_hasFixedSize(false)
- , m_startsWithBOL(false)
- , m_containsBOL(false)
- {
- }
-
- PatternTerm& lastTerm()
- {
- ASSERT(m_terms.size());
- return m_terms[m_terms.size() - 1];
- }
-
- void removeLastTerm()
- {
- ASSERT(m_terms.size());
- m_terms.shrink(m_terms.size() - 1);
- }
-
- void setOnceThrough()
- {
- m_onceThrough = true;
- }
-
- bool onceThrough()
- {
- return m_onceThrough;
- }
-
- Vector<PatternTerm> m_terms;
- PatternDisjunction* m_parent;
- unsigned m_minimumSize;
- bool m_onceThrough : 1;
- bool m_hasFixedSize : 1;
- bool m_startsWithBOL : 1;
- bool m_containsBOL : 1;
-};
-
-struct PatternDisjunction : FastAllocBase {
- PatternDisjunction(PatternAlternative* parent = 0)
- : m_parent(parent)
- , m_hasFixedSize(false)
- {
- }
-
- ~PatternDisjunction()
- {
- deleteAllValues(m_alternatives);
- }
-
- PatternAlternative* addNewAlternative()
- {
- PatternAlternative* alternative = new PatternAlternative(this);
- m_alternatives.append(alternative);
- return alternative;
- }
-
- Vector<PatternAlternative*> m_alternatives;
- PatternAlternative* m_parent;
- unsigned m_minimumSize;
- unsigned m_callFrameSize;
- bool m_hasFixedSize;
-};
-
-// You probably don't want to be calling these functions directly
-// (please to be calling newlineCharacterClass() et al on your
-// friendly neighborhood RegexPattern instance to get nicely
-// cached copies).
-CharacterClass* newlineCreate();
-CharacterClass* digitsCreate();
-CharacterClass* spacesCreate();
-CharacterClass* wordcharCreate();
-CharacterClass* nondigitsCreate();
-CharacterClass* nonspacesCreate();
-CharacterClass* nonwordcharCreate();
-
-struct TermChain {
- TermChain(PatternTerm term)
- : term(term)
- {}
-
- PatternTerm term;
- Vector<TermChain> hotTerms;
-};
-
-struct BeginChar {
- BeginChar()
- : value(0)
- , mask(0)
- {}
-
- BeginChar(unsigned value, unsigned mask)
- : value(value)
- , mask(mask)
- {}
-
- unsigned value;
- unsigned mask;
-};
-
-struct RegexPattern {
- RegexPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
-
- ~RegexPattern()
- {
- deleteAllValues(m_disjunctions);
- deleteAllValues(m_userCharacterClasses);
- }
-
- void reset()
- {
- m_numSubpatterns = 0;
- m_maxBackReference = 0;
-
- m_containsBackreferences = false;
- m_containsBeginChars = false;
- m_containsBOL = false;
-
- newlineCached = 0;
- digitsCached = 0;
- spacesCached = 0;
- wordcharCached = 0;
- nondigitsCached = 0;
- nonspacesCached = 0;
- nonwordcharCached = 0;
-
- deleteAllValues(m_disjunctions);
- m_disjunctions.clear();
- deleteAllValues(m_userCharacterClasses);
- m_userCharacterClasses.clear();
- m_beginChars.clear();
- }
-
- bool containsIllegalBackReference()
- {
- return m_maxBackReference > m_numSubpatterns;
- }
-
- CharacterClass* newlineCharacterClass()
- {
- if (!newlineCached)
- m_userCharacterClasses.append(newlineCached = newlineCreate());
- return newlineCached;
- }
- CharacterClass* digitsCharacterClass()
- {
- if (!digitsCached)
- m_userCharacterClasses.append(digitsCached = digitsCreate());
- return digitsCached;
- }
- CharacterClass* spacesCharacterClass()
- {
- if (!spacesCached)
- m_userCharacterClasses.append(spacesCached = spacesCreate());
- return spacesCached;
- }
- CharacterClass* wordcharCharacterClass()
- {
- if (!wordcharCached)
- m_userCharacterClasses.append(wordcharCached = wordcharCreate());
- return wordcharCached;
- }
- CharacterClass* nondigitsCharacterClass()
- {
- if (!nondigitsCached)
- m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
- return nondigitsCached;
- }
- CharacterClass* nonspacesCharacterClass()
- {
- if (!nonspacesCached)
- m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
- return nonspacesCached;
- }
- CharacterClass* nonwordcharCharacterClass()
- {
- if (!nonwordcharCached)
- m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
- return nonwordcharCached;
- }
-
- bool m_ignoreCase : 1;
- bool m_multiline : 1;
- bool m_containsBackreferences : 1;
- bool m_containsBeginChars : 1;
- bool m_containsBOL : 1;
- unsigned m_numSubpatterns;
- unsigned m_maxBackReference;
- PatternDisjunction* m_body;
- Vector<PatternDisjunction*, 4> m_disjunctions;
- Vector<CharacterClass*> m_userCharacterClasses;
- Vector<BeginChar> m_beginChars;
-
-private:
- CharacterClass* newlineCached;
- CharacterClass* digitsCached;
- CharacterClass* spacesCached;
- CharacterClass* wordcharCached;
- CharacterClass* nondigitsCached;
- CharacterClass* nonspacesCached;
- CharacterClass* nonwordcharCached;
-};
-
-} } // namespace JSC::Yarr
-
-#endif // RegexPattern_h
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
new file mode 100644
index 0000000..a4ae88f
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
@@ -0,0 +1,1891 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "YarrInterpreter.h"
+
+#include "YarrPattern.h"
+#include <wtf/BumpPointerAllocator.h>
+
+#ifndef NDEBUG
+#include <stdio.h>
+#endif
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class Interpreter {
+public:
+ struct ParenthesesDisjunctionContext;
+
+ struct BackTrackInfoPatternCharacter {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoCharacterClass {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoBackReference {
+ uintptr_t begin; // Not really needed for greedy quantifiers.
+ uintptr_t matchAmount; // Not really needed for fixed quantifiers.
+ };
+ struct BackTrackInfoAlternative {
+ uintptr_t offset;
+ };
+ struct BackTrackInfoParentheticalAssertion {
+ uintptr_t begin;
+ };
+ struct BackTrackInfoParenthesesOnce {
+ uintptr_t begin;
+ };
+ struct BackTrackInfoParenthesesTerminal {
+ uintptr_t begin;
+ };
+ struct BackTrackInfoParentheses {
+ uintptr_t matchAmount;
+ ParenthesesDisjunctionContext* lastContext;
+ };
+
+ static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
+ {
+ context->next = backTrack->lastContext;
+ backTrack->lastContext = context;
+ ++backTrack->matchAmount;
+ }
+
+ static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
+ {
+ ASSERT(backTrack->matchAmount);
+ ASSERT(backTrack->lastContext);
+ backTrack->lastContext = backTrack->lastContext->next;
+ --backTrack->matchAmount;
+ }
+
+ struct DisjunctionContext
+ {
+ DisjunctionContext()
+ : term(0)
+ {
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ int term;
+ unsigned matchBegin;
+ unsigned matchEnd;
+ uintptr_t frame[1];
+ };
+
+ DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
+ {
+ size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+ allocatorPool = allocatorPool->ensureCapacity(size);
+ if (!allocatorPool)
+ CRASH();
+ return new(allocatorPool->alloc(size)) DisjunctionContext();
+ }
+
+ void freeDisjunctionContext(DisjunctionContext* context)
+ {
+ allocatorPool = allocatorPool->dealloc(context);
+ }
+
+ struct ParenthesesDisjunctionContext
+ {
+ ParenthesesDisjunctionContext(int* output, ByteTerm& term)
+ : next(0)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
+
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
+ subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
+ output[(firstSubpatternId << 1) + i] = -1;
+ }
+
+ new(getDisjunctionContext(term)) DisjunctionContext();
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
+ {
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
+ output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
+ }
+
+ DisjunctionContext* getDisjunctionContext(ByteTerm& term)
+ {
+ return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
+ }
+
+ ParenthesesDisjunctionContext* next;
+ int subpatternBackup[1];
+ };
+
+ ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
+ {
+ size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+ allocatorPool = allocatorPool->ensureCapacity(size);
+ if (!allocatorPool)
+ CRASH();
+ return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
+ }
+
+ void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
+ {
+ allocatorPool = allocatorPool->dealloc(context);
+ }
+
+ class InputStream {
+ public:
+ InputStream(const UChar* input, unsigned start, unsigned length)
+ : input(input)
+ , pos(start)
+ , length(length)
+ {
+ }
+
+ void next()
+ {
+ ++pos;
+ }
+
+ void rewind(unsigned amount)
+ {
+ ASSERT(pos >= amount);
+ pos -= amount;
+ }
+
+ int read()
+ {
+ ASSERT(pos < length);
+ if (pos < length)
+ return input[pos];
+ return -1;
+ }
+
+ int readPair()
+ {
+ ASSERT(pos + 1 < length);
+ return input[pos] | input[pos + 1] << 16;
+ }
+
+ int readChecked(int position)
+ {
+ ASSERT(position < 0);
+ ASSERT((unsigned)-position <= pos);
+ unsigned p = pos + position;
+ ASSERT(p < length);
+ return input[p];
+ }
+
+ int reread(unsigned from)
+ {
+ ASSERT(from < length);
+ return input[from];
+ }
+
+ int prev()
+ {
+ ASSERT(!(pos > length));
+ if (pos && length)
+ return input[pos - 1];
+ return -1;
+ }
+
+ unsigned getPos()
+ {
+ return pos;
+ }
+
+ void setPos(unsigned p)
+ {
+ pos = p;
+ }
+
+ bool atStart()
+ {
+ return pos == 0;
+ }
+
+ bool atEnd()
+ {
+ return pos == length;
+ }
+
+ bool checkInput(int count)
+ {
+ if ((pos + count) <= length) {
+ pos += count;
+ return true;
+ } else
+ return false;
+ }
+
+ void uncheckInput(int count)
+ {
+ pos -= count;
+ }
+
+ bool atStart(int position)
+ {
+ return (pos + position) == 0;
+ }
+
+ bool atEnd(int position)
+ {
+ return (pos + position) == length;
+ }
+
+ bool isNotAvailableInput(int position)
+ {
+ return (pos + position) > length;
+ }
+
+ private:
+ const UChar* input;
+ unsigned pos;
+ unsigned length;
+ };
+
+ bool testCharacterClass(CharacterClass* characterClass, int ch)
+ {
+ if (ch & 0xFF80) {
+ for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
+ if (ch == characterClass->m_matchesUnicode[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
+ if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
+ return true;
+ } else {
+ for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
+ if (ch == characterClass->m_matches[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
+ if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
+ return true;
+ }
+
+ return false;
+ }
+
+ bool checkCharacter(int testChar, int inputPosition)
+ {
+ return testChar == input.readChecked(inputPosition);
+ }
+
+ bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
+ {
+ int ch = input.readChecked(inputPosition);
+ return (loChar == ch) || (hiChar == ch);
+ }
+
+ bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
+ {
+ bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
+ return invert ? !match : match;
+ }
+
+ bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
+ {
+ int matchSize = matchEnd - matchBegin;
+
+ if (!input.checkInput(matchSize))
+ return false;
+
+ if (pattern->m_ignoreCase) {
+ for (int i = 0; i < matchSize; ++i) {
+ int ch = input.reread(matchBegin + i);
+
+ int lo = Unicode::toLower(ch);
+ int hi = Unicode::toUpper(ch);
+
+ if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
+ input.uncheckInput(matchSize);
+ return false;
+ }
+ }
+ } else {
+ for (int i = 0; i < matchSize; ++i) {
+ if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
+ input.uncheckInput(matchSize);
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ bool matchAssertionBOL(ByteTerm& term)
+ {
+ return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
+ }
+
+ bool matchAssertionEOL(ByteTerm& term)
+ {
+ if (term.inputPosition)
+ return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+ else
+ return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
+ }
+
+ bool matchAssertionWordBoundary(ByteTerm& term)
+ {
+ bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
+ bool readIsWordchar;
+ if (term.inputPosition)
+ readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
+ else
+ readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
+
+ bool wordBoundary = prevIsWordchar != readIsWordchar;
+ return term.invert() ? !wordBoundary : wordBoundary;
+ }
+
+ bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
+ return false;
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+
+ // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
+ // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
+ // Eg.: /(a\1)/
+ if (matchEnd == -1)
+ return true;
+
+ ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+
+ if (matchBegin == matchEnd)
+ return true;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ backTrack->begin = input.getPos();
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ input.setPos(backTrack->begin);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
+ ++matchAmount;
+ backTrack->matchAmount = matchAmount;
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->begin = input.getPos();
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+ ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+
+ if (matchBegin == matchEnd)
+ return false;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ // for quantityCount == 1, could rewind.
+ input.setPos(backTrack->begin);
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.rewind(matchEnd - matchBegin);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ ++backTrack->matchAmount;
+ return true;
+ } else
+ input.setPos(backTrack->begin);
+ break;
+ }
+
+ return false;
+ }
+
+ void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
+ output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
+ }
+ }
+ void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
+ context->restoreOutput(output, firstSubpatternId, count);
+ }
+ JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+ {
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+
+ JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
+ if (result == JSRegExpMatch)
+ return JSRegExpMatch;
+
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result != JSRegExpNoMatch)
+ return result;
+ }
+
+ return JSRegExpNoMatch;
+ }
+
+ bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy: {
+ // set this speculatively; if we get to the parens end this will be true.
+ backTrack->begin = input.getPos();
+ break;
+ }
+ case QuantifierNonGreedy: {
+ backTrack->begin = notFound;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
+ }
+
+ return true;
+ }
+
+ bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ }
+
+ if (term.atom.quantityType == QuantifierFixedCount)
+ return true;
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+ return backTrack->begin != input.getPos();
+ }
+
+ bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = -1;
+ output[(subpatternId << 1) + 1] = -1;
+ }
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ // if we backtrack to this point, there is another chance - try matching nothing.
+ ASSERT(backTrack->begin != notFound);
+ backTrack->begin = notFound;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ case QuantifierNonGreedy:
+ ASSERT(backTrack->begin != notFound);
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ if (backTrack->begin == notFound) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+ case QuantifierNonGreedy:
+ if (backTrack->begin == notFound) {
+ backTrack->begin = input.getPos();
+ if (term.capture()) {
+ // Technically this access to inputPosition should be accessing the begin term's
+ // inputPosition, but for repeats other than fixed these values should be
+ // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
+ ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
+ unsigned subpatternId = term.atom.subpatternId;
+ output[subpatternId << 1] = input.getPos() + term.inputPosition;
+ }
+ context->term -= term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
+ ASSERT(term.atom.quantityType == QuantifierGreedy);
+ ASSERT(term.atom.quantityCount == quantifyInfinite);
+ ASSERT(!term.capture());
+
+ BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
+ backTrack->begin = input.getPos();
+ return true;
+ }
+
+ bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
+
+ BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
+ // Empty match is a failed match.
+ if (backTrack->begin == input.getPos())
+ return false;
+
+ // Successful match! Okay, what's next? - loop around and try to match moar!
+ context->term -= (term.atom.parenthesesWidth + 1);
+ return true;
+ }
+
+ bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
+ ASSERT(term.atom.quantityType == QuantifierGreedy);
+ ASSERT(term.atom.quantityCount == quantifyInfinite);
+ ASSERT(!term.capture());
+
+ // If we backtrack to this point, we have failed to match this iteration of the parens.
+ // Since this is greedy / zero minimum a failed is also accepted as a match!
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+
+ bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
+ {
+ // 'Terminal' parentheses are at the end of the regex, and as such a match past end
+ // should always be returned as a successful match - we should never backtrack to here.
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ backTrack->begin = input.getPos();
+ return true;
+ }
+
+ bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ // We've reached the end of the parens; if they are inverted, this is failure.
+ if (term.invert()) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ return true;
+ }
+
+ bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ // We've failed to match parens; if they are inverted, this is win!
+ if (term.invert()) {
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ backTrack->matchAmount = 0;
+ backTrack->lastContext = 0;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+ if (result == JSRegExpMatch)
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result == JSRegExpNoMatch) {
+ JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+ if (backtrackResult != JSRegExpMatch)
+ return backtrackResult;
+ } else
+ return result;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return JSRegExpMatch;
+ }
+
+ case QuantifierGreedy: {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+ if (result == JSRegExpMatch)
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result != JSRegExpNoMatch)
+ return result;
+
+ break;
+ }
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return JSRegExpMatch;
+ }
+
+ case QuantifierNonGreedy:
+ return JSRegExpMatch;
+ }
+
+ ASSERT_NOT_REACHED();
+ return JSRegExpErrorNoMatch;
+ }
+
+ // Rules for backtracking differ depending on whether this is greedy or non-greedy.
+ //
+ // Greedy matches never should try just adding more - you should already have done
+ // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
+ // you backtrack an item off the list needs checking, since we'll never have matched
+ // the one less case. Tracking forwards, still add as much as possible.
+ //
+ // Non-greedy, we've already done the one less case, so don't match on popping.
+ // We haven't done the one more case, so always try to add that.
+ //
+ JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+
+ ParenthesesDisjunctionContext* context = 0;
+ JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
+
+ if (result != JSRegExpMatch)
+ return result;
+
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+
+ if (result == JSRegExpMatch)
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result == JSRegExpNoMatch) {
+ JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+ if (backtrackResult != JSRegExpMatch)
+ return backtrackResult;
+ } else
+ return result;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return JSRegExpMatch;
+ }
+
+ case QuantifierGreedy: {
+ if (!backTrack->matchAmount)
+ return JSRegExpNoMatch;
+
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+ if (result == JSRegExpMatch) {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+ if (parenthesesResult == JSRegExpMatch)
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+
+ if (parenthesesResult != JSRegExpNoMatch)
+ return parenthesesResult;
+
+ break;
+ }
+ }
+ } else {
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result != JSRegExpNoMatch)
+ return result;
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return JSRegExpMatch;
+ }
+
+ case QuantifierNonGreedy: {
+ // If we've not reached the limit, try to add one more match.
+ if (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+ if (result == JSRegExpMatch) {
+ appendParenthesesDisjunctionContext(backTrack, context);
+ recordParenthesesMatch(term, context);
+ return JSRegExpMatch;
+ }
+
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+
+ if (result != JSRegExpNoMatch)
+ return result;
+ }
+
+ // Nope - okay backtrack looking for an alternative.
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+ if (result == JSRegExpMatch) {
+ // successful backtrack! we're back in the game!
+ if (backTrack->matchAmount) {
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return JSRegExpMatch;
+ }
+
+ // pop a match off the stack
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+
+ return result;
+ }
+
+ return JSRegExpNoMatch;
+ }
+ }
+
+ ASSERT_NOT_REACHED();
+ return JSRegExpErrorNoMatch;
+ }
+
+ void lookupForBeginChars()
+ {
+ int character;
+ bool firstSingleCharFound;
+
+ while (true) {
+ if (input.isNotAvailableInput(2))
+ return;
+
+ firstSingleCharFound = false;
+
+ character = input.readPair();
+
+ for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
+ BeginChar bc = pattern->m_beginChars[i];
+
+ if (!firstSingleCharFound && bc.value <= 0xFFFF) {
+ firstSingleCharFound = true;
+ character &= 0xFFFF;
+ }
+
+ if ((character | bc.mask) == bc.value)
+ return;
+ }
+
+ input.next();
+ }
+ }
+
+#define MATCH_NEXT() { ++context->term; goto matchAgain; }
+#define BACKTRACK() { --context->term; goto backtrack; }
+#define currentTerm() (disjunction->terms[context->term])
+ JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
+ {
+ if (!--remainingMatchCount)
+ return JSRegExpErrorHitLimit;
+
+ if (btrack)
+ BACKTRACK();
+
+ if (pattern->m_containsBeginChars && isBody)
+ lookupForBeginChars();
+
+ context->matchBegin = input.getPos();
+ context->term = 0;
+
+ matchAgain:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeSubpatternEnd:
+ context->matchEnd = input.getPos();
+ return JSRegExpMatch;
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeBodyAlternativeDisjunction:
+ case ByteTerm::TypeBodyAlternativeEnd:
+ context->matchEnd = input.getPos();
+ return JSRegExpMatch;
+
+ case ByteTerm::TypeAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeAlternativeDisjunction:
+ case ByteTerm::TypeAlternativeEnd: {
+ int offset = currentTerm().alternative.end;
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ backTrack->offset = offset;
+ context->term += offset;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ if (matchAssertionBOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionEOL:
+ if (matchAssertionEOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionWordBoundary:
+ if (matchAssertionWordBoundary(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeCharacterClass:
+ if (matchCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (matchBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern: {
+ JSRegExpResult result = matchParentheses(currentTerm(), context);
+
+ if (result == JSRegExpMatch) {
+ MATCH_NEXT();
+ } else if (result != JSRegExpNoMatch)
+ return result;
+
+ BACKTRACK();
+ }
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (matchParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (matchParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+ if (matchParenthesesTerminalBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+ if (matchParenthesesTerminalEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (matchParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (matchParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ if (input.checkInput(currentTerm().checkInputCount))
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+
+ // We should never fall-through to here.
+ ASSERT_NOT_REACHED();
+
+ backtrack:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ return JSRegExpNoMatch;
+ case ByteTerm::TypeSubpatternEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ case ByteTerm::TypeBodyAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+
+ if (input.atEnd())
+ return JSRegExpNoMatch;
+
+ input.next();
+
+ if (pattern->m_containsBeginChars && isBody)
+ lookupForBeginChars();
+
+ context->matchBegin = input.getPos();
+
+ if (currentTerm().alternative.onceThrough)
+ context->term += currentTerm().alternative.next;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypeBodyAlternativeEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeAlternativeBegin:
+ case ByteTerm::TypeAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+ case ByteTerm::TypeAlternativeEnd: {
+ // We should never backtrack back into an alternative of the main body of the regex.
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ unsigned offset = backTrack->offset;
+ context->term -= offset;
+ BACKTRACK();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ case ByteTerm::TypeAssertionEOL:
+ case ByteTerm::TypeAssertionWordBoundary:
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed:
+ case ByteTerm::TypePatternCharacterGreedy:
+ case ByteTerm::TypePatternCharacterNonGreedy:
+ if (backtrackPatternCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed:
+ case ByteTerm::TypePatternCasedCharacterGreedy:
+ case ByteTerm::TypePatternCasedCharacterNonGreedy:
+ if (backtrackPatternCasedCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeCharacterClass:
+ if (backtrackCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (backtrackBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern: {
+ JSRegExpResult result = backtrackParentheses(currentTerm(), context);
+
+ if (result == JSRegExpMatch) {
+ MATCH_NEXT();
+ } else if (result != JSRegExpNoMatch)
+ return result;
+
+ BACKTRACK();
+ }
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (backtrackParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (backtrackParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+ if (backtrackParenthesesTerminalBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+ if (backtrackParenthesesTerminalEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (backtrackParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (backtrackParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ input.uncheckInput(currentTerm().checkInputCount);
+ BACKTRACK();
+ }
+
+ ASSERT_NOT_REACHED();
+ return JSRegExpErrorNoMatch;
+ }
+
+ JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ {
+ JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
+
+ if (result == JSRegExpMatch) {
+ while (context->matchBegin == context->matchEnd) {
+ result = matchDisjunction(disjunction, context, true);
+ if (result != JSRegExpMatch)
+ return result;
+ }
+ return JSRegExpMatch;
+ }
+
+ return result;
+ }
+
+ int interpret()
+ {
+ allocatorPool = pattern->m_allocator->startAllocator();
+ if (!allocatorPool)
+ CRASH();
+
+ for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
+ output[i] = -1;
+
+ DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
+
+ JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
+ if (result == JSRegExpMatch) {
+ output[0] = context->matchBegin;
+ output[1] = context->matchEnd;
+ }
+
+ freeDisjunctionContext(context);
+
+ pattern->m_allocator->stopAllocator();
+
+ // RegExp.cpp currently expects all error to be converted to -1.
+ ASSERT((result == JSRegExpMatch) == (output[0] != -1));
+ return output[0];
+ }
+
+ Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
+ : pattern(pattern)
+ , output(output)
+ , input(inputChar, start, length)
+ , allocatorPool(0)
+ , remainingMatchCount(matchLimit)
+ {
+ }
+
+private:
+ BytecodePattern *pattern;
+ int* output;
+ InputStream input;
+ BumpPointerPool* allocatorPool;
+ unsigned remainingMatchCount;
+};
+
+
+
+class ByteCompiler {
+ struct ParenthesesStackEntry {
+ unsigned beginTerm;
+ unsigned savedAlternativeIndex;
+ ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
+ : beginTerm(beginTerm)
+ , savedAlternativeIndex(savedAlternativeIndex)
+ {
+ }
+ };
+
+public:
+ ByteCompiler(YarrPattern& pattern)
+ : m_pattern(pattern)
+ {
+ m_currentAlternativeIndex = 0;
+ }
+
+ PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
+ {
+ regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
+ emitDisjunction(m_pattern.m_body);
+ regexEnd();
+
+ return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
+ }
+
+ void checkInput(unsigned count)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
+ }
+
+ void assertionBOL(int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
+ }
+
+ void assertionEOL(int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
+ }
+
+ void assertionWordBoundary(bool invert, int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
+ }
+
+ void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ if (m_pattern.m_ignoreCase) {
+ UChar lo = Unicode::toLower(ch);
+ UChar hi = Unicode::toUpper(ch);
+
+ if (lo != hi) {
+ m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
+ return;
+ }
+ }
+
+ m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
+ }
+
+ void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
+
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ ASSERT(subpatternId);
+
+ m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
+
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
+ // then fix this up at the end! - simplifying this should make it much clearer.
+ // https://bugs.webkit.org/show_bug.cgi?id=50136
+
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = m_bodyDisjunction->terms.size();
+
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
+
+ bool invert = m_bodyDisjunction->terms[beginTerm].invert();
+ unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
+ m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+ }
+
+ unsigned popParenthesesStack()
+ {
+ ASSERT(m_parenthesesStack.size());
+ int stackEnd = m_parenthesesStack.size() - 1;
+ unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
+ m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
+ m_parenthesesStack.shrink(stackEnd);
+
+ ASSERT(beginTerm < m_bodyDisjunction->terms.size());
+ ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
+
+ return beginTerm;
+ }
+
+#ifndef NDEBUG
+ void dumpDisjunction(ByteDisjunction* disjunction)
+ {
+ printf("ByteDisjunction(%p):\n\t", disjunction);
+ for (unsigned i = 0; i < disjunction->terms.size(); ++i)
+ printf("{ %d } ", disjunction->terms[i].type);
+ printf("\n");
+ }
+#endif
+
+ void closeAlternative(int beginTerm)
+ {
+ int origBeginTerm = beginTerm;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
+ int endIndex = m_bodyDisjunction->terms.size();
+
+ unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
+
+ if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
+ m_bodyDisjunction->terms.remove(beginTerm);
+ else {
+ while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
+ m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
+ m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+ }
+
+ void closeBodyAlternative()
+ {
+ int beginTerm = 0;
+ int origBeginTerm = 0;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
+ int endIndex = m_bodyDisjunction->terms.size();
+
+ unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
+
+ while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
+ m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
+ m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = m_bodyDisjunction->terms.size();
+
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
+ ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
+
+ bool capture = parenthesesBegin.capture();
+ unsigned subpatternId = parenthesesBegin.atom.subpatternId;
+
+ unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
+ ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
+ for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
+ parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
+
+ m_bodyDisjunction->terms.shrink(beginTerm);
+
+ m_allParenthesesInfo.append(parenthesesDisjunction);
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
+
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = m_bodyDisjunction->terms.size();
+
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
+ bool capture = m_bodyDisjunction->terms[beginTerm].capture();
+ unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
+ m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+ }
+
+ void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = m_bodyDisjunction->terms.size();
+
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
+
+ bool capture = m_bodyDisjunction->terms[beginTerm].capture();
+ unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
+ m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+ }
+
+ void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
+ {
+ m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
+ m_bodyDisjunction->terms[0].frameLocation = 0;
+ m_currentAlternativeIndex = 0;
+ }
+
+ void regexEnd()
+ {
+ closeBodyAlternative();
+ }
+
+ void alternativeBodyDisjunction(bool onceThrough)
+ {
+ int newAlternativeIndex = m_bodyDisjunction->terms.size();
+ m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
+
+ m_currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void alternativeDisjunction()
+ {
+ int newAlternativeIndex = m_bodyDisjunction->terms.size();
+ m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
+
+ m_currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0, bool isParentheticalAssertion = false)
+ {
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
+
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+
+ if (alt) {
+ if (disjunction == m_pattern.m_body)
+ alternativeBodyDisjunction(alternative->onceThrough());
+ else
+ alternativeDisjunction();
+ }
+
+ unsigned minimumSize = alternative->m_minimumSize;
+ int countToCheck;
+
+ if (isParentheticalAssertion && parenthesesInputCountAlreadyChecked > minimumSize)
+ countToCheck = 0;
+ else
+ countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
+
+ ASSERT(countToCheck >= 0);
+ if (countToCheck) {
+ checkInput(countToCheck);
+ currentCountAlreadyChecked += countToCheck;
+ }
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ assertionBOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ assertionEOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeBackReference:
+ atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern: {
+ unsigned disjunctionAlreadyCheckedCount = 0;
+ if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+ unsigned alternativeFrameLocation = term.frameLocation;
+ // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
+ if (term.quantityType == QuantifierFixedCount)
+ disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
+ else
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
+ atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
+ } else if (term.parentheses.isTerminal) {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
+ atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
+ } else {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ }
+ break;
+ }
+
+ case PatternTerm::TypeParentheticalAssertion: {
+ unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition);
+ int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
+
+ atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
+ atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+private:
+ YarrPattern& m_pattern;
+ OwnPtr<ByteDisjunction> m_bodyDisjunction;
+ unsigned m_currentAlternativeIndex;
+ Vector<ParenthesesStackEntry> m_parenthesesStack;
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+};
+
+PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
+{
+ return ByteCompiler(pattern).compile(allocator);
+}
+
+int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
+{
+ return Interpreter(bytecode, output, input, start, length).interpret();
+}
+
+
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+
+
+} }
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h
new file mode 100644
index 0000000..6ffe9b7
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef YarrInterpreter_h
+#define YarrInterpreter_h
+
+#include "YarrParser.h"
+#include "YarrPattern.h"
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+class BumpPointerAllocator;
+}
+using WTF::BumpPointerAllocator;
+
+namespace JSC { namespace Yarr {
+
+// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult {
+ JSRegExpMatch = 1,
+ JSRegExpNoMatch = 0,
+ JSRegExpErrorNoMatch = -1,
+ JSRegExpErrorHitLimit = -2,
+ JSRegExpErrorNoMemory = -3,
+ JSRegExpErrorInternal = -4
+};
+
+class ByteDisjunction;
+
+struct ByteTerm {
+ enum Type {
+ TypeBodyAlternativeBegin,
+ TypeBodyAlternativeDisjunction,
+ TypeBodyAlternativeEnd,
+ TypeAlternativeBegin,
+ TypeAlternativeDisjunction,
+ TypeAlternativeEnd,
+ TypeSubpatternBegin,
+ TypeSubpatternEnd,
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacterOnce,
+ TypePatternCharacterFixed,
+ TypePatternCharacterGreedy,
+ TypePatternCharacterNonGreedy,
+ TypePatternCasedCharacterOnce,
+ TypePatternCasedCharacterFixed,
+ TypePatternCasedCharacterGreedy,
+ TypePatternCasedCharacterNonGreedy,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeParenthesesSubpattern,
+ TypeParenthesesSubpatternOnceBegin,
+ TypeParenthesesSubpatternOnceEnd,
+ TypeParenthesesSubpatternTerminalBegin,
+ TypeParenthesesSubpatternTerminalEnd,
+ TypeParentheticalAssertionBegin,
+ TypeParentheticalAssertionEnd,
+ TypeCheckInput,
+ } type;
+ union {
+ struct {
+ union {
+ UChar patternCharacter;
+ struct {
+ UChar lo;
+ UChar hi;
+ } casedCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ };
+ union {
+ ByteDisjunction* parenthesesDisjunction;
+ unsigned parenthesesWidth;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ } atom;
+ struct {
+ int next;
+ int end;
+ bool onceThrough;
+ } alternative;
+ unsigned checkInputCount;
+ };
+ unsigned frameLocation;
+ bool m_capture : 1;
+ bool m_invert : 1;
+ int inputPosition;
+
+ ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ , m_capture(false)
+ , m_invert(false)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCharacterNonGreedy;
+ break;
+ }
+
+ atom.patternCharacter = ch;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ , m_capture(false)
+ , m_invert(false)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCasedCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCasedCharacterNonGreedy;
+ break;
+ }
+
+ atom.casedCharacter.lo = lo;
+ atom.casedCharacter.hi = hi;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
+ : type(ByteTerm::TypeCharacterClass)
+ , m_capture(false)
+ , m_invert(invert)
+ {
+ atom.characterClass = characterClass;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
+ : type(type)
+ , m_capture(capture)
+ , m_invert(false)
+ {
+ atom.subpatternId = subpatternId;
+ atom.parenthesesDisjunction = parenthesesInfo;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, bool invert = false)
+ : type(type)
+ , m_capture(false)
+ , m_invert(invert)
+ {
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
+ : type(type)
+ , m_capture(capture)
+ , m_invert(invert)
+ {
+ atom.subpatternId = subpatternId;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ static ByteTerm BOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionBOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm CheckInput(unsigned count)
+ {
+ ByteTerm term(TypeCheckInput);
+ term.checkInputCount = count;
+ return term;
+ }
+
+ static ByteTerm EOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionEOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm WordBoundary(bool invert, int inputPos)
+ {
+ ByteTerm term(TypeAssertionWordBoundary, invert);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm BackReference(unsigned subpatternId, int inputPos)
+ {
+ return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
+ }
+
+ static ByteTerm BodyAlternativeBegin(bool onceThrough)
+ {
+ ByteTerm term(TypeBodyAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = onceThrough;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
+ {
+ ByteTerm term(TypeBodyAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = onceThrough;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeEnd()
+ {
+ ByteTerm term(TypeBodyAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = false;
+ return term;
+ }
+
+ static ByteTerm AlternativeBegin()
+ {
+ ByteTerm term(TypeAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = false;
+ return term;
+ }
+
+ static ByteTerm AlternativeDisjunction()
+ {
+ ByteTerm term(TypeAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = false;
+ return term;
+ }
+
+ static ByteTerm AlternativeEnd()
+ {
+ ByteTerm term(TypeAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ term.alternative.onceThrough = false;
+ return term;
+ }
+
+ static ByteTerm SubpatternBegin()
+ {
+ return ByteTerm(TypeSubpatternBegin);
+ }
+
+ static ByteTerm SubpatternEnd()
+ {
+ return ByteTerm(TypeSubpatternEnd);
+ }
+
+ bool invert()
+ {
+ return m_invert;
+ }
+
+ bool capture()
+ {
+ return m_capture;
+ }
+};
+
+class ByteDisjunction : public FastAllocBase {
+public:
+ ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
+ : m_numSubpatterns(numSubpatterns)
+ , m_frameSize(frameSize)
+ {
+ }
+
+ Vector<ByteTerm> terms;
+ unsigned m_numSubpatterns;
+ unsigned m_frameSize;
+};
+
+struct BytecodePattern : FastAllocBase {
+ BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
+ : m_body(body)
+ , m_ignoreCase(pattern.m_ignoreCase)
+ , m_multiline(pattern.m_multiline)
+ , m_containsBeginChars(pattern.m_containsBeginChars)
+ , m_allocator(allocator)
+ {
+ newlineCharacterClass = pattern.newlineCharacterClass();
+ wordcharCharacterClass = pattern.wordcharCharacterClass();
+
+ m_allParenthesesInfo.append(allParenthesesInfo);
+ m_userCharacterClasses.append(pattern.m_userCharacterClasses);
+ // 'Steal' the YarrPattern's CharacterClasses! We clear its
+ // array, so that it won't delete them on destruction. We'll
+ // take responsibility for that.
+ pattern.m_userCharacterClasses.clear();
+
+ m_beginChars.append(pattern.m_beginChars);
+ }
+
+ ~BytecodePattern()
+ {
+ deleteAllValues(m_allParenthesesInfo);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ OwnPtr<ByteDisjunction> m_body;
+ bool m_ignoreCase;
+ bool m_multiline;
+ bool m_containsBeginChars;
+ // Each BytecodePattern is associated with a RegExp, each RegExp is associated
+ // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
+ BumpPointerAllocator* m_allocator;
+
+ CharacterClass* newlineCharacterClass;
+ CharacterClass* wordcharCharacterClass;
+
+ Vector<BeginChar> m_beginChars;
+
+private:
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+ Vector<CharacterClass*> m_userCharacterClasses;
+};
+
+PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
+int interpret(BytecodePattern*, const UChar* input, unsigned start, unsigned length, int* output);
+
+} } // namespace JSC::Yarr
+
+#endif // YarrInterpreter_h
diff --git a/Source/JavaScriptCore/yarr/YarrJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp
new file mode 100644
index 0000000..545fcf5
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp
@@ -0,0 +1,2213 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "YarrJIT.h"
+
+#include "ASCIICType.h"
+#include "JSGlobalData.h"
+#include "LinkBuffer.h"
+#include "MacroAssembler.h"
+#include "YarrParser.h"
+
+#if ENABLE(YARR_JIT)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class YarrGenerator : private MacroAssembler {
+ friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+
+#if CPU(ARM)
+ static const RegisterID input = ARMRegisters::r0;
+ static const RegisterID index = ARMRegisters::r1;
+ static const RegisterID length = ARMRegisters::r2;
+ static const RegisterID output = ARMRegisters::r4;
+
+ static const RegisterID regT0 = ARMRegisters::r5;
+ static const RegisterID regT1 = ARMRegisters::r6;
+
+ static const RegisterID returnRegister = ARMRegisters::r0;
+#elif CPU(MIPS)
+ static const RegisterID input = MIPSRegisters::a0;
+ static const RegisterID index = MIPSRegisters::a1;
+ static const RegisterID length = MIPSRegisters::a2;
+ static const RegisterID output = MIPSRegisters::a3;
+
+ static const RegisterID regT0 = MIPSRegisters::t4;
+ static const RegisterID regT1 = MIPSRegisters::t5;
+
+ static const RegisterID returnRegister = MIPSRegisters::v0;
+#elif CPU(X86)
+ static const RegisterID input = X86Registers::eax;
+ static const RegisterID index = X86Registers::edx;
+ static const RegisterID length = X86Registers::ecx;
+ static const RegisterID output = X86Registers::edi;
+
+ static const RegisterID regT0 = X86Registers::ebx;
+ static const RegisterID regT1 = X86Registers::esi;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+#elif CPU(X86_64)
+ static const RegisterID input = X86Registers::edi;
+ static const RegisterID index = X86Registers::esi;
+ static const RegisterID length = X86Registers::edx;
+ static const RegisterID output = X86Registers::ecx;
+
+ static const RegisterID regT0 = X86Registers::eax;
+ static const RegisterID regT1 = X86Registers::ebx;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+#endif
+
+ void optimizeAlternative(PatternAlternative* alternative)
+ {
+ if (!alternative->m_terms.size())
+ return;
+
+ for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+ PatternTerm& nextTerm = alternative->m_terms[i + 1];
+
+ if ((term.type == PatternTerm::TypeCharacterClass)
+ && (term.quantityType == QuantifierFixedCount)
+ && (nextTerm.type == PatternTerm::TypePatternCharacter)
+ && (nextTerm.quantityType == QuantifierFixedCount)) {
+ PatternTerm termCopy = term;
+ alternative->m_terms[i] = nextTerm;
+ alternative->m_terms[i + 1] = termCopy;
+ }
+ }
+ }
+
+ void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
+ {
+ do {
+ // pick which range we're going to generate
+ int which = count >> 1;
+ char lo = ranges[which].begin;
+ char hi = ranges[which].end;
+
+ // check if there are any ranges or matches below lo. If not, just jl to failure -
+ // if there is anything else to check, check that first, if it falls through jmp to failure.
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ // generate code for all ranges before this one
+ if (which)
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
+ ++*matchIndex;
+ }
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else if (which) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else
+ failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
+ ++*matchIndex;
+
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
+ // fall through to here, the value is above hi.
+
+ // shuffle along & loop around if there are any more matches to handle.
+ unsigned next = which + 1;
+ ranges += next;
+ count -= next;
+ } while (count);
+ }
+
+ void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
+ {
+ if (charClass->m_table) {
+ ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
+ matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
+ return;
+ }
+ Jump unicodeFail;
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
+ Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
+
+ if (charClass->m_matchesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
+ UChar ch = charClass->m_matchesUnicode[i];
+ matchDest.append(branch32(Equal, character, Imm32(ch)));
+ }
+ }
+
+ if (charClass->m_rangesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
+ UChar lo = charClass->m_rangesUnicode[i].begin;
+ UChar hi = charClass->m_rangesUnicode[i].end;
+
+ Jump below = branch32(LessThan, character, Imm32(lo));
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
+ below.link(this);
+ }
+ }
+
+ unicodeFail = jump();
+ isAscii.link(this);
+ }
+
+ if (charClass->m_ranges.size()) {
+ unsigned matchIndex = 0;
+ JumpList failures;
+ matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
+ while (matchIndex < charClass->m_matches.size())
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
+
+ failures.link(this);
+ } else if (charClass->m_matches.size()) {
+ // optimization: gather 'a','A' etc back together, can mask & test once.
+ Vector<char> matchesAZaz;
+
+ for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
+ char ch = charClass->m_matches[i];
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIILower(ch)) {
+ matchesAZaz.append(ch);
+ continue;
+ }
+ if (isASCIIUpper(ch))
+ continue;
+ }
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
+ }
+
+ if (unsigned countAZaz = matchesAZaz.size()) {
+ or32(Imm32(32), character);
+ for (unsigned i = 0; i < countAZaz; ++i)
+ matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
+ }
+ }
+
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
+ unicodeFail.link(this);
+ }
+
+ // Jumps if input not available; will have (incorrectly) incremented already!
+ Jump jumpIfNoAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(Above, index, length);
+ }
+
+ Jump jumpIfAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump checkInput()
+ {
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump atEndOfInput()
+ {
+ return branch32(Equal, index, length);
+ }
+
+ Jump notAtEndOfInput()
+ {
+ return branch32(NotEqual, index, length);
+ }
+
+ Jump jumpIfCharEquals(UChar ch, int inputPosition)
+ {
+ return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
+ {
+ return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ void readCharacter(int inputPosition, RegisterID reg)
+ {
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ }
+
+ void storeToFrame(RegisterID reg, unsigned frameLocation)
+ {
+ poke(reg, frameLocation);
+ }
+
+ void storeToFrame(Imm32 imm, unsigned frameLocation)
+ {
+ poke(imm, frameLocation);
+ }
+
+ DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
+ {
+ return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ void loadFromFrame(unsigned frameLocation, RegisterID reg)
+ {
+ peek(reg, frameLocation);
+ }
+
+ void loadFromFrameAndJump(unsigned frameLocation)
+ {
+ jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ struct IndirectJumpEntry {
+ IndirectJumpEntry(int32_t stackOffset)
+ : m_stackOffset(stackOffset)
+ {
+ }
+
+ IndirectJumpEntry(int32_t stackOffset, Jump jump)
+ : m_stackOffset(stackOffset)
+ {
+ addJump(jump);
+ }
+
+ void addJump(Jump jump)
+ {
+ m_relJumps.append(jump);
+ }
+
+ int32_t m_stackOffset;
+ JumpList m_relJumps;
+ };
+
+ struct AlternativeBacktrackRecord {
+ DataLabelPtr dataLabel;
+ Label backtrackLocation;
+
+ AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
+ : dataLabel(dataLabel)
+ , backtrackLocation(backtrackLocation)
+ {
+ }
+ };
+
+ struct ParenthesesTail;
+ struct TermGenerationState;
+
+ struct GenerationState {
+ typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap;
+
+ GenerationState()
+ : m_parenNestingLevel(0)
+ {
+ }
+
+ void addIndirectJumpEntry(int32_t stackOffset, Jump jump)
+ {
+ IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset);
+
+ ASSERT(stackOffset >= 0);
+
+ uint32_t offset = static_cast<uint32_t>(stackOffset);
+
+ if (result == m_indirectJumpMap.end())
+ m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump));
+ else
+ result->second->addJump(jump);
+ }
+
+ void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps)
+ {
+ JumpList::JumpVector jumpVector = jumps.jumps();
+ size_t size = jumpVector.size();
+ for (size_t i = 0; i < size; ++i)
+ addIndirectJumpEntry(stackOffset, jumpVector[i]);
+
+ jumps.empty();
+ }
+
+ void emitIndirectJumpTable(MacroAssembler* masm)
+ {
+ for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) {
+ IndirectJumpEntry* indJumpEntry = iter->second;
+ indJumpEntry->m_relJumps.link(masm);
+ masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset));
+ delete indJumpEntry;
+ }
+ }
+
+ void incrementParenNestingLevel()
+ {
+ ++m_parenNestingLevel;
+ }
+
+ void decrementParenNestingLevel()
+ {
+ --m_parenNestingLevel;
+ }
+
+ ParenthesesTail* addParenthesesTail(PatternTerm& term, ParenthesesTail* nextOuterParenTail)
+ {
+ ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, nextOuterParenTail);
+ m_parenTails.append(parenthesesTail);
+ m_parenTailsForIteration.append(parenthesesTail);
+
+ return parenthesesTail;
+ }
+
+ void emitParenthesesTail(YarrGenerator* generator)
+ {
+ unsigned vectorSize = m_parenTails.size();
+ bool priorBacktrackFallThrough = false;
+
+ // Emit in reverse order so parentTail N can fall through to N-1
+ for (unsigned index = vectorSize; index > 0; --index) {
+ JumpList jumpsToNext;
+ priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1);
+ if (index > 1)
+ jumpsToNext.linkTo(generator->label(), generator);
+ else
+ addJumpsToNextInteration(jumpsToNext);
+ }
+ m_parenTails.clear();
+ }
+
+ void addJumpToNextInteration(Jump jump)
+ {
+ m_jumpsToNextInteration.append(jump);
+ }
+
+ void addJumpsToNextInteration(JumpList jumps)
+ {
+ m_jumpsToNextInteration.append(jumps);
+ }
+
+ void addDataLabelToNextIteration(DataLabelPtr dataLabel)
+ {
+ m_dataPtrsToNextIteration.append(dataLabel);
+ }
+
+ void linkToNextIteration(Label label)
+ {
+ m_nextIteration = label;
+
+ for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i)
+ m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration));
+
+ m_dataPtrsToNextIteration.clear();
+
+ for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i)
+ m_parenTailsForIteration[i]->setNextIteration(m_nextIteration);
+
+ m_parenTailsForIteration.clear();
+ }
+
+ void linkToNextIteration(YarrGenerator* generator)
+ {
+ m_jumpsToNextInteration.linkTo(m_nextIteration, generator);
+ }
+
+ int m_parenNestingLevel;
+ Vector<AlternativeBacktrackRecord> m_backtrackRecords;
+ IndirectJumpHashMap m_indirectJumpMap;
+ Label m_nextIteration;
+ Vector<OwnPtr<ParenthesesTail> > m_parenTails;
+ JumpList m_jumpsToNextInteration;
+ Vector<DataLabelPtr> m_dataPtrsToNextIteration;
+ Vector<ParenthesesTail*> m_parenTailsForIteration;
+ };
+
+ struct BacktrackDestination {
+ typedef enum {
+ NoBacktrack,
+ BacktrackLabel,
+ BacktrackStackOffset,
+ BacktrackJumpList,
+ BacktrackLinked
+ } BacktrackType;
+
+ BacktrackDestination()
+ : m_backtrackType(NoBacktrack)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ BacktrackDestination(int32_t stackOffset)
+ : m_backtrackType(BacktrackStackOffset)
+ , m_backtrackStackOffset(stackOffset)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ BacktrackDestination(Label label)
+ : m_backtrackType(BacktrackLabel)
+ , m_backtrackLabel(label)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ void clear(bool doDataLabelClear = true)
+ {
+ m_backtrackType = NoBacktrack;
+ if (doDataLabelClear)
+ clearDataLabel();
+ m_nextBacktrack = 0;
+ }
+
+ void clearDataLabel()
+ {
+ m_dataLabelPtr = DataLabelPtr();
+ }
+
+ bool hasDestination()
+ {
+ return (m_backtrackType != NoBacktrack);
+ }
+
+ bool isStackOffset()
+ {
+ return (m_backtrackType == BacktrackStackOffset);
+ }
+
+ bool isLabel()
+ {
+ return (m_backtrackType == BacktrackLabel);
+ }
+
+ bool isJumpList()
+ {
+ return (m_backtrackType == BacktrackJumpList);
+ }
+
+ bool hasDataLabel()
+ {
+ return m_dataLabelPtr.isSet();
+ }
+
+ void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true)
+ {
+ m_backtrackType = rhs.m_backtrackType;
+ if (m_backtrackType == BacktrackStackOffset)
+ m_backtrackStackOffset = rhs.m_backtrackStackOffset;
+ else if (m_backtrackType == BacktrackLabel)
+ m_backtrackLabel = rhs.m_backtrackLabel;
+ if (copyDataLabel)
+ m_dataLabelPtr = rhs.m_dataLabelPtr;
+ m_backtrackSourceJumps = rhs.m_backtrackSourceJumps;
+ m_backtrackSourceLabel = rhs.m_backtrackSourceLabel;
+ }
+
+ void copyTo(BacktrackDestination& lhs)
+ {
+ lhs.m_backtrackType = m_backtrackType;
+ if (m_backtrackType == BacktrackStackOffset)
+ lhs.m_backtrackStackOffset = m_backtrackStackOffset;
+ else if (m_backtrackType == BacktrackLabel)
+ lhs.m_backtrackLabel = m_backtrackLabel;
+ lhs.m_backtrackSourceJumps = m_backtrackSourceJumps;
+ lhs.m_backtrackSourceLabel = m_backtrackSourceLabel;
+ lhs.m_dataLabelPtr = m_dataLabelPtr;
+ lhs.m_backTrackJumps = m_backTrackJumps;
+ }
+
+ void addBacktrackJump(Jump jump)
+ {
+ m_backTrackJumps.append(jump);
+ }
+
+ void setStackOffset(int32_t stackOffset)
+ {
+ m_backtrackType = BacktrackStackOffset;
+ m_backtrackStackOffset = stackOffset;
+ }
+
+ void setLabel(Label label)
+ {
+ m_backtrackType = BacktrackLabel;
+ m_backtrackLabel = label;
+ }
+
+ void setNextBacktrackLabel(Label label)
+ {
+ if (m_nextBacktrack)
+ m_nextBacktrack->setLabel(label);
+ }
+
+ void copyBacktrackToLabel(BacktrackDestination& rhs)
+ {
+ if (rhs.m_backtrackToLabel)
+ m_backtrackToLabel = rhs.m_backtrackToLabel;
+ }
+
+ void setBacktrackToLabel(Label* backtrackToLabel)
+ {
+ if (!m_backtrackToLabel)
+ m_backtrackToLabel = backtrackToLabel;
+ }
+
+ void setBacktrackJumpList(JumpList* jumpList)
+ {
+ m_backtrackType = BacktrackJumpList;
+ m_backtrackSourceJumps = jumpList;
+ }
+
+ void setBacktrackSourceLabel(Label* backtrackSourceLabel)
+ {
+ m_backtrackSourceLabel = backtrackSourceLabel;
+ }
+
+ void setDataLabel(DataLabelPtr dp)
+ {
+ if (m_subDataLabelPtr) {
+ *m_subDataLabelPtr = dp;
+ m_subDataLabelPtr = 0;
+ } else
+ m_dataLabelPtr = dp;
+ }
+
+ void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr)
+ {
+ m_subDataLabelPtr = subDataLabelPtr;
+ }
+
+ void linkToNextBacktrack(BacktrackDestination* nextBacktrack)
+ {
+ m_nextBacktrack = nextBacktrack;
+ }
+
+ int32_t getStackOffset()
+ {
+ ASSERT(m_backtrackType == BacktrackStackOffset);
+ return m_backtrackStackOffset;
+ }
+
+ Label getLabel()
+ {
+ ASSERT(m_backtrackType == BacktrackLabel);
+ return m_backtrackLabel;
+ }
+
+ JumpList& getBacktrackJumps()
+ {
+ return m_backTrackJumps;
+ }
+
+ DataLabelPtr& getDataLabel()
+ {
+ return m_dataLabelPtr;
+ }
+
+ void jumpToBacktrack(MacroAssembler* masm)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ masm->jump().linkTo(*m_backtrackSourceLabel, masm);
+ else
+ m_backtrackSourceJumps->append(masm->jump());
+ } else if (isStackOffset())
+ masm->jump(Address(stackPointerRegister, m_backtrackStackOffset));
+ else if (isLabel())
+ masm->jump().linkTo(m_backtrackLabel, masm);
+ else
+ m_backTrackJumps.append(masm->jump());
+ }
+
+ void jumpToBacktrack(YarrGenerator* generator, Jump jump)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ jump.linkTo(*m_backtrackSourceLabel, generator);
+ else
+ m_backtrackSourceJumps->append(jump);
+ } else if (isStackOffset())
+ generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump);
+ else if (isLabel())
+ jump.linkTo(getLabel(), generator);
+ else
+ m_backTrackJumps.append(jump);
+ }
+
+ void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ jumps.linkTo(*m_backtrackSourceLabel, generator);
+ else
+ m_backtrackSourceJumps->append(jumps);
+ } else if (isStackOffset())
+ generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps);
+ else if (isLabel())
+ jumps.linkTo(getLabel(), generator);
+ else
+ m_backTrackJumps.append(jumps);
+ }
+
+ bool linkDataLabelToHereIfExists(YarrGenerator* generator)
+ {
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), generator->label()));
+ clearDataLabel();
+ return true;
+ }
+
+ return false;
+ }
+
+ bool plantJumpToBacktrackIfExists(YarrGenerator* generator)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ generator->jump(*m_backtrackSourceLabel);
+ else
+ m_backtrackSourceJumps->append(generator->jump());
+
+ return true;
+ }
+
+ if (isStackOffset()) {
+ generator->jump(Address(stackPointerRegister, getStackOffset()));
+ return true;
+ }
+
+ if (isLabel()) {
+ generator->jump(getLabel());
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel()));
+ clearDataLabel();
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false)
+ {
+ Label hereLabel = generator->label();
+
+ if (m_backtrackToLabel) {
+ *m_backtrackToLabel = hereLabel;
+ m_backtrackToLabel = 0;
+ }
+
+ m_backTrackJumps.link(generator);
+
+ if (nextIteration)
+ generator->m_expressionState.linkToNextIteration(hereLabel);
+
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel));
+ // data label cleared as a result of the clear() below
+ }
+
+ clear();
+ }
+
+ void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false)
+ {
+ m_backTrackJumps.linkTo(label, generator);
+
+ if (nextIteration)
+ generator->m_expressionState.linkToNextIteration(label);
+
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label));
+ clearDataLabel();
+ }
+ }
+
+ private:
+ BacktrackType m_backtrackType;
+ int32_t m_backtrackStackOffset;
+ Label m_backtrackLabel;
+ DataLabelPtr m_dataLabelPtr;
+ Label* m_backtrackToLabel;
+ DataLabelPtr* m_subDataLabelPtr;
+ BacktrackDestination* m_nextBacktrack;
+ Label* m_backtrackSourceLabel;
+ JumpList* m_backtrackSourceJumps;
+ JumpList m_backTrackJumps;
+ };
+
+ struct TermGenerationState {
+ TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
+ : disjunction(disjunction)
+ , checkedTotal(checkedTotal)
+ , m_subParenNum(0)
+ , m_linkedBacktrack(0)
+ , m_parenthesesTail(0)
+ {
+ }
+
+ void resetAlternative()
+ {
+ m_backtrack.clear();
+ alt = 0;
+ }
+ bool alternativeValid()
+ {
+ return alt < disjunction->m_alternatives.size();
+ }
+ void nextAlternative()
+ {
+ ++alt;
+ }
+ PatternAlternative* alternative()
+ {
+ return disjunction->m_alternatives[alt];
+ }
+ bool isLastAlternative()
+ {
+ return (alt + 1) == disjunction->m_alternatives.size();
+ }
+
+ void resetTerm()
+ {
+ ASSERT(alternativeValid());
+ t = 0;
+ m_subParenNum = 0;
+ }
+ bool termValid()
+ {
+ ASSERT(alternativeValid());
+ return t < alternative()->m_terms.size();
+ }
+ void nextTerm()
+ {
+ ASSERT(alternativeValid());
+ ++t;
+ }
+ PatternTerm& term()
+ {
+ ASSERT(alternativeValid());
+ return alternative()->m_terms[t];
+ }
+ bool isLastTerm()
+ {
+ ASSERT(alternativeValid());
+ return (t + 1) == alternative()->m_terms.size();
+ }
+ unsigned getSubParenNum()
+ {
+ return m_subParenNum++;
+ }
+ bool isMainDisjunction()
+ {
+ return !disjunction->m_parent;
+ }
+
+ void setParenthesesTail(ParenthesesTail* parenthesesTail)
+ {
+ m_parenthesesTail = parenthesesTail;
+ }
+
+ ParenthesesTail* getParenthesesTail()
+ {
+ return m_parenthesesTail;
+ }
+
+ PatternTerm& lookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ ASSERT((t + 1) < alternative()->m_terms.size());
+ return alternative()->m_terms[t + 1];
+ }
+ bool isSinglePatternCharacterLookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ return ((t + 1) < alternative()->m_terms.size())
+ && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
+ && (lookaheadTerm().quantityType == QuantifierFixedCount)
+ && (lookaheadTerm().quantityCount == 1);
+ }
+
+ int inputOffset()
+ {
+ return term().inputPosition - checkedTotal;
+ }
+
+ void clearBacktrack()
+ {
+ m_backtrack.clear(false);
+ m_linkedBacktrack = 0;
+ }
+
+ void jumpToBacktrack(MacroAssembler* masm)
+ {
+ m_backtrack.jumpToBacktrack(masm);
+ }
+
+ void jumpToBacktrack(YarrGenerator* generator, Jump jump)
+ {
+ m_backtrack.jumpToBacktrack(generator, jump);
+ }
+
+ void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps)
+ {
+ m_backtrack.jumpToBacktrack(generator, jumps);
+ }
+
+ bool plantJumpToBacktrackIfExists(YarrGenerator* generator)
+ {
+ return m_backtrack.plantJumpToBacktrackIfExists(generator);
+ }
+
+ bool linkDataLabelToBacktrackIfExists(YarrGenerator* generator)
+ {
+ if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel()));
+ m_backtrack.clearDataLabel();
+ return true;
+ }
+
+ return false;
+ }
+
+ void addBacktrackJump(Jump jump)
+ {
+ m_backtrack.addBacktrackJump(jump);
+ }
+
+ void setBacktrackDataLabel(DataLabelPtr dp)
+ {
+ m_backtrack.setDataLabel(dp);
+ }
+
+ void setBackTrackStackOffset(int32_t stackOffset)
+ {
+ m_backtrack.setStackOffset(stackOffset);
+ }
+
+ void setBacktrackLabel(Label label)
+ {
+ m_backtrack.setLabel(label);
+ }
+
+ void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false)
+ {
+ m_backtrack.linkAlternativeBacktracks(generator, nextIteration);
+ m_linkedBacktrack = 0;
+ }
+
+ void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false)
+ {
+ m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration);
+ }
+
+ void setBacktrackLink(BacktrackDestination* linkedBacktrack)
+ {
+ m_linkedBacktrack = linkedBacktrack;
+ }
+
+ void chainBacktracks(BacktrackDestination* followonBacktrack)
+ {
+ if (m_linkedBacktrack)
+ m_linkedBacktrack->linkToNextBacktrack(followonBacktrack);
+ }
+
+ void chainBacktrackJumps(JumpList* jumpList)
+ {
+ if (m_linkedBacktrack && !(m_linkedBacktrack->hasDestination()))
+ m_linkedBacktrack->setBacktrackJumpList(jumpList);
+ }
+
+ BacktrackDestination& getBacktrackDestination()
+ {
+ return m_backtrack;
+ }
+
+ void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true)
+ {
+ if (doJump)
+ m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps());
+ if (backtrack.hasDestination()) {
+ if (m_backtrack.hasDataLabel())
+ generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel());
+
+ m_backtrack.copyTarget(backtrack, doJump);
+ }
+ }
+
+ PatternDisjunction* disjunction;
+ int checkedTotal;
+ private:
+ unsigned alt;
+ unsigned t;
+ unsigned m_subParenNum;
+ BacktrackDestination m_backtrack;
+ BacktrackDestination* m_linkedBacktrack;
+ ParenthesesTail* m_parenthesesTail;
+
+ };
+
+ struct ParenthesesTail {
+ ParenthesesTail(PatternTerm& term, int nestingLevel, ParenthesesTail* nextOuterParenTail)
+ : m_term(term)
+ , m_nestingLevel(nestingLevel)
+ , m_subParenIndex(0)
+ , m_nextOuterParenTail(nextOuterParenTail)
+ {
+ }
+
+ void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough)
+ {
+ m_nonGreedyTryParentheses = nonGreedyTryParentheses;
+ m_fallThrough = fallThrough;
+
+ m_subParenIndex = state.getSubParenNum();
+ parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack);
+ state.chainBacktracks(&m_backtrack);
+ BacktrackDestination& stateBacktrack = state.getBacktrackDestination();
+ stateBacktrack.copyTo(m_backtrack);
+ stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel);
+ state.setBacktrackLink(&m_backtrack);
+ stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr);
+
+ m_doDirectBacktrack = m_parenBacktrack.hasDestination();
+
+ if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy))
+ m_doDirectBacktrack = false;
+
+ if (m_doDirectBacktrack)
+ state.propagateBacktrackingFrom(generator, m_parenBacktrack, false);
+ else {
+ stateBacktrack.setBacktrackJumpList(&m_pattBacktrackJumps);
+ stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens);
+ }
+
+ parenthesesState.chainBacktrackJumps(&m_pattBacktrackJumps);
+ }
+
+ void setNextIteration(Label nextIteration)
+ {
+ if (!m_nestingLevel && !m_backtrackToLabel.isSet())
+ m_backtrackToLabel = nextIteration;
+ }
+
+ void addAfterParenJump(Jump jump)
+ {
+ m_pattBacktrackJumps.append(jump);
+ }
+
+ bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough)
+ {
+ const RegisterID indexTemporary = regT0;
+ unsigned parenthesesFrameLocation = m_term.frameLocation;
+ Jump fromPriorBacktrack;
+ bool needJumpForPriorParenTail = false;
+
+ if (priorBackTrackFallThrough
+ && ((m_term.quantityType == QuantifierGreedy)
+ || (m_term.quantityType == QuantifierNonGreedy)
+ || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) {
+ // If the prior paren tail code assumed that it could fall through,
+ // but we need to generate after paren backtrack code, then provide
+ // a jump around that code for the prior paren tail code.
+ // A regular expressing like ((xxx)...)? needs this.
+ fromPriorBacktrack = generator->jump();
+ needJumpForPriorParenTail = true;
+ }
+
+ if (!m_backtrack.hasDestination()) {
+ if (m_backtrackToLabel.isSet()) {
+ m_backtrack.setLabel(m_backtrackToLabel);
+ nextBacktrackFallThrough = false;
+ } else if (!m_subParenIndex && m_nextOuterParenTail) {
+ // If we don't have a destination and we are the first term of a nested paren, go
+ // back to the outer paren.
+ // There is an optimization if the next outer paren is the next paren to be emitted.
+ // In that case we really want the else clause.
+ m_backtrack.setBacktrackJumpList(&m_nextOuterParenTail->m_withinBacktrackJumps);
+ nextBacktrackFallThrough = false;
+ } else
+ m_backtrack.setBacktrackJumpList(&jumpsToNext);
+ } else
+ nextBacktrackFallThrough = false;
+
+ // A failure AFTER the parens jumps here - Backtrack to this paren
+ m_backtrackFromAfterParens = generator->label();
+
+ if (m_dataAfterLabelPtr.isSet())
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens));
+
+ m_pattBacktrackJumps.link(generator);
+
+ if (m_term.quantityType == QuantifierGreedy) {
+ // If this is -1 we have now tested with both with and without the parens.
+ generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, Imm32(-1)));
+ } else if (m_term.quantityType == QuantifierNonGreedy) {
+ // If this is -1 we have now tested with both with and without the parens.
+ generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ generator->branch32(Equal, indexTemporary, Imm32(-1)).linkTo(m_nonGreedyTryParentheses, generator);
+ }
+
+ if (!m_doDirectBacktrack)
+ m_parenBacktrack.plantJumpToBacktrackIfExists(generator);
+
+ // A failure WITHIN the parens jumps here
+ if (needJumpForPriorParenTail)
+ fromPriorBacktrack.link(generator);
+ m_parenBacktrack.linkAlternativeBacktracks(generator);
+ m_withinBacktrackJumps.link(generator);
+
+ if (m_term.capture())
+ generator->store32(Imm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int)));
+
+ if (m_term.quantityType == QuantifierGreedy) {
+ generator->storeToFrame(Imm32(-1), parenthesesFrameLocation);
+ generator->jump().linkTo(m_fallThrough, generator);
+ nextBacktrackFallThrough = false;
+ } else if (!nextBacktrackFallThrough)
+ m_backtrack.jumpToBacktrack(generator);
+
+ if (!m_doDirectBacktrack)
+ m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens);
+
+ return nextBacktrackFallThrough;
+ }
+
+ PatternTerm& m_term;
+ int m_nestingLevel;
+ unsigned m_subParenIndex;
+ ParenthesesTail* m_nextOuterParenTail;
+ Label m_nonGreedyTryParentheses;
+ Label m_fallThrough;
+ Label m_backtrackToLabel;
+ Label m_backtrackFromAfterParens;
+ DataLabelPtr m_dataAfterLabelPtr;
+ JumpList m_pattBacktrackJumps;
+ JumpList m_withinBacktrackJumps;
+ BacktrackDestination m_parenBacktrack;
+ BacktrackDestination m_backtrack;
+ bool m_doDirectBacktrack;
+ };
+
+ void generateAssertionBOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (!term.inputPosition)
+ matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
+
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(this);
+
+ matchDest.link(this);
+ } else {
+ // Erk, really should poison out these alternatives early. :-/
+ if (term.inputPosition)
+ state.jumpToBacktrack(this);
+ else
+ state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal)));
+ }
+ }
+
+ void generateAssertionEOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (term.inputPosition == state.checkedTotal)
+ matchDest.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(this);
+
+ matchDest.link(this);
+ } else {
+ if (term.inputPosition == state.checkedTotal)
+ state.jumpToBacktrack(this, notAtEndOfInput());
+ // Erk, really should poison out these alternatives early. :-/
+ else
+ state.jumpToBacktrack(this);
+ }
+ }
+
+ // Also falls though on nextIsNotWordChar.
+ void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ if (term.inputPosition == state.checkedTotal)
+ nextIsNotWordChar.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
+ }
+
+ void generateAssertionWordBoundary(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ Jump atBegin;
+ JumpList matchDest;
+ if (!term.inputPosition)
+ atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
+ if (!term.inputPosition)
+ atBegin.link(this);
+
+ // We fall through to here if the last character was not a wordchar.
+ JumpList nonWordCharThenWordChar;
+ JumpList nonWordCharThenNonWordChar;
+ if (term.invert()) {
+ matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+ nonWordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+ nonWordCharThenNonWordChar.append(jump());
+ }
+ state.jumpToBacktrack(this, nonWordCharThenNonWordChar);
+
+ // We jump here if the last character was a wordchar.
+ matchDest.link(this);
+ JumpList wordCharThenWordChar;
+ JumpList wordCharThenNonWordChar;
+ if (term.invert()) {
+ matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
+ wordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
+ // This can fall-though!
+ }
+
+ state.jumpToBacktrack(this, wordCharThenWordChar);
+
+ nonWordCharThenWordChar.link(this);
+ wordCharThenNonWordChar.link(this);
+ }
+
+ void generatePatternCharacterSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch = state.term().patternCharacter;
+
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset()));
+ }
+ }
+
+ void generatePatternCharacterPair(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch1 = state.term().patternCharacter;
+ UChar ch2 = state.lookaheadTerm().patternCharacter;
+
+ int mask = 0;
+ int chPair = ch1 | (ch2 << 16);
+
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIIAlpha(ch1))
+ mask |= 32;
+ if (isASCIIAlpha(ch2))
+ mask |= 32 << 16;
+ }
+
+ if (mask) {
+ load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
+ or32(Imm32(mask), character);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask)));
+ } else
+ state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)));
+ }
+
+ void generatePatternCharacterFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)));
+ }
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generatePatternCharacterGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ if (term.quantityCount != quantifyInfinite) {
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+ } else
+ jump(loop);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackLabel(backtrackBegin);
+ }
+
+ void generatePatternCharacterNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ if (term.quantityCount != quantifyInfinite)
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackLabel(backtrackBegin);
+ }
+
+ void generateCharacterClassSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invert())
+ state.jumpToBacktrack(this, matchDest);
+ else {
+ state.jumpToBacktrack(this);
+ matchDest.link(this);
+ }
+ }
+
+ void generateCharacterClassFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ JumpList matchDest;
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invert())
+ state.jumpToBacktrack(this, matchDest);
+ else {
+ state.jumpToBacktrack(this);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generateCharacterClassGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+
+ if (term.invert()) {
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, failures, term.characterClass);
+ } else {
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+ failures.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ if (term.quantityCount != quantifyInfinite) {
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+ } else
+ jump(loop);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackLabel(backtrackBegin);
+ }
+
+ void generateCharacterClassNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invert())
+ matchDest.linkTo(hardFail, this);
+ else {
+ jump(hardFail);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackLabel(backtrackBegin);
+ }
+
+ void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
+ {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
+ ASSERT(parenthesesTerm.quantityCount == 1);
+
+ PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
+ unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
+
+ if (disjunction->m_alternatives.size() == 1) {
+ state.resetAlternative();
+ ASSERT(state.alternativeValid());
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
+
+ // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
+ // will be forced to always trampoline into here, just to decrement the index.
+ // Ick.
+ Jump skip = jump();
+
+ Label backtrackBegin(this);
+ sub32(Imm32(countToCheck), index);
+ state.addBacktrackJump(jump());
+
+ skip.link(this);
+
+ state.setBacktrackLabel(backtrackBegin);
+
+ state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck));
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ state.checkedTotal -= countToCheck;
+ } else {
+ JumpList successes;
+ bool propogateBacktrack = false;
+
+ for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
+
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ ASSERT(alternative->m_minimumSize >= preCheckedCount);
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // Matched an alternative.
+ DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
+
+ if (!state.isLastAlternative() || countToCheck)
+ successes.append(jump());
+
+ // Alternative did not match.
+
+ state.setBacktrackDataLabel(dataLabel);
+
+ // Do we have a backtrack destination?
+ // if so, link the data label to it.
+ state.linkDataLabelToBacktrackIfExists(this);
+
+ if (!state.isLastAlternative() || countToCheck)
+ state.linkAlternativeBacktracks(this);
+
+ if (countToCheck) {
+ sub32(Imm32(countToCheck), index);
+ state.checkedTotal -= countToCheck;
+ } else if (state.isLastAlternative())
+ propogateBacktrack = true;
+ }
+ // We fall through to here when the last alternative fails.
+ // Add a backtrack out of here for the parenthese handling code to link up.
+ if (!propogateBacktrack)
+ state.addBacktrackJump(jump());
+
+ // Save address on stack for the parens code to backtrack to, to retry the
+ // next alternative.
+ state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*));
+
+ successes.link(this);
+ }
+ }
+
+ void generateParenthesesSingle(TermGenerationState& state)
+ {
+ const RegisterID indexTemporary = regT0;
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+
+ unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0;
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term.quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+
+ // optimized case - no capture & no quantifier can be handled in a light-weight manner.
+ if (!term.capture() && (term.quantityType == QuantifierFixedCount)) {
+ m_expressionState.incrementParenNestingLevel();
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // this expects that any backtracks back out of the parentheses will be in the
+ // parenthesesState's m_backTrackJumps vector, and that if they need backtracking
+ // they will have set an entry point on the parenthesesState's m_backtrackLabel.
+ BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination();
+ state.propagateBacktrackingFrom(this, parenthesesBacktrack);
+ state.getBacktrackDestination().copyBacktrackToLabel(parenthesesBacktrack);
+
+ m_expressionState.decrementParenNestingLevel();
+ } else {
+ Jump nonGreedySkipParentheses;
+ Label nonGreedyTryParentheses;
+ if (term.quantityType == QuantifierGreedy)
+ storeToFrame(index, parenthesesFrameLocation);
+ else if (term.quantityType == QuantifierNonGreedy) {
+ storeToFrame(Imm32(-1), parenthesesFrameLocation);
+ nonGreedySkipParentheses = jump();
+ nonGreedyTryParentheses = label();
+ storeToFrame(index, parenthesesFrameLocation);
+ }
+
+ // store the match start index
+ if (term.capture()) {
+ int inputOffset = state.inputOffset() - preCheckedCount;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ }
+
+ ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getParenthesesTail());
+
+ m_expressionState.incrementParenNestingLevel();
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+
+ // Save the parenthesesTail for backtracking from nested parens to this one.
+ parenthesesState.setParenthesesTail(parenthesesTail);
+
+ // generate the body of the parentheses
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+
+ // For non-fixed counts, backtrack if we didn't match anything.
+ if (term.quantityType != QuantifierFixedCount)
+ parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))));
+
+ // store the match end index
+ if (term.capture()) {
+ int inputOffset = state.inputOffset();
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(state.inputOffset()), indexTemporary);
+ store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ }
+
+ m_expressionState.decrementParenNestingLevel();
+
+ parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label());
+
+ parenthesesState.getBacktrackDestination().clear();
+
+ if (term.quantityType == QuantifierNonGreedy)
+ nonGreedySkipParentheses.link(this);
+ }
+ }
+
+ void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
+ {
+ PatternTerm& parenthesesTerm = state.term();
+ PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
+ ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
+ ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+
+ Label matchAgain(this);
+
+ storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later.
+
+ for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
+
+ PatternAlternative* alternative = parenthesesState.alternative();
+ optimizeAlternative(alternative);
+
+ int countToCheck = alternative->m_minimumSize;
+ if (countToCheck) {
+ parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
+ parenthesesState.checkedTotal += countToCheck;
+ }
+
+ for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
+ generateTerm(parenthesesState);
+
+ // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet.
+ branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
+
+ // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack,
+ // or fall through to try the next alternative if no backtrack is available.
+ parenthesesState.plantJumpToBacktrackIfExists(this);
+
+ parenthesesState.linkAlternativeBacktracks(this);
+
+ // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
+
+ if (countToCheck) {
+ sub32(Imm32(countToCheck), index);
+ parenthesesState.checkedTotal -= countToCheck;
+ }
+ }
+
+ // If the last alternative falls through to here, we have a failed match...
+ // Which means that we match whatever we have matched up to this point (even if nothing).
+ }
+
+ void generateParentheticalAssertion(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+ ASSERT(term.quantityType == QuantifierFixedCount);
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
+
+ if (term.invert()) {
+ // Inverted case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Fail!
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(this);
+
+ // And fail means success.
+ parenthesesState.linkAlternativeBacktracks(this);
+
+ loadFromFrame(parenthesesFrameLocation, index);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ } else {
+ // Normal case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Success!
+ loadFromFrame(parenthesesFrameLocation, index);
+ Jump success = jump();
+
+ parenthesesState.linkAlternativeBacktracks(this);
+
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(this);
+
+ success.link(this);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ }
+ }
+
+ void generateTerm(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ generateAssertionBOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ generateAssertionEOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ generateAssertionWordBoundary(state);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1) {
+ if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
+ generatePatternCharacterPair(state);
+ state.nextTerm();
+ } else
+ generatePatternCharacterSingle(state);
+ } else
+ generatePatternCharacterFixed(state);
+ break;
+ case QuantifierGreedy:
+ generatePatternCharacterGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generatePatternCharacterNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1)
+ generateCharacterClassSingle(state);
+ else
+ generateCharacterClassFixed(state);
+ break;
+ case QuantifierGreedy:
+ generateCharacterClassGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generateCharacterClassNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeBackReference:
+ m_shouldFallBack = true;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ if (term.quantityCount == 1 && !term.parentheses.isCopy)
+ generateParenthesesSingle(state);
+ else if (term.parentheses.isTerminal)
+ generateParenthesesGreedyNoBacktrack(state);
+ else
+ m_shouldFallBack = true;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ generateParentheticalAssertion(state);
+ break;
+ }
+ }
+
+ void generateDisjunction(PatternDisjunction* disjunction)
+ {
+ TermGenerationState state(disjunction, 0);
+ state.resetAlternative();
+
+ // check availability for the next alternative
+ int countCheckedForCurrentAlternative = 0;
+ int countToCheckForFirstAlternative = 0;
+ bool hasShorterAlternatives = false;
+ bool setRepeatAlternativeLabels = false;
+ JumpList notEnoughInputForPreviousAlternative;
+ Label firstAlternative;
+ Label firstAlternativeInputChecked;
+
+ // The label 'firstAlternative' is used to plant a check to see if there is
+ // sufficient input available to run the first repeating alternative.
+ // The label 'firstAlternativeInputChecked' will jump directly to matching
+ // the first repeating alternative having skipped this check.
+
+ if (state.alternativeValid()) {
+ PatternAlternative* alternative = state.alternative();
+ if (!alternative->onceThrough()) {
+ firstAlternative = Label(this);
+ setRepeatAlternativeLabels = true;
+ }
+ countToCheckForFirstAlternative = alternative->m_minimumSize;
+ state.checkedTotal += countToCheckForFirstAlternative;
+ if (countToCheckForFirstAlternative)
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+ countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+ }
+
+ if (setRepeatAlternativeLabels)
+ firstAlternativeInputChecked = Label(this);
+
+ while (state.alternativeValid()) {
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ // Track whether any alternatives are shorter than the first one.
+ if (!alternative->onceThrough())
+ hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // If we get here, the alternative matched.
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ ASSERT(index != returnRegister);
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, returnRegister);
+ if (alternative->m_minimumSize)
+ sub32(Imm32(alternative->m_minimumSize), returnRegister);
+
+ store32(returnRegister, output);
+ } else
+ load32(Address(output), returnRegister);
+
+ store32(index, Address(output, 4));
+
+ generateReturn();
+
+ state.nextAlternative();
+ if (alternative->onceThrough() && state.alternativeValid())
+ state.clearBacktrack();
+
+ // if there are any more alternatives, plant the check for input before looping.
+ if (state.alternativeValid()) {
+ PatternAlternative* nextAlternative = state.alternative();
+ if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) {
+ // We have handled non-repeating alternatives, jump to next iteration
+ // and loop over repeating alternatives.
+ state.jumpToBacktrack(this);
+
+ countToCheckForFirstAlternative = nextAlternative->m_minimumSize;
+
+ // If we get here, there the last input checked failed.
+ notEnoughInputForPreviousAlternative.link(this);
+
+ state.linkAlternativeBacktracks(this);
+
+ // Back up to start the looping alternatives.
+ if (countCheckedForCurrentAlternative)
+ sub32(Imm32(countCheckedForCurrentAlternative), index);
+
+ firstAlternative = Label(this);
+
+ state.checkedTotal = countToCheckForFirstAlternative;
+ if (countToCheckForFirstAlternative)
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+
+ countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+
+ firstAlternativeInputChecked = Label(this);
+
+ setRepeatAlternativeLabels = true;
+ } else {
+ int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
+
+ if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
+ // If we get here, then the last input checked failed.
+ notEnoughInputForPreviousAlternative.link(this);
+
+ // Check if sufficent input available to run the next alternative
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ // We are now in the correct state to enter the next alternative; this add is only required
+ // to mirror and revert operation of the sub32, just below.
+ add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+
+ // If we get here, then the last input checked passed.
+ state.linkAlternativeBacktracks(this);
+
+ // No need to check if we can run the next alternative, since it is shorter -
+ // just update index.
+ sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+ } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
+ // If we get here, then the last input checked failed.
+ // If there is insufficient input to run the current alternative, and the next alternative is longer,
+ // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
+ // we had checked.
+ notEnoughInputForPreviousAlternative.link(this);
+ add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
+ notEnoughInputForPreviousAlternative.append(jump());
+
+ // The next alternative is longer than the current one; check the difference.
+ state.linkAlternativeBacktracks(this);
+
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ } else { // CASE 3: Both alternatives are the same length.
+ ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
+
+ // If the next alterative is the same length as this one, then no need to check the input -
+ // if there was sufficent input to run the current alternative then there is sufficient
+ // input to run the next one; if not, there isn't.
+ state.linkAlternativeBacktracks(this);
+ }
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+ countCheckedForCurrentAlternative = countToCheckForNextAlternative;
+ state.checkedTotal += countCheckedForCurrentAlternative;
+ }
+ }
+ }
+
+ // If we get here, all Alternatives failed...
+
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+
+ if (!setRepeatAlternativeLabels) {
+ // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link
+ // the match failures to this point, and fall through to the return below.
+ state.linkAlternativeBacktracks(this, true);
+
+ notEnoughInputForPreviousAlternative.link(this);
+ } else {
+ // How much more input need there be to be able to retry from the first alternative?
+ // examples:
+ // /yarr_jit/ or /wrec|pcre/
+ // In these examples we need check for one more input before looping.
+ // /yarr_jit|pcre/
+ // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
+ // being four longer than the last alternative checked, and another +1 to effectively move
+ // the start position along by one).
+ // /yarr|rules/ or /wrec|notsomuch/
+ // In these examples, provided that there was sufficient input to have just been matching for
+ // the second alternative we can loop without checking for available input (since the second
+ // alternative is longer than the first). In the latter example we need to decrement index
+ // (by 4) so the start position is only progressed by 1 from the last iteration.
+ int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
+
+ // First, deal with the cases where there was sufficient input to try the last alternative.
+ if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
+ state.linkAlternativeBacktracks(this, true);
+ else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
+ state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true);
+ else { // no need to check the input, but we do have some bookkeeping to do first.
+ state.linkAlternativeBacktracks(this, true);
+
+ // Where necessary update our preserved start position.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ store32(regT0, Address(output));
+ }
+
+ // Update index if necessary, and loop (without checking).
+ if (incrementForNextIter)
+ add32(Imm32(incrementForNextIter), index);
+ jump().linkTo(firstAlternativeInputChecked, this);
+ }
+
+ notEnoughInputForPreviousAlternative.link(this);
+ // Update our idea of the start position, if we're tracking this.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ if (countCheckedForCurrentAlternative - 1) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ store32(regT0, Address(output));
+ } else
+ store32(index, Address(output));
+ }
+
+ // Check if there is sufficent input to run the first alternative again.
+ jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
+ // No - insufficent input to run the first alteranative, are there any other alternatives we
+ // might need to check? If so, the last check will have left the index incremented by
+ // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
+ // LESS input is available, to have the effect of just progressing the start position by 1
+ // from the last iteration. If this check passes we can just jump up to the check associated
+ // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
+ // first alternative again, and this check will fail (otherwise the check planted just above
+ // here would have passed). This is a bit sad, however it saves trying to do something more
+ // complex here in compilation, and in the common case we should end up coallescing the checks.
+ //
+ // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
+ // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
+ // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
+ // is sufficient input to run either alternative (constantly failing). If there had been only
+ // one alternative, or if the shorter alternative had come first, we would have terminated
+ // immediately. :-/
+ if (hasShorterAlternatives)
+ jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
+ // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
+ // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
+ // but since we're about to return a failure this doesn't really matter!)
+ }
+
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ move(Imm32(-1), returnRegister);
+
+ generateReturn();
+
+ m_expressionState.emitParenthesesTail(this);
+ m_expressionState.emitIndirectJumpTable(this);
+ m_expressionState.linkToNextIteration(this);
+ }
+
+ void generateEnter()
+ {
+#if CPU(X86_64)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ push(X86Registers::ebx);
+#elif CPU(X86)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
+ push(X86Registers::ebx);
+ push(X86Registers::edi);
+ push(X86Registers::esi);
+ // load output into edi (2 = saved ebp + return address).
+ #if COMPILER(MSVC)
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
+ loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
+ loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
+ loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
+ #else
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
+ #endif
+#elif CPU(ARM)
+ push(ARMRegisters::r4);
+ push(ARMRegisters::r5);
+ push(ARMRegisters::r6);
+#if CPU(ARM_TRADITIONAL)
+ push(ARMRegisters::r8); // scratch register
+#endif
+ move(ARMRegisters::r3, output);
+#elif CPU(MIPS)
+ // Do nothing.
+#endif
+ }
+
+ void generateReturn()
+ {
+#if CPU(X86_64)
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(X86)
+ pop(X86Registers::esi);
+ pop(X86Registers::edi);
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(ARM)
+#if CPU(ARM_TRADITIONAL)
+ pop(ARMRegisters::r8); // scratch register
+#endif
+ pop(ARMRegisters::r6);
+ pop(ARMRegisters::r5);
+ pop(ARMRegisters::r4);
+#elif CPU(MIPS)
+ // Do nothing
+#endif
+ ret();
+ }
+
+public:
+ YarrGenerator(YarrPattern& pattern)
+ : m_pattern(pattern)
+ , m_shouldFallBack(false)
+ {
+ }
+
+ void generate()
+ {
+ generateEnter();
+
+ if (!m_pattern.m_body->m_hasFixedSize)
+ store32(index, Address(output));
+
+ if (m_pattern.m_body->m_callFrameSize)
+ subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ generateDisjunction(m_pattern.m_body);
+ }
+
+ void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
+ {
+ generate();
+
+ LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0);
+
+ for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i)
+ patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation));
+
+ jitObject.set(patchBuffer.finalizeCode());
+ jitObject.setFallBack(m_shouldFallBack);
+ }
+
+private:
+ YarrPattern& m_pattern;
+ bool m_shouldFallBack;
+ GenerationState m_expressionState;
+};
+
+void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
+{
+ YarrGenerator generator(pattern);
+ generator.compile(globalData, jitObject);
+}
+
+
+}}
+
+#endif
diff --git a/Source/JavaScriptCore/yarr/YarrJIT.h b/Source/JavaScriptCore/yarr/YarrJIT.h
new file mode 100644
index 0000000..dfdc777
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrJIT.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef YarrJIT_h
+#define YarrJIT_h
+
+#if ENABLE(YARR_JIT)
+
+#include "MacroAssembler.h"
+#include "YarrPattern.h"
+#include "UString.h"
+
+#if CPU(X86) && !COMPILER(MSVC)
+#define YARR_CALL __attribute__ ((regparm (3)))
+#else
+#define YARR_CALL
+#endif
+
+namespace JSC {
+
+class JSGlobalData;
+class ExecutablePool;
+
+namespace Yarr {
+
+class YarrCodeBlock {
+ typedef int (*YarrJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
+
+public:
+ YarrCodeBlock()
+ : m_needFallBack(false)
+ {
+ }
+
+ ~YarrCodeBlock()
+ {
+ }
+
+ void setFallBack(bool fallback) { m_needFallBack = fallback; }
+ bool isFallBack() { return m_needFallBack; }
+ void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
+
+ int execute(const UChar* input, unsigned start, unsigned length, int* output)
+ {
+ return reinterpret_cast<YarrJITCode>(m_ref.m_code.executableAddress())(input, start, length, output);
+ }
+
+#if ENABLE(REGEXP_TRACING)
+ void *getAddr() { return m_ref.m_code.executableAddress(); }
+#endif
+
+private:
+ MacroAssembler::CodeRef m_ref;
+ bool m_needFallBack;
+};
+
+void jitCompile(YarrPattern&, JSGlobalData*, YarrCodeBlock& jitObject);
+
+inline int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
+{
+ return jitObject.execute(input, start, length, output);
+}
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // YarrJIT_h
diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h
new file mode 100644
index 0000000..6c3fba5
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrParser.h
@@ -0,0 +1,872 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef YarrParser_h
+#define YarrParser_h
+
+#include "UString.h"
+#include <limits.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace JSC { namespace Yarr {
+
+static const unsigned quantifyInfinite = UINT_MAX;
+#define REGEXP_ERROR_PREFIX "Invalid regular expression: "
+
+enum BuiltInCharacterClassID {
+ DigitClassID,
+ SpaceClassID,
+ WordClassID,
+ NewlineClassID,
+};
+
+// The Parser class should not be used directly - only via the Yarr::parse() method.
+template<class Delegate>
+class Parser {
+private:
+ template<class FriendDelegate>
+ friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
+
+ enum ErrorCode {
+ NoError,
+ PatternTooLarge,
+ QuantifierOutOfOrder,
+ QuantifierWithoutAtom,
+ MissingParentheses,
+ ParenthesesUnmatched,
+ ParenthesesTypeInvalid,
+ CharacterClassUnmatched,
+ CharacterClassOutOfOrder,
+ EscapeUnterminated,
+ NumberOfErrorCodes
+ };
+
+ /*
+ * CharacterClassParserDelegate:
+ *
+ * The class CharacterClassParserDelegate is used in the parsing of character
+ * classes. This class handles detection of character ranges. This class
+ * implements enough of the delegate interface such that it can be passed to
+ * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
+ * to perform the parsing of escape characters in character sets.
+ */
+ class CharacterClassParserDelegate {
+ public:
+ CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
+ : m_delegate(delegate)
+ , m_err(err)
+ , m_state(Empty)
+ {
+ }
+
+ /*
+ * begin():
+ *
+ * Called at beginning of construction.
+ */
+ void begin(bool invert)
+ {
+ m_delegate.atomCharacterClassBegin(invert);
+ }
+
+ /*
+ * atomPatternCharacter():
+ *
+ * This method is called either from parseCharacterClass() (for an unescaped
+ * character in a character class), or from parseEscape(). In the former case
+ * the value true will be passed for the argument 'hyphenIsRange', and in this
+ * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
+ * is different to /[a\-z]/).
+ */
+ void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
+ {
+ switch (m_state) {
+ case AfterCharacterClass:
+ // Following a builtin character class we need look out for a hyphen.
+ // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
+ // If we see a hyphen following a charater class then unlike usual
+ // we'll report it to the delegate immediately, and put ourself into
+ // a poisoned state. Any following calls to add another character or
+ // character class will result in an error. (A hypen following a
+ // character-class is itself valid, but only at the end of a regex).
+ if (hyphenIsRange && ch == '-') {
+ m_delegate.atomCharacterClassAtom('-');
+ m_state = AfterCharacterClassHyphen;
+ return;
+ }
+ // Otherwise just fall through - cached character so treat this as Empty.
+
+ case Empty:
+ m_character = ch;
+ m_state = CachedCharacter;
+ return;
+
+ case CachedCharacter:
+ if (hyphenIsRange && ch == '-')
+ m_state = CachedCharacterHyphen;
+ else {
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_character = ch;
+ }
+ return;
+
+ case CachedCharacterHyphen:
+ if (ch < m_character) {
+ m_err = CharacterClassOutOfOrder;
+ return;
+ }
+ m_delegate.atomCharacterClassRange(m_character, ch);
+ m_state = Empty;
+ return;
+
+ // See coment in atomBuiltInCharacterClass below.
+ // This too is technically an error, per ECMA-262, and again we
+ // we chose to allow this. Note a subtlely here that while we
+ // diverge from the spec's definition of CharacterRange we do
+ // remain in compliance with the grammar. For example, consider
+ // the expression /[\d-a-z]/. We comply with the grammar in
+ // this case by not allowing a-z to be matched as a range.
+ case AfterCharacterClassHyphen:
+ m_delegate.atomCharacterClassAtom(ch);
+ m_state = Empty;
+ return;
+ }
+ }
+
+ /*
+ * atomBuiltInCharacterClass():
+ *
+ * Adds a built-in character class, called by parseEscape().
+ */
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ switch (m_state) {
+ case CachedCharacter:
+ // Flush the currently cached character, then fall through.
+ m_delegate.atomCharacterClassAtom(m_character);
+
+ case Empty:
+ case AfterCharacterClass:
+ m_state = AfterCharacterClass;
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ return;
+
+ // If we hit either of these cases, we have an invalid range that
+ // looks something like /[x-\d]/ or /[\d-\d]/.
+ // According to ECMA-262 this should be a syntax error, but
+ // empirical testing shows this to break teh webz. Instead we
+ // comply with to the ECMA-262 grammar, and assume the grammar to
+ // have matched the range correctly, but tweak our interpretation
+ // of CharacterRange. Effectively we implicitly handle the hyphen
+ // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
+ case CachedCharacterHyphen:
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_delegate.atomCharacterClassAtom('-');
+ // fall through
+ case AfterCharacterClassHyphen:
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ m_state = Empty;
+ return;
+ }
+ }
+
+ /*
+ * end():
+ *
+ * Called at end of construction.
+ */
+ void end()
+ {
+ if (m_state == CachedCharacter)
+ m_delegate.atomCharacterClassAtom(m_character);
+ else if (m_state == CachedCharacterHyphen) {
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_delegate.atomCharacterClassAtom('-');
+ }
+ m_delegate.atomCharacterClassEnd();
+ }
+
+ // parseEscape() should never call these delegate methods when
+ // invoked with inCharacterClass set.
+ void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
+ void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
+
+ private:
+ Delegate& m_delegate;
+ ErrorCode& m_err;
+ enum CharacterClassConstructionState {
+ Empty,
+ CachedCharacter,
+ CachedCharacterHyphen,
+ AfterCharacterClass,
+ AfterCharacterClassHyphen,
+ } m_state;
+ UChar m_character;
+ };
+
+ Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
+ : m_delegate(delegate)
+ , m_backReferenceLimit(backReferenceLimit)
+ , m_err(NoError)
+ , m_data(pattern.characters())
+ , m_size(pattern.length())
+ , m_index(0)
+ , m_parenthesesNestingDepth(0)
+ {
+ }
+
+ /*
+ * parseEscape():
+ *
+ * Helper for parseTokens() AND parseCharacterClass().
+ * Unlike the other parser methods, this function does not report tokens
+ * directly to the member delegate (m_delegate), instead tokens are
+ * emitted to the delegate provided as an argument. In the case of atom
+ * escapes, parseTokens() will call parseEscape() passing m_delegate as
+ * an argument, and as such the escape will be reported to the delegate.
+ *
+ * However this method may also be used by parseCharacterClass(), in which
+ * case a CharacterClassParserDelegate will be passed as the delegate that
+ * tokens should be added to. A boolean flag is also provided to indicate
+ * whether that an escape in a CharacterClass is being parsed (some parsing
+ * rules change in this context).
+ *
+ * The boolean value returned by this method indicates whether the token
+ * parsed was an atom (outside of a characted class \b and \B will be
+ * interpreted as assertions).
+ */
+ template<bool inCharacterClass, class EscapeDelegate>
+ bool parseEscape(EscapeDelegate& delegate)
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '\\');
+ consume();
+
+ if (atEndOfPattern()) {
+ m_err = EscapeUnterminated;
+ return false;
+ }
+
+ switch (peek()) {
+ // Assertions
+ case 'b':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('\b');
+ else {
+ delegate.assertionWordBoundary(false);
+ return false;
+ }
+ break;
+ case 'B':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('B');
+ else {
+ delegate.assertionWordBoundary(true);
+ return false;
+ }
+ break;
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, false);
+ break;
+ case 's':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, false);
+ break;
+ case 'w':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, false);
+ break;
+ case 'D':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, true);
+ break;
+ case 'S':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, true);
+ break;
+ case 'W':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, true);
+ break;
+
+ // DecimalEscape
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
+ // First, try to parse this as backreference.
+ if (!inCharacterClass) {
+ ParseState state = saveState();
+
+ unsigned backReference = consumeNumber();
+ if (backReference <= m_backReferenceLimit) {
+ delegate.atomBackReference(backReference);
+ break;
+ }
+
+ restoreState(state);
+ }
+
+ // Not a backreference, and not octal.
+ if (peek() >= '8') {
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // Fall-through to handle this as an octal escape.
+ }
+
+ // Octal escape
+ case '0':
+ delegate.atomPatternCharacter(consumeOctal());
+ break;
+
+ // ControlEscape
+ case 'f':
+ consume();
+ delegate.atomPatternCharacter('\f');
+ break;
+ case 'n':
+ consume();
+ delegate.atomPatternCharacter('\n');
+ break;
+ case 'r':
+ consume();
+ delegate.atomPatternCharacter('\r');
+ break;
+ case 't':
+ consume();
+ delegate.atomPatternCharacter('\t');
+ break;
+ case 'v':
+ consume();
+ delegate.atomPatternCharacter('\v');
+ break;
+
+ // ControlLetter
+ case 'c': {
+ ParseState state = saveState();
+ consume();
+ if (!atEndOfPattern()) {
+ int control = consume();
+
+ // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
+ if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
+ delegate.atomPatternCharacter(control & 0x1f);
+ break;
+ }
+ }
+ restoreState(state);
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+ int x = tryConsumeHex(2);
+ if (x == -1)
+ delegate.atomPatternCharacter('x');
+ else
+ delegate.atomPatternCharacter(x);
+ break;
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+ int u = tryConsumeHex(4);
+ if (u == -1)
+ delegate.atomPatternCharacter('u');
+ else
+ delegate.atomPatternCharacter(u);
+ break;
+ }
+
+ // IdentityEscape
+ default:
+ delegate.atomPatternCharacter(consume());
+ }
+
+ return true;
+ }
+
+ /*
+ * parseAtomEscape(), parseCharacterClassEscape():
+ *
+ * These methods alias to parseEscape().
+ */
+ bool parseAtomEscape()
+ {
+ return parseEscape<false>(m_delegate);
+ }
+ void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
+ {
+ parseEscape<true>(delegate);
+ }
+
+ /*
+ * parseCharacterClass():
+ *
+ * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
+ * to an instance of CharacterClassParserDelegate, to describe the character class to the
+ * delegate.
+ */
+ void parseCharacterClass()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '[');
+ consume();
+
+ CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
+
+ characterClassConstructor.begin(tryConsume('^'));
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case ']':
+ consume();
+ characterClassConstructor.end();
+ return;
+
+ case '\\':
+ parseCharacterClassEscape(characterClassConstructor);
+ break;
+
+ default:
+ characterClassConstructor.atomPatternCharacter(consume(), true);
+ }
+
+ if (m_err)
+ return;
+ }
+
+ m_err = CharacterClassUnmatched;
+ }
+
+ /*
+ * parseParenthesesBegin():
+ *
+ * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
+ */
+ void parseParenthesesBegin()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '(');
+ consume();
+
+ if (tryConsume('?')) {
+ if (atEndOfPattern()) {
+ m_err = ParenthesesTypeInvalid;
+ return;
+ }
+
+ switch (consume()) {
+ case ':':
+ m_delegate.atomParenthesesSubpatternBegin(false);
+ break;
+
+ case '=':
+ m_delegate.atomParentheticalAssertionBegin();
+ break;
+
+ case '!':
+ m_delegate.atomParentheticalAssertionBegin(true);
+ break;
+
+ default:
+ m_err = ParenthesesTypeInvalid;
+ }
+ } else
+ m_delegate.atomParenthesesSubpatternBegin();
+
+ ++m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseParenthesesEnd():
+ *
+ * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
+ */
+ void parseParenthesesEnd()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == ')');
+ consume();
+
+ if (m_parenthesesNestingDepth > 0)
+ m_delegate.atomParenthesesEnd();
+ else
+ m_err = ParenthesesUnmatched;
+
+ --m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseQuantifier():
+ *
+ * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
+ */
+ void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
+ {
+ ASSERT(!m_err);
+ ASSERT(min <= max);
+
+ if (lastTokenWasAnAtom)
+ m_delegate.quantifyAtom(min, max, !tryConsume('?'));
+ else
+ m_err = QuantifierWithoutAtom;
+ }
+
+ /*
+ * parseTokens():
+ *
+ * This method loops over the input pattern reporting tokens to the delegate.
+ * The method returns when a parse error is detected, or the end of the pattern
+ * is reached. One piece of state is tracked around the loop, which is whether
+ * the last token passed to the delegate was an atom (this is necessary to detect
+ * a parse error when a quantifier provided without an atom to quantify).
+ */
+ void parseTokens()
+ {
+ bool lastTokenWasAnAtom = false;
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case '|':
+ consume();
+ m_delegate.disjunction();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '(':
+ parseParenthesesBegin();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case ')':
+ parseParenthesesEnd();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '^':
+ consume();
+ m_delegate.assertionBOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '$':
+ consume();
+ m_delegate.assertionEOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '.':
+ consume();
+ m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '[':
+ parseCharacterClass();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '\\':
+ lastTokenWasAnAtom = parseAtomEscape();
+ break;
+
+ case '*':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '+':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '?':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, 1);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '{': {
+ ParseState state = saveState();
+
+ consume();
+ if (peekIsDigit()) {
+ unsigned min = consumeNumber();
+ unsigned max = min;
+
+ if (tryConsume(','))
+ max = peekIsDigit() ? consumeNumber() : quantifyInfinite;
+
+ if (tryConsume('}')) {
+ if (min <= max)
+ parseQuantifier(lastTokenWasAnAtom, min, max);
+ else
+ m_err = QuantifierOutOfOrder;
+ lastTokenWasAnAtom = false;
+ break;
+ }
+ }
+
+ restoreState(state);
+ } // if we did not find a complete quantifer, fall through to the default case.
+
+ default:
+ m_delegate.atomPatternCharacter(consume());
+ lastTokenWasAnAtom = true;
+ }
+
+ if (m_err)
+ return;
+ }
+
+ if (m_parenthesesNestingDepth > 0)
+ m_err = MissingParentheses;
+ }
+
+ /*
+ * parse():
+ *
+ * This method calls parseTokens() to parse over the input and converts any
+ * error code to a const char* for a result.
+ */
+ const char* parse()
+ {
+ if (m_size > MAX_PATTERN_SIZE)
+ m_err = PatternTooLarge;
+ else
+ parseTokens();
+ ASSERT(atEndOfPattern() || m_err);
+
+ // The order of this array must match the ErrorCode enum.
+ static const char* errorMessages[NumberOfErrorCodes] = {
+ 0, // NoError
+ REGEXP_ERROR_PREFIX "regular expression too large",
+ REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier",
+ REGEXP_ERROR_PREFIX "nothing to repeat",
+ REGEXP_ERROR_PREFIX "missing )",
+ REGEXP_ERROR_PREFIX "unmatched parentheses",
+ REGEXP_ERROR_PREFIX "unrecognized character after (?",
+ REGEXP_ERROR_PREFIX "missing terminating ] for character class",
+ REGEXP_ERROR_PREFIX "range out of order in character class",
+ REGEXP_ERROR_PREFIX "\\ at end of pattern"
+ };
+
+ return errorMessages[m_err];
+ }
+
+
+ // Misc helper functions:
+
+ typedef unsigned ParseState;
+
+ ParseState saveState()
+ {
+ return m_index;
+ }
+
+ void restoreState(ParseState state)
+ {
+ m_index = state;
+ }
+
+ bool atEndOfPattern()
+ {
+ ASSERT(m_index <= m_size);
+ return m_index == m_size;
+ }
+
+ int peek()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index];
+ }
+
+ bool peekIsDigit()
+ {
+ return !atEndOfPattern() && WTF::isASCIIDigit(peek());
+ }
+
+ unsigned peekDigit()
+ {
+ ASSERT(peekIsDigit());
+ return peek() - '0';
+ }
+
+ int consume()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index++];
+ }
+
+ unsigned consumeDigit()
+ {
+ ASSERT(peekIsDigit());
+ return consume() - '0';
+ }
+
+ unsigned consumeNumber()
+ {
+ unsigned n = consumeDigit();
+ // check for overflow.
+ for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
+ n = newValue;
+ consume();
+ }
+ return n;
+ }
+
+ unsigned consumeOctal()
+ {
+ ASSERT(WTF::isASCIIOctalDigit(peek()));
+
+ unsigned n = consumeDigit();
+ while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
+ n = n * 8 + consumeDigit();
+ return n;
+ }
+
+ bool tryConsume(UChar ch)
+ {
+ if (atEndOfPattern() || (m_data[m_index] != ch))
+ return false;
+ ++m_index;
+ return true;
+ }
+
+ int tryConsumeHex(int count)
+ {
+ ParseState state = saveState();
+
+ int n = 0;
+ while (count--) {
+ if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
+ restoreState(state);
+ return -1;
+ }
+ n = (n << 4) | WTF::toASCIIHexValue(consume());
+ }
+ return n;
+ }
+
+ Delegate& m_delegate;
+ unsigned m_backReferenceLimit;
+ ErrorCode m_err;
+ const UChar* m_data;
+ unsigned m_size;
+ unsigned m_index;
+ unsigned m_parenthesesNestingDepth;
+
+ // Derived by empirical testing of compile time in PCRE and WREC.
+ static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
+};
+
+/*
+ * Yarr::parse():
+ *
+ * The parse method is passed a pattern to be parsed and a delegate upon which
+ * callbacks will be made to record the parsed tokens forming the regex.
+ * Yarr::parse() returns null on success, or a const C string providing an error
+ * message where a parse error occurs.
+ *
+ * The Delegate must implement the following interface:
+ *
+ * void assertionBOL();
+ * void assertionEOL();
+ * void assertionWordBoundary(bool invert);
+ *
+ * void atomPatternCharacter(UChar ch);
+ * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
+ * void atomCharacterClassBegin(bool invert)
+ * void atomCharacterClassAtom(UChar ch)
+ * void atomCharacterClassRange(UChar begin, UChar end)
+ * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ * void atomCharacterClassEnd()
+ * void atomParenthesesSubpatternBegin(bool capture = true);
+ * void atomParentheticalAssertionBegin(bool invert = false);
+ * void atomParenthesesEnd();
+ * void atomBackReference(unsigned subpatternId);
+ *
+ * void quantifyAtom(unsigned min, unsigned max, bool greedy);
+ *
+ * void disjunction();
+ *
+ * The regular expression is described by a sequence of assertion*() and atom*()
+ * callbacks to the delegate, describing the terms in the regular expression.
+ * Following an atom a quantifyAtom() call may occur to indicate that the previous
+ * atom should be quantified. In the case of atoms described across multiple
+ * calls (parentheses and character classes) the call to quantifyAtom() will come
+ * after the call to the atom*End() method, never after atom*Begin().
+ *
+ * Character classes may either be described by a single call to
+ * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
+ * In the latter case, ...Begin() will be called, followed by a sequence of
+ * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
+ *
+ * Sequences of atoms and assertions are broken into alternatives via calls to
+ * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
+ * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
+ * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
+ * capturing subpattern, this will be the subpatternId associated with these
+ * parentheses, and will also by definition be the lowest subpatternId of these
+ * parentheses and of any nested paretheses. The atomParenthesesEnd() method
+ * is passed the subpatternId of the last capturing subexpression nested within
+ * these paretheses. In the case of a capturing subpattern with no nested
+ * capturing subpatterns, the same subpatternId will be passed to the begin and
+ * end functions. In the case of non-capturing subpatterns the subpatternId
+ * passed to the begin method is also the first possible subpatternId that might
+ * be nested within these paretheses. If a set of non-capturing parentheses does
+ * not contain any capturing subpatterns, then the subpatternId passed to begin
+ * will be greater than the subpatternId passed to end.
+ */
+
+template<class Delegate>
+const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite)
+{
+ return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
+}
+
+} } // namespace JSC::Yarr
+
+#endif // YarrParser_h
diff --git a/Source/JavaScriptCore/yarr/YarrPattern.cpp b/Source/JavaScriptCore/yarr/YarrPattern.cpp
new file mode 100644
index 0000000..9ca04cb
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrPattern.cpp
@@ -0,0 +1,985 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "YarrPattern.h"
+
+#include "YarrInterpreter.h"
+#include <wtf/Vector.h>
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+#include "RegExpJitTables.h"
+
+class CharacterClassConstructor {
+public:
+ CharacterClassConstructor(bool isCaseInsensitive = false)
+ : m_isCaseInsensitive(isCaseInsensitive)
+ {
+ }
+
+ void reset()
+ {
+ m_matches.clear();
+ m_ranges.clear();
+ m_matchesUnicode.clear();
+ m_rangesUnicode.clear();
+ }
+
+ void append(const CharacterClass* other)
+ {
+ for (size_t i = 0; i < other->m_matches.size(); ++i)
+ addSorted(m_matches, other->m_matches[i]);
+ for (size_t i = 0; i < other->m_ranges.size(); ++i)
+ addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
+ for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
+ addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
+ for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
+ addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
+ }
+
+ void putChar(UChar ch)
+ {
+ if (ch <= 0x7f) {
+ if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
+ addSorted(m_matches, toASCIIUpper(ch));
+ addSorted(m_matches, toASCIILower(ch));
+ } else
+ addSorted(m_matches, ch);
+ } else {
+ UChar upper, lower;
+ if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
+ addSorted(m_matchesUnicode, upper);
+ addSorted(m_matchesUnicode, lower);
+ } else
+ addSorted(m_matchesUnicode, ch);
+ }
+ }
+
+ // returns true if this character has another case, and 'ch' is the upper case form.
+ static inline bool isUnicodeUpper(UChar ch)
+ {
+ return ch != Unicode::toLower(ch);
+ }
+
+ // returns true if this character has another case, and 'ch' is the lower case form.
+ static inline bool isUnicodeLower(UChar ch)
+ {
+ return ch != Unicode::toUpper(ch);
+ }
+
+ void putRange(UChar lo, UChar hi)
+ {
+ if (lo <= 0x7f) {
+ char asciiLo = lo;
+ char asciiHi = std::min(hi, (UChar)0x7f);
+ addSortedRange(m_ranges, lo, asciiHi);
+
+ if (m_isCaseInsensitive) {
+ if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
+ if ((asciiLo <= 'z') && (asciiHi >= 'a'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
+ }
+ }
+ if (hi >= 0x80) {
+ uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
+ addSortedRange(m_rangesUnicode, unicodeCurr, hi);
+
+ if (m_isCaseInsensitive) {
+ while (unicodeCurr <= hi) {
+ // If the upper bound of the range (hi) is 0xffff, the increments to
+ // unicodeCurr in this loop may take it to 0x10000. This is fine
+ // (if so we won't re-enter the loop, since the loop condition above
+ // will definitely fail) - but this does mean we cannot use a UChar
+ // to represent unicodeCurr, we must use a 32-bit value instead.
+ ASSERT(unicodeCurr <= 0xffff);
+
+ if (isUnicodeUpper(unicodeCurr)) {
+ UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
+ UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
+ lowerCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
+ } else if (isUnicodeLower(unicodeCurr)) {
+ UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
+ UChar upperCaseRangeEnd = upperCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
+ upperCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
+ } else
+ ++unicodeCurr;
+ }
+ }
+ }
+ }
+
+ CharacterClass* charClass()
+ {
+ CharacterClass* characterClass = new CharacterClass(0);
+
+ characterClass->m_matches.append(m_matches);
+ characterClass->m_ranges.append(m_ranges);
+ characterClass->m_matchesUnicode.append(m_matchesUnicode);
+ characterClass->m_rangesUnicode.append(m_rangesUnicode);
+
+ reset();
+
+ return characterClass;
+ }
+
+private:
+ void addSorted(Vector<UChar>& matches, UChar ch)
+ {
+ unsigned pos = 0;
+ unsigned range = matches.size();
+
+ // binary chop, find position to insert char.
+ while (range) {
+ unsigned index = range >> 1;
+
+ int val = matches[pos+index] - ch;
+ if (!val)
+ return;
+ else if (val > 0)
+ range = index;
+ else {
+ pos += (index+1);
+ range -= (index+1);
+ }
+ }
+
+ if (pos == matches.size())
+ matches.append(ch);
+ else
+ matches.insert(pos, ch);
+ }
+
+ void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
+ {
+ unsigned end = ranges.size();
+
+ // Simple linear scan - I doubt there are that many ranges anyway...
+ // feel free to fix this with something faster (eg binary chop).
+ for (unsigned i = 0; i < end; ++i) {
+ // does the new range fall before the current position in the array
+ if (hi < ranges[i].begin) {
+ // optional optimization: concatenate appending ranges? - may not be worthwhile.
+ if (hi == (ranges[i].begin - 1)) {
+ ranges[i].begin = lo;
+ return;
+ }
+ ranges.insert(i, CharacterRange(lo, hi));
+ return;
+ }
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
+ // end of the last range they concatenate, which is just as good.
+ if (lo <= (ranges[i].end + 1)) {
+ // found an intersect! we'll replace this entry in the array.
+ ranges[i].begin = std::min(ranges[i].begin, lo);
+ ranges[i].end = std::max(ranges[i].end, hi);
+
+ // now check if the new range can subsume any subsequent ranges.
+ unsigned next = i+1;
+ // each iteration of the loop we will either remove something from the list, or break the loop.
+ while (next < ranges.size()) {
+ if (ranges[next].begin <= (ranges[i].end + 1)) {
+ // the next entry now overlaps / concatenates this one.
+ ranges[i].end = std::max(ranges[i].end, ranges[next].end);
+ ranges.remove(next);
+ } else
+ break;
+ }
+
+ return;
+ }
+ }
+
+ // CharacterRange comes after all existing ranges.
+ ranges.append(CharacterRange(lo, hi));
+ }
+
+ bool m_isCaseInsensitive;
+
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+};
+
+struct BeginCharHelper {
+ BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
+ : m_beginChars(beginChars)
+ , m_isCaseInsensitive(isCaseInsensitive)
+ {}
+
+ void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
+ {
+ if (quantityType == QuantifierFixedCount && quantityCount > 1) {
+ // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/
+ beginChar.value |= beginChar.value << 16;
+ beginChar.mask |= beginChar.mask << 16;
+ addCharacter(beginChar);
+ } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())
+ // In case of characters with fixed quantifier we should check the next character as well.
+ linkHotTerms(beginChar, hotTerms);
+ else
+ // In case of greedy matching the next character checking is unnecessary therefore we just store
+ // the first character.
+ addCharacter(beginChar);
+ }
+
+ // Merge two following BeginChars in the vector to reduce the number of character checks.
+ void merge(unsigned size)
+ {
+ for (unsigned i = 0; i < size; i++) {
+ BeginChar* curr = &m_beginChars->at(i);
+ BeginChar* next = &m_beginChars->at(i + 1);
+
+ // If the current and the next size of value is different we should skip the merge process
+ // because the 16bit and 32bit values are unmergable.
+ if (curr->value <= 0xFFFF && next->value > 0xFFFF)
+ continue;
+
+ unsigned diff = curr->value ^ next->value;
+
+ curr->mask |= diff;
+ curr->value |= curr->mask;
+
+ m_beginChars->remove(i + 1);
+ size--;
+ }
+ }
+
+private:
+ void addCharacter(BeginChar beginChar)
+ {
+ unsigned pos = 0;
+ unsigned range = m_beginChars->size();
+
+ // binary chop, find position to insert char.
+ while (range) {
+ unsigned index = range >> 1;
+
+ int val = m_beginChars->at(pos+index).value - beginChar.value;
+ if (!val)
+ return;
+ if (val < 0)
+ range = index;
+ else {
+ pos += (index+1);
+ range -= (index+1);
+ }
+ }
+
+ if (pos == m_beginChars->size())
+ m_beginChars->append(beginChar);
+ else
+ m_beginChars->insert(pos, beginChar);
+ }
+
+ // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.
+ void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
+ {
+ for (unsigned i = 0; i < hotTerms->size(); i++) {
+ PatternTerm hotTerm = hotTerms->at(i).term;
+ ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);
+
+ UChar characterNext = hotTerm.patternCharacter;
+
+ // Append a character to an existing BeginChar object.
+ if (characterNext <= 0x7f) {
+ unsigned mask = 0;
+
+ if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
+ mask = 32;
+ characterNext = toASCIILower(characterNext);
+ }
+
+ addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
+ } else {
+ UChar upper, lower;
+ if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
+ addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
+ addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
+ } else
+ addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
+ }
+ }
+ }
+
+ Vector<BeginChar>* m_beginChars;
+ bool m_isCaseInsensitive;
+};
+
+class YarrPatternConstructor {
+public:
+ YarrPatternConstructor(YarrPattern& pattern)
+ : m_pattern(pattern)
+ , m_characterClassConstructor(pattern.m_ignoreCase)
+ , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)
+ , m_invertParentheticalAssertion(false)
+ {
+ m_pattern.m_body = new PatternDisjunction();
+ m_alternative = m_pattern.m_body->addNewAlternative();
+ m_pattern.m_disjunctions.append(m_pattern.m_body);
+ }
+
+ ~YarrPatternConstructor()
+ {
+ }
+
+ void reset()
+ {
+ m_pattern.reset();
+ m_characterClassConstructor.reset();
+
+ m_pattern.m_body = new PatternDisjunction();
+ m_alternative = m_pattern.m_body->addNewAlternative();
+ m_pattern.m_disjunctions.append(m_pattern.m_body);
+ }
+
+ void assertionBOL()
+ {
+ if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
+ m_alternative->m_startsWithBOL = true;
+ m_alternative->m_containsBOL = true;
+ m_pattern.m_containsBOL = true;
+ }
+ m_alternative->m_terms.append(PatternTerm::BOL());
+ }
+ void assertionEOL()
+ {
+ m_alternative->m_terms.append(PatternTerm::EOL());
+ }
+ void assertionWordBoundary(bool invert)
+ {
+ m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
+ }
+
+ void atomPatternCharacter(UChar ch)
+ {
+ // We handle case-insensitive checking of unicode characters which do have both
+ // cases by handling them as if they were defined using a CharacterClass.
+ if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
+ atomCharacterClassBegin();
+ atomCharacterClassAtom(ch);
+ atomCharacterClassEnd();
+ } else
+ m_alternative->m_terms.append(PatternTerm(ch));
+ }
+
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ switch (classID) {
+ case DigitClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
+ break;
+ case SpaceClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
+ break;
+ case WordClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+ break;
+ case NewlineClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
+ break;
+ }
+ }
+
+ void atomCharacterClassBegin(bool invert = false)
+ {
+ m_invertCharacterClass = invert;
+ }
+
+ void atomCharacterClassAtom(UChar ch)
+ {
+ m_characterClassConstructor.putChar(ch);
+ }
+
+ void atomCharacterClassRange(UChar begin, UChar end)
+ {
+ m_characterClassConstructor.putRange(begin, end);
+ }
+
+ void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ {
+ ASSERT(classID != NewlineClassID);
+
+ switch (classID) {
+ case DigitClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
+ break;
+
+ case SpaceClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
+ break;
+
+ case WordClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+ break;
+
+ default:
+ ASSERT_NOT_REACHED();
+ }
+ }
+
+ void atomCharacterClassEnd()
+ {
+ CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
+ m_pattern.m_userCharacterClasses.append(newCharacterClass);
+ m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
+ }
+
+ void atomParenthesesSubpatternBegin(bool capture = true)
+ {
+ unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
+ if (capture)
+ m_pattern.m_numSubpatterns++;
+
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ }
+
+ void atomParentheticalAssertionBegin(bool invert = false)
+ {
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ m_invertParentheticalAssertion = invert;
+ }
+
+ void atomParenthesesEnd()
+ {
+ ASSERT(m_alternative->m_parent);
+ ASSERT(m_alternative->m_parent->m_parent);
+
+ PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
+ m_alternative = m_alternative->m_parent->m_parent;
+
+ PatternTerm& lastTerm = m_alternative->lastTerm();
+
+ unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
+ unsigned numBOLAnchoredAlts = 0;
+ // Bubble up BOL flags
+ for (unsigned i = 0; i < numParenAlternatives; i++) {
+ if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
+ numBOLAnchoredAlts++;
+ }
+
+ if (numBOLAnchoredAlts) {
+ m_alternative->m_containsBOL = true;
+ // If all the alternatives in parens start with BOL, then so does this one
+ if (numBOLAnchoredAlts == numParenAlternatives)
+ m_alternative->m_startsWithBOL = true;
+ }
+
+ lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+ m_invertParentheticalAssertion = false;
+ }
+
+ void atomBackReference(unsigned subpatternId)
+ {
+ ASSERT(subpatternId);
+ m_pattern.m_containsBackreferences = true;
+ m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
+
+ if (subpatternId > m_pattern.m_numSubpatterns) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+
+ PatternAlternative* currentAlternative = m_alternative;
+ ASSERT(currentAlternative);
+
+ // Note to self: if we waited until the AST was baked, we could also remove forwards refs
+ while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
+ PatternTerm& term = currentAlternative->lastTerm();
+ ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
+
+ if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+ }
+
+ m_alternative->m_terms.append(PatternTerm(subpatternId));
+ }
+
+ // deep copy the argument disjunction. If filterStartsWithBOL is true,
+ // skip alternatives with m_startsWithBOL set true.
+ PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
+ {
+ PatternDisjunction* newDisjunction = 0;
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
+ if (!newDisjunction) {
+ newDisjunction = new PatternDisjunction();
+ newDisjunction->m_parent = disjunction->m_parent;
+ }
+ PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
+ newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
+ }
+ }
+
+ if (newDisjunction)
+ m_pattern.m_disjunctions.append(newDisjunction);
+ return newDisjunction;
+ }
+
+ PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
+ {
+ if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
+ return PatternTerm(term);
+
+ PatternTerm termCopy = term;
+ termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
+ return termCopy;
+ }
+
+ void quantifyAtom(unsigned min, unsigned max, bool greedy)
+ {
+ ASSERT(min <= max);
+ ASSERT(m_alternative->m_terms.size());
+
+ if (!max) {
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ PatternTerm& term = m_alternative->lastTerm();
+ ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
+ ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
+
+ // For any assertion with a zero minimum, not matching is valid and has no effect,
+ // remove it. Otherwise, we need to match as least once, but there is no point
+ // matching more than once, so remove the quantifier. It is not entirely clear
+ // from the spec whether or not this behavior is correct, but I believe this
+ // matches Firefox. :-/
+ if (term.type == PatternTerm::TypeParentheticalAssertion) {
+ if (!min)
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ if (min == 0)
+ term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ else if (min == max)
+ term.quantify(min, QuantifierFixedCount);
+ else {
+ term.quantify(min, QuantifierFixedCount);
+ m_alternative->m_terms.append(copyTerm(term));
+ // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
+ m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
+ m_alternative->lastTerm().parentheses.isCopy = true;
+ }
+ }
+
+ void disjunction()
+ {
+ m_alternative = m_alternative->m_parent->addNewAlternative();
+ }
+
+ unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
+ {
+ alternative->m_hasFixedSize = true;
+ unsigned currentInputPosition = initialInputPosition;
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ case PatternTerm::TypeAssertionEOL:
+ case PatternTerm::TypeAssertionWordBoundary:
+ term.inputPosition = currentInputPosition;
+ break;
+
+ case PatternTerm::TypeBackReference:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference;
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
+ term.frameLocation = currentCallFrameSize;
+ if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+ if (term.quantityType != QuantifierFixedCount)
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ // If quantity is fixed, then pre-check its minimum size.
+ if (term.quantityType == QuantifierFixedCount)
+ currentInputPosition += term.parentheses.disjunction->m_minimumSize;
+ term.inputPosition = currentInputPosition;
+ } else if (term.parentheses.isTerminal) {
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ term.inputPosition = currentInputPosition;
+ } else {
+ term.inputPosition = currentInputPosition;
+ setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition);
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
+ }
+ // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
+ break;
+ }
+ }
+
+ alternative->m_minimumSize = currentInputPosition - initialInputPosition;
+ return currentCallFrameSize;
+ }
+
+ unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
+ {
+ if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
+ initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative;
+
+ unsigned minimumInputSize = UINT_MAX;
+ unsigned maximumCallFrameSize = 0;
+ bool hasFixedSize = true;
+
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
+ minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
+ maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
+ hasFixedSize &= alternative->m_hasFixedSize;
+ }
+
+ ASSERT(minimumInputSize != UINT_MAX);
+ ASSERT(maximumCallFrameSize >= initialCallFrameSize);
+
+ disjunction->m_hasFixedSize = hasFixedSize;
+ disjunction->m_minimumSize = minimumInputSize;
+ disjunction->m_callFrameSize = maximumCallFrameSize;
+ return maximumCallFrameSize;
+ }
+
+ void setupOffsets()
+ {
+ setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
+ }
+
+ // This optimization identifies sets of parentheses that we will never need to backtrack.
+ // In these cases we do not need to store state from prior iterations.
+ // We can presently avoid backtracking for:
+ // * where the parens are at the end of the regular expression (last term in any of the
+ // alternatives of the main body disjunction).
+ // * where the parens are non-capturing, and quantified unbounded greedy (*).
+ // * where the parens do not contain any capturing subpatterns.
+ void checkForTerminalParentheses()
+ {
+ // This check is much too crude; should be just checking whether the candidate
+ // node contains nested capturing subpatterns, not the whole expression!
+ if (m_pattern.m_numSubpatterns)
+ return;
+
+ Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+ for (size_t i = 0; i < alternatives.size(); ++i) {
+ Vector<PatternTerm>& terms = alternatives[i]->m_terms;
+ if (terms.size()) {
+ PatternTerm& term = terms.last();
+ if (term.type == PatternTerm::TypeParenthesesSubpattern
+ && term.quantityType == QuantifierGreedy
+ && term.quantityCount == quantifyInfinite
+ && !term.capture())
+ term.parentheses.isTerminal = true;
+ }
+ }
+ }
+
+ void optimizeBOL()
+ {
+ // Look for expressions containing beginning of line (^) anchoring and unroll them.
+ // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
+ // This code relies on the parsing code tagging alternatives with m_containsBOL and
+ // m_startsWithBOL and rolling those up to containing alternatives.
+ // At this point, this is only valid for non-multiline expressions.
+ PatternDisjunction* disjunction = m_pattern.m_body;
+
+ if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
+ return;
+
+ PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
+
+ // Set alternatives in disjunction to "onceThrough"
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
+ disjunction->m_alternatives[alt]->setOnceThrough();
+
+ if (loopDisjunction) {
+ // Move alternatives from loopDisjunction to disjunction
+ for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
+ disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
+
+ loopDisjunction->m_alternatives.clear();
+ }
+ }
+
+ bool addBeginTerm(PatternTerm term, Vector<TermChain>* beginTerms, PatternAlternative* alternative, unsigned numTerms, unsigned termIndex, unsigned depth)
+ {
+ if (term.quantityType == QuantifierFixedCount) {
+ beginTerms->append(TermChain(term));
+ if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
+ setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1);
+ } else if (termIndex != numTerms - 1) {
+ beginTerms->append(TermChain(term));
+ return true;
+ }
+
+ return false;
+ }
+
+ // This function collects the terms which are potentially matching the first number of depth characters in the result.
+ // If this function returns false then it found at least one term which makes the beginning character
+ // look-up optimization inefficient.
+ bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
+ {
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+
+ if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))
+ return false;
+ }
+
+ return true;
+ }
+
+ bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
+ {
+ bool checkNext = true;
+ unsigned numTerms = alternative->m_terms.size();
+
+ while (checkNext && termIndex < numTerms) {
+ PatternTerm term = alternative->m_terms[termIndex];
+ checkNext = false;
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ case PatternTerm::TypeAssertionEOL:
+ case PatternTerm::TypeAssertionWordBoundary:
+ return false;
+
+ case PatternTerm::TypeBackReference:
+ case PatternTerm::TypeForwardReference:
+ return false;
+
+ case PatternTerm::TypePatternCharacter:
+ if (addBeginTerm(term, beginTerms, alternative, numTerms, termIndex, depth)) {
+ termIndex++;
+ checkNext = true;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ return false;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ if (term.invert())
+ return false;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ if (term.quantityType != QuantifierFixedCount) {
+ if (termIndex == numTerms - 1)
+ break;
+
+ termIndex++;
+ checkNext = true;
+
+ }
+
+ if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
+ return false;
+
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ void setupBeginChars()
+ {
+ Vector<TermChain> beginTerms;
+ bool containsFixedCharacter = false;
+
+ if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
+ && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
+ unsigned size = beginTerms.size();
+
+ // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
+ if (!size)
+ return;
+
+ m_pattern.m_containsBeginChars = true;
+
+ for (unsigned i = 0; i < size; i++) {
+ PatternTerm term = beginTerms[i].term;
+
+ // We have just collected PatternCharacter terms, other terms are not allowed.
+ ASSERT(term.type == PatternTerm::TypePatternCharacter);
+
+ if (term.quantityType == QuantifierFixedCount)
+ containsFixedCharacter = true;
+
+ UChar character = term.patternCharacter;
+ unsigned mask = 0;
+
+ if (character <= 0x7f) {
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
+ mask = 32;
+ character = toASCIILower(character);
+ }
+
+ m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+ } else {
+ UChar upper, lower;
+ if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
+ m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+ m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+ } else
+ m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+ }
+ }
+
+ // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
+ if (!containsFixedCharacter) {
+ m_pattern.m_containsBeginChars = false;
+ return;
+ }
+
+ size = m_pattern.m_beginChars.size();
+
+ if (size > 2)
+ m_beginCharHelper.merge(size - 1);
+ else if (size <= 1)
+ m_pattern.m_containsBeginChars = false;
+ }
+ }
+
+private:
+ YarrPattern& m_pattern;
+ PatternAlternative* m_alternative;
+ CharacterClassConstructor m_characterClassConstructor;
+ BeginCharHelper m_beginCharHelper;
+ bool m_invertCharacterClass;
+ bool m_invertParentheticalAssertion;
+};
+
+
+static const char* compile(const UString& patternString, YarrPattern& pattern)
+{
+ YarrPatternConstructor constructor(pattern);
+
+ if (const char* error = parse(constructor, patternString))
+ return error;
+
+ // If the pattern contains illegal backreferences reset & reparse.
+ // Quoting Netscape's "What's new in JavaScript 1.2",
+ // "Note: if the number of left parentheses is less than the number specified
+ // in \#, the \# is taken as an octal escape as described in the next row."
+ if (pattern.containsIllegalBackReference()) {
+ unsigned numSubpatterns = pattern.m_numSubpatterns;
+
+ constructor.reset();
+#if !ASSERT_DISABLED
+ const char* error =
+#endif
+ parse(constructor, patternString, numSubpatterns);
+
+ ASSERT(!error);
+ ASSERT(numSubpatterns == pattern.m_numSubpatterns);
+ }
+
+ constructor.checkForTerminalParentheses();
+ constructor.optimizeBOL();
+
+ constructor.setupOffsets();
+ constructor.setupBeginChars();
+
+ return 0;
+};
+
+YarrPattern::YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error)
+ : m_ignoreCase(ignoreCase)
+ , m_multiline(multiline)
+ , m_containsBackreferences(false)
+ , m_containsBeginChars(false)
+ , m_containsBOL(false)
+ , m_numSubpatterns(0)
+ , m_maxBackReference(0)
+ , newlineCached(0)
+ , digitsCached(0)
+ , spacesCached(0)
+ , wordcharCached(0)
+ , nondigitsCached(0)
+ , nonspacesCached(0)
+ , nonwordcharCached(0)
+{
+ *error = compile(pattern, *this);
+}
+
+} }
diff --git a/Source/JavaScriptCore/yarr/YarrPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h
new file mode 100644
index 0000000..14810c6
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/YarrPattern.h
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef YarrPattern_h
+#define YarrPattern_h
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+#include <UString.h>
+
+namespace JSC { namespace Yarr {
+
+#define YarrStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
+#define YarrStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
+#define YarrStackSpaceForBackTrackInfoBackReference 2
+#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
+#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1
+#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1
+#define YarrStackSpaceForBackTrackInfoParentheses 2
+
+struct PatternDisjunction;
+
+struct CharacterRange {
+ UChar begin;
+ UChar end;
+
+ CharacterRange(UChar begin, UChar end)
+ : begin(begin)
+ , end(end)
+ {
+ }
+};
+
+struct CharacterClassTable : RefCounted<CharacterClassTable> {
+ const char* m_table;
+ bool m_inverted;
+ static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
+ {
+ return adoptRef(new CharacterClassTable(table, inverted));
+ }
+
+private:
+ CharacterClassTable(const char* table, bool inverted)
+ : m_table(table)
+ , m_inverted(inverted)
+ {
+ }
+};
+
+struct CharacterClass : FastAllocBase {
+ // All CharacterClass instances have to have the full set of matches and ranges,
+ // they may have an optional table for faster lookups (which must match the
+ // specified matches and ranges)
+ CharacterClass(PassRefPtr<CharacterClassTable> table)
+ : m_table(table)
+ {
+ }
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+ RefPtr<CharacterClassTable> m_table;
+};
+
+enum QuantifierType {
+ QuantifierFixedCount,
+ QuantifierGreedy,
+ QuantifierNonGreedy,
+};
+
+struct PatternTerm {
+ enum Type {
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacter,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeForwardReference,
+ TypeParenthesesSubpattern,
+ TypeParentheticalAssertion,
+ } type;
+ bool m_capture :1;
+ bool m_invert :1;
+ union {
+ UChar patternCharacter;
+ CharacterClass* characterClass;
+ unsigned backReferenceSubpatternId;
+ struct {
+ PatternDisjunction* disjunction;
+ unsigned subpatternId;
+ unsigned lastSubpatternId;
+ bool isCopy;
+ bool isTerminal;
+ } parentheses;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ int inputPosition;
+ unsigned frameLocation;
+
+ PatternTerm(UChar ch)
+ : type(PatternTerm::TypePatternCharacter)
+ , m_capture(false)
+ , m_invert(false)
+ {
+ patternCharacter = ch;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(CharacterClass* charClass, bool invert)
+ : type(PatternTerm::TypeCharacterClass)
+ , m_capture(false)
+ , m_invert(invert)
+ {
+ characterClass = charClass;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
+ : type(type)
+ , m_capture(capture)
+ , m_invert(invert)
+ {
+ parentheses.disjunction = disjunction;
+ parentheses.subpatternId = subpatternId;
+ parentheses.isCopy = false;
+ parentheses.isTerminal = false;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, bool invert = false)
+ : type(type)
+ , m_capture(false)
+ , m_invert(invert)
+ {
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(unsigned spatternId)
+ : type(TypeBackReference)
+ , m_capture(false)
+ , m_invert(false)
+ {
+ backReferenceSubpatternId = spatternId;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ static PatternTerm ForwardReference()
+ {
+ return PatternTerm(TypeForwardReference);
+ }
+
+ static PatternTerm BOL()
+ {
+ return PatternTerm(TypeAssertionBOL);
+ }
+
+ static PatternTerm EOL()
+ {
+ return PatternTerm(TypeAssertionEOL);
+ }
+
+ static PatternTerm WordBoundary(bool invert)
+ {
+ return PatternTerm(TypeAssertionWordBoundary, invert);
+ }
+
+ bool invert()
+ {
+ return m_invert;
+ }
+
+ bool capture()
+ {
+ return m_capture;
+ }
+
+ void quantify(unsigned count, QuantifierType type)
+ {
+ quantityCount = count;
+ quantityType = type;
+ }
+};
+
+struct PatternAlternative : FastAllocBase {
+ PatternAlternative(PatternDisjunction* disjunction)
+ : m_parent(disjunction)
+ , m_onceThrough(false)
+ , m_hasFixedSize(false)
+ , m_startsWithBOL(false)
+ , m_containsBOL(false)
+ {
+ }
+
+ PatternTerm& lastTerm()
+ {
+ ASSERT(m_terms.size());
+ return m_terms[m_terms.size() - 1];
+ }
+
+ void removeLastTerm()
+ {
+ ASSERT(m_terms.size());
+ m_terms.shrink(m_terms.size() - 1);
+ }
+
+ void setOnceThrough()
+ {
+ m_onceThrough = true;
+ }
+
+ bool onceThrough()
+ {
+ return m_onceThrough;
+ }
+
+ Vector<PatternTerm> m_terms;
+ PatternDisjunction* m_parent;
+ unsigned m_minimumSize;
+ bool m_onceThrough : 1;
+ bool m_hasFixedSize : 1;
+ bool m_startsWithBOL : 1;
+ bool m_containsBOL : 1;
+};
+
+struct PatternDisjunction : FastAllocBase {
+ PatternDisjunction(PatternAlternative* parent = 0)
+ : m_parent(parent)
+ , m_hasFixedSize(false)
+ {
+ }
+
+ ~PatternDisjunction()
+ {
+ deleteAllValues(m_alternatives);
+ }
+
+ PatternAlternative* addNewAlternative()
+ {
+ PatternAlternative* alternative = new PatternAlternative(this);
+ m_alternatives.append(alternative);
+ return alternative;
+ }
+
+ Vector<PatternAlternative*> m_alternatives;
+ PatternAlternative* m_parent;
+ unsigned m_minimumSize;
+ unsigned m_callFrameSize;
+ bool m_hasFixedSize;
+};
+
+// You probably don't want to be calling these functions directly
+// (please to be calling newlineCharacterClass() et al on your
+// friendly neighborhood YarrPattern instance to get nicely
+// cached copies).
+CharacterClass* newlineCreate();
+CharacterClass* digitsCreate();
+CharacterClass* spacesCreate();
+CharacterClass* wordcharCreate();
+CharacterClass* nondigitsCreate();
+CharacterClass* nonspacesCreate();
+CharacterClass* nonwordcharCreate();
+
+struct TermChain {
+ TermChain(PatternTerm term)
+ : term(term)
+ {}
+
+ PatternTerm term;
+ Vector<TermChain> hotTerms;
+};
+
+struct BeginChar {
+ BeginChar()
+ : value(0)
+ , mask(0)
+ {}
+
+ BeginChar(unsigned value, unsigned mask)
+ : value(value)
+ , mask(mask)
+ {}
+
+ unsigned value;
+ unsigned mask;
+};
+
+struct YarrPattern {
+ YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
+
+ ~YarrPattern()
+ {
+ deleteAllValues(m_disjunctions);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ void reset()
+ {
+ m_numSubpatterns = 0;
+ m_maxBackReference = 0;
+
+ m_containsBackreferences = false;
+ m_containsBeginChars = false;
+ m_containsBOL = false;
+
+ newlineCached = 0;
+ digitsCached = 0;
+ spacesCached = 0;
+ wordcharCached = 0;
+ nondigitsCached = 0;
+ nonspacesCached = 0;
+ nonwordcharCached = 0;
+
+ deleteAllValues(m_disjunctions);
+ m_disjunctions.clear();
+ deleteAllValues(m_userCharacterClasses);
+ m_userCharacterClasses.clear();
+ m_beginChars.clear();
+ }
+
+ bool containsIllegalBackReference()
+ {
+ return m_maxBackReference > m_numSubpatterns;
+ }
+
+ CharacterClass* newlineCharacterClass()
+ {
+ if (!newlineCached)
+ m_userCharacterClasses.append(newlineCached = newlineCreate());
+ return newlineCached;
+ }
+ CharacterClass* digitsCharacterClass()
+ {
+ if (!digitsCached)
+ m_userCharacterClasses.append(digitsCached = digitsCreate());
+ return digitsCached;
+ }
+ CharacterClass* spacesCharacterClass()
+ {
+ if (!spacesCached)
+ m_userCharacterClasses.append(spacesCached = spacesCreate());
+ return spacesCached;
+ }
+ CharacterClass* wordcharCharacterClass()
+ {
+ if (!wordcharCached)
+ m_userCharacterClasses.append(wordcharCached = wordcharCreate());
+ return wordcharCached;
+ }
+ CharacterClass* nondigitsCharacterClass()
+ {
+ if (!nondigitsCached)
+ m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
+ return nondigitsCached;
+ }
+ CharacterClass* nonspacesCharacterClass()
+ {
+ if (!nonspacesCached)
+ m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
+ return nonspacesCached;
+ }
+ CharacterClass* nonwordcharCharacterClass()
+ {
+ if (!nonwordcharCached)
+ m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
+ return nonwordcharCached;
+ }
+
+ bool m_ignoreCase : 1;
+ bool m_multiline : 1;
+ bool m_containsBackreferences : 1;
+ bool m_containsBeginChars : 1;
+ bool m_containsBOL : 1;
+ unsigned m_numSubpatterns;
+ unsigned m_maxBackReference;
+ PatternDisjunction* m_body;
+ Vector<PatternDisjunction*, 4> m_disjunctions;
+ Vector<CharacterClass*> m_userCharacterClasses;
+ Vector<BeginChar> m_beginChars;
+
+private:
+ CharacterClass* newlineCached;
+ CharacterClass* digitsCached;
+ CharacterClass* spacesCached;
+ CharacterClass* wordcharCached;
+ CharacterClass* nondigitsCached;
+ CharacterClass* nonspacesCached;
+ CharacterClass* nonwordcharCached;
+};
+
+} } // namespace JSC::Yarr
+
+#endif // YarrPattern_h
diff --git a/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp
index a023082..51fda94 100644
--- a/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp
+++ b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp
@@ -26,7 +26,7 @@
#include "config.h"
#include "YarrSyntaxChecker.h"
-#include "RegexParser.h"
+#include "YarrParser.h"
namespace JSC { namespace Yarr {
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list