Index: tests/phpunit/includes/parser/NestingTest.php =================================================================== --- tests/phpunit/includes/parser/NestingTest.php (revision 0) +++ tests/phpunit/includes/parser/NestingTest.php (revision 0) @@ -0,0 +1,62 @@ +123", "foo", "123", array() ), + array( "foo", "foo", "foo", array() ), + array( "AfooB", "foo", "foo", array() ), + array( "A123456789B", "foo", "123456789", array( "a" => "1", "b" => "foo" ) ), + array( "" . + "" . + "" . + "", + "ref", "", array( "name" => "citation:1" ) + ), + + array( "", "ref", "", array( "name" => "x", "foo" => "foo" ) ) + + + ); + } + + function provideNotWorkingCases() { + return array( + array( "123", "foo", null, null ), + array( "123456", "foo", "123456", array( "foo" => "123" ) ), + ); + } + + var $mInnerText; + var $mParams; + + /** + * @dataProvider provideCases + */ + function testCases( $wikiText, $tag, $expectedInnerText, $expectedParams ) { + global $wgParserConf; + $parser = new Parser( $wgParserConf ); + + $parser->setHook( $tag, array( $this, 'tagCallback' ) ); + $parserOutput = $parser->parse( $wikiText, Title::newFromText( 'Test' ), new ParserOptions ); + + $this->assertEquals( $expectedInnerText, $this->mInnerText ); + $this->assertEquals( $expectedParams, $this->mParams ); + + $parser->mPreprocessor = null; # Break the Parser <-> Preprocessor cycle + } + + function tagCallback( $innerText, $params, $parser ) { + $this->mInnerText = $innerText; + $this->mParams = $params; + + return "dummy"; + } +} Index: includes/parser/Preprocessor_DOM.php =================================================================== --- includes/parser/Preprocessor_DOM.php (revision 110110) +++ includes/parser/Preprocessor_DOM.php (working copy) @@ -413,18 +413,51 @@ $close = ''; } else { $attrEnd = $tagEndPos; - // Find closing tag - if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", - $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) - { - $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); - $i = $matches[0][1] + strlen( $matches[0][0] ); - $close = '' . htmlspecialchars( $matches[0][0] ) . ''; - } else { - // No end tag -- let it run out to the end of the text. + + // we know that one start tag was already found + $numOfStartTags = 1; + $numOfEndTags = 0; + $offset = $tagEndPos; + $pattern = "/<" . preg_quote( $name, '/' ) . "[^>]*>" . "|" . "<\/" . preg_quote( $name, '/' ) . "\s*>/i"; + + while( $numOfStartTags != $numOfEndTags ) { + // match start tag or end tag + if( preg_match( $pattern, $text, $matches, PREG_OFFSET_CAPTURE, $offset + 1 ) === 0 ) { + // nothing found + break; + } + + // what is it, start or end tag? + if( substr( $matches[ 0 ][ 0 ], 0, 2 ) === "' . htmlspecialchars( $matches[0][0] ) . ''; + + } else { + // not balanced, at least one start tag is orphan + $inner = substr( $text, $tagEndPos + 1, $offset - ( $tagEndPos + 1 ) ); + $i = $offset + strlen( $previousMatches[0][0] ); + $close = '' . htmlspecialchars( $previousMatches[0][0] ) . ''; + } } } // and just become tags