Index: tests/phpunit/includes/parser/NestingTest.php
===================================================================
--- tests/phpunit/includes/parser/NestingTest.php (revision 0)
+++ tests/phpunit/includes/parser/NestingTest.php (revision 0)
@@ -0,0 +1,62 @@
+123", "foo", "123", array() ),
+ array( "foo", "foo", "foo", array() ),
+ array( "AfooB", "foo", "foo", array() ),
+ array( "A123456789B", "foo", "123456789", array( "a" => "1", "b" => "foo" ) ),
+ array( "" .
+ "[" .
+ "][" .
+ "]",
+ "ref", "[", array( "name" => "citation:1" )
+ ),
+
+ array( "]", "ref", "", array( "name" => "x", "foo" => "foo" ) )
+
+
+ );
+ }
+
+ /**
+  * Data provider whose rows have the same four-column shape that testCases()
+  * accepts: wikitext, tag name, expected inner text, expected parameters.
+  *
+  * NOTE(review): no test in the visible part of this file references this
+  * provider; judging by the name these are inputs that are known not to
+  * behave as wanted yet -- confirm before wiring it to a test.
+  *
+  * @return array List of argument rows
+  */
+ function provideNotWorkingCases() {
+     $cases = array();
+
+     $cases[] = array( "123", "foo", null, null );
+     $cases[] = array( "123456", "foo", "123456", array( "foo" => "123" ) );
+
+     return $cases;
+ }
+
+ /** Inner text most recently handed to tagCallback(); compared against the expectation in testCases(). */
+ public $mInnerText;
+
+ /** Parameter array most recently handed to tagCallback(); compared against the expectation in testCases(). */
+ public $mParams;
+
+ /**
+  * Parses a wikitext snippet with a hook registered for $tag and checks the
+  * inner text and parameters that the hook callback received.
+  *
+  * @dataProvider provideCases
+  *
+  * @param $wikiText string Wikitext passed to Parser::parse()
+  * @param $tag string Tag name the hook is registered for
+  * @param $expectedInnerText string Inner text tagCallback() is expected to have recorded
+  * @param $expectedParams array Parameters tagCallback() is expected to have recorded
+  */
+ function testCases( $wikiText, $tag, $expectedInnerText, $expectedParams ) {
+     global $wgParserConf;
+     $parser = new Parser( $wgParserConf );
+     $parser->setHook( $tag, array( $this, 'tagCallback' ) );
+
+     // Only the side effects of the hook are of interest, so the value
+     // returned by parse() is not kept.
+     $parser->parse( $wikiText, Title::newFromText( 'Test' ), new ParserOptions );
+
+     // Break the Parser <-> Preprocessor cycle *before* asserting: a failing
+     // assertion throws, which would otherwise skip this cleanup.
+     $parser->mPreprocessor = null;
+
+     $this->assertEquals( $expectedInnerText, $this->mInnerText );
+     $this->assertEquals( $expectedParams, $this->mParams );
+ }
+
+ /**
+  * Tag hook that testCases() registers through Parser::setHook(). It only
+  * records its first two arguments on the test object so that they can be
+  * asserted on afterwards.
+  *
+  * @param $innerText mixed Stored in $this->mInnerText
+  * @param $params mixed Stored in $this->mParams
+  * @param $parser mixed Not used by this callback
+  * @return string Always the literal "dummy"
+  */
+ function tagCallback( $innerText, $params, $parser ) {
+     list( $this->mInnerText, $this->mParams ) = array( $innerText, $params );
+
+     return "dummy";
+ }
+}
Index: includes/parser/Preprocessor_DOM.php
===================================================================
--- includes/parser/Preprocessor_DOM.php (revision 110110)
+++ includes/parser/Preprocessor_DOM.php (working copy)
@@ -413,18 +413,51 @@
$close = '';
} else {
$attrEnd = $tagEndPos;
- // Find closing tag
- if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
- $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
- {
- $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
- $i = $matches[0][1] + strlen( $matches[0][0] );
- $close = '' . htmlspecialchars( $matches[0][0] ) . '';
- } else {
- // No end tag -- let it run out to the end of the text.
+
+ // we know that one start tag was already found
+ $numOfStartTags = 1;
+ $numOfEndTags = 0;
+ $offset = $tagEndPos;
+ $pattern = "/<" . preg_quote( $name, '/' ) . "[^>]*>" . "|" . "<\/" . preg_quote( $name, '/' ) . "\s*>/i";
+
+ while ( $numOfStartTags !== $numOfEndTags ) {
+     // Find the next start or end tag of $name. preg_match() gives 0 for "no match" and false on a PCRE error; stop on both.
+     if ( !preg_match( $pattern, $text, $matches, PREG_OFFSET_CAPTURE, $offset + 1 ) ) {
+         // Ran dry while unbalanced: rewind to the last end tag so the code below closes on it, not on an orphan start tag.
+         $offset = $numOfEndTags > 0 ? $previousMatches[ 0 ][ 1 ] : $offset;
+         break;
+     }
+     $offset = $matches[ 0 ][ 1 ];
+
+     // Of the two alternatives in $pattern only the end tag begins with "</".
+     if ( substr( $matches[ 0 ][ 0 ], 0, 2 ) === "</" ) {
+         $numOfEndTags++;
+         $previousMatches = $matches; // always the most recent *end* tag
+     } else {
+         $numOfStartTags++;
+     }
+ }
+
+ if( $numOfEndTags === 0 ) {
+ // no end tag, swallow everything
$inner = substr( $text, $tagEndPos + 1 );
$i = strlen( $text );
$close = '';
+
+ } else {
+
+ if( $numOfStartTags === $numOfEndTags ) {
+ // match made in heaven, all start tags have end tags
+ $inner = substr( $text, $tagEndPos + 1, $offset - ( $tagEndPos + 1 ) );
+ $i = $offset + strlen( $matches[0][0] );
+ $close = '' . htmlspecialchars( $matches[0][0] ) . '';
+
+ } else {
+ // not balanced, at least one start tag is orphan
+ $inner = substr( $text, $tagEndPos + 1, $offset - ( $tagEndPos + 1 ) );
+ $i = $offset + strlen( $previousMatches[0][0] );
+ $close = '' . htmlspecialchars( $previousMatches[0][0] ) . '';
+ }
}
}
// and just become tags