From 185547a4119f067017ff0a36dccf21d263d71a7b Mon Sep 17 00:00:00 2001 From: csteipp Date: Fri, 27 Mar 2015 15:08:52 -0700 Subject: [PATCH] SECURITY: Always expand xml entities when checking SVG's XmlTypeCheck's use of xml_parse for filtering SVG's sometimes left xml entities unexpanded, which can lead to false-negatives when the callback was used for filtering. Update XmlTypeCheck to use XMLReader instead, tell the library to fully expand entities, and rely on the library to error out if it encounters XML that is likely to cause a DoS if parsed. Bug: T88310 Change-Id: I77c77a2d6d22f549e7ef969811f7edd77a45dbba --- includes/libs/XmlTypeCheck.php | 251 +++++++++++++++-------- tests/phpunit/includes/XmlTypeCheckTest.php | 19 ++ tests/phpunit/includes/upload/UploadBaseTest.php | 13 +- 3 files changed, 192 insertions(+), 91 deletions(-) diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php index aca857e..31a4e28 100644 --- a/includes/libs/XmlTypeCheck.php +++ b/includes/libs/XmlTypeCheck.php @@ -2,6 +2,11 @@ /** * XML syntax and type checker. * + * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us + * more control over the expansion of XML entities. When passed to the + * callback, entities will be fully expanded, but may report the XML is + * invalid if expanding the entities are likely to cause a DoS. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -25,7 +30,7 @@ class XmlTypeCheck { * Will be set to true or false to indicate whether the file is * well-formed XML. Note that this doesn't check schema validity. */ - public $wellFormed = false; + public $wellFormed = null; /** * Will be set to true if the optional element filter returned @@ -78,12 +83,7 @@ class XmlTypeCheck { function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) { $this->filterCallback = $filterCallback; $this->parserOptions = array_merge( $this->parserOptions, $options ); - - if ( $isFile ) { - $this->validateFromFile( $input ); - } else { - $this->validateFromString( $input ); - } + $this->validateFromInput( $input, $isFile ); } /** @@ -125,140 +125,211 @@ class XmlTypeCheck { return $this->rootElement; } + /** - * Get an XML parser with the root element handler. - * @see XmlTypeCheck::rootElementOpen() - * @return resource a resource handle for the XML parser + * @param string $fname the filename */ - private function getParser() { - $parser = xml_parser_create_ns( 'UTF-8' ); - // case folding violates XML standard, turn it off - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false ); - if ( $this->parserOptions['processing_instruction_handler'] ) { - xml_set_processing_instruction_handler( - $parser, - array( $this, 'processingInstructionHandler' ) - ); + private function validateFromInput( $xml, $isFile ) { + $reader = new XMLReader(); + if ( $isFile ) { + $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } else { + $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } + if ( $s !== true ) { + // Couldn't open the XML + $this->wellFormed = false; + } else { + $oldDisable = libxml_disable_entity_loader( true ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true ); + try { + $this->validate( $reader ); + } catch ( Exception $e ) { + // Calling this malformed, because we didn't parse the whole + // thing. Maybe just an external entity refernce. + $this->wellFormed = false; + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); + throw $e; + } + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); } - return $parser; } - /** - * @param string $fname the filename - */ - private function validateFromFile( $fname ) { - $parser = $this->getParser(); - - if ( file_exists( $fname ) ) { - $file = fopen( $fname, "rb" ); - if ( $file ) { - do { - $chunk = fread( $file, 32768 ); - $ret = xml_parse( $parser, $chunk, feof( $file ) ); - if ( $ret == 0 ) { - $this->wellFormed = false; - fclose( $file ); - xml_parser_free( $parser ); - return; + private function readNext( XMLReader $reader ) { + set_error_handler( array( $this, 'XmlErrorHandler' ) ); + $ret = $reader->read(); + restore_error_handler(); + return $ret; + } + + public function XmlErrorHandler( $errno, $errstr ) { + $this->wellFormed = false; + } + + private function validate( $reader ) { + + // First, move through anything that isn't an element, and + // handle any processing instructions with the callback + do { + if( !$this->readNext( $reader ) ) { + // Hit the end of the document before any elements + $this->wellFormed = false; + return; + } + if ( $reader->nodeType === XMLReader::PI ) { + $this->processingInstructionHandler( $reader->name, $reader->value ); + } + } while ( $reader->nodeType != XMLReader::ELEMENT ); + + // Process the rest of the document + do { + switch ( $reader->nodeType ) { + case XMLReader::ELEMENT: + $name = $this->expandNS( + $reader->name, + $reader->namespaceURI + ); + if ( $this->rootElement === '' ) { + $this->rootElement = $name; } - } while ( !feof( $file ) ); + $empty = $reader->isEmptyElement; + $attrs = $this->getAttributesArray( $reader ); + $this->elementOpen( $name, $attrs ); + if ( $empty ) { + $this->elementClose(); + } + break; + + case XMLReader::END_ELEMENT: + $this->elementClose(); + break; + + case XMLReader::WHITESPACE: + case XMLReader::SIGNIFICANT_WHITESPACE: + case XMLReader::CDATA: + case XMLReader::TEXT: + $this->elementData( $reader->value ); + break; - fclose( $file ); + case XMLReader::ENTITY_REF: + // Unexpanded entity (maybe external?), + // don't send to the filter (xml_parse didn't) + break; + + case XMLReader::COMMENT: + // Don't send to the filter (xml_parse didn't) + break; + + case XMLReader::PI: + // Processing instructions can happen after the header too + $this->processingInstructionHandler( + $reader->name, + $reader->value + ); + break; + default: + // One of DOC, DOC_TYPE, ENTITY, END_ENTITY, + // NOTATION, or XML_DECLARATION + // xml_parse didn't send these to the filter, so we won't. } + + } while ( $this->readNext( $reader ) ); + + if ( $this->stackDepth !== 0 ) { + $this->wellFormed = false; + } elseif ( $this->wellFormed === null ) { + $this->wellFormed = true; } - $this->wellFormed = true; - xml_parser_free( $parser ); } /** - * - * @param string $string the XML-input-string to be checked. + * Get all of the attributes for an XMLReader's current node + * @param $r XMLReader + * @return array of attributes */ - private function validateFromString( $string ) { - $parser = $this->getParser(); - $ret = xml_parse( $parser, $string, true ); - xml_parser_free( $parser ); - if ( $ret == 0 ) { - $this->wellFormed = false; - return; + private function getAttributesArray( XMLReader $r ) { + $attrs = array(); + while ( $r->moveToNextAttribute() ) { + if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) { + // XMLReader treats xmlns attributes as normal + // attributes, while xml_parse doesn't + continue; + } + $name = $this->expandNS( $r->name, $r->namespaceURI ); + $attrs[$name] = $r->value; } - $this->wellFormed = true; + return $attrs; } /** - * @param $parser - * @param $name - * @param $attribs + * @param $name element or attribute name, maybe with a full or short prefix + * @param $namespaceURI the namespaceURI + * @return string the name prefixed with namespaceURI */ - private function rootElementOpen( $parser, $name, $attribs ) { - $this->rootElement = $name; - - if ( is_callable( $this->filterCallback ) ) { - xml_set_element_handler( - $parser, - array( $this, 'elementOpen' ), - array( $this, 'elementClose' ) - ); - xml_set_character_data_handler( $parser, array( $this, 'elementData' ) ); - $this->elementOpen( $parser, $name, $attribs ); - } else { - // We only need the first open element - xml_set_element_handler( $parser, false, false ); + private function expandNS( $name, $namespaceURI ) { + if ( $namespaceURI ) { + $parts = explode( ':', $name ); + $localname = array_pop( $parts ); + return "$namespaceURI:$localname"; } + return $name; } /** - * @param $parser * @param $name * @param $attribs */ - private function elementOpen( $parser, $name, $attribs ) { + private function elementOpen( $name, $attribs ) { $this->elementDataContext[] = array( $name, $attribs ); $this->elementData[] = ''; $this->stackDepth++; } /** - * @param $parser - * @param $name */ - private function elementClose( $parser, $name ) { + private function elementClose() { list( $name, $attribs ) = array_pop( $this->elementDataContext ); $data = array_pop( $this->elementData ); $this->stackDepth--; - if ( call_user_func( - $this->filterCallback, - $name, - $attribs, - $data - ) ) { - // Filter hit! + if ( is_callable( $this->filterCallback ) + && call_user_func( + $this->filterCallback, + $name, + $attribs, + $data + ) + ) { + // Filter hit $this->filterMatch = true; } } /** - * @param $parser * @param $data */ - private function elementData( $parser, $data ) { - // xml_set_character_data_handler breaks the data on & characters, so - // we collect any data here, and we'll run the callback in elementClose + private function elementData( $data ) { + // Collect any data here, and we'll run the callback in elementClose $this->elementData[ $this->stackDepth - 1 ] .= trim( $data ); } /** - * @param $parser * @param $target * @param $data */ - private function processingInstructionHandler( $parser, $target, $data ) { - if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) { - // Filter hit! - $this->filterMatch = true; + private function processingInstructionHandler( $target, $data ) { + if ( $this->parserOptions['processing_instruction_handler'] ) { + if ( call_user_func( + $this->parserOptions['processing_instruction_handler'], + $target, + $data + ) ) { + // Filter hit! + $this->filterMatch = true; + } } } } diff --git a/tests/phpunit/includes/XmlTypeCheckTest.php b/tests/phpunit/includes/XmlTypeCheckTest.php index 8d6f1ed..6ad97fd 100644 --- a/tests/phpunit/includes/XmlTypeCheckTest.php +++ b/tests/phpunit/includes/XmlTypeCheckTest.php @@ -8,6 +8,7 @@ class XmlTypeCheckTest extends MediaWikiTestCase { const WELL_FORMED_XML = ""; const MAL_FORMED_XML = ""; + const XML_WITH_PIH = ''; /** * @covers XMLTypeCheck::newFromString @@ -27,4 +28,22 @@ class XmlTypeCheckTest extends MediaWikiTestCase { $this->assertFalse( $testXML->wellFormed ); } + /** + * @covers XMLTypeCheck::processingInstructionHandler + */ + public function testProcessingInstructionHandler() { + $called = false; + $testXML = new XmlTypeCheck( + self::XML_WITH_PIH, + null, + false, + array( + 'processing_instruction_handler' => function() use ( &$called ) { + $called = true; + } + ) + ); + $this->assertTrue( $called ); + } + } diff --git a/tests/phpunit/includes/upload/UploadBaseTest.php b/tests/phpunit/includes/upload/UploadBaseTest.php index a40dd50..3d3b006 100644 --- a/tests/phpunit/includes/upload/UploadBaseTest.php +++ b/tests/phpunit/includes/upload/UploadBaseTest.php @@ -367,6 +367,18 @@ class UploadBaseTest extends MediaWikiTestCase { true, 'SVG with data:text/html link target (firefox only)' ), + array( + ' ]> &lol2; ', + true, + true, + 'SVG with encoded script tag in internal entity (reported by Beyond Security)' + ), + array( + ' ]> &foo; ', + false, + false, + 'SVG with external entity' + ), // Test good, but strange files that we want to allow array( @@ -381,7 +393,6 @@ class UploadBaseTest extends MediaWikiTestCase { false, 'SVG with local urls, including filter: in style' ), - ); } } -- 1.8.4.5