From 271b680c089ebc1ee25acac764458c3495865b17 Mon Sep 17 00:00:00 2001 From: csteipp Date: Tue, 3 Feb 2015 17:45:05 -0800 Subject: [PATCH] SECURITY: Always expand xml entities when checking SVG's Add an XmlReaderTypeCheck that is based on php's XMLReader instead of xml_parse, and use it where XmlTypeCheck was being used for filtering SVG's. XmlTypeCheck's use of xml_parse for filtering SVG's sometimes leaves xml entities unexpanded, which can lead to false-negatives. Bug: T88310 Change-Id: I77c77a2d6d22f549e7ef969811f7edd77a45dbba --- autoload.php | 1 + includes/libs/XmlReaderTypeCheck.php | 334 +++++++++++++++++++++ includes/upload/UploadBase.php | 2 +- .../includes/libs/XmlReaderTypeCheckTest.php | 49 +++ tests/phpunit/includes/libs/XmlTypeCheckTest.php | 19 ++ 5 files changed, 404 insertions(+), 1 deletion(-) create mode 100644 includes/libs/XmlReaderTypeCheck.php create mode 100644 tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php diff --git a/autoload.php b/autoload.php index 11b5266..5facd2c 100644 --- a/autoload.php +++ b/autoload.php @@ -1334,6 +1334,7 @@ $wgAutoloadLocalClasses = array( 'XmlJsCode' => __DIR__ . '/includes/Xml.php', 'XmlSelect' => __DIR__ . '/includes/Xml.php', 'XmlTypeCheck' => __DIR__ . '/includes/libs/XmlTypeCheck.php', + 'XmlReaderTypeCheck' => __DIR__ . '/includes/libs/XmlReaderTypeCheck.php', 'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php', 'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php', 'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReader.php', diff --git a/includes/libs/XmlReaderTypeCheck.php b/includes/libs/XmlReaderTypeCheck.php new file mode 100644 index 0000000..e859bac --- /dev/null +++ b/includes/libs/XmlReaderTypeCheck.php @@ -0,0 +1,334 @@ + '', + ); + + /** + * @param string $input a filename or string containing the XML element + * @param callable $filterCallback (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, attributes, and text contents. + * Filter should return 'true' to toggle on $this->filterMatch + * @param bool $isFile (optional) indicates if the first parameter is a + * filename (default, true) or if it is a string (false) + * @param array $options list of additional parsing options: + * processing_instruction_handler: Callback for xml_set_processing_instruction_handler + */ + function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) { + $this->filterCallback = $filterCallback ?: function() { return false; } ; + $this->parserOptions = array_merge( $this->parserOptions, $options ); + $this->validateFromInput( $input, $isFile ); + } + + /** + * Alternative constructor: from filename + * + * @param string $fname the filename of an XML document + * @param callable $filterCallback (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, and attributes, but not to text contents. + * Filter should return 'true' to toggle on $this->filterMatch + * @return XmlReaderTypeCheck + */ + public static function newFromFilename( $fname, $filterCallback = null ) { + return new self( $fname, $filterCallback, true ); + } + + /** + * Alternative constructor: from string + * + * @param string $string a string containing an XML element + * @param callable $filterCallback (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, and attributes, but not to text contents. + * Filter should return 'true' to toggle on $this->filterMatch + * @return XmlReaderTypeCheck + */ + public static function newFromString( $string, $filterCallback = null ) { + return new self( $string, $filterCallback, false ); + } + + /** + * Get the root element. Simple accessor to $rootElement + * + * @return string + */ + public function getRootElement() { + return $this->rootElement; + } + + + /** + * @param string $fname the filename + */ + private function validateFromInput( $xml, $isFile ) { + $reader = new XMLReader(); + if ( $isFile ) { + $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } else { + $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } + if ( $s !== true ) { + // Couldn't open the XML + wfDebugLog( "XRTC", __METHOD__ . ": malformed, source won't open" ); + $this->wellFormed = false; + } else { + $oldDisable = libxml_disable_entity_loader( true ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true ); + try { + $this->validate( $reader ); + } catch ( Exception $e ) { + // Calling this malformed, because we didn't parse the whole + // thing. Maybe just an external entity refernce. + wfDebugLog( "XRTC", __METHOD__ . ": malformed, exception during parse" ); + $this->wellFormed = false; + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); + throw $e; + } + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); + } + } + + private function readNext( XMLReader $reader ) { + set_error_handler ( array( $this, 'XmlErrorHandler' ) ); + $ret = $reader->read(); + restore_error_handler(); + return $ret; + } + + public function XmlErrorHandler( $errno , $errstr ) { + $this->wellFormed = false; + } + + private function validate( $reader ) { + + // First, move through anything that isn't an element, and + // handle any processing instructions with the callback + do { + if( !$this->readNext( $reader ) ) { + // Hit the end of the document before any elements + wfDebugLog( "XRTC", __METHOD__ . ": malformed, no elements" ); + $this->wellFormed = false; + return; + } + if ( $reader->nodeType === XMLReader::PI ) { + $this->processingInstructionHandler( $reader->name, $reader->value ); + } + } while ( $reader->nodeType != XMLReader::ELEMENT ); + + // Process the rest of the document + do { + switch ( $reader->nodeType ) { + case XMLReader::ELEMENT: + $name = $this->expandNS( + $reader->name, + $reader->namespaceURI + ); + if ( $this->rootElement === '' ) { + $this->rootElement = $name; + } + $empty = $reader->isEmptyElement; + $attrs = $this->getAttributesArray( $reader ); + $this->elementOpen( $name, $attrs ); + if ( $empty ) { + $this->elementClose(); + } + break; + + case XMLReader::END_ELEMENT: + $this->elementClose(); + break; + + case XMLReader::WHITESPACE: + case XMLReader::SIGNIFICANT_WHITESPACE: + case XMLReader::CDATA: + case XMLReader::TEXT: + $this->elementData( $reader->value ); + break; + + case XMLReader::ENTITY_REF: + // Unexpanded entity (maybe external?), + // don't send to the filter (xml_parse didn't) + break; + + case XMLReader::COMMENT: + // Don't send to the filter (xml_parse didn't) + break; + + case XMLReader::PI: + // Processing instructions can happen after the header too + $this->processingInstructionHandler( + $reader->name, + $reader->value + ); + break; + default: + // One of COMMENT, DOC, DOC_TYPE, ENTITY, END_ENTITY, + // NOTATION, or XML_DECLARATION + // xml_parse didn't send these to the filter, so we won't. + } + + } while ( $this->readNext( $reader ) ); + + if ( $this->stackDepth !== 0 ) { + wfDebugLog( "XRTC", __METHOD__ . ": malformed, non-zero stack at end" ); + $this->wellFormed = false; + } elseif ( $this->wellFormed === null ) { + $this->wellFormed = true; + } + + } + + /** + * Get all of the attributes for an XMLReader's current node + * @param $r XMLReader + * @return array of attributes + */ + private function getAttributesArray( XMLReader $r ) { + $attrs = array(); + while ( $r->moveToNextAttribute() ) { + if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) { + // XMLReader treats xmlns attributes as normal + // attributes, while xml_parse doesn't + continue; + } + $name = $this->expandNS( $r->name, $r->namespaceURI ); + $attrs[$name] = $r->value; + } + return $attrs; + } + + /** + * @param $name element or attribute name, maybe with a full or short prefix + * @param $namespaceURI the namespaceURI + * @return string the name prefixed with namespaceURI + */ + private function expandNS( $name, $namespaceURI ) { + if ( $namespaceURI ) { + $localname = array_pop( explode( ':', $name ) ); + return "$namespaceURI:$localname"; + } + return $name; + } + + /** + * @param $name + * @param $attribs + */ + private function elementOpen( $name, $attribs ) { + $this->elementDataContext[] = array( $name, $attribs ); + $this->elementData[] = ''; + $this->stackDepth++; + } + + /** + */ + private function elementClose() { + list( $name, $attribs ) = array_pop( $this->elementDataContext ); + $data = array_pop( $this->elementData ); + $this->stackDepth--; + + if ( call_user_func( + $this->filterCallback, + $name, + $attribs, + $data + ) ) { + // Filter hit + $this->filterMatch = true; + } + } + + /** + * @param $data + */ + private function elementData( $data ) { + // Collect any data here, and we'll run the callback in elementClose + $this->elementData[ $this->stackDepth - 1 ] .= trim( $data ); + } + + /** + * @param $target + * @param $data + */ + private function processingInstructionHandler( $target, $data ) { + if ( $this->parserOptions['processing_instruction_handler'] ) { + if ( call_user_func( + $this->parserOptions['processing_instruction_handler'], + $target, + $data + ) ) { + // Filter hit! + $this->filterMatch = true; + } + } + } +} diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php index e27b709..f6eea62 100644 --- a/includes/upload/UploadBase.php +++ b/includes/upload/UploadBase.php @@ -1249,7 +1249,7 @@ abstract class UploadBase { */ protected function detectScriptInSvg( $filename, $partial ) { $this->mSVGNSError = false; - $check = new XmlTypeCheck( + $check = new XmlReaderTypeCheck( $filename, array( $this, 'checkSvgScriptCallback' ), true, diff --git a/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php b/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php new file mode 100644 index 0000000..ed1f58e --- /dev/null +++ b/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php @@ -0,0 +1,49 @@ +"; + const MAL_FORMED_XML = ""; + const XML_WITH_PIH = ''; + + /** + * @covers XMLTypeCheck::newFromString + * @covers XMLTypeCheck::getRootElement + */ + public function testWellFormedXML() { + $testXML = XmlReaderTypeCheck::newFromString( self::WELL_FORMED_XML ); + $this->assertTrue( $testXML->wellFormed ); + $this->assertEquals( 'root', $testXML->getRootElement() ); + } + + /** + * @covers XMLTypeCheck::newFromString + */ + public function testMalFormedXML() { + $testXML = XmlReaderTypeCheck::newFromString( self::MAL_FORMED_XML ); + $this->assertFalse( $testXML->wellFormed ); + } + + /** + * @covers XMLTypeCheck::processingInstructionHandler + */ + public function testProcessingInstructionHandler() { + $called = false; + $testXML = new XmlReaderTypeCheck( + self::XML_WITH_PIH, + null, + false, + array( + 'processing_instruction_handler' => function() use ( &$called ) { + $called = true; + } + ) + ); + $this->assertTrue( $called ); + } + +} diff --git a/tests/phpunit/includes/libs/XmlTypeCheckTest.php b/tests/phpunit/includes/libs/XmlTypeCheckTest.php index e7b3e77..f0ba934 100644 --- a/tests/phpunit/includes/libs/XmlTypeCheckTest.php +++ b/tests/phpunit/includes/libs/XmlTypeCheckTest.php @@ -8,6 +8,7 @@ class XmlTypeCheckTest extends PHPUnit_Framework_TestCase { const WELL_FORMED_XML = ""; const MAL_FORMED_XML = ""; + const XML_WITH_PIH = ''; /** * @covers XMLTypeCheck::newFromString @@ -27,4 +28,22 @@ class XmlTypeCheckTest extends PHPUnit_Framework_TestCase { $this->assertFalse( $testXML->wellFormed ); } + /** + * @covers XMLTypeCheck::processingInstructionHandler + */ + public function testProcessingInstructionHandler() { + $called = false; + $testXML = new XmlTypeCheck( + self::XML_WITH_PIH, + null, + false, + array( + 'processing_instruction_handler' => function() use ( &$called ) { + $called = true; + } + ) + ); + $this->assertTrue( $called ); + } + } -- 1.8.4.5