From a9852fead009bf82c6a9e2259aff081f3225b574 Mon Sep 17 00:00:00 2001 From: csteipp Date: Fri, 27 Mar 2015 16:27:19 -0700 Subject: [PATCH] SECURITY: Don't allow entities in XMP Test for, and refuse to parse, XMP chunks with a doctype declaration when parsing XMP. Bug: T85848 Change-Id: Iea4feb077ee85a35509a920153daaa9321ee69f3 --- includes/media/BitmapMetadataHandler.php | 6 +- includes/media/JpegMetadataExtractor.php | 2 +- includes/media/XMP.php | 96 ++++++++++++++++++++++ tests/phpunit/data/xmp/doctype-included.result.php | 3 + tests/phpunit/data/xmp/doctype-included.xmp | 12 +++ tests/phpunit/data/xmp/doctype-not-included.xmp | 11 +++ tests/phpunit/includes/media/XMPTest.php | 49 +++++++++++ 7 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 tests/phpunit/data/xmp/doctype-included.result.php create mode 100644 tests/phpunit/data/xmp/doctype-included.xmp create mode 100644 tests/phpunit/data/xmp/doctype-not-included.xmp diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php index 746dddd..566018c 100644 --- a/includes/media/BitmapMetadataHandler.php +++ b/includes/media/BitmapMetadataHandler.php @@ -126,7 +126,7 @@ class BitmapMetadataHandler { * @throws MWException on invalid file. */ static function Jpeg ( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $seg = JpegMetadataExtractor::segmentSplitter( $filename ); @@ -168,7 +168,7 @@ class BitmapMetadataHandler { * @return Array Array for storage in img_metadata. */ static public function PNG ( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $array = PNGMetadataExtractor::getMetadata( $filename ); @@ -205,7 +205,7 @@ class BitmapMetadataHandler { $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' ); } - if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) { + if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) { $xmp = new XMPReader(); $xmp->parse( $baseArray['xmp'] ); $xmpRes = $xmp->getResults(); diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php index 224b4a2..7cbd2e9 100644 --- a/includes/media/JpegMetadataExtractor.php +++ b/includes/media/JpegMetadataExtractor.php @@ -24,7 +24,7 @@ class JpegMetadataExtractor { * @throws MWException if given invalid file. */ static function segmentSplitter ( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $segmentCount = 0; diff --git a/includes/media/XMP.php b/includes/media/XMP.php index 0dbf563..3a4a915 100644 --- a/includes/media/XMP.php +++ b/includes/media/XMP.php @@ -40,6 +40,12 @@ class XMPReader { protected $items; + /** @var int Flag determining if the XMP is safe to parse **/ + private $parsable = 0; + + /** @var string Buffer of XML to parse **/ + private $xmlParsableBuffer = ''; + /** * These are various mode constants. * they are used to figure out what to do @@ -69,6 +75,12 @@ class XMPReader { const NS_XML = 'http://www.w3.org/XML/1998/namespace'; + // States used while determining if XML is safe to parse + const PARSABLE_UNKNOWN = 0; + const PARSABLE_OK = 1; + const PARSABLE_BUFFERING = 2; + const PARSABLE_NO = 3; + /** * Constructor. * @@ -106,6 +118,9 @@ class XMPReader { array( $this, 'endElement' ) ); xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); + + $this->parsable = self::PARSABLE_UNKNOWN; + $this->xmlParsableBuffer = ''; } /** Destroy the xml parser @@ -117,6 +132,13 @@ class XMPReader { xml_parser_free( $this->xmlParser ); } + /** + * Check if this instance supports using this class + */ + public static function isSupported() { + return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' ); + } + /** Get the result array. Do some post-processing before returning * the array, and transform any metadata that is special-cased. * @@ -263,6 +285,27 @@ class XMPReader { wfRestoreWarnings(); } + // Ensure the XMP block does not have an xml doctype declaration, which + // could declare entities unsafe to parse with xml_parse (T85848/T71210). + if ( $this->parsable !== self::PARSABLE_OK ) { + if ( $this->parsable === self::PARSABLE_NO ) { + throw new MWException( 'Unsafe doctype declaration in XML.' ); + } + + $content = $this->xmlParsableBuffer . $content; + if ( !$this->checkParseSafety( $content ) ) { + if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) { + // parse wasn't Unsuccessful yet, so return true + // in this case. + return true; + } + $msg = ( $this->parsable === self::PARSABLE_NO ) ? + 'Unsafe doctype declaration in XML.' : + 'No root element found in XML.'; + throw new MWException( $msg ); + } + } + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); if ( !$ok ) { $error = xml_error_string( xml_get_error_code( $this->xmlParser ) ); @@ -385,6 +428,59 @@ class XMPReader { } + /** + * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't + * contain a doctype declaration which could contain a dos attack if we + * parse it and expand internal entities (T85848). + * + * @param string $content xml string to check for parse safety + * @return bool true if the xml is safe to parse, false otherwise + */ + private function checkParseSafety( $content ) { + $reader = new XMLReader(); + $result = null; + + // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed + // instead of using XML(). + $reader->open( + 'data://text/plain,' . urlencode( $content ), + null, + LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET + ); + + $oldDisable = libxml_disable_entity_loader( true ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false ); + + // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning + // when parsing truncated XML, which causes unit tests to fail. + wfSuppressWarnings(); + while ( $reader->read() ) { + if ( $reader->nodeType === XMLReader::ELEMENT ) { + // Reached the first element without hitting a doctype declaration + $this->parsable = self::PARSABLE_OK; + $result = true; + break; + } + if ( $reader->nodeType === XMLReader::DOC_TYPE ) { + $this->parsable = self::PARSABLE_NO; + $result = false; + break; + } + } + wfRestoreWarnings(); + libxml_disable_entity_loader( $oldDisable ); + + if ( !is_null( $result ) ) { + return $result; + } + + // Reached the end of the parsable xml without finding an element + // or doctype. Buffer and try again. + $this->parsable = self::PARSABLE_BUFFERING; + $this->xmlParsableBuffer = $content; + return false; + } + /** When we hit a closing element in MODE_IGNORE * Check to see if this is the element we started to ignore, * in which case we get out of MODE_IGNORE diff --git a/tests/phpunit/data/xmp/doctype-included.result.php b/tests/phpunit/data/xmp/doctype-included.result.php new file mode 100644 index 0000000..9a9cc36 --- /dev/null +++ b/tests/phpunit/data/xmp/doctype-included.result.php @@ -0,0 +1,3 @@ + ]> + + + + +True 0 1 False False + + diff --git a/tests/phpunit/data/xmp/doctype-not-included.xmp b/tests/phpunit/data/xmp/doctype-not-included.xmp new file mode 100644 index 0000000..9a40b4b --- /dev/null +++ b/tests/phpunit/data/xmp/doctype-not-included.xmp @@ -0,0 +1,11 @@ + + + + +True 0 1 False False + + diff --git a/tests/phpunit/includes/media/XMPTest.php b/tests/phpunit/includes/media/XMPTest.php index c952b23..01cd657 100644 --- a/tests/phpunit/includes/media/XMPTest.php +++ b/tests/phpunit/includes/media/XMPTest.php @@ -53,6 +53,8 @@ class XMPTest extends MediaWikiTestCase { array( 'utf32LE', 'UTF-32LE encoding' ), array( 'xmpExt', 'Extended XMP missing second part' ), ); + $xmpFiles[] = array( 'doctype-included', 'XMP includes doctype' ); + foreach( $xmpFiles as $file ) { $xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' ); // I'm not sure if this is the best way to handle getting the @@ -154,4 +156,51 @@ class XMPTest extends MediaWikiTestCase { $this->assertEquals( $expected, $actual ); } + /** + * Test for multi-section, hostile XML + * @covers checkParseSafety + */ + public function testCheckParseSafety() { + + // Test for detection + $xmpPath = __DIR__ . '/../../data/xmp/'; + $file = fopen( $xmpPath . 'doctype-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertFalse( $valid, 'Check that doctype is detected in fragmented XML' ); + $this->assertEquals( + array(), + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + fclose( $file ); + unset( $reader ); + + // Test for false positives + $file = fopen( $xmpPath . 'doctype-not-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertTrue( + $valid, + 'Check for false-positive detecting doctype in fragmented XML' + ); + $this->assertEquals( + array( + 'xmp-exif' => array( + 'DigitalZoomRatio' => '0/10', + 'Flash' => '9' + ) + ), + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + } } -- 1.8.4.5