From d071ea6e362e6062c44248f42b6a39035cf41618 Mon Sep 17 00:00:00 2001 From: csteipp Date: Thu, 12 Mar 2015 15:49:22 -0700 Subject: [PATCH] SECURITY: Don't allow entities in XMP with HHVM Test for, and refuse to parse, XMP chunks with a doctype declaration when parsing XMP under HHVM. Bug: T85848 Change-Id: Iea4feb077ee85a35509a920153daaa9321ee69f3 --- includes/media/BitmapMetadataHandler.php | 6 +- includes/media/JpegMetadataExtractor.php | 2 +- includes/media/XMP.php | 100 +++++++++++++++++++++ tests/phpunit/data/xmp/doctype-included.result.php | 3 + tests/phpunit/data/xmp/doctype-included.xmp | 12 +++ tests/phpunit/data/xmp/doctype-not-included.xmp | 11 +++ tests/phpunit/includes/media/XMPTest.php | 55 ++++++++++++ 7 files changed, 185 insertions(+), 4 deletions(-) create mode 100644 tests/phpunit/data/xmp/doctype-included.result.php create mode 100644 tests/phpunit/data/xmp/doctype-included.xmp create mode 100644 tests/phpunit/data/xmp/doctype-not-included.xmp diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php index bb7a1e8..c8d37bb 100644 --- a/includes/media/BitmapMetadataHandler.php +++ b/includes/media/BitmapMetadataHandler.php @@ -154,7 +154,7 @@ class BitmapMetadataHandler { * @throws MWException On invalid file. */ static function Jpeg( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $seg = JpegMetadataExtractor::segmentSplitter( $filename ); @@ -196,7 +196,7 @@ class BitmapMetadataHandler { * @return array Array for storage in img_metadata. */ public static function PNG( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $array = PNGMetadataExtractor::getMetadata( $filename ); @@ -236,7 +236,7 @@ class BitmapMetadataHandler { $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' ); } - if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) { + if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) { $xmp = new XMPReader(); $xmp->parse( $baseArray['xmp'] ); $xmpRes = $xmp->getResults(); diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php index 8c5b46b..aaa9930 100644 --- a/includes/media/JpegMetadataExtractor.php +++ b/includes/media/JpegMetadataExtractor.php @@ -48,7 +48,7 @@ class JpegMetadataExtractor { * @throws MWException If given invalid file. */ static function segmentSplitter( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $segmentCount = 0; diff --git a/includes/media/XMP.php b/includes/media/XMP.php index 0d341aa..a384cbd 100644 --- a/includes/media/XMP.php +++ b/includes/media/XMP.php @@ -80,6 +80,12 @@ class XMPReader { /** @var int */ private $extendedXMPOffset = 0; + /** @var int Flag determining if the XMP is safe to parse **/ + private $parsable = 0; + + /** @var string Buffer of XML to parse **/ + private $xmlParsableBuffer = ''; + /** * These are various mode constants. * they are used to figure out what to do @@ -108,6 +114,12 @@ class XMPReader { const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; const NS_XML = 'http://www.w3.org/XML/1998/namespace'; + // States used while determining if XML is safe to parse + const PARSABLE_UNKNOWN = 0; + const PARSABLE_OK = 1; + const PARSABLE_BUFFERING = 2; + const PARSABLE_NO = 3; + /** * Constructor. * @@ -145,6 +157,9 @@ class XMPReader { array( $this, 'endElement' ) ); xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); + + $this->parsable = self::PARSABLE_UNKNOWN; + $this->xmlParsableBuffer = ''; } /** Destroy the xml parser @@ -156,6 +171,14 @@ class XMPReader { xml_parser_free( $this->xmlParser ); } + /** + * Check if this instance supports using this class + */ + public static function isSupported() { + $xmlReader = wfIsHHVM() ? class_exists( 'XMLReader' ) : true; + return function_exists( 'xml_parser_create_ns' ) && $xmlReader; + } + /** Get the result array. Do some post-processing before returning * the array, and transform any metadata that is special-cased. * @@ -305,6 +328,27 @@ class XMPReader { wfRestoreWarnings(); } + // Ensure the XMP block does not have an xml doctype declaration, which + // could declare entities unsafe to parse with xml_parse on HHVM (T85848). + if ( wfIsHHVM() && $this->parsable !== self::PARSABLE_OK ) { + if ( $this->parsable === self::PARSABLE_NO ) { + throw new Exception( 'Unsafe doctype declaration in XML.' ); + } + + $content = $this->xmlParsableBuffer . $content; + if ( !$this->checkParseSafety( $content ) ) { + if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) { + // parse wasn't Unsuccessful yet, so return true + // in this case. + return true; + } + $msg = ( $this->parsable === self::PARSABLE_NO ) ? + 'Unsafe doctype declaration in XML.' : + 'No root element found in XML.'; + throw new Exception( $msg ); + } + } + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); if ( !$ok ) { $error = xml_error_string( xml_get_error_code( $this->xmlParser ) ); @@ -437,6 +481,62 @@ class XMPReader { } } + /** + * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't + * contain a doctype declaration which could contain a dos attack if we + * parse it and expand internal entities (T85848). + * + * @param string $content xml string to check for parse safety + * @return bool true if the xml is safe to parse, false otherwise + */ + private function checkParseSafety( $content ) { + $reader = new XMLReader(); + $result = null; + + // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed + // instead of using XML(). + $reader->open( + 'data://text/plain,' . urlencode( $content ), + null, + LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET + ); + + $oldDisable = libxml_disable_entity_loader( true ); + $reset = new ScopedCallback( + 'libxml_disable_entity_loader', + array( $oldDisable ) + ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false ); + + // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning + // when parsing truncated XML, which causes unit tests to fail. + wfSuppressWarnings(); + while ( $reader->read() ) { + if ( $reader->nodeType === XMLReader::ELEMENT ) { + // Reached the first element without hitting a doctype declaration + $this->parsable = self::PARSABLE_OK; + $result = true; + break; + } + if ( $reader->nodeType === XMLReader::DOC_TYPE ) { + $this->parsable = self::PARSABLE_NO; + $result = false; + break; + } + } + wfRestoreWarnings(); + + if ( !is_null( $result ) ) { + return $result; + } + + // Reached the end of the parsable xml without finding an element + // or doctype. Buffer and try again. + $this->parsable = self::PARSABLE_BUFFERING; + $this->xmlParsableBuffer = $content; + return false; + } + /** When we hit a closing element in MODE_IGNORE * Check to see if this is the element we started to ignore, * in which case we get out of MODE_IGNORE diff --git a/tests/phpunit/data/xmp/doctype-included.result.php b/tests/phpunit/data/xmp/doctype-included.result.php new file mode 100644 index 0000000..9a9cc36 --- /dev/null +++ b/tests/phpunit/data/xmp/doctype-included.result.php @@ -0,0 +1,3 @@ + ]> + + + + +True 0 1 False False + + diff --git a/tests/phpunit/data/xmp/doctype-not-included.xmp b/tests/phpunit/data/xmp/doctype-not-included.xmp new file mode 100644 index 0000000..9a40b4b --- /dev/null +++ b/tests/phpunit/data/xmp/doctype-not-included.xmp @@ -0,0 +1,11 @@ + + + + +True 0 1 False False + + diff --git a/tests/phpunit/includes/media/XMPTest.php b/tests/phpunit/includes/media/XMPTest.php index 798e492..65f2818 100644 --- a/tests/phpunit/includes/media/XMPTest.php +++ b/tests/phpunit/includes/media/XMPTest.php @@ -62,6 +62,10 @@ class XMPTest extends MediaWikiTestCase { array( 'gps', 'Handling of exif GPS parameters in XMP' ), ); + if ( wfIsHHVM() ) { + $xmpFiles[] = array( 'doctype-included', 'XMP includes doctype' ); + } + foreach ( $xmpFiles as $file ) { $xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' ); // I'm not sure if this is the best way to handle getting the @@ -170,4 +174,55 @@ class XMPTest extends MediaWikiTestCase { $this->assertEquals( $expected, $actual ); } + + /** + * Test for multi-section, hostile XML + * @covers checkParseSafety + */ + public function testCheckParseSafety() { + if ( !wfIsHHVM() ) { + $this->markTestSkipped( 'No safety check for non-hhvm' ); + } + + // Test for detection + $xmpPath = __DIR__ . '/../../data/xmp/'; + $file = fopen( $xmpPath . 'doctype-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertFalse( $valid, 'Check that doctype is detected in fragmented XML' ); + $this->assertEquals( + array(), + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + fclose( $file ); + unset( $reader ); + + // Test for false positives + $file = fopen( $xmpPath . 'doctype-not-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertTrue( + $valid, + 'Check for false-positive detecting doctype in fragmented XML' + ); + $this->assertEquals( + array( + 'xmp-exif' => array( + 'DigitalZoomRatio' => '0/10', + 'Flash' => '9' + ) + ), + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + } } -- 1.8.4.5