From 260473ccad52a6dad84c17d5d32cf67e5e97b9bb Mon Sep 17 00:00:00 2001
From: csteipp <csteipp@wikimedia.org>
Date: Thu, 12 Mar 2015 15:49:22 -0700
Subject: [PATCH] SECURITY: Don't allow entities in XMP with HHVM

Test for, and refuse to parse, XMP chunks with a doctype declaration
when parsing XMP under HHVM.

Bug: T85848
Change-Id: Iea4feb077ee85a35509a920153daaa9321ee69f3
---
 includes/media/BitmapMetadataHandler.php           |  6 ++--
 includes/media/JpegMetadataExtractor.php           |  2 +-
 includes/media/XMP.php                             | 38 ++++++++++++++++++++++
 tests/phpunit/data/xmp/doctype-included.result.php |  3 ++
 tests/phpunit/data/xmp/doctype-included.xmp        | 12 +++++++
 tests/phpunit/includes/media/XMPTest.php           |  4 +++
 6 files changed, 61 insertions(+), 4 deletions(-)
 create mode 100644 tests/phpunit/data/xmp/doctype-included.result.php
 create mode 100644 tests/phpunit/data/xmp/doctype-included.xmp

diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php
index bb7a1e8..c8d37bb 100644
--- a/includes/media/BitmapMetadataHandler.php
+++ b/includes/media/BitmapMetadataHandler.php
@@ -154,7 +154,7 @@ class BitmapMetadataHandler {
 	 * @throws MWException On invalid file.
 	 */
 	static function Jpeg( $filename ) {
-		$showXMP = function_exists( 'xml_parser_create_ns' );
+		$showXMP = XMPReader::isSupported();
 		$meta = new self();
 
 		$seg = JpegMetadataExtractor::segmentSplitter( $filename );
@@ -196,7 +196,7 @@ class BitmapMetadataHandler {
 	 * @return array Array for storage in img_metadata.
 	 */
 	public static function PNG( $filename ) {
-		$showXMP = function_exists( 'xml_parser_create_ns' );
+		$showXMP = XMPReader::isSupported();
 
 		$meta = new self();
 		$array = PNGMetadataExtractor::getMetadata( $filename );
@@ -236,7 +236,7 @@ class BitmapMetadataHandler {
 			$meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' );
 		}
 
-		if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) {
+		if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
 			$xmp = new XMPReader();
 			$xmp->parse( $baseArray['xmp'] );
 			$xmpRes = $xmp->getResults();
diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php
index 8c5b46b..aaa9930 100644
--- a/includes/media/JpegMetadataExtractor.php
+++ b/includes/media/JpegMetadataExtractor.php
@@ -48,7 +48,7 @@ class JpegMetadataExtractor {
 	 * @throws MWException If given invalid file.
 	 */
 	static function segmentSplitter( $filename ) {
-		$showXMP = function_exists( 'xml_parser_create_ns' );
+		$showXMP = XMPReader::isSupported();
 
 		$segmentCount = 0;
 
diff --git a/includes/media/XMP.php b/includes/media/XMP.php
index 0d341aa..2429c25 100644
--- a/includes/media/XMP.php
+++ b/includes/media/XMP.php
@@ -80,6 +80,9 @@ class XMPReader {
 	/** @var int */
 	private $extendedXMPOffset = 0;
 
+	/** @var bool Flag if a parse is already in progress **/
+	private $parseInProgress = false;
+
 	/**
 	 * These are various mode constants.
 	 * they are used to figure out what to do
@@ -145,6 +148,7 @@ class XMPReader {
 			array( $this, 'endElement' ) );
 
 		xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
+		$this->parseInProgress = false;
 	}
 
 	/** Destroy the xml parser
@@ -156,6 +160,14 @@ class XMPReader {
 		xml_parser_free( $this->xmlParser );
 	}
 
+	/**
+	 * Check if this instance supports using this class
+	 */
+	public static function isSupported() {
+		$xmlReader = wfIsHHVM() ? class_exists( 'XMLReader' ) : true;
+		return function_exists( 'xml_parser_create_ns' ) && $xmlReader;
+	}
+
 	/** Get the result array. Do some post-processing before returning
 	 * the array, and transform any metadata that is special-cased.
 	 *
@@ -305,6 +317,32 @@ class XMPReader {
 				wfRestoreWarnings();
 			}
 
+			// Ensure the XMP block does not have an xml doctype declaration, which
+			// could declare entities unsafe to submit to xml_parse on HHVM (T85848).
+			if ( wfIsHHVM() && !$this->parseInProgress ) {
+				$reader = new XMLReader();
+				// For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
+				// instead of using XML(). XMP block must be less than 64k, so urlencoding
+				// isn't completely crazy.
+				$reader->open(
+					'data://text/plain,' . urlencode( $content ),
+					null,
+					LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET
+				);
+				$oldDisable = libxml_disable_entity_loader( true );
+				$reset = new ScopedCallback(
+					'libxml_disable_entity_loader',
+					array( $oldDisable )
+				);
+				$reader->setParserProperty( XMLReader::SUBST_ENTITIES, false );
+
+				while ( $reader->read() && $reader->nodeType !== XMLReader::ELEMENT ) {
+					if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
+						throw new Exception( 'unsafe doctype declaration in XML' );
+					}
+				}
+			}
+
 			$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
 			if ( !$ok ) {
 				$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
diff --git a/tests/phpunit/data/xmp/doctype-included.result.php b/tests/phpunit/data/xmp/doctype-included.result.php
new file mode 100644
index 0000000..9a9cc36
--- /dev/null
+++ b/tests/phpunit/data/xmp/doctype-included.result.php
@@ -0,0 +1,3 @@
+<?php
+
+$result = array();
diff --git a/tests/phpunit/data/xmp/doctype-included.xmp b/tests/phpunit/data/xmp/doctype-included.xmp
new file mode 100644
index 0000000..8c94675
--- /dev/null
+++ b/tests/phpunit/data/xmp/doctype-included.xmp
@@ -0,0 +1,12 @@
+<?xpacket begin="﻿" id="W5M0MpCehiHzreSzNTczkc9d"?> <!DOCTYPE x:xmpmeta [ <!ENTITY lol "lollollollollollollollollollollol"> ]>
+<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core
+ 4.1.3-c001 49.282696, Mon Apr 02 2007 21:16:10        ">
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+<rdf:Description
+ rdf:about=""
+ xmlns:exif="http://ns.adobe.com/exif/1.0/"
+ exif:DigitalZoomRatio="0/10">
+<exif:Flash rdf:parseType='Resource'>
+<exif:Fired>True</exif:Fired> <exif:Return>0</exif:Return> <exif:Mode>1</exif:Mode> <exif:Function>False</exif:Function> <exif:RedEyeMode>False</exif:RedEyeMode></exif:Flash> </rdf:Description> </rdf:RDF> </x:xmpmeta>
+
+<?xpacket end="w"?>
diff --git a/tests/phpunit/includes/media/XMPTest.php b/tests/phpunit/includes/media/XMPTest.php
index 798e492..48fb9cb 100644
--- a/tests/phpunit/includes/media/XMPTest.php
+++ b/tests/phpunit/includes/media/XMPTest.php
@@ -62,6 +62,10 @@ class XMPTest extends MediaWikiTestCase {
 			array( 'gps', 'Handling of exif GPS parameters in XMP' ),
 		);
 
+		if ( wfIsHHVM() ) {
+			$xmpFiles[] = array( 'doctype-included', 'XMP includes doctype' );
+		}
+
 		foreach ( $xmpFiles as $file ) {
 			$xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' );
 			// I'm not sure if this is the best way to handle getting the
-- 
1.8.4.5

