From befb9250ef467c453781c1fcb62b26987d78ced4 Mon Sep 17 00:00:00 2001
From: Brian Wolff <bawolff+wn@gmail.com>
Date: Thu, 3 Dec 2015 21:39:16 -0500
Subject: [PATCH] SECURITY: Include quote characters in strip markers so esc in
 attr

Strip markers get substituted for general html, which means the
substitution text general does not escape quote characters. If
someone can convince MW to put a strip marker in an attribute,
you can get around escaping requirements that way. This patch
adds the characters `"' to the strip marker text. At least one
of these characters should be escaped inside attributes (regardless
of what quote character you use for attributes), thus normal html
escaping will deactivate the strip markers, preventing the
vulnrability.

This will break any extension that escapes input with htmlspecialchars,
to add to html/half parsed html output, but assumes that strip markers
are unmangled. I don't think its very common to do this. The primary
example I found was some core usages of Xml::escapeTagsOnly(). (And
even in that case, it only affected the corner case of being called
via {{#tag:..}})

Based on MatmaRex's suggestion.

Change-Id: If887065e12026530f36e5f35dd7ab0831d313561
---
 includes/parser/CoreTagHooks.php | 24 +++++++++++++++++++-----
 includes/parser/Parser.php       |  9 +++++++--
 tests/parser/parserTests.txt     |  9 +++++++++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php
index 9755ea9..3f4f54a 100644
--- a/includes/parser/CoreTagHooks.php
+++ b/includes/parser/CoreTagHooks.php
@@ -56,9 +56,14 @@ class CoreTagHooks {
 		$content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' );
 
 		$attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
-		return Xml::openElement( 'pre', $attribs ) .
-			Xml::escapeTagsOnly( $content ) .
-			'</pre>';
+		// We need to let both '"' and '&' through,
+		// for strip markers and entities respectively.
+		$content = str_replace(
+			array( '>', '<' ),
+			array( '&gt;', '&lt;' ),
+			$content
+		);
+		return Html::rawElement( 'pre', $attribs, $content );
 	}
 
 	/**
@@ -98,8 +103,17 @@ class CoreTagHooks {
 	 * @return array
 	 */
 	public static function nowiki( $content, $attributes, $parser ) {
-		$content = strtr( $content, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
-		return array( Xml::escapeTagsOnly( $content ), 'markerType' => 'nowiki' );
+		$content = strtr( $content, array(
+			// lang converter
+			'-{' => '-&#123;',
+			'}-' => '&#125;-',
+			// html tags
+			'<' => '&lt;',
+			'>' => '&gt;'
+			// Note: Both '"' and '&' are not converted.
+			// This allows strip markers and entities through.
+		) );
+		return array( $content, 'markerType' => 'nowiki' );
 	}
 
 	/**
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index c07a08a..55e0250 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -129,9 +129,14 @@ class Parser {
 	 *
 	 * Must not consist of all title characters, or else it will change
 	 * the behavior of <nowiki> in a link.
+	 *
+	 * Must have a character that needs escaping in attributes, otherwise
+	 * someone could put a strip marker in an attribute, to get around
+	 * escaping quote marks, and break out of the attribute. Thus we add
+	 * `'".
 	 */
-	const MARKER_SUFFIX = "-QINU\x7f";
-	const MARKER_PREFIX = "\x7fUNIQ-";
+	const MARKER_SUFFIX = "-QINU`\"'\x7f";
+	const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 
 	# Markers used for wrapping the table of contents
 	const TOC_START = '<mw:toc>';
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index c8c63f3..4b3e50c 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -2263,6 +2263,15 @@ Entities inside <pre>
 !! end
 
 !! test
+<nowiki> inside of #tag:pre
+!! wikitext
+{{#tag:pre|Foo <nowiki>&rarr;bar</nowiki>}}
+!! html
+<pre>Foo &#8594;bar</pre>
+
+!! end
+
+!! test
 <nowiki> and <pre> preference (first one wins)
 !! wikitext
 <pre>
-- 
2.6.6

