From e8c07372b3dea61cc8f27317e63e97722d961cde Mon Sep 17 00:00:00 2001
From: Arlo Breault <abreault@wikimedia.org>
Date: Wed, 7 Apr 2021 13:43:13 -0700
Subject: [PATCH] [SECURITY] Use a protected key to distinguish comments
 internal to Parsoid

Bug: T279451
Change-Id: I40bdfddaed292a33479874b5e49b17fe616c3889
---
 wikimedia/parsoid/src/Utils/WTUtils.php      | 14 ++++++++++++--
 wikimedia/parsoid/src/Wt2Html/Grammar.pegphp |  2 ++
 wikimedia/parsoid/src/Wt2Html/Grammar.php    |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/wikimedia/parsoid/src/Utils/WTUtils.php b/wikimedia/parsoid/src/Utils/WTUtils.php
index 6ad970ae..b2951d58 100644
--- a/wikimedia/parsoid/src/Utils/WTUtils.php
+++ b/wikimedia/parsoid/src/Utils/WTUtils.php
@@ -725,6 +725,8 @@ class WTUtils {
 		// Now encode '-', '>' and '&' in the "true value" as HTML entities,
 		// so that they can be safely embedded in an HTML comment.
 		// This part doesn't have to map strings 1-to-1.
+		// WARNING(T279451): This is actually the part which protects the
+		// "-type" key in self::fosterCommentData
 		return preg_replace_callback( '/[->&]/', function ( $m ) {
 			return Utils::entityEncodeAll( $m[0] );
 		}, $trueValue );
@@ -789,7 +791,11 @@ class WTUtils {
 	 */
 	public static function fosterCommentData( string $typeOf, array $attrs, bool $encode ): string {
 		$str = PHPUtils::jsonEncode( [
-			'@type' => $typeOf,
+			// WARNING(T279451): The choice of "-type" as the key is because
+			// "-" will be encoded with self::encodeComment when comments come
+			// from source wikitext (see the grammar), so we can be sure when
+			// reinserting that the comments are internal to Parsoid
+			'-type' => $typeOf,
 			'attrs' => $attrs
 		] );
 		if ( $encode ) {
@@ -808,16 +814,20 @@ class WTUtils {
 		Env $env, DOMNode $node, bool $decode
 	): ?DOMNode {
 		if ( DOMUtils::isComment( $node ) && preg_match( '/^\{.+\}$/D', $node->nodeValue ) ) {
+			// XXX(T279451#6981267): Hardcode this for good measure, even
+			// though all production uses should already be passing in `false`
+			$decode = false;
 			// Convert serialized meta tags back from comments.
 			// We use this trick because comments won't be fostered,
 			// providing more accurate information about where tags are expected
 			// to be found.
+			// @phan-suppress-next-line PhanImpossibleCondition
 			$data = json_decode( $decode ? self::decodeComment( $node->nodeValue ) : $node->nodeValue );
 			if ( $data === null ) {
 				// not a valid json attribute, do nothing
 				return null;
 			}
-			$type = $data->{'@type'};
+			$type = $data->{'-type'} ?? '';
 			if ( preg_match( '/^mw:/', $type ) ) {
 				$meta = $node->ownerDocument->createElement( 'meta' );
 				foreach ( $data->attrs as $attr ) {
diff --git a/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp b/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp
index a278ba9b..e4a4c14a 100644
--- a/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp
+++ b/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp
@@ -677,6 +677,8 @@ heading =
 
 comment =
 	'<!--' c:$(!"-->" .)* ('-->' / eof) {
+		// WARNING(T279451): This encoding is important for the choice of key
+		// in WTUtils::fosterCommentData
 		$data = WTUtils::encodeComment( $c );
 		return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
 	}
diff --git a/wikimedia/parsoid/src/Wt2Html/Grammar.php b/wikimedia/parsoid/src/Wt2Html/Grammar.php
index 1c7fac16..7786260e 100644
--- a/wikimedia/parsoid/src/Wt2Html/Grammar.php
+++ b/wikimedia/parsoid/src/Wt2Html/Grammar.php
@@ -562,6 +562,8 @@ class Grammar extends \WikiPEG\PEGParserBase {
   }
   private function a26($c) {
   
+  		// WARNING(T279451): This encoding is important for the choice of key
+  		// in WTUtils::fosterCommentData
   		$data = WTUtils::encodeComment( $c );
   		return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
   	
-- 
2.30.2

