From 42b1091250163d046f92d2217e97817bc64ed1a6 Mon Sep 17 00:00:00 2001
From: Arlo Breault <abreault@wikimedia.org>
Date: Wed, 7 Apr 2021 12:10:39 -0400
Subject: [PATCH] Use a protected key to distinguish comments internal to
 Parsoid

Bug: T279451
Change-Id: I40bdfddaed292a33479874b5e49b17fe616c3889
---
 src/Utils/WTUtils.php      | 10 ++++++++--
 src/Wt2Html/Grammar.pegphp |  2 ++
 src/Wt2Html/Grammar.php    |  2 ++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/Utils/WTUtils.php b/src/Utils/WTUtils.php
index 40dff131b..63c6be999 100644
--- a/src/Utils/WTUtils.php
+++ b/src/Utils/WTUtils.php
@@ -725,6 +725,8 @@ class WTUtils {
 		// Now encode '-', '>' and '&' in the "true value" as HTML entities,
 		// so that they can be safely embedded in an HTML comment.
 		// This part doesn't have to map strings 1-to-1.
+		// WARNING(T279451): Encoding "-" here is what protects the "-type"
+		// key emitted by self::fosterCommentData; the two must stay in sync.
 		return preg_replace_callback( '/[->&]/', function ( $m ) {
 			return Utils::entityEncodeAll( $m[0] );
 		}, $trueValue );
@@ -788,7 +790,11 @@ class WTUtils {
 	 */
 	public static function fosterCommentData( string $typeOf, array $attrs ): string {
 		return PHPUtils::jsonEncode( [
-			'@type' => $typeOf,
+			// WARNING(T279451): The key is "-type" because self::encodeComment
+			// entity-encodes "-" in every comment that comes from source
+			// wikitext (see the grammar). A literal "-type" key found when
+			// reinserting therefore marks the comment as internal to Parsoid.
+			'-type' => $typeOf,
 			'attrs' => $attrs
 		] );
 	}
@@ -811,7 +817,7 @@ class WTUtils {
 				// not a valid json attribute, do nothing
 				return null;
 			}
-			$type = $data->{'@type'} ?? '';
+			$type = $data->{'-type'} ?? '';
 			if ( preg_match( '/^mw:/', $type ) ) {
 				$meta = $node->ownerDocument->createElement( 'meta' );
 				foreach ( $data->attrs as $attr ) {
diff --git a/src/Wt2Html/Grammar.pegphp b/src/Wt2Html/Grammar.pegphp
index a278ba9b4..e4a4c14a2 100644
--- a/src/Wt2Html/Grammar.pegphp
+++ b/src/Wt2Html/Grammar.pegphp
@@ -677,6 +677,8 @@ heading =
 
 comment =
 	'<!--' c:$(!"-->" .)* ('-->' / eof) {
+		// WARNING(T279451): This encoding is important for the choice of key
+		// in WTUtils::fosterCommentData
 		$data = WTUtils::encodeComment( $c );
 		return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
 	}
diff --git a/src/Wt2Html/Grammar.php b/src/Wt2Html/Grammar.php
index 1c7fac166..7786260ec 100644
--- a/src/Wt2Html/Grammar.php
+++ b/src/Wt2Html/Grammar.php
@@ -562,6 +562,8 @@ class Grammar extends \WikiPEG\PEGParserBase {
   }
   private function a26($c) {
   
+  		// WARNING(T279451): This encoding is important for the choice of key
+  		// in WTUtils::fosterCommentData
   		$data = WTUtils::encodeComment( $c );
   		return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
   	
-- 
2.31.1

