From 15169a678f9f468ff6465035a32f28e8ec82003f Mon Sep 17 00:00:00 2001 From: Arlo Breault Date: Wed, 7 Apr 2021 12:10:39 -0400 Subject: [PATCH] Use a protected key to distinguish comments internal to Parsoid Bug: T279451 Change-Id: I40bdfddaed292a33479874b5e49b17fe616c3889 --- src/Utils/WTUtils.php | 14 ++++++++++++-- src/Wt2Html/Grammar.pegphp | 2 ++ src/Wt2Html/Grammar.php | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Utils/WTUtils.php b/src/Utils/WTUtils.php index 88b7c0ba6..f6e0720a3 100644 --- a/src/Utils/WTUtils.php +++ b/src/Utils/WTUtils.php @@ -700,6 +700,8 @@ class WTUtils { // Now encode '-', '>' and '&' in the "true value" as HTML entities, // so that they can be safely embedded in an HTML comment. // This part doesn't have to map strings 1-to-1. + // WARNING(T279451): This is actually the part which protects the + // "-type" key in self::fosterCommentData return preg_replace_callback( '/[->&]/', function ( $m ) { return Utils::entityEncodeAll( $m[0] ); }, $trueValue ); @@ -764,7 +766,11 @@ class WTUtils { */ public static function fosterCommentData( string $typeOf, array $attrs, bool $encode ): string { $str = PHPUtils::jsonEncode( [ - '@type' => $typeOf, + // WARNING(T279451): The choice of "-type" as the key is because + // "-" will be encoded with self::encodeComment when comments come + // from source wikitext (see the grammar), so we can be sure when + // reinserting that the comments are internal to Parsoid + '-type' => $typeOf, 'attrs' => $attrs ] ); if ( $encode ) { @@ -782,16 +788,20 @@ class WTUtils { public static function reinsertFosterableContent( Env $env, DOMNode $node, bool $decode ): ?DOMNode { if ( DOMUtils::isComment( $node ) && preg_match( '/^\{.+\}$/D', $node->nodeValue ) ) { + // XXX(T279451#6981267): Hardcode this for good measure, even + // though all production uses should already be passing in `false` + $decode = false; // Convert serialized meta tags back from comments. 
// We use this trick because comments won't be fostered, // providing more accurate information about where tags are expected // to be found. + // @phan-suppress-next-line PhanImpossibleCondition $data = json_decode( $decode ? self::decodeComment( $node->nodeValue ) : $node->nodeValue ); if ( $data === null ) { // not a valid json attribute, do nothing return null; } - $type = $data->{'@type'}; + $type = $data->{'-type'} ?? ''; if ( preg_match( '/^mw:/', $type ) ) { $meta = $node->ownerDocument->createElement( 'meta' ); foreach ( $data->attrs as $attr ) { diff --git a/src/Wt2Html/Grammar.pegphp b/src/Wt2Html/Grammar.pegphp index c70fe58fa..0a102751c 100644 --- a/src/Wt2Html/Grammar.pegphp +++ b/src/Wt2Html/Grammar.pegphp @@ -657,6 +657,8 @@ heading = comment = '<!--' c:$(!"-->" .)* ('-->' / eof) { + // WARNING(T279451): This encoding is important for the choice of key + // in WTUtils::fosterCommentData $data = WTUtils::encodeComment( $c ); return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ]; } diff --git a/src/Wt2Html/Grammar.php b/src/Wt2Html/Grammar.php index 3453737af..2b4ffd4fe 100644 --- a/src/Wt2Html/Grammar.php +++ b/src/Wt2Html/Grammar.php @@ -537,6 +537,8 @@ class Grammar extends \WikiPEG\PEGParserBase { } private function a23($c) { + // WARNING(T279451): This encoding is important for the choice of key + // in WTUtils::fosterCommentData $data = WTUtils::encodeComment( $c ); return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ]; -- 2.31.1