From e8c07372b3dea61cc8f27317e63e97722d961cde Mon Sep 17 00:00:00 2001 From: Arlo Breault Date: Wed, 7 Apr 2021 13:43:13 -0700 Subject: [PATCH] [SECURITY] Use a protected key to distinguish comments internal to Parsoid Bug: T279451 Change-Id: I40bdfddaed292a33479874b5e49b17fe616c3889 --- wikimedia/parsoid/src/Utils/WTUtils.php | 14 ++++++++++++-- wikimedia/parsoid/src/Wt2Html/Grammar.pegphp | 2 ++ wikimedia/parsoid/src/Wt2Html/Grammar.php | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/wikimedia/parsoid/src/Utils/WTUtils.php b/wikimedia/parsoid/src/Utils/WTUtils.php index 6ad970ae..b2951d58 100644 --- a/wikimedia/parsoid/src/Utils/WTUtils.php +++ b/wikimedia/parsoid/src/Utils/WTUtils.php @@ -725,6 +725,8 @@ class WTUtils { // Now encode '-', '>' and '&' in the "true value" as HTML entities, // so that they can be safely embedded in an HTML comment. // This part doesn't have to map strings 1-to-1. + // WARNING(T279451): This is actually the part which protects the + // "-type" key in self::fosterCommentData return preg_replace_callback( '/[->&]/', function ( $m ) { return Utils::entityEncodeAll( $m[0] ); }, $trueValue ); @@ -789,7 +791,11 @@ class WTUtils { */ public static function fosterCommentData( string $typeOf, array $attrs, bool $encode ): string { $str = PHPUtils::jsonEncode( [ - '@type' => $typeOf, + // WARNING(T279451): The choice of "-type" as the key is because + // "-" will be encoded with self::encodeComment when comments come + // from source wikitext (see the grammar), so we can be sure when + // reinserting that the comments are internal to Parsoid + '-type' => $typeOf, 'attrs' => $attrs ] ); if ( $encode ) { @@ -808,16 +814,20 @@ class WTUtils { Env $env, DOMNode $node, bool $decode ): ?DOMNode { if ( DOMUtils::isComment( $node ) && preg_match( '/^\{.+\}$/D', $node->nodeValue ) ) { + // XXX(T279451#6981267): Hardcode this for good measure, even + // though all production uses should already be passing in `false` + $decode = false; // Convert serialized meta tags back from comments. // We use this trick because comments won't be fostered, // providing more accurate information about where tags are expected // to be found. + // @phan-suppress-next-line PhanImpossibleCondition $data = json_decode( $decode ? self::decodeComment( $node->nodeValue ) : $node->nodeValue ); if ( $data === null ) { // not a valid json attribute, do nothing return null; } - $type = $data->{'@type'}; + $type = $data->{'-type'} ?? ''; if ( preg_match( '/^mw:/', $type ) ) { $meta = $node->ownerDocument->createElement( 'meta' ); foreach ( $data->attrs as $attr ) { diff --git a/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp b/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp index a278ba9b..e4a4c14a 100644 --- a/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp +++ b/wikimedia/parsoid/src/Wt2Html/Grammar.pegphp @@ -677,6 +677,8 @@ heading = comment = '" .)* ('-->' / eof) { + // WARNING(T279451): This encoding is important for the choice of key + // in WTUtils::fosterCommentData $data = WTUtils::encodeComment( $c ); return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ]; } diff --git a/wikimedia/parsoid/src/Wt2Html/Grammar.php b/wikimedia/parsoid/src/Wt2Html/Grammar.php index 1c7fac16..7786260e 100644 --- a/wikimedia/parsoid/src/Wt2Html/Grammar.php +++ b/wikimedia/parsoid/src/Wt2Html/Grammar.php @@ -562,6 +562,8 @@ class Grammar extends \WikiPEG\PEGParserBase { } private function a26($c) { + // WARNING(T279451): This encoding is important for the choice of key + // in WTUtils::fosterCommentData $data = WTUtils::encodeComment( $c ); return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ]; -- 2.30.2