From 5913efe56cd7f57293bddb476935ad32ea312ede Mon Sep 17 00:00:00 2001
From: Lucas Werkmeister <lucas.werkmeister@wikimedia.de>
Date: Mon, 4 Aug 2025 16:43:00 +0200
Subject: [PATCH] SECURITY: Sanitize data- attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, if you managed to get data- attributes with e.g. spaces or
slashes in the name into validateAttributes(), then the rest of the
attribute name would not be validated and get concatenated into HTML
that would eventually be parsed as separate attributes (or even tag
contents and new markup, if you had a > in the name). I don’t think this
was possible via regular <p> parsing, as decodeTagAttributes() would
decode the attributes differently in that case, but it was possible via
various wikitext constructs, including {{#tag:}}.

Tighten the regex to throw out such invalid attributes, and add a few
tests in this direction. More refactoring, and especially more tests,
can happen later, once this change is public and we can benefit from CI.

Bug: T401099
Change-Id: SECURITY-Id095a3278083dbedba083d5aa3c1cbaa379a682f
---
 includes/parser/Sanitizer.php                  |  3 ++-
 .../phpunit/includes/parser/SanitizerTest.php  | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/includes/parser/Sanitizer.php b/includes/parser/Sanitizer.php
index 2d6934bb93..22662ab57b 100644
--- a/includes/parser/Sanitizer.php
+++ b/includes/parser/Sanitizer.php
@@ -512,8 +512,9 @@ public static function validateAttributes( array $attribs, array $allowed ): arr
 			# * Disallow data attributes used by MediaWiki code
 			# * Ensure that the attribute is not namespaced by banning
 			#   colons.
+			# * Also disallow characters that would terminate the attribute (T401099).
 			if ( (
-				!preg_match( '/^data-[^:]*$/i', $attribute ) &&
+				!preg_match( '/^data-[^:\t \r\n\/>\0]*$/i', $attribute ) &&
 				!array_key_exists( $attribute, $allowed )
 			) || self::isReservedDataAttribute( $attribute ) ) {
 				continue;
diff --git a/tests/phpunit/includes/parser/SanitizerTest.php b/tests/phpunit/includes/parser/SanitizerTest.php
index 24b5acf140..dbb8eb71e2 100644
--- a/tests/phpunit/includes/parser/SanitizerTest.php
+++ b/tests/phpunit/includes/parser/SanitizerTest.php
@@ -160,6 +160,24 @@ public static function provideValidateTagAttributes() {
 				[ 'role' => 'menuitem', 'aria-hidden' => 'false' ],
 				[ 'role' => 'menuitem', 'aria-hidden' => 'false' ],
 			],
+			[ 'div',
+				[
+					'data-wikitext' => 'wikitext',
+					'DATA-WIKITEXT-2' => 'WIKITEXT-2',
+					'data-mw' => 'disallow impersonating parsoid',
+					'DATA-mw' => 'disallow impersonating PARSOID',
+					'data-mw-extension' => 'disallow impersonating extension',
+					'data-:namespaced' => 'disallow namespace',
+					'data- invalid' => 'disallow XSS',
+					'data-/invalid' => 'disallow XSS',
+					'data->invalid' => 'disallow XSS',
+				],
+				[
+					'data-wikitext' => 'wikitext',
+					'DATA-WIKITEXT-2' => 'WIKITEXT-2',
+					# other attributes removed
+				]
+			],
 		];
 	}
 
-- 
2.50.1

