From 7ab02540008465472453d9bb1fd8df779ab94661 Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian" <cscott@cscott.net>
Date: Mon, 4 Aug 2025 17:12:24 +0200
Subject: [PATCH] SECURITY: Sanitize data- attributes

Bug: T401099
Change-Id: SECURITY-I40725eb061c3ab293a9e7ddfaf4549710f444bdf
---
 wikimedia/parsoid/src/Core/Sanitizer.php | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/wikimedia/parsoid/src/Core/Sanitizer.php b/wikimedia/parsoid/src/Core/Sanitizer.php
index 086ac1ea98..1b823d36c7 100644
--- a/wikimedia/parsoid/src/Core/Sanitizer.php
+++ b/wikimedia/parsoid/src/Core/Sanitizer.php
@@ -965,7 +965,10 @@ public static function sanitizeTagAttrs(
 				# * Disallow data attributes used by MediaWiki code
 				# * Ensure that the attribute is not namespaced by banning
 				#   colons.
-				if ( ( !preg_match( '/^data-[^:]*$/iD', $k ) && !isset( $list[$k] ) )
+				# * Reject names containing =, space, tab, CR, LF, /, > or NUL,
+				#   so that the HTML parser will accept the attribute name; see
+				#   https://github.com/whatwg/dom/issues/849#issuecomment-1007541209
+				if ( ( !preg_match( '|^data-[^:= \t\r\n/>\0]*$|iD', $k ) && !isset( $list[$k] ) )
 					 || self::isReservedDataAttribute( $k )
 				) {
 					$newAttrs[$k] = [ null, $origV, $origK ];
-- 
2.50.1

