From 97c69a1a34efde29dc708e12a611cf64b4ff7fcc Mon Sep 17 00:00:00 2001
From: Brian Wolff
Date: Sun, 24 Jan 2016 09:56:25 -0500
Subject: [PATCH] Use more complicated regex for detecting html

(Hopefully will detect if there is an unescaped '>' inside attribute)

Change-Id: Iabbd926eab13a218bf92b20b54d83f611c3e4830
---
 languages/LanguageConverter.php | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
index 1825603..8b58628 100644
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -374,9 +374,12 @@ class LanguageConverter {
 		$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
 		// disable conversion of <pre> tags
 		$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+		// The "|.*+)" alternative at the end is a safety net: if we missed some part
+		// of HTML syntax, we fail securely (hopefully) by matching the rest of the string.
+		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
 
-		$reg = '/' . $codefix . $scriptfix . $prefix .
-			'<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+		$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
 		$startPos = 0;
 		$sourceBlob = '';
 		$literalBlob = '';
-- 
2.0.1