From 7cc0debec2f0a1babb02e033bdea4dec4f3bb080 Mon Sep 17 00:00:00 2001 From: csteipp Date: Tue, 12 Nov 2013 13:42:59 -0800 Subject: [PATCH] SECURITY: Improve css javascript detection * Forbid vertical tabs * Convert Fullwidth, sup/sub script, IPA, and repetition unicode to ascii, for ie6 Bug: 55332 Change-Id: I41a71b5b8fbecadd0f958cf57cc90d4c2fd9366e --- includes/Sanitizer.php | 39 ++++++++++++++++++++++++++- tests/parser/parserTests.txt | 64 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 2dff081..849e4d6 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -854,6 +854,27 @@ class Sanitizer { $value = preg_replace_callback( $decodeRegex, array( __CLASS__, 'cssDecodeCallback' ), $value ); + // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii + $value = preg_replace_callback( + '/[！-ｚ]/u', // U+FF01 to U+FF5A + function ( $matches ) { + $cp = utf8ToCodepoint( $matches[0] ); + if ( $cp === false ) { + return ''; + } + return chr( $cp - 65248 ); // ASCII range \x21-\x7A + }, + $value + ); + + // Convert more characters IE6 might treat as ascii + // U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D + $value = str_replace( + array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ), + array( 'r', 'n', 'n', 'l', 'i', '(', '(' ), + $value + ); + // Remove any comments; IE gets token splitting wrong // This must be done AFTER decoding character references and // escape sequences, because those steps can introduce comments @@ -869,8 +890,24 @@ class Sanitizer { $value = substr( $value, 0, $commentPos ); } + // S followed by repeat, iteration, or prolonged sound marks, + // which IE will treat as "ss" + $value = preg_replace( + '/s(?: + \xE3\x80\xB1 | # U+3031 + \xE3\x82\x9D | # U+309D + \xE3\x83\xBC | # U+30FC + \xE3\x83\xBD | # U+30FD + \xEF\xB9\xBC | # U+FE7C + \xEF\xB9\xBD | # U+FE7D + \xEF\xBD\xB0 # U+FF70 + )/ix', + 'ss', + 
$value + ); + // Reject problematic keywords and control characters - if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) { + if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) { return '/* invalid control char */'; } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', $value ) ) { return '/* insecure input */'; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index e9218de..f0603e7 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -8470,6 +8470,70 @@ MSIE CSS safety test: comment in expression !! end +!! test +CSS safety test: vertical tab +!! input +

A

+!! result +

A

+ +!! end + +!! test +MSIE CSS safety test: Fullwidth +!! input +

A

+
B
+!! result +

A

+
B
+ +!! end + +!! test +MSIE CSS safety test: IPA extensions +!! input +
A
+

B

+!! result +
A
+

B

+ +!! end + +!! test +MSIE CSS safety test: sup/sub script +!! input +
A
+
B
+

C

+!! result +
A
+
B
+

C

+ +!! end + +!! test +MSIE CSS safety test: Repetition markers +!! input +

A

+

B

+

C

+

D

+

E

+

F

+

G

+!! result +

A

+

B

+

C

+

D

+

E

+

F

+

G

+ +!! end !! test Table attribute legitimate extension -- 1.8.1.4