From a9072c98beb8944b7f08cd66100a58e3c20769bc Mon Sep 17 00:00:00 2001 From: csteipp Date: Tue, 12 Nov 2013 13:35:31 -0800 Subject: [PATCH] SECURITY: Improve css javascript detection * Forbid vertical tabs * Convert Fullwidth, sup/sub script, IPA, and repetition unicode to ascii, for ie6 Bug: 55332 Change-Id: I41a71b5b8fbecadd0f958cf57cc90d4c2fd9366e --- includes/Sanitizer.php | 46 ++++++++++++++++++++++++++++++- tests/parser/parserTests.txt | 64 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 196abd9..50ac84d 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -882,6 +882,21 @@ class Sanitizer { $value = preg_replace_callback( $decodeRegex, array( __CLASS__, 'cssDecodeCallback' ), $value ); + // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii + $value = preg_replace_callback( + '/[!-z]/u', // U+FF01 to U+FF5A + array( __CLASS__, 'cssNormalizeUnicodeWidth' ), + $value + ); + + // Convert more characters IE6 might treat as ascii + // U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D + $value = str_replace( + array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ), + array( 'r', 'n', 'n', 'l', 'i', '(', '(' ), + $value + ); + // Remove any comments; IE gets token splitting wrong // This must be done AFTER decoding character references and // escape sequences, because those steps can introduce comments @@ -897,8 +912,24 @@ class Sanitizer { $value = substr( $value, 0, $commentPos ); } + // S followed by repeat, iteration, or prolonged sound marks, + // which IE will treat as "ss" + $value = preg_replace( + '/s(?: + \xE3\x80\xB1 | # U+3031 + \xE3\x82\x9D | # U+309D + \xE3\x83\xBC | # U+30FC + \xE3\x83\xBD | # U+30FD + \xEF\xB9\xBC | # U+FE7C + \xEF\xB9\xBD | # U+FE7D + \xEF\xBD\xB0 # U+FF70 + )/ix', + 'ss', + $value + ); + // Reject problematic keywords and control characters - if ( preg_match( 
'/[\000-\010\016-\037\177]/', $value ) ) { + if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) { return '/* invalid control char */'; } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix', $value ) ) { return '/* insecure input */'; @@ -907,6 +938,19 @@ class Sanitizer { } /** + * preg_replace_callback() callback: map one Fullwidth character (U+FF01 to U+FF5A) to ASCII by subtracting 65248 from its code point + * @param array $matches Match array; only $matches[0] is read. No range check is done here, so the calling regex must match only U+FF01 to U+FF5A + * @return string Character in ASCII range \x21-\x7A for input in that range, or an empty string if utf8ToCodepoint() returns false + */ + static function cssNormalizeUnicodeWidth( $matches ) { + $cp = utf8ToCodepoint( $matches[0] ); + if ( $cp === false ) { + return ''; + } + return chr( $cp - 65248 ); // 65248 = 0xFEE0 = 0xFF01 - 0x21, so the result lands in \x21-\x7A + } + + /** * @param $matches array * @return String */ diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 11a5516..527c10c 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -5059,6 +5059,70 @@ MSIE CSS safety test: comment in expression !! end +!! test +CSS safety test: vertical tab +!! input +

A

+!! result +

A

+ +!! end + +!! test +MSIE CSS safety test: Fullwidth +!! input +

A

+
B
+!! result +

A

+
B
+ +!! end + +!! test +MSIE CSS safety test: IPA extensions +!! input +
A
+

B

+!! result +
A
+

B

+ +!! end + +!! test +MSIE CSS safety test: sup/sub script +!! input +
A
+
B
+

C

+!! result +
A
+
B
+

C

+ +!! end + +!! test +MSIE CSS safety test: Repetition markers +!! input +

A

+

B

+

C

+

D

+

E

+

F

+

G

+!! result +

A

+

B

+

C

+

D

+

E

+

F

+

G

+ +!! end !! test Table attribute legitimate extension -- 1.8.1.4