From 800f9d9912422eb565e6809331f956469b7c87e5 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Sun, 24 Jan 2016 05:29:10 -0500 Subject: [PATCH] SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit Adjust regexes for what not to convert to avoid backtracking by preferring possessive quantifiers Add check that we really have matched to the end of the string, and log error if the regex hits some sort of error preventing the entire string from being matched. Should the regex not match to the end, then language conversion is disabled for the string. Bug: T124404 Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a --- languages/LanguageConverter.php | 47 +++++++++++++++++------ tests/phpunit/languages/LanguageConverterTest.php | 19 +++++++++ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 5a9f652..9ca28378 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -20,6 +20,8 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -361,20 +363,24 @@ class LanguageConverter { 2. HTML entities 3. placeholders created by the parser */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possessively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between tags - $codefix = '.+?<\/code>|'; + $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of