From 14e8dd19cb0c5f7cd95be080a92bef694f6c01e6 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Sun, 24 Jan 2016 05:29:10 -0500 Subject: [PATCH] XSS in langconverter when regex hits pcre.backtrack_limit Adjust regexes for what not to convert to avoid backtracking by preferring possesive quantifiers Add check that we really have matched to the end of the string, and log error if the regex hits some sort of error preventing the entire string from being matched. Should the regex not match to the end, then language conversion is disabled for the string. Bug: T124404 Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a --- languages/LanguageConverter.php | 51 ++++++++++++++++++------ tests/phpunit/includes/LanguageConverterTest.php | 19 +++++++++ 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 8451401..a2dd4ab 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -19,6 +19,8 @@ * @ingroup Language */ +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -356,26 +358,32 @@ class LanguageConverter { 1. HTML markups (anything between < and >) 2. HTML entities 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. */ global $wgParser; if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) { - $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+'; + $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]++'; } else { $marker = ''; } // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possesively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between tags - $codefix = '.+?<\/code>|'; + $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of