Index: includes/api/ApiPageSet.php =================================================================== --- includes/api/ApiPageSet.php (revision 87498) +++ includes/api/ApiPageSet.php (working copy) @@ -655,15 +655,26 @@ * @return LinkBatch */ private function processTitlesArray( $titles ) { + global $wgContLang; $linkBatch = new LinkBatch(); foreach ( $titles as $title ) { - $titleObj = is_string( $title ) ? Title::newFromText( $title ) : $title; + if ( is_string( $title ) ) { + // Normalize title for Unicode and create a Title object + $normTitle = $wgContLang->normalize( $title ); + $titleObj = Title::newFromText( $normTitle ); + } else { + $titleObj = $title; + $normTitle = $titleObj->getPrefixedText(); + } if ( !$titleObj ) { // Handle invalid titles gracefully - $this->mAllpages[0][$title] = $this->mFakePageId; - $this->mInvalidTitles[$this->mFakePageId] = $title; + $this->mAllpages[0][$normTitle] = $this->mFakePageId; + $this->mInvalidTitles[$this->mFakePageId] = $normTitle; $this->mFakePageId--; + if ( is_string( $title ) && $title !== $normTitle ) { + $this->mNormalizedTitles[$title] = $normTitle; + } continue; // There's nothing else we can do } $unconvertedTitle = $titleObj->getPrefixedText(); @@ -674,7 +685,6 @@ $this->mInterwikiTitles[$titleObj->getPrefixedText()] = $iw; } else { // Variants checking - global $wgContLang; if ( $this->mConvertTitles && count( $wgContLang->getVariants() ) > 1 && !$titleObj->exists() ) { @@ -713,7 +723,8 @@ protected function getAllowedParams() { return array( 'titles' => array( - ApiBase::PARAM_ISMULTI => true + ApiBase::PARAM_ISMULTI => true, + ApiBase::PARAM_NORMALIZE_UNICODE => false, ), 'pageids' => array( ApiBase::PARAM_TYPE => 'integer', Index: includes/api/ApiBase.php =================================================================== --- includes/api/ApiBase.php (revision 87498) +++ includes/api/ApiBase.php (working copy) @@ -53,6 +53,7 @@ const PARAM_DEPRECATED = 7; // Boolean, is the parameter deprecated (will show a warning) const PARAM_REQUIRED = 8; // Boolean, is the parameter required? const PARAM_RANGE_ENFORCE = 9; // Boolean, if MIN/MAX are set, enforce (die) these? Only applies if TYPE='integer' Use with extreme caution + const PARAM_NORMALIZE_UNICODE = 10; // Boolean, should we normalize Unicode for a parameter? Defaults to true const LIMIT_BIG1 = 500; // Fast query, std user limit const LIMIT_BIG2 = 5000; // Fast query, bot/sysop limit @@ -672,6 +673,7 @@ $dupes = false; $deprecated = false; $required = false; + $normalizeUnicode = true; } else { $default = isset( $paramSettings[self::PARAM_DFLT] ) ? $paramSettings[self::PARAM_DFLT] : null; $multi = isset( $paramSettings[self::PARAM_ISMULTI] ) ? $paramSettings[self::PARAM_ISMULTI] : false; @@ -679,6 +681,7 @@ $dupes = isset( $paramSettings[self::PARAM_ALLOW_DUPLICATES] ) ? $paramSettings[self::PARAM_ALLOW_DUPLICATES] : false; $deprecated = isset( $paramSettings[self::PARAM_DEPRECATED] ) ? $paramSettings[self::PARAM_DEPRECATED] : false; $required = isset( $paramSettings[self::PARAM_REQUIRED] ) ? $paramSettings[self::PARAM_REQUIRED] : false; + $normalizeUnicode = isset( $paramSettings[self::PARAM_NORMALIZE_UNICODE] ) ? $paramSettings[self::PARAM_NORMALIZE_UNICODE] : true; // When type is not given, and no choices, the type is the same as $default if ( !isset( $type ) ) { @@ -698,7 +701,7 @@ $value = $this->getMain()->getRequest()->getCheck( $encParamName ); } else { - $value = $this->getMain()->getRequest()->getVal( $encParamName, $default ); + $value = $this->getMain()->getRequest()->getVal( $encParamName, $default, $normalizeUnicode ); if ( isset( $value ) && $type == 'namespace' ) { $type = MWNamespace::getValidNamespaces(); Index: includes/WebRequest.php =================================================================== --- includes/WebRequest.php (revision 87498) +++ includes/WebRequest.php (working copy) @@ -238,9 +238,10 @@ * @param $arr Array * @param $name String * @param $default Mixed + * @param $normalizeUnicode Whether to normalize Unicode in the returned value * @return mixed */ - private function getGPCVal( $arr, $name, $default ) { + private function getGPCVal( $arr, $name, $default, $normalizeUnicode = true ) { # PHP is so nice to not touch input data, except sometimes: # http://us2.php.net/variables.external#language.variables.external.dot-in-names # Work around PHP *feature* to avoid *bugs* elsewhere. @@ -254,7 +255,9 @@ $data = $wgContLang->checkTitleEncoding( $data ); } } - $data = $this->normalizeUnicode( $data ); + if ( $normalizeUnicode ) { + $data = $this->normalizeUnicode( $data ); + } return $data; } else { taint( $default ); @@ -270,10 +273,11 @@ * * @param $name String * @param $default String: optional default (or NULL) + * @param $normalizeUnicode Whether to normalize Unicode in the returned value * @return String */ - public function getVal( $name, $default = null ) { - $val = $this->getGPCVal( $this->data, $name, $default ); + public function getVal( $name, $default = null, $normalizeUnicode = true ) { + $val = $this->getGPCVal( $this->data, $name, $default, $normalizeUnicode ); if( is_array( $val ) ) { $val = $default; }