loadRegs();
	}

	/**
	 * Create the per-variant replacement tables.
	 *
	 * Every table starts out empty; the script-to-script conversion is done
	 * by the regular-expression rules built in loadRegs().
	 */
	function loadDefaultTables() {
		$kk2Cyrl = array();
		$kk2Latn = array();
		$kk2Arab = array();

		$this->mTables = array(
			'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
			'kk-latn' => new ReplacementArray( $kk2Latn ),
			'kk-arab' => new ReplacementArray( $kk2Arab ),
			'kk'      => new ReplacementArray()
		);
	}

	/**
	 * Build the regular-expression rule tables used by regsConverter().
	 *
	 * Each table maps a PCRE pattern to its replacement. regsConverter()
	 * applies the rules one after another in array order, so the order of
	 * the entries is significant.
	 */
	function loadRegs() {
		$this->mCyrl2Latn = array(
			// Word-initial И, Ю, Я followed by an upper-case/lower-case
			// Cyrillic letter (or standing alone)
			'/^И([' . KK_C_UC . ']|$)/u' => 'İY$1',
			'/^И([' . KK_C_LC . ']|$)/u' => 'İy$1',
			'/^Ю([' . KK_C_UC . ']|$)/u' => 'İYW$1',
			'/^Ю([' . KK_C_LC . ']|$)/u' => 'İyw$1',
			'/^Я([' . KK_C_UC . ']|$)/u' => 'İYA$1',
			'/^Я([' . KK_C_LC . ']|$)/u' => 'İya$1',
			// NOTE(review): '/Ё/u', '/ё/u', '/Щ/u' and '/щ/u' each appear
			// twice in this literal. PHP keeps the position of the first
			// occurrence but the value of the last one, so Щ/щ are
			// effectively replaced by the values given further down, not
			// by 'Ş'/'ş'. Confirm which mapping is intended.
			'/Ё/u' => 'Ö', '/ё/u' => 'ö',
			'/И/u' => 'İY', '/и/u' => 'iy',
			'/Ю/u' => 'İYW', '/ю/u' => 'iyw',
			'/Я/u' => 'İYA', '/я/u' => 'iya',
			'/У/u' => 'Ú', '/у/u' => 'ú',
			'/Щ/u' => 'Ş', '/щ/u' => 'ş',
			// Hard and soft signs are dropped
			'/[ъЪ]/u' => '', '/[ьЬ]/u' => '',
			'/А/u' => 'A', '/а/u' => 'a',
			'/Ә/u' => 'Ä', '/ә/u' => 'ä',
			'/Б/u' => 'B', '/б/u' => 'b',
			'/В/u' => 'V', '/в/u' => 'v',
			'/Г/u' => 'G', '/г/u' => 'g',
			'/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
			'/Д/u' => 'D', '/д/u' => 'd',
			'/Е/u' => 'E', '/е/u' => 'e',
			'/Ё/u' => 'Ö', '/ё/u' => 'ö',
			'/Ж/u' => 'J', '/ж/u' => 'j',
			'/З/u' => 'Z', '/з/u' => 'z',
			'/Й/u' => 'Y', '/й/u' => 'y',
			'/К/u' => 'K', '/к/u' => 'k',
			'/Қ/u' => 'Q', '/қ/u' => 'q',
			'/Л/u' => 'L', '/л/u' => 'l',
			'/М/u' => 'M', '/м/u' => 'm',
			'/Н/u' => 'N', '/н/u' => 'n',
			'/Ң/u' => 'Ŋ', '/ң/u' => 'ŋ',
			'/О/u' => 'O', '/о/u' => 'o',
			'/Ө/u' => 'Ö', '/ө/u' => 'ö',
			'/П/u' => 'P', '/п/u' => 'p',
			'/Р/u' => 'R', '/р/u' => 'r',
			'/С/u' => 'S', '/с/u' => 's',
			'/Т/u' => 'T', '/т/u' => 't',
			'/Ұ/u' => 'U', '/ұ/u' => 'u',
			'/Ү/u' => 'Ü', '/ү/u' => 'ü',
			'/Ф/u' => 'F', '/ф/u' => 'f',
			'/Х/u' => 'H', '/х/u' => 'h',
			'/Һ/u' => 'H', '/һ/u' => 'h',
			'/Ц/u' => 's', '/ц/u' => 's',
			'/Ч/u' => 'Ç', '/ч/u' => 'ç',
			// NOTE(review): verify that the replacement letters for
			// Ш/ш and Щ/щ are Latin-script characters and not
			// look-alike Cyrillic ones, and that the upper-case value
			// given for lower-case щ is intended.
			'/Ш/u' => 'С', '/ш/u' => 'с',
			'/Щ/u' => 'C', '/щ/u' => 'C',
			'/Ы/u' => 'I', '/ы/u' => 'ı',
			'/І/u' => 'İ', '/і/u' => 'i',
			'/Э/u' => 'E', '/э/u' => 'e',
			// Cyrillic -> Latin for letters И and У
			// NOTE(review): the rules are applied in array order, so by
			// the time these two-letter patterns run every И/и has
			// already been replaced by the single-letter rules above and
			// these patterns can no longer match. They are kept in their
			// original position to preserve current output; move them
			// to the top of the table if they are meant to take effect.
			'/ИІ/ui' => 'İYİ', '/иі/ui' => 'iyi',
			'/ИЫ/ui' => 'IYI', '/иы/ui' => 'ıyı',
			'/ИЯ/ui' => 'İYA', '/ия/ui' => 'iya',
		);

		$this->mCyrl2Arab = array(
			// Punctuation -> Arabic
			'/#|№|No\./u' => '؀',
			'/\,/' => '،',
			'/;/' => '؛',
			'/\?/' => '؟',
			'/%/' => '٪',
			'/\*/' => '٭',
			// Cyrillic -> Arabic
			'/[еэ]/ui' => 'ە',
			'/[ъь]/ui' => '',
			'/[аә]/ui' => 'ا',
			'/[оө]/ui' => 'و',
			'/[ұү]/ui' => 'ۇ',
			'/[ыі]/ui' => 'ى',
			'/[и]/ui' => 'ي',
			'/ё/ui' => 'يو',
			'/ю/ui' => 'يۋ',
			'/я/ui' => 'يا',
			'/[й]/ui' => 'ي',
			'/ц/ui' => 'س',
			'/щ/ui' => 'ش',
			'/һ/ui' => 'ح',
			'/ч/ui' => 'چ',
			'/б/ui' => 'ب',
			'/в/ui' => 'ۆ',
			'/г/ui' => 'گ',
			'/ғ/ui' => 'ع',
			'/д/ui' => 'د',
			'/ж/ui' => 'ج',
			'/з/ui' => 'ز',
			'/к/ui' => 'ك',
			'/қ/ui' => 'ق',
			'/л/ui' => 'ل',
			'/м/ui' => 'م',
			'/н/ui' => 'ن',
			'/ң/ui' => 'ڭ',
			'/п/ui' => 'پ',
			'/р/ui' => 'ر',
			'/с/ui' => 'س',
			'/т/ui' => 'ت',
			'/у/ui' => 'ۋ',
			'/ф/ui' => 'ف',
			'/х/ui' => 'ح',
			'/ш/ui' => 'ش',
			// Cyrillic -> Arabic, local modification: "yya" changed to "ya"
			// NOTE(review): и and я have already been replaced by the
			// rules above when this pattern runs, so it can no longer
			// match in its current position.
			'/ия/ui' => 'يا',
		);
	}

	/**
	 * Parse a manual conversion rule.
	 *
	 * Rules carrying the 'T' flag are handed to the parent implementation.
	 * For every other rule all formatting is ignored and the raw rule text
	 * is used unchanged for each variant.
	 *
	 * @param $rule String: the rule text
	 * @param $flags Array: flags attached to the rule
	 * @return Array mapping each variant code to its text
	 */
	function parseManualRule( $rule, $flags = array() ) {
		if ( in_array( 'T', $flags ) ) {
			return parent::parseManualRule( $rule, $flags );
		}

		// otherwise ignore all formatting
		// Initialise the result so an empty variant list yields an empty
		// array instead of an undefined variable.
		$carray = array();
		foreach ( $this->mVariants as $v ) {
			$carray[$v] = $rule;
		}

		return $carray;
	}

	/**
	 * A function wrapper:
	 *  - if there is no selected variant, leave the link
	 *    names as they were
	 *  - do not try to find variants for usernames
	 *
	 * @param $link String: link text, may be rewritten in place
	 * @param $nt Mixed: title object of the link target, may be rewritten
	 * @param $ignoreOtherCond Boolean: passed through to the parent
	 */
	function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		// check for user namespace
		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
			if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
				return;
			}
		}

		$oldlink = $link;
		parent::findVariantLink( $link, $nt, $ignoreOtherCond );

		// Restore the original link text when the preferred variant is the
		// main language code.
		if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
			$link = $oldlink;
		}
	}

	/**
	 * An ugly function wrapper for parsing Image titles
	 * (to prevent image name conversion)
	 *
	 * @param $text String: text to convert
	 * @param $toVariant Mixed: target variant, or false
	 * @return String
	 */
	function autoConvert( $text, $toVariant = false ) {
		global $wgTitle;

		if ( is_object( $wgTitle ) && $wgTitle->getNameSpace() == NS_FILE ) {
			$imagename = $wgTitle->getNsText();
			// Quote the namespace text: it is data, not a pattern, and may
			// contain characters that are special in a regular expression.
			if ( preg_match( '/^' . preg_quote( $imagename, '/' ) . ':/', $text ) ) {
				return $text;
			}
		}

		return parent::autoConvert( $text, $toVariant );
	}

	/**
	 * It translates text into variant
	 *
	 * Besides converting the text, this sets the global
	 * $wgContLanguageCode according to the requested variant.
	 *
	 * @param $text String: text to convert
	 * @param $toVariant String: target variant code
	 * @return String
	 */
	function translate( $text, $toVariant ) {
		global $wgContLanguageCode;

		$text = parent::translate( $text, $toVariant );

		$letters = '';
		switch ( $toVariant ) {
			case 'kk-cyrl':
				$letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
				$wgContLanguageCode = 'kk';
				break;
			case 'kk-latn':
				$letters = KK_C_UC . KK_C_LC . '№0123456789';
				$wgContLanguageCode = 'kk-Latn';
				break;
			case 'kk-arab':
				// A stray comment terminator between the two string
				// literals made this line a parse error; it is removed.
				$letters = KK_C_UC . KK_C_LC . 'ʺʹ' . ',;\?%\*№0123456789';
				$wgContLanguageCode = 'kk-Arab';
				break;
			default:
				$wgContLanguageCode = 'kk';
				return $text;
		}

		// disable conversion variables like $1, $2...
		$varsfix = '\$[0-9]';

		$matches = preg_split(
			'/' . $varsfix . '[^' . $letters . ']+/u',
			$text,
			-1,
			PREG_SPLIT_OFFSET_CAPTURE
		);
		if ( $matches === false ) {
			// preg_split() failed (for instance on invalid UTF-8 input);
			// hand back the text as it is rather than dropping it.
			return $text;
		}

		// Offsets from PREG_SPLIT_OFFSET_CAPTURE are byte offsets, hence the
		// byte-wise substr()/strlen() below. The text between two pieces
		// (the separators) is copied through unconverted.
		$mstart = 0;
		$ret = '';
		foreach ( $matches as $m ) {
			$ret .= substr( $text, $mstart, $m[1] - $mstart );
			$ret .= $this->regsConverter( $m[0], $toVariant );
			$mstart = $m[1] + strlen( $m[0] );
		}

		return $ret;
	}

	/**
	 * Apply the regular-expression rule table for the given variant.
	 *
	 * @param $text String: text to convert
	 * @param $toVariant String: target variant code
	 * @return String converted text; unchanged for any variant other than
	 *   'kk-arab' and 'kk-latn'
	 */
	function regsConverter( $text, $toVariant ) {
		if ( $text == '' ) {
			return $text;
		}

		switch ( $toVariant ) {
			case 'kk-arab':
				$letters = KK_C_LC . KK_C_UC /*.KK_L_LC.KK_L_UC*/;
				$front = 'әөүіӘӨҮІ' /*.'äöüiÄÖÜİ'*/;
				$excludes = 'еэгғкқЕЭГҒКҚ' /*.'egğkqEGĞKQ'*/;

				// split text to words
				$matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
				$mstart = 0;
				$ret = '';
				foreach ( $matches as $m ) {
					$ret .= substr( $text, $mstart, $m[1] - $mstart );
					// A word containing a front vowel gets a hamza put in
					// front of each run of letters, unless the word also
					// contains one of е, э, г, ғ, к, қ; those words are
					// written without hamza.
					if ( preg_match( '/[' . $front . ']/u', $m[0] )
						&& !preg_match( '/[' . $excludes . ']/u', $m[0] )
					) {
						$ret .= preg_replace( '/[' . $letters . ']+/u', HAMZA . '$0', $m[0] );
					} else {
						$ret .= $m[0];
					}
					$mstart = $m[1] + strlen( $m[0] );
				}

				$text = $ret;
				// Rules are applied one by one, in table order
				foreach ( $this->mCyrl2Arab as $pat => $rep ) {
					$text = preg_replace( $pat, $rep, $text );
				}
				return $text;

			case 'kk-latn':
				// Rules are applied one by one, in table order
				foreach ( $this->mCyrl2Latn as $pat => $rep ) {
					$text = preg_replace( $pat, $rep, $text );
				}
				return $text;

			default:
				return $text;
		}
	}

	/**
	 * We want our external link captions to be converted in variants,
	 * so we return the original text instead -{$text}-, except for URLs
	 *
	 * @param $text String
	 * @param $noParse Boolean
	 * @return String
	 */
	function markNoConversion( $text, $noParse = false ) {
		// NOTE(review): the ^ anchor binds only to the first alternative,
		// so "ftp://" or "irc://" anywhere in the text also counts as a
		// URL here. Left unchanged to keep current behaviour.
		if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) {
			return parent::markNoConversion( $text );
		}

		return $text;
	}

	/**
	 * Convert a category sort key to the 'kk' variant.
	 *
	 * @param $key String
	 * @return String
	 */
	function convertCategoryKey( $key ) {
		return $this->autoConvert( $key, 'kk' );
	}

}

/**
 * Class that handles Cyrillic, Latin and Arabic scripts for Kazakh.
 * Right now it only distinguishes kk_cyrl, kk_latn and kk_arab.
 *
 * @ingroup Language
 */
class LanguageKk extends LanguageKk_cyrl {

	/**
	 * Set up the variant converter and register it for the
	 * 'ArticleSaveComplete' hook.
	 */
	function __construct() {
		global $wgHooks;
		parent::__construct();

		$variants = array( 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab' );
		$variantfallbacks = array(
			'kk'      => 'kk-cyrl',
			'kk-cyrl' => 'kk',
			'kk-latn' => 'kk',
			'kk-arab' => 'kk',
		);

		$this->mConverter = new KkConverter( $this, 'kk', $variants, $variantfallbacks );

		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
	}

	/**
	 * Workaround for right-to-left direction support: the kk-arab variant
	 * is always right-to-left; every other variant defers to the parent.
	 *
	 * @return bool
	 */
	function isRTL() {
		$variant = $this->getPreferredVariant();
		if ( $variant == 'kk-arab' ) {
			return true;
		} else {
			return parent::isRTL();
		}
	}

	/**
	 * It fixes issue with ucfirst for transforming 'i' to 'İ'
	 *
	 * @param $string String
	 * @return String
	 */
	function ucfirst ( $string ) {
		$variant = $this->getPreferredVariant();
		// isset() keeps an empty string from triggering an
		// "uninitialized string offset" error; it then falls through to
		// the parent implementation exactly as before.
		if ( ( $variant == 'kk-latn' ) && isset( $string[0] ) && $string[0] == 'i' ) {
			$string = 'İ' . substr( $string, 1 );
		} else {
			$string = parent::ucfirst( $string );
		}
		return $string;
	}

	/**
	 * It fixes issue with lcfirst for transforming 'I' to 'ı'
	 *
	 * @param $string String
	 * @return String
	 */
	function lcfirst ( $string ) {
		$variant = $this->getPreferredVariant();
		// See ucfirst() in this class for why isset() is checked first.
		if ( ( $variant == 'kk-latn' ) && isset( $string[0] ) && $string[0] == 'I' ) {
			$string = 'ı' . substr( $string, 1 );
		} else {
			$string = parent::lcfirst( $string );
		}
		return $string;
	}

	/**
	 * Convert a word to the given grammatical case, dispatching to the
	 * parent's script-specific implementation for the preferred variant.
	 *
	 * @param $word String
	 * @param $case String
	 * @return String
	 */
	function convertGrammar( $word, $case ) {
		wfProfileIn( __METHOD__ );

		$variant = $this->getPreferredVariant();
		switch ( $variant ) {
			case 'kk-arab':
				$word = parent::convertGrammarKk_arab( $word, $case );
				break;
			case 'kk-latn':
				$word = parent::convertGrammarKk_latn( $word, $case );
				break;
			case 'kk-cyrl':
			case 'kk':
			default:
				$word = parent::convertGrammarKk_cyrl( $word, $case );
		}

		wfProfileOut( __METHOD__ );
		return $word;
	}
}