Index: AntiSpoof_body.php
===================================================================
--- AntiSpoof_body.php	(revision 113060)
+++ AntiSpoof_body.php	(working copy)
@@ -306,6 +306,29 @@
 		return $out;
 	}
 
+	/**
+	 * Helper function for checkUnicodeString: build the error result for a bad character.
+	 * TODO: It would be nice to show the Unicode character name, but it is not clear how to get it.
+	 * @param $msgId string Message identifier; the message receives the character designator as $1.
+	 * @param $point int Codepoint of the bad character.
+	 * @return array Two elements: the string "ERROR" and the formatted error message.
+	 */
+	private static function badCharErr( $msgId, $point ) {
+		$symbol = codepointToUtf8( $point );
+		// A combining mark attaches to the preceding character. If the offending character is a
+		// combining mark, prepend a space so that it is displayed correctly.
+		if ( self::getScriptCode( $point ) == "SCRIPT_COMBINING_MARKS" ) {
+			$symbol = ' ' . $symbol;
+		}
+		$code = sprintf( 'U+%04X', $point );
+		if ( preg_match( '/\A\p{C}\z/u', $symbol ) ) {
+			$char = wfMsg( 'antispoof-bad-char-non-printable', $code );
+		} else {
+			$char = wfMsg( 'antispoof-bad-char', $symbol, $code );
+		}
+		return array( "ERROR", wfMsg( $msgId, $char ) );
+	}
+
 	/**
 	 * TODO: does too much in one routine, refactor...
 	 * @param $testName
@@ -321,8 +344,10 @@
 			return array( "ERROR", wfMsg( 'antispoof-empty' ) );
 		}
 
-		if ( array_intersect( self::stringToList( $testName ), self::$character_blacklist ) ) {
-			return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) );
+		foreach ( self::stringToList( $testName ) as $char ) {
+			if ( in_array( $char, self::$character_blacklist ) ) {
+				return self::badCharErr( 'antispoof-blacklisted', $char );
+			}
 		}
 
 		# Perform Unicode _compatibility_ decomposition
@@ -330,23 +355,31 @@
 		$testChars = self::stringToList( $testName );
 
 		# Be paranoid: check again, just in case Unicode normalization code changes...
-		if ( array_intersect( $testChars, self::$character_blacklist ) ) {
-			return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) );
+		foreach ( $testChars as $char ) {
+			if ( in_array( $char, self::$character_blacklist ) ) {
+				return self::badCharErr( 'antispoof-blacklisted', $char );
+			}
 		}
 
 		# Check for this: should not happen in any valid Unicode string
 		if ( self::getScriptCode( $testChars[0] ) == "SCRIPT_COMBINING_MARKS" ) {
-			return array( "ERROR", wfMsg( 'antispoof-combining' ) );
+			return self::badCharErr( 'antispoof-combining', $testChars[0] );
 		}
 
 		# Strip all combining characters in order to crudely strip accents
 		# Note: NFKD normalization should have decomposed all accented chars earlier
 		$testChars = self::stripScript( $testChars, "SCRIPT_COMBINING_MARKS" );
 
-		$testScripts = array_unique( array_map( array( 'AntiSpoof', 'getScriptCode' ), $testChars ) );
-		if ( in_array( "SCRIPT_UNASSIGNED", $testScripts ) || in_array( "SCRIPT_DEPRECATED", $testScripts ) ) {
-			return array( "ERROR", wfMsg( 'antispoof-unassigned' ) );
+		$testScripts = array_map( array( 'AntiSpoof', 'getScriptCode' ), $testChars );
+		$unassigned = array_search( "SCRIPT_UNASSIGNED", $testScripts );
+		if ( $unassigned !== false ) {
+			return self::badCharErr( 'antispoof-unassigned', $testChars[$unassigned] );
 		}
+		$deprecated = array_search( "SCRIPT_DEPRECATED", $testScripts );
+		if ( $deprecated !== false ) {
+			return self::badCharErr( 'antispoof-deprecated', $testChars[$deprecated] );
+		}
+		$testScripts = array_unique( $testScripts );
 
 		# We don't mind ASCII punctuation or digits
 		$testScripts = array_diff( $testScripts,
Index: AntiSpoof.i18n.php
===================================================================
--- AntiSpoof.i18n.php	(revision 113060)
+++ AntiSpoof.i18n.php	(working copy)
@@ -15,11 +15,14 @@
 	'antispoof-conflict-bottom' => 'Please choose another name.',
 	'antispoof-name-illegal' => 'The name "$1" is not allowed to prevent confusing or spoofed usernames: $2.
 Please choose another name.',
+	'antispoof-bad-char' => '"$1" ($2)',
+	'antispoof-bad-char-non-printable' => '$1',
 	'antispoof-badtype' => 'Bad data type',
 	'antispoof-empty' => 'Empty string',
-	'antispoof-blacklisted' => 'Contains blacklisted character',
-	'antispoof-combining' => 'Begins with combining mark',
-	'antispoof-unassigned' => 'Contains unassigned or deprecated character',
+	'antispoof-blacklisted' => 'Contains blacklisted character $1',
+	'antispoof-combining' => 'Begins with combining mark $1',
+	'antispoof-unassigned' => 'Contains unassigned character $1',
+	'antispoof-deprecated' => 'Contains deprecated character $1',
 	'antispoof-noletters' => 'Does not contain any letters',
 	'antispoof-mixedscripts' => 'Contains incompatible mixed scripts',
 	'antispoof-tooshort' => 'Canonicalized name too short',
@@ -43,11 +46,21 @@
 	'antispoof-name-illegal' => 'Account creation error message because a user account creation rule was violated. Parameters:
 * $1 is the username that someone wanted to create
 * $2 is the error message. One of {{msg-mw|antispoof-badtype}}, {{msg-mw|antispoof-empty}}, {{msg-mw|antispoof-blacklisted}} and others.',
+	'antispoof-bad-char' => 'It is not a complete message but a template for designator of a bad character, so localization can format it properly. Parameters:
+* $1 is the bad character itself.
+* $2 is the Unicode code point of bad character ("U+" followed by hex number).',
+	'antispoof-bad-char-non-printable' => 'The same as antispoof-bad-char, but for non-printable characters. Since non-printable characters do not have visual representation, template has only one parameter:
+* $1 is the Unicode code point of bad character ("U+" followed by hex number).',
 	'antispoof-badtype' => 'Reason for failed account creation.',
 	'antispoof-empty' => 'Reason for failed account creation.',
-	'antispoof-blacklisted' => 'Reason for failed account creation.',
-	'antispoof-combining' => 'Reason for failed account creation.',
-	'antispoof-unassigned' => 'Reason for failed account creation.',
+	'antispoof-blacklisted' => 'Reason for failed account creation. Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).',
+	'antispoof-combining' => 'Reason for failed account creation. Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).',
+	'antispoof-unassigned' => 'Reason for failed account creation. Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).',
+	'antispoof-deprecated' => 'Reason for failed account creation. Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).',
 	'antispoof-noletters' => 'Reason for failed account creation.',
 	'antispoof-mixedscripts' => 'Reason for failed account creation.',
 	'antispoof-tooshort' => 'Reason for failed account creation.',