From 6f6b3533ad4c8ec6d1b74186d2d840a8502bfb2c Mon Sep 17 00:00:00 2001
From: Lucas Werkmeister
Date: Wed, 29 Aug 2018 12:32:33 +0200
Subject: [PATCH] Fix Unicode string comparison
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Canonical Decomposition is not the form of strings used by Wikibase
(Wikibase uses Canonical Composition), and it is not usually the form in
which user input is received (some browsers [1] explicitly normalize
inputs to Canonical Composition, and other input is also usually in a
normalized form), so if we only Decompose one half of the values when
comparing them, we will usually end up reporting a mismatch.

This manifests itself as an error (“base statement not found”) when
attempting to add a statement with qualifiers whose value contains
umlauts: when attempting to add the qualifiers, QuickStatements will be
unable to locate the statement it just added.

The fix for this is simple: normalize both sides when comparing strings.
In that case, it also doesn’t matter which normalization form we use.

[1]: https://stackoverflow.com/a/11190012
---
 public_html/quickstatements.php | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/public_html/quickstatements.php b/public_html/quickstatements.php
index 5e40e6e..2471cea 100644
--- a/public_html/quickstatements.php
+++ b/public_html/quickstatements.php
@@ -483,7 +483,9 @@ protected function getStatementID ( $command ) {
 	protected function compareDatavalue ( $d1 , $d2 ) {
 		if ( $d1->type != $d2->type ) return false ;
 		if ( $d1->type == 'string' ) {
-			return normalizer_normalize($d1->value,Normalizer::FORM_D) == $d2->value; # Yay Unicode!
+			$value1 = normalizer_normalize($d1->value,Normalizer::FORM_D);
+			$value2 = normalizer_normalize($d2->value,Normalizer::FORM_D);
+			return $value1 == $value2;
 		}
 		if ( $d1->type == 'quantity' ) return $d1->value->amount*1 == $d2->value->amount*1 ;
 		if ( $d1->type == 'time' ) {
-- 
2.17.1