diff --git a/WikibaseSearch.entitytypes.repo.php b/WikibaseSearch.entitytypes.repo.php index 4dceef5..2d431bb 100644 --- a/WikibaseSearch.entitytypes.repo.php +++ b/WikibaseSearch.entitytypes.repo.php @@ -1,95 +1,95 @@ [ Def::SEARCH_FIELD_DEFINITIONS => function ( array $languageCodes, SettingsArray $searchSettings ) { $repo = WikibaseRepo::getDefaultInstance(); $services = MediaWikiServices::getInstance(); $config = $services->getMainConfig(); if ( $config->has( 'LexemeLanguageCodePropertyId' ) ) { $lcID = $config->get( 'LexemeLanguageCodePropertyId' ); } else { $lcID = null; } return new LexemeFieldDefinitions( StatementProviderFieldDefinitions::newFromSettings( new InProcessCachingDataTypeLookup( $repo->getPropertyDataTypeLookup() ), WikibaseRepo::getDataTypeDefinitions( $services ) ->getSearchIndexDataFormatterCallbacks(), $searchSettings ), $repo->getEntityLookup(), $lcID ? WikibaseRepo::getEntityIdParser( $services )->parse( $lcID ) : null ); }, Def::ENTITY_SEARCH_CALLBACK => function ( WebRequest $request ) { $repo = WikibaseRepo::getDefaultInstance(); $entityIdParser = WikibaseRepo::getEntityIdParser(); $languageFallbackChainFactory = WikibaseRepo::getLanguageFallbackChainFactory(); return new CombinedEntitySearchHelper( [ new EntityIdSearchHelper( $repo->getEntityLookup(), $entityIdParser, new LanguageFallbackLabelDescriptionLookup( WikibaseRepo::getTermLookup(), - $languageFallbackChainFactory->newFromLanguage( $repo->getUserLanguage() ) + $languageFallbackChainFactory->newFromLanguage( WikibaseRepo::getUserLanguage() ) ), $repo->getEntityTypeToRepositoryMapping() ), new LexemeSearchEntity( $entityIdParser, $request, - $repo->getUserLanguage(), + WikibaseRepo::getUserLanguage(), $languageFallbackChainFactory, WikibaseRepo::getPrefetchingTermLookup() ) ] ); }, Def::FULLTEXT_SEARCH_CONTEXT => LexemeFullTextQueryBuilder::CONTEXT_LEXEME_FULLTEXT, ], 'form' => [ Def::ENTITY_SEARCH_CALLBACK => function ( WebRequest $request ) { $repo = WikibaseRepo::getDefaultInstance(); $entityIdParser = WikibaseRepo::getEntityIdParser(); return new CombinedEntitySearchHelper( [ new Wikibase\Repo\Api\EntityIdSearchHelper( $repo->getEntityLookup(), $entityIdParser, new NullLabelDescriptionLookup(), $repo->getEntityTypeToRepositoryMapping() ), new FormSearchEntity( $entityIdParser, $request, - $repo->getUserLanguage(), + WikibaseRepo::getUserLanguage(), WikibaseRepo::getLanguageFallbackChainFactory(), WikibaseRepo::getPrefetchingTermLookup() ), ] ); }, ], // TODO: support senses? ]; diff --git a/src/LexemeFullTextQueryBuilder.php b/src/LexemeFullTextQueryBuilder.php index dbf3942..0781614 100644 --- a/src/LexemeFullTextQueryBuilder.php +++ b/src/LexemeFullTextQueryBuilder.php @@ -1,222 +1,222 @@ settings = $settings; $this->entityIdParser = $entityIdParser; $this->userLanguage = $userLanguage; $this->lookupFactory = $lookupFactory; } /** * Create fulltext builder from global environment. * @param array $settings Configuration from config file * @return LexemeFullTextQueryBuilder * @throws \MWException */ public static function newFromGlobals( array $settings ) { $repo = WikibaseRepo::getDefaultInstance(); return new static( $settings, new LanguageFallbackLabelDescriptionLookupFactory( WikibaseRepo::getLanguageFallbackChainFactory(), $repo->getTermLookup(), $repo->getTermBuffer() ), WikibaseRepo::getEntityIdParser(), - $repo->getUserLanguage() + WikibaseRepo::getUserLanguage() ); } /** * Search articles with provided term. * * @param SearchContext $searchContext * @param string $term term to search * @throws \MWException */ public function build( SearchContext $searchContext, $term ) { if ( $searchContext->areResultsPossible() && !$searchContext->isSpecialKeywordUsed() ) { // We use entity search query if we did not find any advanced syntax // and the base builder did not reject the query $this->buildEntitySearchQuery( $searchContext, $term ); } // if we did find advanced query, we keep the old setup but change the result type // FIXME: make it dispatch by content model $searchContext->setResultsType( new LexemeFulltextResult( $this->entityIdParser, $this->userLanguage, $this->lookupFactory ) ); } /** * @param SearchContext $searchContext * @return bool */ public function buildDegraded( SearchContext $searchContext ) { // Not doing anything for now return false; } /** * Build a fulltext query for Wikibase entity. * @param SearchContext $searchContext * @param string $term Search term */ protected function buildEntitySearchQuery( SearchContext $searchContext, $term ) { $searchContext->setProfileContext( self::CONTEXT_LEXEME_FULLTEXT ); $searchContext->addSyntaxUsed( self::LEXEME_FULL_TEXT_MARKER, 10 ); /* * Overall query structure is as follows: * - Bool with: * Filter of namespace = N * OR (Should with 1 mininmum) of: * title.keyword = QUERY * lexeme_forms.id = QUERY * fulltext match query * * Fulltext match query is: * Filter of: * at least one of: all, all.plain matching * OR (should with 0 minimum) of: * DISMAX query of: {lemma|form}.near_match * OR (should with 0 minimum) of: * all * all.plain */ $profile = $this->settings; // $fields is collecting all the fields for dismax query to be used in // scoring match $fields = [ [ "lemma.near_match", $profile['exact'] ], [ "lemma.near_match_folded", $profile['folded'] ], [ "lexeme_forms.representation.near_match", $profile['exact'] * $profile['form-discount'], ], [ "lexeme_forms.representation.near_match_folded", $profile['folded'] * $profile['form-discount'], ], ]; $titleMatch = new Term( [ 'title.keyword' => EntitySearchUtils::normalizeId( $term, $this->entityIdParser ), ] ); // lexeme_forms.id is a lowercase_keyword so use Match to apply the analyzer $formIdMatch = new Match( 'lexeme_forms.id', EntitySearchUtils::normalizeId( $term, $this->entityIdParser ) ); // Main query filter $filterQuery = $this->buildSimpleAllFilter( $term ); // Near match ones, they use constant score $nearMatchQuery = new DisMax(); $nearMatchQuery->setTieBreaker( 0 ); foreach ( $fields as $field ) { $nearMatchQuery->addQuery( EntitySearchUtils::makeConstScoreQuery( $field[0], $field[1], $term ) ); } // Tokenized ones $tokenizedQuery = $this->buildSimpleAllFilter( $term, 'OR', $profile['any'] ); // Main labels/desc query $fullTextQuery = new BoolQuery(); $fullTextQuery->addFilter( $filterQuery ); $fullTextQuery->addShould( $nearMatchQuery ); $fullTextQuery->addShould( $tokenizedQuery ); // Main query $query = new BoolQuery(); $query->setParam( 'disable_coord', true ); // Match either labels or exact match to title $query->addShould( $titleMatch ); $query->addShould( $formIdMatch ); $query->addShould( $fullTextQuery ); $query->setMinimumShouldMatch( 1 ); $searchContext->setMainQuery( $query ); } /** * Builds a simple filter on all and all.plain when all terms must match * * @param string $query * @param string $operator * @param null $boost * @return BoolQuery */ private function buildSimpleAllFilter( $query, $operator = 'AND', $boost = null ) { $filter = new BoolQuery(); // FIXME: We can't use solely the stem field here // - Depending on languages it may lack stopwords, // A dedicated field used for filtering would be nice foreach ( [ 'all', 'all.plain' ] as $field ) { $m = new Match(); $m->setFieldQuery( $field, $query ); $m->setFieldOperator( $field, $operator ); if ( $boost ) { $m->setFieldBoost( $field, $boost ); } $filter->addShould( $m ); } return $filter; } }