Index: DidYouMean/DidYouMean.php
===================================================================
--- DidYouMean/DidYouMean.php (revision 0)
+++ DidYouMean/DidYouMean.php (revision 0)
@@ -0,0 +1,399 @@
+ 'DidYouMean', 'author' => 'hippietrail (Andrew Dunbar)' );
+
+# NOTE(review): the line above is only the tail of a statement; the beginning
+# of this file appears to be missing from this copy of the patch and should be
+# restored from the original before the patch is applied.
+
+# Hook registration: each entry names the handler function defined further
+# down in this file.
+
+# do database lookup from these
+$wgHooks['ArticleNoArticleText'][] = 'wfDymArticleNoArticleText';
+$wgHooks['SpecialSearchNogomatch'][] = 'wfDymSpecialSearchNogomatch';
+
+# db lookup + parse existing {{see}} and add enhanced one with db results
+$wgHooks['ParserBeforeStrip'][] = 'wfDymParserBeforeStrip';
+
+# handle delete
+$wgHooks['ArticleDelete'][] = 'wfDymArticleDelete';
+
+# handle move
+$wgHooks['TitleMoveComplete'][] = 'wfDymTitleMoveComplete';
+
+# handle create / edit
+$wgHooks['AlternateEdit'][] = 'wfDymAlternateEdit';
+$wgHooks['ArticleSaveComplete'][] = 'wfDymArticleSaveComplete';
+
+# handle undelete
+$wgHooks['ArticleUndelete'][] = 'wfDymArticleUndelete';
+
+# Output format used by build_sees(): false emits a plain wikitext
+# "See also" line, true emits a {{see|...}} template call.
+# Override this in LocalSettings.php.
+$wgDymUseSeeTemplate = false;
+
+# Hook handler for 'ArticleNoArticleText'.
+#
+# Looks up pages that share the requested title's normalised form and, when
+# there are any, prepends a sorted "see also" list to $text.
+#
+# $article  object whose getTitle()->getText() gives the requested title
+# $text     string, modified in place
+# Returns true in every case.
+#
+# TODO this is called even when editing a new page
+
+function wfDymArticleNoArticleText( &$article, &$text ) {
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	$sees = wfDymLookup( 0, $article->getTitle()->getText() );
+	if ( !count( $sees ) ) {
+		return true;
+	}
+
+	sort( $sees );
+	$text = build_sees( $sees ) . $text;
+
+	return true;
+}
+
+# Hook handler for 'SpecialSearchNogomatch'.
+#
+# this is called when using the Go/Search box but it is not called when entering
+# a URL for a non-existing article
+#
+# Adds a sorted "see also" list for $title to the page output ($wgOut) when
+# the lookup finds anything. Returns true in every case.
+
+function wfDymSpecialSearchNogomatch( &$title ) {
+	global $wgOut;
+
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	$sees = wfDymLookup( 0, $title->getText() );
+	if ( !count( $sees ) ) {
+		return true;
+	}
+
+	sort( $sees );
+	$wgOut->addWikiText( build_sees( $sees ) );
+
+	return true;
+}
+
+# Hook handler for 'ParserBeforeStrip'.
+#
+# this is called per chunk of wikitext, not per article
+#
+# On the first chunk that has a revision id and an empty namespace value, the
+# titles listed in an existing {{see|...}} or {{xsee|...}} call are merged
+# with the titles found by wfDymLookup(), and every {{see}}/{{xsee}} call in
+# $text is replaced by the output of build_sees() for the merged list (or by
+# nothing when the list is empty).
+#
+# $parser      parser object; mDymFirstChunk and mDymSees are stored on it
+# $text        wikitext chunk, modified in place
+# $stripState  unused here
+# Returns true in every case.
+#
+# NOTE(review): nothing visible in this file resets mDymFirstChunk, so a
+# parser object that is reused only gets this treatment once - confirm that
+# is intended.
+
+function wfDymParserBeforeStrip( &$parser, &$text, &$stripState ) {
+	#wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	# the flag is not set at all until the first qualifying chunk has been seen
+	$alreadyDone = isset( $parser->mDymFirstChunk ) && $parser->mDymFirstChunk == 'no';
+
+	# if revisionid is 0 this is not an article chunk
+	if ( $alreadyDone || !$parser->getVariableValue( 'revisionid' ) || $parser->getVariableValue( 'namespace' ) ) {
+		return true;
+	}
+
+	$parser->mDymFirstChunk = 'no';
+
+	$title = $parser->getTitle();
+	$parser->mDymSees = wfDymLookup( $title->getArticleID(), $title->getText() );
+
+	if ( preg_match( "/{{[sS]ee\|([^}]*)}}/", $text, $see ) ) {
+		wfDebug( "HIPP: see Hit\n" );
+		$found = explode( "|", $see[1] );
+	} elseif ( preg_match( "/{{[xX]see(\|[^}]*)}}/", $text, $see ) ) {
+		wfDebug( "HIPP: xsee Hit\n" );
+		preg_match_all( "/\|\[\[([^]|]*)(?:\|([^|]*))?\]\](?: \(([^)]*)\))?/", $see[1], $ma );
+		$found = $ma[1];
+	} else {
+		wfDebug( "HIPP: (x)see Miss\n" );
+		# there's no {{see}} in this chunk of wikitext
+		# if this is the 1st chunk of the article itself we can put an empty {{see}} there.
+		$text = "{{see|}}\n" . $text;
+		$found = array();
+	}
+
+	# normalize entities and urlencoding to pure utf-8; blank entries are
+	# dropped because an empty "{{see|}}" explodes to one empty string, which
+	# would otherwise be rendered as an empty [[]] link
+	$sees = array();
+	foreach ( $found as $value ) {
+		$value = trim( urldecode( html_entity_decode( $value, ENT_QUOTES, 'UTF-8' ) ) );
+		if ( $value !== '' ) {
+			$sees[] = $value;
+		}
+	}
+
+	wfDebug( 'HIPP: Parser: ' . utf8_decode(implode(', ', $sees)) . "\n" );
+	wfDebug( 'HIPP: DBase: ' . utf8_decode(implode(', ', $parser->mDymSees)) . "\n" );
+
+	# add in the stuff from the database lookup
+	$sees = array_unique( array_merge( $sees, $parser->mDymSees ) );
+	sort( $sees );
+
+	wfDebug( 'HIPP: Merged: ' . utf8_decode(implode(', ', $sees)) . "\n" );
+
+	# TODO is it better to use $parser->insertStripItem() ?
+
+	$built_sees = count( $sees ) ? build_sees( $sees ) : '';
+
+	# Titles can contain "$" and "\", which preg_replace() would read as
+	# backreferences in the replacement string, so both are escaped first.
+	# (A disabled earlier variant also re-emitted the matched text, $0, after
+	# the built list.)
+	$text = preg_replace(
+		'/{{[xX]?[sS]ee\|[^}]*}}/',
+		addcslashes( $built_sees, '\\$' ),
+		$text );
+
+	return true;
+}
+
+# turn the array of titles into some wikitext we can add to an article
+#
+# $sees  array of page titles
+# Returns a {{see|a|b|...}} template call when $wgDymUseSeeTemplate is true,
+# otherwise an italic "See also:" label followed by bold links.
+#
+# NOTE(review): the plain-wikitext literal below is split across physical
+# lines in this copy of the patch; markup may have been lost around it -
+# verify against the original before applying.
+
+function build_sees( $sees ) {
+	global $wgDymUseSeeTemplate;
+
+	if ( $wgDymUseSeeTemplate == true ) {
+		return '{{see|' . implode('|', $sees) . '}}';
+	}
+
+	return '
\'\'See also:\'\' \'\'\'[[' . implode(']]\'\'\', \'\'\'[[', $sees) . ']]\'\'\'
';
+}
+
+# Find the titles of pages that share a normalised title.
+#
+# pass pageid = 0 to lookup by normtitle
+#
+# $pageid  page id to look up through dympage, or 0 to look up the
+#          normalised form of $title in dymnorm instead
+# $title   title text; a result equal to it is left out of the list
+# Returns an array of titles with underscores turned into spaces. The array
+# is empty when either table is missing or nothing matches.
+
+function wfDymLookup( $pageid, $title ) {
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	$sees = array();
+
+	$dbr = wfGetDB( DB_SLAVE );
+
+	if ( !$dbr->tableExists( 'dympage' ) || !$dbr->tableExists( 'dymnorm' ) ) {
+		wfDebug( "HIPP: No dympage or dymnorm table\n" );
+		return $sees;
+	}
+
+	if ( $pageid ) {
+		wfDebug( "HIPP: lookup by pageid: $pageid\n" );
+		$normid = $dbr->selectField(
+			array( 'page', 'dympage' ),
+			'dp_normid',
+			array( 'page_id = dp_pageid', 'page_id' => $pageid )
+		);
+	} else {
+		$norm = wfDymNormalise( $title );
+		wfDebug( "HIPP: lookup by normtitle: " . $norm . "\n" );
+		$normid = $dbr->selectField(
+			'dymnorm',
+			'dn_normid',
+			array( 'dn_normtitle' => $norm )
+		);
+	}
+
+	if ( !$normid ) {
+		return $sees;
+	}
+
+	$res = $dbr->select(
+		/* FROM */ array( 'page', 'dympage' ),
+		/* SELECT */ 'page_title',
+		/* WHERE */ array( 'page_id = dp_pageid', 'dp_normid' => $normid )
+	);
+
+	if ( $dbr->numRows( $res ) == 0 ) {
+		wfDebug( "HIPP: DB New Miss\n" );
+		return $sees;
+	}
+
+	wfDebug( "HIPP: DB New Hit\n" );
+
+	# accumulate the db results
+	while ( $o = $dbr->fetchObject( $res ) ) {
+		$t2 = str_replace( '_', ' ', $o->page_title );
+		$dbo = utf8_decode( $t2 );
+		# strict string comparison: with a loose "!=" numeric-looking titles
+		# such as "01" and "1" would count as the same title
+		if ( (string)$title !== $t2 ) {
+			$sees[] = $t2;
+			$dbo = '++ ' . $dbo;
+		} else {
+			$dbo = ' (' . $dbo . ')';
+		}
+		wfDebug( "HIPP: $dbo\n" );
+	}
+
+	$dbr->freeResult( $res );
+
+	return $sees;
+}
+
+# Records a newly inserted main-namespace, non-redirect article.
+#
+# NOTE(review): no hook registration for this function is visible at the top
+# of this file; page creation appears to be covered by
+# wfDymArticleSaveComplete() instead - confirm before registering it.
+
+function wfDymArticleInsertComplete( &$article, &$user, $text, $summary, $isminor, $watchthis, $something ) {
+
+	$skip = $article->getTitle()->getNamespace() != 0 || $article->isRedirect() == true;
+	if ( $skip ) {
+		return true;
+	}
+
+	wfDoInsert( $article->getID(), $article->getTitle()->getText() );
+
+	return true;
+}
+
+# Hook handler for 'ArticleUndelete' (registered above under this name).
+#
+# Records the undeleted page again when the undeletion created the page
+# ($create) and the title is in namespace 0.
+# Returns true in every case.
+
+function wfDymArticleUndelete( &$title, &$create ) {
+
+	if ( $create == false || $title->getNamespace() != 0 ) {
+		return true;
+	}
+
+	# TODO it's not possible to detect if the undeleted article is a redirect!
+
+	wfDoInsert( $title->getArticleId(), $title->getText() );
+
+	return true;
+}
+
+# Former name of wfDymArticleUndelete(); kept so that anything still calling
+# the old name keeps working.
+
+function dymArticleUndelete( &$title, &$create ) {
+	return wfDymArticleUndelete( $title, $create );
+}
+
+# Hook handler for 'ArticleDelete'.
+#
+# Removes the page's entry via wfDoDelete() unless the article is outside
+# namespace 0 or is a redirect. Returns true in every case.
+
+function wfDymArticleDelete( $article, $user, $reason ) {
+
+	$skip = $article->getTitle()->getNamespace() != 0 || $article->isRedirect() == true;
+	if ( $skip ) {
+		return true;
+	}
+
+	wfDoDelete( $article->getID() );
+
+	return true;
+}
+
+# Hook handler for 'TitleMoveComplete'.
+#
+# Keeps the tables in step with a page move:
+#   namespace 0 -> namespace 0   update the entry for the new title
+#   namespace 0 -> elsewhere     delete the entry
+#   elsewhere   -> namespace 0   insert an entry
+#   neither in namespace 0       nothing to do
+# Returns true in every case.
+#
+# TODO we can't always check if we're moving a redirect because the old article's content
+# TODO has already been replaced with the redirect to the new title but a
+# TODO new title's content is still "noarticletext" at this point!
+
+function wfDymTitleMoveComplete( &$title, &$nt, &$wgUser, &$pageid, &$redirid ) {
+	$fromMain = ( $title->getNamespace() == 0 );
+	$toMain = ( $nt->getNamespace() == 0 );
+	$newtitletext = $nt->getText();
+
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	if ( !$fromMain && !$toMain ) {
+		return true;
+	}
+
+	if ( $fromMain && $toMain ) {
+		wfDoUpdate( $pageid, $newtitletext );
+	} elseif ( $fromMain ) {
+		wfDoDelete( $pageid );
+	} else {
+		wfDoInsert( $pageid, $newtitletext );
+	}
+
+	return true;
+}
+
+# called at action=edit.
can detect if we're about to edit a redirect
+#
+# Hook handler for 'AlternateEdit'. Notes on the global parser object whether
+# the page opened for editing is a redirect, so wfDymArticleSaveComplete()
+# can compare that with the saved text. Returns true in every case.
+
+function wfDymAlternateEdit( $editpage ) {
+	global $wgParser;
+
+	if ( $editpage->mArticle->isRedirect() ) {
+		$wgParser->mDymRedirBeforeEdit = true;
+	}
+
+	return true;
+}
+
+# called at end of action=submit
+#
+# Hook handler for 'ArticleSaveComplete'. For namespace-0 pages only:
+#   saved text is a redirect, page was not one before and is not new -> delete
+#   saved text is not a redirect, page was one before or is new      -> insert
+# The remembered "was a redirect" flag is cleared afterwards.
+# Returns true in every case.
+
+function wfDymArticleSaveComplete( $article, $user, $text, $summary, $isminor, $dunno1, $dunno2, $flags ) {
+	global $wgParser;
+
+	if ( $article->getTitle()->getNamespace() != 0 ) {
+		return true;
+	}
+
+	# the flag is only ever set by wfDymAlternateEdit(), so it may not exist
+	$wasRedirect = !empty( $wgParser->mDymRedirBeforeEdit );
+	$isNew = ( $flags & EDIT_NEW ) != 0;
+
+	if ( $article->isRedirect( $text ) ) {
+		if ( !$wasRedirect && !$isNew ) {
+			wfDoDelete( $article->getID() );
+		}
+	} elseif ( $wasRedirect || $isNew ) {
+		wfDoInsert( $article->getID(), $article->getTitle()->getText() );
+	}
+
+	$wgParser->mDymRedirBeforeEdit = false;
+
+	return true;
+}
+
+# Return the dn_normid for the normalised form of $title, inserting a new
+# dymnorm row first when none exists yet.
+
+function wfDymFindOrCreateNormId( $dbw, $title ) {
+	$norm = wfDymNormalise( $title );
+
+	$normid = $dbw->selectField( 'dymnorm', 'dn_normid', array( 'dn_normtitle' => $norm ) );
+	if ( $normid ) {
+		wfDebug( "HIPP: old: $title ->\t$norm = $normid\n" );
+		return $normid;
+	}
+
+	$nsvid = $dbw->nextSequenceValue( 'dymnorm_dn_normid_seq' );
+	$dbw->insert( 'dymnorm', array( 'dn_normid' => $nsvid, 'dn_normtitle' => $norm ) );
+	$normid = $dbw->insertId();
+	wfDebug( "HIPP: NEW: $title ->\t$norm = $normid\n" );
+
+	return $normid;
+}
+
+# Set page_touched to the current timestamp on every page whose dympage row
+# points at one of the given norm ids.
+
+function wfDymTouchPages( $dbw, $normids ) {
+	# the ids go into raw SQL, so force them to integers
+	$ids = array_map( 'intval', $normids );
+	if ( !count( $ids ) ) {
+		return;
+	}
+
+	$dbw->query( 'UPDATE ' . $dbw->tableName('page') . ',' . $dbw->tableName('dympage') .
+		' SET page_touched=' . $dbw->addQuotes($dbw->timestamp()) .
+		' WHERE page_id=dp_pageid AND dp_normid IN (' . implode( ',', $ids ) . ')' );
+}
+
+# Start tracking a page: find or create the norm row for $title, link the
+# page to it and touch all pages which will now link here.
+
+function wfDoInsert( $pageid , $title ) {
+	wfDebug( 'HIPP: ' . __METHOD__ . " INSERT\n" );
+	$dbw = wfGetDB( DB_MASTER );
+
+	# find or create normid for the new title
+	$normid = wfDymFindOrCreateNormId( $dbw, $title );
+
+	$dbw->insert( 'dympage', array( 'dp_pageid' => $pageid, 'dp_normid' => $normid ) );
+
+	# touch all pages which will now link here
+	wfDymTouchPages( $dbw, array( $normid ) );
+}
+
+# Stop tracking a page: remove its dympage row, drop the norm row when no
+# other page uses it and touch all pages which used to link here.
+
+function wfDoDelete( $pageid ) {
+	wfDebug( 'HIPP: ' . __METHOD__ . " DELETE\n" );
+	$dbw = wfGetDB( DB_MASTER );
+
+	$normid = $dbw->selectField( 'dympage', 'dp_normid', array('dp_pageid' => $pageid) );
+
+	# The page was never tracked. Carrying on would put an empty value into
+	# the raw UPDATE statement and produce invalid SQL.
+	if ( $normid === false ) {
+		wfDebug( "HIPP: page $pageid has no dympage row, nothing to delete\n" );
+		return;
+	}
+
+	$dbw->delete( 'dympage', array('dp_pageid' => $pageid) );
+
+	$count = $dbw->selectField( 'dympage', 'COUNT(*)', array('dp_normid' => $normid) );
+
+	if ( $count == 0 ) {
+		$dbw->delete( 'dymnorm', array('dn_normid' => $normid) );
+	}
+
+	# touch all pages which used to link here
+	wfDymTouchPages( $dbw, array( $normid ) );
+}
+
+# Re-point a tracked page at the norm row for its new title after a move.
+# A page with no dympage row yet is inserted instead.
+
+function wfDoUpdate( $pageid, $title ) {
+	wfDebug( 'HIPP: ' . __METHOD__ . " MOVE\n" );
+	$dbw = wfGetDB( DB_MASTER );
+
+	$normid = wfDymFindOrCreateNormId( $dbw, $title );
+
+	$oldnormid = $dbw->selectField( 'dympage', 'dp_normid', array('dp_pageid' => $pageid) );
+
+	if ( $oldnormid === false ) {
+		# Not tracked under the old title. An UPDATE would match no row and
+		# the touch query would contain an empty norm id, so add the row.
+		$dbw->insert( 'dympage', array( 'dp_pageid' => $pageid, 'dp_normid' => $normid ) );
+		wfDymTouchPages( $dbw, array( $normid ) );
+		return;
+	}
+
+	if ( $oldnormid == $normid ) {
+		return;
+	}
+
+	$dbw->update( 'dympage', array( 'dp_normid' => $normid ), array( 'dp_pageid' => $pageid ) );
+
+	$count = $dbw->selectField( 'dympage', 'COUNT(*)', array('dp_normid' => $oldnormid) );
+
+	if ( $count == 0 ) {
+		$dbw->delete( 'dymnorm', array('dn_normid' => $oldnormid) );
+	}
+
+	# touch all pages which linked to the old name or will link to the new one
+	wfDymTouchPages( $dbw, array( $normid, $oldnormid ) );
+}
+
+?>
Index: DidYouMean/DYMNorm.php
===================================================================
--- DidYouMean/DYMNorm.php (revision 0)
+++ DidYouMean/DYMNorm.php (revision 0)
@@ -0,0 +1,250 @@
+
Index: DidYouMean/didyoumean.sql
===================================================================
--- DidYouMean/didyoumean.sql (revision 0)
+++ DidYouMean/didyoumean.sql (revision 0)
@@ -0,0 +1,15 @@
+CREATE TABLE /*$wgDBprefix*/dympage (
+  `dp_pageid` int(8) NOT NULL,
+  `dp_normid` int(8) NOT NULL,
+  PRIMARY KEY (`dp_pageid`),
+  UNIQUE KEY `dp_pageid` (`dp_pageid`),
+  KEY `dp_normid` (`dp_normid`)
+);
+
+CREATE TABLE /*$wgDBprefix*/dymnorm (
+  `dn_normid` int(8) AUTO_INCREMENT,
+  `dn_normtitle` varchar(255) binary NOT NULL,
+  PRIMARY KEY (`dn_normid`),
+  UNIQUE KEY `dn_normid` (`dn_normid`),
+  UNIQUE KEY `dn_normtitle` (`dn_normtitle`)
+);
Index: DidYouMean/install.php
===================================================================
--- DidYouMean/install.php (revision 0)
+++ DidYouMean/install.php (revision 0)
@@ -0,0 +1,99 @@
+
+ * @copyright © 2007 Andrew Dunbar
+ * @licence Copyright holder allows use of the code for any purpose
+ */
+
+# Command-line installer: creates the dympage and dymnorm tables from
+# didyoumean.sql and fills them from every non-redirect page in namespace 0.
+
+# We're going to have to assume we're running from one of two places
+## extensions/install.php (bad setup!)
+## extensions/DidYouMean/install.php (the dir name doesn't even matter)
+$maint = dirname( dirname( __FILE__ ) ) . '/maintenance';
+if( is_file( $maint . '/commandLine.inc' ) ) {
+	require_once( $maint . '/commandLine.inc' );
+} else {
+	$maint = dirname( dirname( dirname( __FILE__ ) ) ) . '/maintenance';
+	if( is_file( $maint . '/commandLine.inc' ) ) {
+		require_once( $maint . '/commandLine.inc' );
+	} else {
+		# We can't find it, give up
+		echo( "The installation script was unable to find the maintenance directories.\n\n" );
+		die( 1 );
+	}
+}
+
+# Set up some other paths
+$sql = dirname( __FILE__ ) . '/didyoumean.sql';
+
+# Whine if we don't have appropriate credentials to hand
+if( !isset( $wgDBadminuser ) || !isset( $wgDBadminpassword ) ) {
+	echo( "No superuser credentials could be found. Please provide the details\n" );
+	echo( "of a user with appropriate permissions to update the database. See\n" );
+	echo( "AdminSettings.sample for more details.\n\n" );
+	die( 1 );
+}
+
+# Get a connection
+# (the configured type is compared case-insensitively; comparing against the
+# mixed-case 'MySql' never matched a lowercase setting)
+$dbtype = strtolower( $wgDBtype );
+$dbclass = $dbtype == 'mysql'
+	? 'Database'
+	: 'Database' . ucfirst( $dbtype );
+$dbc = new $dbclass;
+$dba = $dbc->newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname, 1 );
+
+# Check we're connected
+if( !$dba->isOpen() ) {
+	echo( "A connection to the database could not be established.\n\n" );
+	die( 1 );
+}
+
+# Do nothing if the tables exist
+if( !$dba->tableExists( 'dympage' ) || !$dba->tableExists( 'dymnorm' ) ) {
+	echo( "Sourcing: $sql\n" );
+	$res = $dba->sourceFile( $sql );
+	echo( "Result: $res\n" );
+	# sourceFile() reports failure as an error string, which is also truthy,
+	# so success has to be tested with ===
+	if( $res === true ) {
+		echo( "The tables have been set up correctly.\n" );
+
+		# load relative to this script, not the current working directory
+		require_once( dirname( __FILE__ ) . '/DYMNorm.php' );
+
+		$result = $dba->select(
+			'page',
+			array ( 'page_title', 'page_id' ),
+			array (
+				'page_namespace=0',
+				'page_is_redirect=0'
+			)
+		);
+
+		while( $row = $dba->fetchObject( $result ) ) {
+			#echo "$row->page_title\n";
+
+			# normalise the same text form (spaces, not underscores) that
+			# the extension passes to wfDymNormalise() at run time
+			$norm = wfDymNormalise( str_replace( '_', ' ', $row->page_title ) );
+
+			# *new* table using numeric columns where possible
+			$theid = $dba->selectField( 'dymnorm', 'dn_normid', array( 'dn_normtitle' => $norm ) );
+			if ($theid) {
+				echo( "old: $row->page_title ->\t$norm = $theid\n" );
+			} else {
+				$normid = $dba->nextSequenceValue( 'dymnorm_dn_normid_seq' );
+				$dba->insert( 'dymnorm', array( 'dn_normid' => $normid, 'dn_normtitle' => $norm ) );
+				$theid = $dba->insertId();
+				echo( "NEW: $row->page_title ->\t$norm = $theid\n" );
+			}
+			$dba->insert( 'dympage', array( 'dp_pageid' => $row->page_id, 'dp_normid' => $theid ) );
+		}
+		$dba->freeResult( $result );
+	} else {
+		echo( "The tables could not be set up; nothing was imported.\n" );
+	}
+} else {
+	echo( "The tables already exist. No action was taken.\n" );
+}
+
+# Close the connection
+$dba->close();
+echo( "\n" );
+
+?>