# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Special:Export entry point, the WikiExporter class and file name helpers.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */

# This is not a valid entry point, perform no further processing unless MEDIAWIKI is defined
if( !defined( 'MEDIAWIKI' ) ) {
	die( "This file is part of MediaWiki and is not a valid entry point\n" );
}

/** */
require_once( 'Revision.php' );

/*
 * History modes accepted by WikiExporter: dump every revision of a page
 * (MW_EXPORT_FULL) or only the latest one (MW_EXPORT_CURRENT).
 *
 * ==== comment to be removed after patch acceptance ====
 * Why not just booleans for the related parameters?
 * Do we expect other values?
 */
define( 'MW_EXPORT_FULL', 0 );
define( 'MW_EXPORT_CURRENT', 1 );

/*
 * Result handling modes accepted by WikiExporter: buffered query results
 * (MW_EXPORT_BUFFER) or unbuffered streaming (MW_EXPORT_STREAM).
 */
define( 'MW_EXPORT_BUFFER', 0 );
define( 'MW_EXPORT_STREAM', 1 );

/*
 * ==== comment to be removed after patch acceptance ===
 * wfSpecialExport does 4 things :
 * 1 - process the request and/or the config parameters
 * 2 - send HTTP header(s)
 * 3 - send the XML output
 * 4 - build the form
 * Because this patch slightly enlarges tasks 1, 2 and 4,
 * it seems more convenient to leave task 1 here,
 * but to move tasks 2, 3 and 4 into separate functions.
* In order to avoid wfXxx name space pollution, these 3 functions
 * are created as static functions of class WikiExporter:
 * doHeaders(), doExport(), doForm()
 */

/**
 * Entry point for Special:Export.
 *
 * Reads the export configuration and the current request, then either
 * sends the HTTP headers followed by the XML dump (when a page list is
 * available) or renders the export form.
 *
 * The MW_EXPORT_FILENAME_*, MW_EXPORT_DISPO_* and MW_EXPORT_DEFAULT_PREFIX
 * constants are not defined in this file; they are presumably provided by
 * the configuration that accompanies this patch -- TODO confirm.
 *
 * @param string $page newline-separated list of page names to export
 */
function wfSpecialExport( $page = '' ) {
	global $wgRequest;
	// The settings must be declared global: without this they were always
	// undefined locals and the defaults below silently won every time.
	global $wgSpecialExportFilename, $wgSpecialExportDownload, $wgSpecialExportPrefix;

	// Validate the configuration into locals so the globals are not clobbered.
	$filenameMode = is_integer( $wgSpecialExportFilename )
		? $wgSpecialExportFilename
		: MW_EXPORT_FILENAME_DEFAULT;
	$downloadMode = is_integer( $wgSpecialExportDownload )
		? $wgSpecialExportDownload
		: MW_EXPORT_DISPO_DEFAULT;
	$prefix = is_string( $wgSpecialExportPrefix )
		? $wgSpecialExportPrefix
		: MW_EXPORT_DEFAULT_PREFIX;

	// Defaults for values that only exist on a submitted form.
	$szXmlFileName = '';
	$zRequestDispo = false;

	if( $wgRequest->getVal( 'action' ) == 'submit' ) {
		$page = $wgRequest->getText( 'pages' );
		$curonly = $wgRequest->getCheck( 'curonly' );
		// Previously stored under a misspelled name and therefore never used.
		$szXmlFileName = $wgRequest->getText( 'szXmlFileName' );
		$zRequestDispo = $wgRequest->getCheck( 'zDispo' );
	} else {
		# Pre-check the 'current version only' box in the UI
		$curonly = true;
	}

	// Generated file names carry a timestamp in these two modes
	// (a random value is used otherwise).
	$zDate = ( $filenameMode == MW_EXPORT_FILENAME_ASK_ELSE_TIMESTAMP )
		|| ( $filenameMode == MW_EXPORT_FILENAME_TIMESTAMP );

	if( $page != '' ) {
		// output HTTP header(s) :
		WikiExporter::doHeaders(
			( $downloadMode == MW_EXPORT_DISPO_ALLWAYS ) || $zRequestDispo,
			$prefix,
			$zDate,
			$szXmlFileName
		);
		// output XML :
		WikiExporter::doExport(
			$page,
			$curonly ? MW_EXPORT_CURRENT : MW_EXPORT_FULL
		);
	} else {
		// prepare the form :
		WikiExporter::doForm(
			// preset value of the 'current v. all' checkbox
			$curonly,
			// preset value for the 'download' checkbox
			( $downloadMode == MW_EXPORT_DISPO_SUGGESTED ) || $zRequestDispo,
			// preset value for the file name
			$szXmlFileName,
			// shall we prompt the user for a filename?
			( $filenameMode == MW_EXPORT_FILENAME_ASK_ELSE_TIMESTAMP )
				|| ( $filenameMode == MW_EXPORT_FILENAME_ASK_ELSE_RANDOM ),
			// shall we prompt the user for download?
			( $downloadMode == MW_EXPORT_DISPO_PERHAPS )
				|| ( $downloadMode == MW_EXPORT_DISPO_SUGGESTED ),
			$prefix,
			$zDate
		);
	}
}

/**
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiExporter {

	/**#@+
	 * @access public
	 * @static
	 */

	/**
	 * Disables normal page output and sends the HTTP headers for an XML dump.
	 *
	 * When a Content-Disposition header is requested, the file name is
	 * cleaned with wfUnixFileName() (unless the caller vouches for it) and
	 * replaced by a generated name when it ends up empty.
	 *
	 * @param boolean $zXmlDisposition : shall we use Content-Disposition ?
	 * @param string $szSuggestedPrefix prefix for a generated file name
	 * @param boolean $zDate : use a timestamp (otherwise use a random value)
	 * @param string $szXmlDispositionFileName requested file name, may be empty
	 * @param boolean $zRelyOnFileName (turn on if you are sure file name is ok, probably because it was generated automatically)
	 * @param unsigned $uMaxLength (RARE) Maximum filename length. Default is 250 characters.
	 * @param boolean $zGerman (RARE) also process German characters
	 */
	function doHeaders(
		$zXmlDisposition = true,
		$szSuggestedPrefix = MW_EXPORT_DEFAULT_PREFIX,
		$zDate = true,
		$szXmlDispositionFileName = '',
		$zRelyOnFileName = false,
		$uMaxLength = 250,
		$zGerman = true
	) {
		global $wgOut;

		$wgOut->disable();
		header( "Content-type: application/xml; charset=utf-8" );

		if( $zXmlDisposition ) {
			if( !$zRelyOnFileName ) {
				$szXmlDispositionFileName = wfUnixFileName(
					$szXmlDispositionFileName, $uMaxLength, $zGerman );
			}
			if( !$szXmlDispositionFileName ) {
				$szXmlDispositionFileName = wfSuggestFileName(
					$szSuggestedPrefix, $zDate, $uMaxLength );
			}
			// The header used to end with a stray apostrophe after the
			// closing double quote, producing a malformed header value.
			header( "Content-Disposition: attachment; filename=\"{$szXmlDispositionFileName}\"" );
		}
	}

	/**
	 * Builds an exporter on the slave database and dumps the given pages
	 * between the opening and closing root elements.
	 *
	 * @param string $page newline-separated list of page names
	 * @param unsigned $history one of MW_EXPORT_FULL or MW_EXPORT_CURRENT
	 * @param unsigned $buffer one of MW_EXPORT_BUFFER or MW_EXPORT_STREAM
	 */
	function doExport(
		$page,
		$history = MW_EXPORT_CURRENT,
		$buffer = MW_EXPORT_BUFFER
	) {
		$pages = explode( "\n", $page );
		$db =& wfGetDB( DB_SLAVE );

		// Pass the caller's modes through. The constructor call used to
		// contain "$history = MW_EXPORT_CURRENT" style assignments, which
		// overwrote both arguments and made a full-history export impossible.
		$exporter = new WikiExporter( $db, $history, $buffer );

		$exporter->openStream();
		$exporter->pagesByName( $pages );
		$exporter->closeStream();
	}

	/**
	 * write down the Export form on $wgOut
	 *
	 * @param boolean $curonly ($zCurOnly) preset value of the 'current v. all' checkbox
	 * @param boolean $zDisposition preset value for the 'download' checkbox
	 * @param string $szXmlDispositionFileName preset value for the file name
	 * @param boolean $zAskFilename shall we prompt the user for a filename?
	 * @param boolean $zAskDisposition shall we prompt the user for download?
* @param string $szSuggestedPrefix prefix used when a file name has to be generated
	 * @param boolean $zDate use time stamp if we need to generate a file name
	 * @param boolean $zRelyOnFileName (turn on if you are sure file name is ok, probably because it was generated automatically)
	 * @param unsigned $uMaxLength (rarely used)
	 * @param boolean $zGerman (rarely used)
	 */
	function doForm(
		$zCurOnly = true,
		$zDisposition = true,
		$szXmlDispositionFileName = '',
		$zAskFilename = true,
		$zAskDisposition = true,
		$szSuggestedPrefix = MW_EXPORT_DEFAULT_PREFIX,
		$zDate = true,
		$zRelyOnFileName = false,
		$uMaxLength = 250,
		$zGerman = true
	) {
		global $wgOut;

		$wgOut->addWikiText( wfMsg( "exporttext" ) );
		$titleObj = Title::makeTitle( NS_SPECIAL, "Export" );
		$action = $titleObj->escapeLocalURL( 'action=submit' );

		// NOTE(review): the HTML literals handed to addHTML() were empty in
		// the reviewed source, so no form was rendered at all. The markup
		// below is rebuilt from the field names wfSpecialExport() reads from
		// the request ('pages', 'curonly', 'zDispo', 'szXmlFileName');
		// verify the layout against the intended design.
		$szCurOnlyChecked = $zCurOnly ? " checked='checked' " : '';
		$szPromptCurOnly = wfMsg( "exportcuronly" );
		$wgOut->addHTML(
			"<form method='post' action=\"{$action}\">\n" .
			"<textarea name='pages' cols='40' rows='10'></textarea><br />\n" .
			"<label><input type='checkbox' name='curonly' value='true'{$szCurOnlyChecked}/> " .
			"{$szPromptCurOnly}</label><br />\n"
		);

		if( $zAskDisposition ) {
			// 'exportpromptdisposition' is a NEW message.
			$szPromptDisposition = wfMsg( 'exportpromptdisposition' );
			$szDispositionChecked = $zDisposition ? " checked='checked' " : '';
			$wgOut->addHTML(
				"<label><input type='checkbox' name='zDispo' value='true'{$szDispositionChecked}/> " .
				"{$szPromptDisposition}</label><br />\n"
			);
		}

		if( $zAskFilename ) {
			// 'exportpromptfilename' is a NEW message.
			$szPromptFilename = wfMsg( 'exportpromptfilename' );

			if( !$zRelyOnFileName ) {
				// The name may legitimately be empty. When one is given it
				// is normally already correct, but it is checked once more.
				$szXmlDispositionFileName = trim( $szXmlDispositionFileName );
				if( $szXmlDispositionFileName ) {
					$szXmlDispositionFileName = wfUnixFileName(
						$szXmlDispositionFileName, $uMaxLength, $zGerman );
				}
			}
			if( !$szXmlDispositionFileName ) {
				$szXmlDispositionFileName = wfSuggestFileName(
					$szSuggestedPrefix, $zDate, $uMaxLength );
			}

			// Escape for the attribute context: with $zRelyOnFileName the
			// name has not been through wfUnixFileName().
			$szFileNameAttr = htmlspecialchars( $szXmlDispositionFileName, ENT_QUOTES );
			$wgOut->addHTML(
				"<label>{$szPromptFilename} " .
				"<input type='text' name='szXmlFileName' size='40' value='{$szFileNameAttr}' />" .
				"</label><br />\n"
			);
		}

		// 'exportsubmit' is a NEW message. Generic button words such as
		// 'submit' or 'send' are ambiguous in some languages (e.g. French
		// 'envoyer'), so an explicit word like 'Export' should be used.
		$szSubmit = htmlspecialchars( wfMsg( 'exportsubmit' ), ENT_QUOTES );
		$wgOut->addHTML(
			"<input type='submit' value='{$szSubmit}' />\n" .
			"</form>\n"
		);

		// Summary of the 3 new messages (see Language.php):
		// exportpromptdisposition, exportpromptfilename, exportsubmit
	}

	/**#@-*/

	/**#@+
	 * @access private
	 */

	/**
	 * Callback run after each page is closed, or null.
	 * @var function
	 */
	var $pageCallback = null;

	/**
	 * Callback run after each revision is written, or null.
	 * @var function
	 */
	var $revCallback = null;

	/**
	 * One of MW_EXPORT_FULL or MW_EXPORT_CURRENT.
	 * @var unsigned mode
	 */
	var $history;

	/**
	 * One of MW_EXPORT_BUFFER or MW_EXPORT_STREAM.
	 * @var unsigned mode
	 */
	var $buffer;

	/**
	 * Connection all dump queries are run on.
	 * @var Database
	 */
	var $db;

	/**#@-*/

	/**
	 * If using MW_EXPORT_STREAM to stream a large amount of data,
	 * provide a database connection which is not managed by
	 * LoadBalancer to read from: some history blob types will
	 * make additional queries to pull source data while the
	 * main query is still running.
	 *
	 * @param Database $db
	 * @param unsigned $history one of MW_EXPORT_FULL or MW_EXPORT_CURRENT
	 * @param unsigned $buffer one of MW_EXPORT_BUFFER or MW_EXPORT_STREAM
	 */
	function WikiExporter(
		&$db,
		$history = MW_EXPORT_CURRENT,
		$buffer = MW_EXPORT_BUFFER
	) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer = $buffer;
	}

	/**
	 * Set a callback to be called after each page in the output
	 * stream is closed. The callback will be passed a database row
	 * object with the last revision output.
	 *
	 * A set callback can be removed by passing null here.
	 *
	 * @param mixed $callback
	 */
	function setPageCallback( $callback ) {
		$this->pageCallback = $callback;
	}

	/**
	 * Set a callback to be called after each revision in the output
	 * stream is closed. The callback will be passed a database row
	 * object with the revision data.
	 *
	 * A set callback can be removed by passing null here.
	 *
	 * @param mixed $callback
	 */
	function setRevisionCallback( $callback ) {
		$this->revCallback = $callback;
	}

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	function schemaVersion() {
		return "0.3";
	}

	/**
	 * Opens the XML output stream's root element.
	 * This does not include an xml directive, so is safe to include
	 * as a subelement in a larger XML stream.
Namespace and XML Schema references are included.
	 *
	 * The site information section is written right after the root tag.
	 *
	 * To capture the stream to a string, use PHP's output buffering
	 * functions. Output will be encoded in UTF-8.
	 */
	function openStream() {
		global $wgContLanguageCode;

		$ver = $this->schemaVersion();
		$attribs = array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			                        "http://www.mediawiki.org/xml/export-$ver.xsd",
			'version'            => $ver,
			'xml:lang'           => $wgContLanguageCode
		);
		// Null contents: presumably wfElement() then emits only the opening
		// tag -- closeStream() writes the matching end tag.
		print wfElement( 'mediawiki', $attribs, null ) . "\n";
		$this->siteInfo();
	}

	/**
	 * Prints the site information section: site name, base URL, generator,
	 * title case setting and the namespace list.
	 *
	 * The enclosing siteinfo tags were missing from the reviewed source,
	 * which left these elements as direct children of the root element.
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces()
		);
		print "  <siteinfo>\n";
		foreach( $info as $item ) {
			print "    $item\n";
		}
		print "  </siteinfo>\n";
	}

	/**
	 * @return string sitename element built from $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return wfElement( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string generator element naming the MediaWiki version
	 */
	function generator() {
		global $wgVersion;
		return wfElement( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string base element holding the full URL of the main page
	 */
	function homelink() {
		// NOTE(review): Title::newFromText() is null-checked in pageByName(),
		// so it can apparently fail; an invalid 'mainpage' message would
		// break here -- confirm whether that can happen in practice.
		$page = Title::newFromText( wfMsgForContent( 'mainpage' ) );
		return wfElement( 'base', array(), $page->getFullUrl() );
	}

	/**
	 * @return string case element describing title case handling
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return wfElement( 'case', array(), $sensitivity );
	}

	/**
	 * Builds the namespace list: one namespace element per entry of the
	 * content language's namespace table, keyed by namespace number, with
	 * underscores in the name shown as spaces.
	 *
	 * @return string namespaces element, without trailing newline
	 */
	function namespaces() {
		global $wgContLang;

		$spaces = "<namespaces>\n";
		foreach( $wgContLang->getNamespaces() as $ns => $title ) {
			$spaces .= '      ' .
				wfElement( 'namespace',
					array( 'key' => $ns ),
					str_replace( '_', ' ', $title ) ) .
				"\n";
		}
		$spaces .= "    </namespaces>";
		return $spaces;
	}

	/**
	 * Closes the output stream with the closing root element.
	 * Call when finished dumping things.
	 */
	function closeStream() {
		// Only a bare newline was printed here, leaving the root element
		// opened by openStream() unterminated.
		print "</mediawiki>\n";
	}

	/**
	 * Dumps a series of page and revision records for all pages
	 * in the database, either including complete history or only
	 * the most recent version.
*
	 * @return mixed a WikiError if the history mode is invalid, otherwise nothing
	 */
	function allPages() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dumps the page identified by the given title.
	 *
	 * @param Title $title
	 * @return mixed a WikiError if the history mode is invalid, otherwise nothing
	 */
	function pageByTitle( $title ) {
		return $this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDbKey() ) );
	}

	/**
	 * Dumps the page with the given name.
	 *
	 * @param string $name page name as text
	 * @return mixed a WikiError if the name is not a valid title or the
	 *               history mode is invalid, otherwise nothing
	 */
	function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if( is_null( $title ) ) {
			// "new" was missing here, which turned the error path into a
			// call to an undefined function WikiError().
			return new WikiError( "Can't export invalid title" );
		}
		return $this->pageByTitle( $title );
	}

	/**
	 * Dumps each of the named pages in turn. Errors reported for
	 * individual names are not propagated.
	 *
	 * @param array $names list of page names
	 */
	function pagesByName( $names ) {
		foreach( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	// -------------------- private implementation below --------------------

	/**#@+
	 * @access private
	 */

	/**
	 * Runs the dump query and writes every matching row to the output.
	 *
	 * The page, revision and text tables are joined; depending on the
	 * history mode either all revisions or only the latest revision of
	 * each page is selected. Rows are ordered by page id.
	 *
	 * @param string $cond SQL condition limiting the pages, or '' for all pages
	 * @return mixed a WikiError if the history mode is invalid, otherwise nothing
	 */
	function dumpFrom( $cond = '' ) {
		$fname = 'WikiExporter::dumpFrom';
		wfProfileIn( $fname );

		$page     = $this->db->tableName( 'page' );
		$revision = $this->db->tableName( 'revision' );
		$text     = $this->db->tableName( 'text' );

		if( $this->history == MW_EXPORT_FULL ) {
			$join = 'page_id=rev_page';
		} elseif( $this->history == MW_EXPORT_CURRENT ) {
			$join = 'page_id=rev_page AND page_latest=rev_id';
		} else {
			wfProfileOut( $fname );
			return new WikiError( "$fname given invalid history dump type." );
		}
		$where = ( $cond == '' ) ? '' : "$cond AND";

		// In stream mode result buffering is switched off for the query;
		// the previous setting is put back once the dump is done.
		if( $this->buffer == MW_EXPORT_STREAM ) {
			$prev = $this->db->bufferResults( false );
		}

		if( $cond == '' ) {
			// Optimization hack for full-database dump
			$pageindex = 'FORCE INDEX (PRIMARY)';
			$revindex = 'FORCE INDEX(page_timestamp)';
		} else {
			$pageindex = '';
			$revindex = '';
		}

		$result = $this->db->query(
			"SELECT * FROM
				$page $pageindex,
				$revision $revindex,
				$text
				WHERE $where $join AND rev_text_id=old_id
				ORDER BY page_id", $fname );
		$wrapper = $this->db->resultObject( $result );
		$this->outputStream( $wrapper );

		if( $this->buffer == MW_EXPORT_STREAM ) {
			$this->db->bufferResults( $prev );
		}

		wfProfileOut( $fname );
	}

	/**
	 * Runs through a query result set dumping page and revision records.
* The result set should be sorted/grouped by page to avoid duplicate
	 * page records in the output.
	 *
	 * The result set will be freed once complete. Should be safe for
	 * streaming (non-buffered) queries, as long as it was made on a
	 * separate database connection not managed by LoadBalancer; some
	 * blob storage types will make queries to pull source data.
	 *
	 * @param ResultWrapper $resultset
	 */
	function outputStream( $resultset ) {
		$last = null;
		while( $row = $resultset->fetchObject() ) {
			$isNewPage = is_null( $last )
				|| $last->page_namespace != $row->page_namespace
				|| $last->page_title != $row->page_title;
			if( $isNewPage ) {
				if( isset( $last ) ) {
					$this->closePage( $last );
				}
				$this->openPage( $row );
			}
			$this->dumpRev( $row );
			// Remember the most recent row on every pass, so closePage() and
			// the page callback receive the last revision output, as their
			// documentation promises. The row used to be remembered only
			// when a page was opened, i.e. the first revision was passed.
			$last = $row;
		}
		if( isset( $last ) ) {
			$this->closePage( $last );
		}
		$resultset->free();
	}

	/**
	 * Opens a page section on the output stream, with data
	 * from the given database row: title, page id and, when set,
	 * the page restrictions.
	 *
	 * @param object $row
	 */
	function openPage( $row ) {
		// The page start tag was missing from the reviewed source.
		print "  <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		print '    ' . wfElementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
		print '    ' . wfElement( 'id', array(), $row->page_id ) . "\n";
		if( '' != $row->page_restrictions ) {
			print '    ' . wfElement( 'restrictions', array(),
				$row->page_restrictions ) . "\n";
		}
	}

	/**
	 * Closes a page section on the output stream.
	 * If a per-page callback has been set, it will be called
	 * and passed the last database row used for this page.
	 *
	 * @param object $row
	 */
	function closePage( $row ) {
		// The page end tag was missing from the reviewed source.
		print "  </page>\n";
		if( isset( $this->pageCallback ) ) {
			call_user_func( $this->pageCallback, $row );
		}
	}

	/**
	 * Dumps a revision section on the output stream, with
	 * data filled in from the given database row: revision id, timestamp,
	 * contributor (user name and id, or IP for anonymous edits), the minor
	 * flag, the edit comment and the revision text.
	 *
	 * If a per-revision callback has been set, it is called with the row
	 * after the section has been written.
	 *
	 * @param object $row
	 */
	function dumpRev( $row ) {
		$fname = 'WikiExporter::dumpRev';
		wfProfileIn( $fname );

		// The revision, contributor and minor tags were missing from the
		// reviewed source; only the surrounding whitespace was printed.
		print "    <revision>\n";
		print "      " . wfElement( 'id', null, $row->rev_id ) . "\n";

		$ts = wfTimestamp2ISO8601( $row->rev_timestamp );
		print "      " . wfElement( 'timestamp', null, $ts ) . "\n";

		print "      <contributor>";
		if( $row->rev_user ) {
			print wfElementClean( 'username', null, $row->rev_user_text );
			print wfElement( 'id', null, $row->rev_user );
		} else {
			print wfElementClean( 'ip', null, $row->rev_user_text );
		}
		print "</contributor>\n";

		if( $row->rev_minor_edit ) {
			print "      <minor/>\n";
		}
		if( $row->rev_comment != '' ) {
			print "      " . wfElementClean( 'comment', null, $row->rev_comment ) . "\n";
		}

		$text = Revision::getRevisionText( $row );
		print "      " . wfElementClean( 'text',
			array( 'xml:space' => 'preserve' ), $text ) . "\n";

		print "    </revision>\n";

		wfProfileOut( $fname );

		if( isset( $this->revCallback ) ) {
			call_user_func( $this->revCallback, $row );
		}
	}

	/**#@-*/
}

/**
 * Converts a 14-digit timestamp (YYYYMMDDHHMMSS) to ISO 8601 form,
 * e.g. 2003-08-05T18:30:02Z. Input of any other length is returned
 * unchanged, because the pattern is anchored at both ends.
 *
 * @param string $ts
 * @return string
 */
function wfTimestamp2ISO8601( $ts ) {
	#2003-08-05T18:30:02Z
	return preg_replace( '/^(....)(..)(..)(..)(..)(..)$/', '$1-$2-$3T$4:$5:$6Z', $ts );
}

/*
 * ==== comment to be removed after patch acceptance ====
 * The next 3 global functions are, in my personal implementation,
 * located, for reusability, in includes/GlobalFunctions.php rather than here,
 * because they can be useful everywhere there is a need to build a filename.
 * They are deliberately split into 3 functions to improve readability.
 * I suggest moving these 3 functions into GlobalFunctions.php
 */

/**
 * Generates a unique file name e.g. for a downloaded file.
 *
 * The name is the prefix, a dash, and either the current date and time
 * (Y-m-d-H-i-s) or a random number; the result is then passed through
 * wfUnixFileName() to check characters and length.
 *
 * @param string $szPrefix
 * @param boolean $zDate : use a timestamp (otherwise use a random value)
 * @param unsigned $uMaxLength Maximum filename length. Default is 250 characters.
 * @return string generated file name
 */
function wfSuggestFileName(
	$szPrefix = MW_EXPORT_DEFAULT_PREFIX,
	$zDate = true,
	$uMaxLength = 250
) {
	$szName = $szPrefix . '-' .
		( $zDate ? date( "Y-m-d-H-i-s" ) : mt_rand( 1234567, 9876543 ) );
	// still verify : illegal prefix ? length ?
	return wfUnixFileName( $szName, $uMaxLength );
}

/**
 * Generates a useful Unix filename from the original one.
 *
 * @param string $szName The filename to be processed.
* @param unsigned $uMaxLength Maximum filename length. Default is 250 characters;
 *                 values outside 1..250 are clamped into that range.
 * @param boolean $zGerman also process German characters
 * @return string cleaned name; may be empty if nothing usable remains
 */
function wfUnixFileName(
	$szName,
	$uMaxLength = 250,
	$zGerman = true
) {
	$uMaxLength = max( 1, min( 250, $uMaxLength ) );

	// remove accents :
	$szName = wfRemoveAccents( $szName, $zGerman );

	// Replace any remaining special characters with an underscore.
	$szName = preg_replace( '/[^a-z0-9.-]+/i', '_', $szName );

	// Remove any useless underscores, e.g. _a_a_._a_ becomes a_a.a
	$szName = preg_replace( '/_*\b_*/', '', $szName );

	// Crop the filename if it's too long, one character at a time, always
	// removing the first character that sits just before a word boundary.
	while( strlen( $szName ) > $uMaxLength ) {
		$szCropped = preg_replace( '/.\b/', '', $szName, 1 );
		if( !is_string( $szCropped ) || strlen( $szCropped ) >= strlen( $szName ) ) {
			// No word boundary left (e.g. a name made only of dashes or
			// dots): the pattern can no longer shorten the name, which
			// used to loop forever. Cut it hard instead.
			$szName = substr( $szName, 0, $uMaxLength );
			break;
		}
		$szName = $szCropped;
	}
	return $szName;
}

/**
 * wfRemoveAccents replaces some special 'european' characters
 * so that a name may be used e.g. to build a file name.
 *
 * Mind that the returned string has the same number of characters as the
 * original if $zGerman==false, but may be longer otherwise, because
 * umlauts become two letters ("ü" -> "ue") and "ß" becomes "ss".
 *
 * The replacement works on whole characters via array-form strtr(). The
 * former byte-wise strtr() and character class corrupted multibyte input.
 * Assumes this file and the input use the same encoding (UTF-8 expected)
 * -- TODO confirm.
 *
 * @param string $szName a name with accents
 * @param boolean $zGerman also process German characters
 * @return string the name with accented characters replaced
 */
function wfRemoveAccents( $szName, $zGerman = true ) {
	if( $zGerman ) {
		// German transliteration takes precedence over plain accent removal.
		$szName = strtr( $szName, array(
			'ß' => 'ss',
			'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue',
			'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue'
		) );
	}

	// Process accents, etc.
	// Mind that "ß", "ü", etc. end up differently depending on $zGerman!
	$szName = strtr( $szName, array(
		'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'A',
		'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
		'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
		'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
		'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
		'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'a',
		'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
		'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
		'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
		'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
		'ç' => 'c', 'Ç' => 'C', 'ß' => 's'
	) );

	return $szName;
}

/**
 * Function 'xmlsafe' UNUSED, renamed to wfXmlSafe to stick to mw conventions
 *
 * @param string $string
 * @return string $string
 * @todo move function into WikiExporter class or decide to rename as wfXmlSafe
 * @todo this function is currently unused...
*/
function wfXmlSafe( $string ) {
	$profileKey = 'xmlsafe';
	wfProfileIn( $profileKey );

	/**
	 * Old page data may not have been normalized properly. Invalid UTF-8
	 * sequences or forbidden control characters would make the XML output
	 * invalid, so they are cleaned up before the text is escaped.
	 */
	$string = htmlspecialchars( UtfNormal::cleanUp( $string ) );

	wfProfileOut( $profileKey );
	return $string;
}

/**
 * ==== comment to be removed after patch acceptance ====
 * I systematically remove whitespace after the final ? >
 * These characters are generally harmless.
 * However, in some applications (cookies, headers) they are dangerous.
 * So, it's a matter of principle : just delete them everywhere
 */
?>