diff -ur phase3/includes/DefaultSettings.php blahtex/includes/DefaultSettings.php --- phase3/includes/DefaultSettings.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/DefaultSettings.php 2006-05-20 01:26:33.000000000 +1000 @@ -1097,6 +1097,10 @@ $wgUseTeX = false; /** Location of the texvc binary */ $wgTexvc = './math/texvc'; +/** Location of the blahtex binary. If empty, texvc is used instead */ +$wgBlahtex = ''; +/** Command-line options for blahtex */ +$wgBlahtexOptions = '--texvc-compatible-commands --mathml-version-1-fonts --disallow-plane-1 --spacing strict'; # # Profiling / debugging diff -ur phase3/includes/Math.php blahtex/includes/Math.php --- phase3/includes/Math.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/Math.php 2006-05-20 01:44:56.000000000 +1000 @@ -1,198 +1,580 @@ parsing - * @package MediaWiki + * @file Math.php + * Contains everything related to parsing. + * \sa math/README */ /** - * Takes LaTeX fragments, sends them to a helper program (texvc) for rendering - * to rasterized PNG and HTML and MathML approximations. An appropriate - * rendering form is picked and returned. - * - * by Tomasz Wegrzanowski, with additions by Brion Vibber (2003, 2004) + * %Parser for the blahtex's output. + */ +class blahtexOutputParser { + var $parser; /**< \private */ + var $stack; /**< \private */ + var $results; /**< \private */ + + function blahtexOutputParser() + { + $this->parser = xml_parser_create( "UTF-8" ); + $this->stack = array(); + $this->results = array(); + $this->prevCdata = false; + + xml_set_object( $this->parser, $this ); + xml_parser_set_option( $this->parser, XML_OPTION_CASE_FOLDING, 0 ); + xml_set_element_handler( $this->parser, "startElement", "stopElement" ); + xml_set_character_data_handler( $this->parser, "characterData" ); + } + + /** + * Main function, which parses blahtex's output. + * The format of blahtex's output is based on XML. 
This function + * parses the XML and returns a flat array keyed by the colon-joined + * element path. For instance, if $retval denotes the return value, + * then $retval["blahtex:error"] contains the text within the + * <error> tag within the <blahtex> tag. If there is more than one + * <error> tag within a <blahtex> tag, then + * $retval["blahtex:error"] is an array of strings. As a special + * case, $retval["mathmlMarkup"] contains the segment between + * <markup> and </markup>. + * @param $data String with output to be parsed. + * @return Array representing XML tree. + */ + function parse( $data ) + { + // We splice out any segment between <markup> and </markup> + // so that the XML parser doesn't have to deal with all the MathML tags. + $markupBegin = strpos( $data, "<markup>" ); + if ( !( $markupBegin === false ) ) { + $markupEnd = strpos( $data, "</markup>" ); + $this->results["mathmlMarkup"] = + trim( substr( $data, $markupBegin + 8, $markupEnd - $markupBegin - 8 ) ); + $data = substr( $data, 0, $markupBegin + 8 ) . substr( $data, $markupEnd ); + } + xml_parse( $this->parser, $data ); + return $this->results; + } + + /** @privatesection */ + function startElement( $parser, $name, $attributes ) + { + $this->prevCdata = false; + if ( count( $this->stack ) == 0 ) + array_push( $this->stack, $name ); + else + array_push( $this->stack, $this->stack[count( $this->stack ) - 1] . ":$name" ); + } + + function stopElement($parser, $name) + { + $this->prevCdata = false; + array_pop( $this->stack ); + } + + function characterData($parser, $data) + { + $index = $this->stack[count( $this->stack ) - 1]; + if ( $this->prevCdata ) { + // Merge subsequent CDATA blocks + if ( is_array( $this->results[$index] ) ) + array_push( $this->results[$index], + array_pop( $this->results[$index] ) . 
$data); + else + $this->results[$index] .= $data; + } else { + if ( !isset( $this->results[$index] ) ) + $this->results[$index] = $data; + elseif ( is_array( $this->results[$index] ) ) + array_push( $this->results[$index], $data ); + else + $this->results[$index] = array( $this->results[$index], $data ); + } + $this->prevCdata = true; + } +} + +/** + * Render formulas to PNG, HTML and MathML. + * Takes LaTeX fragments, sends them to helper program (texvc and + * blahtex) for rendering to rasterized PNG and HTML and MathML + * approximations. An appropriate rendering form is picked, depending + * on the user's preferences, and returned. The rendering is cached in + * the @c math table in the database, and the PNG files are cached in + * @c $wgMathDirectory on the file system. * - * @package MediaWiki + * @author Tomasz Wegrzanowski, with additions by Brion Vibber (2003, 2004) */ class MathRenderer { - var $mode = MW_MATH_MODERN; - var $tex = ''; - var $inputhash = ''; - var $hash = ''; - var $html = ''; - var $mathml = ''; - var $conservativeness = 0; - + /** @privatesection */ + var $mode = MW_MATH_MODERN; /**< @User preference for maths */ + var $tex = ''; /**< LaTeX fragment */ + var $inputhash = ''; /**< Hash value of $tex */ + var $hash = ''; /**< Name of PNG file */ + var $html = ''; /**< HTML rendering of $tex */ + var $mathml = ''; /**< MathML rendering of $tex */ + var $conservativeness = 0; /**< How conservative the HTML rendering is */ + + /** + * Constructor. + * @param $tex String containing LaTeX fragment to be rendered. + * @public + */ function MathRenderer( $tex ) { $this->tex = $tex; - } - + } + + /** + * Set the preferred output mode. + * The output mode specifies whether render() should output PNG, + * HTML or MathML. + * @param $mode Output mode, can be @c MW_MATH_PNG, + * @c MW_MATH_SIMPLE, @c MW_MATH_HTML, @c MW_MATH_SOURCE, + * @c MW_MATH_MODERN, or @c MW_MATH_MATHML. 
+ * @public + */ function setOutputMode( $mode ) { $this->mode = $mode; } + /** + * Main function, which renders the LaTeX fragment. + * This function renders the LaTeX fragment specified in the + * constructor. The output depends on the output mode, set with + * setOutputMode(), as follows: + * - @c MW_MATH_PNG : Output is in PNG format, fall back to HTML. + * - @c MW_MATH_SIMPLE : Output is in HTML format if the HTML is + * simple and in PNG otherwise. + * - @c MW_MATH_HTML : Output is in HTML format, fall back to PNG. + * - @c MW_MATH_SOURCE : Output the LaTeX fragment verbatim, + * surrounded by a pair of @c $ characters. + * - @c MW_MATH_MODERN : Output is in HTML format unless the HTML + * is complicated, fall back to PNG. + * - @c MW_MATH_MATHML : Output is in MathML format, fall back to + * PNG. + * + * @return String containing HTML fragment, representing the + * formula in the given LaTeX fragment. + * @public + */ function render() { - global $wgTmpDirectory, $wgInputEncoding; - global $wgTexvc; + global $wgBlahtex; $fname = 'MathRenderer::render'; - + if( $this->mode == MW_MATH_SOURCE ) { # No need to render or parse anything more! - return ('$ '.htmlspecialchars( $this->tex ).' $'); + return ( '$ '.htmlspecialchars( $this->tex ).' $' ); } - + if( !$this->_recall() ) { - # Ensure that the temp and output directories are available before continuing... 
- if( !file_exists( $wgTmpDirectory ) ) { - if( !@mkdir( $wgTmpDirectory ) ) { - return $this->_error( 'math_bad_tmpdir' ); - } - } elseif( !is_dir( $wgTmpDirectory ) || !is_writable( $wgTmpDirectory ) ) { - return $this->_error( 'math_bad_tmpdir' ); + $res = $this->testEnvironment(); + if ( $res ) + return $res; + + // Run texvc + list( $success, $res ) = $this->invokeTexvc( $this->tex ); + if ( !$success ) + return $res; + $texvcError = $this->processTexvcOutput( $res ); + + // Run blahtex, if configured + if ( $wgBlahtex ) { + list( $success, $res ) = $this->invokeBlahtex( $this->tex, $this->hash == NULL ); + if ( !$success ) + return $res; + $parser = new blahtexOutputParser(); + $res = $parser->parse( $res ); + wfDebug(print_r($res, TRUE)); + $blahtexError = $this->processBlahtexOutput( $res ); + if ( $blahtexError && $texvcError ) + return $blahtexError; + } else { + if ( $texvcError ) + return $texvcError; } - if( function_exists( 'is_executable' ) && !is_executable( $wgTexvc ) ) { - return $this->_error( 'math_notexvc' ); - } - $cmd = $wgTexvc . ' ' . - escapeshellarg( $wgTmpDirectory ).' '. - escapeshellarg( $wgTmpDirectory ).' '. - escapeshellarg( $this->tex ).' '. - escapeshellarg( $wgInputEncoding ); - - if ( wfIsWindows() ) { - # Invoke it within cygwin sh, because texvc expects sh features in its default shell - $cmd = 'sh -c ' . 
wfEscapeShellArg( $cmd ); + # Now save it back to the DB: + if ( !wfReadOnly() ) { + if ( $this->hash ) + $outmd5_sql = pack( 'H32', $this->hash ); + else + $outmd5_sql = ''; + + $md5_sql = pack( 'H32', $this->md5 ); # Binary packed, not hex + + $dbw =& wfGetDB( DB_MASTER ); + $dbw->replace( 'math', array( 'math_inputhash' ), + array( + 'math_inputhash' => $md5_sql, + 'math_outputhash' => $outmd5_sql, + 'math_html_conservativeness' => $this->conservativeness, + 'math_html' => $this->html, + 'math_mathml' => $this->mathml, + ), $fname, array( 'IGNORE' ) + ); } + } + + return $this->_doRender(); + } - wfDebug( "TeX: $cmd\n" ); - $contents = `$cmd`; - wfDebug( "TeX output:\n $contents\n---\n" ); - - if (strlen($contents) == 0) { - return $this->_error( 'math_unknown_error' ); + /** + * Test whether the necessary directories and executables exist. + * @return String containing HTML fragment with error message if + * there is a problem, @c false otherwise. + */ + function testEnvironment() + { + global $wgTmpDirectory, $wgTexvc, $wgBlahtex; + + if( !file_exists( $wgTmpDirectory ) ) { + if( !@mkdir( $wgTmpDirectory ) ) { + return $this->_error( 'math_bad_tmpdir' ); } + } elseif( !is_dir( $wgTmpDirectory ) || !is_writable( $wgTmpDirectory ) ) { + return $this->_error( 'math_bad_tmpdir' ); + } + + if( function_exists( 'is_executable' ) && !is_executable( $wgTexvc ) ) { + return $this->_error( 'math_notexvc' ); + } + if ($wgBlahtex && function_exists( 'is_executable' ) && !is_executable( $wgBlahtex )) + return $this->_error( 'math_noblahtex', $wgBlahtex ); + + return false; + } - $retval = substr ($contents, 0, 1); - if (($retval == 'C') || ($retval == 'M') || ($retval == 'L')) { - if ($retval == 'C') - $this->conservativeness = 2; - else if ($retval == 'M') - $this->conservativeness = 1; - else - $this->conservativeness = 0; - $outdata = substr ($contents, 33); + /** + * Invoke the texvc executable. 
+ * This function invokes the @c texvc helper program, whose + * location is specified in $wgTexvc. + * @param $tex String containing the LaTeX fragment to be rendered. + * @return A 2-tuple. + * - If an error occurred, then the first element is @c false and + * the second element is a string containing an HTML fragment + * with the error message. + * - Otherwise, the first element is @c true and the second + * element is a string containing the output of @c texvc. + */ + function invokeTexvc( $tex ) + { + global $wgMathDirectory, $wgTmpDirectory, $wgTexvc, $wgInputEncoding; + + $cmd = $wgTexvc . ' ' . + escapeshellarg( $wgTmpDirectory ).' '. + escapeshellarg( $wgTmpDirectory ).' '. + escapeshellarg( $tex ).' '. + escapeshellarg( $wgInputEncoding ); + + if ( wfIsWindows() ) { + // Invoke it within cygwin sh, because texvc expects sh features in its default shell + $cmd = 'sh -c ' . wfEscapeShellArg( $cmd ); + } + + wfDebug( "TeX: $cmd\n" ); + $contents = `$cmd`; + wfDebug( "TeX output:\n $contents\n---\n" ); + + if ( strlen( $contents ) == 0 ) { + return array( false, $this->_error( 'math_unknown_error' ) ); + } - $i = strpos($outdata, "\000"); + return array( true, $contents ); + } - $this->html = substr($outdata, 0, $i); - $this->mathml = substr($outdata, $i+1); - } else if (($retval == 'c') || ($retval == 'm') || ($retval == 'l')) { - $this->html = substr ($contents, 33); - if ($retval == 'c') - $this->conservativeness = 2; - else if ($retval == 'm') - $this->conservativeness = 1; - else - $this->conservativeness = 0; - $this->mathml = NULL; - } else if ($retval == 'X') { - $this->html = NULL; - $this->mathml = substr ($contents, 33); + /** + * Process texvc output. + * Parse the output, fill the mathml, html, hash, and + * conservativeness fields in the database and move the PNG image + * to its final destination. + * @param $contents String containing texvc output. 
+ * @return String containing HTML fragment with error message if + * an error occurred, @c false otherwise. + */ + function processTexvcOutput( $contents ) { + global $wgTmpDirectory; + + $retval = substr( $contents, 0, 1 ); + if ( ( $retval == 'C' ) || ( $retval == 'M' ) || ( $retval == 'L' ) ) { + if ( $retval == 'C' ) + $this->conservativeness = 2; + else if ( $retval == 'M' ) + $this->conservativeness = 1; + else $this->conservativeness = 0; - } else if ($retval == '+') { - $this->html = NULL; - $this->mathml = NULL; + $outdata = substr( $contents, 33 ); + + $i = strpos( $outdata, "\000" ); + + $this->html = substr( $outdata, 0, $i ); + $this->mathml = substr( $outdata, $i+1 ); + } else if ( ( $retval == 'c' ) || ( $retval == 'm' ) || ( $retval == 'l' ) ) { + $this->html = substr( $contents, 33 ); + if ( $retval == 'c' ) + $this->conservativeness = 2; + else if ( $retval == 'm' ) + $this->conservativeness = 1; + else $this->conservativeness = 0; - } else { - $errbit = htmlspecialchars( substr($contents, 1) ); - switch( $retval ) { - case 'E': return $this->_error( 'math_lexing_error', $errbit ); - case 'S': return $this->_error( 'math_syntax_error', $errbit ); - case 'F': return $this->_error( 'math_unknown_function', $errbit ); - default: return $this->_error( 'math_unknown_error', $errbit ); - } + $this->mathml = NULL; + } else if ( $retval == 'X' ) { + $this->html = NULL; + $this->mathml = substr( $contents, 33 ); + $this->conservativeness = 0; + } else if ( $retval == '+' ) { + $this->html = NULL; + $this->mathml = NULL; + $this->conservativeness = 0; + } else { + $errbit = htmlspecialchars( substr( $contents, 1 ) ); + switch( $retval ) { + case 'E': return $this->_error( 'math_lexing_error', $errbit ); + case 'S': return $this->_error( 'math_syntax_error', $errbit ); + case 'F': return $this->_error( 'math_unknown_function', $errbit ); + default: return $this->_error( 'math_unknown_error', $errbit ); } + } - $this->hash = substr ($contents, 1, 32); - if 
(!preg_match("/^[a-f0-9]{32}$/", $this->hash)) { - return $this->_error( 'math_unknown_error' ); - } + $this->hash = NULL; + $hash = substr( $contents, 1, 32 ); + if ( !preg_match( "/^[a-f0-9]{32}$/", $hash ) ) { + return $this->_error( 'math_unknown_error' ); + } + + if( !file_exists( "$wgTmpDirectory/{$hash}.png" ) ) { + return $this->_error( 'math_image_error' ); + } + + $this->hash = $hash; + $tmp = $this->moveToMathDir( "{$hash}.png" ); + if ( $tmp !== false ) { + $this->hash = NULL; + return $tmp; + } - if( !file_exists( "$wgTmpDirectory/{$this->hash}.png" ) ) { - return $this->_error( 'math_image_error' ); - } + return false; + } - $hashpath = $this->_getHashPath(); - if( !file_exists( $hashpath ) ) { - if( !@wfMkdirParents( $hashpath, 0755 ) ) { - return $this->_error( 'math_bad_output' ); - } - } elseif( !is_dir( $hashpath ) || !is_writable( $hashpath ) ) { - return $this->_error( 'math_bad_output' ); - } + /** + * Invoke the blahtex executable. + * This function invokes the @c blahtex helper program. The + * location of the program is specified in $wgBlahtex. Extra + * options may be specified in $wgBlahtexOptions. + * @param $tex String containing the LaTeX fragment to be rendered. + * @param $makePNG Boolean specifying whether blahtex should + * generate both MathML and PNG (@c true) or only MathML (@c false). + * @return A 2-tuple. + * - If an error occurred, then the first element is @c false and + * the second element is a string containing an HTML fragment + * with the error message. + * - Otherwise, the first element is @c true and the second + * element is a string containing the output of @c blahtex. + */ + function invokeBlahtex( $tex, $makePNG ) + { + global $wgBlahtex, $wgBlahtexOptions, $wgTmpDirectory; + + $descriptorspec = array( 0 => array( "pipe", "r" ), + 1 => array( "pipe", "w" ) ); + $options = '--mathml ' . 
$wgBlahtexOptions; + if ( $makePNG ) + $options .= " --png --temp-directory " . escapeshellarg( $wgTmpDirectory ) . " --png-directory " . escapeshellarg( $wgTmpDirectory ); + + $process = proc_open( $wgBlahtex.' '.$options, $descriptorspec, $pipes ); + if ( !$process ) { + return array( false, $this->_error( 'math_unknown_error', ' #1' ) ); + } + fwrite( $pipes[0], '\\displaystyle ' ); + fwrite( $pipes[0], $tex ); + fclose( $pipes[0] ); + + $contents = ''; + while ( !feof($pipes[1] ) ) { + $contents .= fgets( $pipes[1], 4096 ); + } + fclose( $pipes[1] ); + if ( proc_close( $process ) != 0 ) { + // exit code of blahtex is not zero; this shouldn't happen + return array( false, $this->_error( 'math_unknown_error', ' #2' ) ); + } + + return array( true, $contents ); + } - if( !rename( "$wgTmpDirectory/{$this->hash}.png", "$hashpath/{$this->hash}.png" ) ) { - return $this->_error( 'math_output_error' ); + /** + * Process blahtex output. + * Parse the output and fill the mathml field in the database. If + * blahtex has also generated a PNG image, then update the hash + * field as well as move the PNG image to its final destination. + * @param $results Array with the parsed blahtex output, as returned by blahtexOutputParser::parse(). + * @return String containing HTML fragment with error message if + * an error occurred, @c false otherwise. 
+ */ + function processBlahtexOutput( $results ) + { + if ( isset( $results["blahtex:logicError"] ) ) { + // Something went completely wrong + return $this->_error('math_unknown_error', $results["blahtex:logicError"]); + + } elseif ( isset( $results["blahtex:error:id"] ) ) { + // There was a syntax error in the input + return $this->blahtexError( $results, "blahtex:error" ); + + } elseif (isset($results["mathmlMarkup"]) || isset($results["blahtex:png:md5"])) { + // We got some results + if ( isset( $results["mathmlMarkup"] ) ) + $this->mathml = $results['mathmlMarkup']; + if ( isset( $results["blahtex:png:md5"] ) ) { + $this->hash = $results["blahtex:png:md5"]; + $tmp = $this->moveToMathDir( "{$this->hash}.png" ); + if ( $tmp !== false ) + return $tmp; } + return false; - # Now save it back to the DB: - if ( !wfReadOnly() ) { - $outmd5_sql = pack('H32', $this->hash); - - $md5_sql = pack('H32', $this->md5); # Binary packed, not hex + } else { + // There is an error somewhere + if ( isset( $results["blahtex:mathml:error:id"] ) ) + return $this->blahtexError( $results, "blahtex:mathml:error" ); + if ( isset( $results["blahtex:png:error:id"] ) ) + return $this->blahtexError( $results, "blahtex:png:error" ); + return $this->_error( 'math_unknown_error', ' #3' ); + } + } - $dbw =& wfGetDB( DB_MASTER ); - $dbw->replace( 'math', array( 'math_inputhash' ), - array( - 'math_inputhash' => $md5_sql, - 'math_outputhash' => $outmd5_sql, - 'math_html_conservativeness' => $this->conservativeness, - 'math_html' => $this->html, - 'math_mathml' => $this->mathml, - ), $fname, array( 'IGNORE' ) - ); + /** + * Build an error message for blahtex. + * @param $results Parse tree as returned by + * blahtexOutputParser::parse() . + * @param $node String representing the node in the tree that the + * message is stored under. + * @returns String containing HTML fragment with the error + * message. + */ + function blahtexError( $results, $node ) { + $id = 'math_' . $results[$node . 
":id"]; + $fallback = $results[$node . ":message"]; + if ( isset( $results[$node . ":arg"] ) ) { + if ( is_array( $results[$node . ":arg"] ) ) { + // Error message has two or three arguments + $arg1 = $results[$node . ":arg"][0]; + $arg2 = $results[$node . ":arg"][1]; + if ( count( $results[$node . ":arg"] ) > 2 ) + $arg3 = $results[$node . ":arg"][2]; + else + $arg3 = ''; + return $this->_error( $id, $arg1, $arg2, $arg3, $fallback ); + } else { + // Error message has one argument + $arg = $results[$node . ":arg"]; + return $this->_error( $id, $arg, '', '', $fallback ); } - } + else { + // Error message without arguments + return $this->_error( $id, '', '', '', $fallback ); + } + } + + /** + * Move a PNG image to its final destination. + * The file is moved from $wgTmpDirectory to a directory under + * $wgMathDirectory. This function assumes that $hash is set. + * @param $fname String containing name of file to be moved. + * @return String containing HTML fragment with error message if + * an error occurred, @c false otherwise. + */ + function moveToMathDir( $fname ) { + global $wgTmpDirectory; - return $this->_doRender(); + $hashpath = $this->_getHashPath(); + if( !file_exists( $hashpath ) ) { + if( !@wfMkdirParents( $hashpath, 0755 ) ) { + return $this->_error( 'math_bad_output' ); + } + } elseif( !is_dir( $hashpath ) || !is_writable( $hashpath ) ) { + return $this->_error( 'math_bad_output' ); + } + + if( !rename( "$wgTmpDirectory/$fname", "$hashpath/$fname" ) ) { + return $this->_error( 'math_output_error' ); + } + return false; } - function _error( $msg, $append = '' ) { - $mf = htmlspecialchars( wfMsg( 'math_failure' ) ); - $errmsg = htmlspecialchars( wfMsg( $msg ) ); + /** + * Build an error message in HTML. + * @param $msg String containing lookup key for the message; will + * be passed on to wfMsg() . + * @param $arg1 String containing first argument for the message. + * @param $arg2 String containing second argument for the message. 
+ * @param $arg3 String containing third argument for the message. + * @param $fallback String containing a fallback message in case + * the lookup key in $msg is not found. + * @return String containing HTML fragment with the error message. + */ + function _error( $msg, $arg1 = '', $arg2 = '', $arg3 = '', $fallback = NULL ) { + $mf = htmlspecialchars( wfMsg( 'math_failure' ) ); + if ( $msg ) { + if ( $fallback && wfMsg( $msg ) == '<' . htmlspecialchars( $msg ) . '>' ) + $errmsg = htmlspecialchars( $fallback ); + else + $errmsg = htmlspecialchars( wfMsg( $msg, $arg1, $arg2, $arg3 ) ); + } + else + $errmsg = ''; $source = htmlspecialchars( str_replace( "\n", ' ', $this->tex ) ); - return "$mf ($errmsg$append): $source\n"; + // Note: the str_replace above is because the return value must not contain newlines + return "$mf ($errmsg): $source\n"; } + /** + * Recall cached information from the database. + * This function computes the hash value for the formula specified + * in $tex and looks whether any information is stored in the @c + * math table in the database. In that case, the $hash, + * $conservativeness, $html and $mathml member variables are + * updated. + * @return @c true if information was found in the database, @c + * false if not. 
+ */ function _recall() { global $wgMathDirectory; $fname = 'MathRenderer::_recall'; $this->md5 = md5( $this->tex ); $dbr =& wfGetDB( DB_SLAVE ); - $rpage = $dbr->selectRow( 'math', - array( 'math_outputhash','math_html_conservativeness','math_html','math_mathml' ), - array( 'math_inputhash' => pack("H32", $this->md5)), # Binary packed, not hex - $fname + $rpage = $dbr->selectRow( 'math', + array( 'math_outputhash','math_html_conservativeness','math_html','math_mathml' ), + array( 'math_inputhash' => pack("H32", $this->md5)), # Binary packed, not hex + $fname ); - if( $rpage !== false ) { - # Tailing 0x20s can get dropped by the database, add it back on if necessary: + if( $rpage === false ) + return false; // Missing from the database + + if( $rpage->math_outputhash == '' ) + $this->hash = NULL; + else { + // Tailing 0x20s can get dropped by the database, add them back on if necessary: $xhash = unpack( 'H32md5', $rpage->math_outputhash . " " ); $this->hash = $xhash ['md5']; + } + + $this->conservativeness = $rpage->math_html_conservativeness; + $this->html = $rpage->math_html; + $this->mathml = $rpage->math_mathml; + + if( !$this->html && !$this->mathml && !$this->hash ) + return false; // Database contains no useful information - $this->conservativeness = $rpage->math_html_conservativeness; - $this->html = $rpage->math_html; - $this->mathml = $rpage->math_mathml; - - if( file_exists( $this->_getHashPath() . "/{$this->hash}.png" ) ) { - return true; - } - - if( file_exists( $wgMathDirectory . "/{$this->hash}.png" ) ) { - $hashpath = $this->_getHashPath(); + if( $this->hash) { + $hashpath = $this->_getHashPath(); + // MediaWiki 1.5 / 1.6 transition: + // All files used to be stored directly under $wgMathDirectory + // Move them to the new layout if necessary + if( file_exists( $wgMathDirectory . "/{$this->hash}.png" ) + && !file_exists( $hashpath . 
"/{$this->hash}.png" ) ) { if( !file_exists( $hashpath ) ) { if( !@wfMkdirParents( $hashpath, 0755 ) ) { return false; @@ -202,35 +584,94 @@ } if ( function_exists( "link" ) ) { return link ( $wgMathDirectory . "/{$this->hash}.png", - $hashpath . "/{$this->hash}.png" ); + $hashpath . "/{$this->hash}.png" ); } else { return rename ( $wgMathDirectory . "/{$this->hash}.png", $hashpath . "/{$this->hash}.png" ); } } - + + if ( !file_exists( $hashpath . "/{$this->hash}.png" ) ) { + $this->hash = NULL; // File disappeared from the render cache + return false; + } } - - # Missing from the database and/or the render cache - return false; + + return true; } /** - * Select among PNG, HTML, or MathML output depending on + * Do the actual rendering. + * After all preliminaries are completed, this function chooses + * between PNG, HTML, or MathML output depending on the output mode + * stored in $mode and the available options and returns a + * rendering of the specified formula. + * @return String containing HTML fragment representing the formula. 
*/ function _doRender() { - if( $this->mode == MW_MATH_MATHML && $this->mathml != '' ) { - return "{$this->mathml}"; + + switch( $this->mode ) { + + case MW_MATH_PNG: + if( $this->hash ) + $choice = 'png'; + elseif ( $this->html ) + $choice = 'html'; + else + $choice = 'mathml'; + break; + + case MW_MATH_SIMPLE: + if( $this->hash && ( !$this->html || $this->conservativeness != 2 ) ) + $choice = 'png'; + elseif ( $this->html ) + $choice = 'html'; + else + $choice = 'mathml'; + break; + + case MW_MATH_HTML: + if ( $this->html ) + $choice = 'html'; + elseif( $this->hash ) + $choice = 'png'; + else + $choice = 'mathml'; + break; + + case MW_MATH_MODERN: + if( $this->hash && ( !$this->html || $this->conservativeness == 0 ) ) + $choice = 'png'; + elseif ( $this->html ) + $choice = 'html'; + else + $choice = 'mathml'; + break; + + case MW_MATH_MATHML: + if ( $this->mathml ) + $choice = 'mathml'; + elseif( $this->hash ) + $choice = 'png'; + else + $choice = 'html'; + break; + } - if (($this->mode == MW_MATH_PNG) || ($this->html == '') || - (($this->mode == MW_MATH_SIMPLE) && ($this->conservativeness != 2)) || - (($this->mode == MW_MATH_MODERN || $this->mode == MW_MATH_MATHML) && ($this->conservativeness == 0))) { + + if( $choice == 'mathml' ) + return "{$this->mathml}"; + elseif( $choice == 'png' ) return $this->_linkToMathImage(); - } else { + else return ''.$this->html.''; - } } + /** + * Construct a link to PNG file + * @return String containing HTML fragment with the PNG file + * representing the formula. + */ function _linkToMathImage() { global $wgMathPath; $url = htmlspecialchars( "$wgMathPath/" . substr($this->hash, 0, 1) @@ -240,6 +681,13 @@ return "\"$alt\""; } + /** + * Get directory to store PNG image in. + * The PNG images are stored in a tiered directory tree under + * $wgMathDirectory. This function compute the directory that the + * PNG image for the specified formula should go in. + * @return String with directory path. 
+ */ function _getHashPath() { global $wgMathDirectory; $path = $wgMathDirectory .'/'. substr($this->hash, 0, 1) @@ -249,13 +697,18 @@ return $path; } - } +/** + * Render a LaTeX fragment. + * @param $tex String containing the LaTeX fragment. + * @return String containing an HTML fragment representing the formula + * specified in $tex. + */ function renderMath( $tex ) { global $wgUser; $math = new MathRenderer( $tex ); - $math->setOutputMode( $wgUser->getOption('math')); + $math->setOutputMode( $wgUser->getOption( 'math' ) ); return $math->render(); } diff -ur phase3/includes/OutputPage.php blahtex/includes/OutputPage.php --- phase3/includes/OutputPage.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/OutputPage.php 2006-05-20 01:26:34.000000000 +1000 @@ -581,7 +581,19 @@ # Disable temporary placeholders, so that the skin produces HTML $sk->postParseLinkColour( false ); - header( "Content-type: $wgMimeType; charset={$wgOutputEncoding}" ); + /* Send page as XHTML if the user has selected MathML and the browser accepts XHTML */ + if ( $wgUser->getOption( 'math' ) == MW_MATH_MATHML ) { + if ( isset( $_SERVER['HTTP_ACCEPT'] ) && stristr( $_SERVER['HTTP_ACCEPT'], 'application/xhtml+xml' )) { + header( "Content-type: application/xhtml+xml; charset={$wgOutputEncoding}" ); + } else if ( isset( $_SERVER["HTTP_USER_AGENT"] ) && stristr( $_SERVER["HTTP_USER_AGENT"], "MathPlayer" )) { + header( "Content-type: application/xhtml+xml" ); + } else { + header( "Content-type: text/html; charset={$wgOutputEncoding}" ); + } + } else { + header( "Content-type: text/html; charset={$wgOutputEncoding}" ); + } + header( 'Content-language: '.$wgContLanguageCode ); if ($this->mArticleBodyOnly) { @@ -949,14 +961,23 @@ global $wgDocType, $wgDTD, $wgContLanguageCode, $wgOutputEncoding, $wgMimeType; global $wgUser, $wgContLang, $wgUseTrackbacks, $wgTitle; - if( $wgMimeType == 'text/xml' || $wgMimeType == 'application/xhtml+xml' || $wgMimeType == 'application/xml' ) { - $ret = "\n"; - 
} else { - $ret = ''; + /* If the user has selected MathML, then we should prepare an XHTML page */ + if( $wgUser->getOption('math') == MW_MATH_MATHML ) { + $ret = "\n" + . "\n"; + array_push( $this->mMetatags, array( "http:Content-type", + "application/xhtml+xml; charset={$wgOutputEncoding}" ) ); + } else { + if( $wgMimeType == 'text/xml' || $wgMimeType == 'application/xhtml+xml' || $wgMimeType == 'application/xml' ) { + $ret = "\n"; + } else { + $ret = ''; + } + $ret .= "\n"; + array_push( $this->mMetatags, array( "http:Content-type", "$wgMimeType; charset={$wgOutputEncoding}" ) ); } - $ret .= "\n"; - if ( '' == $this->getHTMLTitle() ) { $this->setHTMLTitle( wfMsg( 'pagetitle', $this->getPageTitle() )); } @@ -964,7 +985,6 @@ $rtl = $wgContLang->isRTL() ? " dir='RTL'" : ''; $ret .= "\n"; $ret .= "\n" . htmlspecialchars( $this->getHTMLTitle() ) . "\n"; - array_push( $this->mMetatags, array( "http:Content-type", "$wgMimeType; charset={$wgOutputEncoding}" ) ); $ret .= $this->getHeadLinks(); global $wgStylePath; diff -ur phase3/includes/Parser.php blahtex/includes/Parser.php --- phase3/includes/Parser.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/Parser.php 2006-05-20 01:26:36.000000000 +1000 @@ -334,7 +334,7 @@ $start = "/<$tag(\\s+[^>]*|\\s*\/?)>/i"; $end = "/<\\/$tag\\s*>/i"; } - + while ( '' != $text ) { $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); $stripped .= $p[0]; @@ -359,7 +359,6 @@ $params[$marker] = Sanitizer::decodeTagAttributes( $attributes ); if ( $empty === '/' ) { - // Empty element tag, $content[$marker] = null; $text = $inside; } else { @@ -622,6 +621,9 @@ * will be used depending on availability. Override the default * $wgTidyInternal setting to disable the internal if it's not working. * + * Since html tidy does not understand MathML, we temporarily replace + * all ... with placeholders. 
+ * * @param string $text Hideous HTML input * @return string Corrected HTML output * @public @@ -629,6 +631,9 @@ */ function tidy( $text ) { global $wgTidyInternal; + $math_content = array(); + $text = Parser::extractTagsAndParams( 'math', $text, $math_content, + $math_tags, $math_params, $this->mUniqPrefix ); $wrappedtext = ''. 'test'.$text.''; @@ -641,6 +646,10 @@ wfDebug( "Tidy error detected!\n" ); return $text . "\n\n"; } + foreach( $math_content as $marker => $content ) { + $full_tag = $math_tags[$marker] . $content . ""; + $correctedtext = str_replace( $marker, $full_tag, $correctedtext ); + } return $correctedtext; } @@ -685,6 +694,7 @@ if( $cleansource == '' && $text != '') { // Some kind of error happened, so we couldn't get the corrected text. // Just give up; we'll use the source text and append a warning. + wfDebug("tidy invocation: $wgTidyBin -config $wgTidyConf $wgTidyOpts$opts\n"); return null; } else { return $cleansource; diff -ur phase3/includes/Sanitizer.php blahtex/includes/Sanitizer.php --- phase3/includes/Sanitizer.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/Sanitizer.php 2006-05-20 01:26:36.000000000 +1000 @@ -314,7 +314,93 @@ 'Zeta' => 918, 'zeta' => 950, 'zwj' => 8205, - 'zwnj' => 8204 ); + 'zwnj' => 8204, + /* Plane-1 entities for MathML. 
*/ + 'Ascr' => 0x1D49C, + 'Cscr' => 0x1D49E, + 'Dscr' => 0x1D49F, + 'Gscr' => 0x1D4A2, + 'Jscr' => 0x1D4A5, + 'Kscr' => 0x1D4A6, + 'Nscr' => 0x1D4A9, + 'Oscr' => 0x1D4AA, + 'Pscr' => 0x1D4AB, + 'Qscr' => 0x1D4AC, + 'Sscr' => 0x1D4AE, + 'Tscr' => 0x1D4AF, + 'Uscr' => 0x1D4B0, + 'Vscr' => 0x1D4B1, + 'Wscr' => 0x1D4B2, + 'Xscr' => 0x1D4B3, + 'Yscr' => 0x1D4B4, + 'Zscr' => 0x1D4B5, + 'Afr' => 0x1D504, + 'Bfr' => 0x1D505, + 'Dfr' => 0x1D507, + 'Efr' => 0x1D508, + 'Ffr' => 0x1D509, + 'Gfr' => 0x1D50A, + 'Jfr' => 0x1D50D, + 'Kfr' => 0x1D50E, + 'Lfr' => 0x1D50F, + 'Mfr' => 0x1D510, + 'Nfr' => 0x1D511, + 'Ofr' => 0x1D512, + 'Pfr' => 0x1D513, + 'Qfr' => 0x1D514, + 'Sfr' => 0x1D516, + 'Tfr' => 0x1D517, + 'Ufr' => 0x1D518, + 'Vfr' => 0x1D519, + 'Wfr' => 0x1D51A, + 'Xfr' => 0x1D51B, + 'Yfr' => 0x1D51C, + 'afr' => 0x1D51E, + 'bfr' => 0x1D51F, + 'cfr' => 0x1D520, + 'dfr' => 0x1D521, + 'efr' => 0x1D522, + 'ffr' => 0x1D523, + 'gfr' => 0x1D524, + 'hfr' => 0x1D525, + 'ifr' => 0x1D526, + 'jfr' => 0x1D527, + 'kfr' => 0x1D528, + 'lfr' => 0x1D529, + 'mfr' => 0x1D52A, + 'nfr' => 0x1D52B, + 'ofr' => 0x1D52C, + 'pfr' => 0x1D52D, + 'qfr' => 0x1D52E, + 'rfr' => 0x1D52F, + 'sfr' => 0x1D530, + 'tfr' => 0x1D531, + 'ufr' => 0x1D532, + 'vfr' => 0x1D533, + 'wfr' => 0x1D534, + 'xfr' => 0x1D535, + 'yfr' => 0x1D536, + 'zfr' => 0x1D537, + 'Aopf' => 0x1D538, + 'Bopf' => 0x1D539, + 'Dopf' => 0x1D53B, + 'Eopf' => 0x1D53C, + 'Fopf' => 0x1D53D, + 'Gopf' => 0x1D53E, + 'Iopf' => 0x1D540, + 'Jopf' => 0x1D541, + 'Kopf' => 0x1D542, + 'Lopf' => 0x1D543, + 'Mopf' => 0x1D544, + 'Oopf' => 0x1D546, + 'Sopf' => 0x1D54A, + 'Topf' => 0x1D54B, + 'Uopf' => 0x1D54C, + 'Vopf' => 0x1D54D, + 'Wopf' => 0x1D54E, + 'Xopf' => 0x1D54F, + 'Yopf' => 0x1D550 +); /** @package MediaWiki */ class Sanitizer { diff -ur phase3/includes/SkinTemplate.php blahtex/includes/SkinTemplate.php --- phase3/includes/SkinTemplate.php 2006-05-20 00:57:37.000000000 +1000 +++ blahtex/includes/SkinTemplate.php 2006-05-20 01:26:37.000000000 +1000 @@ -137,7 
+137,7 @@ global $wgScript, $wgStylePath, $wgContLanguageCode; global $wgMimeType, $wgJsMimeType, $wgOutputEncoding, $wgRequest; global $wgDisableCounters, $wgLogo, $action, $wgFeedClasses, $wgHideInterlanguageLinks; - global $wgMaxCredits, $wgShowCreditsIfMax; + global $wgMaxCredits, $wgShowCreditsIfMax, $wgDocType, $wgDTD; global $wgPageShowWatchingUsers; global $wgUseTrackbacks; global $wgDBname; @@ -219,7 +219,19 @@ if ($wgUseTrackbacks && $out->isArticleRelated()) $tpl->set( 'trackbackhtml', $wgTitle->trackbackRDF()); - $tpl->setRef( 'mimetype', $wgMimeType ); + /* If the user has selected MathML, then we should prepare an XHTML page */ + if( $wgUser->getOption('math') == MW_MATH_MATHML ) { + $tpl->set( 'mimetype', 'application/xhtml+xml' ); + $tpl->set( 'doctype', '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN' ); + $tpl->set( 'dtd', 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd' ); + $tpl->set( 'xmlheaders', "\n" ); + } else { + $tpl->setRef( 'mimetype', $wgMimeType ); + $tpl->set( 'doctype', '-//W3C//DTD XHTML 1.0 Transitional//EN' ); + $tpl->set( 'dtd', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' ); + $tpl->set( 'xmlheaders', '' ); + } + $tpl->setRef( 'jsmimetype', $wgJsMimeType ); $tpl->setRef( 'charset', $wgOutputEncoding ); $tpl->set( 'headlinks', $out->getHeadLinks() ); diff -ur phase3/languages/Messages.php blahtex/languages/Messages.php --- phase3/languages/Messages.php 2006-05-20 00:58:50.000000000 +1000 +++ blahtex/languages/Messages.php 2006-05-20 01:46:07.000000000 +1000 @@ -678,15 +678,6 @@ 'dateformat' => 'Date format', 'datedefault' => 'No preference', 'datetime' => 'Date and time', -'math_failure' => 'Failed to parse', -'math_unknown_error' => 'unknown error', -'math_unknown_function' => 'unknown function', -'math_lexing_error' => 'lexing error', -'math_syntax_error' => 'syntax error', -'math_image_error' => 'PNG conversion failed; check for correct installation of latex, dvips, gs, and convert', -'math_bad_tmpdir' =>
'Can\'t write to or create math temp directory', -'math_bad_output' => 'Can\'t write to or create math output directory', -'math_notexvc' => 'Missing texvc executable; please see math/README to configure.', 'prefs-personal' => 'User profile', 'prefs-rc' => 'Recent changes', 'prefs-watchlist' => 'Watchlist', @@ -719,6 +710,94 @@ 'default' => 'default', 'files' => 'Files', +# Mathematics +# + +'math_failure' => 'Failed to parse', +'math_unknown_error' => 'unknown error$1', +'math_unknown_function' => 'unknown function ', +'math_input_error' => 'input error', +'math_lexing_error' => 'lexing error $1', +'math_syntax_error' => 'syntax error $1', +'math_image_error' => 'PNG conversion failed; check for correct installation of latex, dvips, gs, and convert', +'math_bad_tmpdir' => 'Can\'t write to or create math temp directory', +'math_bad_output' => 'Can\'t write to or create math output directory', +'math_notexvc' => 'Missing texvc executable; please see math/README to configure.', +'math_noblahtex' => 'Can\'t execute blahtex, which should be at $1', + +# Blahtex messages +'math_AmbiguousInfix' => 'Ambiguous placement of "$1" ' . + '(try using additional braces "{ ... }" to disambiguate)', +'math_CannotChangeDirectory' => 'Cannot change working directory', +'math_CannotCreateTexFile' => 'Cannot create tex file', +'math_CannotRunDvipng' => 'Cannot run dvipng', +'math_CannotRunLatex' => 'Cannot run latex', +'math_CannotWritePngDirectory' => 'Cannot write to output PNG directory', +'math_CannotWriteTexFile' => 'Cannot write to tex file', +'math_CasesRowTooBig' => 'There can only be two entries in each row of a "cases" block', +'math_DoubleSubscript' => 'Encountered two subscripts attached to the same base ' . + '(only one is allowed)', +'math_DoubleSuperscript' => 'Encountered two superscripts attached to the same base ' .
+ '(only one is allowed)', +'math_IllegalCharacter' => 'Illegal character in input', +'math_IllegalCommandInMathMode' => 'The command "$1" is illegal in math mode', +'math_IllegalCommandInMathModeWithHint' => 'The command "$1" is illegal in math mode ' . + '(perhaps you intended to use "$2" instead?)', +'math_IllegalCommandInTextMode' => 'The command "$1" is illegal in text mode', +'math_IllegalCommandInTextModeWithHint' => 'The command "$1" is illegal in text mode ' . + '(perhaps you intended to use "$2" instead?)', +'math_IllegalDelimiter' => 'Illegal delimiter following "$1"', +'math_IllegalFinalBackslash' => 'Illegal backslash "\\" at end of input', +'math_IllegalNestedFontEncodings' => 'Font encoding commands may not be nested', +'math_IllegalRedefinition' => 'The command "$1" has already been defined; you cannot redefine it', +'math_InvalidColour' => 'The colour "$1" is invalid', +'math_InvalidUtf8Input' => 'The input string was not valid UTF-8', +'math_LatexFontNotSpecified' => 'No LaTeX font has been specified for "$1"', +'math_LatexPackageUnavailable' => 'Unable to render PNG because the LaTeX package "$1" is unavailable', +'math_MismatchedBeginAndEnd' => 'Commands "$1" and "$2" do not match', +'math_MisplacedLimits' => 'The command "$1" can only appear after a math operator ' . + '(consider using "\\mathop")', +'math_MissingCommandAfterNewcommand' => 'Missing or illegal new command name after "\\newcommand" ' . + '(there must be precisely one command defined; it must begin ' . + 'with a backslash "\\" and contain only alphabetic characters)', +'math_MissingDelimiter' => 'Missing delimiter after "$1"', +'math_MissingOpenBraceAfter' => 'Missing open brace "{" after "$1"', +'math_MissingOpenBraceAtEnd' => 'Missing open brace "{" at end of input', +'math_MissingOpenBraceBefore' => 'Missing open brace "{" before "$1"', +'math_MissingOrIllegalParameterCount' => 'Missing or illegal parameter count in definition of "$1" ' . 
+ '(must be a single digit between 1 and 9 inclusive)', +'math_MissingOrIllegalParameterIndex' => 'Missing or illegal parameter index in definition of "$1"', +'math_NonAsciiInMathMode' => 'Non-ASCII characters may only be used in text mode ' . + '(try enclosing the problem characters in "\\text{...}")', +'math_NotEnoughArguments' => 'Not enough arguments were supplied for "$1"', +'math_PngIncompatibleCharacter' => 'Unable to correctly generate PNG containing the character $1', +'math_ReservedCommand' => 'The command "$1" is reserved for internal use by blahtex', +'math_SubstackRowTooBig' => 'There can only be one entry in each row of a "substack" block', +'math_TooManyMathmlNodes' => 'There are too many nodes in the MathML tree', +'math_TooManyTokens' => 'The input is too long', +'math_UnavailableSymbolFontCombination' => 'The symbol "$1" is not available in the font "$2"', +'math_UnexpectedNextCell' => 'The command "&" may only appear inside a "\\begin ... \\end" block', +'math_UnexpectedNextRow' => 'The command "\\\\" may only appear inside a "\\begin ... 
\\end" block', +'math_UnmatchedBegin' => 'Encountered "\\begin" without matching "\\end"', +'math_UnmatchedCloseBrace' => 'Encountered close brace "}" without matching open brace "{"', +'math_UnmatchedEnd' => 'Encountered "\\end" without matching "\\begin"', +'math_UnmatchedLeft' => 'Encountered "\\left" without matching "\\right"', +'math_UnmatchedOpenBrace' => 'Encountered open brace "{" without matching close brace "}"', +'math_UnmatchedOpenBracket' => 'Encountered open bracket "[" without matching close bracket "]"', +'math_UnmatchedRight' => 'Encountered "\\right" without matching "\\left"', +'math_UnrecognisedCommand' => 'Unrecognised command "$1"', +'math_WrongFontEncoding' => 'The symbol "$1" may not appear in font encoding "$2"', +'math_WrongFontEncodingWithHint' => 'The symbol "$1" may not appear in font encoding "$2" (try using the "$3{...}" command)', + +# User levels special page +# + +# switching pan +'groups-lookup-group' => 'Manage group rights', +'groups-group-edit' => 'Existing groups:', +'editgroup' => 'Edit Group', +'addgroup' => 'Add Group', + # User rights 'userrights-lookup-user' => 'Manage user groups', 'userrights-user-editname' => 'Enter a username:', @@ -1505,7 +1584,7 @@ 'mw_math_html' => 'HTML if possible or else PNG', 'mw_math_source' => 'Leave it as TeX (for text browsers)', 'mw_math_modern' => 'Recommended for modern browsers', -'mw_math_mathml' => 'MathML if possible (experimental)', +'mw_math_mathml' => 'MathML (experimental)', # Patrolling 'markaspatrolleddiff' => "Mark as patrolled", diff -ur phase3/skins/MonoBook.php blahtex/skins/MonoBook.php --- phase3/skins/MonoBook.php 2006-05-20 00:56:56.000000000 +1000 +++ blahtex/skins/MonoBook.php 2006-05-20 01:26:42.000000000 +1000 @@ -50,7 +50,9 @@ // Suppress warnings to prevent notices about missing indexes in $this->data wfSuppressWarnings(); -?> +?>html('xmlheaders'); +?>text('doctype'); ?>" + "text('dtd'); ?>">