#!/usr/bin/env hhvm
<?php

// This is a command-line tool: stop silently under any other SAPI.
if ( 'cli' !== PHP_SAPI ) {
	exit();
}

require __DIR__ . '/../vendor/autoload.php';

use RemexHtml\DOM;
use RemexHtml\Tokenizer;
use RemexHtml\TreeBuilder;
use RemexHtml\Serializer;

/**
 * A token handler that discards everything it is given.
 *
 * Every callback required by Tokenizer\TokenHandler is implemented with an
 * empty body, so no work is done on behalf of the handler.
 */
class NullHandler implements Tokenizer\TokenHandler {
	public function startDocument( Tokenizer\Tokenizer $t, $fns, $fn ) {
	}

	public function endDocument( $pos ) {
	}

	public function error( $text, $pos ) {
	}

	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
	}

	public function startTag(
		$name, Tokenizer\Attributes $attrs, $selfClose, $sourceStart, $sourceLength
	) {
	}

	public function endTag( $name, $sourceStart, $sourceLength ) {
	}

	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	}

	public function comment( $text, $sourceStart, $sourceLength ) {
	}
}

/**
 * A tree handler that discards every tree construction event.
 *
 * All TreeBuilder\TreeHandler callbacks are empty. benchmarkTreeBuilder()
 * uses it as the TreeBuilder's handler, so that run builds no output.
 */
class NullTreeHandler implements TreeBuilder\TreeHandler {
	public function startDocument( $fns, $fn ) {
	}

	public function endDocument( $pos ) {
	}

	public function characters(
		$parent, $refNode, $text, $start, $length, $sourceStart, $sourceLength
	) {
	}

	public function insertElement(
		$parent, $refNode, TreeBuilder\Element $element, $void, $sourceStart, $sourceLength
	) {
	}

	public function endTag( TreeBuilder\Element $element, $sourceStart, $sourceLength ) {
	}

	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	}

	public function comment( $parent, $refNode, $text, $sourceStart, $sourceLength ) {
	}

	public function error( $text, $pos ) {
	}

	public function mergeAttributes(
		TreeBuilder\Element $element, Tokenizer\Attributes $attrs, $sourceStart
	) {
	}

	public function removeNode( TreeBuilder\Element $element, $sourceStart ) {
	}

	public function reparentChildren(
		TreeBuilder\Element $element, TreeBuilder\Element $newParent, $sourceStart
	) {
	}
}

/**
 * Tokenize $text into a TokenSerializer and print the serializer's output,
 * followed by one line per error it collected.
 *
 * Execution options are read from the global $executeOptions.
 *
 * @param string $text The source to tokenize
 */
function reserialize( $text ) {
	$serializer = new Tokenizer\TokenSerializer();
	$lexer = new Tokenizer\Tokenizer( $serializer, $text, [] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	echo $serializer->getOutput(), "\n";
	foreach ( $serializer->getErrors() as $entry ) {
		// Each entry is indexed as [ 0 => message, 1 => position ]
		$position = $entry[1];
		$message = $entry[0];
		echo "Error at {$position}: {$message}\n";
	}
}

/**
 * Tokenize $text starting from a given tokenizer state, then print the
 * TokenSerializer output followed by one line per collected error.
 *
 * @param string $text The source to tokenize
 * @param int $state Passed to Tokenizer::execute() as the 'state' option
 * @param string $endTag Passed to Tokenizer::execute() as 'appropriateEndTag'
 */
function reserializeState( $text, $state, $endTag ) {
	$serializer = new Tokenizer\TokenSerializer();
	$lexer = new Tokenizer\Tokenizer( $serializer, $text, [] );
	$options = [
		'state' => $state,
		'appropriateEndTag' => $endTag,
	];
	$lexer->execute( $options );

	echo $serializer->getOutput(), "\n";
	foreach ( $serializer->getErrors() as $entry ) {
		// Each entry is indexed as [ 0 => message, 1 => position ]
		$position = $entry[1];
		$message = $entry[0];
		echo "Error at {$position}: {$message}\n";
	}
}

/**
 * Run reserializeState() on $text starting in the script data state, with
 * "script" as the appropriate end tag.
 *
 * @param string $text The source to tokenize
 */
function reserializeScript( $text ) {
	$initialState = Tokenizer\Tokenizer::STATE_SCRIPT_DATA;
	reserializeState( $text, $initialState, 'script' );
}

/**
 * Run reserializeState() on $text starting in the RCDATA state, with "xmp"
 * as the appropriate end tag.
 *
 * @param string $text The source to tokenize
 */
function reserializeXmp( $text ) {
	$initialState = Tokenizer\Tokenizer::STATE_RCDATA;
	reserializeState( $text, $initialState, 'xmp' );
}

/**
 * Parse $text with tracers attached at both the dispatch and the tree
 * mutation level, printing each trace message on its own line, then print
 * the serializer's result.
 *
 * Execution options are read from the global $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function trace( $text ) {
	// Both tracers report through this callback
	$emit = function ( $line ) {
		echo $line, "\n";
	};

	$htmlSerializer = new Serializer\Serializer( new Serializer\HtmlFormatter() );
	$mutationTracer = new TreeBuilder\TreeMutationTracer( $htmlSerializer, $emit );
	$builder = new TreeBuilder\TreeBuilder( $mutationTracer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$tokenTracer = new TreeBuilder\DispatchTracer( $text, $dispatch, $emit );
	$lexer = new Tokenizer\Tokenizer( $tokenTracer, $text, [] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	echo $htmlSerializer->getResult(), "\n";
}

/**
 * Parse $text with a DestructTracer as the final tree handler, wrapped in
 * the same dispatch and tree mutation tracers as trace(). Every trace
 * message is printed on its own line; no serialized result is printed.
 *
 * Execution options are read from the global $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function traceDestruct( $text ) {
	// All three tracers report through this callback
	$emit = function ( $line ) {
		echo $line, "\n";
	};

	$destructSink = new TreeBuilder\DestructTracer( $emit );
	$mutationTracer = new TreeBuilder\TreeMutationTracer( $destructSink, $emit );
	$builder = new TreeBuilder\TreeBuilder( $mutationTracer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$tokenTracer = new TreeBuilder\DispatchTracer( $text, $dispatch, $emit );
	$lexer = new Tokenizer\Tokenizer( $tokenTracer, $text, [] );
	$lexer->execute( $GLOBALS['executeOptions'] );
}

/**
 * Parse $text and feed the tree events straight into an HTML serializer.
 * Nothing is printed: errors are dropped and the result is not output.
 *
 * Tokenizer and execution options are read from the globals
 * $tokenizerOptions and $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function tidy( $text ) {
	$discardError = function ( $msg, $pos ) {
		// Errors are intentionally ignored. To log them, enable:
		// print "  *  [$pos] $msg\n";
	};

	$htmlSerializer = new Serializer\Serializer( new Serializer\HtmlFormatter(), $discardError );
	$builder = new TreeBuilder\TreeBuilder( $htmlSerializer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	// To see the serialized document, enable:
	// print $htmlSerializer->getResult() . "\n";
}

/**
 * Parse $text, serialize the tree with TestFormatter, and print the result.
 * Each error passed to the serializer's error callback is printed as it
 * occurs, before the final result.
 *
 * Tokenizer and execution options are read from the globals
 * $tokenizerOptions and $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function test( $text ) {
	$reportError = function ( $message, $position ) {
		echo "  *  [{$position}] {$message}\n";
	};

	$testSerializer = new Serializer\Serializer( new Serializer\TestFormatter(), $reportError );
	$builder = new TreeBuilder\TreeBuilder( $testSerializer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	echo $testSerializer->getResult(), "\n";
}

/**
 * Parse $text through a DOMBuilder wrapped in a DOMSerializer that uses
 * HtmlFormatter. Nothing is printed: errors are dropped and the result is
 * not output.
 *
 * Execution options are read from the global $executeOptions.
 *
 * NOTE(review): the tokenizer is given an empty option array here, whereas
 * tidy() passes $GLOBALS['tokenizerOptions']. The two are therefore not run
 * under the same tokenizer settings -- confirm that this is intended.
 *
 * @param string $text The HTML source to parse
 */
function tidyViaDOM( $text ) {
	$discardError = function ( $msg, $pos ) {
		// Errors are intentionally ignored. To log them, enable:
		// print "  *  [$pos] $msg\n";
	};

	$htmlFormatter = new Serializer\HtmlFormatter();
	$builderForDom = new DOM\DOMBuilder( $discardError );
	$domSerializer = new DOM\DOMSerializer( $builderForDom, $htmlFormatter );
	$builder = new TreeBuilder\TreeBuilder( $domSerializer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, [] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	// To see the serialized document, enable:
	// print $domSerializer->getResult() . "\n";
}

/**
 * Parse $text through a DOMBuilder wrapped in a DOMSerializer that uses
 * TestFormatter. Errors passed to the DOMBuilder's error callback are
 * printed as they occur; the serialized result is not printed.
 *
 * Execution options are read from the global $executeOptions.
 *
 * NOTE(review): unlike test(), the final result print is disabled here --
 * confirm whether that is deliberate.
 *
 * @param string $text The HTML source to parse
 */
function testViaDOM( $text ) {
	$reportError = function ( $message, $position ) {
		echo "  *  [{$position}] {$message}\n";
	};

	$testFormatter = new Serializer\TestFormatter();
	$builderForDom = new DOM\DOMBuilder( $reportError );
	$domSerializer = new DOM\DOMSerializer( $builderForDom, $testFormatter );
	$builder = new TreeBuilder\TreeBuilder( $domSerializer, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, [] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	// To see the serialized document, enable:
	// print $domSerializer->getResult() . "\n";
}


/**
 * Time one tokenizer + tree builder pass over $text with a NullTreeHandler
 * as the tree handler, and print the elapsed seconds followed by a newline.
 *
 * Construction of the pipeline objects is included in the timed region.
 * Tokenizer and execution options are read from the globals
 * $tokenizerOptions and $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function benchmarkTreeBuilder( $text ) {
	$startedAt = microtime( true );

	$sink = new NullTreeHandler();
	$builder = new TreeBuilder\TreeBuilder( $sink, [] );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	$elapsed = microtime( true ) - $startedAt;
	echo "{$elapsed}\n";
}

/**
 * Time one tokenizer + tree builder pass over $text with a DOMBuilder as
 * the tree handler, and print the elapsed seconds followed by a newline.
 *
 * Construction of the pipeline objects is included in the timed region.
 * The TreeBuilder is created with 'ignoreErrors' enabled. Tokenizer and
 * execution options are read from the globals $tokenizerOptions and
 * $executeOptions.
 *
 * @param string $text The HTML source to parse
 */
function benchmarkDOM( $text ) {
	$startedAt = microtime( true );

	$builderForDom = new DOM\DOMBuilder();
	$builderOptions = [ 'ignoreErrors' => true ];
	$builder = new TreeBuilder\TreeBuilder( $builderForDom, $builderOptions );
	$dispatch = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $dispatch, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	$elapsed = microtime( true ) - $startedAt;
	echo "{$elapsed}\n";
}


/**
 * Dump every token yielded by TokenGenerator::generate() with print_r().
 *
 * For tokens of type 'text', the 'text' field is first cut down to the
 * substring selected by the token's 'start' and 'length' fields, and those
 * two fields are removed before printing.
 *
 * Tokenizer options are read from the global $tokenizerOptions.
 *
 * @param string $text The HTML source to tokenize
 */
function generate( $text ) {
	$tokens = Tokenizer\TokenGenerator::generate( $text, $GLOBALS['tokenizerOptions'] );
	foreach ( $tokens as $tok ) {
		if ( $tok['type'] !== 'text' ) {
			print_r( $tok );
			continue;
		}

		$tok['text'] = substr( $tok['text'], $tok['start'], $tok['length'] );
		unset( $tok['start'], $tok['length'] );
		print_r( $tok );
	}
}

/**
 * Time a full iteration over the tokens yielded by
 * TokenGenerator::generate() and print the elapsed seconds followed by a
 * newline. The tokens themselves are discarded.
 *
 * Tokenizer options are read from the global $tokenizerOptions.
 *
 * @param string $text The HTML source to tokenize
 */
function benchmarkGenerate( $text ) {
	$startedAt = microtime( true );

	$tokens = Tokenizer\TokenGenerator::generate( $text, $GLOBALS['tokenizerOptions'] );
	foreach ( $tokens as $unused ) {
		// Drain the generator; only the time taken matters
	}

	$elapsed = microtime( true ) - $startedAt;
	echo "{$elapsed}\n";
}


// ---- Benchmark driver ----
//
// Usage: <this script> <html-file>
//
// Reads the file named by the first argument and runs each benchmark ten
// times against its contents, printing one timing (in seconds) per run.

// Refuse to run without a readable input file. Otherwise every benchmark
// below would be timed against a non-document.
if ( !isset( $argv[1] ) ) {
	fwrite( STDERR, 'Usage: ' . basename( __FILE__ ) . " <html-file>\n" );
	exit( 1 );
}
$text = file_get_contents( $argv[1] );
if ( $text === false ) {
	fwrite( STDERR, "Unable to read input file: {$argv[1]}\n" );
	exit( 1 );
}

// These two arrays are read by the functions above through $GLOBALS, so
// they must stay in the global scope under these names.
$tokenizerOptions = [
	'ignoreNulls' => true,
	'ignoreCharRefs' => true,
	'ignoreErrors' => true,
	'skipPreprocess' => true,
];
$executeOptions = [
	// 'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
	// 'fragmentName' => 'div'
];

$iterations = 10;

// Each entry: [ heading, function name, whether it prints its own timing ].
// The benchmark*() functions time themselves and print the result; tidy()
// and tidyViaDOM() print nothing, so they are timed here.
$benchmarks = [
	[ 'Tree builder', 'benchmarkTreeBuilder', true ],
	[ 'DOM', 'benchmarkDOM', true ],
	[ 'Generate?', 'benchmarkGenerate', true ],
	[ 'DOM + serialize', 'tidyViaDOM', false ],
	[ 'SAX + serialize', 'tidy', false ],
];

foreach ( $benchmarks as $benchmark ) {
	list( $heading, $function, $printsOwnTime ) = $benchmark;
	print "---- $heading ----\n";
	for ( $i = 0; $i < $iterations; $i++ ) {
		print "Iteration $i:";
		if ( $printsOwnTime ) {
			$function( $text );
		} else {
			$time = -microtime( true );
			$function( $text );
			$time += microtime( true );
			print "$time\n";
		}
	}
}
