diff --git a/DiffEngine.h b/DiffEngine.h index aeef88d..646bb08 100644 --- a/DiffEngine.h +++ b/DiffEngine.h @@ -1,580 +1,580 @@ /** * GPL blah blah, see below for history */ #ifndef DIFFENGINE_H #define DIFFENGINE_H //#define USE_JUDY #include #include #include #include #include #include #ifdef USE_JUDY #include "JudyHS.h" #endif -#include "wikidiff2.h" +#include "Wikidiff2.h" /** * Diff operation - * + * * from and to are vectors containing pointers to the objects passed in from_lines and to_lines * * op is one of the following - * copy: A sequence of lines (in from and to) which are the same in both files. + * copy: A sequence of lines (in from and to) which are the same in both files. * del: A sequence of lines (in from) which were in the first file but not the second. * add: A sequence of lines (in to) which were in the second file but not the first. - * change: A sequence of lines which are different between the two files. Lines from the - * first file are in from, lines from the second are in to. The two vectors need + * change: A sequence of lines which are different between the two files. Lines from the + * first file are in from, lines from the second are in to. The two vectors need * not be the same length. */ template class DiffOp { public: typedef std::vector > PointerVector; DiffOp(int op_, const PointerVector & from_, const PointerVector & to_) : op(op_), from(from_), to(to_) {} enum {copy, del, add, change}; int op; PointerVector from; PointerVector to; }; /** * Basic diff template class. After construction, edits will contain a vector of DiffOpTemplate * objects representing the diff */ template class Diff { public: typedef std::vector > ValueVector; typedef std::vector, WD2_ALLOCATOR > DiffOpVector; Diff(const ValueVector & from_lines, const ValueVector & to_lines); - + virtual void add_edit(const DiffOp & edit) { - edits.push_back(edit); + edits.push_back(edit); } unsigned size() { return edits.size(); } DiffOp & operator[](int i) {return edits[i];} DiffOpVector edits; }; /** * Class used internally by Diff to actually compute the diffs. * * The algorithm used here is mostly lifted from the perl module * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip * * More ideas are taken from: * http://www.ics.uci.edu/~eppstein/161/960229.html * * Some ideas are (and a bit of code) are from from analyze.c, from GNU * diffutils-2.7, which can be found at: * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz * - * This implementation is largely due to Geoffrey T. Dairiki, who wrote this - * diff engine for phpwiki 1-3.3. It was then adopted by MediaWiki. + * This implementation is largely due to Geoffrey T. Dairiki, who wrote this + * diff engine for phpwiki 1-3.3. It was then adopted by MediaWiki. * * Finally, it was ported to C++ by Tim Starling in February 2006 * * @access private */ template -class _DiffEngine +class _DiffEngine { public: // Vectors typedef std::vector BoolVector; // skip the allocator here to get the specialisation typedef std::vector > PointerVector; typedef std::vector > ValueVector; typedef std::vector > IntVector; typedef std::vector, WD2_ALLOCATOR > > IntPairVector; // Maps #ifdef USE_JUDY typedef JudyHS MatchesMap; #else typedef std::map, WD2_ALLOCATOR > MatchesMap; #endif // Sets typedef std::set, WD2_ALLOCATOR > IntSet; #ifdef USE_JUDY typedef JudySet ValueSet; #else typedef std::set, WD2_ALLOCATOR > ValueSet; #endif _DiffEngine() : done(false) {} void clear(); - void diff (const ValueVector & from_lines, + void diff (const ValueVector & from_lines, const ValueVector & to_lines, Diff & diff); int _lcs_pos (int ypos); void _compareseq (int xoff, int xlim, int yoff, int ylim); - void _shift_boundaries (const ValueVector & lines, BoolVector & changed, + void _shift_boundaries (const ValueVector & lines, BoolVector & changed, const BoolVector & other_changed); protected: - int _diag (int xoff, int xlim, int yoff, int ylim, int nchunks, + int _diag (int xoff, int xlim, int yoff, int ylim, int nchunks, IntPairVector & seps); - + BoolVector xchanged, ychanged; PointerVector xv, yv; IntVector xind, yind; IntVector seq; IntSet in_seq; int lcs; bool done; enum {MAX_CHUNKS=8}; }; //----------------------------------------------------------------------------- // _DiffEngine implementation //----------------------------------------------------------------------------- template -void _DiffEngine::clear() +void _DiffEngine::clear() { xchanged.clear(); ychanged.clear(); xv.clear(); yv.clear(); xind.clear(); yind.clear(); seq.clear(); in_seq.clear(); done = false; } template -void _DiffEngine::diff (const ValueVector & from_lines, - const ValueVector & to_lines, Diff & diff) +void _DiffEngine::diff (const ValueVector & from_lines, + const ValueVector & to_lines, Diff & diff) { int n_from = (int)from_lines.size(); int n_to = (int)to_lines.size(); // If this diff engine has been used before for a diff, clear the member variables if (done) { clear(); - } + } xchanged.resize(n_from); ychanged.resize(n_to); seq.resize(std::max(n_from, n_to) + 1); // Skip leading common lines. int skip, endskip; for (skip = 0; skip < n_from && skip < n_to; skip++) { if (from_lines[skip] != to_lines[skip]) break; xchanged[skip] = ychanged[skip] = false; } // Skip trailing common lines. int xi = n_from, yi = n_to; for (endskip = 0; --xi > skip && --yi > skip; endskip++) { if (from_lines[xi] != to_lines[yi]) break; xchanged[xi] = ychanged[yi] = false; } // Ignore lines which do not exist in both files. ValueSet xhash, yhash; for (xi = skip; xi < n_from - endskip; xi++) { xhash.insert(from_lines[xi]); } for (yi = skip; yi < n_to - endskip; yi++) { const T & line = to_lines[yi]; if ( (ychanged[yi] = (xhash.find(line) == xhash.end())) ) continue; yhash.insert(line); - yv.push_back(&line); + yv.push_back(&line); yind.push_back(yi); } for (xi = skip; xi < n_from - endskip; xi++) { const T & line = from_lines[xi]; if ( (xchanged[xi] = (yhash.find(line) == yhash.end())) ) continue; xv.push_back(&line); xind.push_back(xi); } // Find the LCS. _compareseq(0, xv.size(), 0, yv.size()); // Merge edits when possible _shift_boundaries(from_lines, xchanged, ychanged); _shift_boundaries(to_lines, ychanged, xchanged); // Compute the edit operations. xi = yi = 0; while (xi < n_from || yi < n_to) { assert(yi < n_to || xchanged[xi]); assert(xi < n_from || ychanged[yi]); // Skip matching "snake". PointerVector del; PointerVector add; PointerVector empty; while (xi < n_from && yi < n_to && !xchanged[xi] && !ychanged[yi]) { del.push_back(&from_lines[xi]); add.push_back(&to_lines[yi]); ++xi; ++yi; } if (del.size()) { diff.add_edit(DiffOp(DiffOp::copy, del, add)); del.clear(); add.clear(); } // Find deletes & adds. while (xi < n_from && xchanged[xi]) del.push_back(&from_lines[xi++]); while (yi < n_to && ychanged[yi]) add.push_back(&to_lines[yi++]); if (del.size() && add.size()) diff.add_edit(DiffOp(DiffOp::change, del, add)); else if (del.size()) diff.add_edit(DiffOp(DiffOp::del, del, empty)); else if (add.size()) diff.add_edit(DiffOp(DiffOp::add, empty, add)); } done = true; } /* Divide the Largest Common Subsequence (LCS) of the sequences * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally * sized segments. * * Returns (LCS, SEPS). LCS is the length of the LCS. SEPS is an * array of NCHUNKS+1 (X, Y) indexes giving the diving points between * sub sequences. The first sub-sequence is contained in [X0, X1), * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note * that (X0, Y0) == (XOFF, YOFF) and * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). * * This function assumes that the first lines of the specified portions * of the two files do not match, and likewise that the last lines do not * match. The caller must trim matching lines from the beginning and end * of the portions it is going to specify. */ template -int _DiffEngine::_diag (int xoff, int xlim, int yoff, int ylim, int nchunks, - IntPairVector & seps) +int _DiffEngine::_diag (int xoff, int xlim, int yoff, int ylim, int nchunks, + IntPairVector & seps) { using std::swap; using std::make_pair; using std::copy; bool flip = false; MatchesMap ymatches; if (xlim - xoff > ylim - yoff) { // Things seems faster (I'm not sure I understand why) // when the shortest sequence in X. flip = true; swap(xoff, yoff); swap(xlim, ylim); } if (flip) for (int i = ylim - 1; i >= yoff; i--) ymatches[*xv[i]].push_back(i); else for (int i = ylim - 1; i >= yoff; i--) ymatches[*yv[i]].push_back(i); int nlines = ylim - yoff; lcs = 0; seq[0] = yoff - 1; in_seq.clear(); // 2-d array, line major, chunk minor IntVector ymids(nlines * nchunks); int numer = xlim - xoff + nchunks - 1; int x = xoff, x1, y1; for (int chunk = 0; chunk < nchunks; chunk++) { if (chunk > 0) - for (int i = 0; i <= lcs; i++) + for (int i = 0; i <= lcs; i++) ymids.at(i * nchunks + chunk-1) = seq[i]; x1 = xoff + (int)((numer + (xlim-xoff)*chunk) / nchunks); for ( ; x < x1; x++) { const T & line = flip ? *yv[x] : *xv[x]; #ifdef USE_JUDY IntVector * pMatches = ymatches.Get(line); if (!pMatches) continue; #else typename MatchesMap::iterator iter = ymatches.find(line); if (iter == ymatches.end()) continue; IntVector * pMatches = &(iter->second); #endif IntVector::iterator y; int k = 0; - + for (y = pMatches->begin(); y != pMatches->end(); ++y) { if (!in_seq.count(*y)) { k = _lcs_pos(*y); assert(k > 0); - copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks, + copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks, ymids.begin() + k * nchunks); ++y; break; } } for ( ; y != pMatches->end(); ++y) { if (*y > seq[k-1]) { assert(*y < seq[k]); // Optimization: this is a common case: // next match is just replacing previous match. in_seq.erase(seq[k]); seq[k] = *y; in_seq.insert(*y); } else if (!in_seq.count(*y)) { k = _lcs_pos(*y); assert(k > 0); - copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks, + copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks, ymids.begin() + k * nchunks); } } } } seps.clear(); seps.resize(nchunks + 1); - + seps[0] = flip ? make_pair(yoff, xoff) : make_pair(xoff, yoff); IntVector::iterator ymid = ymids.begin() + lcs * nchunks; for (int n = 0; n < nchunks - 1; n++) { x1 = xoff + (numer + (xlim - xoff) * n) / nchunks; y1 = ymid[n] + 1; seps[n+1] = flip ? make_pair(y1, x1) : make_pair(x1, y1); } seps[nchunks] = flip ? make_pair(ylim, xlim) : make_pair(xlim, ylim); return lcs; } template int _DiffEngine::_lcs_pos (int ypos) { int end = lcs; if (end == 0 || ypos > seq[end]) { seq[++lcs] = ypos; in_seq.insert(ypos); return lcs; } int beg = 1; while (beg < end) { int mid = (beg + end) / 2; if ( ypos > seq[mid] ) beg = mid + 1; else end = mid; } assert(ypos != seq[end]); in_seq.erase(seq[end]); seq[end] = ypos; in_seq.insert(ypos); return end; } /* Find LCS of two sequences. * * The results are recorded in the vectors {x,y}changed[], by * storing a 1 in the element for each line that is an insertion * or deletion (ie. is not in the LCS). * * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. * * Note that XLIM, YLIM are exclusive bounds. * All line numbers are origin-0 and discarded lines are not counted. */ template void _DiffEngine::_compareseq (int xoff, int xlim, int yoff, int ylim) { using std::pair; IntPairVector seps; int lcs; - + // Slide down the bottom initial diagonal. while (xoff < xlim && yoff < ylim && *xv[xoff] == *yv[yoff]) { ++xoff; ++yoff; } // Slide up the top initial diagonal. while (xlim > xoff && ylim > yoff && *xv[xlim - 1] == *yv[ylim - 1]) { --xlim; --ylim; } if (xoff == xlim || yoff == ylim) lcs = 0; else { // This is ad hoc but seems to work well. //nchunks = sqrt(min(xlim - xoff, ylim - yoff) / 2.5); //nchunks = max(2,min(8,(int)nchunks)); int nchunks = std::min(MAX_CHUNKS-1, std::min(xlim - xoff, ylim - yoff)) + 1; lcs = _diag(xoff, xlim, yoff, ylim, nchunks, seps); } if (lcs == 0) { // X and Y sequences have no common subsequence: // mark all changed. while (yoff < ylim) ychanged[yind[yoff++]] = true; while (xoff < xlim) xchanged[xind[xoff++]] = true; } else { // Use the partitions to split this problem into subproblems. IntPairVector::iterator pt1, pt2; pt1 = pt2 = seps.begin(); while (++pt2 != seps.end()) { _compareseq (pt1->first, pt2->first, pt1->second, pt2->second); pt1 = pt2; } } } /* Adjust inserts/deletes of identical lines to join changes * as much as possible. * * We do something when a run of changed lines include a * line at one end and has an excluded, identical line at the other. * We are free to choose which identical line is included. * `compareseq' usually chooses the one at the beginning, * but usually it is cleaner to consider the following identical line * to be the "change". * * This is extracted verbatim from analyze.c (GNU diffutils-2.7). */ template -void _DiffEngine::_shift_boundaries (const ValueVector & lines, BoolVector & changed, - const BoolVector & other_changed) +void _DiffEngine::_shift_boundaries (const ValueVector & lines, BoolVector & changed, + const BoolVector & other_changed) { int i = 0; int j = 0; int len = (int)lines.size(); int other_len = (int)other_changed.size(); while (1) { /* * Scan forwards to find beginning of another run of changes. * Also keep track of the corresponding point in the other file. * * Throughout this code, i and j are adjusted together so that * the first i elements of changed and the first j elements * of other_changed both contain the same number of zeros * (unchanged lines). * Furthermore, j is always kept so that j == other_len or * other_changed[j] == false. */ while (j < other_len && other_changed[j]) j++; while (i < len && ! changed[i]) { i++; j++; while (j < other_len && other_changed[j]) j++; } if (i == len) break; int start = i, runlength, corresponding; // Find the end of this run of changes. while (++i < len && changed[i]) continue; do { /* * Record the length of this run of changes, so that * we can later determine whether the run has grown. */ runlength = i - start; /* * Move the changed region back, so long as the * previous unchanged line matches the last changed one. * This merges with previous changed regions. */ while (start > 0 && lines[start - 1] == lines[i - 1]) { changed[--start] = true; changed[--i] = false; while (start > 0 && changed[start - 1]) start--; while (other_changed[--j]) continue; } /* * Set CORRESPONDING to the end of the changed run, at the last * point where it corresponds to a changed run in the other file. * CORRESPONDING == LEN means no such point has been found. */ corresponding = j < other_len ? i : len; /* * Move the changed region forward, so long as the * first changed line matches the following unchanged one. * This merges with following changed regions. * Do this second, so that if there are no merges, * the changed region is moved forward as far as possible. */ while (i < len && lines[start] == lines[i]) { changed[start++] = false; changed[i++] = true; while (i < len && changed[i]) i++; j++; if (j < other_len && other_changed[j]) { corresponding = i; while (j < other_len && other_changed[j]) j++; } } } while (runlength != i - start); /* * If possible, move the fully-merged run of changes * back to a corresponding run in the other file. */ while (corresponding < i) { changed[--start] = 1; changed[--i] = 0; while (other_changed[--j]) continue; } } } //----------------------------------------------------------------------------- // Diff implementation //----------------------------------------------------------------------------- template Diff::Diff(const ValueVector & from_lines, const ValueVector & to_lines) { _DiffEngine engine; engine.diff(from_lines, to_lines, *this); } #endif diff --git a/InlineDiff.cpp b/InlineDiff.cpp new file mode 100644 index 0000000..3fa941c --- /dev/null +++ b/InlineDiff.cpp @@ -0,0 +1,91 @@ +#include "InlineDiff.h" + +void InlineDiff::printAdd(const String& line) +{ + printWrappedLine("
", line, "
\n"); +} + +void InlineDiff::printDelete(const String& line) +{ + printWrappedLine("
", line, "
\n"); +} + +void InlineDiff::printWordDiff(const String& text1, const String& text2) +{ + WordVector words1, words2; + + explodeWords(text1, words1); + explodeWords(text2, words2); + WordDiff worddiff(words1, words2); + String word; + + result += "
"; + for (unsigned i = 0; i < worddiff.size(); ++i) { + DiffOp & op = worddiff[i]; + int n, j; + if (op.op == DiffOp::copy) { + n = op.from.size(); + for (j=0; jget_whole(word); + printText(word); + } + } else if (op.op == DiffOp::del) { + n = op.from.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + } else if (op.op == DiffOp::add) { + n = op.to.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + } else if (op.op == DiffOp::change) { + n = op.from.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + n = op.to.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + } + } + result += "
\n"; +} + +void InlineDiff::printBlockHeader(int leftLine, int rightLine) +{ + char buf[256]; // should be plenty + snprintf(buf, sizeof(buf), + "
\n", + leftLine, rightLine); + result += buf; +} + +void InlineDiff::printContext(const String & input) +{ + printWrappedLine("
", input, "
\n"); +} + +void InlineDiff::printWrappedLine(const char* pre, const String& line, const char* post) +{ + result += pre; + if (line.empty()) { + result += " "; + } else { + printText(line); + } + result += post; +} diff --git a/InlineDiff.h b/InlineDiff.h new file mode 100644 index 0000000..01304ca --- /dev/null +++ b/InlineDiff.h @@ -0,0 +1,18 @@ +#ifndef INLINEDIFF_H +#define INLINEDIFF_H + +#include "Wikidiff2.h" + +class InlineDiff: public Wikidiff2 { + public: + protected: + void printAdd(const String& line); + void printDelete(const String& line); + void printWordDiff(const String& text1, const String& text2); + void printBlockHeader(int leftLine, int rightLine); + void printContext(const String& input); + + void printWrappedLine(const char* pre, const String& line, const char* post); +}; + +#endif diff --git a/TableDiff.cpp b/TableDiff.cpp new file mode 100644 index 0000000..f613e9d --- /dev/null +++ b/TableDiff.cpp @@ -0,0 +1,123 @@ +#include +#include "Wikidiff2.h" +#include "TableDiff.h" + +void TableDiff::printAdd(const String & line) +{ + result += "\n" + "  \n" + " +\n" + " "; + printTextWithDiv(line); + result += "\n\n"; +} + +void TableDiff::printDelete(const String & line) +{ + result += "\n" + " −\n" + " "; + printTextWithDiv(line); + result += "\n" + "  \n" + "\n"; +} + +void TableDiff::printWordDiff(const String & text1, const String & text2) +{ + WordVector words1, words2; + + explodeWords(text1, words1); + explodeWords(text2, words2); + WordDiff worddiff(words1, words2); + + //debugPrintWordDiff(worddiff); + + // print twice; first for left side, then for right side + result += "\n" + " −\n" + "
"; + printWordDiffSide(worddiff, false); + result += "
\n" + " +\n" + "
"; + printWordDiffSide(worddiff, true); + result += "
\n" + "\n"; +} + +void TableDiff::printWordDiffSide(WordDiff &worddiff, bool added) +{ + String word; + for (unsigned i = 0; i < worddiff.size(); ++i) { + DiffOp & op = worddiff[i]; + int n, j; + if (op.op == DiffOp::copy) { + n = op.from.size(); + if (added) { + for (j=0; jget_whole(word); + printText(word); + } + } else { + for (j=0; jget_whole(word); + printText(word); + } + } + } else if (!added && (op.op == DiffOp::del || op.op == DiffOp::change)) { + n = op.from.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + } else if (added && (op.op == DiffOp::add || op.op == DiffOp::change)) { + n = op.to.size(); + result += ""; + for (j=0; jget_whole(word); + printText(word); + } + result += ""; + } + } +} + +void TableDiff::printTextWithDiv(const String & input) +{ + // Wrap string in a
if it's not empty + if (input.size() > 0) { + result.append("
"); + printText(input); + result.append("
"); + } +} + +void TableDiff::printBlockHeader(int leftLine, int rightLine) +{ + char buf[256]; // should be plenty + snprintf(buf, sizeof(buf), + "\n" + " \n" + " \n" + "\n", + leftLine, rightLine); + result += buf; +} + +void TableDiff::printContext(const String & input) +{ + result += + "\n" + "  \n" + " "; + printTextWithDiv(input); + result += + "\n" + "  \n" + " "; + printTextWithDiv(input); + result += "\n\n"; +} diff --git a/TableDiff.h b/TableDiff.h new file mode 100644 index 0000000..0a3adc6 --- /dev/null +++ b/TableDiff.h @@ -0,0 +1,19 @@ +#ifndef TABLEDIFF_H +#define TABLEDIFF_H + +#include "Wikidiff2.h" + +class TableDiff: public Wikidiff2 { + public: + protected: + void printAdd(const String& line); + void printDelete(const String& line); + void printWordDiff(const String& text1, const String & text2); + void printTextWithDiv(const String& input); + void printBlockHeader(int leftLine, int rightLine); + void printContext(const String& input); + + void printWordDiffSide(WordDiff& worddiff, bool added); +}; + +#endif diff --git a/wikidiff2.cpp b/Wikidiff2.cpp similarity index 70% rename from wikidiff2.cpp rename to Wikidiff2.cpp index 857ee37..e31d15a 100644 --- a/wikidiff2.cpp +++ b/Wikidiff2.cpp @@ -1,460 +1,344 @@ /** - * Diff formatter, based on code by Steinar H. Gunderson, converted to work with the + * Diff formatter, based on code by Steinar H. Gunderson, converted to work with the * Dairiki diff engine by Tim Starling - * + * * GPL. */ #include #include -#include "wikidiff2.h" +#include "Wikidiff2.h" #include #include #include -void Wikidiff2::diffLines(const StringVector & lines1, const StringVector & lines2, + +void Wikidiff2::diffLines(const StringVector & lines1, const StringVector & lines2, int numContextLines) { // first do line-level diff StringDiff linediff(lines1, lines2); - + int ctx = 0; int from_index = 1, to_index = 1; // Should a line number be printed before the next context line? // Set to true initially so we get a line number on line 1 bool showLineNumber = true; for (int i = 0; i < linediff.size(); ++i) { int n, j, n1, n2; // Line 1 changed, show heading with no leading context if (linediff[i].op != DiffOp::copy && i == 0) { - result += - "\n" - " \n" - " \n" - "\n"; + printBlockHeader(1, 1); } - + switch (linediff[i].op) { case DiffOp::add: // inserted lines n = linediff[i].to.size(); for (j=0; j::del: // deleted lines n = linediff[i].from.size(); for (j=0; j::copy: // copy/context n = linediff[i].from.size(); for (j=0; j= n - numContextLines)) /*leading*/ { if (showLineNumber) { - // Print Line: heading - char buf[256]; // should be plenty - snprintf(buf, 256, - "\n" - " \n" - " \n" - "\n", - from_index, to_index); - result += buf; + printBlockHeader(from_index, to_index); showLineNumber = false; } - // Print context - result += - "\n" - "  \n" - " "; - printTextWithDiv(*linediff[i].from[j]); - result += - "\n" - "  \n" - " "; - printTextWithDiv(*linediff[i].from[j]); - result += "\n\n"; + printContext(*linediff[i].from[j]); } else { showLineNumber = true; } from_index++; to_index++; } break; case DiffOp::change: // replace, i.e. we do a word diff between the two sets of lines n1 = linediff[i].from.size(); n2 = linediff[i].to.size(); n = std::min(n1, n2); for (j=0; j n2) { for (j=n2; j \n" - " +\n" - " "; - printTextWithDiv(line); - result += "\n\n"; -} - -void Wikidiff2::printDelete(const String & line) -{ - result += "\n" - " −\n" - " "; - printTextWithDiv(line); - result += "\n" - "  \n" - "\n"; -} - -void Wikidiff2::printWordDiff(const String & text1, const String & text2) -{ - WordVector words1, words2; - - explodeWords(text1, words1); - explodeWords(text2, words2); - WordDiff worddiff(words1, words2); - - //debugPrintWordDiff(worddiff); - - // print twice; first for left side, then for right side - result += "\n" - " −\n" - "
"; - printWordDiffSide(worddiff, false); - result += "
\n" - " +\n" - "
"; - printWordDiffSide(worddiff, true); - result += "
\n" - "\n"; -} - void Wikidiff2::debugPrintWordDiff(WordDiff & worddiff) { for (unsigned i = 0; i < worddiff.size(); ++i) { - DiffOp & op = worddiff[i]; + DiffOp & op = worddiff[i]; switch (op.op) { case DiffOp::copy: result += "Copy\n"; break; case DiffOp::del: result += "Delete\n"; break; case DiffOp::add: result += "Add\n"; break; case DiffOp::change: result += "Change\n"; break; } result += "From: "; bool first = true; for (int j=0; jwhole() + ")"; } result += "\n"; result += "To: "; first = true; for (int j=0; jwhole() + ")"; } result += "\n\n"; } } -void Wikidiff2::printWordDiffSide(WordDiff &worddiff, bool added) -{ - String word; - for (unsigned i = 0; i < worddiff.size(); ++i) { - DiffOp & op = worddiff[i]; - int n, j; - if (op.op == DiffOp::copy) { - n = op.from.size(); - if (added) { - for (j=0; jget_whole(word); - printText(word); - } - } else { - for (j=0; jget_whole(word); - printText(word); - } - } - } else if (!added && (op.op == DiffOp::del || op.op == DiffOp::change)) { - n = op.from.size(); - result += ""; - for (j=0; jget_whole(word); - printText(word); - } - result += ""; - } else if (added && (op.op == DiffOp::add || op.op == DiffOp::change)) { - n = op.to.size(); - result += ""; - for (j=0; jget_whole(word); - printText(word); - } - result += ""; - } - } -} - -void Wikidiff2::printTextWithDiv(const String & input) -{ - // Wrap string in a
if it's not empty - if (input.size() > 0) { - result.append("
"); - printText(input); - result.append("
"); - } -} - void Wikidiff2::printText(const String & input) { size_t start = 0; size_t end = input.find_first_of("<>&"); while (end != String::npos) { if (end > start) { result.append(input, start, end - start); } switch (input[end]) { case '<': result.append("<"); break; case '>': result.append(">"); break; default /*case '&'*/: result.append("&"); } start = end + 1; end = input.find_first_of("<>&", start); } // Append the rest of the string after the last special character if (start < input.size()) { result.append(input, start, input.size() - start); } } // Weak UTF-8 decoder // Will return garbage on invalid input (overshort sequences, overlong sequences, etc.) -int Wikidiff2::nextUtf8Char(String::const_iterator & p, String::const_iterator & charStart, +int Wikidiff2::nextUtf8Char(String::const_iterator & p, String::const_iterator & charStart, String::const_iterator end) { int c = 0; unsigned char byte; int seqLength = 0; charStart = p; if (p == end) { return 0; } do { byte = (unsigned char)*p; if (byte < 0x80) { c = byte; seqLength = 0; } else if (byte >= 0xc0) { // Start of UTF-8 character // If this is unexpected, due to an overshort sequence, we ignore the invalid // sequence and resynchronise here if (byte < 0xe0) { seqLength = 1; c = byte & 0x1f; } else if (byte < 0xf0) { seqLength = 2; c = byte & 0x0f; } else { seqLength = 3; c = byte & 7; } } else if (seqLength) { c <<= 6; c |= byte & 0x3f; --seqLength; } else { // Unexpected continuation, ignore } ++p; } while (seqLength && p != end); return c; } // Split a string into words // -// TODO: I think the best way to do this would be to use ICU BreakIterator -// instead of libthai + DIY. Basically you'd run BreakIterators from several +// TODO: I think the best way to do this would be to use ICU BreakIterator +// instead of libthai + DIY. Basically you'd run BreakIterators from several // different locales (en, th, ja) and merge the results, i.e. if a break occurs -// in any locale at a given position, split the string. I don't know if the -// quality of the Thai dictionary in ICU matches the one in libthai, we would +// in any locale at a given position, split the string. I don't know if the +// quality of the Thai dictionary in ICU matches the one in libthai, we would // have to check this somehow. void Wikidiff2::explodeWords(const String & text, WordVector &words) { // Don't try to do a word-level diff on very long lines if (text.size() > MAX_DIFF_LINE) { words.push_back(Word(text.begin(), text.end(), text.end())); return; } // Decode the UTF-8 in the string. // * Save the character sizes (in bytes) - // * Convert the string to TIS-620, which is the internal character set of libthai. + // * Convert the string to TIS-620, which is the internal character set of libthai. // * Save the character offsets of any break positions (same format as libthai). String tisText, charSizes; String::const_iterator suffixEnd, charStart, p; IntSet breaks; tisText.reserve(text.size()); charSizes.reserve(text.size()); wchar_t ch, lastChar; thchar_t thaiChar; bool hasThaiChars = false; p = text.begin(); ch = nextUtf8Char(p, charStart, text.end()); lastChar = 0; int charIndex = 0; while (ch) { thaiChar = th_uni2tis(ch); if (thaiChar >= 0x80 && thaiChar != THCHAR_ERR) { hasThaiChars = true; } tisText += (char)thaiChar; charSizes += (char)(p - charStart); if (isLetter(ch)) { if (lastChar && !isLetter(lastChar)) { breaks.insert(charIndex); } } else { breaks.insert(charIndex); } charIndex++; lastChar = ch; ch = nextUtf8Char(p, charStart, text.end()); } // If there were any Thai characters in the string, run th_brk on it and add // the resulting break positions if (hasThaiChars) { IntVector thaiBreakPositions; tisText += '\0'; thaiBreakPositions.resize(tisText.size()); - int numBreaks = th_brk((const thchar_t*)(tisText.data()), + int numBreaks = th_brk((const thchar_t*)(tisText.data()), &thaiBreakPositions[0], thaiBreakPositions.size()); thaiBreakPositions.resize(numBreaks); breaks.insert(thaiBreakPositions.begin(), thaiBreakPositions.end()); } - // Add a fake end-of-string character and have a break on it, so that the + // Add a fake end-of-string character and have a break on it, so that the // last word gets added without special handling breaks.insert(charSizes.size()); charSizes += (char)0; // Now make the word array by traversing the breaks set p = text.begin(); IntSet::iterator pBrk = breaks.begin(); String::const_iterator wordStart = text.begin(); String::const_iterator suffixStart = text.end(); // If there's a break at the start of the string, skip it if (pBrk != breaks.end() && *pBrk == 0) { pBrk++; } for (charIndex = 0; charIndex < charSizes.size(); p += charSizes[charIndex++]) { // Assume all spaces are ASCII if (isSpace(*p)) { suffixStart = p; } if (pBrk != breaks.end() && charIndex == *pBrk) { if (suffixStart == text.end()) { words.push_back(Word(wordStart, p, p)); } else { words.push_back(Word(wordStart, suffixStart, p)); } pBrk++; suffixStart = text.end(); wordStart = p; } } } void Wikidiff2::explodeLines(const String & text, StringVector &lines) { String::const_iterator ptr = text.begin(); while (ptr != text.end()) { String::const_iterator ptr2 = std::find(ptr, text.end(), '\n'); lines.push_back(String(ptr, ptr2)); - + ptr = ptr2; if (ptr != text.end()) { ++ptr; } } } const Wikidiff2::String & Wikidiff2::execute(const String & text1, const String & text2, int numContextLines) { // Allocate some result space to avoid excessive copying result.clear(); result.reserve(text1.size() + text2.size() + 10000); - + // Split input strings into lines StringVector lines1; StringVector lines2; explodeLines(text1, lines1); explodeLines(text2, lines2); // Do the diff diffLines(lines1, lines2, numContextLines); // Return a reference to the result buffer return result; } - diff --git a/wikidiff2.h b/Wikidiff2.h similarity index 78% rename from wikidiff2.h rename to Wikidiff2.h index d714514..c8fea3d 100644 --- a/wikidiff2.h +++ b/Wikidiff2.h @@ -1,87 +1,88 @@ #ifndef WIKIDIFF2_H #define WIKIDIFF2_H #define MAX_DIFF_LINE 10000 /** Set WD2_USE_STD_ALLOCATOR to compile for standalone (non-PHP) operation */ #ifdef WD2_USE_STD_ALLOCATOR #define WD2_ALLOCATOR std::allocator #else #define WD2_ALLOCATOR PhpAllocator #include "php_cpp_allocator.h" #endif #include "DiffEngine.h" #include "Word.h" #include #include #include class Wikidiff2 { public: typedef std::basic_string, WD2_ALLOCATOR > String; typedef std::vector > StringVector; typedef std::vector > WordVector; typedef std::vector > IntVector; typedef std::set, WD2_ALLOCATOR > IntSet; typedef Diff StringDiff; typedef Diff WordDiff; const String & execute(const String & text1, const String & text2, int numContextLines); inline const String & getResult() const; protected: String result; - void diffLines(const StringVector & lines1, const StringVector & lines2, + virtual void diffLines(const StringVector & lines1, const StringVector & lines2, int numContextLines); - void printAdd(const String & line); - void printDelete(const String & line); - void printWordDiff(const String & text1, const String & text2); - void printWordDiffSide(WordDiff &worddiff, bool added); - void printTextWithDiv(const String & input); + virtual void printAdd(const String & line) = 0; + virtual void printDelete(const String & line) = 0; + virtual void printWordDiff(const String & text1, const String & text2) = 0; + virtual void printBlockHeader(int leftLine, int rightLine) = 0; + virtual void printContext(const String & input) = 0; + void printText(const String & input); inline bool isLetter(int ch); inline bool isSpace(int ch); void debugPrintWordDiff(WordDiff & worddiff); - int nextUtf8Char(String::const_iterator & p, String::const_iterator & charStart, + int nextUtf8Char(String::const_iterator & p, String::const_iterator & charStart, String::const_iterator end); void explodeWords(const String & text, WordVector &tokens); void explodeLines(const String & text, StringVector &lines); }; -bool Wikidiff2::isLetter(int ch) +inline bool Wikidiff2::isLetter(int ch) { // Standard alphanumeric if ((ch >= '0' && ch <= '9') || (ch == '_') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { return true; } // Punctuation and control characters if (ch < 0xc0) return false; // Chinese, Japanese: split up character by character if (ch >= 0x3000 && ch <= 0x9fff) return false; if (ch >= 0x20000 && ch <= 0x2a000) return false; // Otherwise assume it's from a language that uses spaces return true; } -bool Wikidiff2::isSpace(int ch) +inline bool Wikidiff2::isSpace(int ch) { return ch == ' ' || ch == '\t'; } -const Wikidiff2::String & Wikidiff2::getResult() const +inline const Wikidiff2::String & Wikidiff2::getResult() const { return result; } #endif diff --git a/Word.h b/Word.h index 5d447aa..1950bc3 100644 --- a/Word.h +++ b/Word.h @@ -1,64 +1,64 @@ #ifndef WORD_H #define WORD_H #include #include -#include "wikidiff2.h" +#include "Wikidiff2.h" // a small class to accomodate word-level diffs; basically, a body and an // optional suffix (the latter consisting of a single whitespace), where // only the bodies are compared on operator==. -// +// // This class stores iterators pointing to the line string, this is to avoid // excessive allocation calls. To avoid invalidation, the source string should // not be changed or destroyed. class Word { public: typedef std::basic_string, WD2_ALLOCATOR > String; typedef String::const_iterator Iterator; - + Iterator bodyStart; Iterator bodyEnd; Iterator suffixEnd; - + /** * The body is the character sequence [bs, be) * The whitespace suffix is the character sequence [be, se) */ - Word(Iterator bs, Iterator be, Iterator se) + Word(Iterator bs, Iterator be, Iterator se) : bodyStart(bs), bodyEnd(be), suffixEnd(se) {} bool operator== (const Word &w) const { - return (bodyEnd - bodyStart == w.bodyEnd - w.bodyStart) + return (bodyEnd - bodyStart == w.bodyEnd - w.bodyStart) && std::equal(bodyStart, bodyEnd, w.bodyStart); } bool operator!=(const Word &w) const { return !operator==(w); } bool operator<(const Word &w) const { return std::lexicographical_compare(bodyStart, bodyEnd, w.bodyStart, w.bodyEnd); } // Get the whole word as a string String whole() const { String w; get_whole(w); return w; } // Assign the whole word to a string void get_whole(String & w) const { // Do it with swap() to avoid a second copy String temp(bodyStart, suffixEnd); temp.swap(w); } // Get the body as a string operator String() const { return String(bodyStart, bodyEnd); } }; #endif diff --git a/config.m4 b/config.m4 index 0904d50..b848398 100644 --- a/config.m4 +++ b/config.m4 @@ -1,41 +1,41 @@ dnl $Id$ dnl if test -z "$CXX"; then dnl AC_MSG_ERROR([PHP is bugged. Set \$CXX to a C++ compiler.]) dnl fi PHP_ARG_ENABLE(wikidiff2, whether to enable wikidiff2 support, [ --enable-wikidiff2 Enable wikidiff2 support]) if test "$PHP_WIKIDIFF2" != "no"; then PHP_REQUIRE_CXX AC_LANG_CPLUSPLUS PHP_ADD_LIBRARY(stdc++,,WIKIDIFF2_SHARED_LIBADD) if test -z "$PKG_CONFIG" then AC_PATH_PROG(PKG_CONFIG, pkg-config, no) fi if test "$PKG_CONFIG" = "no" then AC_MSG_ERROR([required utility 'pkg-config' not found]) fi if ! $PKG_CONFIG --exists libthai then AC_MSG_ERROR(['libthai' not known to pkg-config]) fi PHP_EVAL_INCLINE(`$PKG_CONFIG --cflags-only-I libthai`) PHP_EVAL_LIBLINE(`$PKG_CONFIG --libs libthai`, WIKIDIFF2_SHARED_LIBADD) export OLD_CPPFLAGS="$CPPFLAGS" export CPPFLAGS="$CPPFLAGS $INCLUDES -DHAVE_WIKIDIFF2" AC_CHECK_HEADER([thai/thailib.h], [], AC_MSG_ERROR('thai/thailib.h' header not found')) export CPPFLAGS="$OLD_CPPFLAGS" PHP_SUBST(WIKIDIFF2_SHARED_LIBADD) AC_DEFINE(HAVE_WIKIDIFF2, 1, [ ]) export CXXFLAGS="-Wno-write-strings $CXXFLAGS" - PHP_NEW_EXTENSION(wikidiff2, php_wikidiff2.cpp wikidiff2.cpp, $ext_shared) + PHP_NEW_EXTENSION(wikidiff2, php_wikidiff2.cpp Wikidiff2.cpp TableDiff.cpp InlineDiff.cpp, $ext_shared) fi diff --git a/debian/changelog b/debian/changelog index 2b8988b..55c1867 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,36 +1,42 @@ +php-wikidiff2 (1.2) precise-wikimedia; urgency=low + + * Added new function, wikidiff2_inline_diff(), for one-column diffs + + -- Max Semenik Mon, 09 Dec 2013 21:56:00 +0000 + php-wikidiff2 (1.1.2-2) precise-wikimedia; urgency=low * Fixed incorrect path in wikidiff2.ini -- Tim Starling Tue, 08 Jan 2013 14:30:53 +1100 php-wikidiff2 (1.1.2-1) lucid-wikimedia; urgency=low * Fixed bug 26354 (added class to dummy cells containing  ) * Fixed bug 25697 (empty context lines not showing) -- Roan Kattouw Tue, 03 Jan 2012 14:15:41 +0100 php5-wikidiff2 (1.1.1-1) lucid; urgency=low * Fixed bug 33331 (word breaking around punctuation) -- Tim Starling Fri, 23 Dec 2011 15:33:40 +1100 php5-wikidiff2 (1.1.0-2) lucid; urgency=low * Include a config file so the extension loads -- Mark Bergsma Tue, 08 Feb 2011 12:50:21 +0000 php5-wikidiff2 (1.1.0-1) lucid; urgency=low * New non-swig version -- Tim Starling Sun, 20 Jun 2010 17:45:37 +1000 php5-wikidiff2 (1.0.2-1) unstable; urgency=low * Initial release as deb package -- Brion Vibber Tue, 21 Aug 2007 18:03:07 +0000 diff --git a/php_wikidiff2.cpp b/php_wikidiff2.cpp index 9ca7106..48e9d93 100644 --- a/php_wikidiff2.cpp +++ b/php_wikidiff2.cpp @@ -1,106 +1,144 @@ /* $Id$ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "php.h" #include "php_ini.h" #include "ext/standard/info.h" #include "php_wikidiff2.h" -#include "wikidiff2.h" +#include "Wikidiff2.h" +#include "TableDiff.h" +#include "InlineDiff.h" static int le_wikidiff2; zend_function_entry wikidiff2_functions[] = { - PHP_FE(wikidiff2_do_diff, NULL) + PHP_FE(wikidiff2_do_diff, NULL) + PHP_FE(wikidiff2_inline_diff, NULL) {NULL, NULL, NULL} }; zend_module_entry wikidiff2_module_entry = { #if ZEND_MODULE_API_NO >= 20010901 STANDARD_MODULE_HEADER, #endif "wikidiff2", wikidiff2_functions, PHP_MINIT(wikidiff2), PHP_MSHUTDOWN(wikidiff2), - PHP_RINIT(wikidiff2), + PHP_RINIT(wikidiff2), PHP_RSHUTDOWN(wikidiff2), PHP_MINFO(wikidiff2), #if ZEND_MODULE_API_NO >= 20010901 - "0.1", + "0.2", #endif STANDARD_MODULE_PROPERTIES }; #ifdef COMPILE_DL_WIKIDIFF2 ZEND_GET_MODULE(wikidiff2) #endif PHP_MINIT_FUNCTION(wikidiff2) { return SUCCESS; } PHP_MSHUTDOWN_FUNCTION(wikidiff2) { return SUCCESS; } PHP_RINIT_FUNCTION(wikidiff2) { return SUCCESS; } PHP_RSHUTDOWN_FUNCTION(wikidiff2) { return SUCCESS; } PHP_MINFO_FUNCTION(wikidiff2) { php_info_print_table_start(); php_info_print_table_header(2, "wikidiff2 support", "enabled"); php_info_print_table_end(); } /* {{{ proto string wikidiff2_do_diff(string text1, string text2, int numContextLines) * * Warning: the input text must be valid UTF-8! Do not pass user input directly * to this function. */ PHP_FUNCTION(wikidiff2_do_diff) { char *text1 = NULL; char *text2 = NULL; int argc = ZEND_NUM_ARGS(); int text1_len; int text2_len; long numContextLines; - if (zend_parse_parameters(argc TSRMLS_CC, "ssl", &text1, &text1_len, &text2, + if (zend_parse_parameters(argc TSRMLS_CC, "ssl", &text1, &text1_len, &text2, &text2_len, &numContextLines) == FAILURE) { return; } try { - Wikidiff2 wikidiff2; + TableDiff wikidiff2; Wikidiff2::String text1String(text1, text1_len); Wikidiff2::String text2String(text2, text2_len); const Wikidiff2::String & ret = wikidiff2.execute(text1String, text2String, numContextLines); RETURN_STRINGL( const_cast(ret.data()), ret.size(), 1); } catch (std::bad_alloc &e) { zend_error(E_WARNING, "Out of memory in wikidiff2_do_diff()."); } catch (...) { zend_error(E_WARNING, "Unknown exception in wikidiff2_do_diff()."); } } + +/* {{{ proto string wikidiff2_inline_diff(string text1, string text2, int numContextLines) + * + * Warning: the input text must be valid UTF-8! Do not pass user input directly + * to this function. + */ +PHP_FUNCTION(wikidiff2_inline_diff) +{ + char *text1 = NULL; + char *text2 = NULL; + int argc = ZEND_NUM_ARGS(); + int text1_len; + int text2_len; + long numContextLines; + + if (zend_parse_parameters(argc TSRMLS_CC, "ssl", &text1, &text1_len, &text2, + &text2_len, &numContextLines) == FAILURE) + { + return; + } + + + try { + InlineDiff wikidiff2; + Wikidiff2::String text1String(text1, text1_len); + Wikidiff2::String text2String(text2, text2_len); + const Wikidiff2::String& ret = wikidiff2.execute(text1String, text2String, numContextLines); + RETURN_STRINGL( const_cast(ret.data()), ret.size(), 1); + } catch (std::bad_alloc &e) { + zend_error(E_WARNING, "Out of memory in wikidiff2_inline_diff()."); + } catch (...) { + zend_error(E_WARNING, "Unknown exception in wikidiff2_inline_diff()."); + } +} + /* }}} */ diff --git a/php_wikidiff2.h b/php_wikidiff2.h index ec45e39..973233f 100644 --- a/php_wikidiff2.h +++ b/php_wikidiff2.h @@ -1,65 +1,66 @@ /* +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2008 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: | +----------------------------------------------------------------------+ */ /* $Id: header,v 1.16.2.1.2.1.2.1 2008/02/07 19:39:50 iliaa Exp $ */ #ifndef PHP_WIKIDIFF2_H #define PHP_WIKIDIFF2_H extern zend_module_entry wikidiff2_module_entry; #define phpext_wikidiff2_ptr &wikidiff2_module_entry #ifdef PHP_WIN32 # define PHP_WIKIDIFF2_API __declspec(dllexport) #elif defined(__GNUC__) && __GNUC__ >= 4 # define PHP_WIKIDIFF2_API __attribute__ ((visibility("default"))) #else # define PHP_WIKIDIFF2_API #endif #ifdef ZTS #include "TSRM.h" #endif PHP_MINIT_FUNCTION(wikidiff2); PHP_MSHUTDOWN_FUNCTION(wikidiff2); PHP_RINIT_FUNCTION(wikidiff2); PHP_RSHUTDOWN_FUNCTION(wikidiff2); PHP_MINFO_FUNCTION(wikidiff2); PHP_FUNCTION(wikidiff2_do_diff); +PHP_FUNCTION(wikidiff2_inline_diff); #ifdef ZTS #define WIKIDIFF2_G(v) TSRMG(wikidiff2_globals_id, zend_wikidiff2_globals *, v) #else #define WIKIDIFF2_G(v) (wikidiff2_globals.v) #endif #endif /* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: noet sw=4 ts=4 fdm=marker * vim<600: noet sw=4 ts=4 */ diff --git a/tests/004.phpt b/tests/004.phpt new file mode 100644 index 0000000..32594d4 --- /dev/null +++ b/tests/004.phpt @@ -0,0 +1,38 @@ +--TEST-- +Diff test D: inline diffs +--SKIPIF-- + +--FILE-- + +--EXPECT-- +
+
foo bartest
+
 
+
baz
+
quuxtest
+
 
+
bang
+