1 module dshould.stringcmp;
2 
3 import std.algorithm : EditOp, map;
4 import std.format : format;
5 import std.range;
6 import std.typecons;
7 import dshould.ShouldType;
8 import dshould.basic;
9 
10 /**
11  * Given string types, this version of the word `.equal` implements color-coded diffs.
12  * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically.
13  */
14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__)
15 if (isInstanceOf!(ShouldType, Should) && is(T == string))
16 {
17     should.allowOnlyWords!().before!"equal (string)";
18 
19     should.addWord!"equal".stringCmp(expected, file, line);
20 }
21 
22 private void stringCmp(Should, T)(Should should, T expected, string file, size_t line)
23 if (isInstanceOf!(ShouldType, Should))
24 {
25     import std.algorithm : canFind;
26 
27     should.allowOnlyWords!("equal").before!"equal (string)";
28 
29     should.terminateChain;
30 
31     auto got = should.got();
32 
33     if (got != expected)
34     {
35         string original;
36         string diff;
37 
38         if (got.canFind("\n"))
39         {
40             auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n"));
41 
42             original = diffPair.original.join("\n") ~ "\n";
43             diff = "\n" ~ diffPair.target.join("\n");
44         }
45         else
46         {
47             auto diffPair = oneLineDiff(expected, got);
48 
49             original = diffPair.original;
50             diff = diffPair.target;
51         }
52 
53         throw new FluentException(
54             "test failed",
55             format!`: expected '%s', but got '%s'`(original, diff),
56             file, line
57         );
58     }
59 }
60 
61 private auto oneLineDiff(string expected, string text) @safe
62 {
63     alias removePred = text => red(text);
64     alias addPred = text => green(text);
65     alias keepPred = text => text;
66     alias ditchPred = lines => string.init;
67 
68     auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text);
69     auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text);
70 
71     alias DiffPair = Tuple!(string, "original", string, "target");
72 
73     if (originalColored.isNull)
74     {
75         return DiffPair(expected, text);
76     }
77     else
78     {
79         return DiffPair(originalColored.get, targetColored.get);
80     }
81 }
82 
83 @("collates successive replacements")
84 unittest
85 {
86     const expectedOriginal = "Hello W" ~ red("or") ~ "ld";
87     const expectedTarget = "Hello W" ~ green("ey") ~ "ld";
88     const diff = "Hello World".oneLineDiff("Hello Weyld");
89 
90     (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget);
91 }
92 
93 @("does not colorize diff view for strings that are too different")
94 unittest
95 {
96     const diff = "Hello World".oneLineDiff("Goodbye Universe");
97 
98     (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe");
99 }
100 
101 @("tries to not change diff mode too often")
102 unittest
103 {
104     const cleanDiff = `method="` ~ GREEN_CODE ~ `multiply` ~ CLEAR_CODE ~ `"`;
105 
106     `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff);
107 }
108 
109 unittest
110 {
111     const originalText = "test";
112     const targetText = "test, but";
113 
114     with (originalText.oneLineDiff(targetText))
115     {
116         (original ~ target).should.equal(originalText ~ originalText ~ green(", but"));
117     }
118 }
119 
120 private auto multiLineDiff(string[] expected, string[] text) @safe
121 {
122     alias removePred = lines => lines.map!(line => red("-" ~ line));
123     alias addPred = lines => lines.map!(line => green("+" ~ line));
124     alias keepPred = lines => lines.map!(line => " " ~ line);
125     alias ditchPred = lines => (string[]).init;
126 
127     auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text);
128     auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text);
129 
130     alias DiffPair = Tuple!(string[], "original", string[], "target");
131 
132     if (originalColored.isNull)
133     {
134         return DiffPair(expected, text);
135     }
136     else
137     {
138         return DiffPair(originalColored.get, targetColored.get);
139     }
140 }
141 
142 @("supports multiline diff")
143 unittest
144 {
145     import std..string : join, split;
146 
147     // given
148     const originalText = `{
149         "int": 1,
150         "array": ["XX"],
151         "timecodeBegin": "2003-02-01T12:00:00Z",
152         "timecodeEnd": "2003-02-01T14:00:00Z",
153         "enum": "ENUM",
154         "lastEntry": "goodbye"
155     }`;
156     const modifiedText = `{
157         "int": 1,
158         "array": ["XX"],
159         "enum": "ENUM",
160         "somethingElse": "goes here",
161         "lastEntry": "goodbye"
162     }`;
163 
164     // then
165     const patchTextOriginal = ` {
166          "int": 1,
167          "array": ["XX"],
168 ` ~ red(`-        "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ `
169 ` ~ red(`-        "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ `
170          "enum": "ENUM",
171          "lastEntry": "goodbye"
172      }`;
173 
174     const patchTextTarget = ` {
175          "int": 1,
176          "array": ["XX"],
177          "enum": "ENUM",
178 ` ~ green(`+        "somethingElse": "goes here",`) ~ `
179          "lastEntry": "goodbye"
180      }`;
181 
182     const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n"));
183 
184     (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget);
185 }
186 
187 // TODO bracket crossing cost
188 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted
189 {
190     import std.algorithm : count;
191     import std.array : Appender, empty;
192     import std.range : dropOne, front;
193 
194     Appender!T diff;
195     diff.reserve(text.length);
196 
197     // preds are called with continuous runs
198     Appender!(ElementType!T[]) addBuffer;
199     Appender!(ElementType!T[]) removeBuffer;
200 
201     auto levenshtein = Levenshtein!T(expected, text);
202     const path = levenshtein.reconstructPath;
203 
204     if (path.count!(a => a != EditOp.none) > text.length)
205     {
206         return Nullable!T.init; // no diff view, too different
207     }
208 
209     void flushAdd()
210     {
211         if (!addBuffer.data.empty)
212         {
213             diff ~= addPred(addBuffer.data);
214             addBuffer.clear;
215         }
216     }
217 
218     void flushRemove()
219     {
220         if (!removeBuffer.data.empty)
221         {
222             diff ~= removePred(removeBuffer.data);
223             removeBuffer.clear;
224         }
225     }
226 
227     void add(ElementType!T element)
228     {
229         flushRemove;
230         addBuffer ~= element;
231     }
232 
233     void remove(ElementType!T element)
234     {
235         flushAdd;
236         removeBuffer ~= element;
237     }
238 
239     void flush()
240     {
241         flushRemove;
242         flushAdd;
243     }
244 
245     void same(ElementType!T element)
246     {
247         flush;
248         diff ~= keepPred([element]);
249     }
250 
251     foreach (editOp; path)
252     {
253         final switch (editOp)
254         {
255             case EditOp.none:
256                 same(text.front);
257                 text = text.dropOne;
258                 expected = expected.dropOne;
259                 break;
260             case EditOp.insert:
261                 add(text.front);
262                 text = text.dropOne;
263                 break;
264             case EditOp.remove:
265                 remove(expected.front);
266                 expected = expected.dropOne;
267                 break;
268             case EditOp.substitute:
269                 assert(false);
270         }
271     }
272 
273     assert(text.empty, format!`leftover %s`(text));
274     assert(expected.empty);
275 
276     flush;
277 
278     return diff.data.nullable;
279 }
280 
281 private auto red(T)(T text)
282 {
283     return RED_CODE ~ text ~ CLEAR_CODE;
284 }
285 
286 private auto green(T)(T text)
287 {
288     return GREEN_CODE ~ text ~ CLEAR_CODE;
289 }
290 
291 private enum RED_CODE = "\x1b[31m";
292 private enum GREEN_CODE = "\x1b[32m";
293 private enum CLEAR_CODE = "\x1b[39m";
294 
295 /**
296  * Modified Levenshtein distance from from std.algorithm
297  * Given two ranges, returns a sequence of insertions and deletions
298  * that turn the first range into the second.
299  * This is equivalent to diff computation, though it's
300  * comparatively inefficient at O(n^2) memory and runtime.
301  *
302  * This version adds customizable path cost, used to implement orphan avoidance.
303  */
304 private struct Levenshtein(Range)
305 {
306     @disable this();
307 
308     public this(Range s, Range t)
309     {
310         import std.algorithm : min;
311 
312         const slen = walkLength(s.save);
313         const tlen = walkLength(t.save);
314 
315         this.rows = slen + 1;
316         this.cols = tlen + 1;
317         this.matrixData = new Cell[this.rows * this.cols];
318         initMatrix;
319 
320         foreach (i; 1 .. this.rows)
321         {
322             const sFront = s.front;
323             auto tCurrent = t.save;
324 
325             foreach (j; 1 .. this.cols)
326             {
327                 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement
328                     + pathCost(EditOp.insert, i, j);
329                 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement
330                     + pathCost(EditOp.remove, i, j);
331                 auto costNone = (sFront != tCurrent.front)
332                     ? float.infinity
333                     : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.none, i, j));
334 
335                 tCurrent.popFront();
336 
337                 Cell cell;
338 
339                 if (costNone <= costDeletion)
340                 {
341                     if (costNone <= costInsertion)
342                     {
343                         cell = Cell(costNone, EditOp.none);
344                     }
345                     else
346                     {
347                         cell = Cell(costInsertion, EditOp.insert);
348                     }
349                 }
350                 else
351                 {
352                     if (costDeletion <= costInsertion)
353                     {
354                         cell = Cell(costDeletion, EditOp.remove);
355                     }
356                     else
357                     {
358                         cell = Cell(costInsertion, EditOp.insert);
359                     }
360                 }
361 
362                 matrix(i, j) = cell;
363             }
364             s.popFront();
365         }
366     }
367 
368     private float pathCost(EditOp proposedOp, size_t i, size_t j)
369     {
370         import std.algorithm : countUntil, endsWith, equal, filter;
371 
372         alias step = (a, n) {
373             auto cell = a[n - 1];
374 
375             if (cell.i == 0 && cell.j == 0)
376             {
377                 return cell;
378             }
379 
380             moveByOp(cell.op, cell.i, cell.j);
381 
382             return typeof(cell)(matrix(cell.i, cell.j).op, cell.i, cell.j);
383         };
384 
385         auto infiniteTrace = tuple!("op", "i", "j")(proposedOp, i, j).recurrence!step;
386         auto trace = infiniteTrace.takeExactly(infiniteTrace.countUntil!(a => a.i == 0 && a.j == 0)).map!(a => a.op);
387         auto traceInsert = trace.filter!(op => op == EditOp.insert || op == EditOp.none);
388         auto traceRemove = trace.filter!(op => op == EditOp.remove || op == EditOp.none);
389 
390         // significantly penalize orphaned "no change" lines
391         alias orphan = op => only(op, EditOp.none, op);
392         const orphanPathCost =
393             traceInsert.take(3).equal(orphan(EditOp.insert)) ? orphanCost : 0 +
394             traceRemove.take(3).equal(orphan(EditOp.remove)) ? orphanCost : 0;
395 
396         // slightly penalize mode changes, to avoid pathologies arising from distant matches
397         const pathChangesModeCost =
398             traceInsert.take(2).equal(only(EditOp.none, EditOp.insert)) ? modeChangeCost : 0 +
399             traceRemove.take(2).equal(only(EditOp.none, EditOp.remove)) ? modeChangeCost : 0;
400 
401         return orphanPathCost + pathChangesModeCost;
402     }
403 
404     public EditOp[] reconstructPath()
405     {
406         import std.algorithm.mutation : reverse;
407 
408         EditOp[] result;
409         size_t i = this.rows - 1;
410         size_t j = this.cols - 1;
411 
412         while (i > 0 || j > 0)
413         {
414             const op = matrix(i, j).op;
415 
416             result ~= op;
417             moveByOp(op, i, j);
418         }
419         reverse(result);
420         return result;
421     }
422 
423     private void moveByOp(EditOp op, ref size_t i, ref size_t j)
424     in
425     {
426         assert(i > 0 || j > 0);
427     }
428     do
429     {
430         final switch (op)
431         {
432             case EditOp.none:
433                 i--;
434                 j--;
435                 break;
436             case EditOp.insert:
437                 j--;
438                 break;
439             case EditOp.remove:
440                 i--;
441                 break;
442             case EditOp.substitute:
443                 assert(false);
444         }
445     }
446 
447     private enum deletionIncrement = 1;
448     private enum insertionIncrement = 1;
449     private enum orphanCost = 2.5;
450     private enum modeChangeCost = 0.05;
451 
452     private alias Cell = Tuple!(float, "cost", EditOp, "op");
453     private Cell[] matrixData = null;
454     private size_t rows = 0;
455     private size_t cols = 0;
456 
457     invariant
458     {
459         assert(matrixData.length == rows * cols);
460     }
461 
462     // Treat _matrix as a rectangular array
463     private ref Cell matrix(size_t row, size_t col)
464     in
465     {
466         assert(row >= 0 && row < this.rows);
467         assert(col >= 0 && col < this.cols);
468     }
469     do
470     {
471         return this.matrixData[row * this.cols + col];
472     }
473 
474     private void initMatrix()
475     {
476         this.matrixData[] = Cell(0, EditOp.insert);
477 
478         foreach (r; 0 .. rows)
479         {
480             this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove);
481         }
482 
483         foreach (c; 0 .. cols)
484         {
485             this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert);
486         }
487     }
488 }