1 module dshould.stringcmp;
2 
3 import std.algorithm : map;
4 import std.format : format;
5 import std.range;
6 import std.typecons;
7 import dshould.ShouldType;
8 import dshould.basic;
9 
10 /**
11  * Given string types, this version of the word `.equal` implements color-coded diffs.
12  * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically.
13  */
14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__)
15 if (isInstanceOf!(ShouldType, Should) && is(T == string))
16 {
17     should.allowOnlyWords!().before!"equal (string)";
18 
19     should.addWord!"equal".stringCmp(expected, file, line);
20 }
21 
22 private void stringCmp(Should, T)(Should should, T expected, string file, size_t line)
23 if (isInstanceOf!(ShouldType, Should))
24 {
25     import std.algorithm : canFind;
26 
27     should.allowOnlyWords!("equal").before!"equal (string)";
28 
29     should.terminateChain;
30 
31     auto got = should.got();
32 
33     if (got != expected)
34     {
35         string original;
36         string diff;
37 
38         if (got.canFind("\n"))
39         {
40             auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n"));
41 
42             original = diffPair.original.join("\n") ~ "\n";
43             diff = "\n" ~ diffPair.target.join("\n");
44         }
45         else
46         {
47             auto diffPair = oneLineDiff(expected, got);
48 
49             original = diffPair.original;
50             diff = diffPair.target;
51         }
52 
53         throw new FluentException(
54             format!`'%s'`(original),
55             format!`'%s'`(diff),
56             file, line
57         );
58     }
59 }
60 
61 private auto oneLineDiff(string expected, string text) @safe
62 {
63     alias removePred = text => red(text);
64     alias addPred = text => green(text);
65     alias keepPred = text => text;
66     alias ditchPred = lines => string.init;
67 
68     auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text);
69     auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text);
70 
71     alias DiffPair = Tuple!(string, "original", string, "target");
72 
73     if (originalColored.isNull)
74     {
75         return DiffPair(expected, text);
76     }
77     else
78     {
79         return DiffPair(originalColored.get, targetColored.get);
80     }
81 }
82 
83 @("collates successive replacements")
84 unittest
85 {
86     const expectedOriginal = "Hello W" ~ red("or") ~ "ld";
87     const expectedTarget = "Hello W" ~ green("ey") ~ "ld";
88     const diff = "Hello World".oneLineDiff("Hello Weyld");
89 
90     (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget);
91 }
92 
93 @("does not colorize diff view for strings that are too different")
94 unittest
95 {
96     const diff = "Hello World".oneLineDiff("Goodbye Universe");
97 
98     (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe");
99 }
100 
101 @("tries to not change diff mode too often")
102 unittest
103 {
104     const cleanDiff = `method="` ~ green(`multiply`) ~ `"`;
105 
106     `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff);
107 }
108 
109 unittest
110 {
111     const originalText = "test";
112     const targetText = "test, but";
113 
114     with (originalText.oneLineDiff(targetText))
115     {
116         (original ~ target).should.equal(originalText ~ originalText ~ green(", but"));
117     }
118 }
119 
120 private auto multiLineDiff(string[] expected, string[] text) @safe
121 {
122     alias removePred = lines => lines.map!(line => red("-" ~ line));
123     alias addPred = lines => lines.map!(line => green("+" ~ line));
124     alias keepPred = lines => lines.map!(line => " " ~ line);
125     alias ditchPred = lines => (string[]).init;
126 
127     auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text);
128     auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text);
129 
130     alias DiffPair = Tuple!(string[], "original", string[], "target");
131 
132     if (originalColored.isNull)
133     {
134         return DiffPair(expected, text);
135     }
136     else
137     {
138         return DiffPair(originalColored.get, targetColored.get);
139     }
140 }
141 
142 @("supports multiline diff")
143 unittest
144 {
145     import std.string : join, split;
146 
147     // given
148     const originalText = `{
149         "int": 1,
150         "array": ["XX"],
151         "timecodeBegin": "2003-02-01T12:00:00Z",
152         "timecodeEnd": "2003-02-01T14:00:00Z",
153         "enum": "ENUM",
154         "lastEntry": "goodbye"
155     }`;
156     const modifiedText = `{
157         "int": 1,
158         "array": ["XX"],
159         "enum": "ENUM",
160         "somethingElse": "goes here",
161         "lastEntry": "goodbye"
162     }`;
163 
164     // then
165     const patchTextOriginal = ` {
166          "int": 1,
167          "array": ["XX"],
168 ` ~ red(`-        "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ `
169 ` ~ red(`-        "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ `
170          "enum": "ENUM",
171          "lastEntry": "goodbye"
172      }`;
173 
174     const patchTextTarget = ` {
175          "int": 1,
176          "array": ["XX"],
177          "enum": "ENUM",
178 ` ~ green(`+        "somethingElse": "goes here",`) ~ `
179          "lastEntry": "goodbye"
180      }`;
181 
182     const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n"));
183 
184     (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget);
185 }
186 
187 @("supports comparison of large strings")
188 unittest
189 {
190     import std.string : join, split;
191 
192     // given
193     const repetitions = 500/"Hello World".length;
194     const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join;
195     const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join;
196 
197     originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan
198 }
199 
200 // TODO bracket crossing cost
201 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted
202 {
203     import std.algorithm : count;
204     import std.array : Appender, empty;
205     import std.range : dropOne, front;
206 
207     Appender!T diff;
208     diff.reserve(text.length);
209 
210     // preds are called with continuous runs
211     Appender!(ElementType!T[]) addBuffer;
212     Appender!(ElementType!T[]) removeBuffer;
213 
214     auto levenshtein = Levenshtein!T(expected, text);
215     const path = levenshtein.reconstructPath;
216 
217     if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length)
218     {
219         return Nullable!T.init; // no diff view, too different
220     }
221 
222     void flushAdd()
223     {
224         if (!addBuffer.data.empty)
225         {
226             diff ~= addPred(addBuffer.data);
227             addBuffer.clear;
228         }
229     }
230 
231     void flushRemove()
232     {
233         if (!removeBuffer.data.empty)
234         {
235             diff ~= removePred(removeBuffer.data);
236             removeBuffer.clear;
237         }
238     }
239 
240     void add(ElementType!T element)
241     {
242         flushRemove;
243         addBuffer ~= element;
244     }
245 
246     void remove(ElementType!T element)
247     {
248         flushAdd;
249         removeBuffer ~= element;
250     }
251 
252     void flush()
253     {
254         flushRemove;
255         flushAdd;
256     }
257 
258     void same(ElementType!T element)
259     {
260         flush;
261         diff ~= keepPred([element]);
262     }
263 
264     foreach (editOp; path)
265     {
266         final switch (editOp)
267         {
268             case levenshtein.EditOp.Type.keep:
269                 same(text.front);
270                 text = text.dropOne;
271                 expected = expected.dropOne;
272                 break;
273             case levenshtein.EditOp.Type.insert:
274                 add(text.front);
275                 text = text.dropOne;
276                 break;
277             case levenshtein.EditOp.Type.remove:
278                 remove(expected.front);
279                 expected = expected.dropOne;
280                 break;
281         }
282     }
283 
284     assert(text.empty, format!`leftover %s`(text));
285     assert(expected.empty);
286 
287     flush;
288 
289     return diff.data.nullable;
290 }
291 
292 package auto red(T)(T text)
293 {
294     return RED_CODE ~ text ~ CLEAR_CODE;
295 }
296 
297 package auto green(T)(T text)
298 {
299     return GREEN_CODE ~ text ~ CLEAR_CODE;
300 }
301 
302 private enum RED_CODE = "\x1b[31m";
303 private enum GREEN_CODE = "\x1b[32m";
304 private enum CLEAR_CODE = "\x1b[39m";
305 
306 /**
307  * Modified Levenshtein distance from from std.algorithm
308  * Given two ranges, returns a sequence of insertions and deletions
309  * that turn the first range into the second.
310  * This is equivalent to diff computation, though it's
311  * comparatively inefficient at O(n^2) memory and runtime.
312  *
313  * This version adds customizable path cost, used to implement orphan avoidance.
314  */
315 private struct Levenshtein(Range)
316 {
317     @disable this();
318 
319     public this(Range s, Range t)
320     {
321         import std.algorithm : min;
322 
323         const slen = walkLength(s.save);
324         const tlen = walkLength(t.save);
325 
326         this.rows = slen + 1;
327         this.cols = tlen + 1;
328         this.matrixData = new Cell[this.rows * this.cols];
329         initMatrix;
330 
331         foreach (i; 1 .. this.rows)
332         {
333             const sFront = s.front;
334             auto tCurrent = t.save;
335 
336             foreach (j; 1 .. this.cols)
337             {
338                 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement
339                     + pathCost(EditOp.insert(1), i, j);
340                 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement
341                     + pathCost(EditOp.remove(1), i, j);
342                 auto costNone = (sFront != tCurrent.front)
343                     ? float.infinity
344                     : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j));
345 
346                 tCurrent.popFront();
347 
348                 Cell cell;
349 
350                 if (costNone <= costDeletion)
351                 {
352                     if (costNone <= costInsertion)
353                     {
354                         cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op));
355                     }
356                     else
357                     {
358                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
359                     }
360                 }
361                 else
362                 {
363                     if (costDeletion <= costInsertion)
364                     {
365                         cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op));
366                     }
367                     else
368                     {
369                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
370                     }
371                 }
372 
373                 matrix(i, j) = cell;
374             }
375             s.popFront();
376         }
377     }
378 
379     private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc
380     {
381         import std.algorithm : startsWith;
382 
383         enum Path
384         {
385             insert,
386             remove,
387         }
388 
389         struct PathRange(Path path)
390         {
391             Levenshtein* self;
392 
393             size_t i;
394             size_t j;
395 
396             EditOp op;
397 
398             @property EditOp.Type front() const
399             in
400             {
401                 final switch (path)
402                 {
403                     case Path.insert:
404                         assert(op.type != EditOp.Type.remove);
405                         break;
406                     case Path.remove:
407                         assert(op.type != EditOp.Type.insert);
408                         break;
409                 }
410             }
411             do
412             {
413 
414                 return this.op.type;
415             }
416 
417             bool empty()
418             {
419                 if (this.op.type == EditOp.Type.keep && this.op.count == 0)
420                 {
421                     assert(this.i == 0 && this.j == 0);
422 
423                     return true;
424                 }
425 
426                 assert(this.op.count > 0);
427 
428                 return false;
429             }
430 
431             // advance until front is again on the path
432             void advance()
433             {
434                 while (true)
435                 {
436                     final switch (path)
437                     {
438                         case Path.insert:
439                             if (this.op.type != EditOp.Type.remove)
440                             {
441                                 return;
442                             }
443                             break;
444                         case Path.remove:
445                             if (this.op.type != EditOp.Type.insert)
446                             {
447                                 return;
448                             }
449                             break;
450                     }
451 
452                     this.self.skipByOp(this.op, this.i, this.j);
453                     this.op = this.self.matrix(this.i, this.j).op;
454                 }
455             }
456 
457             void popFront()
458             {
459                 this.self.moveByOp(this.op, this.i, this.j);
460                 this.op = this.self.matrix(this.i, this.j).op;
461 
462                 advance;
463             }
464         }
465 
466         static assert(isInputRange!(PathRange!(Path.insert)));
467         static assert(isInputRange!(PathRange!(Path.remove)));
468 
469         auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp);
470         auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp);
471 
472         // prepare for use
473         traceInsert.advance;
474         traceRemove.advance;
475 
476         // significantly penalize orphaned "no change" lines
477         alias orphan = op => only(op, EditOp.Type.keep, op);
478         const orphanPathCost =
479             traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 +
480             traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0;
481 
482         // slightly penalize mode changes, to avoid pathologies arising from distant matches
483         const pathChangesModeCost =
484             traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 +
485             traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0;
486 
487         return orphanPathCost + pathChangesModeCost;
488     }
489 
490     public EditOp.Type[] reconstructPath()
491     {
492         import std.algorithm.mutation : reverse;
493 
494         EditOp.Type[] result;
495         size_t i = this.rows - 1;
496         size_t j = this.cols - 1;
497 
498         while (i > 0 || j > 0)
499         {
500             const op = matrix(i, j).op;
501 
502             assert(op.count > 0);
503 
504             result ~= op.type.repeat(op.count).array;
505 
506             skipByOp(op, i, j);
507         }
508         reverse(result);
509         return result;
510     }
511 
512     private void moveByOp(EditOp op, ref size_t i, ref size_t j)
513     in
514     {
515         assert(i > 0 || j > 0);
516         assert(op.count > 0);
517     }
518     do
519     {
520         final switch (op.type)
521         {
522             case EditOp.Type.keep:
523                 i--;
524                 j--;
525                 break;
526             case EditOp.Type.insert:
527                 j--;
528                 break;
529             case EditOp.Type.remove:
530                 i--;
531                 break;
532         }
533     }
534 
535     private void skipByOp(EditOp op, ref size_t i, ref size_t j)
536     {
537         final switch (op.type)
538         {
539             case EditOp.Type.keep:
540                 assert(i >= op.count && j >= op.count);
541 
542                 i -= op.count;
543                 j -= op.count;
544                 break;
545             case EditOp.Type.insert:
546                 assert(j >= op.count);
547 
548                 j -= op.count;
549                 break;
550             case EditOp.Type.remove:
551                 assert(i >= op.count);
552 
553                 i -= op.count;
554                 break;
555         }
556     }
557 
558     struct EditOp
559     {
560         enum Type
561         {
562             insert,
563             remove,
564             keep,
565         }
566 
567         template constructByType(Type type)
568         {
569             static EditOp constructByType(size_t count)
570             {
571                 return EditOp(type, count);
572             }
573 
574             static EditOp constructByType(EditOp previousOp)
575             {
576                 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1);
577             }
578         }
579 
580         alias insert = constructByType!(Type.insert);
581         alias remove = constructByType!(Type.remove);
582         alias keep = constructByType!(Type.keep);
583 
584         Type type;
585 
586         size_t count; // number of times this op is repeated on the best path
587     }
588 
589     private enum deletionIncrement = 1;
590     private enum insertionIncrement = 1;
591     private enum orphanCost = 2.5;
592     private enum modeChangeCost = 0.05;
593 
594     private alias Cell = Tuple!(float, "cost", EditOp, "op");
595     private Cell[] matrixData = null;
596     private size_t rows = 0;
597     private size_t cols = 0;
598 
599     invariant
600     {
601         assert(matrixData.length == rows * cols);
602     }
603 
604     // Treat _matrix as a rectangular array
605     private ref Cell matrix(size_t row, size_t col)
606     in
607     {
608         assert(row >= 0 && row < this.rows);
609         assert(col >= 0 && col < this.cols);
610     }
611     do
612     {
613         return this.matrixData[row * this.cols + col];
614     }
615 
616     private void initMatrix()
617     {
618         this.matrixData[] = Cell(0, EditOp.keep(0));
619 
620         foreach (r; 1 .. rows)
621         {
622             this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r));
623         }
624 
625         foreach (c; 1 .. cols)
626         {
627             this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c));
628         }
629     }
630 }