1 module dshould.stringcmp;
2 
3 import std.algorithm : map;
4 import std.format : format;
5 import std.range;
6 import std.typecons;
7 import dshould.ShouldType;
8 import dshould.basic;
9 
10 /**
11  * Given string types, this version of the word `.equal` implements color-coded diffs.
12  * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically.
13  */
14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__)
15 if (isInstanceOf!(ShouldType, Should) && is(T == string))
16 {
17     auto got = should.got();
18 
19     should.allowOnlyWords!().before!"equal (string)";
20     should.terminateChain;
21 
22     if (got != expected)
23     {
24         stringCmpError(got, expected, Yes.quote, file, line);
25     }
26 }
27 
28 package void stringCmpError(string got, string expected, Flag!"quote" quote, string file, size_t line)
29 {
30     import std.algorithm : canFind;
31 
32     string original;
33     string diff;
34     const bool multiline = got.canFind("\n");
35 
36     if (multiline)
37     {
38         auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n"));
39 
40         original = diffPair.original.join("\n");
41         diff = diffPair.target.join("\n");
42     }
43     else
44     {
45         auto diffPair = oneLineDiff(expected, got);
46 
47         original = diffPair.original;
48         diff = diffPair.target;
49     }
50 
51     const string fmt = (multiline ? "\n" : "") ~ (quote ? "'%s'" : "%s");
52 
53     throw new FluentException(
54         format(fmt, original),
55         format(fmt, diff),
56         file, line
57     );
58 }
59 
60 private auto oneLineDiff(string expected, string text) @safe
61 {
62     alias removePred = text => red(text);
63     alias addPred = text => green(text);
64     alias keepPred = text => text;
65     alias ditchPred = lines => string.init;
66 
67     auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text);
68     auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text);
69 
70     alias DiffPair = Tuple!(string, "original", string, "target");
71 
72     if (originalColored.isNull)
73     {
74         return DiffPair(expected, text);
75     }
76     else
77     {
78         return DiffPair(originalColored.get, targetColored.get);
79     }
80 }
81 
82 @("collates successive replacements")
83 unittest
84 {
85     const expectedOriginal = "Hello W" ~ red("or") ~ "ld";
86     const expectedTarget = "Hello W" ~ green("ey") ~ "ld";
87     const diff = "Hello World".oneLineDiff("Hello Weyld");
88 
89     (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget);
90 }
91 
92 @("does not colorize diff view for strings that are too different")
93 unittest
94 {
95     const diff = "Hello World".oneLineDiff("Goodbye Universe");
96 
97     (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe");
98 }
99 
100 @("tries to not change diff mode too often")
101 unittest
102 {
103     const cleanDiff = `method="` ~ green(`multiply`) ~ `"`;
104 
105     `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff);
106 }
107 
108 unittest
109 {
110     const originalText = "test";
111     const targetText = "test, but";
112 
113     with (originalText.oneLineDiff(targetText))
114     {
115         (original ~ target).should.equal(originalText ~ originalText ~ green(", but"));
116     }
117 }
118 
119 private auto multiLineDiff(string[] expected, string[] text) @safe
120 {
121     alias removePred = lines => lines.map!(line => red("-" ~ line));
122     alias addPred = lines => lines.map!(line => green("+" ~ line));
123     alias keepPred = lines => lines.map!(line => " " ~ line);
124     alias ditchPred = lines => (string[]).init;
125 
126     auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text);
127     auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text);
128 
129     alias DiffPair = Tuple!(string[], "original", string[], "target");
130 
131     if (originalColored.isNull)
132     {
133         return DiffPair(expected, text);
134     }
135     else
136     {
137         return DiffPair(originalColored.get, targetColored.get);
138     }
139 }
140 
141 @("supports multiline diff")
142 unittest
143 {
144     import std..string : join, split;
145 
146     // given
147     const originalText = `{
148         "int": 1,
149         "array": ["XX"],
150         "timecodeBegin": "2003-02-01T12:00:00Z",
151         "timecodeEnd": "2003-02-01T14:00:00Z",
152         "enum": "ENUM",
153         "lastEntry": "goodbye"
154     }`;
155     const modifiedText = `{
156         "int": 1,
157         "array": ["XX"],
158         "enum": "ENUM",
159         "somethingElse": "goes here",
160         "lastEntry": "goodbye"
161     }`;
162 
163     // then
164     const patchTextOriginal = ` {
165          "int": 1,
166          "array": ["XX"],
167 ` ~ red(`-        "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ `
168 ` ~ red(`-        "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ `
169          "enum": "ENUM",
170          "lastEntry": "goodbye"
171      }`;
172 
173     const patchTextTarget = ` {
174          "int": 1,
175          "array": ["XX"],
176          "enum": "ENUM",
177 ` ~ green(`+        "somethingElse": "goes here",`) ~ `
178          "lastEntry": "goodbye"
179      }`;
180 
181     const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n"));
182 
183     (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget);
184 }
185 
186 @("supports comparison of large strings")
187 unittest
188 {
189     import std..string : join, split;
190 
191     // given
192     const repetitions = 500/"Hello World".length;
193     const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join;
194     const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join;
195 
196     originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan
197 }
198 
199 // TODO bracket crossing cost
200 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted
201 {
202     import std.algorithm : count;
203     import std.array : Appender, empty;
204     import std.range : dropOne, front;
205 
206     Appender!T diff;
207     diff.reserve(text.length);
208 
209     // preds are called with continuous runs
210     Appender!(ElementType!T[]) addBuffer;
211     Appender!(ElementType!T[]) removeBuffer;
212 
213     auto levenshtein = Levenshtein!T(expected, text);
214     const path = levenshtein.reconstructPath;
215 
216     if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length)
217     {
218         return Nullable!T.init; // no diff view, too different
219     }
220 
221     void flushAdd()
222     {
223         if (!addBuffer.data.empty)
224         {
225             diff ~= addPred(addBuffer.data);
226             addBuffer.clear;
227         }
228     }
229 
230     void flushRemove()
231     {
232         if (!removeBuffer.data.empty)
233         {
234             diff ~= removePred(removeBuffer.data);
235             removeBuffer.clear;
236         }
237     }
238 
239     void add(ElementType!T element)
240     {
241         flushRemove;
242         addBuffer ~= element;
243     }
244 
245     void remove(ElementType!T element)
246     {
247         flushAdd;
248         removeBuffer ~= element;
249     }
250 
251     void flush()
252     {
253         flushRemove;
254         flushAdd;
255     }
256 
257     void same(ElementType!T element)
258     {
259         flush;
260         diff ~= keepPred([element]);
261     }
262 
263     foreach (editOp; path)
264     {
265         final switch (editOp)
266         {
267             case levenshtein.EditOp.Type.keep:
268                 same(text.front);
269                 text = text.dropOne;
270                 expected = expected.dropOne;
271                 break;
272             case levenshtein.EditOp.Type.insert:
273                 add(text.front);
274                 text = text.dropOne;
275                 break;
276             case levenshtein.EditOp.Type.remove:
277                 remove(expected.front);
278                 expected = expected.dropOne;
279                 break;
280         }
281     }
282 
283     assert(text.empty, format!`leftover %s`(text));
284     assert(expected.empty);
285 
286     flush;
287 
288     return diff.data.nullable;
289 }
290 
291 package auto red(T)(T text)
292 {
293     return RED_CODE ~ text ~ CLEAR_CODE;
294 }
295 
296 package auto green(T)(T text)
297 {
298     return GREEN_CODE ~ text ~ CLEAR_CODE;
299 }
300 
301 private enum RED_CODE = "\x1b[31m";
302 private enum GREEN_CODE = "\x1b[32m";
303 private enum CLEAR_CODE = "\x1b[39m";
304 
305 /**
306  * Modified Levenshtein distance from from std.algorithm
307  * Given two ranges, returns a sequence of insertions and deletions
308  * that turn the first range into the second.
309  * This is equivalent to diff computation, though it's
310  * comparatively inefficient at O(n^2) memory and runtime.
311  *
312  * This version adds customizable path cost, used to implement orphan avoidance.
313  */
314 private struct Levenshtein(Range)
315 {
316     @disable this();
317 
318     public this(Range s, Range t)
319     {
320         import std.algorithm : min;
321 
322         const slen = walkLength(s.save);
323         const tlen = walkLength(t.save);
324 
325         this.rows = slen + 1;
326         this.cols = tlen + 1;
327         this.matrixData = new Cell[this.rows * this.cols];
328         initMatrix;
329 
330         foreach (i; 1 .. this.rows)
331         {
332             const sFront = s.front;
333             auto tCurrent = t.save;
334 
335             foreach (j; 1 .. this.cols)
336             {
337                 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement
338                     + pathCost(EditOp.insert(1), i, j);
339                 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement
340                     + pathCost(EditOp.remove(1), i, j);
341                 auto costNone = (sFront != tCurrent.front)
342                     ? float.infinity
343                     : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j));
344 
345                 tCurrent.popFront();
346 
347                 Cell cell;
348 
349                 if (costNone <= costDeletion)
350                 {
351                     if (costNone <= costInsertion)
352                     {
353                         cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op));
354                     }
355                     else
356                     {
357                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
358                     }
359                 }
360                 else
361                 {
362                     if (costDeletion <= costInsertion)
363                     {
364                         cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op));
365                     }
366                     else
367                     {
368                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
369                     }
370                 }
371 
372                 matrix(i, j) = cell;
373             }
374             s.popFront();
375         }
376     }
377 
378     private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc
379     {
380         import std.algorithm : startsWith;
381 
382         enum Path
383         {
384             insert,
385             remove,
386         }
387 
388         struct PathRange(Path path)
389         {
390             Levenshtein* self;
391 
392             size_t i;
393             size_t j;
394 
395             EditOp op;
396 
397             @property EditOp.Type front() const
398             in
399             {
400                 final switch (path)
401                 {
402                     case Path.insert:
403                         assert(op.type != EditOp.Type.remove);
404                         break;
405                     case Path.remove:
406                         assert(op.type != EditOp.Type.insert);
407                         break;
408                 }
409             }
410             do
411             {
412 
413                 return this.op.type;
414             }
415 
416             bool empty()
417             {
418                 if (this.op.type == EditOp.Type.keep && this.op.count == 0)
419                 {
420                     assert(this.i == 0 && this.j == 0);
421 
422                     return true;
423                 }
424 
425                 assert(this.op.count > 0);
426 
427                 return false;
428             }
429 
430             // advance until front is again on the path
431             void advance()
432             {
433                 while (true)
434                 {
435                     final switch (path)
436                     {
437                         case Path.insert:
438                             if (this.op.type != EditOp.Type.remove)
439                             {
440                                 return;
441                             }
442                             break;
443                         case Path.remove:
444                             if (this.op.type != EditOp.Type.insert)
445                             {
446                                 return;
447                             }
448                             break;
449                     }
450 
451                     this.self.skipByOp(this.op, this.i, this.j);
452                     this.op = this.self.matrix(this.i, this.j).op;
453                 }
454             }
455 
456             void popFront()
457             {
458                 this.self.moveByOp(this.op, this.i, this.j);
459                 this.op = this.self.matrix(this.i, this.j).op;
460 
461                 advance;
462             }
463         }
464 
465         static assert(isInputRange!(PathRange!(Path.insert)));
466         static assert(isInputRange!(PathRange!(Path.remove)));
467 
468         auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp);
469         auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp);
470 
471         // prepare for use
472         traceInsert.advance;
473         traceRemove.advance;
474 
475         // significantly penalize orphaned "no change" lines
476         alias orphan = op => only(op, EditOp.Type.keep, op);
477         const orphanPathCost =
478             traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 +
479             traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0;
480 
481         // slightly penalize mode changes, to avoid pathologies arising from distant matches
482         const pathChangesModeCost =
483             traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 +
484             traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0;
485 
486         return orphanPathCost + pathChangesModeCost;
487     }
488 
489     public EditOp.Type[] reconstructPath()
490     {
491         import std.algorithm.mutation : reverse;
492 
493         EditOp.Type[] result;
494         size_t i = this.rows - 1;
495         size_t j = this.cols - 1;
496 
497         while (i > 0 || j > 0)
498         {
499             const op = matrix(i, j).op;
500 
501             assert(op.count > 0);
502 
503             result ~= op.type.repeat(op.count).array;
504 
505             skipByOp(op, i, j);
506         }
507         reverse(result);
508         return result;
509     }
510 
511     private void moveByOp(EditOp op, ref size_t i, ref size_t j)
512     in
513     {
514         assert(i > 0 || j > 0);
515         assert(op.count > 0);
516     }
517     do
518     {
519         final switch (op.type)
520         {
521             case EditOp.Type.keep:
522                 i--;
523                 j--;
524                 break;
525             case EditOp.Type.insert:
526                 j--;
527                 break;
528             case EditOp.Type.remove:
529                 i--;
530                 break;
531         }
532     }
533 
534     private void skipByOp(EditOp op, ref size_t i, ref size_t j)
535     {
536         final switch (op.type)
537         {
538             case EditOp.Type.keep:
539                 assert(i >= op.count && j >= op.count);
540 
541                 i -= op.count;
542                 j -= op.count;
543                 break;
544             case EditOp.Type.insert:
545                 assert(j >= op.count);
546 
547                 j -= op.count;
548                 break;
549             case EditOp.Type.remove:
550                 assert(i >= op.count);
551 
552                 i -= op.count;
553                 break;
554         }
555     }
556 
557     struct EditOp
558     {
559         enum Type
560         {
561             insert,
562             remove,
563             keep,
564         }
565 
566         template constructByType(Type type)
567         {
568             static EditOp constructByType(size_t count)
569             {
570                 return EditOp(type, count);
571             }
572 
573             static EditOp constructByType(EditOp previousOp)
574             {
575                 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1);
576             }
577         }
578 
579         alias insert = constructByType!(Type.insert);
580         alias remove = constructByType!(Type.remove);
581         alias keep = constructByType!(Type.keep);
582 
583         Type type;
584 
585         size_t count; // number of times this op is repeated on the best path
586     }
587 
588     private enum deletionIncrement = 1;
589     private enum insertionIncrement = 1;
590     private enum orphanCost = 2.5;
591     private enum modeChangeCost = 0.05;
592 
593     private alias Cell = Tuple!(float, "cost", EditOp, "op");
594     private Cell[] matrixData = null;
595     private size_t rows = 0;
596     private size_t cols = 0;
597 
598     invariant
599     {
600         assert(matrixData.length == rows * cols);
601     }
602 
603     // Treat _matrix as a rectangular array
604     private ref Cell matrix(size_t row, size_t col)
605     in
606     {
607         assert(row >= 0 && row < this.rows);
608         assert(col >= 0 && col < this.cols);
609     }
610     do
611     {
612         return this.matrixData[row * this.cols + col];
613     }
614 
615     private void initMatrix()
616     {
617         this.matrixData[] = Cell(0, EditOp.keep(0));
618 
619         foreach (r; 1 .. rows)
620         {
621             this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r));
622         }
623 
624         foreach (c; 1 .. cols)
625         {
626             this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c));
627         }
628     }
629 }