1 module dshould.stringcmp;
2 
3 import std.algorithm : EditOp, map;
4 import std.format : format;
5 import std.range;
6 import std.typecons;
7 import dshould.ShouldType;
8 import dshould.basic;
9 
10 void equal(Should, T)(Should should, T value, string file = __FILE__, size_t line = __LINE__)
11 if (isInstanceOf!(ShouldType, Should) && is(T == string))
12 {
13     should.allowOnlyWordsBefore!([], "equal (string)");
14 
15     should.addWord!"equal".addData!"rhs"(value).stringCmp(file, line);
16 }
17 
18 void stringCmp(Should)(Should should, string file, size_t line)
19 if (isInstanceOf!(ShouldType, Should))
20 {
21     should.allowOnlyWordsBefore!(["equal"], "");
22 
23     with (should)
24     {
25         terminateChain;
26         if (data.lhs() != data.rhs)
27         {
28             string original;
29             string diff;
30 
31             if (data.lhs().canFind("\n"))
32             {
33                 auto diffPair = multiLineDiff(data.rhs.split("\n"), data.lhs().split("\n"));
34 
35                 original = diffPair.original.join("\n") ~ "\n";
36                 diff = "\n" ~ diffPair.target.join("\n");
37             }
38             else
39             {
40                 auto diffPair = oneLineDiff(data.rhs, data.lhs());
41 
42                 original = diffPair.original;
43                 diff = diffPair.target;
44             }
45 
46             throw new FluentException(
47                 "test failed",
48                 format!`: expected '%s', but got '%s'`(original, diff),
49                 file, line
50             );
51         }
52     }
53 }
54 
55 @("collates successive replacements")
56 unittest
57 {
58     import unit_threaded.should;
59 
60     const expectedOriginal = "Hello W" ~ RED_CODE ~ "or" ~ CLEAR_CODE ~ "ld";
61     const expectedTarget = "Hello W" ~ GREEN_CODE ~ "ey" ~ CLEAR_CODE ~ "ld";
62     const diff = "Hello World".oneLineDiff("Hello Weyld");
63 
64     (diff.original ~ diff.target).shouldEqual(expectedOriginal ~ expectedTarget);
65 }
66 
67 @("does not colorize diff view for strings that are too different")
68 unittest
69 {
70     import unit_threaded.should;
71 
72     const diff = "Hello World".oneLineDiff("Goodbye Universe");
73     (diff.original ~ diff.target).shouldEqual("Hello WorldGoodbye Universe");
74 }
75 
76 @("tries to not change diff mode too often")
77 unittest
78 {
79     import unit_threaded.should;
80 
81     const cleanDiff = `method="` ~ GREEN_CODE ~ `multiply` ~ CLEAR_CODE ~ `"`;
82 
83     `method="update"`.oneLineDiff(`method="multiply"`).target.shouldEqual(cleanDiff);
84 }
85 
86 @("supports multiline diff")
87 unittest
88 {
89     import std..string : join, split;
90     import unit_threaded.should;
91 
92     // given
93     const originalText = `{
94         "id": 1,
95         "speakers": ["KK__LK02"],
96         "startTime": "2003-02-01T12:00:00Z",
97         "stopTime": "2003-02-01T14:00:00Z",
98         "route": "ROUTE",
99         "phraseId": 42
100     }`;
101     const modifiedText = `{
102         "id": 1,
103         "speakers": ["KK__LK02"],
104         "route": "ROUTE",
105         "somethingElse": "goes here",
106         "phraseId": 42
107     }`;
108 
109     // then
110     const MINUS = RED_CODE ~ `-`;
111     const PLUS = GREEN_CODE ~ `+`;
112 
113     const patchTextOriginal = ` {
114          "id": 1,
115          "speakers": ["KK__LK02"],
116 ` ~ MINUS ~ `        "startTime": "2003-02-01T12:00:00Z",` ~ CLEAR_CODE ~ `
117 ` ~ MINUS ~ `        "stopTime": "2003-02-01T14:00:00Z",` ~ CLEAR_CODE ~ `
118          "route": "ROUTE",
119          "phraseId": 42
120      }`;
121 
122     const patchTextTarget = ` {
123          "id": 1,
124          "speakers": ["KK__LK02"],
125          "route": "ROUTE",
126 ` ~ PLUS ~ `        "somethingElse": "goes here",` ~ CLEAR_CODE ~ `
127          "phraseId": 42
128      }`;
129 
130     const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n"));
131 
132     (diff.original.join("\n") ~ diff.target.join("\n")).shouldEqual(patchTextOriginal ~ patchTextTarget);
133 }
134 
135 auto oneLineDiff(string expected, string text) @safe
136 {
137     alias removePred = text => RED_CODE ~ text ~ CLEAR_CODE;
138     alias addPred = text => GREEN_CODE ~ text ~ CLEAR_CODE;
139     alias keepPred = text => text;
140     alias ditchPred = lines => string.init;
141 
142     auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text);
143     auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text);
144 
145     alias DiffPair = Tuple!(string, "original", string, "target");
146 
147     if (originalColored.isNull)
148     {
149         return DiffPair(expected, text);
150     }
151     else
152     {
153         return DiffPair(originalColored.get, targetColored.get);
154     }
155 }
156 
157 auto multiLineDiff(string[] expected, string[] text) @safe
158 {
159     alias removePred = lines => lines.map!(line => RED_CODE ~ "-" ~ line ~ CLEAR_CODE);
160     alias addPred = lines => lines.map!(line => GREEN_CODE ~ "+" ~ line ~ CLEAR_CODE);
161     alias keepPred = lines => lines.map!(line => " " ~ line);
162     alias ditchPred = lines => (string[]).init;
163 
164     auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text);
165     auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text);
166 
167     alias DiffPair = Tuple!(string[], "original", string[], "target");
168 
169     if (originalColored.isNull)
170     {
171         return DiffPair(expected, text);
172     }
173     else
174     {
175         return DiffPair(originalColored.get, targetColored.get);
176     }
177 }
178 
179 // TODO bracket crossing cost
180 Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted
181 {
182     import std.algorithm : count;
183     import std.array : Appender, empty;
184     import std.range : dropOne, front;
185 
186     Appender!T diff;
187     diff.reserve(text.length);
188 
189     // preds are called with continuous runs
190     Appender!(ElementType!T[]) addBuffer;
191     Appender!(ElementType!T[]) removeBuffer;
192 
193     auto levenshtein = Levenshtein!T(expected, text);
194     const path = levenshtein.reconstructPath;
195 
196     if (path.count!(a => a != EditOp.none) > text.length)
197     {
198         return Nullable!T.init; // no diff view, too different
199     }
200 
201     void flushAdd()
202     {
203         if (!addBuffer.data.empty)
204         {
205             diff ~= addPred(addBuffer.data);
206             addBuffer.clear;
207         }
208     }
209 
210     void flushRemove()
211     {
212         if (!removeBuffer.data.empty)
213         {
214             diff ~= removePred(removeBuffer.data);
215             removeBuffer.clear;
216         }
217     }
218 
219     void add(ElementType!T element)
220     {
221         flushRemove;
222         addBuffer ~= element;
223     }
224 
225     void remove(ElementType!T element)
226     {
227         flushAdd;
228         removeBuffer ~= element;
229     }
230 
231     void flush()
232     {
233         flushRemove;
234         flushAdd;
235     }
236 
237     void same(ElementType!T element)
238     {
239         flush;
240         diff ~= keepPred([element]);
241     }
242 
243     foreach (editOp; path)
244     {
245         final switch (editOp)
246         {
247             case EditOp.none:
248                 same(text.front);
249                 text = text.dropOne;
250                 expected = expected.dropOne;
251                 break;
252             case EditOp.insert:
253                 add(text.front);
254                 text = text.dropOne;
255                 break;
256             case EditOp.remove:
257                 remove(expected.front);
258                 expected = expected.dropOne;
259                 break;
260             case EditOp.substitute:
261                 assert(false);
262         }
263     }
264 
265     assert(text.empty, format!`leftover %s`(text));
266     assert(expected.empty);
267 
268     flush;
269 
270     return diff.data.nullable;
271 }
272 
273 enum RED_CODE = "\x1b[31m";
274 enum GREEN_CODE = "\x1b[32m";
275 enum CLEAR_CODE = "\x1b[39m";
276 
277 private struct Levenshtein(Range)
278 {
279     @disable this();
280 
281     public this(Range s, Range t)
282     {
283         import std.algorithm : min;
284 
285         const slen = walkLength(s.save);
286         const tlen = walkLength(t.save);
287 
288         this.rows = slen + 1;
289         this.cols = tlen + 1;
290         this.matrixData = new Cell[this.rows * this.cols];
291         initMatrix;
292 
293         foreach (i; 1 .. this.rows)
294         {
295             const sFront = s.front;
296             auto tCurrent = t.save;
297 
298             foreach (j; 1 .. this.cols)
299             {
300                 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement + pathCost(EditOp.insert, i, j);
301                 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement + pathCost(EditOp.remove, i, j);
302                 auto costNone = (sFront == tCurrent.front) ? this.matrix(i - 1, j - 1).cost : float.infinity;
303 
304                 tCurrent.popFront();
305 
306                 Cell cell;
307 
308                 if (costNone <= costDeletion)
309                 {
310                     if (costNone <= costInsertion)
311                     {
312                         cell = Cell(costNone, EditOp.none);
313                     }
314                     else
315                     {
316                         cell = Cell(costInsertion, EditOp.insert);
317                     }
318                 }
319                 else
320                 {
321                     if (costDeletion <= costInsertion)
322                     {
323                         cell = Cell(costDeletion, EditOp.remove);
324                     }
325                     else
326                     {
327                         cell = Cell(costInsertion, EditOp.insert);
328                     }
329                 }
330 
331                 matrix(i, j) = cell;
332             }
333             s.popFront();
334         }
335     }
336 
337     private void moveByOp(EditOp op, ref size_t i, ref size_t j)
338     in
339     {
340         assert(i > 0 || j > 0);
341     }
342     body
343     {
344         final switch (op)
345         {
346             case EditOp.none:
347                 i--;
348                 j--;
349                 break;
350             case EditOp.insert:
351                 j--;
352                 break;
353             case EditOp.remove:
354                 i--;
355                 break;
356             case EditOp.substitute:
357                 assert(false);
358         }
359     }
360 
361     private float pathCost(EditOp proposedOp, size_t i, size_t j)
362     {
363         import std.algorithm : countUntil, endsWith, equal, filter;
364 
365         alias step = (a, n) {
366             auto cell = a[n - 1];
367 
368             if (cell.i == 0 && cell.j == 0)
369             {
370                 return cell;
371             }
372 
373             moveByOp(cell.op, cell.i, cell.j);
374 
375             return typeof(cell)(matrix(cell.i, cell.j).op, cell.i, cell.j);
376         };
377 
378         auto infiniteTrace = tuple!("op", "i", "j")(proposedOp, i, j).recurrence!step;
379         auto trace = infiniteTrace.takeExactly(infiniteTrace.countUntil!(a => a.i == 0 && a.j == 0)).map!(a => a.op);
380         auto traceInsert = trace.filter!(op => op == EditOp.insert || op == EditOp.none);
381         auto traceRemove = trace.filter!(op => op == EditOp.remove || op == EditOp.none);
382 
383         return traceInsert.take(3).equal(only(EditOp.insert, EditOp.none, EditOp.insert)) ? orphanCost : 0
384             +  traceRemove.take(3).equal(only(EditOp.remove, EditOp.none, EditOp.remove)) ? orphanCost : 0;
385     }
386 
387     public EditOp[] reconstructPath()
388     {
389         import std.algorithm.mutation : reverse;
390 
391         EditOp[] result;
392         size_t i = this.rows - 1;
393         size_t j = this.cols - 1;
394 
395         while (i > 0 || j > 0)
396         {
397             const op = matrix(i, j).op;
398 
399             result ~= op;
400             moveByOp(op, i, j);
401         }
402         reverse(result);
403         return result;
404     }
405 
406     private enum deletionIncrement = 1;
407     private enum insertionIncrement = 1;
408     private enum orphanCost = 2.5;
409 
410     private alias Cell = Tuple!(float, "cost", EditOp, "op");
411     private Cell[] matrixData = null;
412     private size_t rows = 0;
413     private size_t cols = 0;
414 
415     invariant
416     {
417         assert(matrixData.length == rows * cols);
418     }
419 
420     // Treat _matrix as a rectangular array
421     private ref Cell matrix(size_t row, size_t col)
422     in
423     {
424         assert(row >= 0 && row < this.rows);
425         assert(col >= 0 && col < this.cols);
426     }
427     body
428     {
429         return this.matrixData[row * this.cols + col];
430     }
431 
432     private void initMatrix()
433     {
434         this.matrixData[] = Cell(0, EditOp.insert);
435 
436         foreach (r; 0 .. rows)
437         {
438             this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove);
439         }
440 
441         foreach (c; 0 .. cols)
442         {
443             this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert);
444         }
445     }
446 }