1 module dshould.stringcmp; 2 3 import std.algorithm : EditOp, map; 4 import std.format : format; 5 import std.range; 6 import std.typecons; 7 import dshould.ShouldType; 8 import dshould.basic; 9 10 /** 11 * Given string types, this version of the word `.equal` implements color-coded diffs. 12 * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically. 13 */ 14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__) 15 if (isInstanceOf!(ShouldType, Should) && is(T == string)) 16 { 17 should.allowOnlyWords!().before!"equal (string)"; 18 19 should.addWord!"equal".stringCmp(expected, file, line); 20 } 21 22 private void stringCmp(Should, T)(Should should, T expected, string file, size_t line) 23 if (isInstanceOf!(ShouldType, Should)) 24 { 25 import std.algorithm : canFind; 26 27 should.allowOnlyWords!("equal").before!"equal (string)"; 28 29 should.terminateChain; 30 31 auto got = should.got(); 32 33 if (got != expected) 34 { 35 string original; 36 string diff; 37 38 if (got.canFind("\n")) 39 { 40 auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n")); 41 42 original = diffPair.original.join("\n") ~ "\n"; 43 diff = "\n" ~ diffPair.target.join("\n"); 44 } 45 else 46 { 47 auto diffPair = oneLineDiff(expected, got); 48 49 original = diffPair.original; 50 diff = diffPair.target; 51 } 52 53 throw new FluentException( 54 "test failed", 55 format!`: expected '%s', but got '%s'`(original, diff), 56 file, line 57 ); 58 } 59 } 60 61 private auto oneLineDiff(string expected, string text) @safe 62 { 63 alias removePred = text => red(text); 64 alias addPred = text => green(text); 65 alias keepPred = text => text; 66 alias ditchPred = lines => string.init; 67 68 auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text); 69 auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text); 70 71 alias DiffPair = Tuple!(string, "original", string, "target"); 72 73 if (originalColored.isNull) 74 { 75 return DiffPair(expected, text); 76 } 77 else 78 { 79 return DiffPair(originalColored.get, targetColored.get); 80 } 81 } 82 83 @("collates successive replacements") 84 unittest 85 { 86 const expectedOriginal = "Hello W" ~ red("or") ~ "ld"; 87 const expectedTarget = "Hello W" ~ green("ey") ~ "ld"; 88 const diff = "Hello World".oneLineDiff("Hello Weyld"); 89 90 (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget); 91 } 92 93 @("does not colorize diff view for strings that are too different") 94 unittest 95 { 96 const diff = "Hello World".oneLineDiff("Goodbye Universe"); 97 98 (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe"); 99 } 100 101 @("tries to not change diff mode too often") 102 unittest 103 { 104 const cleanDiff = `method="` ~ GREEN_CODE ~ `multiply` ~ CLEAR_CODE ~ `"`; 105 106 `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff); 107 } 108 109 unittest 110 { 111 const originalText = "test"; 112 const targetText = "test, but"; 113 114 with (originalText.oneLineDiff(targetText)) 115 { 116 (original ~ target).should.equal(originalText ~ originalText ~ green(", but")); 117 } 118 } 119 120 private auto multiLineDiff(string[] expected, string[] text) @safe 121 { 122 alias removePred = lines => lines.map!(line => red("-" ~ line)); 123 alias addPred = lines => lines.map!(line => green("+" ~ line)); 124 alias keepPred = lines => lines.map!(line => " " ~ line); 125 alias ditchPred = lines => (string[]).init; 126 127 auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text); 128 auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text); 129 130 alias DiffPair = Tuple!(string[], "original", string[], "target"); 131 132 if (originalColored.isNull) 133 { 134 return DiffPair(expected, text); 135 } 136 else 137 { 138 return DiffPair(originalColored.get, targetColored.get); 139 } 140 } 141 142 @("supports multiline diff") 143 unittest 144 { 145 import std..string : join, split; 146 147 // given 148 const originalText = `{ 149 "int": 1, 150 "array": ["XX"], 151 "timecodeBegin": "2003-02-01T12:00:00Z", 152 "timecodeEnd": "2003-02-01T14:00:00Z", 153 "enum": "ENUM", 154 "lastEntry": "goodbye" 155 }`; 156 const modifiedText = `{ 157 "int": 1, 158 "array": ["XX"], 159 "enum": "ENUM", 160 "somethingElse": "goes here", 161 "lastEntry": "goodbye" 162 }`; 163 164 // then 165 const patchTextOriginal = ` { 166 "int": 1, 167 "array": ["XX"], 168 ` ~ red(`- "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ ` 169 ` ~ red(`- "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ ` 170 "enum": "ENUM", 171 "lastEntry": "goodbye" 172 }`; 173 174 const patchTextTarget = ` { 175 "int": 1, 176 "array": ["XX"], 177 "enum": "ENUM", 178 ` ~ green(`+ "somethingElse": "goes here",`) ~ ` 179 "lastEntry": "goodbye" 180 }`; 181 182 const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n")); 183 184 (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget); 185 } 186 187 // TODO bracket crossing cost 188 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted 189 { 190 import std.algorithm : count; 191 import std.array : Appender, empty; 192 import std.range : dropOne, front; 193 194 Appender!T diff; 195 diff.reserve(text.length); 196 197 // preds are called with continuous runs 198 Appender!(ElementType!T[]) addBuffer; 199 Appender!(ElementType!T[]) removeBuffer; 200 201 auto levenshtein = Levenshtein!T(expected, text); 202 const path = levenshtein.reconstructPath; 203 204 if (path.count!(a => a != EditOp.none) > text.length) 205 { 206 return Nullable!T.init; // no diff view, too different 207 } 208 209 void flushAdd() 210 { 211 if (!addBuffer.data.empty) 212 { 213 diff ~= addPred(addBuffer.data); 214 addBuffer.clear; 215 } 216 } 217 218 void flushRemove() 219 { 220 if (!removeBuffer.data.empty) 221 { 222 diff ~= removePred(removeBuffer.data); 223 removeBuffer.clear; 224 } 225 } 226 227 void add(ElementType!T element) 228 { 229 flushRemove; 230 addBuffer ~= element; 231 } 232 233 void remove(ElementType!T element) 234 { 235 flushAdd; 236 removeBuffer ~= element; 237 } 238 239 void flush() 240 { 241 flushRemove; 242 flushAdd; 243 } 244 245 void same(ElementType!T element) 246 { 247 flush; 248 diff ~= keepPred([element]); 249 } 250 251 foreach (editOp; path) 252 { 253 final switch (editOp) 254 { 255 case EditOp.none: 256 same(text.front); 257 text = text.dropOne; 258 expected = expected.dropOne; 259 break; 260 case EditOp.insert: 261 add(text.front); 262 text = text.dropOne; 263 break; 264 case EditOp.remove: 265 remove(expected.front); 266 expected = expected.dropOne; 267 break; 268 case EditOp.substitute: 269 assert(false); 270 } 271 } 272 273 assert(text.empty, format!`leftover %s`(text)); 274 assert(expected.empty); 275 276 flush; 277 278 return diff.data.nullable; 279 } 280 281 private auto red(T)(T text) 282 { 283 return RED_CODE ~ text ~ CLEAR_CODE; 284 } 285 286 private auto green(T)(T text) 287 { 288 return GREEN_CODE ~ text ~ CLEAR_CODE; 289 } 290 291 private enum RED_CODE = "\x1b[31m"; 292 private enum GREEN_CODE = "\x1b[32m"; 293 private enum CLEAR_CODE = "\x1b[39m"; 294 295 /** 296 * Modified Levenshtein distance from from std.algorithm 297 * Given two ranges, returns a sequence of insertions and deletions 298 * that turn the first range into the second. 299 * This is equivalent to diff computation, though it's 300 * comparatively inefficient at O(n^2) memory and runtime. 301 * 302 * This version adds customizable path cost, used to implement orphan avoidance. 303 */ 304 private struct Levenshtein(Range) 305 { 306 @disable this(); 307 308 public this(Range s, Range t) 309 { 310 import std.algorithm : min; 311 312 const slen = walkLength(s.save); 313 const tlen = walkLength(t.save); 314 315 this.rows = slen + 1; 316 this.cols = tlen + 1; 317 this.matrixData = new Cell[this.rows * this.cols]; 318 initMatrix; 319 320 foreach (i; 1 .. this.rows) 321 { 322 const sFront = s.front; 323 auto tCurrent = t.save; 324 325 foreach (j; 1 .. this.cols) 326 { 327 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement 328 + pathCost(EditOp.insert, i, j); 329 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement 330 + pathCost(EditOp.remove, i, j); 331 auto costNone = (sFront != tCurrent.front) 332 ? float.infinity 333 : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.none, i, j)); 334 335 tCurrent.popFront(); 336 337 Cell cell; 338 339 if (costNone <= costDeletion) 340 { 341 if (costNone <= costInsertion) 342 { 343 cell = Cell(costNone, EditOp.none); 344 } 345 else 346 { 347 cell = Cell(costInsertion, EditOp.insert); 348 } 349 } 350 else 351 { 352 if (costDeletion <= costInsertion) 353 { 354 cell = Cell(costDeletion, EditOp.remove); 355 } 356 else 357 { 358 cell = Cell(costInsertion, EditOp.insert); 359 } 360 } 361 362 matrix(i, j) = cell; 363 } 364 s.popFront(); 365 } 366 } 367 368 private float pathCost(EditOp proposedOp, size_t i, size_t j) 369 { 370 import std.algorithm : countUntil, endsWith, equal, filter; 371 372 alias step = (a, n) { 373 auto cell = a[n - 1]; 374 375 if (cell.i == 0 && cell.j == 0) 376 { 377 return cell; 378 } 379 380 moveByOp(cell.op, cell.i, cell.j); 381 382 return typeof(cell)(matrix(cell.i, cell.j).op, cell.i, cell.j); 383 }; 384 385 auto infiniteTrace = tuple!("op", "i", "j")(proposedOp, i, j).recurrence!step; 386 auto trace = infiniteTrace.takeExactly(infiniteTrace.countUntil!(a => a.i == 0 && a.j == 0)).map!(a => a.op); 387 auto traceInsert = trace.filter!(op => op == EditOp.insert || op == EditOp.none); 388 auto traceRemove = trace.filter!(op => op == EditOp.remove || op == EditOp.none); 389 390 // significantly penalize orphaned "no change" lines 391 alias orphan = op => only(op, EditOp.none, op); 392 const orphanPathCost = 393 traceInsert.take(3).equal(orphan(EditOp.insert)) ? orphanCost : 0 + 394 traceRemove.take(3).equal(orphan(EditOp.remove)) ? orphanCost : 0; 395 396 // slightly penalize mode changes, to avoid pathologies arising from distant matches 397 const pathChangesModeCost = 398 traceInsert.take(2).equal(only(EditOp.none, EditOp.insert)) ? modeChangeCost : 0 + 399 traceRemove.take(2).equal(only(EditOp.none, EditOp.remove)) ? modeChangeCost : 0; 400 401 return orphanPathCost + pathChangesModeCost; 402 } 403 404 public EditOp[] reconstructPath() 405 { 406 import std.algorithm.mutation : reverse; 407 408 EditOp[] result; 409 size_t i = this.rows - 1; 410 size_t j = this.cols - 1; 411 412 while (i > 0 || j > 0) 413 { 414 const op = matrix(i, j).op; 415 416 result ~= op; 417 moveByOp(op, i, j); 418 } 419 reverse(result); 420 return result; 421 } 422 423 private void moveByOp(EditOp op, ref size_t i, ref size_t j) 424 in 425 { 426 assert(i > 0 || j > 0); 427 } 428 do 429 { 430 final switch (op) 431 { 432 case EditOp.none: 433 i--; 434 j--; 435 break; 436 case EditOp.insert: 437 j--; 438 break; 439 case EditOp.remove: 440 i--; 441 break; 442 case EditOp.substitute: 443 assert(false); 444 } 445 } 446 447 private enum deletionIncrement = 1; 448 private enum insertionIncrement = 1; 449 private enum orphanCost = 2.5; 450 private enum modeChangeCost = 0.05; 451 452 private alias Cell = Tuple!(float, "cost", EditOp, "op"); 453 private Cell[] matrixData = null; 454 private size_t rows = 0; 455 private size_t cols = 0; 456 457 invariant 458 { 459 assert(matrixData.length == rows * cols); 460 } 461 462 // Treat _matrix as a rectangular array 463 private ref Cell matrix(size_t row, size_t col) 464 in 465 { 466 assert(row >= 0 && row < this.rows); 467 assert(col >= 0 && col < this.cols); 468 } 469 do 470 { 471 return this.matrixData[row * this.cols + col]; 472 } 473 474 private void initMatrix() 475 { 476 this.matrixData[] = Cell(0, EditOp.insert); 477 478 foreach (r; 0 .. rows) 479 { 480 this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove); 481 } 482 483 foreach (c; 0 .. cols) 484 { 485 this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert); 486 } 487 } 488 }