1 module dshould.stringcmp; 2 3 import std.algorithm : map; 4 import std.format : format; 5 import std.range; 6 import std.typecons; 7 import dshould.ShouldType; 8 import dshould.basic; 9 10 /** 11 * Given string types, this version of the word `.equal` implements color-coded diffs. 12 * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically. 13 */ 14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__) 15 if (isInstanceOf!(ShouldType, Should) && is(T == string)) 16 { 17 should.allowOnlyWords!().before!"equal (string)"; 18 19 should.addWord!"equal".stringCmp(expected, file, line); 20 } 21 22 private void stringCmp(Should, T)(Should should, T expected, string file, size_t line) 23 if (isInstanceOf!(ShouldType, Should)) 24 { 25 import std.algorithm : canFind; 26 27 should.allowOnlyWords!("equal").before!"equal (string)"; 28 29 should.terminateChain; 30 31 auto got = should.got(); 32 33 if (got != expected) 34 { 35 string original; 36 string diff; 37 38 if (got.canFind("\n")) 39 { 40 auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n")); 41 42 original = diffPair.original.join("\n") ~ "\n"; 43 diff = "\n" ~ diffPair.target.join("\n"); 44 } 45 else 46 { 47 auto diffPair = oneLineDiff(expected, got); 48 49 original = diffPair.original; 50 diff = diffPair.target; 51 } 52 53 throw new FluentException( 54 format!`'%s'`(original), 55 format!`'%s'`(diff), 56 file, line 57 ); 58 } 59 } 60 61 private auto oneLineDiff(string expected, string text) @safe 62 { 63 alias removePred = text => red(text); 64 alias addPred = text => green(text); 65 alias keepPred = text => text; 66 alias ditchPred = lines => string.init; 67 68 auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text); 69 auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text); 70 71 alias DiffPair = Tuple!(string, "original", string, "target"); 72 73 if (originalColored.isNull) 74 { 75 return DiffPair(expected, text); 76 } 77 else 78 { 79 return DiffPair(originalColored.get, targetColored.get); 80 } 81 } 82 83 @("collates successive replacements") 84 unittest 85 { 86 const expectedOriginal = "Hello W" ~ red("or") ~ "ld"; 87 const expectedTarget = "Hello W" ~ green("ey") ~ "ld"; 88 const diff = "Hello World".oneLineDiff("Hello Weyld"); 89 90 (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget); 91 } 92 93 @("does not colorize diff view for strings that are too different") 94 unittest 95 { 96 const diff = "Hello World".oneLineDiff("Goodbye Universe"); 97 98 (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe"); 99 } 100 101 @("tries to not change diff mode too often") 102 unittest 103 { 104 const cleanDiff = `method="` ~ green(`multiply`) ~ `"`; 105 106 `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff); 107 } 108 109 unittest 110 { 111 const originalText = "test"; 112 const targetText = "test, but"; 113 114 with (originalText.oneLineDiff(targetText)) 115 { 116 (original ~ target).should.equal(originalText ~ originalText ~ green(", but")); 117 } 118 } 119 120 private auto multiLineDiff(string[] expected, string[] text) @safe 121 { 122 alias removePred = lines => lines.map!(line => red("-" ~ line)); 123 alias addPred = lines => lines.map!(line => green("+" ~ line)); 124 alias keepPred = lines => lines.map!(line => " " ~ line); 125 alias ditchPred = lines => (string[]).init; 126 127 auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text); 128 auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text); 129 130 alias DiffPair = Tuple!(string[], "original", string[], "target"); 131 132 if (originalColored.isNull) 133 { 134 return DiffPair(expected, text); 135 } 136 else 137 { 138 return DiffPair(originalColored.get, targetColored.get); 139 } 140 } 141 142 @("supports multiline diff") 143 unittest 144 { 145 import std..string : join, split; 146 147 // given 148 const originalText = `{ 149 "int": 1, 150 "array": ["XX"], 151 "timecodeBegin": "2003-02-01T12:00:00Z", 152 "timecodeEnd": "2003-02-01T14:00:00Z", 153 "enum": "ENUM", 154 "lastEntry": "goodbye" 155 }`; 156 const modifiedText = `{ 157 "int": 1, 158 "array": ["XX"], 159 "enum": "ENUM", 160 "somethingElse": "goes here", 161 "lastEntry": "goodbye" 162 }`; 163 164 // then 165 const patchTextOriginal = ` { 166 "int": 1, 167 "array": ["XX"], 168 ` ~ red(`- "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ ` 169 ` ~ red(`- "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ ` 170 "enum": "ENUM", 171 "lastEntry": "goodbye" 172 }`; 173 174 const patchTextTarget = ` { 175 "int": 1, 176 "array": ["XX"], 177 "enum": "ENUM", 178 ` ~ green(`+ "somethingElse": "goes here",`) ~ ` 179 "lastEntry": "goodbye" 180 }`; 181 182 const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n")); 183 184 (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget); 185 } 186 187 @("supports comparison of large strings") 188 unittest 189 { 190 import std..string : join, split; 191 192 // given 193 const repetitions = 500/"Hello World".length; 194 const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join; 195 const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join; 196 197 originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan 198 } 199 200 // TODO bracket crossing cost 201 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted 202 { 203 import std.algorithm : count; 204 import std.array : Appender, empty; 205 import std.range : dropOne, front; 206 207 Appender!T diff; 208 diff.reserve(text.length); 209 210 // preds are called with continuous runs 211 Appender!(ElementType!T[]) addBuffer; 212 Appender!(ElementType!T[]) removeBuffer; 213 214 auto levenshtein = Levenshtein!T(expected, text); 215 const path = levenshtein.reconstructPath; 216 217 if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length) 218 { 219 return Nullable!T.init; // no diff view, too different 220 } 221 222 void flushAdd() 223 { 224 if (!addBuffer.data.empty) 225 { 226 diff ~= addPred(addBuffer.data); 227 addBuffer.clear; 228 } 229 } 230 231 void flushRemove() 232 { 233 if (!removeBuffer.data.empty) 234 { 235 diff ~= removePred(removeBuffer.data); 236 removeBuffer.clear; 237 } 238 } 239 240 void add(ElementType!T element) 241 { 242 flushRemove; 243 addBuffer ~= element; 244 } 245 246 void remove(ElementType!T element) 247 { 248 flushAdd; 249 removeBuffer ~= element; 250 } 251 252 void flush() 253 { 254 flushRemove; 255 flushAdd; 256 } 257 258 void same(ElementType!T element) 259 { 260 flush; 261 diff ~= keepPred([element]); 262 } 263 264 foreach (editOp; path) 265 { 266 final switch (editOp) 267 { 268 case levenshtein.EditOp.Type.keep: 269 same(text.front); 270 text = text.dropOne; 271 expected = expected.dropOne; 272 break; 273 case levenshtein.EditOp.Type.insert: 274 add(text.front); 275 text = text.dropOne; 276 break; 277 case levenshtein.EditOp.Type.remove: 278 remove(expected.front); 279 expected = expected.dropOne; 280 break; 281 } 282 } 283 284 assert(text.empty, format!`leftover %s`(text)); 285 assert(expected.empty); 286 287 flush; 288 289 return diff.data.nullable; 290 } 291 292 package auto red(T)(T text) 293 { 294 return RED_CODE ~ text ~ CLEAR_CODE; 295 } 296 297 package auto green(T)(T text) 298 { 299 return GREEN_CODE ~ text ~ CLEAR_CODE; 300 } 301 302 private enum RED_CODE = "\x1b[31m"; 303 private enum GREEN_CODE = "\x1b[32m"; 304 private enum CLEAR_CODE = "\x1b[39m"; 305 306 /** 307 * Modified Levenshtein distance from from std.algorithm 308 * Given two ranges, returns a sequence of insertions and deletions 309 * that turn the first range into the second. 310 * This is equivalent to diff computation, though it's 311 * comparatively inefficient at O(n^2) memory and runtime. 312 * 313 * This version adds customizable path cost, used to implement orphan avoidance. 314 */ 315 private struct Levenshtein(Range) 316 { 317 @disable this(); 318 319 public this(Range s, Range t) 320 { 321 import std.algorithm : min; 322 323 const slen = walkLength(s.save); 324 const tlen = walkLength(t.save); 325 326 this.rows = slen + 1; 327 this.cols = tlen + 1; 328 this.matrixData = new Cell[this.rows * this.cols]; 329 initMatrix; 330 331 foreach (i; 1 .. this.rows) 332 { 333 const sFront = s.front; 334 auto tCurrent = t.save; 335 336 foreach (j; 1 .. this.cols) 337 { 338 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement 339 + pathCost(EditOp.insert(1), i, j); 340 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement 341 + pathCost(EditOp.remove(1), i, j); 342 auto costNone = (sFront != tCurrent.front) 343 ? float.infinity 344 : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j)); 345 346 tCurrent.popFront(); 347 348 Cell cell; 349 350 if (costNone <= costDeletion) 351 { 352 if (costNone <= costInsertion) 353 { 354 cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op)); 355 } 356 else 357 { 358 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 359 } 360 } 361 else 362 { 363 if (costDeletion <= costInsertion) 364 { 365 cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op)); 366 } 367 else 368 { 369 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 370 } 371 } 372 373 matrix(i, j) = cell; 374 } 375 s.popFront(); 376 } 377 } 378 379 private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc 380 { 381 import std.algorithm : startsWith; 382 383 enum Path 384 { 385 insert, 386 remove, 387 } 388 389 struct PathRange(Path path) 390 { 391 Levenshtein* self; 392 393 size_t i; 394 size_t j; 395 396 EditOp op; 397 398 @property EditOp.Type front() const 399 in 400 { 401 final switch (path) 402 { 403 case Path.insert: 404 assert(op.type != EditOp.Type.remove); 405 break; 406 case Path.remove: 407 assert(op.type != EditOp.Type.insert); 408 break; 409 } 410 } 411 do 412 { 413 414 return this.op.type; 415 } 416 417 bool empty() 418 { 419 if (this.op.type == EditOp.Type.keep && this.op.count == 0) 420 { 421 assert(this.i == 0 && this.j == 0); 422 423 return true; 424 } 425 426 assert(this.op.count > 0); 427 428 return false; 429 } 430 431 // advance until front is again on the path 432 void advance() 433 { 434 while (true) 435 { 436 final switch (path) 437 { 438 case Path.insert: 439 if (this.op.type != EditOp.Type.remove) 440 { 441 return; 442 } 443 break; 444 case Path.remove: 445 if (this.op.type != EditOp.Type.insert) 446 { 447 return; 448 } 449 break; 450 } 451 452 this.self.skipByOp(this.op, this.i, this.j); 453 this.op = this.self.matrix(this.i, this.j).op; 454 } 455 } 456 457 void popFront() 458 { 459 this.self.moveByOp(this.op, this.i, this.j); 460 this.op = this.self.matrix(this.i, this.j).op; 461 462 advance; 463 } 464 } 465 466 static assert(isInputRange!(PathRange!(Path.insert))); 467 static assert(isInputRange!(PathRange!(Path.remove))); 468 469 auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp); 470 auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp); 471 472 // prepare for use 473 traceInsert.advance; 474 traceRemove.advance; 475 476 // significantly penalize orphaned "no change" lines 477 alias orphan = op => only(op, EditOp.Type.keep, op); 478 const orphanPathCost = 479 traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 + 480 traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0; 481 482 // slightly penalize mode changes, to avoid pathologies arising from distant matches 483 const pathChangesModeCost = 484 traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 + 485 traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0; 486 487 return orphanPathCost + pathChangesModeCost; 488 } 489 490 public EditOp.Type[] reconstructPath() 491 { 492 import std.algorithm.mutation : reverse; 493 494 EditOp.Type[] result; 495 size_t i = this.rows - 1; 496 size_t j = this.cols - 1; 497 498 while (i > 0 || j > 0) 499 { 500 const op = matrix(i, j).op; 501 502 assert(op.count > 0); 503 504 result ~= op.type.repeat(op.count).array; 505 506 skipByOp(op, i, j); 507 } 508 reverse(result); 509 return result; 510 } 511 512 private void moveByOp(EditOp op, ref size_t i, ref size_t j) 513 in 514 { 515 assert(i > 0 || j > 0); 516 assert(op.count > 0); 517 } 518 do 519 { 520 final switch (op.type) 521 { 522 case EditOp.Type.keep: 523 i--; 524 j--; 525 break; 526 case EditOp.Type.insert: 527 j--; 528 break; 529 case EditOp.Type.remove: 530 i--; 531 break; 532 } 533 } 534 535 private void skipByOp(EditOp op, ref size_t i, ref size_t j) 536 { 537 final switch (op.type) 538 { 539 case EditOp.Type.keep: 540 assert(i >= op.count && j >= op.count); 541 542 i -= op.count; 543 j -= op.count; 544 break; 545 case EditOp.Type.insert: 546 assert(j >= op.count); 547 548 j -= op.count; 549 break; 550 case EditOp.Type.remove: 551 assert(i >= op.count); 552 553 i -= op.count; 554 break; 555 } 556 } 557 558 struct EditOp 559 { 560 enum Type 561 { 562 insert, 563 remove, 564 keep, 565 } 566 567 template constructByType(Type type) 568 { 569 static EditOp constructByType(size_t count) 570 { 571 return EditOp(type, count); 572 } 573 574 static EditOp constructByType(EditOp previousOp) 575 { 576 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1); 577 } 578 } 579 580 alias insert = constructByType!(Type.insert); 581 alias remove = constructByType!(Type.remove); 582 alias keep = constructByType!(Type.keep); 583 584 Type type; 585 586 size_t count; // number of times this op is repeated on the best path 587 } 588 589 private enum deletionIncrement = 1; 590 private enum insertionIncrement = 1; 591 private enum orphanCost = 2.5; 592 private enum modeChangeCost = 0.05; 593 594 private alias Cell = Tuple!(float, "cost", EditOp, "op"); 595 private Cell[] matrixData = null; 596 private size_t rows = 0; 597 private size_t cols = 0; 598 599 invariant 600 { 601 assert(matrixData.length == rows * cols); 602 } 603 604 // Treat _matrix as a rectangular array 605 private ref Cell matrix(size_t row, size_t col) 606 in 607 { 608 assert(row >= 0 && row < this.rows); 609 assert(col >= 0 && col < this.cols); 610 } 611 do 612 { 613 return this.matrixData[row * this.cols + col]; 614 } 615 616 private void initMatrix() 617 { 618 this.matrixData[] = Cell(0, EditOp.keep(0)); 619 620 foreach (r; 1 .. rows) 621 { 622 this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r)); 623 } 624 625 foreach (c; 1 .. cols) 626 { 627 this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c)); 628 } 629 } 630 }