1 module dshould.stringcmp; 2 3 import std.algorithm : map; 4 import std.format : format; 5 import std.range; 6 import std.typecons; 7 import dshould.ShouldType; 8 import dshould.basic; 9 10 /** 11 * Given string types, this version of the word `.equal` implements color-coded diffs. 12 * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically. 13 */ 14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__) 15 if (isInstanceOf!(ShouldType, Should) && is(T == string)) 16 { 17 auto got = should.got(); 18 19 should.allowOnlyWords!().before!"equal (string)"; 20 should.terminateChain; 21 22 if (got != expected) 23 { 24 stringCmpError(got, expected, Yes.quote, file, line); 25 } 26 } 27 28 package void stringCmpError(string got, string expected, Flag!"quote" quote, string file, size_t line) 29 { 30 import std.algorithm : canFind; 31 32 string original; 33 string diff; 34 const bool multiline = got.canFind("\n"); 35 36 if (multiline) 37 { 38 auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n")); 39 40 original = diffPair.original.join("\n"); 41 diff = diffPair.target.join("\n"); 42 } 43 else 44 { 45 auto diffPair = oneLineDiff(expected, got); 46 47 original = diffPair.original; 48 diff = diffPair.target; 49 } 50 51 const string fmt = (multiline ? "\n" : "") ~ (quote ? "'%s'" : "%s"); 52 53 throw new FluentException( 54 format(fmt, original), 55 format(fmt, diff), 56 file, line 57 ); 58 } 59 60 private auto oneLineDiff(string expected, string text) @safe 61 { 62 alias removePred = text => red(text); 63 alias addPred = text => green(text); 64 alias keepPred = text => text; 65 alias ditchPred = lines => string.init; 66 67 auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text); 68 auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text); 69 70 alias DiffPair = Tuple!(string, "original", string, "target"); 71 72 if (originalColored.isNull) 73 { 74 return DiffPair(expected, text); 75 } 76 else 77 { 78 return DiffPair(originalColored.get, targetColored.get); 79 } 80 } 81 82 @("collates successive replacements") 83 unittest 84 { 85 const expectedOriginal = "Hello W" ~ red("or") ~ "ld"; 86 const expectedTarget = "Hello W" ~ green("ey") ~ "ld"; 87 const diff = "Hello World".oneLineDiff("Hello Weyld"); 88 89 (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget); 90 } 91 92 @("does not colorize diff view for strings that are too different") 93 unittest 94 { 95 const diff = "Hello World".oneLineDiff("Goodbye Universe"); 96 97 (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe"); 98 } 99 100 @("tries to not change diff mode too often") 101 unittest 102 { 103 const cleanDiff = `method="` ~ green(`multiply`) ~ `"`; 104 105 `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff); 106 } 107 108 unittest 109 { 110 const originalText = "test"; 111 const targetText = "test, but"; 112 113 with (originalText.oneLineDiff(targetText)) 114 { 115 (original ~ target).should.equal(originalText ~ originalText ~ green(", but")); 116 } 117 } 118 119 private auto multiLineDiff(string[] expected, string[] text) @safe 120 { 121 alias removePred = lines => lines.map!(line => red("-" ~ line)); 122 alias addPred = lines => lines.map!(line => green("+" ~ line)); 123 alias keepPred = lines => lines.map!(line => " " ~ line); 124 alias ditchPred = lines => (string[]).init; 125 126 auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text); 127 auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text); 128 129 alias DiffPair = Tuple!(string[], "original", string[], "target"); 130 131 if (originalColored.isNull) 132 { 133 return DiffPair(expected, text); 134 } 135 else 136 { 137 return DiffPair(originalColored.get, targetColored.get); 138 } 139 } 140 141 @("supports multiline diff") 142 unittest 143 { 144 import std..string : join, split; 145 146 // given 147 const originalText = `{ 148 "int": 1, 149 "array": ["XX"], 150 "timecodeBegin": "2003-02-01T12:00:00Z", 151 "timecodeEnd": "2003-02-01T14:00:00Z", 152 "enum": "ENUM", 153 "lastEntry": "goodbye" 154 }`; 155 const modifiedText = `{ 156 "int": 1, 157 "array": ["XX"], 158 "enum": "ENUM", 159 "somethingElse": "goes here", 160 "lastEntry": "goodbye" 161 }`; 162 163 // then 164 const patchTextOriginal = ` { 165 "int": 1, 166 "array": ["XX"], 167 ` ~ red(`- "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ ` 168 ` ~ red(`- "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ ` 169 "enum": "ENUM", 170 "lastEntry": "goodbye" 171 }`; 172 173 const patchTextTarget = ` { 174 "int": 1, 175 "array": ["XX"], 176 "enum": "ENUM", 177 ` ~ green(`+ "somethingElse": "goes here",`) ~ ` 178 "lastEntry": "goodbye" 179 }`; 180 181 const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n")); 182 183 (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget); 184 } 185 186 @("supports comparison of large strings") 187 unittest 188 { 189 import std..string : join, split; 190 191 // given 192 const repetitions = 500/"Hello World".length; 193 const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join; 194 const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join; 195 196 originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan 197 } 198 199 // TODO bracket crossing cost 200 private Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(T expected, T text) @trusted 201 { 202 import std.algorithm : count; 203 import std.array : Appender, empty; 204 import std.range : dropOne, front; 205 206 Appender!T diff; 207 diff.reserve(text.length); 208 209 // preds are called with continuous runs 210 Appender!(ElementType!T[]) addBuffer; 211 Appender!(ElementType!T[]) removeBuffer; 212 213 auto levenshtein = Levenshtein!T(expected, text); 214 const path = levenshtein.reconstructPath; 215 216 if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length) 217 { 218 return Nullable!T.init; // no diff view, too different 219 } 220 221 void flushAdd() 222 { 223 if (!addBuffer.data.empty) 224 { 225 diff ~= addPred(addBuffer.data); 226 addBuffer.clear; 227 } 228 } 229 230 void flushRemove() 231 { 232 if (!removeBuffer.data.empty) 233 { 234 diff ~= removePred(removeBuffer.data); 235 removeBuffer.clear; 236 } 237 } 238 239 void add(ElementType!T element) 240 { 241 flushRemove; 242 addBuffer ~= element; 243 } 244 245 void remove(ElementType!T element) 246 { 247 flushAdd; 248 removeBuffer ~= element; 249 } 250 251 void flush() 252 { 253 flushRemove; 254 flushAdd; 255 } 256 257 void same(ElementType!T element) 258 { 259 flush; 260 diff ~= keepPred([element]); 261 } 262 263 foreach (editOp; path) 264 { 265 final switch (editOp) 266 { 267 case levenshtein.EditOp.Type.keep: 268 same(text.front); 269 text = text.dropOne; 270 expected = expected.dropOne; 271 break; 272 case levenshtein.EditOp.Type.insert: 273 add(text.front); 274 text = text.dropOne; 275 break; 276 case levenshtein.EditOp.Type.remove: 277 remove(expected.front); 278 expected = expected.dropOne; 279 break; 280 } 281 } 282 283 assert(text.empty, format!`leftover %s`(text)); 284 assert(expected.empty); 285 286 flush; 287 288 return diff.data.nullable; 289 } 290 291 package auto red(T)(T text) 292 { 293 return RED_CODE ~ text ~ CLEAR_CODE; 294 } 295 296 package auto green(T)(T text) 297 { 298 return GREEN_CODE ~ text ~ CLEAR_CODE; 299 } 300 301 private enum RED_CODE = "\x1b[31m"; 302 private enum GREEN_CODE = "\x1b[32m"; 303 private enum CLEAR_CODE = "\x1b[39m"; 304 305 /** 306 * Modified Levenshtein distance from from std.algorithm 307 * Given two ranges, returns a sequence of insertions and deletions 308 * that turn the first range into the second. 309 * This is equivalent to diff computation, though it's 310 * comparatively inefficient at O(n^2) memory and runtime. 311 * 312 * This version adds customizable path cost, used to implement orphan avoidance. 313 */ 314 private struct Levenshtein(Range) 315 { 316 @disable this(); 317 318 public this(Range s, Range t) 319 { 320 import std.algorithm : min; 321 322 const slen = walkLength(s.save); 323 const tlen = walkLength(t.save); 324 325 this.rows = slen + 1; 326 this.cols = tlen + 1; 327 this.matrixData = new Cell[this.rows * this.cols]; 328 initMatrix; 329 330 foreach (i; 1 .. this.rows) 331 { 332 const sFront = s.front; 333 auto tCurrent = t.save; 334 335 foreach (j; 1 .. this.cols) 336 { 337 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement 338 + pathCost(EditOp.insert(1), i, j); 339 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement 340 + pathCost(EditOp.remove(1), i, j); 341 auto costNone = (sFront != tCurrent.front) 342 ? float.infinity 343 : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j)); 344 345 tCurrent.popFront(); 346 347 Cell cell; 348 349 if (costNone <= costDeletion) 350 { 351 if (costNone <= costInsertion) 352 { 353 cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op)); 354 } 355 else 356 { 357 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 358 } 359 } 360 else 361 { 362 if (costDeletion <= costInsertion) 363 { 364 cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op)); 365 } 366 else 367 { 368 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 369 } 370 } 371 372 matrix(i, j) = cell; 373 } 374 s.popFront(); 375 } 376 } 377 378 private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc 379 { 380 import std.algorithm : startsWith; 381 382 enum Path 383 { 384 insert, 385 remove, 386 } 387 388 struct PathRange(Path path) 389 { 390 Levenshtein* self; 391 392 size_t i; 393 size_t j; 394 395 EditOp op; 396 397 @property EditOp.Type front() const 398 in 399 { 400 final switch (path) 401 { 402 case Path.insert: 403 assert(op.type != EditOp.Type.remove); 404 break; 405 case Path.remove: 406 assert(op.type != EditOp.Type.insert); 407 break; 408 } 409 } 410 do 411 { 412 413 return this.op.type; 414 } 415 416 bool empty() 417 { 418 if (this.op.type == EditOp.Type.keep && this.op.count == 0) 419 { 420 assert(this.i == 0 && this.j == 0); 421 422 return true; 423 } 424 425 assert(this.op.count > 0); 426 427 return false; 428 } 429 430 // advance until front is again on the path 431 void advance() 432 { 433 while (true) 434 { 435 final switch (path) 436 { 437 case Path.insert: 438 if (this.op.type != EditOp.Type.remove) 439 { 440 return; 441 } 442 break; 443 case Path.remove: 444 if (this.op.type != EditOp.Type.insert) 445 { 446 return; 447 } 448 break; 449 } 450 451 this.self.skipByOp(this.op, this.i, this.j); 452 this.op = this.self.matrix(this.i, this.j).op; 453 } 454 } 455 456 void popFront() 457 { 458 this.self.moveByOp(this.op, this.i, this.j); 459 this.op = this.self.matrix(this.i, this.j).op; 460 461 advance; 462 } 463 } 464 465 static assert(isInputRange!(PathRange!(Path.insert))); 466 static assert(isInputRange!(PathRange!(Path.remove))); 467 468 auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp); 469 auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp); 470 471 // prepare for use 472 traceInsert.advance; 473 traceRemove.advance; 474 475 // significantly penalize orphaned "no change" lines 476 alias orphan = op => only(op, EditOp.Type.keep, op); 477 const orphanPathCost = 478 traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 + 479 traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0; 480 481 // slightly penalize mode changes, to avoid pathologies arising from distant matches 482 const pathChangesModeCost = 483 traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 + 484 traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0; 485 486 return orphanPathCost + pathChangesModeCost; 487 } 488 489 public EditOp.Type[] reconstructPath() 490 { 491 import std.algorithm.mutation : reverse; 492 493 EditOp.Type[] result; 494 size_t i = this.rows - 1; 495 size_t j = this.cols - 1; 496 497 while (i > 0 || j > 0) 498 { 499 const op = matrix(i, j).op; 500 501 assert(op.count > 0); 502 503 result ~= op.type.repeat(op.count).array; 504 505 skipByOp(op, i, j); 506 } 507 reverse(result); 508 return result; 509 } 510 511 private void moveByOp(EditOp op, ref size_t i, ref size_t j) 512 in 513 { 514 assert(i > 0 || j > 0); 515 assert(op.count > 0); 516 } 517 do 518 { 519 final switch (op.type) 520 { 521 case EditOp.Type.keep: 522 i--; 523 j--; 524 break; 525 case EditOp.Type.insert: 526 j--; 527 break; 528 case EditOp.Type.remove: 529 i--; 530 break; 531 } 532 } 533 534 private void skipByOp(EditOp op, ref size_t i, ref size_t j) 535 { 536 final switch (op.type) 537 { 538 case EditOp.Type.keep: 539 assert(i >= op.count && j >= op.count); 540 541 i -= op.count; 542 j -= op.count; 543 break; 544 case EditOp.Type.insert: 545 assert(j >= op.count); 546 547 j -= op.count; 548 break; 549 case EditOp.Type.remove: 550 assert(i >= op.count); 551 552 i -= op.count; 553 break; 554 } 555 } 556 557 struct EditOp 558 { 559 enum Type 560 { 561 insert, 562 remove, 563 keep, 564 } 565 566 template constructByType(Type type) 567 { 568 static EditOp constructByType(size_t count) 569 { 570 return EditOp(type, count); 571 } 572 573 static EditOp constructByType(EditOp previousOp) 574 { 575 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1); 576 } 577 } 578 579 alias insert = constructByType!(Type.insert); 580 alias remove = constructByType!(Type.remove); 581 alias keep = constructByType!(Type.keep); 582 583 Type type; 584 585 size_t count; // number of times this op is repeated on the best path 586 } 587 588 private enum deletionIncrement = 1; 589 private enum insertionIncrement = 1; 590 private enum orphanCost = 2.5; 591 private enum modeChangeCost = 0.05; 592 593 private alias Cell = Tuple!(float, "cost", EditOp, "op"); 594 private Cell[] matrixData = null; 595 private size_t rows = 0; 596 private size_t cols = 0; 597 598 invariant 599 { 600 assert(matrixData.length == rows * cols); 601 } 602 603 // Treat _matrix as a rectangular array 604 private ref Cell matrix(size_t row, size_t col) 605 in 606 { 607 assert(row >= 0 && row < this.rows); 608 assert(col >= 0 && col < this.cols); 609 } 610 do 611 { 612 return this.matrixData[row * this.cols + col]; 613 } 614 615 private void initMatrix() 616 { 617 this.matrixData[] = Cell(0, EditOp.keep(0)); 618 619 foreach (r; 1 .. rows) 620 { 621 this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r)); 622 } 623 624 foreach (c; 1 .. cols) 625 { 626 this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c)); 627 } 628 } 629 }