1 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms. 2 3 Copyright: Per Nordlöw 2014-. 4 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 5 Authors: $(WEB Per Nordlöw) 6 7 See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem 8 See_Also: https://en.wikipedia.org/wiki/Diff 9 */ 10 11 module nxt.lcs; 12 13 /** Longest Common Subsequence (LCS). 14 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version 15 */ 16 T[] lcsR(T)(in T[] a, 17 in T[] b) pure nothrow 18 { 19 import std.range.primitives : empty; 20 if (a.empty || b.empty) 21 return null; // undefined 22 if (a[0] == b[0]) 23 return a[0] ~ lcsR(a[1 .. $], 24 b[1 .. $]); 25 const longest = (T[] x, T[] y) => x.length > y.length ? x : y; 26 return longest(lcsR(a, b[1 .. $]), 27 lcsR(a[1 .. $], b)); 28 } 29 30 /** Longest Common Subsequence (LCS). 31 Faster Dynamic Programming Version. 32 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version 33 */ 34 T[] lcsDP(T)(in T[] a, 35 in T[] b) pure /* nothrow */ 36 { 37 import std.algorithm.comparison : max; 38 39 auto L = new uint[][](a.length + 1, b.length + 1); 40 41 foreach (immutable i; 0 .. a.length) 42 foreach (immutable j; 0 .. b.length) 43 L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) : max(L[i + 1][j], L[i][j + 1]); 44 45 import core.internal.traits : Unqual; 46 Unqual!T[] result; 47 48 for (auto i = a.length, j = b.length; i > 0 && j > 0; ) 49 { 50 if (a[i - 1] == b[j - 1]) 51 { 52 result ~= a[i - 1]; 53 i--; 54 j--; 55 } 56 else 57 { 58 if (L[i][j - 1] < L[i - 1][j]) 59 i--; 60 else 61 j--; 62 } 63 } 64 65 import std.algorithm : reverse; 66 result.reverse(); // not nothrow 67 return result; 68 } 69 70 /** Get LCS Lengths. */ 71 uint[] lcsLengths(R)(R xs, 72 R ys) pure nothrow 73 { 74 import std.algorithm.comparison : max; 75 76 auto prev = new typeof(return)(1 + ys.length); 77 auto curr = new typeof(return)(1 + ys.length); 78 79 foreach (immutable x; xs) 80 { 81 import std.algorithm: swap; 82 swap(curr, prev); 83 size_t i = 0; 84 foreach (immutable y; ys) 85 { 86 curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]); 87 i++; 88 } 89 } 90 91 return curr; 92 } 93 94 void lcsDo(T)(in T[] xs, 95 in T[] ys, 96 bool[] xsInLCS, 97 in size_t idx = 0) pure nothrow 98 { 99 immutable nx = xs.length; 100 immutable ny = ys.length; 101 102 if (nx == 0) 103 return; 104 105 if (nx == 1) 106 { 107 import std.algorithm: canFind; 108 if (ys.canFind(xs[0])) 109 xsInLCS[idx] = true; 110 } 111 else 112 { 113 immutable mid = nx / 2; 114 const xb = xs[0.. mid]; 115 const xe = xs[mid .. $]; 116 immutable ll_b = lcsLengths(xb, ys); 117 118 import std.range: retro; 119 const ll_e = lcsLengths(xe.retro, 120 ys.retro); // retro is slow with DMD 121 122 // import std.algorithm.comparison : max; 123 //immutable k = iota(ny + 1) 124 // .reduce!(max!(j => ll_b[j] + ll_e[ny - j])); 125 import std.range: iota; 126 import std.algorithm: minPos; 127 import std.typecons: tuple; 128 immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) > 129 tuple(ll_b[j] + ll_e[ny - j]))[0]; 130 131 lcsDo(xb, ys[0 .. k], xsInLCS, idx); 132 lcsDo(xe, ys[k .. $], xsInLCS, idx + mid); 133 } 134 } 135 136 /** Longest Common Subsequence (LCS) using Hirschberg. 137 Linear-Space Faster Dynamic Programming Version. 138 139 Time Complexity: O(m*n) 140 Space Complexity: O(min(m,n)) 141 142 To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and 143 do not use the $(D retro) range (replace it with $(D foreach_reverse)) 144 145 See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm 146 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version 147 */ 148 const(T)[] lcs(T)(in T[] xs, 149 in T[] ys) pure /*nothrow*/ 150 { 151 auto xsInLCS = new bool[xs.length]; 152 lcsDo(xs, ys, xsInLCS); 153 import std.range : zip; 154 import std.algorithm.iteration : filter, map; 155 import std.array : array; 156 return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow. 157 } 158 159 import std.string : representation, assumeUTF; 160 161 immutable(char)[] lcs(inout(char)[] a, inout(char)[] b) pure /*nothrow*/ 162 => cast(typeof(return))(lcs(a.representation, b.representation).assumeUTF); 163 164 unittest { 165 immutable x = "thisisatest"; 166 immutable y = "testing123testing"; 167 immutable z = "tsitest"; 168 assert(z == lcsR(x, y)); 169 assert(z == lcsDP(x, y)); 170 assert(z == lcs(x, y)); 171 assert("" == lcs("", "")); 172 } 173 174 unittest { 175 immutable x = [1, 2, 3]; 176 immutable y = [4, 5, 6]; 177 immutable z = []; 178 assert(z == lcsR(x, y)); 179 assert(z == lcsDP(x, y)); 180 assert(z == lcs(x, y)); 181 } 182 183 unittest { 184 immutable x = [1, 2, 3]; 185 immutable y = [2, 3, 4]; 186 immutable z = [2, 3]; 187 assert(z == lcsR(x, y)); 188 assert(z == lcsDP(x, y)); 189 assert(z == lcs(x, y)); 190 } 191 192 unittest { 193 alias T = int; 194 size_t n = 300; 195 auto x = new T[n]; 196 auto y = new T[n]; 197 assert(lcs(x, y).length == n); 198 } 199 200 version (nxt_benchmark) unittest { 201 import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath; 202 import nxt.random_ex: randInPlaceBlockwise; 203 204 alias T = int; 205 size_t n = 300; 206 auto x = new T[n]; 207 auto y = new T[n]; 208 209 x.randInPlaceBlockwise; 210 y.randInPlaceBlockwise; 211 212 void bLCS() { const d = lcs(x, y); } 213 void bLD() { const d = levenshteinDistance(x, y); } 214 void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); } 215 216 import std.datetime: benchmark; 217 import std.algorithm: map; 218 import std.array: array; 219 220 auto r = benchmark!(bLCS, bLD, bLDAP)(1); 221 auto q = r.array.map!(a => a.msecs); 222 223 import std.stdio: writeln; 224 writeln(q, " milliseconds"); 225 }