1 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms. 2 3 Copyright: Per Nordlöw 2014-. 4 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 5 Authors: $(WEB Per Nordlöw) 6 7 See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem 8 See_Also: https://en.wikipedia.org/wiki/Diff 9 */ 10 11 module nxt.lcs; 12 13 /** Longest Common Subsequence (LCS). 14 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version 15 */ 16 T[] lcsR(T)(in T[] a, 17 in T[] b) pure nothrow 18 { 19 import std.range.primitives : empty; 20 if (a.empty || b.empty) 21 { 22 return null; // undefined 23 } 24 if (a[0] == b[0]) 25 { 26 return a[0] ~ lcsR(a[1 .. $], 27 b[1 .. $]); 28 } 29 const longest = (T[] x, T[] y) => x.length > y.length ? x : y; 30 return longest(lcsR(a, b[1 .. $]), 31 lcsR(a[1 .. $], b)); 32 } 33 34 /** Longest Common Subsequence (LCS). 35 Faster Dynamic Programming Version. 36 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version 37 */ 38 T[] lcsDP(T)(in T[] a, 39 in T[] b) pure /* nothrow */ 40 { 41 import std.algorithm.comparison : max; 42 43 auto L = new uint[][](a.length + 1, b.length + 1); 44 45 foreach (immutable i; 0 .. a.length) 46 { 47 foreach (immutable j; 0 .. b.length) 48 { 49 L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) : 50 max(L[i + 1][j], L[i][j + 1]); 51 } 52 } 53 54 import core.internal.traits : Unqual; 55 Unqual!T[] result; 56 57 for (auto i = a.length, j = b.length; i > 0 && j > 0; ) 58 { 59 if (a[i - 1] == b[j - 1]) 60 { 61 result ~= a[i - 1]; 62 i--; 63 j--; 64 } 65 else 66 { 67 if (L[i][j - 1] < L[i - 1][j]) 68 i--; 69 else 70 j--; 71 } 72 } 73 74 import std.algorithm : reverse; 75 result.reverse(); // not nothrow 76 return result; 77 } 78 79 /** Get LCS Lengths. */ 80 uint[] lcsLengths(R)(R xs, 81 R ys) pure nothrow 82 { 83 import std.algorithm.comparison : max; 84 85 auto prev = new typeof(return)(1 + ys.length); 86 auto curr = new typeof(return)(1 + ys.length); 87 88 foreach (immutable x; xs) 89 { 90 import std.algorithm: swap; 91 swap(curr, prev); 92 size_t i = 0; 93 foreach (immutable y; ys) 94 { 95 curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]); 96 i++; 97 } 98 } 99 100 return curr; 101 } 102 103 void lcsDo(T)(in T[] xs, 104 in T[] ys, 105 bool[] xsInLCS, 106 in size_t idx = 0) pure nothrow 107 { 108 immutable nx = xs.length; 109 immutable ny = ys.length; 110 111 if (nx == 0) 112 return; 113 114 if (nx == 1) 115 { 116 import std.algorithm: canFind; 117 if (ys.canFind(xs[0])) 118 xsInLCS[idx] = true; 119 } 120 else 121 { 122 immutable mid = nx / 2; 123 const xb = xs[0.. mid]; 124 const xe = xs[mid .. $]; 125 immutable ll_b = lcsLengths(xb, ys); 126 127 import std.range: retro; 128 const ll_e = lcsLengths(xe.retro, 129 ys.retro); // retro is slow with DMD 130 131 // import std.algorithm.comparison : max; 132 //immutable k = iota(ny + 1) 133 // .reduce!(max!(j => ll_b[j] + ll_e[ny - j])); 134 import std.range: iota; 135 import std.algorithm: minPos; 136 import std.typecons: tuple; 137 immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) > 138 tuple(ll_b[j] + ll_e[ny - j]))[0]; 139 140 lcsDo(xb, ys[0 .. k], xsInLCS, idx); 141 lcsDo(xe, ys[k .. $], xsInLCS, idx + mid); 142 } 143 } 144 145 /** Longest Common Subsequence (LCS) using Hirschberg. 146 Linear-Space Faster Dynamic Programming Version. 147 148 Time Complexity: O(m*n) 149 Space Complexity: O(min(m,n)) 150 151 To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and 152 do not use the $(D retro) range (replace it with $(D foreach_reverse)) 153 154 See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm 155 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version 156 */ 157 const(T)[] lcs(T)(in T[] xs, 158 in T[] ys) pure /*nothrow*/ 159 { 160 auto xsInLCS = new bool[xs.length]; 161 lcsDo(xs, ys, xsInLCS); 162 import std.range : zip; 163 import std.algorithm.iteration : filter, map; 164 import std.array : array; 165 return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow. 166 } 167 168 string lcs(in string a, 169 in string b) pure /*nothrow*/ 170 { 171 import std.string : representation, assumeUTF; 172 return lcs(a.representation, 173 b.representation).assumeUTF; 174 } 175 176 unittest 177 { 178 immutable x = "thisisatest"; 179 immutable y = "testing123testing"; 180 immutable z = "tsitest"; 181 assert(z == lcsR(x, y)); 182 assert(z == lcsDP(x, y)); 183 assert(z == lcs(x, y)); 184 assert("" == lcs("", "")); 185 assert("" == lcs(null, null)); 186 } 187 188 unittest 189 { 190 immutable x = [1, 2, 3]; 191 immutable y = [4, 5, 6]; 192 immutable z = []; 193 assert(z == lcsR(x, y)); 194 assert(z == lcsDP(x, y)); 195 assert(z == lcs(x, y)); 196 } 197 198 unittest 199 { 200 immutable x = [1, 2, 3]; 201 immutable y = [2, 3, 4]; 202 immutable z = [2, 3]; 203 assert(z == lcsR(x, y)); 204 assert(z == lcsDP(x, y)); 205 assert(z == lcs(x, y)); 206 } 207 208 unittest 209 { 210 alias T = int; 211 size_t n = 300; 212 auto x = new T[n]; 213 auto y = new T[n]; 214 assert(lcs(x, y).length == n); 215 } 216 217 version(benchmark) unittest 218 { 219 import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath; 220 import nxt.random_ex: randInPlaceBlockwise; 221 222 alias T = int; 223 size_t n = 300; 224 auto x = new T[n]; 225 auto y = new T[n]; 226 227 x.randInPlaceBlockwise; 228 y.randInPlaceBlockwise; 229 230 void bLCS() { const d = lcs(x, y); } 231 void bLD() { const d = levenshteinDistance(x, y); } 232 void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); } 233 234 import std.datetime: benchmark; 235 import std.algorithm: map; 236 import std.array: array; 237 238 auto r = benchmark!(bLCS, bLD, bLDAP)(1); 239 auto q = r.array.map!(a => a.msecs); 240 241 import std.stdio: writeln; 242 writeln(q, " milliseconds"); 243 }