1 #!/usr/bin/env rdmd-dev 2 3 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms. 4 5 Copyright: Per Nordlöw 2014-. 6 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 7 Authors: $(WEB Per Nordlöw) 8 9 See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem 10 See_Also: https://en.wikipedia.org/wiki/Diff 11 */ 12 13 module nxt.lcs; 14 15 /** Longest Common Subsequence (LCS). 16 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version 17 */ 18 T[] lcsR(T)(in T[] a, 19 in T[] b) pure nothrow 20 { 21 import std.range.primitives : empty; 22 if (a.empty || b.empty) 23 { 24 return null; // undefined 25 } 26 if (a[0] == b[0]) 27 { 28 return a[0] ~ lcsR(a[1 .. $], 29 b[1 .. $]); 30 } 31 const longest = (T[] x, T[] y) => x.length > y.length ? x : y; 32 return longest(lcsR(a, b[1 .. $]), 33 lcsR(a[1 .. $], b)); 34 } 35 36 /** Longest Common Subsequence (LCS). 37 Faster Dynamic Programming Version. 38 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version 39 */ 40 T[] lcsDP(T)(in T[] a, 41 in T[] b) pure /* nothrow */ 42 { 43 import std.algorithm.comparison : max; 44 45 auto L = new uint[][](a.length + 1, b.length + 1); 46 47 foreach (immutable i; 0 .. a.length) 48 { 49 foreach (immutable j; 0 .. b.length) 50 { 51 L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) : 52 max(L[i + 1][j], L[i][j + 1]); 53 } 54 } 55 56 import core.internal.traits : Unqual; 57 Unqual!T[] result; 58 59 for (auto i = a.length, j = b.length; i > 0 && j > 0; ) 60 { 61 if (a[i - 1] == b[j - 1]) 62 { 63 result ~= a[i - 1]; 64 i--; 65 j--; 66 } 67 else 68 { 69 if (L[i][j - 1] < L[i - 1][j]) 70 i--; 71 else 72 j--; 73 } 74 } 75 76 import std.algorithm : reverse; 77 result.reverse(); // not nothrow 78 return result; 79 } 80 81 /** Get LCS Lengths. */ 82 uint[] lcsLengths(R)(R xs, 83 R ys) pure nothrow 84 { 85 import std.algorithm.comparison : max; 86 87 auto prev = new typeof(return)(1 + ys.length); 88 auto curr = new typeof(return)(1 + ys.length); 89 90 foreach (immutable x; xs) 91 { 92 import std.algorithm: swap; 93 swap(curr, prev); 94 size_t i = 0; 95 foreach (immutable y; ys) 96 { 97 curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]); 98 i++; 99 } 100 } 101 102 return curr; 103 } 104 105 void lcsDo(T)(in T[] xs, 106 in T[] ys, 107 bool[] xsInLCS, 108 in size_t idx = 0) pure nothrow 109 { 110 immutable nx = xs.length; 111 immutable ny = ys.length; 112 113 if (nx == 0) 114 return; 115 116 if (nx == 1) 117 { 118 import std.algorithm: canFind; 119 if (ys.canFind(xs[0])) 120 xsInLCS[idx] = true; 121 } 122 else 123 { 124 immutable mid = nx / 2; 125 const xb = xs[0.. mid]; 126 const xe = xs[mid .. $]; 127 immutable ll_b = lcsLengths(xb, ys); 128 129 import std.range: retro; 130 const ll_e = lcsLengths(xe.retro, 131 ys.retro); // retro is slow with DMD 132 133 // import std.algorithm.comparison : max; 134 //immutable k = iota(ny + 1) 135 // .reduce!(max!(j => ll_b[j] + ll_e[ny - j])); 136 import std.range: iota; 137 import std.algorithm: minPos; 138 import std.typecons: tuple; 139 immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) > 140 tuple(ll_b[j] + ll_e[ny - j]))[0]; 141 142 lcsDo(xb, ys[0 .. k], xsInLCS, idx); 143 lcsDo(xe, ys[k .. $], xsInLCS, idx + mid); 144 } 145 } 146 147 /** Longest Common Subsequence (LCS) using Hirschberg. 148 Linear-Space Faster Dynamic Programming Version. 149 150 Time Complexity: O(m*n) 151 Space Complexity: O(min(m,n)) 152 153 To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and 154 do not use the $(D retro) range (replace it with $(D foreach_reverse)) 155 156 See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm 157 See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version 158 */ 159 const(T)[] lcs(T)(in T[] xs, 160 in T[] ys) pure /*nothrow*/ 161 { 162 auto xsInLCS = new bool[xs.length]; 163 lcsDo(xs, ys, xsInLCS); 164 import std.range : zip; 165 import std.algorithm.iteration : filter, map; 166 import std.array : array; 167 return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow. 168 } 169 170 string lcs(in string a, 171 in string b) pure /*nothrow*/ 172 { 173 import std..string : representation, assumeUTF; 174 return lcs(a.representation, 175 b.representation).assumeUTF; 176 } 177 178 unittest 179 { 180 immutable x = "thisisatest"; 181 immutable y = "testing123testing"; 182 immutable z = "tsitest"; 183 assert(z == lcsR(x, y)); 184 assert(z == lcsDP(x, y)); 185 assert(z == lcs(x, y)); 186 assert("" == lcs("", "")); 187 assert("" == lcs(null, null)); 188 } 189 190 unittest 191 { 192 immutable x = [1, 2, 3]; 193 immutable y = [4, 5, 6]; 194 immutable z = []; 195 assert(z == lcsR(x, y)); 196 assert(z == lcsDP(x, y)); 197 assert(z == lcs(x, y)); 198 } 199 200 unittest 201 { 202 immutable x = [1, 2, 3]; 203 immutable y = [2, 3, 4]; 204 immutable z = [2, 3]; 205 assert(z == lcsR(x, y)); 206 assert(z == lcsDP(x, y)); 207 assert(z == lcs(x, y)); 208 } 209 210 unittest 211 { 212 alias T = int; 213 size_t n = 300; 214 auto x = new T[n]; 215 auto y = new T[n]; 216 assert(lcs(x, y).length == n); 217 } 218 219 version(benchmark) unittest 220 { 221 import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath; 222 import nxt.random_ex: randInPlaceBlockwise; 223 224 alias T = int; 225 size_t n = 300; 226 auto x = new T[n]; 227 auto y = new T[n]; 228 229 x.randInPlaceBlockwise; 230 y.randInPlaceBlockwise; 231 232 void bLCS() { const d = lcs(x, y); } 233 void bLD() { const d = levenshteinDistance(x, y); } 234 void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); } 235 236 import std.datetime: benchmark; 237 import std.algorithm: map; 238 import std.array: array; 239 240 auto r = benchmark!(bLCS, bLD, bLDAP)(1); 241 auto q = r.array.map!(a => a.msecs); 242 243 import std.stdio: writeln; 244 writeln(q, " milliseconds"); 245 }