1 #!/usr/bin/env rdmd-dev
2 
3 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms.
4 
5     Copyright: Per Nordlöw 2014-.
6     License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
7     Authors: $(WEB Per Nordlöw)
8 
9     See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
10     See_Also: https://en.wikipedia.org/wiki/Diff
11 */
12 
13 module nxt.lcs;
14 
15 /** Longest Common Subsequence (LCS).
16     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version
17 */
18 T[] lcsR(T)(in T[] a,
19             in T[] b) pure nothrow
20 {
21     import std.range.primitives : empty;
22     if (a.empty || b.empty)
23     {
24         return null;            // undefined
25     }
26     if (a[0] == b[0])
27     {
28         return a[0] ~ lcsR(a[1 .. $],
29                            b[1 .. $]);
30     }
31     const longest = (T[] x, T[] y) => x.length > y.length ? x : y;
32     return longest(lcsR(a, b[1 .. $]),
33                    lcsR(a[1 .. $], b));
34 }
35 
36 /** Longest Common Subsequence (LCS).
37     Faster Dynamic Programming Version.
38     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version
39 */
40 T[] lcsDP(T)(in T[] a,
41              in T[] b) pure /* nothrow */
42 {
43     import std.algorithm.comparison : max;
44 
45     auto L = new uint[][](a.length + 1, b.length + 1);
46 
47     foreach (immutable i; 0 .. a.length)
48     {
49         foreach (immutable j; 0 .. b.length)
50         {
51             L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) :
52                 max(L[i + 1][j], L[i][j + 1]);
53         }
54     }
55 
56     import core.internal.traits : Unqual;
57     Unqual!T[] result;
58 
59     for (auto i = a.length, j = b.length; i > 0 && j > 0; )
60     {
61         if (a[i - 1] == b[j - 1])
62         {
63             result ~= a[i - 1];
64             i--;
65             j--;
66         }
67         else
68         {
69             if (L[i][j - 1] < L[i - 1][j])
70                 i--;
71             else
72                 j--;
73         }
74     }
75 
76     import std.algorithm : reverse;
77     result.reverse();           // not nothrow
78     return result;
79 }
80 
81 /** Get LCS Lengths. */
82 uint[] lcsLengths(R)(R xs,
83                      R ys) pure nothrow
84 {
85     import std.algorithm.comparison : max;
86 
87     auto prev = new typeof(return)(1 + ys.length);
88     auto curr = new typeof(return)(1 + ys.length);
89 
90     foreach (immutable x; xs)
91     {
92         import std.algorithm: swap;
93         swap(curr, prev);
94         size_t i = 0;
95         foreach (immutable y; ys)
96         {
97             curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]);
98             i++;
99         }
100     }
101 
102     return curr;
103 }
104 
105 void lcsDo(T)(in T[] xs,
106               in T[] ys,
107               bool[] xsInLCS,
108               in size_t idx = 0) pure nothrow
109 {
110     immutable nx = xs.length;
111     immutable ny = ys.length;
112 
113     if (nx == 0)
114         return;
115 
116     if (nx == 1)
117     {
118         import std.algorithm: canFind;
119         if (ys.canFind(xs[0]))
120             xsInLCS[idx] = true;
121     }
122     else
123     {
124         immutable mid = nx / 2;
125         const xb = xs[0.. mid];
126         const xe = xs[mid .. $];
127         immutable ll_b = lcsLengths(xb, ys);
128 
129         import std.range: retro;
130         const ll_e = lcsLengths(xe.retro,
131                                 ys.retro); // retro is slow with DMD
132 
133         // import std.algorithm.comparison : max;
134         //immutable k = iota(ny + 1)
135         //              .reduce!(max!(j => ll_b[j] + ll_e[ny - j]));
136         import std.range: iota;
137         import std.algorithm: minPos;
138         import std.typecons: tuple;
139         immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) >
140                                            tuple(ll_b[j] + ll_e[ny - j]))[0];
141 
142         lcsDo(xb, ys[0 .. k], xsInLCS, idx);
143         lcsDo(xe, ys[k .. $], xsInLCS, idx + mid);
144     }
145 }
146 
147 /** Longest Common Subsequence (LCS) using Hirschberg.
148     Linear-Space Faster Dynamic Programming Version.
149 
150     Time Complexity: O(m*n)
151     Space Complexity: O(min(m,n))
152 
153     To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and
154     do not use the $(D retro) range (replace it with $(D foreach_reverse))
155 
156     See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm
157     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version
158 */
159 const(T)[] lcs(T)(in T[] xs,
160                   in T[] ys) pure /*nothrow*/
161 {
162     auto xsInLCS = new bool[xs.length];
163     lcsDo(xs, ys, xsInLCS);
164     import std.range : zip;
165     import std.algorithm.iteration : filter, map;
166     import std.array : array;
167     return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow.
168 }
169 
170 string lcs(in string a,
171            in string b) pure /*nothrow*/
172 {
173     import std..string : representation, assumeUTF;
174     return lcs(a.representation,
175                b.representation).assumeUTF;
176 }
177 
178 unittest
179 {
180     immutable x = "thisisatest";
181     immutable y = "testing123testing";
182     immutable z = "tsitest";
183     assert(z == lcsR(x, y));
184     assert(z == lcsDP(x, y));
185     assert(z == lcs(x, y));
186     assert("" == lcs("", ""));
187     assert("" == lcs(null, null));
188 }
189 
190 unittest
191 {
192     immutable x = [1, 2, 3];
193     immutable y = [4, 5, 6];
194     immutable z = [];
195     assert(z == lcsR(x, y));
196     assert(z == lcsDP(x, y));
197     assert(z == lcs(x, y));
198 }
199 
200 unittest
201 {
202     immutable x = [1, 2, 3];
203     immutable y = [2, 3, 4];
204     immutable z = [2, 3];
205     assert(z == lcsR(x, y));
206     assert(z == lcsDP(x, y));
207     assert(z == lcs(x, y));
208 }
209 
210 unittest
211 {
212     alias T = int;
213     size_t n = 300;
214     auto x = new T[n];
215     auto y = new T[n];
216     assert(lcs(x, y).length == n);
217 }
218 
219 version(benchmark) unittest
220 {
221     import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath;
222     import nxt.random_ex: randInPlaceBlockwise;
223 
224     alias T = int;
225     size_t n = 300;
226     auto x = new T[n];
227     auto y = new T[n];
228 
229     x.randInPlaceBlockwise;
230     y.randInPlaceBlockwise;
231 
232     void bLCS() { const d = lcs(x, y); }
233     void bLD() { const d = levenshteinDistance(x, y); }
234     void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); }
235 
236     import std.datetime: benchmark;
237     import std.algorithm: map;
238     import std.array: array;
239 
240     auto r = benchmark!(bLCS, bLD, bLDAP)(1);
241     auto q = r.array.map!(a => a.msecs);
242 
243     import std.stdio: writeln;
244     writeln(q, " milliseconds");
245 }