1 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms.
2 
3     Copyright: Per Nordlöw 2014-.
4     License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
5     Authors: $(WEB Per Nordlöw)
6 
7     See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
8     See_Also: https://en.wikipedia.org/wiki/Diff
9 */
10 
11 module nxt.lcs;
12 
13 /** Longest Common Subsequence (LCS).
14     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version
15 */
16 T[] lcsR(T)(in T[] a,
17             in T[] b) pure nothrow
18 {
19     import std.range.primitives : empty;
20     if (a.empty || b.empty)
21     {
22         return null;            // undefined
23     }
24     if (a[0] == b[0])
25     {
26         return a[0] ~ lcsR(a[1 .. $],
27                            b[1 .. $]);
28     }
29     const longest = (T[] x, T[] y) => x.length > y.length ? x : y;
30     return longest(lcsR(a, b[1 .. $]),
31                    lcsR(a[1 .. $], b));
32 }
33 
34 /** Longest Common Subsequence (LCS).
35     Faster Dynamic Programming Version.
36     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version
37 */
38 T[] lcsDP(T)(in T[] a,
39              in T[] b) pure /* nothrow */
40 {
41     import std.algorithm.comparison : max;
42 
43     auto L = new uint[][](a.length + 1, b.length + 1);
44 
45     foreach (immutable i; 0 .. a.length)
46     {
47         foreach (immutable j; 0 .. b.length)
48         {
49             L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) :
50                 max(L[i + 1][j], L[i][j + 1]);
51         }
52     }
53 
54     import core.internal.traits : Unqual;
55     Unqual!T[] result;
56 
57     for (auto i = a.length, j = b.length; i > 0 && j > 0; )
58     {
59         if (a[i - 1] == b[j - 1])
60         {
61             result ~= a[i - 1];
62             i--;
63             j--;
64         }
65         else
66         {
67             if (L[i][j - 1] < L[i - 1][j])
68                 i--;
69             else
70                 j--;
71         }
72     }
73 
74     import std.algorithm : reverse;
75     result.reverse();           // not nothrow
76     return result;
77 }
78 
79 /** Get LCS Lengths. */
80 uint[] lcsLengths(R)(R xs,
81                      R ys) pure nothrow
82 {
83     import std.algorithm.comparison : max;
84 
85     auto prev = new typeof(return)(1 + ys.length);
86     auto curr = new typeof(return)(1 + ys.length);
87 
88     foreach (immutable x; xs)
89     {
90         import std.algorithm: swap;
91         swap(curr, prev);
92         size_t i = 0;
93         foreach (immutable y; ys)
94         {
95             curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]);
96             i++;
97         }
98     }
99 
100     return curr;
101 }
102 
103 void lcsDo(T)(in T[] xs,
104               in T[] ys,
105               bool[] xsInLCS,
106               in size_t idx = 0) pure nothrow
107 {
108     immutable nx = xs.length;
109     immutable ny = ys.length;
110 
111     if (nx == 0)
112         return;
113 
114     if (nx == 1)
115     {
116         import std.algorithm: canFind;
117         if (ys.canFind(xs[0]))
118             xsInLCS[idx] = true;
119     }
120     else
121     {
122         immutable mid = nx / 2;
123         const xb = xs[0.. mid];
124         const xe = xs[mid .. $];
125         immutable ll_b = lcsLengths(xb, ys);
126 
127         import std.range: retro;
128         const ll_e = lcsLengths(xe.retro,
129                                 ys.retro); // retro is slow with DMD
130 
131         // import std.algorithm.comparison : max;
132         //immutable k = iota(ny + 1)
133         //              .reduce!(max!(j => ll_b[j] + ll_e[ny - j]));
134         import std.range: iota;
135         import std.algorithm: minPos;
136         import std.typecons: tuple;
137         immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) >
138                                            tuple(ll_b[j] + ll_e[ny - j]))[0];
139 
140         lcsDo(xb, ys[0 .. k], xsInLCS, idx);
141         lcsDo(xe, ys[k .. $], xsInLCS, idx + mid);
142     }
143 }
144 
145 /** Longest Common Subsequence (LCS) using Hirschberg.
146     Linear-Space Faster Dynamic Programming Version.
147 
148     Time Complexity: O(m*n)
149     Space Complexity: O(min(m,n))
150 
151     To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and
152     do not use the $(D retro) range (replace it with $(D foreach_reverse))
153 
154     See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm
155     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version
156 */
157 const(T)[] lcs(T)(in T[] xs,
158                   in T[] ys) pure /*nothrow*/
159 {
160     auto xsInLCS = new bool[xs.length];
161     lcsDo(xs, ys, xsInLCS);
162     import std.range : zip;
163     import std.algorithm.iteration : filter, map;
164     import std.array : array;
165     return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow.
166 }
167 
168 string lcs(in string a,
169            in string b) pure /*nothrow*/
170 {
171     import std.string : representation, assumeUTF;
172     return lcs(a.representation,
173                b.representation).assumeUTF;
174 }
175 
176 unittest
177 {
178     immutable x = "thisisatest";
179     immutable y = "testing123testing";
180     immutable z = "tsitest";
181     assert(z == lcsR(x, y));
182     assert(z == lcsDP(x, y));
183     assert(z == lcs(x, y));
184     assert("" == lcs("", ""));
185     assert("" == lcs(null, null));
186 }
187 
188 unittest
189 {
190     immutable x = [1, 2, 3];
191     immutable y = [4, 5, 6];
192     immutable z = [];
193     assert(z == lcsR(x, y));
194     assert(z == lcsDP(x, y));
195     assert(z == lcs(x, y));
196 }
197 
198 unittest
199 {
200     immutable x = [1, 2, 3];
201     immutable y = [2, 3, 4];
202     immutable z = [2, 3];
203     assert(z == lcsR(x, y));
204     assert(z == lcsDP(x, y));
205     assert(z == lcs(x, y));
206 }
207 
208 unittest
209 {
210     alias T = int;
211     size_t n = 300;
212     auto x = new T[n];
213     auto y = new T[n];
214     assert(lcs(x, y).length == n);
215 }
216 
217 version(benchmark) unittest
218 {
219     import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath;
220     import nxt.random_ex: randInPlaceBlockwise;
221 
222     alias T = int;
223     size_t n = 300;
224     auto x = new T[n];
225     auto y = new T[n];
226 
227     x.randInPlaceBlockwise;
228     y.randInPlaceBlockwise;
229 
230     void bLCS() { const d = lcs(x, y); }
231     void bLD() { const d = levenshteinDistance(x, y); }
232     void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); }
233 
234     import std.datetime: benchmark;
235     import std.algorithm: map;
236     import std.array: array;
237 
238     auto r = benchmark!(bLCS, bLD, bLDAP)(1);
239     auto q = r.array.map!(a => a.msecs);
240 
241     import std.stdio: writeln;
242     writeln(q, " milliseconds");
243 }