1 /** Longest Common Subsequence, typically used as a base for writing diff/compare algorithms.
2 
3     Copyright: Per Nordlöw 2014-.
4     License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
5     Authors: $(WEB Per Nordlöw)
6 
7     See_Also: https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
8     See_Also: https://en.wikipedia.org/wiki/Diff
9 */
10 
11 module nxt.lcs;
12 
13 /** Longest Common Subsequence (LCS).
14     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Recursive_version
15 */
16 T[] lcsR(T)(in T[] a,
17             in T[] b) pure nothrow
18 {
19     import std.range.primitives : empty;
20     if (a.empty || b.empty)
21         return null;            // undefined
22     if (a[0] == b[0])
23         return a[0] ~ lcsR(a[1 .. $],
24                            b[1 .. $]);
25     const longest = (T[] x, T[] y) => x.length > y.length ? x : y;
26     return longest(lcsR(a, b[1 .. $]),
27                    lcsR(a[1 .. $], b));
28 }
29 
30 /** Longest Common Subsequence (LCS).
31     Faster Dynamic Programming Version.
32     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Faster_dynamic_programming_version
33 */
34 T[] lcsDP(T)(in T[] a,
35              in T[] b) pure /* nothrow */
36 {
37     import std.algorithm.comparison : max;
38 
39     auto L = new uint[][](a.length + 1, b.length + 1);
40 
41     foreach (immutable i; 0 .. a.length)
42         foreach (immutable j; 0 .. b.length)
43             L[i + 1][j + 1] = (a[i] == b[j]) ? (1 + L[i][j]) : max(L[i + 1][j], L[i][j + 1]);
44 
45     import core.internal.traits : Unqual;
46     Unqual!T[] result;
47 
48     for (auto i = a.length, j = b.length; i > 0 && j > 0; )
49     {
50         if (a[i - 1] == b[j - 1])
51         {
52             result ~= a[i - 1];
53             i--;
54             j--;
55         }
56         else
57         {
58             if (L[i][j - 1] < L[i - 1][j])
59                 i--;
60             else
61                 j--;
62         }
63     }
64 
65     import std.algorithm : reverse;
66     result.reverse();           // not nothrow
67     return result;
68 }
69 
70 /** Get LCS Lengths. */
71 uint[] lcsLengths(R)(R xs,
72                      R ys) pure nothrow
73 {
74     import std.algorithm.comparison : max;
75 
76     auto prev = new typeof(return)(1 + ys.length);
77     auto curr = new typeof(return)(1 + ys.length);
78 
79     foreach (immutable x; xs)
80     {
81         import std.algorithm: swap;
82         swap(curr, prev);
83         size_t i = 0;
84         foreach (immutable y; ys)
85         {
86             curr[i + 1] = (x == y) ? prev[i] + 1 : max(curr[i], prev[i + 1]);
87             i++;
88         }
89     }
90 
91     return curr;
92 }
93 
94 void lcsDo(T)(in T[] xs,
95               in T[] ys,
96               bool[] xsInLCS,
97               in size_t idx = 0) pure nothrow
98 {
99     immutable nx = xs.length;
100     immutable ny = ys.length;
101 
102     if (nx == 0)
103         return;
104 
105     if (nx == 1)
106     {
107         import std.algorithm: canFind;
108         if (ys.canFind(xs[0]))
109             xsInLCS[idx] = true;
110     }
111     else
112     {
113         immutable mid = nx / 2;
114         const xb = xs[0.. mid];
115         const xe = xs[mid .. $];
116         immutable ll_b = lcsLengths(xb, ys);
117 
118         import std.range: retro;
119         const ll_e = lcsLengths(xe.retro,
120                                 ys.retro); // retro is slow with DMD
121 
122         // import std.algorithm.comparison : max;
123         //immutable k = iota(ny + 1)
124         //              .reduce!(max!(j => ll_b[j] + ll_e[ny - j]));
125         import std.range: iota;
126         import std.algorithm: minPos;
127         import std.typecons: tuple;
128         immutable k = iota(ny + 1).minPos!((i, j) => tuple(ll_b[i] + ll_e[ny - i]) >
129                                            tuple(ll_b[j] + ll_e[ny - j]))[0];
130 
131         lcsDo(xb, ys[0 .. k], xsInLCS, idx);
132         lcsDo(xe, ys[k .. $], xsInLCS, idx + mid);
133     }
134 }
135 
136 /** Longest Common Subsequence (LCS) using Hirschberg.
137     Linear-Space Faster Dynamic Programming Version.
138 
139     Time Complexity: O(m*n)
140     Space Complexity: O(min(m,n))
141 
142     To speed up this code on DMD remove the memory allocations from $(D lcsLengths), and
143     do not use the $(D retro) range (replace it with $(D foreach_reverse))
144 
145     See_Also: https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm
146     See_Also: http://rosettacode.org/wiki/Longest_common_subsequence#Hirschberg_algorithm_version
147 */
148 const(T)[] lcs(T)(in T[] xs,
149                   in T[] ys) pure /*nothrow*/
150 {
151     auto xsInLCS = new bool[xs.length];
152     lcsDo(xs, ys, xsInLCS);
153     import std.range : zip;
154     import std.algorithm.iteration : filter, map;
155     import std.array : array;
156     return zip(xs, xsInLCS).filter!q{ a[1] }.map!q{ a[0] }.array; // Not nothrow.
157 }
158 
159 import std.string : representation, assumeUTF;
160 
161 immutable(char)[] lcs(inout(char)[] a, inout(char)[] b) pure /*nothrow*/
162 	=> cast(typeof(return))(lcs(a.representation, b.representation).assumeUTF);
163 
164 unittest
165 {
166     immutable x = "thisisatest";
167     immutable y = "testing123testing";
168     immutable z = "tsitest";
169     assert(z == lcsR(x, y));
170     assert(z == lcsDP(x, y));
171     assert(z == lcs(x, y));
172     assert("" == lcs("", ""));
173 }
174 
175 unittest
176 {
177     immutable x = [1, 2, 3];
178     immutable y = [4, 5, 6];
179     immutable z = [];
180     assert(z == lcsR(x, y));
181     assert(z == lcsDP(x, y));
182     assert(z == lcs(x, y));
183 }
184 
185 unittest
186 {
187     immutable x = [1, 2, 3];
188     immutable y = [2, 3, 4];
189     immutable z = [2, 3];
190     assert(z == lcsR(x, y));
191     assert(z == lcsDP(x, y));
192     assert(z == lcs(x, y));
193 }
194 
195 unittest
196 {
197     alias T = int;
198     size_t n = 300;
199     auto x = new T[n];
200     auto y = new T[n];
201     assert(lcs(x, y).length == n);
202 }
203 
204 version(benchmark) unittest
205 {
206     import std.algorithm : levenshteinDistance, levenshteinDistanceAndPath;
207     import nxt.random_ex: randInPlaceBlockwise;
208 
209     alias T = int;
210     size_t n = 300;
211     auto x = new T[n];
212     auto y = new T[n];
213 
214     x.randInPlaceBlockwise;
215     y.randInPlaceBlockwise;
216 
217     void bLCS() { const d = lcs(x, y); }
218     void bLD() { const d = levenshteinDistance(x, y); }
219     void bLDAP() { const dp = levenshteinDistanceAndPath(x, y); }
220 
221     import std.datetime: benchmark;
222     import std.algorithm: map;
223     import std.array: array;
224 
225     auto r = benchmark!(bLCS, bLD, bLDAP)(1);
226     auto q = r.array.map!(a => a.msecs);
227 
228     import std.stdio: writeln;
229     writeln(q, " milliseconds");
230 }