1 module nxt.find_ex;
2 
import std.typecons: Tuple, tuple;
import std.string: CaseSensitive;
import std.functional: binaryFun;
6 
/** Context in which $(D findAcronymAt) searches for an acronym.

    The `Word` variants only allow ASCII letters between two consecutive
    acronym elements; the `Symbol` variants additionally allow `_`.
    The `as` variants also reject a match whose first element is directly
    preceded by such a character; the `in` variants perform no check before
    the first element.
*/
enum FindContext
{
    inWord,   /// Acronym inside a word, no check before its first element.
    inSymbol, /// Acronym inside a symbol, no check before its first element.
    asWord,   /// Acronym that must not be directly preceded by a letter.
    asSymbol, /// Acronym that must not be directly preceded by a letter or `_`.
}
9 
/** Check whether $(D a) is an ASCII letter or an underscore.

    Note that digits are not accepted, so this only covers the characters
    that may start a C-style identifier.

    Returns: `true` if $(D a) is `_` or satisfies `std.ascii.isAlpha`.
*/
bool isSymbol(T)(in T a)
    @safe pure nothrow @nogc
{
    import std.ascii: isAlpha;

    if (a == '_')
    {
        return true;
    }
    return isAlpha(a);
}
17 
/** Check whether the span `rest[off .. end]` is delimited like a symbol.

    The span qualifies when neither the character directly before it nor the
    character directly after it is an ASCII letter, digit or underscore.
    The beginning and the end of $(D rest) both count as valid delimiters.

    Params:
        rest = line of text containing the span
        off = offset of the first character of the span
        end = offset one past the last character of the span; must not
              exceed `rest.length`

    Returns: `true` if the span is delimited on both sides.
*/
bool isSymbolASCII(string rest, ptrdiff_t off, size_t end)
    @safe pure nothrow @nogc
in
{
    assert(end <= rest.length);
}
do
{
    import std.ascii: isAlphaNum;

    // Left side: reject when an identifier character directly precedes the span.
    if (off != 0 &&
        (isAlphaNum(rest[off - 1]) || rest[off - 1] == '_'))
    {
        return false;
    }

    // Right side: reject when an identifier character directly follows the span.
    if (end != rest.length &&
        (isAlphaNum(rest[end]) || rest[end] == '_'))
    {
        return false;
    }

    return true;
}
34 
///
@safe pure nothrow @nogc unittest
{
    // span covering the whole line
    assert(isSymbolASCII("alpha", 0, 5));

    // span surrounded by spaces
    enum padded = " alpha ";
    assert(isSymbolASCII(padded, 1, padded.length - 1));

    // span directly followed by a letter
    assert(!isSymbolASCII("driver", 0, 5));

    // an underscore next to the span is part of the symbol
    assert(!isSymbolASCII("a_word", 0, 1));
    assert(!isSymbolASCII("first_a_word", 6, 7));
}
44 
/** Check whether the span `rest[off .. end]` is delimited like a word.

    The span qualifies when neither the character directly before it nor the
    character directly after it is an ASCII letter or digit. The beginning and
    the end of $(D rest) both count as valid delimiters. Unlike
    $(D isSymbolASCII), an underscore is treated as a delimiter here.

    Params:
        rest = line of text containing the span
        off = offset of the first character of the span
        end = offset one past the last character of the span; must not
              exceed `rest.length`

    Returns: `true` if the span is delimited on both sides.
*/
bool isWordASCII(string rest, ptrdiff_t off, size_t end)
    @safe pure nothrow @nogc
in
{
    assert(end <= rest.length);
}
do
{
    import std.ascii: isAlphaNum;

    // Left side: reject when a letter or digit directly precedes the span.
    if (off != 0 && isAlphaNum(rest[off - 1]))
    {
        return false;
    }

    // Right side: accept at end of line or before a non-alphanumeric character.
    return end == rest.length || !isAlphaNum(rest[end]);
}
59 
///
@safe pure nothrow @nogc unittest
{
    // span covering the whole line, or surrounded by spaces
    assert(isWordASCII("alpha", 0, 5));
    assert(isWordASCII(" alpha ", 1, 6));

    // span directly followed by a letter is not a word on its own
    assert(!isWordASCII("driver", 0, 5));

    // an underscore delimits a word
    assert(isWordASCII("a_word", 0, 1));
    assert(isWordASCII("first_a_word", 6, 7));
    assert(isWordASCII("first_a", 6, 7));
}
70 
71 // Parameterize on isAlpha and isSymbol.
72 
/** Find $(D needle) as a word or symbol acronym in $(D haystack), starting the
    search at $(D haystackOffset).

    An acronym hit is a run of $(D haystack) in which the elements of
    $(D needle) occur in order. Every element skipped between two consecutive
    acronym elements must be an ASCII letter (`Word` contexts) or an ASCII
    letter or underscore (`Symbol` contexts). For $(D FindContext.asWord) and
    $(D FindContext.asSymbol) the element directly before the first acronym
    element must additionally not be such a character.

    Params:
        pred = binary predicate used to compare a haystack element with a
               needle element
        haystack = sliceable range to search in
        needle = acronym elements to look for, in order
        ctx = kind of context the acronym has to appear in
        cs = currently unused
        haystackOffset = offset in $(D haystack) at which the search starts;
                         must not exceed `haystack.length`

    Returns: A tuple of the slice of $(D haystack) spanning from the first to
    the last matched element (inclusive), and the offsets in $(D haystack) of
    every matched element. If nothing is found, or if $(D needle) is empty, the
    slice is empty and the offsets are `null`.

    TODO Make it compatible (specialized) for InputRange or BidirectionalRange.
*/
Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
                                     R,
                                     E)(R haystack,
                                        E needle,
                                        FindContext ctx = FindContext.inWord,
                                        CaseSensitive cs = CaseSensitive.yes, // TODO Use this
                                        size_t haystackOffset = 0) @safe pure
{
    import std.algorithm: countUntil, find;
    import std.ascii: isAlpha;
    import std.range: empty;

    alias matches = binaryFun!pred;

    // Nothing to look for; also guards the `needle[0]` access below.
    if (needle.length == 0) { return tuple(R.init, ptrdiff_t[].init); }

    // Acronym hit offsets. Not `scope`, because the array is returned to the caller.
    auto aOffs = new ptrdiff_t[needle.length];

    // `rest` is always a suffix of `haystack`, so the offset of its first
    // element is `haystack.length - rest.length`.
    auto rest = haystack[haystackOffset .. $];

    nextCandidate:
    while (needle.length <= rest.length) // for each new try in the remaining part of haystack
    {
        // find first acronym element
        size_t nIx = 0;         // needle index
        rest = rest.find!pred(needle[nIx]);
        if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case
        const ix0 = haystack.length - rest.length; // offset of first hit
        aOffs[nIx++] = ix0;     // store hit offset and advance acronym
        rest = rest[1 .. $];

        // check context before first hit
        final switch (ctx)
        {
            case FindContext.inWord:   break; // TODO find word characters before point and set start offset
            case FindContext.inSymbol: break; // TODO find symbol characters before point and set start offset
            case FindContext.asWord:
                if (ix0 >= 1 && haystack[ix0 - 1].isAlpha) { continue nextCandidate; } // not a word start
                break;
            case FindContext.asSymbol:
                if (ix0 >= 1 && haystack[ix0 - 1].isSymbol) { continue nextCandidate; } // not a symbol start
                break;
        }

        // A single-element needle is complete as soon as its only element is found.
        if (nIx == needle.length)
        {
            return tuple(haystack[ix0 .. ix0 + 1], aOffs);
        }

        while (!rest.empty)     // while elements left in haystack
        {
            // Skip allowed in-between elements until either the next acronym
            // element or an element that is not allowed in this context.
            ptrdiff_t hit = -1;
            final switch (ctx)
            {
                case FindContext.inWord:
                case FindContext.asWord:
                    hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isAlpha);
                    break;
                case FindContext.inSymbol:
                case FindContext.asSymbol:
                    hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isSymbol);
                    break;
            }
            if (hit == -1) { continue nextCandidate; } // ran to the end without a hit

            // Stopped at a disallowed element instead of the acronym element.
            if (!matches(rest[hit], needle[nIx]))
            {
                rest = haystack[ix0 + 1 .. $]; // retry just beyond the first hit
                continue nextCandidate;
            }

            aOffs[nIx++] = (haystack.length - rest.length) + hit; // store hit offset and advance acronym
            if (nIx == needle.length) // complete acronym found
            {
                // slice from first to last matched element, plus all offsets
                return tuple(haystack[ix0 .. aOffs[$ - 1] + 1], aOffs);
            }
            rest = rest[hit + 1 .. $]; // advance beyond hit
        }
    }
    return tuple(R.init, ptrdiff_t[].init); // no hit
}
153 
///
@safe pure unittest
{
    // Matched slice of `needle` within `haystack` for the given context.
    static string matched(string haystack, string needle,
                          FindContext ctx = FindContext.inWord)
    {
        return haystack.findAcronymAt(needle, ctx)[0];
    }

    // an underscore that is part of the needle is matched in both contexts
    assert(matched("size_t", "sz_t", FindContext.inWord) == "size_t");
    assert(matched("size_t", "sz_t", FindContext.inSymbol) == "size_t");

    // non-ASCII prefix is skipped
    assert(matched("åäö_ab", "ab") == "ab");

    // plain word acronyms, with and without surrounding underscores
    assert(matched("fopen", "fpn") == "fopen");
    assert(matched("fopen_", "fpn") == "fopen");
    assert(matched("_fopen", "fpn", FindContext.inWord) == "fopen");
    assert(matched("_fopen", "fpn", FindContext.inSymbol) == "fopen");

    // an underscore between acronym letters only matches in symbol context
    assert(matched("f_open", "fpn", FindContext.inWord).length == 0);
    assert(matched("f_open", "fpn", FindContext.inSymbol) == "f_open");
}