1 module find_ex;
2 
3 import std.typecons: Tuple, tuple;
4 import std.string: CaseSensitive;
5 import std.functional: binaryFun;
6 
/** Matching context used by $(D findAcronymAt).

    The "Word" variants treat ASCII letters as the characters that may be
    skipped between two acronym letters; the "Symbol" variants use
    $(D isSymbol) (ASCII letters and underscore) instead.
 */
enum FindContext
{
    /// Word characters in between; the character preceding the first hit is not checked.
    inWord,
    /// Symbol characters in between; the character preceding the first hit is not checked.
    inSymbol,
    /// As $(D inWord), but the first hit must not be preceded by an ASCII letter.
    asWord,
    /// As $(D inSymbol), but the first hit must not be preceded by a symbol character.
    asSymbol,
}
9 
/** Tell whether $(D a) is an ASCII letter or an underscore.

    Note that digits are $(I not) accepted, so this matches the characters
    that may start a C-style identifier rather than every identifier character.

    Params:
        a = character to classify
    Returns: $(D true) if $(D a) is $(D '_') or satisfies $(D std.ascii.isAlpha).
 */
bool isSymbol(T)(in T a)
    @safe @nogc pure nothrow
{
    import std.ascii: isAlpha;

    if (a == '_')
    {
        return true;
    }
    return isAlpha(a);
}
17 
/** Check whether the slice $(D rest[off .. end]) stands alone as a symbol.

    The slice qualifies when it is neither preceded nor followed by an
    identifier character (ASCII letter, ASCII digit or underscore). The start
    and the end of $(D rest) count as valid delimiters.

    Params:
        rest = text containing the candidate
        off = index of the first character of the candidate
        end = index one past the last character of the candidate
    Returns: $(D true) if both neighbours of the candidate are delimiters.
 */
bool isSymbolASCII(string rest, ptrdiff_t off, size_t end)
    @safe @nogc pure nothrow
    in { assert(end <= rest.length); }
body
{
    import std.ascii: isAlphaNum;

    // Left neighbour, if any, must not be an identifier character.
    if (off != 0)
    {
        const before = rest[off - 1];
        if (before == '_' || before.isAlphaNum)
        {
            return false;
        }
    }

    // Right neighbour, if any, must not be an identifier character.
    if (end != rest.length)
    {
        const after = rest[end];
        if (after == '_' || after.isAlphaNum)
        {
            return false;
        }
    }

    return true;
}
unittest
{
    // delimited by the ends of the string or by spaces
    assert(isSymbolASCII("alpha", 0, 5));
    assert(isSymbolASCII(" alpha ", 1, 6));

    // followed by a letter
    assert(!isSymbolASCII("driver", 0, 5));

    // an underscore is part of a symbol, so it does not delimit
    assert(!isSymbolASCII("a_word", 0, 1));
    assert(!isSymbolASCII("first_a_word", 6, 7));
}
39 
40 // ==============================================================================================
41 
/** Check whether the slice $(D rest[off .. end]) stands alone as a word.

    The slice qualifies when it is neither preceded nor followed by an ASCII
    letter or digit. The start and the end of $(D rest) count as valid
    delimiters. Unlike $(D isSymbolASCII), an underscore is treated as a
    delimiter here.

    Params:
        rest = text containing the candidate
        off = index of the first character of the candidate
        end = index one past the last character of the candidate
    Returns: $(D true) if both neighbours of the candidate are delimiters.
 */
bool isWordASCII(string rest, ptrdiff_t off, size_t end) @safe @nogc pure nothrow
    in { assert(end <= rest.length); }
body
{
    import std.ascii: isAlphaNum;
    return ((off == 0 || // either beginning of line
             !rest[off - 1].isAlphaNum) &&
            (end == rest.length || // either end of line
             !rest[end].isAlphaNum));
}
unittest {
    // delimited by the ends of the string or by spaces
    assert(isWordASCII("alpha", 0, 5));
    assert(isWordASCII(" alpha ", 1, 6));

    // followed by a letter
    assert(!isWordASCII("driver", 0, 5));

    // an underscore delimits words (but not symbols)
    assert(isWordASCII("a_word", 0, 1));
    assert(isWordASCII("first_a_word", 6, 7));
    assert(isWordASCII("first_a", 6, 7));
}
60 
61 // Parameterize on isAlpha and isSymbol.
62 
/** Find $(D needle) as a Word or Symbol Acronym in $(D haystack), starting
    the search at $(D haystackOffset).

    An acronym hit is a sequence of positions in $(D haystack), one per element
    of $(D needle) and in the same order, such that every element lying between
    two consecutive positions is a word character (ASCII letter) or, for the
    symbol contexts, a symbol character (see $(D isSymbol)). Each needle element
    is matched at the earliest possible position after the previous one.

    Params:
        pred = binary predicate comparing a haystack element with a needle element
        haystack = sliceable range to search in
        needle = acronym elements to search for
        ctx = which characters may be skipped and whether the character before
              the first hit is checked, see $(D FindContext)
        cs = currently unused
        haystackOffset = index in $(D haystack) at which the search starts

    Returns: a tuple of the slice of $(D haystack) spanning from the first to
    the last matched element and the indexes (into $(D haystack)) of every
    matched element. If nothing is found, which includes an empty $(D needle)
    and a $(D haystackOffset) beyond the end of $(D haystack), the slice is
    empty and the index array is $(D null).

    TODO Make it compatible (specialized) for InputRange or BidirectionalRange.
*/
Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
                                     R,
                                     E)(R haystack,
                                        E needle,
                                        FindContext ctx = FindContext.inWord,
                                        CaseSensitive cs = CaseSensitive.yes, // TODO Use this
                                        size_t haystackOffset = 0) @safe pure
{
    import std.ascii: isAlpha;
    import std.algorithm: countUntil, find;
    import std.range: empty;

    alias matches = binaryFun!pred;

    // Nothing can match an empty acronym or a start beyond the haystack.
    if (needle.length == 0 || haystackOffset > haystack.length)
    {
        return tuple(R.init, ptrdiff_t[].init);
    }

    auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets

    // Invariant: `rest` is always a suffix of `haystack`, so the index of its
    // first element in `haystack` is `haystack.length - rest.length`.
    auto rest = haystack[haystackOffset .. $];

    search:
    while (needle.length <= rest.length) // for each new try in the remaining part of haystack
    {
        // find first acronym element
        rest = rest.find!pred(needle[0]);
        if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case

        const ix0 = haystack.length - rest.length; // offset of first hit
        aOffs[0] = ix0;
        rest = rest[1 .. $];
        size_t nIx = 1;         // index of next needle element to find

        // check context before point
        final switch (ctx)
        {
            case FindContext.inWord:   break; // TODO find word characters before point and set start offset
            case FindContext.inSymbol: break; // TODO find symbol characters before point and set start offset
            case FindContext.asWord:
                if (ix0 >= 1 && haystack[ix0 - 1].isAlpha) { continue search; } // not at word start
                break;
            case FindContext.asSymbol:
                if (ix0 >= 1 && haystack[ix0 - 1].isSymbol) { continue search; } // not at symbol start
                break;
        }

        // find the remaining acronym elements, if any
        while (nIx != needle.length)
        {
            // Skip elements in between: stop at the next acronym element or at
            // the first element that is not allowed in between.
            ptrdiff_t hit = -1;
            final switch (ctx)
            {
                case FindContext.inWord:
                case FindContext.asWord:
                    hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isAlpha);
                    break;
                case FindContext.inSymbol:
                case FindContext.asSymbol:
                    hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isSymbol);
                    break;
            }
            if (hit == -1) { continue search; } // ran out of haystack

            if (!matches(rest[hit], needle[nIx])) // stopped at a separator, not at the acronym element
            {
                rest = haystack[ix0 + 1 .. $]; // retry just beyond the first hit
                continue search;
            }

            aOffs[nIx++] = (haystack.length - rest.length) + hit; // store hit offset and advance acronym
            rest = rest[hit + 1 .. $]; // advance in source beyond hit
        }

        // complete acronym found
        return tuple(haystack[aOffs[0] .. aOffs[$ - 1] + 1], aOffs);
    }
    return tuple(R.init, ptrdiff_t[].init); // no hit
}

unittest
{
    assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
    assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
    assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
    assert("fopen".findAcronymAt("fpn")[0] == "fopen");
    assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
    assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
    assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");

    // single-element needle
    const single = "abc".findAcronymAt("b");
    assert(single[0] == "b");
    assert(single[1].length == 1 && single[1][0] == 1);

    // single-element needle honours the context before the hit
    const atWordStart = "ab b".findAcronymAt("b", FindContext.asWord);
    assert(atWordStart[0] == "b");
    assert(atWordStart[1].length == 1 && atWordStart[1][0] == 3);

    // custom predicate is used for every acronym element
    import std.ascii: toLower;
    assert("FoPeN".findAcronymAt!((a, b) => toLower(a) == toLower(b))("fpn")[0] == "FoPeN");

    // degenerate inputs give no hit
    assert("abc".findAcronymAt("")[0] == []);
    assert("abc".findAcronymAt("a", FindContext.inWord, CaseSensitive.yes, 4)[0] == []);
}