nxt.find_ex source code

1 module nxt.find_ex;
2 
3 import std.typecons: Tuple, tuple;
4 import std.string: CaseSensitive;
5 
6 enum FindContext { inWord, inSymbol,
7                    asWord, asSymbol }
8 
9 /** Return true if $(D a) is a C-style Identifier symbol character. */
10 bool isSymbol(T)(in T a) @safe pure nothrow @nogc
11 {
12     import std.ascii: isAlpha;
13     return a.isAlpha || a == '_';
14 }
15 
16 bool isSymbolASCII(in string rest, ptrdiff_t off, size_t end) @safe pure nothrow @nogc
17 in(end <= rest.length)
18 {
19     import std.ascii: isAlphaNum;
20     return ((off == 0 || // either beginning of line
21              !rest[off - 1].isAlphaNum &&
22              rest[off - 1] != '_') &&
23             (end == rest.length || // either end of line
24              !rest[end].isAlphaNum &&
25              rest[end] != '_'));
26 }
27 
28 ///
29 @safe pure nothrow @nogc unittest
30 {
31     assert(isSymbolASCII("alpha", 0, 5));
32     assert(isSymbolASCII(" alpha ", 1, 6));
33     assert(!isSymbolASCII("driver", 0, 5));
34     assert(!isSymbolASCII("a_word", 0, 1));
35     assert(!isSymbolASCII("first_a_word", 6, 7));
36 }
37 
38 bool isWordASCII(in string rest, ptrdiff_t off, size_t end) @safe pure nothrow @nogc
39 in(end <= rest.length)
40 {
41     import std.ascii: isAlphaNum;
42     return ((off == 0 || // either beginning of line
43              !rest[off - 1].isAlphaNum) &&
44             (end == rest.length || // either end of line
45              !rest[end].isAlphaNum));
46 }
47 
48 ///
49 @safe pure nothrow @nogc unittest
50 {
51     assert(isSymbolASCII("alpha", 0, 5));
52     assert(isSymbolASCII(" alpha ", 1, 6));
53     assert(!isSymbolASCII("driver", 0, 5));
54     assert(isWordASCII("a_word", 0, 1));
55     assert(isWordASCII("first_a_word", 6, 7));
56     assert(isWordASCII("first_a", 6, 7));
57 }
58 
59 // Parameterize on isAlpha and isSymbol.
60 
61 /** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack).
62     TODO: Make it compatible (specialized) for InputRange or BidirectionalRange.
63 */
64 Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
65                                      R,
66                                      E)(R haystack,
67                                         in E needle,
68                                         FindContext ctx = FindContext.inWord,
69                                         CaseSensitive cs = CaseSensitive.yes, // TODO: Use this
70                                         size_t haystackOffset = 0) @safe pure
71 {
72     import std.ascii: isAlpha;
73     import std.algorithm: find;
74     import std.range: empty;
75 
76     auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets
77 
78     auto rest = haystack[haystackOffset..$];
79     while (needle.length <= rest.length) // for each new try at finding the needle at remainding part of haystack
80     {
81         /* debug dbg(needle, ", ", rest); */
82 
83         // find first character
84         size_t nIx = 0;         // needle index
85         rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find!
86         if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case
87         aOffs[nIx++] = &rest[0] - &haystack[0]; // store hit offset and advance acronym
88         rest = rest[1 .. $];
89         const ix0 = aOffs[0];
90 
91         // check context before point
92         final switch (ctx)
93         {
94             case FindContext.inWord:   break; // TODO: find word characters before point and set start offset
95             case FindContext.inSymbol: break; // TODO: find symbol characters before point and set start offset
96             case FindContext.asWord:
97                 if (ix0 >= 1 && haystack[ix0-1].isAlpha) { goto miss; } // quit if not word start
98                 break;
99             case FindContext.asSymbol:
100                 if (ix0 >= 1 && haystack[ix0-1].isSymbol) { goto miss; } // quit if not symbol stat
101                 break;
102         }
103 
104         while (rest)            // while elements left in haystack
105         {
106 
107             // Check elements in between
108             ptrdiff_t hit = -1;
109             import std.algorithm: countUntil;
110 			import std.functional: binaryFun;
111             final switch (ctx)
112             {
113                 case FindContext.inWord:
114                 case FindContext.asWord:
115                     hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isAlpha); break;
116                 case FindContext.inSymbol:
117                 case FindContext.asSymbol:
118                     hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isSymbol); break;
119             }
120             if (hit == -1) { goto miss; } // no hit this time
121 
122             // Check if hit
123             if (hit == rest.length || // if we searched till the end
124                 rest[hit] != needle[nIx]) // acronym letter not found
125             {
126                 rest = haystack[aOffs[0]+1 .. $]; // try beyond hit
127                 goto miss;      // no hit this time
128             }
129 
130             aOffs[nIx++] = (&rest[0] - &haystack[0]) + hit; // store hit offset and advance acronym
131             if (nIx == needle.length) // if complete acronym found
132             {
133                 return tuple(haystack[aOffs[0] .. aOffs[$-1] + 1], aOffs) ; // return its length
134             }
135             rest = rest[hit+1 .. $]; // advance in source beyound hit
136         }
137     miss:
138         continue;
139     }
140     return tuple(R.init, ptrdiff_t[].init); // no hit
141 }
142 
143 ///
144 @safe pure unittest
145 {
146     assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
147     assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
148     assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
149     assert("fopen".findAcronymAt("fpn")[0] == "fopen");
150     assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
151     assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
152     assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
153     assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
154     assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");
155 }