1 module nxt.find_ex;
2 
3 import std.typecons: Tuple, tuple;
4 import std.string: CaseSensitive;
5 
6 enum FindContext { inWord, inSymbol,
7 				   asWord, asSymbol }
8 
9 /** Return true if $(D a) is a C-style Identifier symbol character. */
10 bool isSymbol(T)(in T a) pure nothrow @safe @nogc
11 {
12 	import std.ascii: isAlpha;
13 	return a.isAlpha || a == '_';
14 }
15 
16 bool isSymbolASCII(in string rest, ptrdiff_t off, size_t end) pure nothrow @safe @nogc
17 in(end <= rest.length)
18 {
19 	import std.ascii: isAlphaNum;
20 	return ((off == 0 || // either beginning of line
21 			 !rest[off - 1].isAlphaNum &&
22 			 rest[off - 1] != '_') &&
23 			(end == rest.length || // either end of line
24 			 !rest[end].isAlphaNum &&
25 			 rest[end] != '_'));
26 }
27 
28 ///
29 pure nothrow @safe @nogc unittest {
30 	assert(isSymbolASCII("alpha", 0, 5));
31 	assert(isSymbolASCII(" alpha ", 1, 6));
32 	assert(!isSymbolASCII("driver", 0, 5));
33 	assert(!isSymbolASCII("a_word", 0, 1));
34 	assert(!isSymbolASCII("first_a_word", 6, 7));
35 }
36 
37 bool isWordASCII(in string rest, ptrdiff_t off, size_t end) pure nothrow @safe @nogc
38 in(end <= rest.length)
39 {
40 	import std.ascii: isAlphaNum;
41 	return ((off == 0 || // either beginning of line
42 			 !rest[off - 1].isAlphaNum) &&
43 			(end == rest.length || // either end of line
44 			 !rest[end].isAlphaNum));
45 }
46 
47 ///
48 pure nothrow @safe @nogc unittest {
49 	assert(isSymbolASCII("alpha", 0, 5));
50 	assert(isSymbolASCII(" alpha ", 1, 6));
51 	assert(!isSymbolASCII("driver", 0, 5));
52 	assert(isWordASCII("a_word", 0, 1));
53 	assert(isWordASCII("first_a_word", 6, 7));
54 	assert(isWordASCII("first_a", 6, 7));
55 }
56 
57 // Parameterize on isAlpha and isSymbol.
58 
59 /** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack).
60 	TODO: Make it compatible (specialized) for InputRange or BidirectionalRange.
61 */
62 Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
63 									 R,
64 									 E)(R haystack,
65 										in E needle,
66 										FindContext ctx = FindContext.inWord,
67 										CaseSensitive cs = CaseSensitive.yes, /+ TODO: Use this +/
68 										size_t haystackOffset = 0) @safe pure
69 {
70 	import std.ascii: isAlpha;
71 	import std.algorithm: find;
72 	import std.range: empty;
73 
74 	auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets
75 
76 	auto rest = haystack[haystackOffset..$];
77 	while (needle.length <= rest.length) // for each new try at finding the needle at remainding part of haystack
78 	{
79 		/* debug dbg(needle, ", ", rest); */
80 
81 		// find first character
82 		size_t nIx = 0;		 // needle index
83 		rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find!
84 		if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case
85 		aOffs[nIx++] = &rest[0] - &haystack[0]; // store hit offset and advance acronym
86 		rest = rest[1 .. $];
87 		const ix0 = aOffs[0];
88 
89 		// check context before point
90 		final switch (ctx)
91 		{
92 			case FindContext.inWord:   break; /+ TODO: find word characters before point and set start offset +/
93 			case FindContext.inSymbol: break; /+ TODO: find symbol characters before point and set start offset +/
94 			case FindContext.asWord:
95 				if (ix0 >= 1 && haystack[ix0-1].isAlpha) { goto miss; } // quit if not word start
96 				break;
97 			case FindContext.asSymbol:
98 				if (ix0 >= 1 && haystack[ix0-1].isSymbol) { goto miss; } // quit if not symbol stat
99 				break;
100 		}
101 
102 		while (rest)			// while elements left in haystack
103 		{
104 
105 			// Check elements in between
106 			ptrdiff_t hit = -1;
107 			import std.algorithm: countUntil;
108 			import std.functional: binaryFun;
109 			final switch (ctx)
110 			{
111 				case FindContext.inWord:
112 				case FindContext.asWord:
113 					hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isAlpha); break;
114 				case FindContext.inSymbol:
115 				case FindContext.asSymbol:
116 					hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isSymbol); break;
117 			}
118 			if (hit == -1) { goto miss; } // no hit this time
119 
120 			// Check if hit
121 			if (hit == rest.length || // if we searched till the end
122 				rest[hit] != needle[nIx]) // acronym letter not found
123 			{
124 				rest = haystack[aOffs[0]+1 .. $]; // try beyond hit
125 				goto miss;	  // no hit this time
126 			}
127 
128 			aOffs[nIx++] = (&rest[0] - &haystack[0]) + hit; // store hit offset and advance acronym
129 			if (nIx == needle.length) // if complete acronym found
130 			{
131 				return tuple(haystack[aOffs[0] .. aOffs[$-1] + 1], aOffs) ; // return its length
132 			}
133 			rest = rest[hit+1 .. $]; // advance in source beyound hit
134 		}
135 	miss:
136 		continue;
137 	}
138 	return tuple(R.init, ptrdiff_t[].init); // no hit
139 }
140 
141 ///
142 pure @safe unittest {
143 	assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
144 	assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
145 	assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
146 	assert("fopen".findAcronymAt("fpn")[0] == "fopen");
147 	assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
148 	assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
149 	assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
150 	assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
151 	assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");
152 }