module nxt.find_ex;

import std.typecons: Tuple, tuple;
import std.string: CaseSensitive;
import std.functional: binaryFun;

/** Context in which an acronym is searched for.
 *
 * The `in*` members place no requirement on what precedes the first matched
 * letter; the `as*` members require that the first matched letter is not
 * preceded by a word (`isAlpha`) respectively symbol (`isSymbol`) character.
 */
enum FindContext { inWord, inSymbol,
                   asWord, asSymbol }

/** Return true if $(D a) is a C-style Identifier symbol character.
 *
 * Only ASCII letters and the underscore are accepted. Digits are $(B not)
 * accepted here, unlike in $(D isSymbolASCII), which uses `isAlphaNum`.
 */
bool isSymbol(T)(in T a)
    @safe pure nothrow @nogc
{
    import std.ascii: isAlpha;
    return a.isAlpha || a == '_';
}

/** Return true if `rest[off .. end]` is delimited as a symbol.
 *
 * That is, the element before $(D off) (if any) and the element at $(D end)
 * (if any) are neither ASCII alphanumerics nor underscores.
 *
 * Params:
 *   rest = text containing the candidate
 *   off = offset of the first element of the candidate
 *   end = offset one past the last element of the candidate
 */
bool isSymbolASCII(string rest, ptrdiff_t off, size_t end)
    @safe pure nothrow @nogc
in
{
    assert(end <= rest.length);
}
do
{
    import std.ascii: isAlphaNum;
    return ((off == 0 || // either beginning of line
             !rest[off - 1].isAlphaNum &&
             rest[off - 1] != '_') &&
            (end == rest.length || // either end of line
             !rest[end].isAlphaNum &&
             rest[end] != '_'));
}

///
@safe pure nothrow @nogc unittest
{
    assert(isSymbolASCII("alpha", 0, 5));
    assert(isSymbolASCII(" alpha ", 1, 6));
    assert(!isSymbolASCII("driver", 0, 5));
    assert(!isSymbolASCII("a_word", 0, 1));
    assert(!isSymbolASCII("first_a_word", 6, 7));
}

/** Return true if `rest[off .. end]` is delimited as a word.
 *
 * That is, the element before $(D off) (if any) and the element at $(D end)
 * (if any) are not ASCII alphanumerics. Unlike $(D isSymbolASCII), an
 * underscore counts as a delimiter.
 *
 * Params:
 *   rest = text containing the candidate
 *   off = offset of the first element of the candidate
 *   end = offset one past the last element of the candidate
 */
bool isWordASCII(string rest, ptrdiff_t off, size_t end)
    @safe pure nothrow @nogc
in
{
    assert(end <= rest.length);
}
do
{
    import std.ascii: isAlphaNum;
    return ((off == 0 || // either beginning of line
             !rest[off - 1].isAlphaNum) &&
            (end == rest.length || // either end of line
             !rest[end].isAlphaNum));
}

///
@safe pure nothrow @nogc unittest
{
    assert(isWordASCII("alpha", 0, 5));
    assert(isWordASCII(" alpha ", 1, 6));
    assert(!isWordASCII("driver", 0, 5));
    assert(isWordASCII("a_word", 0, 1));
    assert(isWordASCII("first_a_word", 6, 7));
    assert(isWordASCII("first_a", 6, 7));
}

// TODO: Parameterize on isAlpha and isSymbol.

/** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack).
 *
 * The first element of $(D needle) is located with `find!pred`. Every
 * following element must then be found, in order, before the first element
 * that is not a word character (`isAlpha`, for the `*Word` contexts) or not a
 * symbol character (`isSymbol`, for the `*Symbol` contexts). A separator that
 * itself matches the next needle element counts as a hit.
 *
 * Params:
 *   pred = binary predicate comparing a haystack element with a needle element
 *   haystack = sliceable text to search in
 *   needle = acronym to search for
 *   ctx = context requirement, see $(D FindContext)
 *   cs = currently unused
 *   haystackOffset = offset in $(D haystack) at which the search starts
 *
 * Returns: a tuple of the slice of $(D haystack) spanning the first to the
 * last matched element, and the offsets into $(D haystack) of every matched
 * element. On a miss, or when $(D needle) is empty, the slice is empty and
 * the offsets are `null`.
 *
 * TODO: Make it compatible (specialized) for InputRange or BidirectionalRange.
 */
Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
                                     R,
                                     E)(R haystack,
                                        E needle,
                                        FindContext ctx = FindContext.inWord,
                                        CaseSensitive cs = CaseSensitive.yes, // TODO: Use this
                                        size_t haystackOffset = 0) @safe pure
{
    import std.algorithm: countUntil, find;
    import std.ascii: isAlpha;
    import std.range: empty;

    alias matches = binaryFun!pred;

    // An empty acronym has no first letter to anchor on: report a miss
    // instead of indexing `needle[0]` out of bounds.
    if (needle.length == 0) { return tuple(R.init, ptrdiff_t[].init); }

    // Acronym hit offsets. Returned to the caller, so it must not be `scope`.
    auto aOffs = new ptrdiff_t[needle.length];

    // Invariant: `rest` is always a suffix of `haystack`, so the offset of
    // `rest[0]` in `haystack` is `haystack.length - rest.length`.
    auto rest = haystack[haystackOffset .. $];

    search:
    while (needle.length <= rest.length) // for each new try at finding the needle in the remaining part of haystack
    {
        // find first character
        size_t nIx = 0;                     // needle index
        rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find!
        if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case
        aOffs[nIx++] = haystack.length - rest.length; // store hit offset and advance acronym
        rest = rest[1 .. $];
        const ix0 = aOffs[0];

        // check context before point
        final switch (ctx)
        {
        case FindContext.inWord: break;   // TODO: find word characters before point and set start offset
        case FindContext.inSymbol: break; // TODO: find symbol characters before point and set start offset
        case FindContext.asWord:
            if (ix0 >= 1 && haystack[ix0 - 1].isAlpha) { continue search; } // quit if not word start
            break;
        case FindContext.asSymbol:
            if (ix0 >= 1 && haystack[ix0 - 1].isSymbol) { continue search; } // quit if not symbol start
            break;
        }

        // A one-letter acronym is complete as soon as its context is accepted.
        if (nIx == needle.length)
        {
            return tuple(haystack[ix0 .. ix0 + 1], aOffs);
        }

        while (!rest.empty)     // while elements left in haystack
        {
            // Skip elements in between: stop at the next acronym letter or at
            // the first element that ends the current word/symbol.
            ptrdiff_t hit = -1;
            final switch (ctx)
            {
            case FindContext.inWord:
            case FindContext.asWord:
                hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isAlpha);
                break;
            case FindContext.inSymbol:
            case FindContext.asSymbol:
                hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isSymbol);
                break;
            }
            if (hit == -1) { continue search; } // no hit this time

            // We stopped either on the acronym letter or on a separator.
            // Decide with the same predicate that was used for searching.
            if (!matches(rest[hit], needle[nIx])) // acronym letter not found
            {
                rest = haystack[aOffs[0] + 1 .. $]; // retry just beyond the first-letter hit
                continue search;                    // no hit this time
            }

            aOffs[nIx++] = (haystack.length - rest.length) + hit; // store hit offset and advance acronym
            if (nIx == needle.length) // if complete acronym found
            {
                return tuple(haystack[aOffs[0] .. aOffs[$ - 1] + 1], aOffs);
            }
            rest = rest[hit + 1 .. $]; // advance in source beyond hit
        }
    }
    return tuple(R.init, ptrdiff_t[].init); // no hit
}

///
@safe pure unittest
{
    assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
    assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
    assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
    assert("fopen".findAcronymAt("fpn")[0] == "fopen");
    assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
    assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
    assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");
}

/// custom predicates, one-letter and empty acronyms
@safe pure unittest
{
    import std.ascii: toLower;

    assert("FOPEN".findAcronymAt!((a, b) => a.toLower == b.toLower)("fpn")[0] == "FOPEN");

    assert("ab".findAcronymAt("a")[0] == "a");
    assert("ab b".findAcronymAt("b", FindContext.asWord)[1] == [ptrdiff_t(3)]);
    assert("_x".findAcronymAt("x", FindContext.asSymbol)[0] == []);

    assert("fopen".findAcronymAt("")[0] == []);
}