1 module nxt.find_ex; 2 3 import std.typecons: Tuple, tuple; 4 import std.string: CaseSensitive; 5 6 enum FindContext { inWord, inSymbol, 7 asWord, asSymbol } 8 9 /** Return true if $(D a) is a C-style Identifier symbol character. */ 10 bool isSymbol(T)(in T a) pure nothrow @safe @nogc 11 { 12 import std.ascii: isAlpha; 13 return a.isAlpha || a == '_'; 14 } 15 16 bool isSymbolASCII(in string rest, ptrdiff_t off, size_t end) pure nothrow @safe @nogc 17 in(end <= rest.length) 18 { 19 import std.ascii: isAlphaNum; 20 return ((off == 0 || // either beginning of line 21 !rest[off - 1].isAlphaNum && 22 rest[off - 1] != '_') && 23 (end == rest.length || // either end of line 24 !rest[end].isAlphaNum && 25 rest[end] != '_')); 26 } 27 28 /// 29 pure nothrow @safe @nogc unittest { 30 assert(isSymbolASCII("alpha", 0, 5)); 31 assert(isSymbolASCII(" alpha ", 1, 6)); 32 assert(!isSymbolASCII("driver", 0, 5)); 33 assert(!isSymbolASCII("a_word", 0, 1)); 34 assert(!isSymbolASCII("first_a_word", 6, 7)); 35 } 36 37 bool isWordASCII(in string rest, ptrdiff_t off, size_t end) pure nothrow @safe @nogc 38 in(end <= rest.length) 39 { 40 import std.ascii: isAlphaNum; 41 return ((off == 0 || // either beginning of line 42 !rest[off - 1].isAlphaNum) && 43 (end == rest.length || // either end of line 44 !rest[end].isAlphaNum)); 45 } 46 47 /// 48 pure nothrow @safe @nogc unittest { 49 assert(isSymbolASCII("alpha", 0, 5)); 50 assert(isSymbolASCII(" alpha ", 1, 6)); 51 assert(!isSymbolASCII("driver", 0, 5)); 52 assert(isWordASCII("a_word", 0, 1)); 53 assert(isWordASCII("first_a_word", 6, 7)); 54 assert(isWordASCII("first_a", 6, 7)); 55 } 56 57 // Parameterize on isAlpha and isSymbol. 58 59 /** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack). 60 TODO: Make it compatible (specialized) for InputRange or BidirectionalRange. 61 */ 62 Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b", 63 R, 64 E)(R haystack, 65 in E needle, 66 FindContext ctx = FindContext.inWord, 67 CaseSensitive cs = CaseSensitive.yes, /+ TODO: Use this +/ 68 size_t haystackOffset = 0) @safe pure 69 { 70 import std.ascii: isAlpha; 71 import std.algorithm: find; 72 import std.range: empty; 73 74 auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets 75 76 auto rest = haystack[haystackOffset..$]; 77 while (needle.length <= rest.length) // for each new try at finding the needle at remainding part of haystack 78 { 79 /* debug dbg(needle, ", ", rest); */ 80 81 // find first character 82 size_t nIx = 0; // needle index 83 rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find! 84 if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case 85 aOffs[nIx++] = &rest[0] - &haystack[0]; // store hit offset and advance acronym 86 rest = rest[1 .. $]; 87 const ix0 = aOffs[0]; 88 89 // check context before point 90 final switch (ctx) 91 { 92 case FindContext.inWord: break; /+ TODO: find word characters before point and set start offset +/ 93 case FindContext.inSymbol: break; /+ TODO: find symbol characters before point and set start offset +/ 94 case FindContext.asWord: 95 if (ix0 >= 1 && haystack[ix0-1].isAlpha) { goto miss; } // quit if not word start 96 break; 97 case FindContext.asSymbol: 98 if (ix0 >= 1 && haystack[ix0-1].isSymbol) { goto miss; } // quit if not symbol stat 99 break; 100 } 101 102 while (rest) // while elements left in haystack 103 { 104 105 // Check elements in between 106 ptrdiff_t hit = -1; 107 import std.algorithm: countUntil; 108 import std.functional: binaryFun; 109 final switch (ctx) 110 { 111 case FindContext.inWord: 112 case FindContext.asWord: 113 hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isAlpha); break; 114 case FindContext.inSymbol: 115 case FindContext.asSymbol: 116 hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isSymbol); break; 117 } 118 if (hit == -1) { goto miss; } // no hit this time 119 120 // Check if hit 121 if (hit == rest.length || // if we searched till the end 122 rest[hit] != needle[nIx]) // acronym letter not found 123 { 124 rest = haystack[aOffs[0]+1 .. $]; // try beyond hit 125 goto miss; // no hit this time 126 } 127 128 aOffs[nIx++] = (&rest[0] - &haystack[0]) + hit; // store hit offset and advance acronym 129 if (nIx == needle.length) // if complete acronym found 130 { 131 return tuple(haystack[aOffs[0] .. aOffs[$-1] + 1], aOffs) ; // return its length 132 } 133 rest = rest[hit+1 .. $]; // advance in source beyound hit 134 } 135 miss: 136 continue; 137 } 138 return tuple(R.init, ptrdiff_t[].init); // no hit 139 } 140 141 /// 142 pure @safe unittest { 143 assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t"); 144 assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t"); 145 assert("åäö_ab".findAcronymAt("ab")[0] == "ab"); 146 assert("fopen".findAcronymAt("fpn")[0] == "fopen"); 147 assert("fopen_".findAcronymAt("fpn")[0] == "fopen"); 148 assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen"); 149 assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen"); 150 assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []); 151 assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open"); 152 }