module nxt.find_ex;

import std.typecons: Tuple, tuple;
import std.string: CaseSensitive;

/** Context in which `findAcronymAt` searches for an acronym.
 *
 * The `in*` members place no restriction on what precedes the first hit; the
 * `as*` members require that the first hit is not preceded by a word/symbol
 * character. The `*Word` members only let ASCII letters separate acronym
 * letters, the `*Symbol` members additionally allow `'_'`.
 */
enum FindContext { inWord, inSymbol,
                   asWord, asSymbol }

/** Return true if $(D a) is an ASCII letter or an underscore.
 *
 * Note that digits are not accepted.
 */
bool isSymbol(T)(in T a) @safe pure nothrow @nogc
{
    import std.ascii: isAlpha;
    return a.isAlpha || a == '_';
}

/** Return true if the slice `rest[off .. end]` is delimited as a whole symbol.
 *
 * That is the case when the character before `off` (if any) and the character
 * at `end` (if any) are neither ASCII alphanumeric nor `'_'`.
 *
 * Params:
 *   rest = text containing the candidate
 *   off = offset of the first character of the candidate
 *   end = offset one past the last character of the candidate
 */
bool isSymbolASCII(in string rest, ptrdiff_t off, size_t end) @safe pure nothrow @nogc
in(end <= rest.length)
{
    import std.ascii: isAlphaNum;
    return ((off == 0 || // either beginning of line
             !rest[off - 1].isAlphaNum &&
             rest[off - 1] != '_') &&
            (end == rest.length || // either end of line
             !rest[end].isAlphaNum &&
             rest[end] != '_'));
}

///
@safe pure nothrow @nogc unittest
{
    assert(isSymbolASCII("alpha", 0, 5));
    assert(isSymbolASCII(" alpha ", 1, 6));
    assert(!isSymbolASCII("driver", 0, 5));
    assert(!isSymbolASCII("a_word", 0, 1));
    assert(!isSymbolASCII("first_a_word", 6, 7));
}

/** Return true if the slice `rest[off .. end]` is delimited as a whole word.
 *
 * That is the case when the character before `off` (if any) and the character
 * at `end` (if any) are not ASCII alphanumeric. Unlike `isSymbolASCII`, an
 * underscore counts as a delimiter.
 *
 * Params:
 *   rest = text containing the candidate
 *   off = offset of the first character of the candidate
 *   end = offset one past the last character of the candidate
 */
bool isWordASCII(in string rest, ptrdiff_t off, size_t end) @safe pure nothrow @nogc
in(end <= rest.length)
{
    import std.ascii: isAlphaNum;
    return ((off == 0 || // either beginning of line
             !rest[off - 1].isAlphaNum) &&
            (end == rest.length || // either end of line
             !rest[end].isAlphaNum));
}

///
@safe pure nothrow @nogc unittest
{
    assert(isWordASCII("alpha", 0, 5));
    assert(isWordASCII(" alpha ", 1, 6));
    assert(!isWordASCII("driver", 0, 5));
    assert(isWordASCII("a_word", 0, 1));
    assert(isWordASCII("first_a_word", 6, 7));
    assert(isWordASCII("first_a", 6, 7));
}

// TODO: Parameterize on isAlpha and isSymbol.

/** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack).
 *
 * The letters of `needle` must occur in order in `haystack`. Between two
 * consecutive acronym letters only ASCII letters (word contexts) or ASCII
 * letters and `'_'` (symbol contexts) may occur.
 *
 * Params:
 *   pred = binary predicate comparing a haystack element with a needle element
 *   haystack = text to search in
 *   needle = acronym to search for
 *   ctx = search context, see `FindContext`
 *   cs = currently unused
 *   haystackOffset = offset in `haystack` at which the search starts
 *
 * Returns: a tuple of the slice of `haystack` spanning the first through the
 * last matched acronym letter and the offsets in `haystack` of every matched
 * letter. If nothing is found, or `needle` is empty, the slice is empty and
 * the offsets are `null`.
 *
 * TODO: Make it compatible (specialized) for InputRange or BidirectionalRange.
 */
Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
                                     R,
                                     E)(R haystack,
                                        in E needle,
                                        FindContext ctx = FindContext.inWord,
                                        CaseSensitive cs = CaseSensitive.yes, // TODO: Use this
                                        size_t haystackOffset = 0) @safe pure
{
    import std.algorithm: countUntil, find;
    import std.ascii: isAlpha;
    import std.functional: binaryFun;
    import std.range: empty;

    // An empty acronym has no first letter to anchor on: report no hit
    // instead of indexing `needle[0]` out of bounds.
    if (needle.length == 0)
    {
        return tuple(R.init, ptrdiff_t[].init);
    }

    auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets

    // `rest` is always a tail slice of `haystack`, so the offset of its first
    // element in `haystack` is `haystack.length - rest.length`.
    auto rest = haystack[haystackOffset .. $];

    attempt:
    while (needle.length <= rest.length) // for each new try at finding the needle in the remaining part of haystack
    {
        // find first character
        size_t nIx = 0;                     // needle index
        rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find!
        if (rest.empty) { return tuple(rest, ptrdiff_t[].init); } // degenerate case
        aOffs[nIx++] = cast(ptrdiff_t)(haystack.length - rest.length); // store hit offset and advance acronym
        rest = rest[1 .. $];
        const ix0 = aOffs[0];

        // check context before point
        final switch (ctx)
        {
        case FindContext.inWord: break;   // TODO: find word characters before point and set start offset
        case FindContext.inSymbol: break; // TODO: find symbol characters before point and set start offset
        case FindContext.asWord:
            if (ix0 >= 1 && haystack[ix0 - 1].isAlpha) { continue attempt; } // retry if not word start
            break;
        case FindContext.asSymbol:
            if (ix0 >= 1 && haystack[ix0 - 1].isSymbol) { continue attempt; } // retry if not symbol start
            break;
        }

        // A single-letter acronym is complete here. Without this check the
        // loop below would index `needle` one past its end.
        if (nIx == needle.length)
        {
            return tuple(haystack[ix0 .. ix0 + 1], aOffs);
        }

        while (true) // left via `continue attempt` or `return` only
        {
            // Skip the elements in between: stop at the next acronym letter
            // or at the first element that may not occur inside the acronym.
            ptrdiff_t hit = -1;
            final switch (ctx)
            {
            case FindContext.inWord:
            case FindContext.asWord:
                hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isAlpha);
                break;
            case FindContext.inSymbol:
            case FindContext.asSymbol:
                hit = rest.countUntil!(x => (binaryFun!pred(x, needle[nIx])) || !x.isSymbol);
                break;
            }
            if (hit == -1) { continue attempt; } // no hit this time

            // Stopped on a separator instead of the acronym letter. Compare
            // with `pred` so that custom predicates are honoured here too.
            if (!binaryFun!pred(rest[hit], needle[nIx]))
            {
                rest = haystack[aOffs[0] + 1 .. $]; // try beyond first hit
                continue attempt;                   // no hit this time
            }

            aOffs[nIx++] = cast(ptrdiff_t)(haystack.length - rest.length) + hit; // store hit offset and advance acronym
            if (nIx == needle.length) // if complete acronym found
            {
                return tuple(haystack[aOffs[0] .. aOffs[$ - 1] + 1], aOffs);
            }
            rest = rest[hit + 1 .. $]; // advance in source beyond hit
        }
    }
    return tuple(R.init, ptrdiff_t[].init); // no hit
}

///
@safe pure unittest
{
    assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
    assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
    assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
    assert("fopen".findAcronymAt("fpn")[0] == "fopen");
    assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
    assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
    assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");

    // single-letter and empty acronyms
    assert("fopen".findAcronymAt("f")[0] == "f");
    assert("xf f".findAcronymAt("f", FindContext.asWord)[0] == "f");
    assert("fopen".findAcronymAt("")[0] == []);
}