module find_ex;

import std.typecons: Tuple, tuple;
import std.string: CaseSensitive;
import std.functional: binaryFun;

/** Context in which $(D findAcronymAt) accepts an acronym hit.
 *
 * $(UL
 *   $(LI $(D inWord): elements skipped between two acronym letters must be
 *        ASCII letters; no constraint on what precedes the first letter.)
 *   $(LI $(D inSymbol): elements skipped between two acronym letters must
 *        satisfy $(D isSymbol); no constraint on what precedes the first letter.)
 *   $(LI $(D asWord): like $(D inWord), and the first acronym letter must not
 *        be preceded by an ASCII letter.)
 *   $(LI $(D asSymbol): like $(D inSymbol), and the first acronym letter must
 *        not be preceded by an element satisfying $(D isSymbol).)
 * )
 */
enum FindContext { inWord, inSymbol,
                   asWord, asSymbol }

/** Return true if $(D a) is a C-style identifier character, that is an ASCII
 * letter, an ASCII digit or an underscore.
 *
 * This agrees with the character class used by $(D isSymbolASCII).
 */
bool isSymbol(T)(in T a)
    @safe @nogc pure nothrow
{
    import std.ascii: isAlphaNum;
    return a.isAlphaNum || a == '_';
}

/** Return true if the slice $(D rest[off .. end]) is delimited as a symbol.
 *
 * The slice is delimited when it starts at the beginning of $(D rest) or is
 * preceded by an element that is neither ASCII alphanumeric nor '_', and it
 * ends at the end of $(D rest) or is followed by such an element.
 *
 * Params:
 *   rest = text containing the candidate
 *   off  = offset of the first element of the candidate
 *   end  = offset one past the last element of the candidate; must not exceed
 *          $(D rest.length)
 */
bool isSymbolASCII(string rest, ptrdiff_t off, size_t end)
    @safe @nogc pure nothrow
in { assert(end <= rest.length); }
do
{
    import std.ascii: isAlphaNum;
    return ((off == 0 ||            // either beginning of line
             (!rest[off - 1].isAlphaNum &&
              rest[off - 1] != '_')) &&
            (end == rest.length ||  // either end of line
             (!rest[end].isAlphaNum &&
              rest[end] != '_')));
}
unittest
{
    assert(isSymbolASCII("alpha", 0, 5));
    assert(isSymbolASCII(" alpha ", 1, 6));
    assert(!isSymbolASCII("driver", 0, 5));
    assert(!isSymbolASCII("a_word", 0, 1));
    assert(!isSymbolASCII("first_a_word", 6, 7));
}

// ==============================================================================================

/** Return true if the slice $(D rest[off .. end]) is delimited as a word.
 *
 * Same as $(D isSymbolASCII) except that only ASCII alphanumeric neighbours
 * disqualify the slice; an adjacent '_' counts as a delimiter.
 *
 * Params:
 *   rest = text containing the candidate
 *   off  = offset of the first element of the candidate
 *   end  = offset one past the last element of the candidate; must not exceed
 *          $(D rest.length)
 */
bool isWordASCII(string rest, ptrdiff_t off, size_t end) @safe @nogc pure nothrow
in { assert(end <= rest.length); }
do
{
    import std.ascii: isAlphaNum;
    return ((off == 0 ||            // either beginning of line
             !rest[off - 1].isAlphaNum) &&
            (end == rest.length ||  // either end of line
             !rest[end].isAlphaNum));
}
unittest {
    assert(isWordASCII("alpha", 0, 5));
    assert(isWordASCII(" alpha ", 1, 6));
    assert(!isWordASCII("driver", 0, 5));
    assert(isWordASCII("a_word", 0, 1));
    assert(isWordASCII("first_a_word", 6, 7));
    assert(isWordASCII("first_a", 6, 7));
}

// TODO Parameterize on isAlpha and isSymbol.

/** Find $(D needle) as Word or Symbol Acronym at $(D haystackOffset) in $(D haystack).
 *
 * The elements of $(D needle) must occur in order in $(D haystack); the
 * elements skipped in between must be ASCII letters (word contexts) or satisfy
 * $(D isSymbol) (symbol contexts). See $(D FindContext) for the exact rules.
 *
 * Params:
 *   pred           = binary predicate used to compare a haystack element with
 *                    a needle element
 *   haystack       = array to search in
 *   needle         = acronym to search for
 *   ctx            = context constraining which hits are accepted
 *   cs             = currently unused
 *   haystackOffset = offset in $(D haystack) at which the search starts
 *
 * Returns: a tuple of the slice of $(D haystack) spanning from the first to
 * the last acronym letter, and the offsets in $(D haystack) of every acronym
 * letter. When nothing is found, or when $(D needle) is empty or
 * $(D haystackOffset) lies beyond the end of $(D haystack), the slice is empty
 * and the offsets are $(D null).
 *
 * TODO Make it compatible (specialized) for InputRange or BidirectionalRange.
 */
Tuple!(R, ptrdiff_t[]) findAcronymAt(alias pred = "a == b",
                                     R,
                                     E)(R haystack,
                                        E needle,
                                        FindContext ctx = FindContext.inWord,
                                        CaseSensitive cs = CaseSensitive.yes, // TODO Use this
                                        size_t haystackOffset = 0) @safe pure
{
    import std.ascii: isAlpha;
    import std.algorithm: countUntil, find;
    import std.range: empty;

    alias matches = binaryFun!pred;

    // Degenerate input: nothing to search for, or nowhere to search.
    if (needle.length == 0 || haystackOffset > haystack.length)
    {
        return tuple(R.init, (ptrdiff_t[]).init);
    }

    auto aOffs = new ptrdiff_t[needle.length]; // acronym hit offsets

    // `rest` is always a tail slice of `haystack`, so the offset of its first
    // element is `haystack.length - rest.length`. This avoids `.ptr`, which is
    // not allowed in @safe code.
    auto rest = haystack[haystackOffset .. $];
    while (needle.length <= rest.length) // for each new try at finding the needle in the remaining part of haystack
    {
        // find first character
        size_t nIx = 0;                     // needle index
        rest = rest.find!pred(needle[nIx]); // reuse std.algorithm: find!
        if (rest.empty) { return tuple(rest, (ptrdiff_t[]).init); } // degenerate case
        aOffs[nIx++] = haystack.length - rest.length; // store hit offset and advance acronym
        rest = rest[1 .. $];
        const ix0 = aOffs[0];

        // check context before point
        final switch (ctx)
        {
        case FindContext.inWord:   break; // TODO find word characters before point and set start offset
        case FindContext.inSymbol: break; // TODO find symbol characters before point and set start offset
        case FindContext.asWord:
            if (ix0 >= 1 && haystack[ix0 - 1].isAlpha) { goto miss; } // quit if not word start
            break;
        case FindContext.asSymbol:
            if (ix0 >= 1 && haystack[ix0 - 1].isSymbol) { goto miss; } // quit if not symbol start
            break;
        }

        while (!rest.empty)     // while elements left in haystack
        {
            // Skip elements in between: stop at the next acronym letter or at
            // the first element that is not allowed inside the context.
            ptrdiff_t hit = -1;
            final switch (ctx)
            {
            case FindContext.inWord:
            case FindContext.asWord:
                hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isAlpha); break;
            case FindContext.inSymbol:
            case FindContext.asSymbol:
                hit = rest.countUntil!(x => matches(x, needle[nIx]) || !x.isSymbol); break;
            }
            if (hit == -1) { goto miss; } // no hit this time

            // We stopped either on the acronym letter or on a context
            // terminator; use `pred` (not a hard-coded `!=`) to tell which.
            if (!matches(rest[hit], needle[nIx])) // acronym letter not found
            {
                rest = haystack[ix0 + 1 .. $]; // retry just beyond the first hit
                goto miss;                     // no hit this time
            }

            aOffs[nIx++] = (haystack.length - rest.length) + hit; // store hit offset and advance acronym
            if (nIx == needle.length) // if complete acronym found
            {
                // return the slice spanning the acronym together with all hit offsets
                return tuple(haystack[ix0 .. aOffs[$ - 1] + 1], aOffs);
            }
            rest = rest[hit + 1 .. $]; // advance in source beyond hit
        }
    miss:
        continue;
    }
    return tuple(R.init, (ptrdiff_t[]).init); // no hit
}

unittest
{
    assert("size_t".findAcronymAt("sz_t", FindContext.inWord)[0] == "size_t");
    assert("size_t".findAcronymAt("sz_t", FindContext.inSymbol)[0] == "size_t");
    assert("åäö_ab".findAcronymAt("ab")[0] == "ab");
    assert("fopen".findAcronymAt("fpn")[0] == "fopen");
    assert("fopen_".findAcronymAt("fpn")[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inWord)[0] == "fopen");
    assert("_fopen".findAcronymAt("fpn", FindContext.inSymbol)[0] == "fopen");
    assert("f_open".findAcronymAt("fpn", FindContext.inWord)[0] == []);
    assert("f_open".findAcronymAt("fpn", FindContext.inSymbol)[0] == "f_open");

    // hit offsets are reported relative to the start of haystack
    ptrdiff_t[] expected = [0, 2, 4];
    assert("fopen_".findAcronymAt("fpn")[1] == expected);

    // a custom predicate is honoured for every acronym letter
    import std.ascii: toLower;
    alias ieq = (a, b) => toLower(a) == toLower(b);
    assert(findAcronymAt!ieq("FOpeN", "fpn")[0] == "FOpeN");

    // empty needle yields no hit instead of a range violation
    assert("abc".findAcronymAt("")[0] == "");
}