1 /**
2    Patterns.
3 
4    Test: dmd -version=show -preview=dip1000 -preview=in -vcolumns -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d
5    Test: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d
6    Debug: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main pattern.d && lldb pattern
7 
8    Concepts and namings are inspired by regular expressions, symbolic (regular)
9    expressions (Emacs' rx.el package), predicate logic and grammars.
10 
11    String patterns are matched by their raw bytes for now.
12 
13    Copyright: Per Nordlöw 2022-.
14    License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
15    Authors: $(WEB Per Nordlöw)
16 
17    TODO: Merge with lpgen.d Node classes
18    TODO: Extend `Node` classes to represent `xlg.el` read by `LispFileParser`.
19    TODO: Replace `findRawAt(const Data haystack` with `findRawAt(in Data haystack`
20 
21    TODO:
22    Overload operators & (and), ! (not),
23 
24    TODO: Add pattern for expressing inference with `infer`
25    (infer
26 	((x instanceOf X) &
27 	 (X subClassOf Y))
28 	 (x instaceOf Y))
29 
30    TODO:
31    Variables are either
32    - _ (ignore)
33    - _`x`, _`y`, etc
34    - _'x', _'y'
35    - _!0, _!1, ..., _!(n-1)
36 
37    infer(rel!`desire`(_!`x`, _!`y`) &&
38 		 rel!`madeOf`(_!`z`, _!`y`),
39 		 rel!`desire`(_!`x`, _!`z`))
40 
41    TODO: Support variables of specific types and inference using predicate logic:
42 		infer(and(fact(var!'x', rel'desire', var!'y'),
43 				  fact(var!'z', opt(rel'madeOf',
44 							  rel'instanceOf'), var!'y'))),
45 			  pred(var!'x', rel'desire', var!'z'
46 			  ))
47 
48    TODO: Make returns from factory functions immutable.
49    TODO: Reuse return patterns from Lit
50 
51    TODO
52    const s = seq(`al.`.lit,`pha`.lit);
53    const t = `al`.lit ~ `pha`.lit;
54    assert(s !is t);
55    assert(equal(s, t));
56 
57  */
58 module nxt.pattern;
59 
60 import std.algorithm : find, all, map, min, max, joiner;
61 import std.range : empty;
62 import std.array : array;
63 import std.string : representation;
64 import std.traits : isSomeString;
65 import nxt.find_ex : findAcronymAt, FindContext;
66 import nxt.debugio;
67 import nxt.container.dynamic_array;
68 
69 @safe:
70 
71 /++ Untyped data.
72  +/
73 alias Data = ubyte[];
74 
75 /++ Matching `input` with `node`.
76 	Compatibility with `std.regex.matchFirst`.
77  +/
78 inout(char)[] matchFirst(scope return /+ref+/ inout(char)[] input, const Node node) pure nothrow /+@nogc+/ {
79 	return input.match(node);
80 }
81 
82 /// ditto
83 @safe pure unittest {
84 	const x = "ab";
85 	assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]);
86 	assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]);
87 }
88 
89 /** Match `input` with `node`.
90 	Returns: Matched slice or `[]` if not match.
91  */
92 auto ref matchFirst(in return /+ref+/ Data input, const Node node) pure nothrow /+@nogc+/ {
93 	return node.findRawAt(input, 0);
94 }
95 
96 /// ditto
97 @safe pure unittest {
98 	/+const+/ Data x = [1,2];
99 	assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]);
100 	assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]);
101 }
102 
103 inout(char)[] match(scope return /+ref+/ inout(char)[] input, const Node node) @trusted pure nothrow /+@nogc+/ {
104 	return cast(typeof(return))input.representation.matchFirst(node);
105 }
106 
107 /++ Pattern (length) bounds. +/
108 struct Bounds {
109 	static immutable inf = size_t.max;
110 	size_t low; ///< Smallest length possible.
111 	size_t high; ///< Largest length possible.
112 }
113 
114 /** Base Pattern.
115  */
116 abstract extern(C++) class Node { extern(D):
117 @safe pure nothrow:
118 
119 	Seq opBinary(string op)(Node rhs) if (op == `~`) => opCatImpl(rhs); // template can't be overridden
120 	Alt opBinary(string op)(Node rhs) if (op == `|`) => opAltImpl(rhs); // template can't be overridden
121 
122 	protected Seq opCatImpl(Node rhs) => seq(this, rhs); /+ TODO: check if this and rhs is Seq +/
123 	protected Alt opAltImpl(Node rhs) => alt(this, rhs);  /+ TODO: check if this and rhs is Alt +/
124 
125 	final size_t at(const scope string input, size_t soff = 0) const
126 	/+ TODO: Activate this +/
127 	/* out (hit) { */
128 	/*	 assert((!hit) || hit >= bounds.min); /+ TODO: Is this needed? +/ */
129 	/* } */
130 	/* do */
131 	{
132 		return atRaw(input.representation, soff);
133 	}
134 
135 	abstract size_t atRaw(in Data input, size_t soff = 0) const @nogc;
136 
137 	/** Find $(D this) in String `input` at offset `soff`. */
138 	final const(Data) findAt(const return scope string input, size_t soff = 0) const {
139 		return findRawAt(input.representation, soff, []); /+ TODO: this is ugly +/
140 	}
141 
142 	/** Find $(D this) in Raw Bytes `input` at offset `soff`. */
143 	const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const @nogc {
144 		auto i = soff;
145 		while (i < input.length) { // while bytes left at i
146 			if (input.length - i < bounds.low)  // and bytes left to find pattern
147 				return [];
148 			const hit = atRaw(input, i);
149 			if (hit != size_t.max) // hit at i
150 				return input[i..i + hit];
151 			i++;
152 		}
153 		return [];
154 	}
155 
156 @property @nogc:
157 	abstract Bounds bounds() const;
158 
159 	abstract bool isFixed() const; /// Returns: true if all possible instances have same length.
160 	abstract bool isConstant() const; /// Returns: true if all possible instances have same length.
161 
162 	final bool isVariable() => !isConstant;
163 	const(Data) tryGetConstant() const => []; /// Returns: data if literal otherwise empty array.
164 
165 	/** Get All Literals that must match a given source $(D X) in order for $(D
166 		this) to match $(D X) somewhere.
167 	*/
168 	version (none) abstract Lit[] mandatories();
169 
170 	/** Get Optional Literals that may match a given source $(D X) if $(D this)
171 		matches $(D X) somewhere.
172 	*/
173 	version (none) Lit[] optionals() => mandatories;
174 
175 	protected Node _parent; /// Parenting (Super) Pattern.
176 }
177 
178 /** Literal Pattern with Cached Binary Byte Histogram.
179  */
180 final extern(C++) class Lit : Node { extern(D):
181 @safe pure nothrow:
182 
183 	this(string bytes_) { assert(!bytes_.empty); this(bytes_.representation); }
184 	this(ubyte ch) { this._bytes ~= ch; }
185 	this(Data bytes_) { this._bytes = bytes_; }
186 	this(immutable Data bytes_) { this._bytes = bytes_.dup; }
187 
188 	override size_t atRaw(in Data input, size_t soff = 0) const {
189 		const l = _bytes.length;
190 		return (soff + l <= input.length && // fits in input and
191 				_bytes[] == input[soff..soff + l]) ? l : size_t.max; // same contents
192 	}
193 
194 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow {
195 		return input[soff..$].find(_bytes); // reuse std.algorithm: find!
196 	}
197 
198 @property nothrow @nogc:
199 	auto ref bytes() const => _bytes;
200 	private Data _bytes;
201 	alias _bytes this;
202 override:
203 	Bounds bounds() const => Bounds(_bytes.length, _bytes.length);
204 	bool isFixed() const => true;
205 	bool isConstant() const => true;
206 	const(Data) tryGetConstant() const => _bytes;
207 	version (none) Lit[] mandatories() => [this];
208 }
209 
210 /++ Literal. +/
211 auto lit(Args...)(Args args) @safe pure nothrow => new Lit(args); // instantiator
212 
213 /++ Full|Exact literal. +/
214 auto full(Args...)(Args args) @safe pure nothrow => seq(bob, lit(args), eob); // instantiator
215 
216 pure nothrow @safe unittest {
217 	const _ = lit(Data.init);
218 	const ab = lit(`ab`);
219 	assert(`ab`.match(ab));
220 	version (none) assert(!ab.match(`_`)); // TODO: enable
221 }
222 
223 pure nothrow @safe unittest {
224 	immutable ab = `ab`;
225 	assert(lit('b').at(`ab`, 1) == 1);
226 	const a = lit('a');
227 
228 	const ac = lit(`ac`);
229 	assert(`ac`.match(ac));
230 	assert(`ac`.match(ac).length == 2);
231  	assert(`ca`.match(ac) == []);
232 
233 	assert(a.isFixed);
234 	assert(a.isConstant);
235 	assert(a.at(ab) == 1);
236 	assert(lit(ab).at(`a`) == size_t.max);
237 	assert(lit(ab).at(`b`) == size_t.max);
238 
239 	assert(a.findAt(`cba`) == cast(immutable Data)`a`);
240 	assert(a.findAt(`ba`) == cast(immutable Data)`a`);
241 	assert(a.findAt(`a`) == cast(immutable Data)`a`);
242 	assert(a.findAt(``) == []);
243 	assert(a.findAt(`b`) == []);
244 	assert(ac.findAt(`__ac`) == cast(immutable Data)`ac`);
245 	assert(a.findAt(`b`).length == 0);
246 
247 	auto xyz = lit(`xyz`);
248 }
249 
250 pure nothrow @safe unittest {
251 	auto ac = lit(`ac`);
252 	/+ TODO: assert(ac.mandatories == [ac]); +/
253 	/+ TODO: assert(ac.optionals == [ac]); +/
254 }
255 
256 /** Word/Symbol Acronym Pattern.
257  */
258 version (none) // TODO: enable if and when I need this
259 final extern(C++) class Acronym : Node { extern(D):
260 @safe pure nothrow:
261 
262 	this(string bytes_, FindContext ctx = FindContext.inSymbol) {
263 		assert(!bytes_.empty);
264 		this(bytes_.representation, ctx);
265 	}
266 
267 	this(ubyte ch) { this._acros ~= ch; }
268 
269 	this(Data bytes_, FindContext ctx = FindContext.inSymbol) {
270 		this._acros = bytes_;
271 		this._ctx = ctx;
272 	}
273 
274 	this(immutable Data bytes_, FindContext ctx = FindContext.inSymbol) {
275 		this._acros = bytes_.dup;
276 		this._ctx = ctx;
277 	}
278 
279 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
280 		// scope auto offs = new size_t[_acros.length]; // hit offsets
281 		size_t a = 0;		 // acronym index
282 		foreach(s, ub; input[soff..$]) { // for each element in source
283 			import std.ascii: isAlpha;
284 
285 			// Check context
286 			final switch (_ctx) {
287 			case FindContext.inWord:
288 			case FindContext.asWord:
289 				if (!ub.isAlpha)
290 					return size_t.max;
291 				break;
292 			case FindContext.inSymbol:
293 			case FindContext.asSymbol:
294 				if (!ub.isAlpha && ub != '_')
295 					return size_t.max;
296 				break;
297 			}
298 
299 			if (_acros[a] == ub) {
300 				// offs[a] = s + soff; // store hit offset
301 				a++; // advance acronym
302 				if (a == _acros.length) { // if complete acronym found
303 					return s + 1;			 // return its length
304 				}
305 			}
306 		}
307 		return size_t.max; // no hit
308 	}
309 
310 	template Tuple(E...) { alias Tuple = E; }
311 
312 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow {
313 		import std.string: CaseSensitive;
314 		return input.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0];
315 	}
316 
317 @property:
318 	override size_t bounds() const nothrow @nogc => typeof(return)(_acros.length, size_t.max);
319 	override bool isFixed() const nothrow @nogc => false;
320 	override bool isConstant() const nothrow @nogc => false;
321 
322 private:
323 	Data _acros;
324 	FindContext _ctx;
325 }
326 
327 version (none)
328 @safe pure nothrow {
329 	auto inwac(Args...)(Args args) => new Acronym(args, FindContext.inWord); // word acronym
330 	auto insac(Args...)(Args args) => new Acronym(args, FindContext.inSymbol); // symbol acronym
331 	auto aswac(Args...)(Args args) => new Acronym(args, FindContext.asWord); // word acronym
332 	auto assac(Args...)(Args args) => new Acronym(args, FindContext.asSymbol); // symbol acronym
333 }
334 
335 version (none)
336 pure nothrow @safe unittest {
337 	assert(inwac(`a`).at(`a`) == 1);
338 	assert(inwac(`ab`).at(`ab`) == 2);
339 	assert(inwac(`ab`).at(`a`) == size_t.max);
340 	assert(inwac(`abc`).at(`abc`) == 3);
341 	assert(inwac(`abc`).at(`aabbcc`) == 5);
342 	assert(inwac(`abc`).at(`aaaabbcc`) == 7);
343 	assert(inwac(`fpn`).at(`fopen`) == 5);
344 }
345 
346 /** Any Byte.
347  */
348 final extern(C++) class Any : Node { extern(D):
349 @safe pure nothrow:
350 	this() {}
351 
352 	override size_t atRaw(in Data input, size_t soff = 0) const => soff < input.length ? 1 : size_t.max;
353 
354 @property:
355 	override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1);
356 	override bool isFixed() const nothrow @nogc => true;
357 	override bool isConstant() const nothrow @nogc => false;
358 
359 	version (none) override Lit[] mandatories() nothrow => [];
360 	version (none) override Lit[] optionals() nothrow {
361 		import std.range: iota;
362 		return iota(0, 256).map!(n => (cast(ubyte)n).lit).array;
363 	}
364 }
365 
366 auto any(Args...)(Args args) => new Any(args); // instantiator
367 
368 /** Abstract Super Pattern.
369  */
370 abstract extern(C++) class SPatt : Node { extern(D):
371 @safe pure nothrow:
372 
373 	this(Node[] subs_) { this._subs = subs_; }
374 	this(Args...)(Args subs_) {
375 		foreach (sub; subs_) {
376 			alias Sub = typeof(sub);
377 			/+ TODO: functionize to patternFromBuiltinType() or to!Node +/
378 			static if (is(Sub == string) ||
379 					   is(Sub == char)) {
380 				_subs ~= new Lit(sub);
381 			} else
382 				_subs ~= sub;
383 			sub._parent = this;
384 		}
385 	}
386 
387 	protected Node[] _subs;
388 }
389 
390 /** Sequence of Patterns.
391  */
392 final extern(C++) class Seq : SPatt { extern(D):
393 @safe pure nothrow:
394 
395 	this(Node[] subs_) { super(subs_); }
396 	this(Args...)(Args subs_) { super(subs_); }
397 
398 	@property auto ref inout (Node[]) elms() inout @nogc => super._subs;
399 
400 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
401 		assert(!elms.empty); /+ TODO: Move to in contract? +/
402 		const c = tryGetConstant;
403 		if (!c.empty) {
404 			return (soff + c.length <= input.length &&   // if equal size and
405 					c[] == input[soff..soff + c.length]); // equal contents
406 		}
407 		size_t sum = 0;
408 		size_t off = soff;
409 		foreach (ix, sub; elms) { /+ TODO: Reuse std.algorithm instead? +/
410 			size_t hit = sub.atRaw(input, off);
411 			if (hit == size_t.max) { sum = hit; break; } // if any miss skip
412 			sum += hit;
413 			off += hit;
414 		}
415 		return sum;
416 	}
417 
418 @property:
419 	override Bounds bounds() const nothrow @nogc {
420 		typeof(return) result;
421 		foreach (const ref sub; _subs) {
422 			if (sub.bounds.low != size_t.max)
423 				result.low += sub.bounds.low; // TODO: catch overflow
424 			if (sub.bounds.high != size_t.max)
425 				result.high += sub.bounds.high; // TODO: catch overflow
426 		}
427 		return result;
428 	}
429 	override bool isFixed() const nothrow @nogc => _subs.all!(a => a.isFixed);
430 	override bool isConstant() const nothrow @nogc => _subs.all!(a => a.isConstant);
431 	override const(Data) tryGetConstant() const @nogc => [];
432 	version (none) override Lit[] mandatories() nothrow => _subs.map!(node => node.mandatories).joiner.array;
433 	version (none) override Lit[] optionals() nothrow => _subs.map!(node => node.optionals).joiner.array;
434 }
435 
436 auto seq(Args...)(Args args) @safe pure nothrow => new Seq(args); // instantiator
437 
438 pure nothrow @safe unittest {
439 	immutable input = `alpha`;
440 
441 	const s = seq(`al.`.lit,
442 				  `pha`.lit);
443 	assert(s.isFixed);
444 	assert(s.isConstant);
445 	assert(s.at(input)); // TODO: this should fail
446 	assert(s.bounds.low == 6);
447 	assert(s.bounds.high== 6);
448 
449 	const t = `al`.lit ~ `pha`.lit;
450 
451 	assert(s !is t);
452 
453 	const al = seq(`a`.lit, `l`.lit);
454 	assert(al.at(`a`) == size_t.max); // `al not found in `a`
455 }
456 
457 @trusted pure nothrow unittest {
458 	auto a = `aa.`.lit;
459 	auto b = `bb.`.lit;
460 	auto c = `cc.`.lit;
461 	auto d = `dd.`.lit;
462 	auto s = seq(a.opt, b,
463 				 c.opt, d);
464 	assert(!s.isFixed);
465 	assert(!s.isConstant);
466 	assert(s.bounds.low == b.length + d.length);
467 	assert(s.bounds.high== a.length + b.length + c.length + d.length);
468 	/+ TODO: assert(equal(s.mandatories, [b, d])); +/
469 	/+ TODO: assert(equal(s.optionals, [a, b, c, d])); +/
470 }
471 
472 /** Alternative of Patterns in $(D ALTS).
473  */
474 final extern(C++) class Alt : SPatt { extern(D):
475 @safe pure nothrow:
476 
477 	this(Node[] subs_) { super(subs_); }
478 	this(Args...)(Args subs_) { super(subs_); }
479 
480 	size_t atIx(const scope string input, size_t soff, out size_t alt_hix) const {
481 		return atRaw(input.representation, soff, alt_hix);
482 	}
483 
484 	void opOpAssign(string s : "~")(Node sub) {
485 		import nxt.algorithm.searching : canFind;
486 		if (!_subs.canFind(sub))
487 			super._subs ~= sub;
488 	}
489 
490 	@property inout(Node[]) alts() inout @nogc => super._subs;
491 
492 	/** Get Length of hit at index soff in input or size_t.max if none.
493 	 */
494 	size_t atRaw(in Data input, size_t soff, out size_t alt_hix) const @nogc {
495 		assert(!alts.empty);	/+ TODO: Move to in contract? +/
496 		size_t hit = 0;
497 		size_t off = soff;
498 		foreach (ix, sub; alts) { /+ TODO: Reuse std.algorithm instead? +/
499 			hit = sub.atRaw(input[off..$]);					 // match alternative
500 			if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done
501 		}
502 		return hit;
503 	}
504 
505 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
506 		size_t alt_hix;
507 		return atRaw(input, soff, alt_hix);
508 	}
509 
510 	/** Find $(D this) in `input` at offset `soff`. */
511 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow {
512 		assert(!alts.empty);	/+ TODO: Move to in contract? +/
513 		switch (alts.length) {
514 			case 1:
515 				const a0 = alts[0].tryGetConstant;
516 				if (!a0.empty) {
517 					auto hit = input[soff..$].find(a0); // Use: second argument to return alt_hix
518 					return hit;
519 				} else
520 					return alts[0].findRawAt(input, soff, enders); // recurse to it
521 			case 2:
522 				const a0 = alts[0].tryGetConstant;
523 				const a1 = alts[1].tryGetConstant;
524 				if (!a0.empty &&
525 					!a1.empty) {
526 					auto hit = input[soff..$].find(a0, a1); // Use: second argument to return alt_hix
527 					return hit[0];
528 				}
529 				break;
530 			case 3:
531 				const a0 = alts[0].tryGetConstant;
532 				const a1 = alts[1].tryGetConstant;
533 				const a2 = alts[2].tryGetConstant;
534 				if (!a0.empty &&
535 					!a1.empty &&
536 					!a2.empty) {
537 					auto hit = input[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix
538 					return hit[0];
539 				}
540 				break;
541 			case 4:
542 				const a0 = alts[0].tryGetConstant;
543 				const a1 = alts[1].tryGetConstant;
544 				const a2 = alts[2].tryGetConstant;
545 				const a3 = alts[3].tryGetConstant;
546 				if (!a0.empty &&
547 					!a1.empty &&
548 					!a2.empty &&
549 					!a3.empty) {
550 					auto hit = input[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix
551 					return hit[0];
552 				}
553 				break;
554 			case 5:
555 				const a0 = alts[0].tryGetConstant;
556 				const a1 = alts[1].tryGetConstant;
557 				const a2 = alts[2].tryGetConstant;
558 				const a3 = alts[3].tryGetConstant;
559 				const a4 = alts[4].tryGetConstant;
560 				if (!a0.empty &&
561 					!a1.empty &&
562 					!a2.empty &&
563 					!a3.empty &&
564 					!a4.empty) {
565 					auto hit = input[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix
566 					return hit[0];
567 				}
568 				break;
569 			default:
570 				break;
571 		}
572 		return super.findRawAt(input, soff, enders); // revert to base case
573 	}
574 
575 @property:
576 	override Bounds bounds() const nothrow @nogc {
577 		auto result = typeof(return)(size_t.max, size_t.min);
578 		foreach (const ref sub; _subs) {
579 			result.low = min(result.low, sub.bounds.low);
580 			result.high = max(result.high, sub.bounds.high);
581 		}
582 		return result;
583 	}
584 	override bool isFixed() const nothrow @nogc {
585 		/+ TODO: Merge these loops using tuple algorithm. +/
586 		auto mins = _subs.map!(a => a.bounds.low);
587 		auto maxs = _subs.map!(a => a.bounds.high);
588 		import nxt.predicates: allEqual;
589 		return (mins.allEqual && maxs.allEqual);
590 	}
591 	override bool isConstant() const nothrow @nogc {
592 		if (_subs.length == 0)
593 			return true;
594 		else if (_subs.length == 1) {
595 			import std.range: front;
596 			return _subs.front.isConstant;
597 		} else
598 			return false;	   /+ TODO: Maybe handle case when _subs are different. +/
599 	}
600 }
601 
602 auto alt(Args...)(Args args) @safe pure nothrow => new Alt(args); // instantiator
603 
604 pure nothrow @safe unittest {
605 	immutable a_b = alt(`a`.lit,
606 						`b`.lit);
607 
608 	immutable a__b = (`a`.lit |
609 					  `b`.lit);
610 
611 	assert(a_b.isFixed);
612 	assert(!a_b.isConstant);
613 	assert(a_b.at(`a`));
614 	assert(a_b.at(`b`));
615 	assert(a_b.at(`c`) == size_t.max);
616 
617 	size_t hix = size_t.max;
618 	a_b.atIx(`a`, 0, hix); assert(hix == 0);
619 	a_b.atIx(`b`, 0, hix); assert(hix == 1);
620 
621 	/* assert(alt.at(`a`) == size_t.max); */
622 	/* assert(alt.at(``) == size_t.max); */
623 
624 	immutable a = alt(lit(`a`));
625 	immutable aa = alt(lit(`aa`));
626 	assert(aa.isConstant);
627 
628 	immutable aa_bb = alt(lit(`aa`),
629 						  lit(`bb`));
630 	assert(aa_bb.isFixed);
631 	assert(aa_bb.bounds.low == 2);
632 	assert(aa_bb.bounds.high== 2);
633 
634 	immutable a_bb = alt(lit(`a`),
635 						 lit(`bb`));
636 	assert(!a_bb.isFixed);
637 	assert(a_bb.bounds.low == 1);
638 	assert(a_bb.bounds.high== 2);
639 
640 	const string _aa = `_aa`;
641 	assert(aa_bb.findAt(_aa) == cast(immutable Data)`aa`);
642 	assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable Data)_aa)[0] == 1);
643 
644 	const string _bb = `_bb`;
645 	assert(aa_bb.findAt(_bb) == cast(immutable Data)`bb`);
646 	assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable Data)_bb)[0] == 1);
647 
648 	assert(a.findAt(`b`) == []);
649 	assert(aa.findAt(`cc`) == []);
650 	assert(aa_bb.findAt(`cc`) == []);
651 	assert(aa_bb.findAt(``) == []);
652 }
653 
654 pure nothrow @safe unittest {
655 	auto a_b = alt(`a`.lit);
656 	a_b ~= `b`.lit;
657 
658 	assert(a_b.isFixed);
659 	assert(!a_b.isConstant);
660 	assert(a_b.at(`a`));
661 	assert(a_b.at(`b`));
662 	assert(a_b.at(`c`) == size_t.max);
663 
664 	size_t hix = size_t.max;
665 	a_b.atIx(`a`, 0, hix); assert(hix == 0);
666 	a_b.atIx(`b`, 0, hix); assert(hix == 1);
667 }
668 
669 final extern(C++) class Space : Node { extern(D):
670 @safe pure nothrow @nogc:
671 override:
672 	size_t atRaw(in Data input, size_t soff = 0) const {
673 		import std.ascii: isWhite;
674 		return soff < input.length && isWhite(input[soff]) ? 1 : size_t.max;
675 	}
676 @property const:
677 	override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1);
678 	bool isFixed() => true;
679 	bool isConstant() => false;
680 }
681 
682 auto ws() @safe pure nothrow => new Space(); // instantiator
683 
684 pure nothrow @safe unittest {
685 	assert(ws.at(` `) == 1);
686 	assert(ws.at("\t") == 1);
687 	assert(ws.at("\n") == 1);
688 }
689 
690 /** Abstract Singleton Super Pattern.
691  */
692 abstract extern(C++) class SPatt1 : Node { extern(D):
693 @safe pure nothrow:
694 	this(Node sub) {
695 		this.sub = sub;
696 		sub._parent = this;
697 	}
698 	protected Node sub;
699 }
700 
701 /** Optional Sub Pattern $(D count) times.
702  */
703 final extern(C++) class Opt : SPatt1 { extern(D):
704 @safe pure nothrow:
705 	this(Node sub) { super(sub); }
706 override:
707 	size_t atRaw(in Data input, size_t soff = 0) const {
708 		assert(soff <= input.length); // include equality because input might be empty and size zero
709 		const hit = sub.atRaw(input[soff..$]);
710 		return hit == size_t.max ? 0 : hit;
711 	}
712 @property const nothrow @nogc:
713 	override Bounds bounds() const nothrow @nogc => typeof(return)(0, sub.bounds.high);
714 	bool isFixed() => false;
715 	bool isConstant() => false;
716 	version (none) Lit[] mandatories() nothrow => [];
717 	version (none) Lit[] optionals() nothrow => sub.optionals;
718 }
719 
720 auto opt(Args...)(Args args) => new Opt(args); // optional
721 
722 pure nothrow @safe unittest {
723 	assert(`a`.lit.opt.at(`b`) == 0);
724 	assert(`a`.lit.opt.at(`a`) == 1);
725 }
726 
727 /** Repetition Sub Pattern $(D count) times.
728  */
729 final extern(C++) class Rep : SPatt1 { extern(D):
730 @safe pure nothrow:
731 
732 	this(Node sub, size_t count) in(count >= 2) {
733 		super(sub);
734 		this.countReq = count;
735 		this.countOpt = 0; // fixed length repetion
736 	}
737 
738 	this(Node sub, size_t countMin, size_t countMax) in {
739 		assert(countMax >= 2);
740 		assert(countMin <= countMax);
741 	} do {
742 		super(sub);
743 		this.countReq = countMin;
744 		this.countOpt = countMax - countMin;
745 	}
746 
747 	override size_t atRaw(in Data input, size_t soff = 0) const {
748 		size_t sum = 0;
749 		size_t off = soff;
750 		/* mandatory */
751 		foreach (ix; 0..countReq) /+ TODO: Reuse std.algorithm instead? +/ {
752 			size_t hit = sub.atRaw(input[off..$]);
753 			if (hit == size_t.max) { return hit; } // if any miss skip
754 			off += hit;
755 			sum += hit;
756 		}
757 		/* optional part */
758 		foreach (ix; countReq..countReq + countOpt) /+ TODO: Reuse std.algorithm instead? +/ {
759 			size_t hit = sub.atRaw(input[off..$]);
760 			if (hit == size_t.max) { break; } // if any miss just break
761 			off += hit;
762 			sum += hit;
763 		}
764 		return sum;
765 	}
766 
767 @property override const nothrow @nogc:
768 	override Bounds bounds() const nothrow @nogc
769 		=> typeof(return)(countReq*sub.bounds.high, (countReq + countOpt)*sub.bounds.high);
770 	bool isFixed() => bounds.low == bounds.high && sub.isFixed;
771 	bool isConstant() => bounds.low == bounds.high && sub.isConstant;
772 	version (none) Lit[] mandatories() => sub.mandatories;
773 	version (none) Lit[] optionals() => sub.optionals;
774 
775 	// invariant { assert(countReq); }
776 	size_t countReq; // Required.
777 	size_t countOpt; // Optional.
778 }
779 
780 auto rep(Args...)(Args args) => new Rep(args); // repetition
781 auto zom(Args...)(Args args) => new Rep(args, 0, size_t.max); // zero or more
782 auto oom(Args...)(Args args) => new Rep(args, 1, size_t.max); // one or more
783 
784 pure nothrow @safe unittest {
785 	auto l = 'a'.lit;
786 
787 	const l5 = l.rep(5);
788 	assert(l5.isConstant);
789 	assert(l5.at(`aaaa`) == size_t.max);
790 	assert(l5.at(`aaaaa`));
791 	assert(l5.at(`aaaaaaa`));
792 	assert(l5.isFixed);
793 	assert(l5.bounds.low == 5);
794 	assert(l5.bounds.high== 5);
795 
796 	const countMin = 2;
797 	const countMax = 3;
798 	const l23 = l.rep(countMin, countMax);
799 	assert(l23.at(`a`) == size_t.max);
800 	assert(l23.at(`aa`) == 2);
801 	assert(l23.at(`aaa`) == 3);
802 	assert(l23.at(`aaaa`) == 3);
803 	assert(!l23.isConstant);
804 	assert(l23.bounds.low == countMin);
805 	assert(l23.bounds.high== countMax);
806 }
807 
808 pure nothrow @safe unittest {
809 	auto l = 'a'.lit;
810 	auto l5 = l.rep(5);
811 	/+ TODO: assert(l5.mandatories == [l]); +/
812 	/+ TODO: assert(l5.optionals == [l]); +/
813 }
814 
815 pure nothrow @safe unittest {
816 	auto l = 'a'.lit;
817 	auto l5 = l.opt.rep(5);
818 	/+ TODO: assert(l5.mandatories == []); +/
819 	/+ TODO: assert(l5.optionals == [l]); +/
820 }
821 
822 final extern(C++) class Ctx : Node { extern(D):
823 	enum Type {
824 		bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\``
825 		beginningOfBlock = bob,
826 		beginningOfFile = bob,
827 		beginningOfString = bob,
828 		eob, /// End	   Of \em Block/Name/File/String. @b Emacs: `\'`
829 		endOfBlock = eob,
830 		endOfFile = eob,
831 		endOfString = eob,
832 
833 		bol,			/// Beginning Of \em Line. @b Emacs: `^`
834 		beginningOfLine = bol,
835 		eol,			/// End	   Of \em Line. @b Emacs: `$`
836 		endOfLine = eol,
837 
838 		bos,			/// Beginning Of \em Symbol. @b Emacs: `\_<`
839 		beginningOfSymbol = bos,
840 		eos,			/// End	   Of \em Symbol. @b Emacs: `\_>`
841 		endOfSymbol = eos,
842 
843 		bow,			/// Beginning Of \em Word. @b Emacs: `\<`
844 		beginningOfWord = bow,
845 		eow,			/// End	   Of \em Word. @b Emacs: `\>`
846 		endOfWord = eow,
847 	}
848 
849 @safe pure nothrow:
850 
851 	this(Type type) { this.type = type; }
852 
853 	override size_t atRaw(in Data input, size_t soff = 0) const
854 	{
855 		assert(soff <= input.length); // include equality because input might be empty and size zero
856 		bool ok = false;
857 		import std.ascii : isAlphaNum/* , newline */;
858 		final switch (type)
859 		{
860 			/* buffer */
861 		case Type.bob: ok = (soff == 0); break;
862 		case Type.eob: ok = (soff == input.length); break;
863 
864 			/* line */
865 		case Type.bol: ok = (soff == 0		  || (input[soff - 1] == 0x0d ||
866 												   input[soff - 1] == 0x0a)); break;
867 		case Type.eol: ok = (soff == input.length || (input[soff	] == 0x0d ||
868 												   input[soff	] == 0x0a)); break;
869 
870 			/* symbol */
871 		case Type.bos: ok = ((soff == 0		 || (!input[soff - 1].isAlphaNum &&
872 													input[soff - 1] != '_')) && /+ TODO: Make '_' language-dependent +/
873 							 (soff < input.length &&  input[soff].isAlphaNum)) ; break;
874 		case Type.eos: ok = ((soff == input.length || (!input[soff].isAlphaNum &&
875 														  input[soff] != '_')) && /+ TODO: Make '_' language-dependent +/
876 							 (soff >= 1		  &&  input[soff - 1].isAlphaNum)) ; break;
877 
878 			/* word */
879 		case Type.bow: ok = ((soff == 0		 || !input[soff - 1].isAlphaNum) &&
880 							 (soff < input.length &&  input[soff].isAlphaNum)) ; break;
881 		case Type.eow: ok = ((soff == input.length || !input[soff].isAlphaNum) &&
882 							 (soff >= 1		  &&  input[soff - 1].isAlphaNum)) ; break;
883 		}
884 		return ok ? 0 : size_t.max;
885 	}
886 @property override const nothrow @nogc:
887 	Bounds bounds() => typeof(return)(0, 0);
888 	bool isFixed() => true;
889 	bool isConstant() => true;
890 	protected Type type;
891 }
892 
893 Ctx bob(Args...)(Args args) => new Ctx(args, Ctx.Type.bob);
894 Ctx eob(Args...)(Args args) => new Ctx(args, Ctx.Type.eob);
895 Ctx bol(Args...)(Args args) => new Ctx(args, Ctx.Type.bol);
896 Ctx eol(Args...)(Args args) => new Ctx(args, Ctx.Type.eol);
897 Ctx bos(Args...)(Args args) => new Ctx(args, Ctx.Type.bos);
898 Ctx eos(Args...)(Args args) => new Ctx(args, Ctx.Type.eos);
899 Ctx bow(Args...)(Args args) => new Ctx(args, Ctx.Type.bow);
900 Ctx eow(Args...)(Args args) => new Ctx(args, Ctx.Type.eow);
901 Seq buf(Args...)(Args args) => seq(bob, args, eob);
902 Seq line(Args...)(Args args) => seq(bol, args, eol);
903 Seq sym(Args...)(Args args) => seq(bos, args, eos);
904 Seq word(Args...)(Args args) => seq(bow, args, eow);
905 
906 pure nothrow @safe unittest {
907 	const bob_ = bob;
908 	const eob_ = eob;
909 	assert(bob_.at(`ab`) == 0);
910 	assert(eob_.at(`ab`, 2) == 0);
911 	assert(bob_.bounds.low == 0);
912 	assert(bob_.bounds.high== 0);
913 
914 	const bol_ = bol;
915 	assert(bol_.at(`ab`) == 0);
916 	assert(bol_.at("a\nb", 2) == 0);
917 	assert(bol_.at("a\nb", 1) == size_t.max);
918 	assert(bol_.bounds.low == 0);
919 	assert(bol_.bounds.high== 0);
920 
921 	const eol_ = eol;
922 	assert(eol_.at(`ab`, 2) == 0);
923 	assert(eol_.at("a\nb", 1) == 0);
924 	assert(eol_.at("a\nb", 2) == size_t.max);
925 	assert(eol_.bounds.low == 0);
926 	assert(eol_.bounds.high== 0);
927 
928 	const bos_ = bos;
929 	const eos_ = eos;
930 	assert(bos_.at(`ab`) == 0);
931 	assert(bos_.at(` ab`) == size_t.max);
932 	assert(eos_.at(`ab`, 2) == 0);
933 	assert(eos_.at(`a_b `, 1) == size_t.max);
934 	assert(eos_.at(`ab `, 2) == 0);
935 	assert(eos_.bounds.low == 0);
936 	assert(eos_.bounds.high== 0);
937 
938 	const bow_ = bow;
939 	const eow_ = eow;
940 	assert(bow_.bounds.low == 0);
941 	assert(bow_.bounds.high== 0);
942 	assert(eow_.bounds.low == 0);
943 	assert(eow_.bounds.high== 0);
944 
945 	assert(bow_.at(`ab`) == 0);
946 	assert(bow_.at(` ab`) == size_t.max);
947 
948 	assert(eow_.at(`ab`, 2) == 0);
949 	assert(eow_.at(`ab `, 0) == size_t.max);
950 
951 	assert(bow_.at(` ab `, 1) == 0);
952 	assert(bow_.at(` ab `, 0) == size_t.max);
953 
954 	assert(eow_.at(` ab `, 3) == 0);
955 	assert(eow_.at(` ab `, 4) == size_t.max);
956 
957 	auto l = lit(`ab`);
958 	auto w = word(l);
959 	assert(w.at(`ab`) == 2);
960 	assert(w.at(`ab_c`) == 2);
961 
962 	auto s = sym(l);
963 	assert(s.at(`ab`) == 2);
964 	assert(s.at(`ab_c`) == size_t.max);
965 
966 	assert(bob_.findAt(`a`) == []);
967 	assert(bob_.findAt(`a`).ptr != null);
968 	assert(eob_.findAt(`a`) == []);
969 
970 	assert(bol_.findAt(`a`) == []);
971 	assert(bol_.findAt(`a`).ptr != null);
972 	assert(eol_.findAt(`a`) == []);
973 
974 	assert(bow_.findAt(`a`) == []);
975 	assert(bow_.findAt(`a`).ptr != null);
976 	assert(eow_.findAt(`a`) == []);
977 
978 	assert(bos_.findAt(`a`) == []);
979 	assert(bos_.findAt(`a`).ptr != null);
980 	assert(eos_.findAt(`a`) == []);
981 	/+ TODO: This fails assert(eos_.findAt(`a`).ptr != null); +/
982 }
983 
984 /** Keyword $(D arg). */
985 Seq kwd(Arg)(Arg arg) => seq(bow, arg, eow);
986 
987 pure nothrow @safe unittest {
988 	const str = `int`;
989 	auto x = str.lit.kwd;
990 
991 	assert(x.at(str, 0));
992 	/* TODO: assert(!x.at(str, 1)); */
993 
994 	assert(x.at(` ` ~ str, 1));
995 	/* TODO: assert(!x.at(` int`, 0)); */
996 }
997 
998 /** Pattern Paired with Prefix and Suffix.
999  */
1000 final extern(C++) class Clause : SPatt1 { extern(D):
1001 @safe pure nothrow:
1002 	this(Node prefix_, Node suffix_, Node sub) {
1003 		super(sub);
1004 		this.prefix = prefix_;
1005 		this.suffix = suffix_;
1006 	}
1007 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @nogc {
1008 		import std.experimental.allocator.mallocator : Mallocator;
1009 		DynamicArray!(Node, Mallocator) enders_;
1010 		enders_.reserve(enders.length == 1);
1011 		() @trusted {
1012 			enders_ ~= cast(Node[])enders;
1013 			enders_ ~= cast()suffix;
1014 		}();
1015 		typeof(return) result = sub.findRawAt(input, soff, enders_[]);
1016 		return result;
1017 	}
1018 	Node prefix, suffix;
1019 }
1020 
1021 Clause paired(Args...)(Node prefix, Node suffix, Args args) => new Clause(prefix, suffix, args);
1022 Clause parend(Args...)(Args args) => new Clause(lit('('), lit(')'), args);
1023 Clause hooked(Args...)(Args args) => new Clause(lit('['), lit(']'), args);
1024 Clause braced(Args...)(Args args) => Clause(lit('{'), lit('}'), args);
1025 
1026 import nxt.assert_ex;
1027 
1028 pure nothrow @safe unittest {
1029 	/* auto p = `[alpha]`.lit.parend; */
1030 	/* assert(p.at(`([alpha])`) == 7); */
1031 
1032 	/* auto h = `[alpha]`.lit.hooked; */
1033 	/* assert(h.at(`[[alpha]]`) == 7); */
1034 
1035 	/* auto b = `[alpha]`.lit.braced; */
1036 	/* assert(b.at(`{[alpha]}`) == 7); */
1037 
1038 	/* auto pb = `[alpha]`.lit.parend; */
1039 }
1040 
1041 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern.
1042 	Example: #!/bin/env rdmd
1043 	See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix)
1044  */
1045 auto ref shebangLine(Node interpreter) @safe pure nothrow {
1046 	return seq(bob,
1047 			   `#!`.lit,
1048 			   `/usr`.lit.opt,
1049 			   `/bin/env`.lit.opt,
1050 			   ws.oom,
1051 			   interpreter);
1052 }
1053 
1054 ///
1055 pure nothrow @safe unittest {
1056 	assert(`rdmd`.lit.shebangLine
1057 				 .at(`#!/bin/env rdmd`) == 15);
1058 	assert(`rdmd`.lit.shebangLine
1059 				 .at(`#!/usr/bin/env rdmd`) == 19);
1060 	auto rgdmd = alt(`rdmd`.lit,
1061 					 `gdmd`.lit);
1062 	assert(rgdmd.shebangLine
1063 				.at(`#!/usr/bin/env rdmd-dev`) == 19);
1064 	assert(rgdmd.shebangLine
1065 				.at(`#!/usr/bin/env gdmd`) == 19);
1066 }