1 /**
2    Patterns.
3 
4    Test: dmd -version=show -preview=dip1000 -preview=in -vcolumns -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d
5    Test: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d
6    Debug: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main pattern.d && lldb pattern
7 
8    Concepts and namings are inspired by regular expressions, symbolic (regular)
9    expressions (Emacs' rx.el package), predicate logic and grammars.
10 
11    String patterns are matched by their raw bytes for now.
12 
13    Copyright: Per Nordlöw 2022-.
14    License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
15    Authors: $(WEB Per Nordlöw)
16 
17    TODO: Merge with lpgen.d Node classes
18    TODO: Extend `Node` classes to represent `xlg.el` read by `LispFileParser`.
19    TODO: Replace `findRawAt(const Data haystack` with `findRawAt(in Data haystack`
20 
21    TODO:
22    Overload operators & (and), ! (not),
23 
24    TODO: Add pattern for expressing inference with `infer`
25    (infer
26 	((x instanceOf X) &
27 	 (X subClassOf Y))
28 	 (x instaceOf Y))
29 
30    TODO:
31    Variables are either
32    - _ (ignore)
33    - _`x`, _`y`, etc
34    - _'x', _'y'
35    - _!0, _!1, ..., _!(n-1)
36 
37    infer(rel!`desire`(_!`x`, _!`y`) &&
38 		 rel!`madeOf`(_!`z`, _!`y`),
39 		 rel!`desire`(_!`x`, _!`z`))
40 
41    TODO: Support variables of specific types and inference using predicate logic:
42 		infer(and(fact(var!'x', rel'desire', var!'y'),
43 				  fact(var!'z', opt(rel'madeOf',
44 							  rel'instanceOf'), var!'y'))),
45 			  pred(var!'x', rel'desire', var!'z'
46 			  ))
47 
48    TODO: Make returns from factory functions immutable.
49    TODO: Reuse return patterns from Lit
50 
51    TODO
52    const s = seq(`al.`.lit,`pha`.lit);
53    const t = `al`.lit ~ `pha`.lit;
54    assert(s !is t);
55    assert(equal(s, t));
56 
57  */
58 module nxt.pattern;
59 
60 import std.algorithm : find, all, map, min, max, joiner;
61 import std.range : empty;
62 import std.array : array;
63 import std.string : representation;
64 import std.traits : isSomeString;
65 import nxt.find_ex : findAcronymAt, FindContext;
66 import nxt.debugio;
67 import nxt.container.dynamic_array;
68 
69 /++ String Substitution. +/
70 struct Substitution {
71 	string src;
72 	string dst;
73 }
74 
75 @safe:
76 
77 /++ Untyped data.
78  +/
79 alias Data = ubyte[];
80 
81 /++ Matching `input` with `node`.
82 	Compatibility with `std.regex.matchFirst`.
83  +/
84 inout(char)[] matchFirst(scope return /+ref+/ inout(char)[] input, const Node node) pure nothrow /+@nogc+/ {
85 	return input.match(node);
86 }
87 
88 ///
89 @safe pure unittest {
90 	const x = "a";
91 	assert(x.matchFirst(lit("b")).length == 0);
92 	assert(x.matchFirst(lit("b")) is []);
93 }
94 
95 ///
96 @safe pure unittest {
97 	const x = "ab";
98 	assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]);
99 	assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]);
100 }
101 
102 /** Match `input` with `node`.
103 	Returns: Matched slice or `[]` if not match.
104  */
105 auto ref matchFirst(in return /+ref+/ Data input, const Node node) pure nothrow /+@nogc+/ {
106 	return node.findRawAt(input, 0);
107 }
108 
109 ///
110 @safe pure unittest {
111 	/+const+/ Data x = [1,2];
112 	assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]);
113 	assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]);
114 }
115 
116 inout(char)[] match(scope return /+ref+/ inout(char)[] input, const Node node) @trusted pure nothrow /+@nogc+/ {
117 	return cast(typeof(return))input.representation.matchFirst(node);
118 }
119 
120 /++ Pattern (length) bounds. +/
121 struct Bounds {
122 	static immutable inf = size_t.max;
123 	size_t low; ///< Smallest length possible.
124 	size_t high; ///< Largest length possible.
125 }
126 
127 /** Base Pattern.
128  */
129 abstract extern(C++) class Node { extern(D):
130 @safe pure nothrow:
131 
132 	Seq opBinary(string op)(Node rhs) if (op == `~`) => opCatImpl(rhs); // template can't be overridden
133 	Alt opBinary(string op)(Node rhs) if (op == `|`) => opAltImpl(rhs); // template can't be overridden
134 
135 	protected Seq opCatImpl(Node rhs) => seq(this, rhs); /+ TODO: check if this and rhs is Seq +/
136 	protected Alt opAltImpl(Node rhs) => alt(this, rhs);  /+ TODO: check if this and rhs is Alt +/
137 
138 	final size_t at(const scope string input, size_t soff = 0) const
139 	/+ TODO: Activate this +/
140 	/* out (hit) { */
141 	/*	 assert((!hit) || hit >= bounds.min); /+ TODO: Is this needed? +/ */
142 	/* } */
143 	/* do */
144 	{
145 		return atRaw(input.representation, soff);
146 	}
147 
148 	abstract size_t atRaw(in Data input, size_t soff = 0) const @nogc;
149 
150 	/** Find $(D this) in String `input` at offset `soff`. */
151 	final const(Data) findAt(const return scope string input, size_t soff = 0) const {
152 		return findRawAt(input.representation, soff, []); /+ TODO: this is ugly +/
153 	}
154 
155 	/** Find $(D this) in Raw Bytes `input` at offset `soff`. */
156 	const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const @nogc {
157 		auto i = soff;
158 		while (i < input.length) { // while bytes left at i
159 			if (input.length - i < bounds.low)  // and bytes left to find pattern
160 				return [];
161 			const hit = atRaw(input, i);
162 			if (hit != size_t.max) // hit at i
163 				return input[i..i + hit];
164 			i++;
165 		}
166 		return [];
167 	}
168 
169 @property @nogc:
170 	abstract Bounds bounds() const;
171 
172 	abstract bool isFixed() const; /// Returns: true if all possible instances have same length.
173 	abstract bool isConstant() const; /// Returns: true if all possible instances have same length.
174 
175 	final bool isVariable() => !isConstant;
176 	const(Data) tryGetConstant() const => []; /// Returns: data if literal otherwise empty array.
177 
178 	/** Get All Literals that must match a given source $(D X) in order for $(D
179 		this) to match $(D X) somewhere.
180 	*/
181 	version (none) abstract Lit[] mandatories();
182 
183 	/** Get Optional Literals that may match a given source $(D X) if $(D this)
184 		matches $(D X) somewhere.
185 	*/
186 	version (none) Lit[] optionals() => mandatories;
187 
188 	protected Node _parent; /// Parenting (Super) Pattern.
189 }
190 
191 /** Literal Pattern with Cached Binary Byte Histogram.
192  */
193 final extern(C++) class Lit : Node { extern(D):
194 @safe pure nothrow:
195 
196 	this(string bytes_) in(!bytes_.empty) {
197 		this(bytes_.representation);
198 	}
199 	this(ubyte ch) {
200 		this._bytes ~= ch;
201 	}
202 	this(Data bytes_) /+in(!bytes_.empty)+/ {
203 		this._bytes = bytes_;
204 	}
205 	this(immutable Data bytes_) /+in(!bytes_.empty)+/ {
206 		this._bytes = bytes_.dup;
207 	}
208 
209 	override size_t atRaw(in Data input, size_t soff = 0) const {
210 		const l = _bytes.length;
211 		return (soff + l <= input.length && // fits in input and
212 				_bytes[] == input[soff..soff + l]) ? l : size_t.max; // same contents
213 	}
214 
215 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @trusted {
216 		const result = input[soff..$].find(_bytes); // Uses std.algorithm.find
217 		/+ IMHO, find should return null on miss for non-empty needle (`_bytes`)
218 		   so adjust so that it does. +/
219 		return result.length != 0 ? result : [];
220 	}
221 
222 @property nothrow @nogc:
223 	auto ref bytes() const => _bytes;
224 	private Data _bytes;
225 	alias _bytes this;
226 override:
227 	Bounds bounds() const => Bounds(_bytes.length, _bytes.length);
228 	bool isFixed() const => true;
229 	bool isConstant() const => true;
230 	const(Data) tryGetConstant() const => _bytes;
231 	version (none) Lit[] mandatories() => [this];
232 }
233 
234 /++ Literal. +/
235 auto lit(Args...)(Args args) @safe pure nothrow => new Lit(args); // instantiator
236 
237 /++ Full|Exact|Complete (Anchored) literal. +/
238 auto full(Args...)(Args args) @safe pure nothrow => seq(bob, lit(args), eob); // instantiator
239 
240 pure nothrow @safe unittest {
241 	const _ = lit(Data.init);
242 	const ab = lit(`ab`);
243 	assert(`ab`.match(ab));
244 	version (none) assert(!ab.match(`_`)); // TODO: enable
245 }
246 
247 pure nothrow @safe unittest {
248 	immutable ab = `ab`;
249 	assert(lit('b').at(`ab`, 1) == 1);
250 	const a = lit('a');
251 
252 	const ac = lit(`ac`);
253 	assert(`ac`.match(ac));
254 	assert(`ac`.match(ac).length == 2);
255  	assert(`ca`.match(ac) == []);
256 
257 	assert(a.isFixed);
258 	assert(a.isConstant);
259 	assert(a.at(ab) == 1);
260 	assert(lit(ab).at(`a`) == size_t.max);
261 	assert(lit(ab).at(`b`) == size_t.max);
262 
263 	assert(a.findAt(`cba`) == cast(immutable Data)`a`);
264 	assert(a.findAt(`ba`) == cast(immutable Data)`a`);
265 	assert(a.findAt(`a`) == cast(immutable Data)`a`);
266 	assert(a.findAt(``) == []);
267 	assert(a.findAt(`b`) == []);
268 	assert(ac.findAt(`__ac`) == cast(immutable Data)`ac`);
269 	assert(a.findAt(`b`).length == 0);
270 
271 	auto xyz = lit(`xyz`);
272 }
273 
274 pure nothrow @safe unittest {
275 	auto ac = lit(`ac`);
276 	/+ TODO: assert(ac.mandatories == [ac]); +/
277 	/+ TODO: assert(ac.optionals == [ac]); +/
278 }
279 
280 /** Word/Symbol Acronym Pattern.
281  */
282 version (none) // TODO: enable if and when I need this
283 final extern(C++) class Acronym : Node { extern(D):
284 @safe pure nothrow:
285 
286 	this(string bytes_, FindContext ctx = FindContext.inSymbol) in(!bytes_.empty) {
287 		this(bytes_.representation, ctx);
288 	}
289 
290 	this(ubyte ch) { this._acros ~= ch; }
291 
292 	this(Data bytes_, FindContext ctx = FindContext.inSymbol) {
293 		this._acros = bytes_;
294 		this._ctx = ctx;
295 	}
296 
297 	this(immutable Data bytes_, FindContext ctx = FindContext.inSymbol) {
298 		this._acros = bytes_.dup;
299 		this._ctx = ctx;
300 	}
301 
302 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
303 		// scope auto offs = new size_t[_acros.length]; // hit offsets
304 		size_t a = 0;		 // acronym index
305 		foreach(s, ub; input[soff..$]) { // for each element in source
306 			import std.ascii: isAlpha;
307 
308 			// Check context
309 			final switch (_ctx) {
310 			case FindContext.inWord:
311 			case FindContext.asWord:
312 				if (!ub.isAlpha)
313 					return size_t.max;
314 				break;
315 			case FindContext.inSymbol:
316 			case FindContext.asSymbol:
317 				if (!ub.isAlpha && ub != '_')
318 					return size_t.max;
319 				break;
320 			}
321 
322 			if (_acros[a] == ub) {
323 				// offs[a] = s + soff; // store hit offset
324 				a++; // advance acronym
325 				if (a == _acros.length) { // if complete acronym found
326 					return s + 1;			 // return its length
327 				}
328 			}
329 		}
330 		return size_t.max; // no hit
331 	}
332 
333 	template Tuple(E...) { alias Tuple = E; }
334 
335 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow {
336 		import std.string: CaseSensitive;
337 		return input.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0];
338 	}
339 
340 @property:
341 	override size_t bounds() const nothrow @nogc => typeof(return)(_acros.length, size_t.max);
342 	override bool isFixed() const nothrow @nogc => false;
343 	override bool isConstant() const nothrow @nogc => false;
344 
345 private:
346 	Data _acros;
347 	FindContext _ctx;
348 }
349 
350 version (none)
351 @safe pure nothrow {
352 	auto inwac(Args...)(Args args) => new Acronym(args, FindContext.inWord); // word acronym
353 	auto insac(Args...)(Args args) => new Acronym(args, FindContext.inSymbol); // symbol acronym
354 	auto aswac(Args...)(Args args) => new Acronym(args, FindContext.asWord); // word acronym
355 	auto assac(Args...)(Args args) => new Acronym(args, FindContext.asSymbol); // symbol acronym
356 }
357 
358 version (none)
359 pure nothrow @safe unittest {
360 	assert(inwac(`a`).at(`a`) == 1);
361 	assert(inwac(`ab`).at(`ab`) == 2);
362 	assert(inwac(`ab`).at(`a`) == size_t.max);
363 	assert(inwac(`abc`).at(`abc`) == 3);
364 	assert(inwac(`abc`).at(`aabbcc`) == 5);
365 	assert(inwac(`abc`).at(`aaaabbcc`) == 7);
366 	assert(inwac(`fpn`).at(`fopen`) == 5);
367 }
368 
369 /** Any Byte.
370  */
371 final extern(C++) class Any : Node { extern(D):
372 @safe pure nothrow:
373 	this() {}
374 
375 	override size_t atRaw(in Data input, size_t soff = 0) const => soff < input.length ? 1 : size_t.max;
376 
377 @property:
378 	override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1);
379 	override bool isFixed() const nothrow @nogc => true;
380 	override bool isConstant() const nothrow @nogc => false;
381 
382 	version (none) override Lit[] mandatories() nothrow => [];
383 	version (none) override Lit[] optionals() nothrow {
384 		import std.range: iota;
385 		return iota(0, 256).map!(n => (cast(ubyte)n).lit).array;
386 	}
387 }
388 
389 auto any(Args...)(Args args) => new Any(args); // instantiator
390 
391 /** Abstract Super Pattern.
392  */
393 abstract extern(C++) class SPatt : Node { extern(D):
394 @safe pure nothrow:
395 
396 	this(Node[] subs_) { this._subs = subs_; }
397 	this(Args...)(Args subs_) {
398 		foreach (sub; subs_) {
399 			alias Sub = typeof(sub);
400 			/+ TODO: functionize to patternFromBuiltinType() or to!Node +/
401 			static if (is(Sub == string) ||
402 					   is(Sub == char)) {
403 				_subs ~= new Lit(sub);
404 			} else
405 				_subs ~= sub;
406 			sub._parent = this;
407 		}
408 	}
409 
410 	protected Node[] _subs;
411 }
412 
413 /** Sequence of Patterns.
414  */
415 final extern(C++) class Seq : SPatt { extern(D):
416 @safe pure nothrow:
417 
418 	this(Node[] subs_) { super(subs_); }
419 	this(Args...)(Args subs_) { super(subs_); }
420 
421 	@property auto ref inout (Node[]) elms() inout @nogc => super._subs;
422 
423 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
424 		assert(!elms.empty); /+ TODO: Move to in contract? +/
425 		const c = tryGetConstant;
426 		if (!c.empty) {
427 			return (soff + c.length <= input.length &&   // if equal size and
428 					c[] == input[soff..soff + c.length]); // equal contents
429 		}
430 		size_t sum = 0;
431 		size_t off = soff;
432 		foreach (ix, sub; elms) { /+ TODO: Reuse std.algorithm instead? +/
433 			size_t hit = sub.atRaw(input, off);
434 			if (hit == size_t.max) { sum = hit; break; } // if any miss skip
435 			sum += hit;
436 			off += hit;
437 		}
438 		return sum;
439 	}
440 
441 @property:
442 	override Bounds bounds() const nothrow @nogc {
443 		typeof(return) result;
444 		foreach (const ref sub; _subs) {
445 			if (sub.bounds.low != size_t.max)
446 				result.low += sub.bounds.low; // TODO: catch overflow
447 			if (sub.bounds.high != size_t.max)
448 				result.high += sub.bounds.high; // TODO: catch overflow
449 		}
450 		return result;
451 	}
452 	override bool isFixed() const nothrow @nogc => _subs.all!(a => a.isFixed);
453 	override bool isConstant() const nothrow @nogc => _subs.all!(a => a.isConstant);
454 	override const(Data) tryGetConstant() const @nogc => [];
455 	version (none) override Lit[] mandatories() nothrow => _subs.map!(node => node.mandatories).joiner.array;
456 	version (none) override Lit[] optionals() nothrow => _subs.map!(node => node.optionals).joiner.array;
457 }
458 
459 auto seq(Args...)(Args args) @safe pure nothrow => new Seq(args); // instantiator
460 
461 pure nothrow @safe unittest {
462 	immutable input = `alpha`;
463 
464 	const s = seq(`al.`.lit,
465 				  `pha`.lit);
466 	assert(s.isFixed);
467 	assert(s.isConstant);
468 	assert(s.at(input)); // TODO: this should fail
469 	assert(s.bounds.low == 6);
470 	assert(s.bounds.high== 6);
471 
472 	const t = `al`.lit ~ `pha`.lit;
473 
474 	assert(s !is t);
475 
476 	const al = seq(`a`.lit, `l`.lit);
477 	assert(al.at(`a`) == size_t.max); // `al not found in `a`
478 }
479 
480 @trusted pure nothrow unittest {
481 	auto a = `aa.`.lit;
482 	auto b = `bb.`.lit;
483 	auto c = `cc.`.lit;
484 	auto d = `dd.`.lit;
485 	auto s = seq(a.opt, b,
486 				 c.opt, d);
487 	assert(!s.isFixed);
488 	assert(!s.isConstant);
489 	assert(s.bounds.low == b.length + d.length);
490 	assert(s.bounds.high== a.length + b.length + c.length + d.length);
491 	/+ TODO: assert(equal(s.mandatories, [b, d])); +/
492 	/+ TODO: assert(equal(s.optionals, [a, b, c, d])); +/
493 }
494 
495 /** Alternative of Patterns in $(D ALTS).
496  */
497 final extern(C++) class Alt : SPatt { extern(D):
498 @safe pure nothrow:
499 
500 	this(Node[] subs_) { super(subs_); }
501 	this(Args...)(Args subs_) { super(subs_); }
502 
503 	size_t atIx(const scope string input, size_t soff, out size_t alt_hix) const {
504 		return atRaw(input.representation, soff, alt_hix);
505 	}
506 
507 	void opOpAssign(string s : "~")(Node sub) {
508 		import nxt.algorithm.searching : canFind;
509 		if (!_subs.canFind(sub))
510 			super._subs ~= sub;
511 	}
512 
513 	@property inout(Node[]) alts() inout @nogc => super._subs;
514 
515 	/** Get Length of hit at index soff in input or size_t.max if none.
516 	 */
517 	size_t atRaw(in Data input, size_t soff, out size_t alt_hix) const @nogc {
518 		assert(!alts.empty);	/+ TODO: Move to in contract? +/
519 		size_t hit = 0;
520 		size_t off = soff;
521 		foreach (ix, sub; alts) { /+ TODO: Reuse std.algorithm instead? +/
522 			hit = sub.atRaw(input[off..$]);					 // match alternative
523 			if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done
524 		}
525 		return hit;
526 	}
527 
528 	override size_t atRaw(in Data input, size_t soff = 0) const @nogc {
529 		size_t alt_hix;
530 		return atRaw(input, soff, alt_hix);
531 	}
532 
533 	/** Find $(D this) in `input` at offset `soff`. */
534 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow {
535 		assert(!alts.empty);	/+ TODO: Move to in contract? +/
536 		switch (alts.length) {
537 			case 1:
538 				const a0 = alts[0].tryGetConstant;
539 				if (!a0.empty) {
540 					auto hit = input[soff..$].find(a0); // Use: second argument to return alt_hix
541 					return hit.length != 0 ? hit : [];
542 				} else
543 					return alts[0].findRawAt(input, soff, enders); // recurse to it
544 			case 2:
545 				const a0 = alts[0].tryGetConstant;
546 				const a1 = alts[1].tryGetConstant;
547 				if (!a0.empty &&
548 					!a1.empty) {
549 					auto hit = input[soff..$].find(a0, a1); // Use: second argument to return alt_hix
550 					return hit[0];
551 				}
552 				break;
553 			case 3:
554 				const a0 = alts[0].tryGetConstant;
555 				const a1 = alts[1].tryGetConstant;
556 				const a2 = alts[2].tryGetConstant;
557 				if (!a0.empty &&
558 					!a1.empty &&
559 					!a2.empty) {
560 					auto hit = input[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix
561 					return hit[0];
562 				}
563 				break;
564 			case 4:
565 				const a0 = alts[0].tryGetConstant;
566 				const a1 = alts[1].tryGetConstant;
567 				const a2 = alts[2].tryGetConstant;
568 				const a3 = alts[3].tryGetConstant;
569 				if (!a0.empty &&
570 					!a1.empty &&
571 					!a2.empty &&
572 					!a3.empty) {
573 					auto hit = input[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix
574 					return hit[0];
575 				}
576 				break;
577 			case 5:
578 				const a0 = alts[0].tryGetConstant;
579 				const a1 = alts[1].tryGetConstant;
580 				const a2 = alts[2].tryGetConstant;
581 				const a3 = alts[3].tryGetConstant;
582 				const a4 = alts[4].tryGetConstant;
583 				if (!a0.empty &&
584 					!a1.empty &&
585 					!a2.empty &&
586 					!a3.empty &&
587 					!a4.empty) {
588 					auto hit = input[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix
589 					return hit[0];
590 				}
591 				break;
592 			default:
593 				break;
594 		}
595 		return super.findRawAt(input, soff, enders); // revert to base case
596 	}
597 
598 @property:
599 	override Bounds bounds() const nothrow @nogc {
600 		auto result = typeof(return)(size_t.max, size_t.min);
601 		foreach (const ref sub; _subs) {
602 			result.low = min(result.low, sub.bounds.low);
603 			result.high = max(result.high, sub.bounds.high);
604 		}
605 		return result;
606 	}
607 	override bool isFixed() const nothrow @nogc {
608 		/+ TODO: Merge these loops using tuple algorithm. +/
609 		auto mins = _subs.map!(a => a.bounds.low);
610 		auto maxs = _subs.map!(a => a.bounds.high);
611 		import nxt.predicates: allEqual;
612 		return (mins.allEqual && maxs.allEqual);
613 	}
614 	override bool isConstant() const nothrow @nogc {
615 		if (_subs.length == 0)
616 			return true;
617 		else if (_subs.length == 1) {
618 			import std.range: front;
619 			return _subs.front.isConstant;
620 		} else
621 			return false;	   /+ TODO: Maybe handle case when _subs are different. +/
622 	}
623 }
624 
625 auto alt(Args...)(Args args) @safe pure nothrow => new Alt(args); // instantiator
626 
627 pure nothrow @safe unittest {
628 	immutable a_b = alt(`a`.lit,
629 						`b`.lit);
630 
631 	immutable a__b = (`a`.lit |
632 					  `b`.lit);
633 
634 	assert(a_b.isFixed);
635 	assert(!a_b.isConstant);
636 	assert(a_b.at(`a`));
637 	assert(a_b.at(`b`));
638 	assert(a_b.at(`c`) == size_t.max);
639 
640 	size_t hix = size_t.max;
641 	a_b.atIx(`a`, 0, hix); assert(hix == 0);
642 	a_b.atIx(`b`, 0, hix); assert(hix == 1);
643 
644 	/* assert(alt.at(`a`) == size_t.max); */
645 	/* assert(alt.at(``) == size_t.max); */
646 
647 	immutable a = alt(lit(`a`));
648 	immutable aa = alt(lit(`aa`));
649 	assert(aa.isConstant);
650 
651 	immutable aa_bb = alt(lit(`aa`),
652 						  lit(`bb`));
653 	assert(aa_bb.isFixed);
654 	assert(aa_bb.bounds.low == 2);
655 	assert(aa_bb.bounds.high== 2);
656 
657 	immutable a_bb = alt(lit(`a`),
658 						 lit(`bb`));
659 	assert(!a_bb.isFixed);
660 	assert(a_bb.bounds.low == 1);
661 	assert(a_bb.bounds.high== 2);
662 
663 	const string _aa = `_aa`;
664 	assert(aa_bb.findAt(_aa) == cast(immutable Data)`aa`);
665 	assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable Data)_aa)[0] == 1);
666 
667 	const string _bb = `_bb`;
668 	assert(aa_bb.findAt(_bb) == cast(immutable Data)`bb`);
669 	assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable Data)_bb)[0] == 1);
670 
671 	assert(a.findAt(`b`) == []);
672 	assert(aa.findAt(`cc`) == []);
673 	assert(aa_bb.findAt(`cc`) == []);
674 	assert(aa_bb.findAt(``) == []);
675 }
676 
677 pure nothrow @safe unittest {
678 	auto a_b = alt(`a`.lit);
679 	a_b ~= `b`.lit;
680 
681 	assert(a_b.isFixed);
682 	assert(!a_b.isConstant);
683 	assert(a_b.at(`a`));
684 	assert(a_b.at(`b`));
685 	assert(a_b.at(`c`) == size_t.max);
686 
687 	size_t hix = size_t.max;
688 	a_b.atIx(`a`, 0, hix); assert(hix == 0);
689 	a_b.atIx(`b`, 0, hix); assert(hix == 1);
690 }
691 
692 final extern(C++) class Space : Node { extern(D):
693 @safe pure nothrow @nogc:
694 override:
695 	size_t atRaw(in Data input, size_t soff = 0) const {
696 		import std.ascii: isWhite;
697 		return soff < input.length && isWhite(input[soff]) ? 1 : size_t.max;
698 	}
699 @property const:
700 	override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1);
701 	bool isFixed() => true;
702 	bool isConstant() => false;
703 }
704 
705 auto ws() @safe pure nothrow => new Space(); // instantiator
706 
707 pure nothrow @safe unittest {
708 	assert(ws.at(` `) == 1);
709 	assert(ws.at("\t") == 1);
710 	assert(ws.at("\n") == 1);
711 }
712 
713 /** Abstract Singleton Super Pattern.
714  */
715 abstract extern(C++) class SPatt1 : Node { extern(D):
716 @safe pure nothrow:
717 	this(Node sub) {
718 		this.sub = sub;
719 		sub._parent = this;
720 	}
721 	protected Node sub;
722 }
723 
724 /** Optional Sub Pattern $(D count) times.
725  */
726 final extern(C++) class Opt : SPatt1 { extern(D):
727 @safe pure nothrow:
728 	this(Node sub) { super(sub); }
729 override:
730 	size_t atRaw(in Data input, size_t soff = 0) const {
731 		assert(soff <= input.length); // include equality because input might be empty and size zero
732 		const hit = sub.atRaw(input[soff..$]);
733 		return hit == size_t.max ? 0 : hit;
734 	}
735 @property const nothrow @nogc:
736 	override Bounds bounds() const nothrow @nogc => typeof(return)(0, sub.bounds.high);
737 	bool isFixed() => false;
738 	bool isConstant() => false;
739 	version (none) Lit[] mandatories() nothrow => [];
740 	version (none) Lit[] optionals() nothrow => sub.optionals;
741 }
742 
743 auto opt(Args...)(Args args) => new Opt(args); // optional
744 
745 pure nothrow @safe unittest {
746 	assert(`a`.lit.opt.at(`b`) == 0);
747 	assert(`a`.lit.opt.at(`a`) == 1);
748 }
749 
750 /** Repetition Sub Pattern $(D count) times.
751  */
752 final extern(C++) class Rep : SPatt1 { extern(D):
753 @safe pure nothrow:
754 
755 	this(Node sub, size_t count) in(count >= 2) {
756 		super(sub);
757 		this.countReq = count;
758 		this.countOpt = 0; // fixed length repetion
759 	}
760 
761 	this(Node sub, size_t countMin, size_t countMax) in {
762 		assert(countMax >= 2);
763 		assert(countMin <= countMax);
764 	} do {
765 		super(sub);
766 		this.countReq = countMin;
767 		this.countOpt = countMax - countMin;
768 	}
769 
770 	override size_t atRaw(in Data input, size_t soff = 0) const {
771 		size_t sum = 0;
772 		size_t off = soff;
773 		/* mandatory */
774 		foreach (ix; 0..countReq) /+ TODO: Reuse std.algorithm instead? +/ {
775 			size_t hit = sub.atRaw(input[off..$]);
776 			if (hit == size_t.max) { return hit; } // if any miss skip
777 			off += hit;
778 			sum += hit;
779 		}
780 		/* optional part */
781 		foreach (ix; countReq..countReq + countOpt) /+ TODO: Reuse std.algorithm instead? +/ {
782 			size_t hit = sub.atRaw(input[off..$]);
783 			if (hit == size_t.max) { break; } // if any miss just break
784 			off += hit;
785 			sum += hit;
786 		}
787 		return sum;
788 	}
789 
790 @property override const nothrow @nogc:
791 	override Bounds bounds() const nothrow @nogc
792 		=> typeof(return)(countReq*sub.bounds.high, (countReq + countOpt)*sub.bounds.high);
793 	bool isFixed() => bounds.low == bounds.high && sub.isFixed;
794 	bool isConstant() => bounds.low == bounds.high && sub.isConstant;
795 	version (none) Lit[] mandatories() => sub.mandatories;
796 	version (none) Lit[] optionals() => sub.optionals;
797 
798 	// invariant { assert(countReq); }
799 	size_t countReq; // Required.
800 	size_t countOpt; // Optional.
801 }
802 
803 auto rep(Args...)(Args args) => new Rep(args); // repetition
804 auto zom(Args...)(Args args) => new Rep(args, 0, size_t.max); // zero or more
805 auto oom(Args...)(Args args) => new Rep(args, 1, size_t.max); // one or more
806 
807 pure nothrow @safe unittest {
808 	auto l = 'a'.lit;
809 
810 	const l5 = l.rep(5);
811 	assert(l5.isConstant);
812 	assert(l5.at(`aaaa`) == size_t.max);
813 	assert(l5.at(`aaaaa`));
814 	assert(l5.at(`aaaaaaa`));
815 	assert(l5.isFixed);
816 	assert(l5.bounds.low == 5);
817 	assert(l5.bounds.high== 5);
818 
819 	const countMin = 2;
820 	const countMax = 3;
821 	const l23 = l.rep(countMin, countMax);
822 	assert(l23.at(`a`) == size_t.max);
823 	assert(l23.at(`aa`) == 2);
824 	assert(l23.at(`aaa`) == 3);
825 	assert(l23.at(`aaaa`) == 3);
826 	assert(!l23.isConstant);
827 	assert(l23.bounds.low == countMin);
828 	assert(l23.bounds.high== countMax);
829 }
830 
831 pure nothrow @safe unittest {
832 	auto l = 'a'.lit;
833 	auto l5 = l.rep(5);
834 	/+ TODO: assert(l5.mandatories == [l]); +/
835 	/+ TODO: assert(l5.optionals == [l]); +/
836 }
837 
838 pure nothrow @safe unittest {
839 	auto l = 'a'.lit;
840 	auto l5 = l.opt.rep(5);
841 	/+ TODO: assert(l5.mandatories == []); +/
842 	/+ TODO: assert(l5.optionals == [l]); +/
843 }
844 
845 final extern(C++) class Ctx : Node { extern(D):
846 	enum Type {
847 		bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\``
848 		beginningOfBlock = bob,
849 		beginningOfFile = bob,
850 		beginningOfString = bob,
851 		eob, /// End	   Of \em Block/Name/File/String. @b Emacs: `\'`
852 		endOfBlock = eob,
853 		endOfFile = eob,
854 		endOfString = eob,
855 
856 		bol,			/// Beginning Of \em Line. @b Emacs: `^`
857 		beginningOfLine = bol,
858 		eol,			/// End	   Of \em Line. @b Emacs: `$`
859 		endOfLine = eol,
860 
861 		bos,			/// Beginning Of \em Symbol. @b Emacs: `\_<`
862 		beginningOfSymbol = bos,
863 		eos,			/// End	   Of \em Symbol. @b Emacs: `\_>`
864 		endOfSymbol = eos,
865 
866 		bow,			/// Beginning Of \em Word. @b Emacs: `\<`
867 		beginningOfWord = bow,
868 		eow,			/// End	   Of \em Word. @b Emacs: `\>`
869 		endOfWord = eow,
870 	}
871 
872 @safe pure nothrow:
873 
874 	this(Type type) { this.type = type; }
875 
876 	override size_t atRaw(in Data input, size_t soff = 0) const
877 	{
878 		assert(soff <= input.length); // include equality because input might be empty and size zero
879 		bool ok = false;
880 		import std.ascii : isAlphaNum/* , newline */;
881 		final switch (type)
882 		{
883 			/* buffer */
884 		case Type.bob: ok = (soff == 0); break;
885 		case Type.eob: ok = (soff == input.length); break;
886 
887 			/* line */
888 		case Type.bol: ok = (soff == 0		  || (input[soff - 1] == 0x0d ||
889 												   input[soff - 1] == 0x0a)); break;
890 		case Type.eol: ok = (soff == input.length || (input[soff	] == 0x0d ||
891 												   input[soff	] == 0x0a)); break;
892 
893 			/* symbol */
894 		case Type.bos: ok = ((soff == 0		 || (!input[soff - 1].isAlphaNum &&
895 													input[soff - 1] != '_')) && /+ TODO: Make '_' language-dependent +/
896 							 (soff < input.length &&  input[soff].isAlphaNum)) ; break;
897 		case Type.eos: ok = ((soff == input.length || (!input[soff].isAlphaNum &&
898 														  input[soff] != '_')) && /+ TODO: Make '_' language-dependent +/
899 							 (soff >= 1		  &&  input[soff - 1].isAlphaNum)) ; break;
900 
901 			/* word */
902 		case Type.bow: ok = ((soff == 0		 || !input[soff - 1].isAlphaNum) &&
903 							 (soff < input.length &&  input[soff].isAlphaNum)) ; break;
904 		case Type.eow: ok = ((soff == input.length || !input[soff].isAlphaNum) &&
905 							 (soff >= 1		  &&  input[soff - 1].isAlphaNum)) ; break;
906 		}
907 		return ok ? 0 : size_t.max;
908 	}
909 @property override const nothrow @nogc:
910 	Bounds bounds() => typeof(return)(0, 0);
911 	bool isFixed() => true;
912 	bool isConstant() => true;
913 	protected Type type;
914 }
915 
916 Ctx bob(Args...)(Args args) => new Ctx(args, Ctx.Type.bob);
917 Ctx eob(Args...)(Args args) => new Ctx(args, Ctx.Type.eob);
918 Ctx bol(Args...)(Args args) => new Ctx(args, Ctx.Type.bol);
919 Ctx eol(Args...)(Args args) => new Ctx(args, Ctx.Type.eol);
920 Ctx bos(Args...)(Args args) => new Ctx(args, Ctx.Type.bos);
921 Ctx eos(Args...)(Args args) => new Ctx(args, Ctx.Type.eos);
922 Ctx bow(Args...)(Args args) => new Ctx(args, Ctx.Type.bow);
923 Ctx eow(Args...)(Args args) => new Ctx(args, Ctx.Type.eow);
924 Seq buf(Args...)(Args args) => seq(bob, args, eob);
925 Seq line(Args...)(Args args) => seq(bol, args, eol);
926 Seq sym(Args...)(Args args) => seq(bos, args, eos);
927 Seq word(Args...)(Args args) => seq(bow, args, eow);
928 
929 pure nothrow @safe unittest {
930 	const bob_ = bob;
931 	const eob_ = eob;
932 	assert(bob_.at(`ab`) == 0);
933 	assert(eob_.at(`ab`, 2) == 0);
934 	assert(bob_.bounds.low == 0);
935 	assert(bob_.bounds.high== 0);
936 
937 	const bol_ = bol;
938 	assert(bol_.at(`ab`) == 0);
939 	assert(bol_.at("a\nb", 2) == 0);
940 	assert(bol_.at("a\nb", 1) == size_t.max);
941 	assert(bol_.bounds.low == 0);
942 	assert(bol_.bounds.high== 0);
943 
944 	const eol_ = eol;
945 	assert(eol_.at(`ab`, 2) == 0);
946 	assert(eol_.at("a\nb", 1) == 0);
947 	assert(eol_.at("a\nb", 2) == size_t.max);
948 	assert(eol_.bounds.low == 0);
949 	assert(eol_.bounds.high== 0);
950 
951 	const bos_ = bos;
952 	const eos_ = eos;
953 	assert(bos_.at(`ab`) == 0);
954 	assert(bos_.at(` ab`) == size_t.max);
955 	assert(eos_.at(`ab`, 2) == 0);
956 	assert(eos_.at(`a_b `, 1) == size_t.max);
957 	assert(eos_.at(`ab `, 2) == 0);
958 	assert(eos_.bounds.low == 0);
959 	assert(eos_.bounds.high== 0);
960 
961 	const bow_ = bow;
962 	const eow_ = eow;
963 	assert(bow_.bounds.low == 0);
964 	assert(bow_.bounds.high== 0);
965 	assert(eow_.bounds.low == 0);
966 	assert(eow_.bounds.high== 0);
967 
968 	assert(bow_.at(`ab`) == 0);
969 	assert(bow_.at(` ab`) == size_t.max);
970 
971 	assert(eow_.at(`ab`, 2) == 0);
972 	assert(eow_.at(`ab `, 0) == size_t.max);
973 
974 	assert(bow_.at(` ab `, 1) == 0);
975 	assert(bow_.at(` ab `, 0) == size_t.max);
976 
977 	assert(eow_.at(` ab `, 3) == 0);
978 	assert(eow_.at(` ab `, 4) == size_t.max);
979 
980 	auto l = lit(`ab`);
981 	auto w = word(l);
982 	assert(w.at(`ab`) == 2);
983 	assert(w.at(`ab_c`) == 2);
984 
985 	auto s = sym(l);
986 	assert(s.at(`ab`) == 2);
987 	assert(s.at(`ab_c`) == size_t.max);
988 
989 	assert(bob_.findAt(`a`) == []);
990 	assert(bob_.findAt(`a`).ptr != null);
991 	assert(eob_.findAt(`a`) == []);
992 
993 	assert(bol_.findAt(`a`) == []);
994 	assert(bol_.findAt(`a`).ptr != null);
995 	assert(eol_.findAt(`a`) == []);
996 
997 	assert(bow_.findAt(`a`) == []);
998 	assert(bow_.findAt(`a`).ptr != null);
999 	assert(eow_.findAt(`a`) == []);
1000 
1001 	assert(bos_.findAt(`a`) == []);
1002 	assert(bos_.findAt(`a`).ptr != null);
1003 	assert(eos_.findAt(`a`) == []);
1004 	/+ TODO: This fails assert(eos_.findAt(`a`).ptr != null); +/
1005 }
1006 
1007 /** Keyword $(D arg). */
1008 Seq kwd(Arg)(Arg arg) => seq(bow, arg, eow);
1009 
1010 pure nothrow @safe unittest {
1011 	const str = `int`;
1012 	auto x = str.lit.kwd;
1013 
1014 	assert(x.at(str, 0));
1015 	/* TODO: assert(!x.at(str, 1)); */
1016 
1017 	assert(x.at(` ` ~ str, 1));
1018 	/* TODO: assert(!x.at(` int`, 0)); */
1019 }
1020 
1021 /** Pattern Paired with Prefix and Suffix.
1022  */
1023 final extern(C++) class Clause : SPatt1 { extern(D):
1024 @safe pure nothrow:
1025 	this(Node prefix_, Node suffix_, Node sub) {
1026 		super(sub);
1027 		this.prefix = prefix_;
1028 		this.suffix = suffix_;
1029 	}
1030 	override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @nogc {
1031 		import std.experimental.allocator.mallocator : Mallocator;
1032 		DynamicArray!(Node, Mallocator) enders_;
1033 		enders_.reserve(enders.length == 1);
1034 		() @trusted {
1035 			enders_ ~= cast(Node[])enders;
1036 			enders_ ~= cast()suffix;
1037 		}();
1038 		typeof(return) result = sub.findRawAt(input, soff, enders_[]);
1039 		return result;
1040 	}
1041 	Node prefix, suffix;
1042 }
1043 
1044 Clause paired(Args...)(Node prefix, Node suffix, Args args) => new Clause(prefix, suffix, args);
1045 Clause parend(Args...)(Args args) => new Clause(lit('('), lit(')'), args);
1046 Clause hooked(Args...)(Args args) => new Clause(lit('['), lit(']'), args);
1047 Clause braced(Args...)(Args args) => Clause(lit('{'), lit('}'), args);
1048 
1049 import nxt.assert_ex;
1050 
1051 pure nothrow @safe unittest {
1052 	/* auto p = `[alpha]`.lit.parend; */
1053 	/* assert(p.at(`([alpha])`) == 7); */
1054 
1055 	/* auto h = `[alpha]`.lit.hooked; */
1056 	/* assert(h.at(`[[alpha]]`) == 7); */
1057 
1058 	/* auto b = `[alpha]`.lit.braced; */
1059 	/* assert(b.at(`{[alpha]}`) == 7); */
1060 
1061 	/* auto pb = `[alpha]`.lit.parend; */
1062 }
1063 
1064 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern.
1065 	Example: #!/bin/env rdmd
1066 	See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix)
1067  */
1068 auto ref shebangLine(Node interpreter) @safe pure nothrow {
1069 	return seq(bob,
1070 			   `#!`.lit,
1071 			   `/usr`.lit.opt,
1072 			   `/bin/env`.lit.opt,
1073 			   ws.oom,
1074 			   interpreter);
1075 }
1076 
1077 ///
1078 pure nothrow @safe unittest {
1079 	assert(`rdmd`.lit.shebangLine
1080 				 .at(`#!/bin/env rdmd`) == 15);
1081 	assert(`rdmd`.lit.shebangLine
1082 				 .at(`#!/usr/bin/env rdmd`) == 19);
1083 	auto rgdmd = alt(`rdmd`.lit,
1084 					 `gdmd`.lit);
1085 	assert(rgdmd.shebangLine
1086 				.at(`#!/usr/bin/env rdmd-dev`) == 19);
1087 	assert(rgdmd.shebangLine
1088 				.at(`#!/usr/bin/env gdmd`) == 19);
1089 }