1 /**
2    Symbolic (Regular) Expressions and Predicate Logic.
3 
4    Syntax is similar to Emacs' sregex, rx.
5 
6    String patterns are matched by their raw bytes for now.
7 
8    Copyright: Per Nordlöw 2022-.
9    License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
10    Authors: $(WEB Per Nordlöw)
11 
12    TODO
13    Overload operators & (and), ! (not),
14 
15    TODO: Add pattern for expressing inference with `infer`
16    (infer
17     ((x instanceOf X) &
18      (X subClassOf Y))
19      (x instaceOf Y))
20 
21    TODO
22    Variables are either
23    - _ (ignore)
24    - _`x`, _`y`, etc
25    - _'x', _'y'
26    - _!0, _!1, ..., _!(n-1)
27 
28    infer(rel!`desire`(_!`x`, _!`y`) &&
29          rel!`madeOf`(_!`z`, _!`y`),
30          rel!`desire`(_!`x`, _!`z`))
31 
32    TODO: Support variables of specific types and inference using predicate logic:
33         infer(and(fact(var!'x', rel'desire', var!'y'),
34                   fact(var!'z', opt(rel'madeOf',
35                               rel'instanceOf'), var!'y'))),
36               pred(var!'x', rel'desire', var!'z'
37               ))
38 
39    TODO: Make returns from factory functions immutable.
40    TODO: Reuse return patterns from Lit
41 
42    TODO
43    const s = seq(`al.`.lit,`pha`.lit);
44    const t = `al`.lit ~ `pha`.lit;
45    assert(s !is t);
46    assert(equal(s, t));
47 
48  */
49 module nxt.symbolic;
50 
51 import std.algorithm: find, all, map, reduce, min, max, joiner, equal, each, filter;
52 import std.range: empty;
53 import std.array: array;
54 import std.string: representation;
55 import std.traits: isSomeString;
56 import nxt.find_ex: findAcronymAt, FindContext;
57 import nxt.dbgio;
58 import nxt.static_bitarray;
59 
60 /** Base Pattern.
61  */
62 class Patt
63 {
64     @safe pure nothrow:
65 
66     /** Match $(D this) with $(D haystack) at Offset $(D soff).
67      Returns: Matched slice or [] if not match.
68     */
69     final auto ref matchU(in ubyte[] haystack, size_t soff = 0) const
70     {
71         const hit = atU(haystack, soff);
72         return hit != hit.max ? haystack[0..hit] : [];
73     }
74     final auto ref match(const scope string haystack, size_t soff = 0) const
75     {
76         return matchU(haystack.representation, soff);
77     }
78 
79     Seq opBinary(string op)(Patt rhs) if (op == `~`) // template can't be overridden
80     {
81         return opCatImpl(rhs);
82     }
83     Alt opBinary(string op)(Patt rhs) if (op == `|`) // template can't be overridden
84     {
85         return opAltImpl(rhs);
86     }
87 
88     Seq opCatImpl(Patt rhs)     // can be overridden
89     {
90         return seq(this, rhs); // TODO: check if this and rhs is Seq
91     }
92     Alt opAltImpl(Patt rhs)     // can be overridden
93     {
94         return alt(this, rhs);  // TODO: check if this and rhs is Alt
95     }
96 
97     final size_t at(const scope string haystack, size_t soff = 0) const
98     // TODO: Activate this
99     /* out (hit) { */
100     /*     assert((!hit) || hit >= minLength); // TODO: Is this needed? */
101     /* } */
102     /* do */
103     {
104         return atU(haystack.representation, soff);
105     }
106 
107     size_t atU(in ubyte[] haystack, size_t soff = 0) const
108     {
109         assert(false);
110     }
111 
112     /** Find $(D this) in String $(D haystack) at Offset $(D soff). */
113     final const(ubyte[]) findAt(const scope string haystack, size_t soff = 0) const
114     {
115         return findRawAt(haystack.representation, soff, []); // TODO: this is ugly
116     }
117 
118     /** Find $(D this) in Raw Bytes $(D haystack) at Offset $(D soff). */
119     const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0,
120                              in Patt[] enders = []) const
121     {
122         auto i = soff;
123         while (i < haystack.length) // while bytes left at i
124         {
125             if (haystack.length - i < minLength)  // and bytes left to find pattern
126             {
127                 return [];
128             }
129             const hit = atU(haystack, i);
130             if (hit != size_t.max) // hit at i
131             {
132                 return haystack[i..i + hit];
133             }
134             i++;
135         }
136         return [];
137     }
138 
139     @property:
140     size_t minLength() const { assert(false); } /// Returns: minimum possible instance length.
141     size_t maxLength() const { assert(false); } /// Returns: maximum possible instance length.
142 
143     bool isFixed() const { assert(false); } /// Returns: true if all possible instances have same length.
144     bool isConstant() const { return false; } /// Returns: true if all possible instances have same length.
145 
146     final bool isVariable() const { return !isConstant; }
147     const(ubyte[]) getConstant() const { return []; } /// Returns: data if literal otherwise empty array.
148 
149     /** Get All Literals that must match a given source $(D X) in order for $(D
150         this) to match $(D X) somewhere.
151     */
152     Lit[] mandatories() { assert(false); }
153 
154     /** Get Optional Literals that may match a given source $(D X) if $(D this)
155         matches $(D X) somewhere.
156     */
157     Lit[] optionals() { return mandatories; }
158 
159     protected Patt _parent; /// Parenting (Super) Pattern.
160     bool greedy = false; // Scan as far as possible if true
161 }
162 
163 /** Literal Pattern with Cached Binary Byte Histogram.
164  */
165 class Lit : Patt
166 {
167     @safe pure nothrow:
168 
169     this(string bytes_) { assert(!bytes_.empty); this(bytes_.representation); }
170     this(ubyte ch) { this._bytes ~= ch; }
171     this(ubyte[] bytes_) { this._bytes = bytes_; }
172     this(immutable ubyte[] bytes_) { this._bytes = bytes_.dup; }
173 
174     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
175     {
176         const l = _bytes.length;
177         return (soff + l <= haystack.length && // fits in haystack and
178                 _bytes[] == haystack[soff..soff + l]) ? l : size_t.max; // same contents
179     }
180 
181     override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0,
182                                       in Patt[] enders = []) const
183     {
184         return haystack[soff..$].find(_bytes); // reuse std.algorithm: find!
185     }
186 
187     @property:
188 
189     override size_t maxLength() const { return _bytes.length; }
190     override bool isFixed() const { return true; }
191     override bool isConstant() const { return true; }
192     override const(ubyte[]) getConstant() const { return _bytes; }
193 
194     @property auto ref bytes() const { return _bytes; }
195     private ubyte[] _bytes;
196     alias _bytes this;
197 
198     alias SinglesHist = StaticBitArray!(2^^ubyte.sizeof);
199 
200     /// Get (Cached) (Binary) Histogram over single elements contained in this $(D Lit).
201     SinglesHist singlesHist()
202     {
203         if (!_bytes.empty &&
204             _singlesHist.allZero)
205         {
206             foreach (b; _bytes)
207             {
208                 _singlesHist[b] = true;
209             }
210         }
211         return _singlesHist;
212     }
213     private SinglesHist _singlesHist;
214 
215     override Lit[] mandatories() { return [this]; }
216 }
217 
218 auto lit(Args...)(Args args) @safe pure nothrow { return new Lit(args); } // instantiator
219 
220 @safe pure nothrow unittest
221 {
222     immutable ab = `ab`;
223     assert(lit('b').at(`ab`, 1) == 1);
224     const a = lit('a');
225 
226     const ac = lit(`ac`);
227     assert(ac.match(`ac`));
228     assert(ac.match(`ac`).length == 2);
229     assert(!ac.match(`ca`));
230     assert(ac.match(`ca`) == []);
231 
232     assert(a.isFixed);
233     assert(a.isConstant);
234     assert(a.at(ab) == 1);
235     assert(lit(ab).at(`a`) == size_t.max);
236     assert(lit(ab).at(`b`) == size_t.max);
237 
238     assert(a.findAt(`cba`) == cast(immutable ubyte[])`a`);
239     assert(a.findAt(`ba`) == cast(immutable ubyte[])`a`);
240     assert(a.findAt(`a`) == cast(immutable ubyte[])`a`);
241     assert(a.findAt(``) == []);
242     assert(a.findAt(`b`) == []);
243     assert(ac.findAt(`__ac`) == cast(immutable ubyte[])`ac`);
244     assert(a.findAt(`b`).length == 0);
245 
246     auto xyz = lit(`xyz`);
247 }
248 
249 @safe pure nothrow unittest
250 {
251     auto ac = lit(`ac`);
252     assert(ac.mandatories == [ac]);
253     assert(ac.optionals == [ac]);
254 }
255 
256 /** Word/Symbol Acronym Pattern.
257  */
258 class Acronym : Patt
259 {
260     @safe pure nothrow:
261 
262     this(string bytes_, FindContext ctx = FindContext.inSymbol)
263     {
264         assert(!bytes_.empty);
265         this(bytes_.representation, ctx);
266     }
267 
268     this(ubyte ch) { this._acros ~= ch; }
269     this(ubyte[] bytes_, FindContext ctx = FindContext.inSymbol)
270     {
271         this._acros = bytes_;
272         this._ctx = ctx;
273     }
274     this(immutable ubyte[] bytes_, FindContext ctx = FindContext.inSymbol)
275     {
276         this._acros = bytes_.dup;
277         this._ctx = ctx;
278     }
279 
280     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
281     {
282         auto offs = new size_t[_acros.length]; // hit offsets
283         size_t a = 0;         // acronym index
284         foreach(s, ub; haystack[soff..$])  // for each element in source
285         {
286             import std.ascii: isAlpha;
287 
288             // Check context
289             final switch (_ctx)
290             {
291             case FindContext.inWord:
292             case FindContext.asWord:
293                 if (!ub.isAlpha) { return size_t.max; } break;
294             case FindContext.inSymbol:
295             case FindContext.asSymbol:
296                 if (!ub.isAlpha && ub != '_') { return size_t.max; } break;
297             }
298 
299             if (_acros[a] == ub)
300             {
301                 offs[a] = s + soff; // store hit offset
302                 a++; // advance acronym
303                 if (a == _acros.length) // if complete acronym found
304                 {
305                     return s + 1;             // return its length
306                 }
307             }
308         }
309         return size_t.max; // no hit
310     }
311 
312     template Tuple(E...) { alias Tuple = E; }
313 
314     override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0,
315                                       in Patt[] enders = []) const
316     {
317         import std.string: CaseSensitive;
318         return haystack.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0];
319     }
320 
321     @property:
322     override size_t minLength() const { return _acros.length; }
323     override size_t maxLength() const { return size_t.max; }
324     override bool isFixed() const { return false; }
325     override bool isConstant() const { return false; }
326 
327 private:
328     ubyte[] _acros;
329     FindContext _ctx;
330 }
331 
332 @safe pure nothrow
333 {
334     auto inwac(Args...)(Args args) { return new Acronym(args, FindContext.inWord); } // word acronym
335     auto insac(Args...)(Args args) { return new Acronym(args, FindContext.inSymbol); } // symbol acronym
336     auto aswac(Args...)(Args args) { return new Acronym(args, FindContext.asWord); } // word acronym
337     auto assac(Args...)(Args args) { return new Acronym(args, FindContext.asSymbol); } // symbol acronym
338 }
339 
340 @safe pure nothrow unittest
341 {
342     assert(inwac(`a`).at(`a`) == 1);
343     assert(inwac(`ab`).at(`ab`) == 2);
344     assert(inwac(`ab`).at(`a`) == size_t.max);
345     assert(inwac(`abc`).at(`abc`) == 3);
346     assert(inwac(`abc`).at(`aabbcc`) == 5);
347     assert(inwac(`abc`).at(`aaaabbcc`) == 7);
348     assert(inwac(`fpn`).at(`fopen`) == 5);
349 }
350 
351 /** Any Byte.
352  */
353 class Any : Patt
354 {
355     @safe pure nothrow:
356     this() {}
357 
358     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
359     {
360         return soff < haystack.length ? 1 : size_t.max;
361     }
362 
363     @property:
364     override size_t maxLength() const { return 1; }
365     override bool isFixed() const { return true; }
366     override bool isConstant() const { return false; }
367 
368     override Lit[] mandatories() { return []; }
369     override Lit[] optionals()
370     {
371         import std.range: iota;
372         return iota(0, 256).map!(n => (cast(ubyte)n).lit).array;
373     }
374 }
375 
376 auto any(Args...)(Args args) { return new Any(args); } // instantiator
377 
378 /** Abstract Super Pattern.
379  */
380 abstract class SPatt : Patt
381 {
382     @safe pure nothrow:
383 
384     this(Patt[] subs_) { this._subs = subs_; }
385 
386     this(Args...)(Args subs_)
387     {
388         foreach (sub; subs_)
389         {
390             alias Sub = typeof(sub);
391 
392             // TODO: functionize to patternFromBuiltinType() or to!Patt
393             static if (is(Sub == string) ||
394                        is(Sub == char))
395             {
396                 _subs ~= new Lit(sub);
397             }
398             else
399             {
400                 _subs ~= sub;
401             }
402             sub._parent = this;
403         }
404     }
405 
406     protected Patt[] _subs;
407 }
408 
409 /** Sequence of Patterns.
410  */
411 class Seq : SPatt
412 {
413     @safe pure nothrow:
414 
415     this(Patt[] subs_) { super(subs_); }
416     this(Args...)(Args subs_) { super(subs_); }
417 
418     @property auto ref inout (Patt[]) elms() inout { return super._subs; }
419 
420     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
421     {
422         assert(!elms.empty); // TODO: Move to in contract?
423         const c = getConstant;
424         if (!c.empty)
425         {
426             return (soff + c.length <= haystack.length &&   // if equal size and
427                     c[] == haystack[soff..soff + c.length]); // equal contents
428         }
429         size_t sum = 0;
430         size_t off = soff;
431         foreach (ix, sub; elms) // TODO: Reuse std.algorithm instead?
432         {
433             size_t hit = sub.atU(haystack, off);
434             if (hit == size_t.max) { sum = hit; break; } // if any miss skip
435             sum += hit;
436             off += hit;
437         }
438         return sum;
439     }
440 
441     @property:
442     override size_t maxLength() const { assert(false); }
443     override bool isFixed() const { return _subs.all!(a => a.isFixed); }
444     override bool isConstant() const { return _subs.all!(a => a.isConstant); }
445     override const(ubyte[]) getConstant() const { return []; }
446 
447     override Lit[] mandatories()
448     {
449         auto x = _subs.map!(patt => patt.mandatories);
450         return x.joiner.array;
451     }
452 
453     override Lit[] optionals()
454     {
455         auto x = _subs.map!(patt => patt.optionals);
456         return x.joiner.array;
457     }
458 }
459 
460 auto seq(Args...)(Args args) @safe pure nothrow { return new Seq(args); } // instantiator
461 
462 @safe pure nothrow unittest
463 {
464     const s = seq(`al.`.lit,
465                   `pha`.lit);
466     const t = `al`.lit ~ `pha`.lit;
467     assert(s !is t);
468 
469     immutable haystack = `alpha`;
470     assert(s.isFixed);
471     assert(s.isConstant);
472     assert(s.at(haystack));
473     const al = seq(`a`.lit,
474                    `l`.lit);
475     assert(al.at(`a`) == size_t.max);
476 }
477 
478 @safe pure nothrow unittest
479 {
480     auto a = `aa.`.lit;
481     auto b = `bb.`.lit;
482     auto c = `cc.`.lit;
483     auto d = `dd.`.lit;
484     auto s = seq(a.opt, b,
485                  c.opt, d);
486     assert(equal(s.mandatories, [b, d]));
487     assert(equal(s.optionals, [a, b, c, d]));
488 }
489 
490 /** Alternative of Patterns in $(D ALTS).
491  */
492 class Alt : SPatt
493 {
494     @safe pure nothrow:
495 
496     this(Patt[] subs_) { super(subs_); }
497     this(Args...)(Args subs_) { super(subs_); }
498 
499     size_t atIx(const scope string haystack, size_t soff, out size_t alt_hix) const
500     {
501         return atU(haystack.representation, soff, alt_hix);
502     }
503 
504     void opOpAssign(string s : "~")(Patt sub)
505     {
506         super._subs ~= sub;
507     }
508 
509     @property inout(Patt[]) alts() inout { return super._subs; }
510 
511     /** Get Length of hit at index soff in haystack or size_t.max if none.
512      */
513     size_t atU(in ubyte[] haystack, size_t soff, out size_t alt_hix) const
514     {
515         assert(!alts.empty);    // TODO: Move to in contract?
516         size_t hit = 0;
517         size_t off = soff;
518         foreach (ix, sub; alts)  // TODO: Reuse std.algorithm instead?
519         {
520             hit = sub.atU(haystack[off..$]);                     // match alternative
521             if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done
522         }
523         return hit;
524     }
525 
526     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
527     {
528         size_t alt_hix;
529         return atU(haystack, soff, alt_hix);
530     }
531 
532     /** Find $(D this) in $(D haystack) at Offset $(D soff).
533     */
534     override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0,
535                                       in Patt[] enders = []) const
536     {
537         assert(!alts.empty);    // TODO: Move to in contract?
538 
539         import nxt.dbgio;
540 
541         switch (alts.length)
542         {
543             case 1:
544                 const a0 = alts[0].getConstant;
545                 if (!a0.empty)
546                 {
547                     auto hit = haystack[soff..$].find(a0); // Use: second argument to return alt_hix
548                     return hit;
549                 }
550                 else
551                 {
552                     return alts[0].findRawAt(haystack, soff, enders); // recurse to it
553                 }
554             case 2:
555                 const a0 = alts[0].getConstant;
556                 const a1 = alts[1].getConstant;
557                 if (!a0.empty &&
558                     !a1.empty)
559                 {
560                     auto hit = haystack[soff..$].find(a0, a1); // Use: second argument to return alt_hix
561                     return hit[0];
562                 }
563                 break;
564             case 3:
565                 const a0 = alts[0].getConstant;
566                 const a1 = alts[1].getConstant;
567                 const a2 = alts[2].getConstant;
568                 if (!a0.empty &&
569                     !a1.empty &&
570                     !a2.empty)
571                 {
572                     auto hit = haystack[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix
573                     return hit[0];
574                 }
575                 break;
576             case 4:
577                 const a0 = alts[0].getConstant;
578                 const a1 = alts[1].getConstant;
579                 const a2 = alts[2].getConstant;
580                 const a3 = alts[3].getConstant;
581                 if (!a0.empty &&
582                     !a1.empty &&
583                     !a2.empty &&
584                     !a3.empty)
585                 {
586                     auto hit = haystack[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix
587                     return hit[0];
588                 }
589                 break;
590             case 5:
591                 const a0 = alts[0].getConstant;
592                 const a1 = alts[1].getConstant;
593                 const a2 = alts[2].getConstant;
594                 const a3 = alts[3].getConstant;
595                 const a4 = alts[4].getConstant;
596                 if (!a0.empty &&
597                     !a1.empty &&
598                     !a2.empty &&
599                     !a3.empty &&
600                     !a4.empty)
601                 {
602                     auto hit = haystack[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix
603                     return hit[0];
604                 }
605                 break;
606             default:
607                 break;
608         }
609 
610         return super.findRawAt(haystack, soff, enders); // revert to base case
611     }
612 
613     @property:
614     override size_t minLength() const { return reduce!min(size_t.max,
615                                                           _subs.map!(a => a.minLength)); }
616     override size_t maxLength() const { return reduce!max(size_t.min,
617                                                           _subs.map!(a => a.maxLength)); }
618     override bool isFixed() const
619     {
620         // TODO: Merge these loops using tuple algorithm.
621         auto mins = _subs.map!(a => a.minLength);
622         auto maxs = _subs.map!(a => a.maxLength);
623         import nxt.predicates: allEqual;
624         return (mins.allEqual &&
625                 maxs.allEqual);
626     }
627     override bool isConstant() const
628     {
629         if (_subs.length == 0)
630         {
631             return true;
632         }
633         else if (_subs.length == 1)
634         {
635             import std.range: front;
636             return _subs.front.isConstant;
637         }
638         else
639         {
640             return false;       // TODO: Maybe handle case when _subs are different.
641         }
642     }
643 }
644 
645 auto alt(Args...)(Args args) @safe pure nothrow { return new Alt(args); } // instantiator
646 
647 @safe pure nothrow unittest
648 {
649     immutable a_b = alt(`a`.lit,
650                         `b`.lit);
651 
652     immutable a__b = (`a`.lit |
653                       `b`.lit);
654 
655     assert(a_b.isFixed);
656     assert(!a_b.isConstant);
657     assert(a_b.at(`a`));
658     assert(a_b.at(`b`));
659     assert(a_b.at(`c`) == size_t.max);
660 
661     size_t hix = size_t.max;
662     a_b.atIx(`a`, 0, hix); assert(hix == 0);
663     a_b.atIx(`b`, 0, hix); assert(hix == 1);
664 
665     /* assert(alt.at(`a`) == size_t.max); */
666     /* assert(alt.at(``) == size_t.max); */
667 
668     immutable a = alt(lit(`a`));
669     immutable aa = alt(lit(`aa`));
670     assert(aa.isConstant);
671 
672     immutable aa_bb = alt(lit(`aa`),
673                           lit(`bb`));
674     assert(aa_bb.isFixed);
675     assert(aa_bb.minLength == 2);
676     assert(aa_bb.maxLength == 2);
677 
678     immutable a_bb = alt(lit(`a`),
679                          lit(`bb`));
680     assert(!a_bb.isFixed);
681     assert(a_bb.minLength == 1);
682     assert(a_bb.maxLength == 2);
683 
684     const string _aa = `_aa`;
685     assert(aa_bb.findAt(_aa) == cast(immutable ubyte[])`aa`);
686     assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable ubyte[])_aa)[0] == 1);
687 
688     const string _bb = `_bb`;
689     assert(aa_bb.findAt(_bb) == cast(immutable ubyte[])`bb`);
690     assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable ubyte[])_bb)[0] == 1);
691 
692     assert(a.findAt(`b`) == []);
693     assert(aa.findAt(`cc`) == []);
694     assert(aa_bb.findAt(`cc`) == []);
695     assert(aa_bb.findAt(``) == []);
696 }
697 
698 @safe pure nothrow unittest
699 {
700     auto a_b = alt(`a`.lit);
701     a_b ~= `b`.lit;
702 
703     assert(a_b.isFixed);
704     assert(!a_b.isConstant);
705     assert(a_b.at(`a`));
706     assert(a_b.at(`b`));
707     assert(a_b.at(`c`) == size_t.max);
708 
709     size_t hix = size_t.max;
710     a_b.atIx(`a`, 0, hix); assert(hix == 0);
711     a_b.atIx(`b`, 0, hix); assert(hix == 1);
712 }
713 
714 class Space : Patt
715 {
716     @safe pure nothrow:
717 
718     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
719     {
720         import std.ascii: isWhite;
721         return soff < haystack.length && isWhite(haystack[soff]) ? 1 : size_t.max;
722     }
723 
724     @property:
725     override size_t maxLength() const { return 1; }
726     override bool isFixed() const { return true; }
727     override bool isConstant() const { return false; }
728 }
729 
730 auto ws() @safe pure nothrow { return new Space(); } // instantiator
731 
732 @safe pure nothrow unittest
733 {
734     assert(ws.at(` `) == 1);
735     assert(ws.at("\t") == 1);
736     assert(ws.at("\n") == 1);
737 }
738 
739 /** Abstract Singleton Super Pattern.
740  */
741 abstract class SPatt1 : Patt
742 {
743     @safe pure nothrow:
744 
745     this(Patt sub)
746     {
747         this.sub = sub;
748         sub._parent = this;
749     }
750     protected Patt sub;
751 }
752 
753 /** Optional Sub Pattern $(D count) times.
754  */
755 class Opt : SPatt1
756 {
757     @safe pure nothrow:
758 
759     this(Patt sub) { super(sub); }
760 
761     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
762     {
763         assert(soff <= haystack.length); // include equality because haystack might be empty and size zero
764         const hit = sub.atU(haystack[soff..$]);
765         return hit == size_t.max ? 0 : hit;
766     }
767 
768     @property:
769     override size_t minLength() const { return 0; }
770     override size_t maxLength() const { return sub.maxLength; }
771     override bool isFixed() const { return false; }
772     override bool isConstant() const { return false; }
773 
774     override Lit[] mandatories() { return []; }
775     override Lit[] optionals() { return sub.optionals; }
776 }
777 
778 auto opt(Args...)(Args args) { return new Opt(args); } // optional
779 
780 @safe pure nothrow unittest
781 {
782     assert(`a`.lit.opt.at(`b`) == 0);
783     assert(`a`.lit.opt.at(`a`) == 1);
784 }
785 
786 /** Repetition Sub Pattern $(D count) times.
787  */
788 class Rep : SPatt1
789 {
790     @safe pure nothrow:
791 
792     this(Patt sub, size_t count)
793     in
794     {
795         assert(count >= 2);
796     }
797     do
798     {
799         super(sub);
800         this.countReq = count;
801         this.countOpt = 0; // fixed length repetion
802     }
803 
804     this(Patt sub, size_t countMin, size_t countMax)
805     in
806     {
807         assert(countMax >= 2);
808         assert(countMin <= countMax);
809     }
810     do
811     {
812         super(sub);
813         this.countReq = countMin;
814         this.countOpt = countMax - countMin;
815     }
816 
817     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
818     {
819         size_t sum = 0;
820         size_t off = soff;
821         /* mandatory */
822         foreach (ix; 0..countReq)  // TODO: Reuse std.algorithm instead?
823         {
824             size_t hit = sub.atU(haystack[off..$]);
825             if (hit == size_t.max) { return hit; } // if any miss skip
826             off += hit;
827             sum += hit;
828         }
829         /* optional part */
830         foreach (ix; countReq..countReq + countOpt) // TODO: Reuse std.algorithm instead?
831         {
832             size_t hit = sub.atU(haystack[off..$]);
833             if (hit == size_t.max) { break; } // if any miss just break
834             off += hit;
835             sum += hit;
836         }
837         return sum;
838     }
839 
840     @property:
841     override size_t minLength() const { return countReq*sub.maxLength; }
842     override size_t maxLength() const { return (countReq + countOpt)*sub.maxLength; }
843     override bool isFixed() const { return minLength == maxLength && sub.isFixed; }
844     override bool isConstant() const { return minLength == maxLength && sub.isConstant; }
845 
846     override Lit[] mandatories() { return sub.mandatories; }
847     override Lit[] optionals() { return sub.optionals; }
848 
849     // invariant { assert(countReq); }
850     size_t countReq; // Required.
851     size_t countOpt; // Optional.
852 }
853 
854 auto rep(Args...)(Args args) { return new Rep(args); } // repetition
855 auto zom(Args...)(Args args) { return new Rep(args, 0, size_t.max); } // zero or more
856 auto oom(Args...)(Args args) { return new Rep(args, 1, size_t.max); } // one or more
857 
858 @safe pure nothrow unittest
859 {
860     auto l = 'a'.lit;
861 
862     const l5 = l.rep(5);
863     assert(l5.isConstant);
864     assert(l5.at(`aaaa`) == size_t.max);
865     assert(l5.at(`aaaaa`));
866     assert(l5.at(`aaaaaaa`));
867     assert(l5.isFixed);
868     assert(l5.maxLength == 5);
869 
870     const l23 = l.rep(2, 3);
871     assert(l23.at(`a`) == size_t.max);
872     assert(l23.at(`aa`) == 2);
873     assert(l23.at(`aaa`) == 3);
874     assert(l23.at(`aaaa`) == 3);
875     assert(!l23.isConstant);
876 }
877 
878 @safe pure nothrow unittest
879 {
880     auto l = 'a'.lit;
881     auto l5 = l.rep(5);
882     assert(l5.mandatories == [l]);
883     assert(l5.optionals == [l]);
884 }
885 
886 @safe pure nothrow unittest
887 {
888     auto l = 'a'.lit;
889     auto l5 = l.opt.rep(5);
890     assert(l5.mandatories == []);
891     assert(l5.optionals == [l]);
892 }
893 
894 class Ctx : Patt
895 {
896     enum Type
897     {
898         bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\``
899         eob, /// End       Of \em Block/Name/File/String. @b Emacs: `\'`
900 
901         bol,            /// Beginning Of \em Line. @b Emacs: `^`
902         eol,            /// End       Of \em Line. @b Emacs: `$`
903 
904         bos,            /// Beginning Of \em Symbol. @b Emacs: `\_<`
905         eos,            /// End       Of \em Symbol. @b Emacs: `\_>`
906 
907         bow,            /// Beginning Of \em Word. @b Emacs: `\<`
908         eow,            /// End       Of \em Word. @b Emacs: `\>`
909     }
910 
911     @safe pure nothrow:
912 
913     this(Type type) { this.type = type; }
914 
915     override size_t atU(in ubyte[] haystack, size_t soff = 0) const
916     {
917         assert(soff <= haystack.length); // include equality because haystack might be empty and size zero
918         bool ok = false;
919         import std.ascii : isAlphaNum/* , newline */;
920         final switch (type)
921         {
922             /* buffer */
923         case Type.bob: ok = (soff == 0); break;
924         case Type.eob: ok = (soff == haystack.length); break;
925 
926             /* line */
927         case Type.bol: ok = (soff == 0          || (haystack[soff - 1] == 0x0d ||
928                                                    haystack[soff - 1] == 0x0a)); break;
929         case Type.eol: ok = (soff == haystack.length || (haystack[soff    ] == 0x0d ||
930                                                    haystack[soff    ] == 0x0a)); break;
931 
932             /* symbol */
933         case Type.bos: ok = ((soff == 0         || (!haystack[soff - 1].isAlphaNum &&
934                                                     haystack[soff - 1] != '_')) && // TODO: Make '_' language-dependent
935                              (soff < haystack.length &&  haystack[soff].isAlphaNum)) ; break;
936         case Type.eos: ok = ((soff == haystack.length || (!haystack[soff].isAlphaNum &&
937                                                           haystack[soff] != '_')) && // TODO: Make '_' language-dependent
938                              (soff >= 1          &&  haystack[soff - 1].isAlphaNum)) ; break;
939 
940             /* word */
941         case Type.bow: ok = ((soff == 0         || !haystack[soff - 1].isAlphaNum) &&
942                              (soff < haystack.length &&  haystack[soff].isAlphaNum)) ; break;
943         case Type.eow: ok = ((soff == haystack.length || !haystack[soff].isAlphaNum) &&
944                              (soff >= 1          &&  haystack[soff - 1].isAlphaNum)) ; break;
945         }
946         return ok ? 0 : size_t.max;
947     }
948 
949     @property:
950     override size_t maxLength() const { return 0; }
951     override bool isFixed() const { return true; }
952     override bool isConstant() const { return true; }
953 
954     protected Type type;
955 }
956 
957 Ctx bob(Args...)(Args args) { return new Ctx(args, Ctx.Type.bob); }
958 Ctx eob(Args...)(Args args) { return new Ctx(args, Ctx.Type.eob); }
959 Ctx bol(Args...)(Args args) { return new Ctx(args, Ctx.Type.bol); }
960 Ctx eol(Args...)(Args args) { return new Ctx(args, Ctx.Type.eol); }
961 Ctx bos(Args...)(Args args) { return new Ctx(args, Ctx.Type.bos); }
962 Ctx eos(Args...)(Args args) { return new Ctx(args, Ctx.Type.eos); }
963 Ctx bow(Args...)(Args args) { return new Ctx(args, Ctx.Type.bow); }
964 Ctx eow(Args...)(Args args) { return new Ctx(args, Ctx.Type.eow); }
965 Seq buf(Args...)(Args args) { return seq(bob, args, eob); }
966 Seq line(Args...)(Args args) { return seq(bol, args, eol); }
967 Seq sym(Args...)(Args args) { return seq(bos, args, eos); }
968 Seq word(Args...)(Args args) { return seq(bow, args, eow); }
969 
970 @safe pure nothrow unittest
971 {
972     const bob_ = bob;
973     const eob_ = eob;
974     assert(bob_.at(`ab`) == 0);
975     assert(eob_.at(`ab`, 2) == 0);
976 
977     const bol_ = bol;
978     assert(bol_.at(`ab`) == 0);
979     assert(bol_.at("a\nb", 2) == 0);
980     assert(bol_.at("a\nb", 1) == size_t.max);
981 
982     const eol_ = eol;
983     assert(eol_.at(`ab`, 2) == 0);
984     assert(eol_.at("a\nb", 1) == 0);
985     assert(eol_.at("a\nb", 2) == size_t.max);
986 
987     const bos_ = bos;
988     const eos_ = eos;
989     assert(bos_.at(`ab`) == 0);
990     assert(bos_.at(` ab`) == size_t.max);
991     assert(eos_.at(`ab`, 2) == 0);
992     assert(eos_.at(`a_b `, 1) == size_t.max);
993     assert(eos_.at(`ab `, 2) == 0);
994 
995     const bow_ = bow;
996     const eow_ = eow;
997 
998     assert(bow_.at(`ab`) == 0);
999     assert(bow_.at(` ab`) == size_t.max);
1000 
1001     assert(eow_.at(`ab`, 2) == 0);
1002     assert(eow_.at(`ab `, 0) == size_t.max);
1003 
1004     assert(bow_.at(` ab `, 1) == 0);
1005     assert(bow_.at(` ab `, 0) == size_t.max);
1006 
1007     assert(eow_.at(` ab `, 3) == 0);
1008     assert(eow_.at(` ab `, 4) == size_t.max);
1009 
1010     auto l = lit(`ab`);
1011     auto w = word(l);
1012     assert(w.at(`ab`) == 2);
1013     assert(w.at(`ab_c`) == 2);
1014 
1015     auto s = sym(l);
1016     assert(s.at(`ab`) == 2);
1017     assert(s.at(`ab_c`) == size_t.max);
1018 
1019     assert(bob_.findAt(`a`) == []);
1020     assert(bob_.findAt(`a`).ptr != null);
1021     assert(eob_.findAt(`a`) == []);
1022 
1023     assert(bol_.findAt(`a`) == []);
1024     assert(bol_.findAt(`a`).ptr != null);
1025     assert(eol_.findAt(`a`) == []);
1026 
1027     assert(bow_.findAt(`a`) == []);
1028     assert(bow_.findAt(`a`).ptr != null);
1029     assert(eow_.findAt(`a`) == []);
1030 
1031     assert(bos_.findAt(`a`) == []);
1032     assert(bos_.findAt(`a`).ptr != null);
1033     assert(eos_.findAt(`a`) == []);
1034     // TODO: This fails assert(eos_.findAt(`a`).ptr != null);
1035 }
1036 
1037 /** Keyword $(D arg). */
1038 Seq kwd(Arg)(Arg arg) { return seq(bow, arg, eow); }
1039 
1040 @safe pure nothrow unittest
1041 {
1042     const str = `int`;
1043     auto x = str.lit.kwd;
1044 
1045     assert(x.at(str, 0));
1046     /* TODO: assert(!x.at(str, 1)); */
1047 
1048     assert(x.at(` ` ~ str, 1));
1049     /* TODO: assert(!x.at(` int`, 0)); */
1050 }
1051 
1052 /** Pattern Paired with Prefix and Suffix.
1053  */
1054 class Clause : SPatt1
1055 {
1056     @safe pure nothrow:
1057 
1058     this(Patt prefix_,
1059          Patt suffix_,
1060          Patt sub)
1061     {
1062         super(sub);
1063         this.prefix = prefix_;
1064         this.suffix = suffix_;
1065     }
1066 
1067     override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0,
1068                                       in Patt[] enders = []) const
1069     {
1070         return sub.findRawAt(haystack, soff, enders ~ [suffix]);
1071     }
1072 
1073     Patt prefix, suffix;
1074 }
1075 
1076 Clause paired(Args...)(Patt prefix, Patt suffix, Args args) { return new Clause(prefix, suffix, args); }
1077 Clause parend(Args...)(Args args) { return new Clause(lit('('), lit(')'), args); }
1078 Clause hooked(Args...)(Args args) { return new Clause(lit('['), lit(']'), args); }
1079 Clause braced(Args...)(Args args) { return new Clause(lit('{'), lit('}'), args); }
1080 
1081 import nxt.assert_ex;
1082 
1083 @safe pure nothrow unittest
1084 {
1085     /* auto p = `[alpha]`.lit.parend; */
1086     /* assert(p.at(`([alpha])`) == 7); */
1087 
1088     /* auto h = `[alpha]`.lit.hooked; */
1089     /* assert(h.at(`[[alpha]]`) == 7); */
1090 
1091     /* auto b = `[alpha]`.lit.braced; */
1092     /* assert(b.at(`{[alpha]}`) == 7); */
1093 
1094     /* auto pb = `[alpha]`.lit.parend; */
1095 }
1096 
1097 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern.
1098     Example: #!/bin/env rdmd
1099     See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix)
1100  */
1101 auto ref shebangLine(Patt interpreter) @safe pure nothrow
1102 {
1103     return seq(`#!`.lit,
1104                `/usr`.lit.opt,
1105                `/bin/env`.lit.opt,
1106                ws.oom,
1107                interpreter);
1108 }
1109 
1110 @safe pure nothrow unittest
1111 {
1112     assert(`rdmd`.lit.shebangLine
1113                  .at(`#!/bin/env rdmd`) == 15);
1114     assert(`rdmd`.lit.shebangLine
1115                  .at(`#!/usr/bin/env rdmd`) == 19);
1116     auto rgdmd = alt(`rdmd`.lit,
1117                      `gdmd`.lit);
1118     assert(rgdmd.shebangLine
1119                 .at(`#!/usr/bin/env rdmd-dev`) == 19);
1120     assert(rgdmd.shebangLine
1121                 .at(`#!/usr/bin/env gdmd`) == 19);
1122 }