1 /** 2 Symbolic (Regular) Expressions and Predicate Logic. 3 4 Syntax is similar to Emacs' sregex, rx. 5 6 String patterns are matched by their raw bytes for now. 7 8 Copyright: Per Nordlöw 2022-. 9 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 10 Authors: $(WEB Per Nordlöw) 11 12 TODO 13 Overload operators & (and), ! (not), 14 15 TODO: Add pattern for expressing inference with `infer` 16 (infer 17 ((x instanceOf X) & 18 (X subClassOf Y)) 19 (x instaceOf Y)) 20 21 TODO 22 Variables are either 23 - _ (ignore) 24 - _`x`, _`y`, etc 25 - _'x', _'y' 26 - _!0, _!1, ..., _!(n-1) 27 28 infer(rel!`desire`(_!`x`, _!`y`) && 29 rel!`madeOf`(_!`z`, _!`y`), 30 rel!`desire`(_!`x`, _!`z`)) 31 32 TODO: Support variables of specific types and inference using predicate logic: 33 infer(and(fact(var!'x', rel'desire', var!'y'), 34 fact(var!'z', opt(rel'madeOf', 35 rel'instanceOf'), var!'y'))), 36 pred(var!'x', rel'desire', var!'z' 37 )) 38 39 TODO: Make returns from factory functions immutable. 40 TODO: Reuse return patterns from Lit 41 42 TODO 43 const s = seq(`al.`.lit,`pha`.lit); 44 const t = `al`.lit ~ `pha`.lit; 45 assert(s !is t); 46 assert(equal(s, t)); 47 48 */ 49 module nxt.symbolic; 50 51 import std.algorithm: find, all, map, reduce, min, max, joiner, equal, each, filter; 52 import std.range: empty; 53 import std.array: array; 54 import std.string: representation; 55 import std.traits: isSomeString; 56 import nxt.find_ex: findAcronymAt, FindContext; 57 import nxt.dbgio; 58 import nxt.static_bitarray; 59 60 /** Base Pattern. 61 */ 62 class Patt 63 { 64 @safe pure nothrow: 65 66 /** Match $(D this) with $(D haystack) at Offset $(D soff). 67 Returns: Matched slice or [] if not match. 68 */ 69 final auto ref matchU(in ubyte[] haystack, size_t soff = 0) const 70 { 71 const hit = atU(haystack, soff); 72 return hit != hit.max ? haystack[0..hit] : []; 73 } 74 final auto ref match(const scope string haystack, size_t soff = 0) const 75 { 76 return matchU(haystack.representation, soff); 77 } 78 79 Seq opBinary(string op)(Patt rhs) if (op == `~`) // template can't be overridden 80 { 81 return opCatImpl(rhs); 82 } 83 Alt opBinary(string op)(Patt rhs) if (op == `|`) // template can't be overridden 84 { 85 return opAltImpl(rhs); 86 } 87 88 Seq opCatImpl(Patt rhs) // can be overridden 89 { 90 return seq(this, rhs); // TODO: check if this and rhs is Seq 91 } 92 Alt opAltImpl(Patt rhs) // can be overridden 93 { 94 return alt(this, rhs); // TODO: check if this and rhs is Alt 95 } 96 97 final size_t at(const scope string haystack, size_t soff = 0) const 98 // TODO: Activate this 99 /* out (hit) { */ 100 /* assert((!hit) || hit >= minLength); // TODO: Is this needed? */ 101 /* } */ 102 /* do */ 103 { 104 return atU(haystack.representation, soff); 105 } 106 107 size_t atU(in ubyte[] haystack, size_t soff = 0) const 108 { 109 assert(false); 110 } 111 112 /** Find $(D this) in String $(D haystack) at Offset $(D soff). */ 113 final const(ubyte[]) findAt(const scope string haystack, size_t soff = 0) const 114 { 115 return findRawAt(haystack.representation, soff, []); // TODO: this is ugly 116 } 117 118 /** Find $(D this) in Raw Bytes $(D haystack) at Offset $(D soff). */ 119 const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0, 120 in Patt[] enders = []) const 121 { 122 auto i = soff; 123 while (i < haystack.length) // while bytes left at i 124 { 125 if (haystack.length - i < minLength) // and bytes left to find pattern 126 { 127 return []; 128 } 129 const hit = atU(haystack, i); 130 if (hit != size_t.max) // hit at i 131 { 132 return haystack[i..i + hit]; 133 } 134 i++; 135 } 136 return []; 137 } 138 139 @property: 140 size_t minLength() const { assert(false); } /// Returns: minimum possible instance length. 141 size_t maxLength() const { assert(false); } /// Returns: maximum possible instance length. 142 143 bool isFixed() const { assert(false); } /// Returns: true if all possible instances have same length. 144 bool isConstant() const { return false; } /// Returns: true if all possible instances have same length. 145 146 final bool isVariable() const { return !isConstant; } 147 const(ubyte[]) getConstant() const { return []; } /// Returns: data if literal otherwise empty array. 148 149 /** Get All Literals that must match a given source $(D X) in order for $(D 150 this) to match $(D X) somewhere. 151 */ 152 Lit[] mandatories() { assert(false); } 153 154 /** Get Optional Literals that may match a given source $(D X) if $(D this) 155 matches $(D X) somewhere. 156 */ 157 Lit[] optionals() { return mandatories; } 158 159 protected Patt _parent; /// Parenting (Super) Pattern. 160 bool greedy = false; // Scan as far as possible if true 161 } 162 163 /** Literal Pattern with Cached Binary Byte Histogram. 164 */ 165 class Lit : Patt 166 { 167 @safe pure nothrow: 168 169 this(string bytes_) { assert(!bytes_.empty); this(bytes_.representation); } 170 this(ubyte ch) { this._bytes ~= ch; } 171 this(ubyte[] bytes_) { this._bytes = bytes_; } 172 this(immutable ubyte[] bytes_) { this._bytes = bytes_.dup; } 173 174 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 175 { 176 const l = _bytes.length; 177 return (soff + l <= haystack.length && // fits in haystack and 178 _bytes[] == haystack[soff..soff + l]) ? l : size_t.max; // same contents 179 } 180 181 override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0, 182 in Patt[] enders = []) const 183 { 184 return haystack[soff..$].find(_bytes); // reuse std.algorithm: find! 185 } 186 187 @property: 188 189 override size_t maxLength() const { return _bytes.length; } 190 override bool isFixed() const { return true; } 191 override bool isConstant() const { return true; } 192 override const(ubyte[]) getConstant() const { return _bytes; } 193 194 @property auto ref bytes() const { return _bytes; } 195 private ubyte[] _bytes; 196 alias _bytes this; 197 198 alias SinglesHist = StaticBitArray!(2^^ubyte.sizeof); 199 200 /// Get (Cached) (Binary) Histogram over single elements contained in this $(D Lit). 201 SinglesHist singlesHist() 202 { 203 if (!_bytes.empty && 204 _singlesHist.allZero) 205 { 206 foreach (b; _bytes) 207 { 208 _singlesHist[b] = true; 209 } 210 } 211 return _singlesHist; 212 } 213 private SinglesHist _singlesHist; 214 215 override Lit[] mandatories() { return [this]; } 216 } 217 218 auto lit(Args...)(Args args) @safe pure nothrow { return new Lit(args); } // instantiator 219 220 @safe pure nothrow unittest 221 { 222 immutable ab = `ab`; 223 assert(lit('b').at(`ab`, 1) == 1); 224 const a = lit('a'); 225 226 const ac = lit(`ac`); 227 assert(ac.match(`ac`)); 228 assert(ac.match(`ac`).length == 2); 229 assert(!ac.match(`ca`)); 230 assert(ac.match(`ca`) == []); 231 232 assert(a.isFixed); 233 assert(a.isConstant); 234 assert(a.at(ab) == 1); 235 assert(lit(ab).at(`a`) == size_t.max); 236 assert(lit(ab).at(`b`) == size_t.max); 237 238 assert(a.findAt(`cba`) == cast(immutable ubyte[])`a`); 239 assert(a.findAt(`ba`) == cast(immutable ubyte[])`a`); 240 assert(a.findAt(`a`) == cast(immutable ubyte[])`a`); 241 assert(a.findAt(``) == []); 242 assert(a.findAt(`b`) == []); 243 assert(ac.findAt(`__ac`) == cast(immutable ubyte[])`ac`); 244 assert(a.findAt(`b`).length == 0); 245 246 auto xyz = lit(`xyz`); 247 } 248 249 @safe pure nothrow unittest 250 { 251 auto ac = lit(`ac`); 252 assert(ac.mandatories == [ac]); 253 assert(ac.optionals == [ac]); 254 } 255 256 /** Word/Symbol Acronym Pattern. 257 */ 258 class Acronym : Patt 259 { 260 @safe pure nothrow: 261 262 this(string bytes_, FindContext ctx = FindContext.inSymbol) 263 { 264 assert(!bytes_.empty); 265 this(bytes_.representation, ctx); 266 } 267 268 this(ubyte ch) { this._acros ~= ch; } 269 this(ubyte[] bytes_, FindContext ctx = FindContext.inSymbol) 270 { 271 this._acros = bytes_; 272 this._ctx = ctx; 273 } 274 this(immutable ubyte[] bytes_, FindContext ctx = FindContext.inSymbol) 275 { 276 this._acros = bytes_.dup; 277 this._ctx = ctx; 278 } 279 280 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 281 { 282 auto offs = new size_t[_acros.length]; // hit offsets 283 size_t a = 0; // acronym index 284 foreach(s, ub; haystack[soff..$]) // for each element in source 285 { 286 import std.ascii: isAlpha; 287 288 // Check context 289 final switch (_ctx) 290 { 291 case FindContext.inWord: 292 case FindContext.asWord: 293 if (!ub.isAlpha) { return size_t.max; } break; 294 case FindContext.inSymbol: 295 case FindContext.asSymbol: 296 if (!ub.isAlpha && ub != '_') { return size_t.max; } break; 297 } 298 299 if (_acros[a] == ub) 300 { 301 offs[a] = s + soff; // store hit offset 302 a++; // advance acronym 303 if (a == _acros.length) // if complete acronym found 304 { 305 return s + 1; // return its length 306 } 307 } 308 } 309 return size_t.max; // no hit 310 } 311 312 template Tuple(E...) { alias Tuple = E; } 313 314 override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0, 315 in Patt[] enders = []) const 316 { 317 import std.string: CaseSensitive; 318 return haystack.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0]; 319 } 320 321 @property: 322 override size_t minLength() const { return _acros.length; } 323 override size_t maxLength() const { return size_t.max; } 324 override bool isFixed() const { return false; } 325 override bool isConstant() const { return false; } 326 327 private: 328 ubyte[] _acros; 329 FindContext _ctx; 330 } 331 332 @safe pure nothrow 333 { 334 auto inwac(Args...)(Args args) { return new Acronym(args, FindContext.inWord); } // word acronym 335 auto insac(Args...)(Args args) { return new Acronym(args, FindContext.inSymbol); } // symbol acronym 336 auto aswac(Args...)(Args args) { return new Acronym(args, FindContext.asWord); } // word acronym 337 auto assac(Args...)(Args args) { return new Acronym(args, FindContext.asSymbol); } // symbol acronym 338 } 339 340 @safe pure nothrow unittest 341 { 342 assert(inwac(`a`).at(`a`) == 1); 343 assert(inwac(`ab`).at(`ab`) == 2); 344 assert(inwac(`ab`).at(`a`) == size_t.max); 345 assert(inwac(`abc`).at(`abc`) == 3); 346 assert(inwac(`abc`).at(`aabbcc`) == 5); 347 assert(inwac(`abc`).at(`aaaabbcc`) == 7); 348 assert(inwac(`fpn`).at(`fopen`) == 5); 349 } 350 351 /** Any Byte. 352 */ 353 class Any : Patt 354 { 355 @safe pure nothrow: 356 this() {} 357 358 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 359 { 360 return soff < haystack.length ? 1 : size_t.max; 361 } 362 363 @property: 364 override size_t maxLength() const { return 1; } 365 override bool isFixed() const { return true; } 366 override bool isConstant() const { return false; } 367 368 override Lit[] mandatories() { return []; } 369 override Lit[] optionals() 370 { 371 import std.range: iota; 372 return iota(0, 256).map!(n => (cast(ubyte)n).lit).array; 373 } 374 } 375 376 auto any(Args...)(Args args) { return new Any(args); } // instantiator 377 378 /** Abstract Super Pattern. 379 */ 380 abstract class SPatt : Patt 381 { 382 @safe pure nothrow: 383 384 this(Patt[] subs_) { this._subs = subs_; } 385 386 this(Args...)(Args subs_) 387 { 388 foreach (sub; subs_) 389 { 390 alias Sub = typeof(sub); 391 392 // TODO: functionize to patternFromBuiltinType() or to!Patt 393 static if (is(Sub == string) || 394 is(Sub == char)) 395 { 396 _subs ~= new Lit(sub); 397 } 398 else 399 { 400 _subs ~= sub; 401 } 402 sub._parent = this; 403 } 404 } 405 406 protected Patt[] _subs; 407 } 408 409 /** Sequence of Patterns. 410 */ 411 class Seq : SPatt 412 { 413 @safe pure nothrow: 414 415 this(Patt[] subs_) { super(subs_); } 416 this(Args...)(Args subs_) { super(subs_); } 417 418 @property auto ref inout (Patt[]) elms() inout { return super._subs; } 419 420 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 421 { 422 assert(!elms.empty); // TODO: Move to in contract? 423 const c = getConstant; 424 if (!c.empty) 425 { 426 return (soff + c.length <= haystack.length && // if equal size and 427 c[] == haystack[soff..soff + c.length]); // equal contents 428 } 429 size_t sum = 0; 430 size_t off = soff; 431 foreach (ix, sub; elms) // TODO: Reuse std.algorithm instead? 432 { 433 size_t hit = sub.atU(haystack, off); 434 if (hit == size_t.max) { sum = hit; break; } // if any miss skip 435 sum += hit; 436 off += hit; 437 } 438 return sum; 439 } 440 441 @property: 442 override size_t maxLength() const { assert(false); } 443 override bool isFixed() const { return _subs.all!(a => a.isFixed); } 444 override bool isConstant() const { return _subs.all!(a => a.isConstant); } 445 override const(ubyte[]) getConstant() const { return []; } 446 447 override Lit[] mandatories() 448 { 449 auto x = _subs.map!(patt => patt.mandatories); 450 return x.joiner.array; 451 } 452 453 override Lit[] optionals() 454 { 455 auto x = _subs.map!(patt => patt.optionals); 456 return x.joiner.array; 457 } 458 } 459 460 auto seq(Args...)(Args args) @safe pure nothrow { return new Seq(args); } // instantiator 461 462 @safe pure nothrow unittest 463 { 464 const s = seq(`al.`.lit, 465 `pha`.lit); 466 const t = `al`.lit ~ `pha`.lit; 467 assert(s !is t); 468 469 immutable haystack = `alpha`; 470 assert(s.isFixed); 471 assert(s.isConstant); 472 assert(s.at(haystack)); 473 const al = seq(`a`.lit, 474 `l`.lit); 475 assert(al.at(`a`) == size_t.max); 476 } 477 478 @safe pure nothrow unittest 479 { 480 auto a = `aa.`.lit; 481 auto b = `bb.`.lit; 482 auto c = `cc.`.lit; 483 auto d = `dd.`.lit; 484 auto s = seq(a.opt, b, 485 c.opt, d); 486 assert(equal(s.mandatories, [b, d])); 487 assert(equal(s.optionals, [a, b, c, d])); 488 } 489 490 /** Alternative of Patterns in $(D ALTS). 491 */ 492 class Alt : SPatt 493 { 494 @safe pure nothrow: 495 496 this(Patt[] subs_) { super(subs_); } 497 this(Args...)(Args subs_) { super(subs_); } 498 499 size_t atIx(const scope string haystack, size_t soff, out size_t alt_hix) const 500 { 501 return atU(haystack.representation, soff, alt_hix); 502 } 503 504 void opOpAssign(string s : "~")(Patt sub) 505 { 506 super._subs ~= sub; 507 } 508 509 @property inout(Patt[]) alts() inout { return super._subs; } 510 511 /** Get Length of hit at index soff in haystack or size_t.max if none. 512 */ 513 size_t atU(in ubyte[] haystack, size_t soff, out size_t alt_hix) const 514 { 515 assert(!alts.empty); // TODO: Move to in contract? 516 size_t hit = 0; 517 size_t off = soff; 518 foreach (ix, sub; alts) // TODO: Reuse std.algorithm instead? 519 { 520 hit = sub.atU(haystack[off..$]); // match alternative 521 if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done 522 } 523 return hit; 524 } 525 526 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 527 { 528 size_t alt_hix; 529 return atU(haystack, soff, alt_hix); 530 } 531 532 /** Find $(D this) in $(D haystack) at Offset $(D soff). 533 */ 534 override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0, 535 in Patt[] enders = []) const 536 { 537 assert(!alts.empty); // TODO: Move to in contract? 538 539 import nxt.dbgio; 540 541 switch (alts.length) 542 { 543 case 1: 544 const a0 = alts[0].getConstant; 545 if (!a0.empty) 546 { 547 auto hit = haystack[soff..$].find(a0); // Use: second argument to return alt_hix 548 return hit; 549 } 550 else 551 { 552 return alts[0].findRawAt(haystack, soff, enders); // recurse to it 553 } 554 case 2: 555 const a0 = alts[0].getConstant; 556 const a1 = alts[1].getConstant; 557 if (!a0.empty && 558 !a1.empty) 559 { 560 auto hit = haystack[soff..$].find(a0, a1); // Use: second argument to return alt_hix 561 return hit[0]; 562 } 563 break; 564 case 3: 565 const a0 = alts[0].getConstant; 566 const a1 = alts[1].getConstant; 567 const a2 = alts[2].getConstant; 568 if (!a0.empty && 569 !a1.empty && 570 !a2.empty) 571 { 572 auto hit = haystack[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix 573 return hit[0]; 574 } 575 break; 576 case 4: 577 const a0 = alts[0].getConstant; 578 const a1 = alts[1].getConstant; 579 const a2 = alts[2].getConstant; 580 const a3 = alts[3].getConstant; 581 if (!a0.empty && 582 !a1.empty && 583 !a2.empty && 584 !a3.empty) 585 { 586 auto hit = haystack[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix 587 return hit[0]; 588 } 589 break; 590 case 5: 591 const a0 = alts[0].getConstant; 592 const a1 = alts[1].getConstant; 593 const a2 = alts[2].getConstant; 594 const a3 = alts[3].getConstant; 595 const a4 = alts[4].getConstant; 596 if (!a0.empty && 597 !a1.empty && 598 !a2.empty && 599 !a3.empty && 600 !a4.empty) 601 { 602 auto hit = haystack[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix 603 return hit[0]; 604 } 605 break; 606 default: 607 break; 608 } 609 610 return super.findRawAt(haystack, soff, enders); // revert to base case 611 } 612 613 @property: 614 override size_t minLength() const { return reduce!min(size_t.max, 615 _subs.map!(a => a.minLength)); } 616 override size_t maxLength() const { return reduce!max(size_t.min, 617 _subs.map!(a => a.maxLength)); } 618 override bool isFixed() const 619 { 620 // TODO: Merge these loops using tuple algorithm. 621 auto mins = _subs.map!(a => a.minLength); 622 auto maxs = _subs.map!(a => a.maxLength); 623 import nxt.predicates: allEqual; 624 return (mins.allEqual && 625 maxs.allEqual); 626 } 627 override bool isConstant() const 628 { 629 if (_subs.length == 0) 630 { 631 return true; 632 } 633 else if (_subs.length == 1) 634 { 635 import std.range: front; 636 return _subs.front.isConstant; 637 } 638 else 639 { 640 return false; // TODO: Maybe handle case when _subs are different. 641 } 642 } 643 } 644 645 auto alt(Args...)(Args args) @safe pure nothrow { return new Alt(args); } // instantiator 646 647 @safe pure nothrow unittest 648 { 649 immutable a_b = alt(`a`.lit, 650 `b`.lit); 651 652 immutable a__b = (`a`.lit | 653 `b`.lit); 654 655 assert(a_b.isFixed); 656 assert(!a_b.isConstant); 657 assert(a_b.at(`a`)); 658 assert(a_b.at(`b`)); 659 assert(a_b.at(`c`) == size_t.max); 660 661 size_t hix = size_t.max; 662 a_b.atIx(`a`, 0, hix); assert(hix == 0); 663 a_b.atIx(`b`, 0, hix); assert(hix == 1); 664 665 /* assert(alt.at(`a`) == size_t.max); */ 666 /* assert(alt.at(``) == size_t.max); */ 667 668 immutable a = alt(lit(`a`)); 669 immutable aa = alt(lit(`aa`)); 670 assert(aa.isConstant); 671 672 immutable aa_bb = alt(lit(`aa`), 673 lit(`bb`)); 674 assert(aa_bb.isFixed); 675 assert(aa_bb.minLength == 2); 676 assert(aa_bb.maxLength == 2); 677 678 immutable a_bb = alt(lit(`a`), 679 lit(`bb`)); 680 assert(!a_bb.isFixed); 681 assert(a_bb.minLength == 1); 682 assert(a_bb.maxLength == 2); 683 684 const string _aa = `_aa`; 685 assert(aa_bb.findAt(_aa) == cast(immutable ubyte[])`aa`); 686 assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable ubyte[])_aa)[0] == 1); 687 688 const string _bb = `_bb`; 689 assert(aa_bb.findAt(_bb) == cast(immutable ubyte[])`bb`); 690 assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable ubyte[])_bb)[0] == 1); 691 692 assert(a.findAt(`b`) == []); 693 assert(aa.findAt(`cc`) == []); 694 assert(aa_bb.findAt(`cc`) == []); 695 assert(aa_bb.findAt(``) == []); 696 } 697 698 @safe pure nothrow unittest 699 { 700 auto a_b = alt(`a`.lit); 701 a_b ~= `b`.lit; 702 703 assert(a_b.isFixed); 704 assert(!a_b.isConstant); 705 assert(a_b.at(`a`)); 706 assert(a_b.at(`b`)); 707 assert(a_b.at(`c`) == size_t.max); 708 709 size_t hix = size_t.max; 710 a_b.atIx(`a`, 0, hix); assert(hix == 0); 711 a_b.atIx(`b`, 0, hix); assert(hix == 1); 712 } 713 714 class Space : Patt 715 { 716 @safe pure nothrow: 717 718 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 719 { 720 import std.ascii: isWhite; 721 return soff < haystack.length && isWhite(haystack[soff]) ? 1 : size_t.max; 722 } 723 724 @property: 725 override size_t maxLength() const { return 1; } 726 override bool isFixed() const { return true; } 727 override bool isConstant() const { return false; } 728 } 729 730 auto ws() @safe pure nothrow { return new Space(); } // instantiator 731 732 @safe pure nothrow unittest 733 { 734 assert(ws.at(` `) == 1); 735 assert(ws.at("\t") == 1); 736 assert(ws.at("\n") == 1); 737 } 738 739 /** Abstract Singleton Super Pattern. 740 */ 741 abstract class SPatt1 : Patt 742 { 743 @safe pure nothrow: 744 745 this(Patt sub) 746 { 747 this.sub = sub; 748 sub._parent = this; 749 } 750 protected Patt sub; 751 } 752 753 /** Optional Sub Pattern $(D count) times. 754 */ 755 class Opt : SPatt1 756 { 757 @safe pure nothrow: 758 759 this(Patt sub) { super(sub); } 760 761 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 762 { 763 assert(soff <= haystack.length); // include equality because haystack might be empty and size zero 764 const hit = sub.atU(haystack[soff..$]); 765 return hit == size_t.max ? 0 : hit; 766 } 767 768 @property: 769 override size_t minLength() const { return 0; } 770 override size_t maxLength() const { return sub.maxLength; } 771 override bool isFixed() const { return false; } 772 override bool isConstant() const { return false; } 773 774 override Lit[] mandatories() { return []; } 775 override Lit[] optionals() { return sub.optionals; } 776 } 777 778 auto opt(Args...)(Args args) { return new Opt(args); } // optional 779 780 @safe pure nothrow unittest 781 { 782 assert(`a`.lit.opt.at(`b`) == 0); 783 assert(`a`.lit.opt.at(`a`) == 1); 784 } 785 786 /** Repetition Sub Pattern $(D count) times. 787 */ 788 class Rep : SPatt1 789 { 790 @safe pure nothrow: 791 792 this(Patt sub, size_t count) 793 in 794 { 795 assert(count >= 2); 796 } 797 do 798 { 799 super(sub); 800 this.countReq = count; 801 this.countOpt = 0; // fixed length repetion 802 } 803 804 this(Patt sub, size_t countMin, size_t countMax) 805 in 806 { 807 assert(countMax >= 2); 808 assert(countMin <= countMax); 809 } 810 do 811 { 812 super(sub); 813 this.countReq = countMin; 814 this.countOpt = countMax - countMin; 815 } 816 817 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 818 { 819 size_t sum = 0; 820 size_t off = soff; 821 /* mandatory */ 822 foreach (ix; 0..countReq) // TODO: Reuse std.algorithm instead? 823 { 824 size_t hit = sub.atU(haystack[off..$]); 825 if (hit == size_t.max) { return hit; } // if any miss skip 826 off += hit; 827 sum += hit; 828 } 829 /* optional part */ 830 foreach (ix; countReq..countReq + countOpt) // TODO: Reuse std.algorithm instead? 831 { 832 size_t hit = sub.atU(haystack[off..$]); 833 if (hit == size_t.max) { break; } // if any miss just break 834 off += hit; 835 sum += hit; 836 } 837 return sum; 838 } 839 840 @property: 841 override size_t minLength() const { return countReq*sub.maxLength; } 842 override size_t maxLength() const { return (countReq + countOpt)*sub.maxLength; } 843 override bool isFixed() const { return minLength == maxLength && sub.isFixed; } 844 override bool isConstant() const { return minLength == maxLength && sub.isConstant; } 845 846 override Lit[] mandatories() { return sub.mandatories; } 847 override Lit[] optionals() { return sub.optionals; } 848 849 // invariant { assert(countReq); } 850 size_t countReq; // Required. 851 size_t countOpt; // Optional. 852 } 853 854 auto rep(Args...)(Args args) { return new Rep(args); } // repetition 855 auto zom(Args...)(Args args) { return new Rep(args, 0, size_t.max); } // zero or more 856 auto oom(Args...)(Args args) { return new Rep(args, 1, size_t.max); } // one or more 857 858 @safe pure nothrow unittest 859 { 860 auto l = 'a'.lit; 861 862 const l5 = l.rep(5); 863 assert(l5.isConstant); 864 assert(l5.at(`aaaa`) == size_t.max); 865 assert(l5.at(`aaaaa`)); 866 assert(l5.at(`aaaaaaa`)); 867 assert(l5.isFixed); 868 assert(l5.maxLength == 5); 869 870 const l23 = l.rep(2, 3); 871 assert(l23.at(`a`) == size_t.max); 872 assert(l23.at(`aa`) == 2); 873 assert(l23.at(`aaa`) == 3); 874 assert(l23.at(`aaaa`) == 3); 875 assert(!l23.isConstant); 876 } 877 878 @safe pure nothrow unittest 879 { 880 auto l = 'a'.lit; 881 auto l5 = l.rep(5); 882 assert(l5.mandatories == [l]); 883 assert(l5.optionals == [l]); 884 } 885 886 @safe pure nothrow unittest 887 { 888 auto l = 'a'.lit; 889 auto l5 = l.opt.rep(5); 890 assert(l5.mandatories == []); 891 assert(l5.optionals == [l]); 892 } 893 894 class Ctx : Patt 895 { 896 enum Type 897 { 898 bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\`` 899 eob, /// End Of \em Block/Name/File/String. @b Emacs: `\'` 900 901 bol, /// Beginning Of \em Line. @b Emacs: `^` 902 eol, /// End Of \em Line. @b Emacs: `$` 903 904 bos, /// Beginning Of \em Symbol. @b Emacs: `\_<` 905 eos, /// End Of \em Symbol. @b Emacs: `\_>` 906 907 bow, /// Beginning Of \em Word. @b Emacs: `\<` 908 eow, /// End Of \em Word. @b Emacs: `\>` 909 } 910 911 @safe pure nothrow: 912 913 this(Type type) { this.type = type; } 914 915 override size_t atU(in ubyte[] haystack, size_t soff = 0) const 916 { 917 assert(soff <= haystack.length); // include equality because haystack might be empty and size zero 918 bool ok = false; 919 import std.ascii : isAlphaNum/* , newline */; 920 final switch (type) 921 { 922 /* buffer */ 923 case Type.bob: ok = (soff == 0); break; 924 case Type.eob: ok = (soff == haystack.length); break; 925 926 /* line */ 927 case Type.bol: ok = (soff == 0 || (haystack[soff - 1] == 0x0d || 928 haystack[soff - 1] == 0x0a)); break; 929 case Type.eol: ok = (soff == haystack.length || (haystack[soff ] == 0x0d || 930 haystack[soff ] == 0x0a)); break; 931 932 /* symbol */ 933 case Type.bos: ok = ((soff == 0 || (!haystack[soff - 1].isAlphaNum && 934 haystack[soff - 1] != '_')) && // TODO: Make '_' language-dependent 935 (soff < haystack.length && haystack[soff].isAlphaNum)) ; break; 936 case Type.eos: ok = ((soff == haystack.length || (!haystack[soff].isAlphaNum && 937 haystack[soff] != '_')) && // TODO: Make '_' language-dependent 938 (soff >= 1 && haystack[soff - 1].isAlphaNum)) ; break; 939 940 /* word */ 941 case Type.bow: ok = ((soff == 0 || !haystack[soff - 1].isAlphaNum) && 942 (soff < haystack.length && haystack[soff].isAlphaNum)) ; break; 943 case Type.eow: ok = ((soff == haystack.length || !haystack[soff].isAlphaNum) && 944 (soff >= 1 && haystack[soff - 1].isAlphaNum)) ; break; 945 } 946 return ok ? 0 : size_t.max; 947 } 948 949 @property: 950 override size_t maxLength() const { return 0; } 951 override bool isFixed() const { return true; } 952 override bool isConstant() const { return true; } 953 954 protected Type type; 955 } 956 957 Ctx bob(Args...)(Args args) { return new Ctx(args, Ctx.Type.bob); } 958 Ctx eob(Args...)(Args args) { return new Ctx(args, Ctx.Type.eob); } 959 Ctx bol(Args...)(Args args) { return new Ctx(args, Ctx.Type.bol); } 960 Ctx eol(Args...)(Args args) { return new Ctx(args, Ctx.Type.eol); } 961 Ctx bos(Args...)(Args args) { return new Ctx(args, Ctx.Type.bos); } 962 Ctx eos(Args...)(Args args) { return new Ctx(args, Ctx.Type.eos); } 963 Ctx bow(Args...)(Args args) { return new Ctx(args, Ctx.Type.bow); } 964 Ctx eow(Args...)(Args args) { return new Ctx(args, Ctx.Type.eow); } 965 Seq buf(Args...)(Args args) { return seq(bob, args, eob); } 966 Seq line(Args...)(Args args) { return seq(bol, args, eol); } 967 Seq sym(Args...)(Args args) { return seq(bos, args, eos); } 968 Seq word(Args...)(Args args) { return seq(bow, args, eow); } 969 970 @safe pure nothrow unittest 971 { 972 const bob_ = bob; 973 const eob_ = eob; 974 assert(bob_.at(`ab`) == 0); 975 assert(eob_.at(`ab`, 2) == 0); 976 977 const bol_ = bol; 978 assert(bol_.at(`ab`) == 0); 979 assert(bol_.at("a\nb", 2) == 0); 980 assert(bol_.at("a\nb", 1) == size_t.max); 981 982 const eol_ = eol; 983 assert(eol_.at(`ab`, 2) == 0); 984 assert(eol_.at("a\nb", 1) == 0); 985 assert(eol_.at("a\nb", 2) == size_t.max); 986 987 const bos_ = bos; 988 const eos_ = eos; 989 assert(bos_.at(`ab`) == 0); 990 assert(bos_.at(` ab`) == size_t.max); 991 assert(eos_.at(`ab`, 2) == 0); 992 assert(eos_.at(`a_b `, 1) == size_t.max); 993 assert(eos_.at(`ab `, 2) == 0); 994 995 const bow_ = bow; 996 const eow_ = eow; 997 998 assert(bow_.at(`ab`) == 0); 999 assert(bow_.at(` ab`) == size_t.max); 1000 1001 assert(eow_.at(`ab`, 2) == 0); 1002 assert(eow_.at(`ab `, 0) == size_t.max); 1003 1004 assert(bow_.at(` ab `, 1) == 0); 1005 assert(bow_.at(` ab `, 0) == size_t.max); 1006 1007 assert(eow_.at(` ab `, 3) == 0); 1008 assert(eow_.at(` ab `, 4) == size_t.max); 1009 1010 auto l = lit(`ab`); 1011 auto w = word(l); 1012 assert(w.at(`ab`) == 2); 1013 assert(w.at(`ab_c`) == 2); 1014 1015 auto s = sym(l); 1016 assert(s.at(`ab`) == 2); 1017 assert(s.at(`ab_c`) == size_t.max); 1018 1019 assert(bob_.findAt(`a`) == []); 1020 assert(bob_.findAt(`a`).ptr != null); 1021 assert(eob_.findAt(`a`) == []); 1022 1023 assert(bol_.findAt(`a`) == []); 1024 assert(bol_.findAt(`a`).ptr != null); 1025 assert(eol_.findAt(`a`) == []); 1026 1027 assert(bow_.findAt(`a`) == []); 1028 assert(bow_.findAt(`a`).ptr != null); 1029 assert(eow_.findAt(`a`) == []); 1030 1031 assert(bos_.findAt(`a`) == []); 1032 assert(bos_.findAt(`a`).ptr != null); 1033 assert(eos_.findAt(`a`) == []); 1034 // TODO: This fails assert(eos_.findAt(`a`).ptr != null); 1035 } 1036 1037 /** Keyword $(D arg). */ 1038 Seq kwd(Arg)(Arg arg) { return seq(bow, arg, eow); } 1039 1040 @safe pure nothrow unittest 1041 { 1042 const str = `int`; 1043 auto x = str.lit.kwd; 1044 1045 assert(x.at(str, 0)); 1046 /* TODO: assert(!x.at(str, 1)); */ 1047 1048 assert(x.at(` ` ~ str, 1)); 1049 /* TODO: assert(!x.at(` int`, 0)); */ 1050 } 1051 1052 /** Pattern Paired with Prefix and Suffix. 1053 */ 1054 class Clause : SPatt1 1055 { 1056 @safe pure nothrow: 1057 1058 this(Patt prefix_, 1059 Patt suffix_, 1060 Patt sub) 1061 { 1062 super(sub); 1063 this.prefix = prefix_; 1064 this.suffix = suffix_; 1065 } 1066 1067 override const(ubyte[]) findRawAt(in ubyte[] haystack, size_t soff = 0, 1068 in Patt[] enders = []) const 1069 { 1070 return sub.findRawAt(haystack, soff, enders ~ [suffix]); 1071 } 1072 1073 Patt prefix, suffix; 1074 } 1075 1076 Clause paired(Args...)(Patt prefix, Patt suffix, Args args) { return new Clause(prefix, suffix, args); } 1077 Clause parend(Args...)(Args args) { return new Clause(lit('('), lit(')'), args); } 1078 Clause hooked(Args...)(Args args) { return new Clause(lit('['), lit(']'), args); } 1079 Clause braced(Args...)(Args args) { return new Clause(lit('{'), lit('}'), args); } 1080 1081 import nxt.assert_ex; 1082 1083 @safe pure nothrow unittest 1084 { 1085 /* auto p = `[alpha]`.lit.parend; */ 1086 /* assert(p.at(`([alpha])`) == 7); */ 1087 1088 /* auto h = `[alpha]`.lit.hooked; */ 1089 /* assert(h.at(`[[alpha]]`) == 7); */ 1090 1091 /* auto b = `[alpha]`.lit.braced; */ 1092 /* assert(b.at(`{[alpha]}`) == 7); */ 1093 1094 /* auto pb = `[alpha]`.lit.parend; */ 1095 } 1096 1097 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern. 1098 Example: #!/bin/env rdmd 1099 See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix) 1100 */ 1101 auto ref shebangLine(Patt interpreter) @safe pure nothrow 1102 { 1103 return seq(`#!`.lit, 1104 `/usr`.lit.opt, 1105 `/bin/env`.lit.opt, 1106 ws.oom, 1107 interpreter); 1108 } 1109 1110 @safe pure nothrow unittest 1111 { 1112 assert(`rdmd`.lit.shebangLine 1113 .at(`#!/bin/env rdmd`) == 15); 1114 assert(`rdmd`.lit.shebangLine 1115 .at(`#!/usr/bin/env rdmd`) == 19); 1116 auto rgdmd = alt(`rdmd`.lit, 1117 `gdmd`.lit); 1118 assert(rgdmd.shebangLine 1119 .at(`#!/usr/bin/env rdmd-dev`) == 19); 1120 assert(rgdmd.shebangLine 1121 .at(`#!/usr/bin/env gdmd`) == 19); 1122 }