1 /** 2 Patterns. 3 4 Test: dmd -version=show -preview=dip1000 -preview=in -vcolumns -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d 5 Test: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d 6 Debug: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main pattern.d && lldb pattern 7 8 Concepts and namings are inspired by regular expressions, symbolic (regular) 9 expressions (Emacs' rx.el package), predicate logic and grammars. 10 11 String patterns are matched by their raw bytes for now. 12 13 Copyright: Per Nordlöw 2022-. 14 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 15 Authors: $(WEB Per Nordlöw) 16 17 TODO: Merge with lpgen.d Node classes 18 TODO: Extend `Node` classes to represent `xlg.el` read by `LispFileParser`. 19 TODO: Replace `findRawAt(const Data haystack` with `findRawAt(in Data haystack` 20 21 TODO: 22 Overload operators & (and), ! (not), 23 24 TODO: Add pattern for expressing inference with `infer` 25 (infer 26 ((x instanceOf X) & 27 (X subClassOf Y)) 28 (x instaceOf Y)) 29 30 TODO: 31 Variables are either 32 - _ (ignore) 33 - _`x`, _`y`, etc 34 - _'x', _'y' 35 - _!0, _!1, ..., _!(n-1) 36 37 infer(rel!`desire`(_!`x`, _!`y`) && 38 rel!`madeOf`(_!`z`, _!`y`), 39 rel!`desire`(_!`x`, _!`z`)) 40 41 TODO: Support variables of specific types and inference using predicate logic: 42 infer(and(fact(var!'x', rel'desire', var!'y'), 43 fact(var!'z', opt(rel'madeOf', 44 rel'instanceOf'), var!'y'))), 45 pred(var!'x', rel'desire', var!'z' 46 )) 47 48 TODO: Make returns from factory functions immutable. 49 TODO: Reuse return patterns from Lit 50 51 TODO 52 const s = seq(`al.`.lit,`pha`.lit); 53 const t = `al`.lit ~ `pha`.lit; 54 assert(s !is t); 55 assert(equal(s, t)); 56 57 */ 58 module nxt.pattern; 59 60 import std.algorithm : find, all, map, min, max, joiner; 61 import std.range : empty; 62 import std.array : array; 63 import std.string : representation; 64 import std.traits : isSomeString; 65 import nxt.find_ex : findAcronymAt, FindContext; 66 import nxt.debugio; 67 import nxt.container.dynamic_array; 68 69 /++ String Substitution. +/ 70 struct Substitution { 71 string src; 72 string dst; 73 } 74 75 @safe: 76 77 /++ Untyped data. 78 +/ 79 alias Data = ubyte[]; 80 81 /++ Matching `input` with `node`. 82 Compatibility with `std.regex.matchFirst`. 83 +/ 84 inout(char)[] matchFirst(scope return /+ref+/ inout(char)[] input, const Node node) pure nothrow /+@nogc+/ { 85 return input.match(node); 86 } 87 88 /// 89 @safe pure unittest { 90 const x = "a"; 91 assert(x.matchFirst(lit("b")).length == 0); 92 assert(x.matchFirst(lit("b")) is []); 93 } 94 95 /// 96 @safe pure unittest { 97 const x = "ab"; 98 assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]); 99 assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]); 100 } 101 102 /** Match `input` with `node`. 103 Returns: Matched slice or `[]` if not match. 104 */ 105 auto ref matchFirst(in return /+ref+/ Data input, const Node node) pure nothrow /+@nogc+/ { 106 return node.findRawAt(input, 0); 107 } 108 109 /// 110 @safe pure unittest { 111 /+const+/ Data x = [1,2]; 112 assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]); 113 assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]); 114 } 115 116 inout(char)[] match(scope return /+ref+/ inout(char)[] input, const Node node) @trusted pure nothrow /+@nogc+/ { 117 return cast(typeof(return))input.representation.matchFirst(node); 118 } 119 120 /++ Pattern (length) bounds. +/ 121 struct Bounds { 122 static immutable inf = size_t.max; 123 size_t low; ///< Smallest length possible. 124 size_t high; ///< Largest length possible. 125 } 126 127 /** Base Pattern. 128 */ 129 abstract extern(C++) class Node { extern(D): 130 @safe pure nothrow: 131 132 Seq opBinary(string op)(Node rhs) if (op == `~`) => opCatImpl(rhs); // template can't be overridden 133 Alt opBinary(string op)(Node rhs) if (op == `|`) => opAltImpl(rhs); // template can't be overridden 134 135 protected Seq opCatImpl(Node rhs) => seq(this, rhs); /+ TODO: check if this and rhs is Seq +/ 136 protected Alt opAltImpl(Node rhs) => alt(this, rhs); /+ TODO: check if this and rhs is Alt +/ 137 138 final size_t at(const scope string input, size_t soff = 0) const 139 /+ TODO: Activate this +/ 140 /* out (hit) { */ 141 /* assert((!hit) || hit >= bounds.min); /+ TODO: Is this needed? +/ */ 142 /* } */ 143 /* do */ 144 { 145 return atRaw(input.representation, soff); 146 } 147 148 abstract size_t atRaw(in Data input, size_t soff = 0) const @nogc; 149 150 /** Find $(D this) in String `input` at offset `soff`. */ 151 final const(Data) findAt(const return scope string input, size_t soff = 0) const { 152 return findRawAt(input.representation, soff, []); /+ TODO: this is ugly +/ 153 } 154 155 /** Find $(D this) in Raw Bytes `input` at offset `soff`. */ 156 const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const @nogc { 157 auto i = soff; 158 while (i < input.length) { // while bytes left at i 159 if (input.length - i < bounds.low) // and bytes left to find pattern 160 return []; 161 const hit = atRaw(input, i); 162 if (hit != size_t.max) // hit at i 163 return input[i..i + hit]; 164 i++; 165 } 166 return []; 167 } 168 169 @property @nogc: 170 abstract Bounds bounds() const; 171 172 abstract bool isFixed() const; /// Returns: true if all possible instances have same length. 173 abstract bool isConstant() const; /// Returns: true if all possible instances have same length. 174 175 final bool isVariable() => !isConstant; 176 const(Data) tryGetConstant() const => []; /// Returns: data if literal otherwise empty array. 177 178 /** Get All Literals that must match a given source $(D X) in order for $(D 179 this) to match $(D X) somewhere. 180 */ 181 version (none) abstract Lit[] mandatories(); 182 183 /** Get Optional Literals that may match a given source $(D X) if $(D this) 184 matches $(D X) somewhere. 185 */ 186 version (none) Lit[] optionals() => mandatories; 187 188 protected Node _parent; /// Parenting (Super) Pattern. 189 } 190 191 /** Literal Pattern with Cached Binary Byte Histogram. 192 */ 193 final extern(C++) class Lit : Node { extern(D): 194 @safe pure nothrow: 195 196 this(string bytes_) in(!bytes_.empty) { 197 this(bytes_.representation); 198 } 199 this(ubyte ch) { 200 this._bytes ~= ch; 201 } 202 this(Data bytes_) /+in(!bytes_.empty)+/ { 203 this._bytes = bytes_; 204 } 205 this(immutable Data bytes_) /+in(!bytes_.empty)+/ { 206 this._bytes = bytes_.dup; 207 } 208 209 override size_t atRaw(in Data input, size_t soff = 0) const { 210 const l = _bytes.length; 211 return (soff + l <= input.length && // fits in input and 212 _bytes[] == input[soff..soff + l]) ? l : size_t.max; // same contents 213 } 214 215 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @trusted { 216 const result = input[soff..$].find(_bytes); // Uses std.algorithm.find 217 /+ IMHO, find should return null on miss for non-empty needle (`_bytes`) 218 so adjust so that it does. +/ 219 return result.length != 0 ? result : []; 220 } 221 222 @property nothrow @nogc: 223 auto ref bytes() const => _bytes; 224 private Data _bytes; 225 alias _bytes this; 226 override: 227 Bounds bounds() const => Bounds(_bytes.length, _bytes.length); 228 bool isFixed() const => true; 229 bool isConstant() const => true; 230 const(Data) tryGetConstant() const => _bytes; 231 version (none) Lit[] mandatories() => [this]; 232 } 233 234 /++ Literal. +/ 235 auto lit(Args...)(Args args) @safe pure nothrow => new Lit(args); // instantiator 236 237 /++ Full|Exact|Complete (Anchored) literal. +/ 238 auto full(Args...)(Args args) @safe pure nothrow => seq(bob, lit(args), eob); // instantiator 239 240 pure nothrow @safe unittest { 241 const _ = lit(Data.init); 242 const ab = lit(`ab`); 243 assert(`ab`.match(ab)); 244 version (none) assert(!ab.match(`_`)); // TODO: enable 245 } 246 247 pure nothrow @safe unittest { 248 immutable ab = `ab`; 249 assert(lit('b').at(`ab`, 1) == 1); 250 const a = lit('a'); 251 252 const ac = lit(`ac`); 253 assert(`ac`.match(ac)); 254 assert(`ac`.match(ac).length == 2); 255 assert(`ca`.match(ac) == []); 256 257 assert(a.isFixed); 258 assert(a.isConstant); 259 assert(a.at(ab) == 1); 260 assert(lit(ab).at(`a`) == size_t.max); 261 assert(lit(ab).at(`b`) == size_t.max); 262 263 assert(a.findAt(`cba`) == cast(immutable Data)`a`); 264 assert(a.findAt(`ba`) == cast(immutable Data)`a`); 265 assert(a.findAt(`a`) == cast(immutable Data)`a`); 266 assert(a.findAt(``) == []); 267 assert(a.findAt(`b`) == []); 268 assert(ac.findAt(`__ac`) == cast(immutable Data)`ac`); 269 assert(a.findAt(`b`).length == 0); 270 271 auto xyz = lit(`xyz`); 272 } 273 274 pure nothrow @safe unittest { 275 auto ac = lit(`ac`); 276 /+ TODO: assert(ac.mandatories == [ac]); +/ 277 /+ TODO: assert(ac.optionals == [ac]); +/ 278 } 279 280 /** Word/Symbol Acronym Pattern. 281 */ 282 version (none) // TODO: enable if and when I need this 283 final extern(C++) class Acronym : Node { extern(D): 284 @safe pure nothrow: 285 286 this(string bytes_, FindContext ctx = FindContext.inSymbol) in(!bytes_.empty) { 287 this(bytes_.representation, ctx); 288 } 289 290 this(ubyte ch) { this._acros ~= ch; } 291 292 this(Data bytes_, FindContext ctx = FindContext.inSymbol) { 293 this._acros = bytes_; 294 this._ctx = ctx; 295 } 296 297 this(immutable Data bytes_, FindContext ctx = FindContext.inSymbol) { 298 this._acros = bytes_.dup; 299 this._ctx = ctx; 300 } 301 302 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 303 // scope auto offs = new size_t[_acros.length]; // hit offsets 304 size_t a = 0; // acronym index 305 foreach(s, ub; input[soff..$]) { // for each element in source 306 import std.ascii: isAlpha; 307 308 // Check context 309 final switch (_ctx) { 310 case FindContext.inWord: 311 case FindContext.asWord: 312 if (!ub.isAlpha) 313 return size_t.max; 314 break; 315 case FindContext.inSymbol: 316 case FindContext.asSymbol: 317 if (!ub.isAlpha && ub != '_') 318 return size_t.max; 319 break; 320 } 321 322 if (_acros[a] == ub) { 323 // offs[a] = s + soff; // store hit offset 324 a++; // advance acronym 325 if (a == _acros.length) { // if complete acronym found 326 return s + 1; // return its length 327 } 328 } 329 } 330 return size_t.max; // no hit 331 } 332 333 template Tuple(E...) { alias Tuple = E; } 334 335 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow { 336 import std.string: CaseSensitive; 337 return input.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0]; 338 } 339 340 @property: 341 override size_t bounds() const nothrow @nogc => typeof(return)(_acros.length, size_t.max); 342 override bool isFixed() const nothrow @nogc => false; 343 override bool isConstant() const nothrow @nogc => false; 344 345 private: 346 Data _acros; 347 FindContext _ctx; 348 } 349 350 version (none) 351 @safe pure nothrow { 352 auto inwac(Args...)(Args args) => new Acronym(args, FindContext.inWord); // word acronym 353 auto insac(Args...)(Args args) => new Acronym(args, FindContext.inSymbol); // symbol acronym 354 auto aswac(Args...)(Args args) => new Acronym(args, FindContext.asWord); // word acronym 355 auto assac(Args...)(Args args) => new Acronym(args, FindContext.asSymbol); // symbol acronym 356 } 357 358 version (none) 359 pure nothrow @safe unittest { 360 assert(inwac(`a`).at(`a`) == 1); 361 assert(inwac(`ab`).at(`ab`) == 2); 362 assert(inwac(`ab`).at(`a`) == size_t.max); 363 assert(inwac(`abc`).at(`abc`) == 3); 364 assert(inwac(`abc`).at(`aabbcc`) == 5); 365 assert(inwac(`abc`).at(`aaaabbcc`) == 7); 366 assert(inwac(`fpn`).at(`fopen`) == 5); 367 } 368 369 /** Any Byte. 370 */ 371 final extern(C++) class Any : Node { extern(D): 372 @safe pure nothrow: 373 this() {} 374 375 override size_t atRaw(in Data input, size_t soff = 0) const => soff < input.length ? 1 : size_t.max; 376 377 @property: 378 override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1); 379 override bool isFixed() const nothrow @nogc => true; 380 override bool isConstant() const nothrow @nogc => false; 381 382 version (none) override Lit[] mandatories() nothrow => []; 383 version (none) override Lit[] optionals() nothrow { 384 import std.range: iota; 385 return iota(0, 256).map!(n => (cast(ubyte)n).lit).array; 386 } 387 } 388 389 auto any(Args...)(Args args) => new Any(args); // instantiator 390 391 /** Abstract Super Pattern. 392 */ 393 abstract extern(C++) class SPatt : Node { extern(D): 394 @safe pure nothrow: 395 396 this(Node[] subs_) { this._subs = subs_; } 397 this(Args...)(Args subs_) { 398 foreach (sub; subs_) { 399 alias Sub = typeof(sub); 400 /+ TODO: functionize to patternFromBuiltinType() or to!Node +/ 401 static if (is(Sub == string) || 402 is(Sub == char)) { 403 _subs ~= new Lit(sub); 404 } else 405 _subs ~= sub; 406 sub._parent = this; 407 } 408 } 409 410 protected Node[] _subs; 411 } 412 413 /** Sequence of Patterns. 414 */ 415 final extern(C++) class Seq : SPatt { extern(D): 416 @safe pure nothrow: 417 418 this(Node[] subs_) { super(subs_); } 419 this(Args...)(Args subs_) { super(subs_); } 420 421 @property auto ref inout (Node[]) elms() inout @nogc => super._subs; 422 423 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 424 assert(!elms.empty); /+ TODO: Move to in contract? +/ 425 const c = tryGetConstant; 426 if (!c.empty) { 427 return (soff + c.length <= input.length && // if equal size and 428 c[] == input[soff..soff + c.length]); // equal contents 429 } 430 size_t sum = 0; 431 size_t off = soff; 432 foreach (ix, sub; elms) { /+ TODO: Reuse std.algorithm instead? +/ 433 size_t hit = sub.atRaw(input, off); 434 if (hit == size_t.max) { sum = hit; break; } // if any miss skip 435 sum += hit; 436 off += hit; 437 } 438 return sum; 439 } 440 441 @property: 442 override Bounds bounds() const nothrow @nogc { 443 typeof(return) result; 444 foreach (const ref sub; _subs) { 445 if (sub.bounds.low != size_t.max) 446 result.low += sub.bounds.low; // TODO: catch overflow 447 if (sub.bounds.high != size_t.max) 448 result.high += sub.bounds.high; // TODO: catch overflow 449 } 450 return result; 451 } 452 override bool isFixed() const nothrow @nogc => _subs.all!(a => a.isFixed); 453 override bool isConstant() const nothrow @nogc => _subs.all!(a => a.isConstant); 454 override const(Data) tryGetConstant() const @nogc => []; 455 version (none) override Lit[] mandatories() nothrow => _subs.map!(node => node.mandatories).joiner.array; 456 version (none) override Lit[] optionals() nothrow => _subs.map!(node => node.optionals).joiner.array; 457 } 458 459 auto seq(Args...)(Args args) @safe pure nothrow => new Seq(args); // instantiator 460 461 pure nothrow @safe unittest { 462 immutable input = `alpha`; 463 464 const s = seq(`al.`.lit, 465 `pha`.lit); 466 assert(s.isFixed); 467 assert(s.isConstant); 468 assert(s.at(input)); // TODO: this should fail 469 assert(s.bounds.low == 6); 470 assert(s.bounds.high== 6); 471 472 const t = `al`.lit ~ `pha`.lit; 473 474 assert(s !is t); 475 476 const al = seq(`a`.lit, `l`.lit); 477 assert(al.at(`a`) == size_t.max); // `al not found in `a` 478 } 479 480 @trusted pure nothrow unittest { 481 auto a = `aa.`.lit; 482 auto b = `bb.`.lit; 483 auto c = `cc.`.lit; 484 auto d = `dd.`.lit; 485 auto s = seq(a.opt, b, 486 c.opt, d); 487 assert(!s.isFixed); 488 assert(!s.isConstant); 489 assert(s.bounds.low == b.length + d.length); 490 assert(s.bounds.high== a.length + b.length + c.length + d.length); 491 /+ TODO: assert(equal(s.mandatories, [b, d])); +/ 492 /+ TODO: assert(equal(s.optionals, [a, b, c, d])); +/ 493 } 494 495 /** Alternative of Patterns in $(D ALTS). 496 */ 497 final extern(C++) class Alt : SPatt { extern(D): 498 @safe pure nothrow: 499 500 this(Node[] subs_) { super(subs_); } 501 this(Args...)(Args subs_) { super(subs_); } 502 503 size_t atIx(const scope string input, size_t soff, out size_t alt_hix) const { 504 return atRaw(input.representation, soff, alt_hix); 505 } 506 507 void opOpAssign(string s : "~")(Node sub) { 508 import nxt.algorithm.searching : canFind; 509 if (!_subs.canFind(sub)) 510 super._subs ~= sub; 511 } 512 513 @property inout(Node[]) alts() inout @nogc => super._subs; 514 515 /** Get Length of hit at index soff in input or size_t.max if none. 516 */ 517 size_t atRaw(in Data input, size_t soff, out size_t alt_hix) const @nogc { 518 assert(!alts.empty); /+ TODO: Move to in contract? +/ 519 size_t hit = 0; 520 size_t off = soff; 521 foreach (ix, sub; alts) { /+ TODO: Reuse std.algorithm instead? +/ 522 hit = sub.atRaw(input[off..$]); // match alternative 523 if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done 524 } 525 return hit; 526 } 527 528 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 529 size_t alt_hix; 530 return atRaw(input, soff, alt_hix); 531 } 532 533 /** Find $(D this) in `input` at offset `soff`. */ 534 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow { 535 assert(!alts.empty); /+ TODO: Move to in contract? +/ 536 switch (alts.length) { 537 case 1: 538 const a0 = alts[0].tryGetConstant; 539 if (!a0.empty) { 540 auto hit = input[soff..$].find(a0); // Use: second argument to return alt_hix 541 return hit.length != 0 ? hit : []; 542 } else 543 return alts[0].findRawAt(input, soff, enders); // recurse to it 544 case 2: 545 const a0 = alts[0].tryGetConstant; 546 const a1 = alts[1].tryGetConstant; 547 if (!a0.empty && 548 !a1.empty) { 549 auto hit = input[soff..$].find(a0, a1); // Use: second argument to return alt_hix 550 return hit[0]; 551 } 552 break; 553 case 3: 554 const a0 = alts[0].tryGetConstant; 555 const a1 = alts[1].tryGetConstant; 556 const a2 = alts[2].tryGetConstant; 557 if (!a0.empty && 558 !a1.empty && 559 !a2.empty) { 560 auto hit = input[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix 561 return hit[0]; 562 } 563 break; 564 case 4: 565 const a0 = alts[0].tryGetConstant; 566 const a1 = alts[1].tryGetConstant; 567 const a2 = alts[2].tryGetConstant; 568 const a3 = alts[3].tryGetConstant; 569 if (!a0.empty && 570 !a1.empty && 571 !a2.empty && 572 !a3.empty) { 573 auto hit = input[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix 574 return hit[0]; 575 } 576 break; 577 case 5: 578 const a0 = alts[0].tryGetConstant; 579 const a1 = alts[1].tryGetConstant; 580 const a2 = alts[2].tryGetConstant; 581 const a3 = alts[3].tryGetConstant; 582 const a4 = alts[4].tryGetConstant; 583 if (!a0.empty && 584 !a1.empty && 585 !a2.empty && 586 !a3.empty && 587 !a4.empty) { 588 auto hit = input[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix 589 return hit[0]; 590 } 591 break; 592 default: 593 break; 594 } 595 return super.findRawAt(input, soff, enders); // revert to base case 596 } 597 598 @property: 599 override Bounds bounds() const nothrow @nogc { 600 auto result = typeof(return)(size_t.max, size_t.min); 601 foreach (const ref sub; _subs) { 602 result.low = min(result.low, sub.bounds.low); 603 result.high = max(result.high, sub.bounds.high); 604 } 605 return result; 606 } 607 override bool isFixed() const nothrow @nogc { 608 /+ TODO: Merge these loops using tuple algorithm. +/ 609 auto mins = _subs.map!(a => a.bounds.low); 610 auto maxs = _subs.map!(a => a.bounds.high); 611 import nxt.predicates: allEqual; 612 return (mins.allEqual && maxs.allEqual); 613 } 614 override bool isConstant() const nothrow @nogc { 615 if (_subs.length == 0) 616 return true; 617 else if (_subs.length == 1) { 618 import std.range: front; 619 return _subs.front.isConstant; 620 } else 621 return false; /+ TODO: Maybe handle case when _subs are different. +/ 622 } 623 } 624 625 auto alt(Args...)(Args args) @safe pure nothrow => new Alt(args); // instantiator 626 627 pure nothrow @safe unittest { 628 immutable a_b = alt(`a`.lit, 629 `b`.lit); 630 631 immutable a__b = (`a`.lit | 632 `b`.lit); 633 634 assert(a_b.isFixed); 635 assert(!a_b.isConstant); 636 assert(a_b.at(`a`)); 637 assert(a_b.at(`b`)); 638 assert(a_b.at(`c`) == size_t.max); 639 640 size_t hix = size_t.max; 641 a_b.atIx(`a`, 0, hix); assert(hix == 0); 642 a_b.atIx(`b`, 0, hix); assert(hix == 1); 643 644 /* assert(alt.at(`a`) == size_t.max); */ 645 /* assert(alt.at(``) == size_t.max); */ 646 647 immutable a = alt(lit(`a`)); 648 immutable aa = alt(lit(`aa`)); 649 assert(aa.isConstant); 650 651 immutable aa_bb = alt(lit(`aa`), 652 lit(`bb`)); 653 assert(aa_bb.isFixed); 654 assert(aa_bb.bounds.low == 2); 655 assert(aa_bb.bounds.high== 2); 656 657 immutable a_bb = alt(lit(`a`), 658 lit(`bb`)); 659 assert(!a_bb.isFixed); 660 assert(a_bb.bounds.low == 1); 661 assert(a_bb.bounds.high== 2); 662 663 const string _aa = `_aa`; 664 assert(aa_bb.findAt(_aa) == cast(immutable Data)`aa`); 665 assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable Data)_aa)[0] == 1); 666 667 const string _bb = `_bb`; 668 assert(aa_bb.findAt(_bb) == cast(immutable Data)`bb`); 669 assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable Data)_bb)[0] == 1); 670 671 assert(a.findAt(`b`) == []); 672 assert(aa.findAt(`cc`) == []); 673 assert(aa_bb.findAt(`cc`) == []); 674 assert(aa_bb.findAt(``) == []); 675 } 676 677 pure nothrow @safe unittest { 678 auto a_b = alt(`a`.lit); 679 a_b ~= `b`.lit; 680 681 assert(a_b.isFixed); 682 assert(!a_b.isConstant); 683 assert(a_b.at(`a`)); 684 assert(a_b.at(`b`)); 685 assert(a_b.at(`c`) == size_t.max); 686 687 size_t hix = size_t.max; 688 a_b.atIx(`a`, 0, hix); assert(hix == 0); 689 a_b.atIx(`b`, 0, hix); assert(hix == 1); 690 } 691 692 final extern(C++) class Space : Node { extern(D): 693 @safe pure nothrow @nogc: 694 override: 695 size_t atRaw(in Data input, size_t soff = 0) const { 696 import std.ascii: isWhite; 697 return soff < input.length && isWhite(input[soff]) ? 1 : size_t.max; 698 } 699 @property const: 700 override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1); 701 bool isFixed() => true; 702 bool isConstant() => false; 703 } 704 705 auto ws() @safe pure nothrow => new Space(); // instantiator 706 707 pure nothrow @safe unittest { 708 assert(ws.at(` `) == 1); 709 assert(ws.at("\t") == 1); 710 assert(ws.at("\n") == 1); 711 } 712 713 /** Abstract Singleton Super Pattern. 714 */ 715 abstract extern(C++) class SPatt1 : Node { extern(D): 716 @safe pure nothrow: 717 this(Node sub) { 718 this.sub = sub; 719 sub._parent = this; 720 } 721 protected Node sub; 722 } 723 724 /** Optional Sub Pattern $(D count) times. 725 */ 726 final extern(C++) class Opt : SPatt1 { extern(D): 727 @safe pure nothrow: 728 this(Node sub) { super(sub); } 729 override: 730 size_t atRaw(in Data input, size_t soff = 0) const { 731 assert(soff <= input.length); // include equality because input might be empty and size zero 732 const hit = sub.atRaw(input[soff..$]); 733 return hit == size_t.max ? 0 : hit; 734 } 735 @property const nothrow @nogc: 736 override Bounds bounds() const nothrow @nogc => typeof(return)(0, sub.bounds.high); 737 bool isFixed() => false; 738 bool isConstant() => false; 739 version (none) Lit[] mandatories() nothrow => []; 740 version (none) Lit[] optionals() nothrow => sub.optionals; 741 } 742 743 auto opt(Args...)(Args args) => new Opt(args); // optional 744 745 pure nothrow @safe unittest { 746 assert(`a`.lit.opt.at(`b`) == 0); 747 assert(`a`.lit.opt.at(`a`) == 1); 748 } 749 750 /** Repetition Sub Pattern $(D count) times. 751 */ 752 final extern(C++) class Rep : SPatt1 { extern(D): 753 @safe pure nothrow: 754 755 this(Node sub, size_t count) in(count >= 2) { 756 super(sub); 757 this.countReq = count; 758 this.countOpt = 0; // fixed length repetion 759 } 760 761 this(Node sub, size_t countMin, size_t countMax) in { 762 assert(countMax >= 2); 763 assert(countMin <= countMax); 764 } do { 765 super(sub); 766 this.countReq = countMin; 767 this.countOpt = countMax - countMin; 768 } 769 770 override size_t atRaw(in Data input, size_t soff = 0) const { 771 size_t sum = 0; 772 size_t off = soff; 773 /* mandatory */ 774 foreach (ix; 0..countReq) /+ TODO: Reuse std.algorithm instead? +/ { 775 size_t hit = sub.atRaw(input[off..$]); 776 if (hit == size_t.max) { return hit; } // if any miss skip 777 off += hit; 778 sum += hit; 779 } 780 /* optional part */ 781 foreach (ix; countReq..countReq + countOpt) /+ TODO: Reuse std.algorithm instead? +/ { 782 size_t hit = sub.atRaw(input[off..$]); 783 if (hit == size_t.max) { break; } // if any miss just break 784 off += hit; 785 sum += hit; 786 } 787 return sum; 788 } 789 790 @property override const nothrow @nogc: 791 override Bounds bounds() const nothrow @nogc 792 => typeof(return)(countReq*sub.bounds.high, (countReq + countOpt)*sub.bounds.high); 793 bool isFixed() => bounds.low == bounds.high && sub.isFixed; 794 bool isConstant() => bounds.low == bounds.high && sub.isConstant; 795 version (none) Lit[] mandatories() => sub.mandatories; 796 version (none) Lit[] optionals() => sub.optionals; 797 798 // invariant { assert(countReq); } 799 size_t countReq; // Required. 800 size_t countOpt; // Optional. 801 } 802 803 auto rep(Args...)(Args args) => new Rep(args); // repetition 804 auto zom(Args...)(Args args) => new Rep(args, 0, size_t.max); // zero or more 805 auto oom(Args...)(Args args) => new Rep(args, 1, size_t.max); // one or more 806 807 pure nothrow @safe unittest { 808 auto l = 'a'.lit; 809 810 const l5 = l.rep(5); 811 assert(l5.isConstant); 812 assert(l5.at(`aaaa`) == size_t.max); 813 assert(l5.at(`aaaaa`)); 814 assert(l5.at(`aaaaaaa`)); 815 assert(l5.isFixed); 816 assert(l5.bounds.low == 5); 817 assert(l5.bounds.high== 5); 818 819 const countMin = 2; 820 const countMax = 3; 821 const l23 = l.rep(countMin, countMax); 822 assert(l23.at(`a`) == size_t.max); 823 assert(l23.at(`aa`) == 2); 824 assert(l23.at(`aaa`) == 3); 825 assert(l23.at(`aaaa`) == 3); 826 assert(!l23.isConstant); 827 assert(l23.bounds.low == countMin); 828 assert(l23.bounds.high== countMax); 829 } 830 831 pure nothrow @safe unittest { 832 auto l = 'a'.lit; 833 auto l5 = l.rep(5); 834 /+ TODO: assert(l5.mandatories == [l]); +/ 835 /+ TODO: assert(l5.optionals == [l]); +/ 836 } 837 838 pure nothrow @safe unittest { 839 auto l = 'a'.lit; 840 auto l5 = l.opt.rep(5); 841 /+ TODO: assert(l5.mandatories == []); +/ 842 /+ TODO: assert(l5.optionals == [l]); +/ 843 } 844 845 final extern(C++) class Ctx : Node { extern(D): 846 enum Type { 847 bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\`` 848 beginningOfBlock = bob, 849 beginningOfFile = bob, 850 beginningOfString = bob, 851 eob, /// End Of \em Block/Name/File/String. @b Emacs: `\'` 852 endOfBlock = eob, 853 endOfFile = eob, 854 endOfString = eob, 855 856 bol, /// Beginning Of \em Line. @b Emacs: `^` 857 beginningOfLine = bol, 858 eol, /// End Of \em Line. @b Emacs: `$` 859 endOfLine = eol, 860 861 bos, /// Beginning Of \em Symbol. @b Emacs: `\_<` 862 beginningOfSymbol = bos, 863 eos, /// End Of \em Symbol. @b Emacs: `\_>` 864 endOfSymbol = eos, 865 866 bow, /// Beginning Of \em Word. @b Emacs: `\<` 867 beginningOfWord = bow, 868 eow, /// End Of \em Word. @b Emacs: `\>` 869 endOfWord = eow, 870 } 871 872 @safe pure nothrow: 873 874 this(Type type) { this.type = type; } 875 876 override size_t atRaw(in Data input, size_t soff = 0) const 877 { 878 assert(soff <= input.length); // include equality because input might be empty and size zero 879 bool ok = false; 880 import std.ascii : isAlphaNum/* , newline */; 881 final switch (type) 882 { 883 /* buffer */ 884 case Type.bob: ok = (soff == 0); break; 885 case Type.eob: ok = (soff == input.length); break; 886 887 /* line */ 888 case Type.bol: ok = (soff == 0 || (input[soff - 1] == 0x0d || 889 input[soff - 1] == 0x0a)); break; 890 case Type.eol: ok = (soff == input.length || (input[soff ] == 0x0d || 891 input[soff ] == 0x0a)); break; 892 893 /* symbol */ 894 case Type.bos: ok = ((soff == 0 || (!input[soff - 1].isAlphaNum && 895 input[soff - 1] != '_')) && /+ TODO: Make '_' language-dependent +/ 896 (soff < input.length && input[soff].isAlphaNum)) ; break; 897 case Type.eos: ok = ((soff == input.length || (!input[soff].isAlphaNum && 898 input[soff] != '_')) && /+ TODO: Make '_' language-dependent +/ 899 (soff >= 1 && input[soff - 1].isAlphaNum)) ; break; 900 901 /* word */ 902 case Type.bow: ok = ((soff == 0 || !input[soff - 1].isAlphaNum) && 903 (soff < input.length && input[soff].isAlphaNum)) ; break; 904 case Type.eow: ok = ((soff == input.length || !input[soff].isAlphaNum) && 905 (soff >= 1 && input[soff - 1].isAlphaNum)) ; break; 906 } 907 return ok ? 0 : size_t.max; 908 } 909 @property override const nothrow @nogc: 910 Bounds bounds() => typeof(return)(0, 0); 911 bool isFixed() => true; 912 bool isConstant() => true; 913 protected Type type; 914 } 915 916 Ctx bob(Args...)(Args args) => new Ctx(args, Ctx.Type.bob); 917 Ctx eob(Args...)(Args args) => new Ctx(args, Ctx.Type.eob); 918 Ctx bol(Args...)(Args args) => new Ctx(args, Ctx.Type.bol); 919 Ctx eol(Args...)(Args args) => new Ctx(args, Ctx.Type.eol); 920 Ctx bos(Args...)(Args args) => new Ctx(args, Ctx.Type.bos); 921 Ctx eos(Args...)(Args args) => new Ctx(args, Ctx.Type.eos); 922 Ctx bow(Args...)(Args args) => new Ctx(args, Ctx.Type.bow); 923 Ctx eow(Args...)(Args args) => new Ctx(args, Ctx.Type.eow); 924 Seq buf(Args...)(Args args) => seq(bob, args, eob); 925 Seq line(Args...)(Args args) => seq(bol, args, eol); 926 Seq sym(Args...)(Args args) => seq(bos, args, eos); 927 Seq word(Args...)(Args args) => seq(bow, args, eow); 928 929 pure nothrow @safe unittest { 930 const bob_ = bob; 931 const eob_ = eob; 932 assert(bob_.at(`ab`) == 0); 933 assert(eob_.at(`ab`, 2) == 0); 934 assert(bob_.bounds.low == 0); 935 assert(bob_.bounds.high== 0); 936 937 const bol_ = bol; 938 assert(bol_.at(`ab`) == 0); 939 assert(bol_.at("a\nb", 2) == 0); 940 assert(bol_.at("a\nb", 1) == size_t.max); 941 assert(bol_.bounds.low == 0); 942 assert(bol_.bounds.high== 0); 943 944 const eol_ = eol; 945 assert(eol_.at(`ab`, 2) == 0); 946 assert(eol_.at("a\nb", 1) == 0); 947 assert(eol_.at("a\nb", 2) == size_t.max); 948 assert(eol_.bounds.low == 0); 949 assert(eol_.bounds.high== 0); 950 951 const bos_ = bos; 952 const eos_ = eos; 953 assert(bos_.at(`ab`) == 0); 954 assert(bos_.at(` ab`) == size_t.max); 955 assert(eos_.at(`ab`, 2) == 0); 956 assert(eos_.at(`a_b `, 1) == size_t.max); 957 assert(eos_.at(`ab `, 2) == 0); 958 assert(eos_.bounds.low == 0); 959 assert(eos_.bounds.high== 0); 960 961 const bow_ = bow; 962 const eow_ = eow; 963 assert(bow_.bounds.low == 0); 964 assert(bow_.bounds.high== 0); 965 assert(eow_.bounds.low == 0); 966 assert(eow_.bounds.high== 0); 967 968 assert(bow_.at(`ab`) == 0); 969 assert(bow_.at(` ab`) == size_t.max); 970 971 assert(eow_.at(`ab`, 2) == 0); 972 assert(eow_.at(`ab `, 0) == size_t.max); 973 974 assert(bow_.at(` ab `, 1) == 0); 975 assert(bow_.at(` ab `, 0) == size_t.max); 976 977 assert(eow_.at(` ab `, 3) == 0); 978 assert(eow_.at(` ab `, 4) == size_t.max); 979 980 auto l = lit(`ab`); 981 auto w = word(l); 982 assert(w.at(`ab`) == 2); 983 assert(w.at(`ab_c`) == 2); 984 985 auto s = sym(l); 986 assert(s.at(`ab`) == 2); 987 assert(s.at(`ab_c`) == size_t.max); 988 989 assert(bob_.findAt(`a`) == []); 990 assert(bob_.findAt(`a`).ptr != null); 991 assert(eob_.findAt(`a`) == []); 992 993 assert(bol_.findAt(`a`) == []); 994 assert(bol_.findAt(`a`).ptr != null); 995 assert(eol_.findAt(`a`) == []); 996 997 assert(bow_.findAt(`a`) == []); 998 assert(bow_.findAt(`a`).ptr != null); 999 assert(eow_.findAt(`a`) == []); 1000 1001 assert(bos_.findAt(`a`) == []); 1002 assert(bos_.findAt(`a`).ptr != null); 1003 assert(eos_.findAt(`a`) == []); 1004 /+ TODO: This fails assert(eos_.findAt(`a`).ptr != null); +/ 1005 } 1006 1007 /** Keyword $(D arg). */ 1008 Seq kwd(Arg)(Arg arg) => seq(bow, arg, eow); 1009 1010 pure nothrow @safe unittest { 1011 const str = `int`; 1012 auto x = str.lit.kwd; 1013 1014 assert(x.at(str, 0)); 1015 /* TODO: assert(!x.at(str, 1)); */ 1016 1017 assert(x.at(` ` ~ str, 1)); 1018 /* TODO: assert(!x.at(` int`, 0)); */ 1019 } 1020 1021 /** Pattern Paired with Prefix and Suffix. 1022 */ 1023 final extern(C++) class Clause : SPatt1 { extern(D): 1024 @safe pure nothrow: 1025 this(Node prefix_, Node suffix_, Node sub) { 1026 super(sub); 1027 this.prefix = prefix_; 1028 this.suffix = suffix_; 1029 } 1030 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @nogc { 1031 import std.experimental.allocator.mallocator : Mallocator; 1032 DynamicArray!(Node, Mallocator) enders_; 1033 enders_.reserve(enders.length == 1); 1034 () @trusted { 1035 enders_ ~= cast(Node[])enders; 1036 enders_ ~= cast()suffix; 1037 }(); 1038 typeof(return) result = sub.findRawAt(input, soff, enders_[]); 1039 return result; 1040 } 1041 Node prefix, suffix; 1042 } 1043 1044 Clause paired(Args...)(Node prefix, Node suffix, Args args) => new Clause(prefix, suffix, args); 1045 Clause parend(Args...)(Args args) => new Clause(lit('('), lit(')'), args); 1046 Clause hooked(Args...)(Args args) => new Clause(lit('['), lit(']'), args); 1047 Clause braced(Args...)(Args args) => Clause(lit('{'), lit('}'), args); 1048 1049 import nxt.assert_ex; 1050 1051 pure nothrow @safe unittest { 1052 /* auto p = `[alpha]`.lit.parend; */ 1053 /* assert(p.at(`([alpha])`) == 7); */ 1054 1055 /* auto h = `[alpha]`.lit.hooked; */ 1056 /* assert(h.at(`[[alpha]]`) == 7); */ 1057 1058 /* auto b = `[alpha]`.lit.braced; */ 1059 /* assert(b.at(`{[alpha]}`) == 7); */ 1060 1061 /* auto pb = `[alpha]`.lit.parend; */ 1062 } 1063 1064 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern. 1065 Example: #!/bin/env rdmd 1066 See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix) 1067 */ 1068 auto ref shebangLine(Node interpreter) @safe pure nothrow { 1069 return seq(bob, 1070 `#!`.lit, 1071 `/usr`.lit.opt, 1072 `/bin/env`.lit.opt, 1073 ws.oom, 1074 interpreter); 1075 } 1076 1077 /// 1078 pure nothrow @safe unittest { 1079 assert(`rdmd`.lit.shebangLine 1080 .at(`#!/bin/env rdmd`) == 15); 1081 assert(`rdmd`.lit.shebangLine 1082 .at(`#!/usr/bin/env rdmd`) == 19); 1083 auto rgdmd = alt(`rdmd`.lit, 1084 `gdmd`.lit); 1085 assert(rgdmd.shebangLine 1086 .at(`#!/usr/bin/env rdmd-dev`) == 19); 1087 assert(rgdmd.shebangLine 1088 .at(`#!/usr/bin/env gdmd`) == 19); 1089 }