1 /** 2 Patterns. 3 4 Test: dmd -version=show -preview=dip1000 -preview=in -vcolumns -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d 5 Test: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main -run pattern.d 6 Debug: ldmd2 -fsanitize=address -I.. -i -debug -g -checkaction=context -allinst -unittest -main pattern.d && lldb pattern 7 8 Concepts and namings are inspired by regular expressions, symbolic (regular) 9 expressions (Emacs' rx.el package), predicate logic and grammars. 10 11 String patterns are matched by their raw bytes for now. 12 13 Copyright: Per Nordlöw 2022-. 14 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 15 Authors: $(WEB Per Nordlöw) 16 17 TODO: Merge with lpgen.d Node classes 18 TODO: Extend `Node` classes to represent `xlg.el` read by `LispFileParser`. 19 TODO: Replace `findRawAt(const Data haystack` with `findRawAt(in Data haystack` 20 21 TODO: 22 Overload operators & (and), ! (not), 23 24 TODO: Add pattern for expressing inference with `infer` 25 (infer 26 ((x instanceOf X) & 27 (X subClassOf Y)) 28 (x instaceOf Y)) 29 30 TODO: 31 Variables are either 32 - _ (ignore) 33 - _`x`, _`y`, etc 34 - _'x', _'y' 35 - _!0, _!1, ..., _!(n-1) 36 37 infer(rel!`desire`(_!`x`, _!`y`) && 38 rel!`madeOf`(_!`z`, _!`y`), 39 rel!`desire`(_!`x`, _!`z`)) 40 41 TODO: Support variables of specific types and inference using predicate logic: 42 infer(and(fact(var!'x', rel'desire', var!'y'), 43 fact(var!'z', opt(rel'madeOf', 44 rel'instanceOf'), var!'y'))), 45 pred(var!'x', rel'desire', var!'z' 46 )) 47 48 TODO: Make returns from factory functions immutable. 49 TODO: Reuse return patterns from Lit 50 51 TODO 52 const s = seq(`al.`.lit,`pha`.lit); 53 const t = `al`.lit ~ `pha`.lit; 54 assert(s !is t); 55 assert(equal(s, t)); 56 57 */ 58 module nxt.pattern; 59 60 import std.algorithm : find, all, map, min, max, joiner; 61 import std.range : empty; 62 import std.array : array; 63 import std.string : representation; 64 import std.traits : isSomeString; 65 import nxt.find_ex : findAcronymAt, FindContext; 66 import nxt.debugio; 67 import nxt.container.dynamic_array; 68 69 @safe: 70 71 /++ Untyped data. 72 +/ 73 alias Data = ubyte[]; 74 75 /++ Matching `input` with `node`. 76 Compatibility with `std.regex.matchFirst`. 77 +/ 78 inout(char)[] matchFirst(scope return /+ref+/ inout(char)[] input, const Node node) pure nothrow /+@nogc+/ { 79 return input.match(node); 80 } 81 82 /// ditto 83 @safe pure unittest { 84 const x = "ab"; 85 assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]); 86 assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]); 87 } 88 89 /** Match `input` with `node`. 90 Returns: Matched slice or `[]` if not match. 91 */ 92 auto ref matchFirst(in return /+ref+/ Data input, const Node node) pure nothrow /+@nogc+/ { 93 return node.findRawAt(input, 0); 94 } 95 96 /// ditto 97 @safe pure unittest { 98 /+const+/ Data x = [1,2]; 99 assert(x.matchFirst(lit(x[0 .. 1])) is x[0 .. $]); 100 assert(x.matchFirst(lit(x[1 .. 2])) is x[1 .. 2]); 101 } 102 103 inout(char)[] match(scope return /+ref+/ inout(char)[] input, const Node node) @trusted pure nothrow /+@nogc+/ { 104 return cast(typeof(return))input.representation.matchFirst(node); 105 } 106 107 /++ Pattern (length) bounds. +/ 108 struct Bounds { 109 static immutable inf = size_t.max; 110 size_t low; ///< Smallest length possible. 111 size_t high; ///< Largest length possible. 112 } 113 114 /** Base Pattern. 115 */ 116 abstract extern(C++) class Node { extern(D): 117 @safe pure nothrow: 118 119 Seq opBinary(string op)(Node rhs) if (op == `~`) => opCatImpl(rhs); // template can't be overridden 120 Alt opBinary(string op)(Node rhs) if (op == `|`) => opAltImpl(rhs); // template can't be overridden 121 122 protected Seq opCatImpl(Node rhs) => seq(this, rhs); /+ TODO: check if this and rhs is Seq +/ 123 protected Alt opAltImpl(Node rhs) => alt(this, rhs); /+ TODO: check if this and rhs is Alt +/ 124 125 final size_t at(const scope string input, size_t soff = 0) const 126 /+ TODO: Activate this +/ 127 /* out (hit) { */ 128 /* assert((!hit) || hit >= bounds.min); /+ TODO: Is this needed? +/ */ 129 /* } */ 130 /* do */ 131 { 132 return atRaw(input.representation, soff); 133 } 134 135 abstract size_t atRaw(in Data input, size_t soff = 0) const @nogc; 136 137 /** Find $(D this) in String `input` at offset `soff`. */ 138 final const(Data) findAt(const return scope string input, size_t soff = 0) const { 139 return findRawAt(input.representation, soff, []); /+ TODO: this is ugly +/ 140 } 141 142 /** Find $(D this) in Raw Bytes `input` at offset `soff`. */ 143 const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const @nogc { 144 auto i = soff; 145 while (i < input.length) { // while bytes left at i 146 if (input.length - i < bounds.low) // and bytes left to find pattern 147 return []; 148 const hit = atRaw(input, i); 149 if (hit != size_t.max) // hit at i 150 return input[i..i + hit]; 151 i++; 152 } 153 return []; 154 } 155 156 @property @nogc: 157 abstract Bounds bounds() const; 158 159 abstract bool isFixed() const; /// Returns: true if all possible instances have same length. 160 abstract bool isConstant() const; /// Returns: true if all possible instances have same length. 161 162 final bool isVariable() => !isConstant; 163 const(Data) tryGetConstant() const => []; /// Returns: data if literal otherwise empty array. 164 165 /** Get All Literals that must match a given source $(D X) in order for $(D 166 this) to match $(D X) somewhere. 167 */ 168 version (none) abstract Lit[] mandatories(); 169 170 /** Get Optional Literals that may match a given source $(D X) if $(D this) 171 matches $(D X) somewhere. 172 */ 173 version (none) Lit[] optionals() => mandatories; 174 175 protected Node _parent; /// Parenting (Super) Pattern. 176 } 177 178 /** Literal Pattern with Cached Binary Byte Histogram. 179 */ 180 final extern(C++) class Lit : Node { extern(D): 181 @safe pure nothrow: 182 183 this(string bytes_) { assert(!bytes_.empty); this(bytes_.representation); } 184 this(ubyte ch) { this._bytes ~= ch; } 185 this(Data bytes_) { this._bytes = bytes_; } 186 this(immutable Data bytes_) { this._bytes = bytes_.dup; } 187 188 override size_t atRaw(in Data input, size_t soff = 0) const { 189 const l = _bytes.length; 190 return (soff + l <= input.length && // fits in input and 191 _bytes[] == input[soff..soff + l]) ? l : size_t.max; // same contents 192 } 193 194 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow { 195 return input[soff..$].find(_bytes); // reuse std.algorithm: find! 196 } 197 198 @property nothrow @nogc: 199 auto ref bytes() const => _bytes; 200 private Data _bytes; 201 alias _bytes this; 202 override: 203 Bounds bounds() const => Bounds(_bytes.length, _bytes.length); 204 bool isFixed() const => true; 205 bool isConstant() const => true; 206 const(Data) tryGetConstant() const => _bytes; 207 version (none) Lit[] mandatories() => [this]; 208 } 209 210 /++ Literal. +/ 211 auto lit(Args...)(Args args) @safe pure nothrow => new Lit(args); // instantiator 212 213 /++ Full|Exact literal. +/ 214 auto full(Args...)(Args args) @safe pure nothrow => seq(bob, lit(args), eob); // instantiator 215 216 pure nothrow @safe unittest { 217 const _ = lit(Data.init); 218 const ab = lit(`ab`); 219 assert(`ab`.match(ab)); 220 version (none) assert(!ab.match(`_`)); // TODO: enable 221 } 222 223 pure nothrow @safe unittest { 224 immutable ab = `ab`; 225 assert(lit('b').at(`ab`, 1) == 1); 226 const a = lit('a'); 227 228 const ac = lit(`ac`); 229 assert(`ac`.match(ac)); 230 assert(`ac`.match(ac).length == 2); 231 assert(`ca`.match(ac) == []); 232 233 assert(a.isFixed); 234 assert(a.isConstant); 235 assert(a.at(ab) == 1); 236 assert(lit(ab).at(`a`) == size_t.max); 237 assert(lit(ab).at(`b`) == size_t.max); 238 239 assert(a.findAt(`cba`) == cast(immutable Data)`a`); 240 assert(a.findAt(`ba`) == cast(immutable Data)`a`); 241 assert(a.findAt(`a`) == cast(immutable Data)`a`); 242 assert(a.findAt(``) == []); 243 assert(a.findAt(`b`) == []); 244 assert(ac.findAt(`__ac`) == cast(immutable Data)`ac`); 245 assert(a.findAt(`b`).length == 0); 246 247 auto xyz = lit(`xyz`); 248 } 249 250 pure nothrow @safe unittest { 251 auto ac = lit(`ac`); 252 /+ TODO: assert(ac.mandatories == [ac]); +/ 253 /+ TODO: assert(ac.optionals == [ac]); +/ 254 } 255 256 /** Word/Symbol Acronym Pattern. 257 */ 258 version (none) // TODO: enable if and when I need this 259 final extern(C++) class Acronym : Node { extern(D): 260 @safe pure nothrow: 261 262 this(string bytes_, FindContext ctx = FindContext.inSymbol) { 263 assert(!bytes_.empty); 264 this(bytes_.representation, ctx); 265 } 266 267 this(ubyte ch) { this._acros ~= ch; } 268 269 this(Data bytes_, FindContext ctx = FindContext.inSymbol) { 270 this._acros = bytes_; 271 this._ctx = ctx; 272 } 273 274 this(immutable Data bytes_, FindContext ctx = FindContext.inSymbol) { 275 this._acros = bytes_.dup; 276 this._ctx = ctx; 277 } 278 279 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 280 // scope auto offs = new size_t[_acros.length]; // hit offsets 281 size_t a = 0; // acronym index 282 foreach(s, ub; input[soff..$]) { // for each element in source 283 import std.ascii: isAlpha; 284 285 // Check context 286 final switch (_ctx) { 287 case FindContext.inWord: 288 case FindContext.asWord: 289 if (!ub.isAlpha) 290 return size_t.max; 291 break; 292 case FindContext.inSymbol: 293 case FindContext.asSymbol: 294 if (!ub.isAlpha && ub != '_') 295 return size_t.max; 296 break; 297 } 298 299 if (_acros[a] == ub) { 300 // offs[a] = s + soff; // store hit offset 301 a++; // advance acronym 302 if (a == _acros.length) { // if complete acronym found 303 return s + 1; // return its length 304 } 305 } 306 } 307 return size_t.max; // no hit 308 } 309 310 template Tuple(E...) { alias Tuple = E; } 311 312 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow { 313 import std.string: CaseSensitive; 314 return input.findAcronymAt(_acros, _ctx, CaseSensitive.yes, soff)[0]; 315 } 316 317 @property: 318 override size_t bounds() const nothrow @nogc => typeof(return)(_acros.length, size_t.max); 319 override bool isFixed() const nothrow @nogc => false; 320 override bool isConstant() const nothrow @nogc => false; 321 322 private: 323 Data _acros; 324 FindContext _ctx; 325 } 326 327 version (none) 328 @safe pure nothrow { 329 auto inwac(Args...)(Args args) => new Acronym(args, FindContext.inWord); // word acronym 330 auto insac(Args...)(Args args) => new Acronym(args, FindContext.inSymbol); // symbol acronym 331 auto aswac(Args...)(Args args) => new Acronym(args, FindContext.asWord); // word acronym 332 auto assac(Args...)(Args args) => new Acronym(args, FindContext.asSymbol); // symbol acronym 333 } 334 335 version (none) 336 pure nothrow @safe unittest { 337 assert(inwac(`a`).at(`a`) == 1); 338 assert(inwac(`ab`).at(`ab`) == 2); 339 assert(inwac(`ab`).at(`a`) == size_t.max); 340 assert(inwac(`abc`).at(`abc`) == 3); 341 assert(inwac(`abc`).at(`aabbcc`) == 5); 342 assert(inwac(`abc`).at(`aaaabbcc`) == 7); 343 assert(inwac(`fpn`).at(`fopen`) == 5); 344 } 345 346 /** Any Byte. 347 */ 348 final extern(C++) class Any : Node { extern(D): 349 @safe pure nothrow: 350 this() {} 351 352 override size_t atRaw(in Data input, size_t soff = 0) const => soff < input.length ? 1 : size_t.max; 353 354 @property: 355 override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1); 356 override bool isFixed() const nothrow @nogc => true; 357 override bool isConstant() const nothrow @nogc => false; 358 359 version (none) override Lit[] mandatories() nothrow => []; 360 version (none) override Lit[] optionals() nothrow { 361 import std.range: iota; 362 return iota(0, 256).map!(n => (cast(ubyte)n).lit).array; 363 } 364 } 365 366 auto any(Args...)(Args args) => new Any(args); // instantiator 367 368 /** Abstract Super Pattern. 369 */ 370 abstract extern(C++) class SPatt : Node { extern(D): 371 @safe pure nothrow: 372 373 this(Node[] subs_) { this._subs = subs_; } 374 this(Args...)(Args subs_) { 375 foreach (sub; subs_) { 376 alias Sub = typeof(sub); 377 /+ TODO: functionize to patternFromBuiltinType() or to!Node +/ 378 static if (is(Sub == string) || 379 is(Sub == char)) { 380 _subs ~= new Lit(sub); 381 } else 382 _subs ~= sub; 383 sub._parent = this; 384 } 385 } 386 387 protected Node[] _subs; 388 } 389 390 /** Sequence of Patterns. 391 */ 392 final extern(C++) class Seq : SPatt { extern(D): 393 @safe pure nothrow: 394 395 this(Node[] subs_) { super(subs_); } 396 this(Args...)(Args subs_) { super(subs_); } 397 398 @property auto ref inout (Node[]) elms() inout @nogc => super._subs; 399 400 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 401 assert(!elms.empty); /+ TODO: Move to in contract? +/ 402 const c = tryGetConstant; 403 if (!c.empty) { 404 return (soff + c.length <= input.length && // if equal size and 405 c[] == input[soff..soff + c.length]); // equal contents 406 } 407 size_t sum = 0; 408 size_t off = soff; 409 foreach (ix, sub; elms) { /+ TODO: Reuse std.algorithm instead? +/ 410 size_t hit = sub.atRaw(input, off); 411 if (hit == size_t.max) { sum = hit; break; } // if any miss skip 412 sum += hit; 413 off += hit; 414 } 415 return sum; 416 } 417 418 @property: 419 override Bounds bounds() const nothrow @nogc { 420 typeof(return) result; 421 foreach (const ref sub; _subs) { 422 if (sub.bounds.low != size_t.max) 423 result.low += sub.bounds.low; // TODO: catch overflow 424 if (sub.bounds.high != size_t.max) 425 result.high += sub.bounds.high; // TODO: catch overflow 426 } 427 return result; 428 } 429 override bool isFixed() const nothrow @nogc => _subs.all!(a => a.isFixed); 430 override bool isConstant() const nothrow @nogc => _subs.all!(a => a.isConstant); 431 override const(Data) tryGetConstant() const @nogc => []; 432 version (none) override Lit[] mandatories() nothrow => _subs.map!(node => node.mandatories).joiner.array; 433 version (none) override Lit[] optionals() nothrow => _subs.map!(node => node.optionals).joiner.array; 434 } 435 436 auto seq(Args...)(Args args) @safe pure nothrow => new Seq(args); // instantiator 437 438 pure nothrow @safe unittest { 439 immutable input = `alpha`; 440 441 const s = seq(`al.`.lit, 442 `pha`.lit); 443 assert(s.isFixed); 444 assert(s.isConstant); 445 assert(s.at(input)); // TODO: this should fail 446 assert(s.bounds.low == 6); 447 assert(s.bounds.high== 6); 448 449 const t = `al`.lit ~ `pha`.lit; 450 451 assert(s !is t); 452 453 const al = seq(`a`.lit, `l`.lit); 454 assert(al.at(`a`) == size_t.max); // `al not found in `a` 455 } 456 457 @trusted pure nothrow unittest { 458 auto a = `aa.`.lit; 459 auto b = `bb.`.lit; 460 auto c = `cc.`.lit; 461 auto d = `dd.`.lit; 462 auto s = seq(a.opt, b, 463 c.opt, d); 464 assert(!s.isFixed); 465 assert(!s.isConstant); 466 assert(s.bounds.low == b.length + d.length); 467 assert(s.bounds.high== a.length + b.length + c.length + d.length); 468 /+ TODO: assert(equal(s.mandatories, [b, d])); +/ 469 /+ TODO: assert(equal(s.optionals, [a, b, c, d])); +/ 470 } 471 472 /** Alternative of Patterns in $(D ALTS). 473 */ 474 final extern(C++) class Alt : SPatt { extern(D): 475 @safe pure nothrow: 476 477 this(Node[] subs_) { super(subs_); } 478 this(Args...)(Args subs_) { super(subs_); } 479 480 size_t atIx(const scope string input, size_t soff, out size_t alt_hix) const { 481 return atRaw(input.representation, soff, alt_hix); 482 } 483 484 void opOpAssign(string s : "~")(Node sub) { 485 import nxt.algorithm.searching : canFind; 486 if (!_subs.canFind(sub)) 487 super._subs ~= sub; 488 } 489 490 @property inout(Node[]) alts() inout @nogc => super._subs; 491 492 /** Get Length of hit at index soff in input or size_t.max if none. 493 */ 494 size_t atRaw(in Data input, size_t soff, out size_t alt_hix) const @nogc { 495 assert(!alts.empty); /+ TODO: Move to in contract? +/ 496 size_t hit = 0; 497 size_t off = soff; 498 foreach (ix, sub; alts) { /+ TODO: Reuse std.algorithm instead? +/ 499 hit = sub.atRaw(input[off..$]); // match alternative 500 if (hit != size_t.max) { alt_hix = ix; break; } // if any hit were done 501 } 502 return hit; 503 } 504 505 override size_t atRaw(in Data input, size_t soff = 0) const @nogc { 506 size_t alt_hix; 507 return atRaw(input, soff, alt_hix); 508 } 509 510 /** Find $(D this) in `input` at offset `soff`. */ 511 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow { 512 assert(!alts.empty); /+ TODO: Move to in contract? +/ 513 switch (alts.length) { 514 case 1: 515 const a0 = alts[0].tryGetConstant; 516 if (!a0.empty) { 517 auto hit = input[soff..$].find(a0); // Use: second argument to return alt_hix 518 return hit; 519 } else 520 return alts[0].findRawAt(input, soff, enders); // recurse to it 521 case 2: 522 const a0 = alts[0].tryGetConstant; 523 const a1 = alts[1].tryGetConstant; 524 if (!a0.empty && 525 !a1.empty) { 526 auto hit = input[soff..$].find(a0, a1); // Use: second argument to return alt_hix 527 return hit[0]; 528 } 529 break; 530 case 3: 531 const a0 = alts[0].tryGetConstant; 532 const a1 = alts[1].tryGetConstant; 533 const a2 = alts[2].tryGetConstant; 534 if (!a0.empty && 535 !a1.empty && 536 !a2.empty) { 537 auto hit = input[soff..$].find(a0, a1, a2); // Use: second argument to return alt_hix 538 return hit[0]; 539 } 540 break; 541 case 4: 542 const a0 = alts[0].tryGetConstant; 543 const a1 = alts[1].tryGetConstant; 544 const a2 = alts[2].tryGetConstant; 545 const a3 = alts[3].tryGetConstant; 546 if (!a0.empty && 547 !a1.empty && 548 !a2.empty && 549 !a3.empty) { 550 auto hit = input[soff..$].find(a0, a1, a2, a3); // Use: second argument to return alt_hix 551 return hit[0]; 552 } 553 break; 554 case 5: 555 const a0 = alts[0].tryGetConstant; 556 const a1 = alts[1].tryGetConstant; 557 const a2 = alts[2].tryGetConstant; 558 const a3 = alts[3].tryGetConstant; 559 const a4 = alts[4].tryGetConstant; 560 if (!a0.empty && 561 !a1.empty && 562 !a2.empty && 563 !a3.empty && 564 !a4.empty) { 565 auto hit = input[soff..$].find(a0, a1, a2, a3, a4); // Use: second argument to return alt_hix 566 return hit[0]; 567 } 568 break; 569 default: 570 break; 571 } 572 return super.findRawAt(input, soff, enders); // revert to base case 573 } 574 575 @property: 576 override Bounds bounds() const nothrow @nogc { 577 auto result = typeof(return)(size_t.max, size_t.min); 578 foreach (const ref sub; _subs) { 579 result.low = min(result.low, sub.bounds.low); 580 result.high = max(result.high, sub.bounds.high); 581 } 582 return result; 583 } 584 override bool isFixed() const nothrow @nogc { 585 /+ TODO: Merge these loops using tuple algorithm. +/ 586 auto mins = _subs.map!(a => a.bounds.low); 587 auto maxs = _subs.map!(a => a.bounds.high); 588 import nxt.predicates: allEqual; 589 return (mins.allEqual && maxs.allEqual); 590 } 591 override bool isConstant() const nothrow @nogc { 592 if (_subs.length == 0) 593 return true; 594 else if (_subs.length == 1) { 595 import std.range: front; 596 return _subs.front.isConstant; 597 } else 598 return false; /+ TODO: Maybe handle case when _subs are different. +/ 599 } 600 } 601 602 auto alt(Args...)(Args args) @safe pure nothrow => new Alt(args); // instantiator 603 604 pure nothrow @safe unittest { 605 immutable a_b = alt(`a`.lit, 606 `b`.lit); 607 608 immutable a__b = (`a`.lit | 609 `b`.lit); 610 611 assert(a_b.isFixed); 612 assert(!a_b.isConstant); 613 assert(a_b.at(`a`)); 614 assert(a_b.at(`b`)); 615 assert(a_b.at(`c`) == size_t.max); 616 617 size_t hix = size_t.max; 618 a_b.atIx(`a`, 0, hix); assert(hix == 0); 619 a_b.atIx(`b`, 0, hix); assert(hix == 1); 620 621 /* assert(alt.at(`a`) == size_t.max); */ 622 /* assert(alt.at(``) == size_t.max); */ 623 624 immutable a = alt(lit(`a`)); 625 immutable aa = alt(lit(`aa`)); 626 assert(aa.isConstant); 627 628 immutable aa_bb = alt(lit(`aa`), 629 lit(`bb`)); 630 assert(aa_bb.isFixed); 631 assert(aa_bb.bounds.low == 2); 632 assert(aa_bb.bounds.high== 2); 633 634 immutable a_bb = alt(lit(`a`), 635 lit(`bb`)); 636 assert(!a_bb.isFixed); 637 assert(a_bb.bounds.low == 1); 638 assert(a_bb.bounds.high== 2); 639 640 const string _aa = `_aa`; 641 assert(aa_bb.findAt(_aa) == cast(immutable Data)`aa`); 642 assert(&aa_bb.findAt(_aa)[0] - &(cast(immutable Data)_aa)[0] == 1); 643 644 const string _bb = `_bb`; 645 assert(aa_bb.findAt(_bb) == cast(immutable Data)`bb`); 646 assert(&aa_bb.findAt(_bb)[0] - &(cast(immutable Data)_bb)[0] == 1); 647 648 assert(a.findAt(`b`) == []); 649 assert(aa.findAt(`cc`) == []); 650 assert(aa_bb.findAt(`cc`) == []); 651 assert(aa_bb.findAt(``) == []); 652 } 653 654 pure nothrow @safe unittest { 655 auto a_b = alt(`a`.lit); 656 a_b ~= `b`.lit; 657 658 assert(a_b.isFixed); 659 assert(!a_b.isConstant); 660 assert(a_b.at(`a`)); 661 assert(a_b.at(`b`)); 662 assert(a_b.at(`c`) == size_t.max); 663 664 size_t hix = size_t.max; 665 a_b.atIx(`a`, 0, hix); assert(hix == 0); 666 a_b.atIx(`b`, 0, hix); assert(hix == 1); 667 } 668 669 final extern(C++) class Space : Node { extern(D): 670 @safe pure nothrow @nogc: 671 override: 672 size_t atRaw(in Data input, size_t soff = 0) const { 673 import std.ascii: isWhite; 674 return soff < input.length && isWhite(input[soff]) ? 1 : size_t.max; 675 } 676 @property const: 677 override Bounds bounds() const nothrow @nogc => typeof(return)(1, 1); 678 bool isFixed() => true; 679 bool isConstant() => false; 680 } 681 682 auto ws() @safe pure nothrow => new Space(); // instantiator 683 684 pure nothrow @safe unittest { 685 assert(ws.at(` `) == 1); 686 assert(ws.at("\t") == 1); 687 assert(ws.at("\n") == 1); 688 } 689 690 /** Abstract Singleton Super Pattern. 691 */ 692 abstract extern(C++) class SPatt1 : Node { extern(D): 693 @safe pure nothrow: 694 this(Node sub) { 695 this.sub = sub; 696 sub._parent = this; 697 } 698 protected Node sub; 699 } 700 701 /** Optional Sub Pattern $(D count) times. 702 */ 703 final extern(C++) class Opt : SPatt1 { extern(D): 704 @safe pure nothrow: 705 this(Node sub) { super(sub); } 706 override: 707 size_t atRaw(in Data input, size_t soff = 0) const { 708 assert(soff <= input.length); // include equality because input might be empty and size zero 709 const hit = sub.atRaw(input[soff..$]); 710 return hit == size_t.max ? 0 : hit; 711 } 712 @property const nothrow @nogc: 713 override Bounds bounds() const nothrow @nogc => typeof(return)(0, sub.bounds.high); 714 bool isFixed() => false; 715 bool isConstant() => false; 716 version (none) Lit[] mandatories() nothrow => []; 717 version (none) Lit[] optionals() nothrow => sub.optionals; 718 } 719 720 auto opt(Args...)(Args args) => new Opt(args); // optional 721 722 pure nothrow @safe unittest { 723 assert(`a`.lit.opt.at(`b`) == 0); 724 assert(`a`.lit.opt.at(`a`) == 1); 725 } 726 727 /** Repetition Sub Pattern $(D count) times. 728 */ 729 final extern(C++) class Rep : SPatt1 { extern(D): 730 @safe pure nothrow: 731 732 this(Node sub, size_t count) in(count >= 2) { 733 super(sub); 734 this.countReq = count; 735 this.countOpt = 0; // fixed length repetion 736 } 737 738 this(Node sub, size_t countMin, size_t countMax) in { 739 assert(countMax >= 2); 740 assert(countMin <= countMax); 741 } do { 742 super(sub); 743 this.countReq = countMin; 744 this.countOpt = countMax - countMin; 745 } 746 747 override size_t atRaw(in Data input, size_t soff = 0) const { 748 size_t sum = 0; 749 size_t off = soff; 750 /* mandatory */ 751 foreach (ix; 0..countReq) /+ TODO: Reuse std.algorithm instead? +/ { 752 size_t hit = sub.atRaw(input[off..$]); 753 if (hit == size_t.max) { return hit; } // if any miss skip 754 off += hit; 755 sum += hit; 756 } 757 /* optional part */ 758 foreach (ix; countReq..countReq + countOpt) /+ TODO: Reuse std.algorithm instead? +/ { 759 size_t hit = sub.atRaw(input[off..$]); 760 if (hit == size_t.max) { break; } // if any miss just break 761 off += hit; 762 sum += hit; 763 } 764 return sum; 765 } 766 767 @property override const nothrow @nogc: 768 override Bounds bounds() const nothrow @nogc 769 => typeof(return)(countReq*sub.bounds.high, (countReq + countOpt)*sub.bounds.high); 770 bool isFixed() => bounds.low == bounds.high && sub.isFixed; 771 bool isConstant() => bounds.low == bounds.high && sub.isConstant; 772 version (none) Lit[] mandatories() => sub.mandatories; 773 version (none) Lit[] optionals() => sub.optionals; 774 775 // invariant { assert(countReq); } 776 size_t countReq; // Required. 777 size_t countOpt; // Optional. 778 } 779 780 auto rep(Args...)(Args args) => new Rep(args); // repetition 781 auto zom(Args...)(Args args) => new Rep(args, 0, size_t.max); // zero or more 782 auto oom(Args...)(Args args) => new Rep(args, 1, size_t.max); // one or more 783 784 pure nothrow @safe unittest { 785 auto l = 'a'.lit; 786 787 const l5 = l.rep(5); 788 assert(l5.isConstant); 789 assert(l5.at(`aaaa`) == size_t.max); 790 assert(l5.at(`aaaaa`)); 791 assert(l5.at(`aaaaaaa`)); 792 assert(l5.isFixed); 793 assert(l5.bounds.low == 5); 794 assert(l5.bounds.high== 5); 795 796 const countMin = 2; 797 const countMax = 3; 798 const l23 = l.rep(countMin, countMax); 799 assert(l23.at(`a`) == size_t.max); 800 assert(l23.at(`aa`) == 2); 801 assert(l23.at(`aaa`) == 3); 802 assert(l23.at(`aaaa`) == 3); 803 assert(!l23.isConstant); 804 assert(l23.bounds.low == countMin); 805 assert(l23.bounds.high== countMax); 806 } 807 808 pure nothrow @safe unittest { 809 auto l = 'a'.lit; 810 auto l5 = l.rep(5); 811 /+ TODO: assert(l5.mandatories == [l]); +/ 812 /+ TODO: assert(l5.optionals == [l]); +/ 813 } 814 815 pure nothrow @safe unittest { 816 auto l = 'a'.lit; 817 auto l5 = l.opt.rep(5); 818 /+ TODO: assert(l5.mandatories == []); +/ 819 /+ TODO: assert(l5.optionals == [l]); +/ 820 } 821 822 final extern(C++) class Ctx : Node { extern(D): 823 enum Type { 824 bob, /// Beginning Of \em Block/Name/File/String. @b Emacs: `\`` 825 beginningOfBlock = bob, 826 beginningOfFile = bob, 827 beginningOfString = bob, 828 eob, /// End Of \em Block/Name/File/String. @b Emacs: `\'` 829 endOfBlock = eob, 830 endOfFile = eob, 831 endOfString = eob, 832 833 bol, /// Beginning Of \em Line. @b Emacs: `^` 834 beginningOfLine = bol, 835 eol, /// End Of \em Line. @b Emacs: `$` 836 endOfLine = eol, 837 838 bos, /// Beginning Of \em Symbol. @b Emacs: `\_<` 839 beginningOfSymbol = bos, 840 eos, /// End Of \em Symbol. @b Emacs: `\_>` 841 endOfSymbol = eos, 842 843 bow, /// Beginning Of \em Word. @b Emacs: `\<` 844 beginningOfWord = bow, 845 eow, /// End Of \em Word. @b Emacs: `\>` 846 endOfWord = eow, 847 } 848 849 @safe pure nothrow: 850 851 this(Type type) { this.type = type; } 852 853 override size_t atRaw(in Data input, size_t soff = 0) const 854 { 855 assert(soff <= input.length); // include equality because input might be empty and size zero 856 bool ok = false; 857 import std.ascii : isAlphaNum/* , newline */; 858 final switch (type) 859 { 860 /* buffer */ 861 case Type.bob: ok = (soff == 0); break; 862 case Type.eob: ok = (soff == input.length); break; 863 864 /* line */ 865 case Type.bol: ok = (soff == 0 || (input[soff - 1] == 0x0d || 866 input[soff - 1] == 0x0a)); break; 867 case Type.eol: ok = (soff == input.length || (input[soff ] == 0x0d || 868 input[soff ] == 0x0a)); break; 869 870 /* symbol */ 871 case Type.bos: ok = ((soff == 0 || (!input[soff - 1].isAlphaNum && 872 input[soff - 1] != '_')) && /+ TODO: Make '_' language-dependent +/ 873 (soff < input.length && input[soff].isAlphaNum)) ; break; 874 case Type.eos: ok = ((soff == input.length || (!input[soff].isAlphaNum && 875 input[soff] != '_')) && /+ TODO: Make '_' language-dependent +/ 876 (soff >= 1 && input[soff - 1].isAlphaNum)) ; break; 877 878 /* word */ 879 case Type.bow: ok = ((soff == 0 || !input[soff - 1].isAlphaNum) && 880 (soff < input.length && input[soff].isAlphaNum)) ; break; 881 case Type.eow: ok = ((soff == input.length || !input[soff].isAlphaNum) && 882 (soff >= 1 && input[soff - 1].isAlphaNum)) ; break; 883 } 884 return ok ? 0 : size_t.max; 885 } 886 @property override const nothrow @nogc: 887 Bounds bounds() => typeof(return)(0, 0); 888 bool isFixed() => true; 889 bool isConstant() => true; 890 protected Type type; 891 } 892 893 Ctx bob(Args...)(Args args) => new Ctx(args, Ctx.Type.bob); 894 Ctx eob(Args...)(Args args) => new Ctx(args, Ctx.Type.eob); 895 Ctx bol(Args...)(Args args) => new Ctx(args, Ctx.Type.bol); 896 Ctx eol(Args...)(Args args) => new Ctx(args, Ctx.Type.eol); 897 Ctx bos(Args...)(Args args) => new Ctx(args, Ctx.Type.bos); 898 Ctx eos(Args...)(Args args) => new Ctx(args, Ctx.Type.eos); 899 Ctx bow(Args...)(Args args) => new Ctx(args, Ctx.Type.bow); 900 Ctx eow(Args...)(Args args) => new Ctx(args, Ctx.Type.eow); 901 Seq buf(Args...)(Args args) => seq(bob, args, eob); 902 Seq line(Args...)(Args args) => seq(bol, args, eol); 903 Seq sym(Args...)(Args args) => seq(bos, args, eos); 904 Seq word(Args...)(Args args) => seq(bow, args, eow); 905 906 pure nothrow @safe unittest { 907 const bob_ = bob; 908 const eob_ = eob; 909 assert(bob_.at(`ab`) == 0); 910 assert(eob_.at(`ab`, 2) == 0); 911 assert(bob_.bounds.low == 0); 912 assert(bob_.bounds.high== 0); 913 914 const bol_ = bol; 915 assert(bol_.at(`ab`) == 0); 916 assert(bol_.at("a\nb", 2) == 0); 917 assert(bol_.at("a\nb", 1) == size_t.max); 918 assert(bol_.bounds.low == 0); 919 assert(bol_.bounds.high== 0); 920 921 const eol_ = eol; 922 assert(eol_.at(`ab`, 2) == 0); 923 assert(eol_.at("a\nb", 1) == 0); 924 assert(eol_.at("a\nb", 2) == size_t.max); 925 assert(eol_.bounds.low == 0); 926 assert(eol_.bounds.high== 0); 927 928 const bos_ = bos; 929 const eos_ = eos; 930 assert(bos_.at(`ab`) == 0); 931 assert(bos_.at(` ab`) == size_t.max); 932 assert(eos_.at(`ab`, 2) == 0); 933 assert(eos_.at(`a_b `, 1) == size_t.max); 934 assert(eos_.at(`ab `, 2) == 0); 935 assert(eos_.bounds.low == 0); 936 assert(eos_.bounds.high== 0); 937 938 const bow_ = bow; 939 const eow_ = eow; 940 assert(bow_.bounds.low == 0); 941 assert(bow_.bounds.high== 0); 942 assert(eow_.bounds.low == 0); 943 assert(eow_.bounds.high== 0); 944 945 assert(bow_.at(`ab`) == 0); 946 assert(bow_.at(` ab`) == size_t.max); 947 948 assert(eow_.at(`ab`, 2) == 0); 949 assert(eow_.at(`ab `, 0) == size_t.max); 950 951 assert(bow_.at(` ab `, 1) == 0); 952 assert(bow_.at(` ab `, 0) == size_t.max); 953 954 assert(eow_.at(` ab `, 3) == 0); 955 assert(eow_.at(` ab `, 4) == size_t.max); 956 957 auto l = lit(`ab`); 958 auto w = word(l); 959 assert(w.at(`ab`) == 2); 960 assert(w.at(`ab_c`) == 2); 961 962 auto s = sym(l); 963 assert(s.at(`ab`) == 2); 964 assert(s.at(`ab_c`) == size_t.max); 965 966 assert(bob_.findAt(`a`) == []); 967 assert(bob_.findAt(`a`).ptr != null); 968 assert(eob_.findAt(`a`) == []); 969 970 assert(bol_.findAt(`a`) == []); 971 assert(bol_.findAt(`a`).ptr != null); 972 assert(eol_.findAt(`a`) == []); 973 974 assert(bow_.findAt(`a`) == []); 975 assert(bow_.findAt(`a`).ptr != null); 976 assert(eow_.findAt(`a`) == []); 977 978 assert(bos_.findAt(`a`) == []); 979 assert(bos_.findAt(`a`).ptr != null); 980 assert(eos_.findAt(`a`) == []); 981 /+ TODO: This fails assert(eos_.findAt(`a`).ptr != null); +/ 982 } 983 984 /** Keyword $(D arg). */ 985 Seq kwd(Arg)(Arg arg) => seq(bow, arg, eow); 986 987 pure nothrow @safe unittest { 988 const str = `int`; 989 auto x = str.lit.kwd; 990 991 assert(x.at(str, 0)); 992 /* TODO: assert(!x.at(str, 1)); */ 993 994 assert(x.at(` ` ~ str, 1)); 995 /* TODO: assert(!x.at(` int`, 0)); */ 996 } 997 998 /** Pattern Paired with Prefix and Suffix. 999 */ 1000 final extern(C++) class Clause : SPatt1 { extern(D): 1001 @safe pure nothrow: 1002 this(Node prefix_, Node suffix_, Node sub) { 1003 super(sub); 1004 this.prefix = prefix_; 1005 this.suffix = suffix_; 1006 } 1007 override const(Data) findRawAt(const Data input, size_t soff = 0, in Node[] enders = []) const nothrow @nogc { 1008 import std.experimental.allocator.mallocator : Mallocator; 1009 DynamicArray!(Node, Mallocator) enders_; 1010 enders_.reserve(enders.length == 1); 1011 () @trusted { 1012 enders_ ~= cast(Node[])enders; 1013 enders_ ~= cast()suffix; 1014 }(); 1015 typeof(return) result = sub.findRawAt(input, soff, enders_[]); 1016 return result; 1017 } 1018 Node prefix, suffix; 1019 } 1020 1021 Clause paired(Args...)(Node prefix, Node suffix, Args args) => new Clause(prefix, suffix, args); 1022 Clause parend(Args...)(Args args) => new Clause(lit('('), lit(')'), args); 1023 Clause hooked(Args...)(Args args) => new Clause(lit('['), lit(']'), args); 1024 Clause braced(Args...)(Args args) => Clause(lit('{'), lit('}'), args); 1025 1026 import nxt.assert_ex; 1027 1028 pure nothrow @safe unittest { 1029 /* auto p = `[alpha]`.lit.parend; */ 1030 /* assert(p.at(`([alpha])`) == 7); */ 1031 1032 /* auto h = `[alpha]`.lit.hooked; */ 1033 /* assert(h.at(`[[alpha]]`) == 7); */ 1034 1035 /* auto b = `[alpha]`.lit.braced; */ 1036 /* assert(b.at(`{[alpha]}`) == 7); */ 1037 1038 /* auto pb = `[alpha]`.lit.parend; */ 1039 } 1040 1041 /** Create Matcher for a UNIX Shell $(LUCKY Shebang) Pattern. 1042 Example: #!/bin/env rdmd 1043 See_Also: https://en.wikipedia.org/wiki/Shebang_(Unix) 1044 */ 1045 auto ref shebangLine(Node interpreter) @safe pure nothrow { 1046 return seq(bob, 1047 `#!`.lit, 1048 `/usr`.lit.opt, 1049 `/bin/env`.lit.opt, 1050 ws.oom, 1051 interpreter); 1052 } 1053 1054 /// 1055 pure nothrow @safe unittest { 1056 assert(`rdmd`.lit.shebangLine 1057 .at(`#!/bin/env rdmd`) == 15); 1058 assert(`rdmd`.lit.shebangLine 1059 .at(`#!/usr/bin/env rdmd`) == 19); 1060 auto rgdmd = alt(`rdmd`.lit, 1061 `gdmd`.lit); 1062 assert(rgdmd.shebangLine 1063 .at(`#!/usr/bin/env rdmd-dev`) == 19); 1064 assert(rgdmd.shebangLine 1065 .at(`#!/usr/bin/env gdmd`) == 19); 1066 }