1 /** Lexer/Parser Generator for ANTLR (G, G2, G4) and (E)BNF grammars.
2 
3     See_Also: https://theantlrguy.atlassian.net/wiki/spaces/ANTLR3/pages/2687036/ANTLR+Cheat+Sheet
4     See_Also: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
5     See_Also: https://github.com/antlr/grammars-v4
6     See_Also: https://github.com/antlr/grammars-v4/blob/master/bnf/bnf.g4
7     See_Also: https://stackoverflow.com/questions/53245751/convert-a-form-of-bnf-grammar-to-g4-grammar
8     See_Also: https://bnfc.digitalgrammars.com/
9     See_Also: https://forum.dlang.org/post/rsmlqfwowpnggwyuibok@forum.dlang.org
10     See_Also: https://www.regular-expressions.info/unicode.html
11     See_Also: https://stackoverflow.com/questions/64654430/meaning-of-plu-in-antlr-grammar/64658336#64658336
12     See_Also: https://stackoverflow.com/questions/28829049/antlr4-any-difference-between-import-and-tokenvocab
13     See_Also: https://github.com/antlr/antlr4/blob/master/doc/grammars.md
14     See_Also: https://github.com/antlr/antlr4/tree/master/doc
15     See_Also: https://slebok.github.io/zoo/index.html
16 
17     TODO:
18 
19     - Use import std.algorithm.searching : commonPrefix; in alternatives and call it commonPrefixLiteral
20 
21     - Add Syntax Tree Nodes as structs with members being sub-nodes. Composition
22       over inheritance. If we use structs over classes more languages, such as
23       Vox, can be supported in the code generation phase. Optionally use
24       extern(C++) classes. Sub-node pointers should be defined as unique
25       pointers with deterministic destruction.
26 
27     - Should be allowed instead of warning:
28     grammars-v4/lua/Lua.g4(329,5): Warning: missing left-hand side, token (leftParen) at offset 5967
29 
30     - Parallelize grammar parsing and generation of parser files using https://dlang.org/phobos/std_parallelism.html#.parallel
31       After that compilation of parser files should grouped into CPU-count number of groups.
32 
33     - Use: https://forum.dlang.org/post/zcvjwdetohmklaxriswk@forum.dlang.org
34 
35     - Rewriting (X+)? as X* in ANTLR grammars and `commit` to `grammars-v4`.
36       See https://stackoverflow.com/questions/64706408/rewriting-x-as-x-in-antlr-grammars
37 
38     - Add errors for missing symbols during code generation
39 
40     - Literal indexing:
41       - Add map from string literal to fixed-length (typically lexer) rule
      - Warn about string literals, such as str(`...`), that are equal to tokens
        such as `ELLIPSIS` in `Python3.g4`.
44 
45     - Make `Rule.root` be of type `Matcher` and make
46       - `dcharCountSpan` and
47       - `toMatchInSource`
48       members of `Matcher`.
49       - Remove `Symbol.toMatchInSource`
50 
    - Support `tokens { INDENT_WS, DEDENT_WS, LINE_BREAK_WS }` to get
      `Python3.g4` working with TOK.whitespaceIndent, whitespaceDedent, whitespaceLineBreak and useWhitespaceClassesFlag.
      See: https://stackoverflow.com/questions/8642154/antlr-what-is-simpliest-way-to-realize-python-like-indent-depending-grammar
54 
55     - Unicode regular expressions.
56       Use https://www.regular-expressions.info/unicode.html
57       Use https://forum.dlang.org/post/rsmlqfwowpnggwyuibok@forum.dlang.org
58 
    - Detect conflicting rules pulled in via `import` and `tokenVocab`
60 
61     - Use a region allocator on top of the GC to pre-allocate the nodes. Either
62     copied from std.allocator or Vox. Maybe one region for each file. Calculate
63     the region size from lexer statistics (number of operators, symbols and
64     literals).
65 
66     - `not(...)`'s implementation needs to be adjusted. often used in conjunction with `altN`?
67 
68     - handle all TODO's in `makeRule`
69 
70     - Move parserSourceBegin to gxbnf_rdbase.d
71 
72     - Use `TOK.tokenSpecOptions` in parsing. Ignored for now.
73 
74     - Essentially, Packrat parsing just means caching whether sub-expressions
75       match at the current position in the string when they are tested -- this
76       means that if the current attempt to fit the string into an expression
77       fails then attempts to fit other possible expressions can benefit from the
78       known pass/fail of subexpressions at the points in the string where they
79       have already been tested.
80 
81     - Deal with differences between `import` and `tokenVocab`.
82       See: https://stackoverflow.com/questions/28829049/antlr4-any-difference-between-import-and-tokenvocab
83 
84     - Add `Rule` in generated code that defines opApply for matching that overrides
85     - Detect indirect mutual left-recursion by check if `Rule.lastOffset` (in
86       generated code) is same as current parser offset. Simple-way in generated
87       parsers: enters a rule again without offset change. Requires storing last
88       offset for each non-literal rule.
89       ** Last offset during parsing.
90       *
91       * Used to detect infinite recursion, `size_t.max` indicates no last offset
92       * yet defined for `this` rule. *
93       size_t lastOffset = size_t.max;
94 
    - Warn about `options{greedy=false;}:` and advise replacing it with non-greedy variants
    - Warn about `options{greedy=true;}:` being deprecated
97 
98     - Display column range for tokens in messages. Use `head.input.length`.
99       Requires updating FlyCheck.
100       See: `-fdiagnostics-print-source-range-info` at https://clang.llvm.org/docs/UsersManual.html.
101       See: https://clang.llvm.org/diagnostics.html
102       Use GNU-style formatting such as: fix-it:"test.c":{45:3-45:21}:"gtk_widget_show_all".
103 
104     - Use: `nxt.git` to scan parsing examples in `grammars-v4`
105 
106     - If performance is needed:
107     - Avoid casts and instead compare against `head.tok` for `isA!NodeType`
108     - use `RuleAltN(uint n)` in `makeAlt`
109     - use `SeqN(uint n)` in `makeSeq`
110 
111     - Support reading parsers from [Grammar Zoom](https://slebok.github.io/zoo/index.html).
112 */
module nxt.gxbnf;

// version = show;
version = Do_Inline;            ///< Prefer `pragma(inline, true)` hints in hot lexer paths.

enum useStaticTempArrays = false; ///< Use fixed-size (statically allocated) sequence and alternative buffers.
119 
120 import core.lifetime : move;
121 import core.stdc.stdio : putchar, printf;
122 
123 import std.conv : to;
124 import std.algorithm.comparison : min, max;
125 import std.algorithm.iteration : map, joiner, substitute;
126 import std.array : array;
127 import std.file : tempDir;
128 import std.path;
129 import std.exception : enforce;
130 
131 // `d-deps.el` requires these to be at the top:
132 import nxt.line_column : offsetLineColumn;
133 import nxt.fixed_array : FixedArray;
134 import nxt.dynamic_array : DynamicArray;
135 import nxt.file_ex : rawReadPath;
136 import nxt.array_algorithm : startsWith, endsWith, endsWithEither, skipOver, skipOverBack, skipOverAround, canFind, indexOf, indexOfEither;
137 import nxt.conv_ex : toDefaulted;
138 import nxt.dbgio;
139 
140 import std.stdio : File, stdout, write, writeln;
141 
@safe:

alias Input = string;      ///< Grammar input source.
alias Output = DynamicArray!char; ///< Generated parser output source.

alias RulesByName = Rule[Input]; ///< Rules indexed by rule (symbol) name.

enum matcherFunctionNamePrefix = `m__`; ///< Name prefix of generated matcher functions.
150 
///< Token kind. TODO: make this a string type like with std.experimental.lexer
enum TOK
{
    none,

    unknown,                    ///< Unknown

    whitespace,                 ///< Whitespace

    symbol,                     ///< Symbol
    attributeSymbol,            ///< Attribute Symbol (starting with `$`)
    actionSymbol,               ///< Action Symbol (starting with `@`)

    number,                     ///< Number

    lineComment,                ///< Single line comment
    blockComment,               ///< Multi-line (block) comment

    leftParen,                  ///< Left parenthesis
    rightParen,                 ///< Right parenthesis

    action,                     ///< Code block

    brackets,                   ///< Alternatives within '[' ... ']'

    literal,                    ///< Text literal, single or double quoted

    colon,                      ///< Colon `:`
    semicolon,                  ///< Semicolon `;`
    hash,                       ///< Hash `#`
    labelAssignment,            ///< Label assignment `=`
    listLabelAssignment,        ///< List label assignment `+=`

    qmark,                      ///< Greedy optional or semantic predicate (`?`)
    qmarkQmark,                 ///< Non-Greedy optional (`??`)

    star,                       ///< Greedy zero or more (`*`)
    starQmark,                  ///< Non-Greedy Zero or more (`*?`)

    plus,                       ///< Greedy one or more (`+`)
    plusQmark,                  ///< Non-Greedy One or more (`+?`)

    pipe,                       ///< Alternative (`|`)
    tilde,                      ///< Match negation (`~`)
    lt,                         ///< `<`
    gt,                         ///< `>`
    comma,                      ///< Comma (`,`)
    exclamation,                ///< Exclude from AST (`!`)
    rootNode,                   ///< Root node (`^`)
    wildcard,                   ///< `.`
    dotdot,                     ///< `..`

    rewrite,                    ///< Rewrite rule (`->`)

    /** Syntactic predicate rule rewrite (`=>`).
     *
     * Wikipedia: A syntactic predicate specifies the syntactic validity of
     * applying a production in a formal grammar and is analogous to a semantic
     * predicate that specifies the semantic validity of applying a
     * production. It is a simple and effective means of dramatically improving
     * the recognition strength of an LL parser by providing arbitrary
     * lookahead. In their original implementation, syntactic predicates had the
     * form “( α )?” and could only appear on the left edge of a production.
     * The required syntactic condition α could be any valid context-free
     * grammar fragment.
     *
     * See_Also: https://en.wikipedia.org/wiki/Syntactic_predicate
     * See_Also: https://wincent.com/wiki/ANTLR_predicates
     */
    rewriteSyntacticPredicate,

    /** Token spec options:
        "<"
        id ASSIGN optionValue
        ( SEMI id ASSIGN optionValue )*
        ">"
        ;
    */
    tokenSpecOptions,

    _error,                     ///< Error token
}
233 
/// Gx token: a lexeme (input slice) tagged with its `TOK` kind.
@safe struct Token
{
nothrow:
    this(in TOK tok, Input input = null) @nogc pure
    {
        this.tok = tok;
        this.input = input;
    }
    Input input;                ///< Slice of the grammar source covered by this token.
    TOK tok;                    ///< Token kind.
}
246 
/** Returns: `true` iff `ch` may start a symbol: a letter, `_`,
 *  `$` (attribute symbol) or `@` (action symbol).
 */
static bool isSymbolStart(in dchar ch) pure nothrow @safe @nogc
{
    import std.uni : isAlpha;
    switch (ch)
    {
    case '_':
    case '$':
    case '@':
        return true;
    default:
        return ch.isAlpha;
    }
}
255 
/** Gx lexer for all versions of ANTLR grammars (`.g`, `.g2`, `.g4`).
 *
 * See_Also: `ANTLRv4Lexer.g4`
 */
260 @safe struct GxLexer
261 {
262     import std.algorithm.comparison : among;
263 
    /** Construct a lexer over `input`.
     *
     * Params:
     *      input = grammar source; must be null-terminated (enforced below)
     *      path = path of the grammar file, used in diagnostics
     *      includeComments = when set, produce comment tokens instead of skipping them
     *      includeWhitespace = when set, produce whitespace tokens instead of skipping them
     */
    this(const Input input,
         const string path = null,
         in bool includeComments = false,
         in bool includeWhitespace = false)
    {
        _input = input;
        this.path = path;

        import nxt.parsing : isNullTerminated;
        enforce(_input.isNullTerminated, "Input isn't null-terminated"); // input cannot be trusted

        _includeComments = includeComments;
        _includeWhitespace = includeWhitespace;

        nextFront();            // prime `front`
    }

    @disable this(this);        // non-copyable: two copies would share scan state
282 
    /// Returns: `true` iff all tokens have been consumed.
    @property bool empty() const pure nothrow scope @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        return _endOfFile;
    }
288 
    /// Returns: the current token. Precondition: `!empty`.
    inout(Token) front() inout pure scope return nothrow @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        assert(!empty);
        return _token;
    }
295 
    /// Advance to the next token. Precondition: `!empty`.
    void popFront() scope nothrow
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        assert(!empty);
        nextFront();
    }
302 
    /// Error at `front` unless its kind is `tok`; `msg` prefixes the diagnostic.
    void frontEnforce(in TOK tok, const scope Input msg = "") nothrow // TODO: @nogc
    {
        if (front.tok != tok)
            errorAtFront(msg ~ ", expected `TOK." ~ tok.toDefaulted!string(null) ~ "`");
    }
308 
    /// Pop `front` and error unless the popped token's kind is `tok`.
    void popFrontEnforce(in TOK tok, const scope Input msg) nothrow // TODO: @nogc
    {
        if (frontPop().tok != tok)
            errorAtFront(msg ~ ", expected `TOK." ~ tok.toDefaulted!string(null) ~ "`");
    }
314 
    /// Pop `front`, erroring unless its kind is `tok`. Returns: the popped token.
    Token frontPopEnforce(in TOK tok, const scope Input msg = "") nothrow // TODO: @nogc
    {
        const result = frontPop();
        if (result.tok != tok)
            errorAtFront(msg ~ ", expected `TOK." ~ tok.toDefaulted!string(null) ~ "`");
        return result;
    }
322 
    /// Pop and return the current token.
    Token frontPop() scope return nothrow
    {
        version(D_Coverage) {} else version(LDC) version(Do_Inline) pragma(inline, true);
        const result = front;
        popFront();
        return result;
    }
330 
331     Token skipOverToken(in Token token) scope return nothrow
332     {
333         if (front == token)
334             return frontPop();
335         return typeof(return).init;
336     }
337 
338     Token skipOverTOK(in TOK tok) scope return nothrow
339     {
340         if (front.tok == tok)
341             return frontPop();
342         return typeof(return).init;
343     }
344 
    import std.meta : AliasSeq;

    // from std.ascii.isWhite
    /// End-of-line characters.
    alias endOfLineChars = AliasSeq!('\n', // (0x0a)
                                     '\r', // (0x0d)
        );
    /// ASCII whitespace characters.
    alias whiteChars = AliasSeq!(' ', // 0x20
                                 '\t', // (0x09)
                                 '\n', // (0x0a)
                                 '\v', // (0x0b)
                                 '\r', // (0x0d)
                                 '\f' // (0x0c)
        );
358 
359 private:
360 
    /// Peek next `char` in input. No bounds check: relies on the
    /// null-termination enforced in the constructor.
    dchar peek0() const pure scope nothrow @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        return _input[_offset]; // TODO: decode `dchar`
    }
367 
    /// Peek next next `char` in input (one past `peek0`).
    dchar peek1() const pure scope nothrow @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        return _input[_offset + 1]; // TODO: decode `dchar`
    }
374 
    /// Peek `n`-th next `char` in input (`n == 0` is `peek0`).
    dchar peekN(in size_t n) const pure scope nothrow @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        return _input[_offset + n]; // TODO: decode `dchar`
    }
381 
382     /// Drop next byte in input.
383     void drop1() pure nothrow @nogc
384     {
385         version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
386         _offset += 1;
387     }
388 
    /// Drop next `n` bytes in input.
    void dropN(in size_t n) pure nothrow @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        _offset += n;           // TODO: decode `dchar`
    }
395 
396     /// Skip over `n` bytes in input.
397     Input skipOverN(in size_t n) pure return nothrow @nogc
398     {
399         version(D_Coverage) {} else pragma(inline);
400         const part = _input[_offset .. _offset + n]; // TODO: decode `dchar`
401         dropN(n);
402         return part;
403     }
404 
405     /// Skip over next `char`.
406     Input skipOver1() pure return nothrow @nogc
407     {
408         version(D_Coverage) {} else pragma(inline);
409         return _input[_offset .. ++_offset]; // TODO: decode `dchar`
410     }
411 
412     /// Skip over next two `char`s.
413     Input skipOver2() pure return nothrow @nogc
414     {
415         version(D_Coverage) {} else pragma(inline);
416         return _input[_offset .. (_offset += 2)]; // TODO: decode `dchar`
417     }
418 
419     /// Skip line comment.
420     void skipLineComment() pure scope nothrow @nogc
421     {
422         while (!peek0().among!('\0', endOfLineChars))
423             _offset += 1;       // TODO: decode `dchar`
424     }
425 
    /// Get (consume and return) the rest of the current line comment,
    /// excluding the terminating newline.
    Input getLineComment() pure return nothrow @nogc
    {
        size_t i;
        while (!peekN(i).among!('\0', endOfLineChars))
            i += 1;                // TODO: decode `dchar`
        return skipOverN(i);    // TODO: decode `dchar`
    }
434 
    /// Skip block comment, including the terminating `*/`;
    /// errors if end of input is reached first.
    void skipBlockComment() scope nothrow
    {
        while (!peek0().among!('\0'))
        {
            if (peek0() == '*' &&
                peek1() == '/')
            {
                _offset += 2;   // consume `*/`
                return;
            }
            _offset += 1;
        }
        errorAtFront("unterminated block comment"); // hit '\0' before `*/`
    }
450 
    /** Get symbol.
     *
     * Also consumes a trailing `=` (label assignment) or `+=` (list label
     * assignment) that may follow the symbol after optional whitespace; the
     * caller (`nextFront`) distinguishes those cases via `endsWith`.
     */
    Input getSymbol() pure return nothrow @nogc
    {
        import std.uni : isAlphaNum; // TODO: decode `dchar`
        size_t i;
        const bool attributeFlag = peek0() == '@';
        if (peek0().isSymbolStart)
            i += 1;
        while (peekN(i).isAlphaNum ||
               peekN(i) == '_' ||
               (attributeFlag && // attribute name
                peekN(i) == ':')) // may include colon qualifier
            i += 1;

        // skip optional whitespace before label assignment
        auto j = i;
        while (peekN(j).among!(whiteChars)) // NOTE this is faster than `src[i].isWhite`
            j += 1;

        if (peekN(j) == '=')         // label assignment
            return skipOverN(j + 1);
        else if (peekN(j) == '+' &&
                 peekN(j + 1) == '=') // list label assignment
            return skipOverN(j + 2);
        else
            return skipOverN(i);
    }
478 
479     /// Get number.
480     Input getNumber() pure return nothrow @nogc
481     {
482         import std.ascii : isDigit;
483         size_t i;
484         while (peekN(i).isDigit)
485             i += 1;
486         return skipOverN(i);
487     }
488 
489     Input getWhitespace() pure return nothrow @nogc
490     {
491         size_t i;
492         while (peekN(i).among!(whiteChars)) // NOTE this is faster than `src[i].isWhite`
493             i += 1;
494         return skipOverN(i);
495     }
496 
    /** Advance `i` past the escape sequence starting at offset `i`, if any.
     *
     * Returns: `true` iff an escape sequence (introduced by `\`) was skipped.
     */
    bool skipOverEsc(ref size_t i) nothrow @nogc
    {
        if (peekN(i) == '\\')   // TODO: decode `dchar`
        {
            i += 1;
            if (peekN(i) == 'n')
                i += 1;            // TODO: convert to "\n"
            else if (peekN(i) == 't')
                i += 1;            // TODO: convert to "\t"
            else if (peekN(i) == 'r')
                i += 1;            // TODO: convert to ASCII "\r"
            else if (peekN(i) == ']')
                i += 1;            // TODO: convert to ASCII "]"
            else if (peekN(i) == 'u')
            {
                i += 1;
                import std.ascii : isDigit;
                while (peekN(i).isDigit)
                    i += 1;
                // TODO: convert to `dchar`
            }
            else if (peekN(i) == '\0')
                errorAtOffset("unterminated escape sequence at end of file");
            else
                i += 1;         // unknown escape: keep the escaped char as-is
            return true;
        }
        return false;
    }
526 
    /// Get string literal quoted by `terminator` (`'` or `"`), including
    /// both quotes; escape sequences are skipped, not decoded.
    Input getLiteral(dchar terminator)() return nothrow @nogc
    {
        size_t i = 1;           // start past the opening quote
        while (!peekN(i).among!('\0', terminator))
            if (!skipOverEsc(i))
                i += 1;
        if (peekN(i) == '\0')
            errorAtOffset("unterminated string literal at end of file");
        return skipOverN(i + 1); // include terminator
    }
537 
    /// Get token spec options `<...>`, including the terminating `>`.
    Input getTokenSpecOptions() return nothrow @nogc
    {
        enum dchar terminator = '>';
        size_t i = 1;           // start past the opening `<`
        while (!peekN(i).among!('\0', terminator))
            i += 1;
        if (peekN(i) != terminator)
        {
            if (peekN(i) == '\0')
                errorAtOffset("unterminated string literal at end of file");
            else
                errorAtOffset("unterminated token spec option");
        }
        return skipOverN(i + 1); // include terminator '>'
    }
553 
    /// Get bracketed alternatives `[...]`, including the closing `]` if present.
    Input getHooks() return nothrow @nogc
    {
        size_t i;
        while (!peekN(i).among!('\0', ']')) // may contain whitespace
            if (!skipOverEsc(i))
                i += 1;
        if (peekN(i) == ']') // skip ']'
            i += 1;
        return skipOverN(i);
    }
564 
565     Input getAction() return nothrow @nogc
566     {
567         size_t i;
568 
569         DynamicArray!char ds;   // delimiter stack
570 
571         bool inBlockComment;
572         bool inLineComment;
573         bool inChar;
574         bool inString;
575 
576         const infoFlag = false;
577 
578         while (!peekN(i).among!('\0'))
579         {
580             // skip over all escape sequences in quoted
581             if (inChar ||
582                 inString)
583                 while (skipOverEsc(i)) {}
584 
585             if (!inBlockComment &&
586                 !inLineComment &&
587                 !inChar &&
588                 !inString)
589             {
590                 if (peekN(i) == '/' &&
591                     peekN(i + 1) == '/')
592                 {
593                     if (infoFlag)
594                         infoAtOffset("line comment start", i, ds[]);
595                     inLineComment = true;
596                     i += 2;
597                     continue;
598                 }
599                 else if (peekN(i) == '/' &&
600                          peekN(i + 1) == '*')
601                 {
602                     if (infoFlag)
603                         infoAtOffset("block comment start", i, ds[]);
604                     inBlockComment = true;
605                     i += 2;
606                     continue;
607                 }
608                 else if (peekN(i) == '{')
609                 {
610                     if (infoFlag)
611                         infoAtOffset("brace open", i, ds[]);
612                     ds.put('{');
613                 }
614                 else if (peekN(i) == '}')
615                 {
616                     if (infoFlag)
617                         infoAtOffset("brace close", i, ds[]);
618                     if (!ds.empty &&
619                         ds.back != '{')
620                         errorAtOffset("unmatched", i);
621                     ds.popBack();
622                 }
623                 else if (peekN(i) == '[')
624                 {
625                     if (infoFlag)
626                         infoAtOffset("hook open", i, ds[]);
627                     ds.put('[');
628                 }
629                 else if (peekN(i) == ']')
630                 {
631                     if (infoFlag)
632                         infoAtOffset("hook close", i, ds[]);
633                     if (!ds.empty &&
634                         ds.back != '[')
635                         errorAtOffset("unmatched", i);
636                     ds.popBack();
637                 }
638                 else if (peekN(i) == '(')
639                 {
640                     if (infoFlag)
641                         infoAtOffset("paren open", i, ds[]);
642                     ds.put('(');
643                 }
644                 else if (peekN(i) == ')')
645                 {
646                     if (infoFlag)
647                         infoAtOffset("paren close", i, ds[]);
648                     if (!ds.empty &&
649                         ds.back != '(')
650                         errorAtOffset("unmatched", i);
651                     ds.popBack();
652                 }
653             }
654 
655             // block comment close
656             if (inBlockComment &&
657                 peekN(i) == '*' &&
658                 peekN(i + 1) == '/')
659             {
660                 if (infoFlag)
661                     infoAtOffset("block comment close", i, ds[]);
662                 inBlockComment = false;
663                 i += 2;
664                 continue;
665             }
666 
667             // line comment close
668             if (inLineComment &&
669                 (peekN(i) == '\n' ||
670                  peekN(i) == '\r'))
671             {
672                 if (infoFlag)
673                     infoAtOffset("line comment close", i, ds[]);
674                 inLineComment = false;
675             }
676 
677             // single-quote open/close
678             if (!inBlockComment &&
679                 !inLineComment &&
680                 !inString &&
681                 peekN(i) == '\'')
682             {
683                 if (!ds.empty &&
684                     ds.back == '\'')
685                 {
686                     if (infoFlag)
687                         infoAtOffset("single-quote close", i, ds[]);
688                     ds.popBack();
689                     inChar = false;
690                 }
691                 else
692                 {
693                     if (infoFlag)
694                         infoAtOffset("single-quote open", i, ds[]);
695                     ds.put('\'');
696                     inChar = true;
697                 }
698             }
699 
700             // double-quote open/close
701             if (!inBlockComment &&
702                 !inLineComment &&
703                 !inChar &&
704                 peekN(i) == '"')
705             {
706                 if (!ds.empty &&
707                     ds.back == '"')
708                 {
709                     if (infoFlag)
710                         infoAtOffset("double-quote close", i, ds[]);
711                     ds.popBack();
712                     inString = false;
713                 }
714                 else
715                 {
716                     if (infoFlag)
717                         infoAtOffset("doubl-quote open", i, ds[]);
718                     ds.put('"');
719                     inString = true;
720                 }
721             }
722 
723             i += 1;
724 
725             if (ds.length == 0)
726                 break;
727         }
728 
729         if (inBlockComment)
730             errorAtOffset("unterminated block comment", i);
731         if (ds.length != 0)
732             errorAtOffset("unbalanced code block", i);
733 
734         return skipOverN(i);
735     }
736 
    /** Scan the next token from the input into `_token`.
     *
     * Dispatches on the first character; comments and whitespace are either
     * emitted as tokens or recursively skipped depending on the
     * `_includeComments`/`_includeWhitespace` flags. Sets `_endOfFile` when
     * the terminating `'\0'` is reached.
     */
    void nextFront() scope nothrow @trusted // TODO: remove `@trusted`
    {
        switch (peek0())
        {
        case '/':
            if (peek1() == '/') // `//`
            {
                _offset += 2;
                skipLineComment();
                if (_includeComments)
                    _token = Token(TOK.lineComment);
                else
                    nextFront();
            }
            else if (peek1() == '*') // `/*`
            {
                _offset += 2;
                skipBlockComment();
                if (_includeComments)
                    _token = Token(TOK.blockComment);
                else
                    return nextFront();
            }
            else
                errorAtOffset("unexpected character");
            break;
        case '(':
            _token = Token(TOK.leftParen, skipOver1());
            break;
        case ')':
            _token = Token(TOK.rightParen, skipOver1());
            break;
        case '{':
            _token = Token(TOK.action, getAction());
            break;
        case '[':
            _token = Token(TOK.brackets, getHooks());
            break;
        case '"':
            _token = Token(TOK.literal, getLiteral!('"')());
            break;
        case '\'':
            _token = Token(TOK.literal, getLiteral!('\'')());
            break;
        case ':':
            _token = Token(TOK.colon, skipOver1());
            break;
        case ';':
            _token = Token(TOK.semicolon, skipOver1());
            break;
        case '#':
            _token = Token(TOK.hash, skipOver1());
            break;
        case '=':
            if (peek1() == '>')
                _token = Token(TOK.rewriteSyntacticPredicate, skipOver2());
            else
                errorAtFront("expected '>' after '='");
            break;
        case '?':
            if (peek1() == '?')
                _token = Token(TOK.qmarkQmark, skipOver2());
            else
                _token = Token(TOK.qmark, skipOver1());
            break;
        case '*':
            if (peek1() == '?')
                _token = Token(TOK.starQmark, skipOver2());
            else
                _token = Token(TOK.star, skipOver1());
            break;
        case '+':
            if (peek1() == '=')
                _token = Token(TOK.listLabelAssignment, skipOver2());
            else if (peek1() == '?')
                _token = Token(TOK.plusQmark, skipOver2());
            else
                _token = Token(TOK.plus, skipOver1());
            break;
        case '|':
            _token = Token(TOK.pipe, skipOver1());
            break;
        case '~':
            _token = Token(TOK.tilde, skipOver1());
            break;
        case '<':
            _token = Token(TOK.tokenSpecOptions, getTokenSpecOptions());
            break;
        case ',':
            _token = Token(TOK.comma, skipOver1());
            break;
        case '!':
            _token = Token(TOK.exclamation, skipOver1());
            break;
        case '^':
            _token = Token(TOK.rootNode, skipOver1());
            break;
        case '.':
            if (peek1() == '.') // `..`
                _token = Token(TOK.dotdot, skipOver2());
            else
                _token = Token(TOK.wildcard, skipOver1());
            break;
        case '-':
            if (peek1() == '>') // `->`
                _token = Token(TOK.rewrite, skipOver2());
            else
                errorAtOffset("unexpected character");
            break;
        case '0':
            ..
        case '9':
            _token = Token(TOK.number, getNumber());
            break;
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\r':
        case '\f':
            // TODO: extend to std.uni
            // import std.uni : isWhite;
            // assert(peek0().isWhite);
            const ws = getWhitespace();
            if (_includeWhitespace)
                _token = Token(TOK.whitespace, ws);
            else
                return nextFront();
            break;
        case '\0':              // end of input
            _token = Token.init;
            _endOfFile = true;
            return;
        default:
            if (peek0().isSymbolStart)
            {
                const symbol = getSymbol();
                if (symbol.endsWith("+=")) // list label assignment, see `getSymbol`
                {
                    if (_includeListLabelAssignment)
                        _token = Token(TOK.listLabelAssignment, symbol);
                    else
                        return nextFront();
                }
                else if (symbol.endsWith('=')) // label assignment, see `getSymbol`
                {
                    if (_includeLabelAssignment)
                        _token = Token(TOK.labelAssignment, symbol);
                    else
                        return nextFront();
                }
                else
                    switch (symbol[0])
                    {
                    case '$':
                        _token = Token(TOK.attributeSymbol, symbol);
                        break;
                    case '@':
                        _token = Token(TOK.actionSymbol, symbol);
                        break;
                    default:
                        _token = Token(TOK.symbol, symbol);
                        break;
                    }
            }
            else
            {
                _token = Token(TOK._error);
                errorAtOffset("unexpected character");
            }
        }
    }
909 
    /// Print an informational message located at the current `front` token.
    void infoAtFront(const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(front, "Info", msg);
    }
914 
    /// Print a warning located at the current `front` token.
    void warningAtFront(const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(front, "Warning", msg);
    }
919 
    /// Print an error located at the current `front` token, then halt.
    void errorAtFront(const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(front, "Error", msg);
        assert(false);          ///< TODO: propagate error instead of assert
    }
925 
    /// Print an informational message located at `token`.
    private void infoAtToken(const scope Token token,
                             const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(token, "Info", msg);
    }
931 
    /// Print a warning located at `token`.
    private void warningAtToken(const scope Token token,
                                const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(token, "Warning", msg);
    }
937 
    /// Print an error located at `token`, then halt.
    private void errorAtToken(const scope Token token,
                              const scope Input msg) const nothrow scope @nogc
    {
        messageAtToken(token, "Error", msg);
        assert(false);          ///< TODO: propagate error instead of assert
    }
944 
945     private void messageAtToken(const scope Token token,
946                                 const scope string tag,
947                                 const scope Input msg) const nothrow scope @nogc
948     {
949         ptrdiff_t offset;
950         () @trusted {
951             offset = (token.input.ptr && _input.ptr) ? token.input.ptr - _input.ptr : 0; // unsafe
952         } ();
953         const lc = offsetLineColumn(_input, offset);
954         import nxt.conv_ex : toDefaulted;
955         const tokString = token.tok.toDefaulted!string("unknown");
956         () @trusted {
957             printf("%.*s(%u,%u): %s: %.*s, `%.*s` (%.*s) at offset %llu\n", // move this to a member
958                    cast(int)path.length, &path[0],
959                    lc.line + 1, lc.column + 1,
960                    &tag[0],
961                    cast(int)msg.length, &msg[0],
962                    cast(int)token.input.length, &token.input[0],
963                    cast(int)tokString.length, &tokString[0],
964                    offset);
965         } ();
966     }
967 
    // TODO: into warning(const char* format...) like in `dmd` and put in `nxt.parsing` and reuse here and in lispy.d
    /// Print an error at byte offset `_offset + i`, then halt.
    void errorAtOffset(const scope Input msg,
                       in size_t i = 0) const nothrow scope @nogc
    {
        messageAtOffset("Error", msg, i);
        assert(false);          ///< TODO: propagate error instead of assert
    }
975 
    /// Print a warning at byte offset `_offset + i`.
    void warningAtOffset(const scope Input msg,
                         in size_t i = 0) const nothrow scope @nogc
    {
        messageAtOffset("Warning", msg, i);
    }
981 
    /// Print an informational message at byte offset `_offset + i`, with optional details `ds`.
    void infoAtOffset(const scope Input msg,
                      in size_t i = 0, in const(char)[] ds = null) const nothrow scope @nogc
    {
        messageAtOffset("Info", msg, i, ds);
    }
987 
988     void messageAtOffset(const scope string tag,
989                          const scope Input msg,
990                          in size_t i = 0,
991                          in const(char)[] ds = null) const @trusted nothrow @nogc scope
992     {
993         const lc = offsetLineColumn(_input, _offset + i);
994         // TODO: remove printf
995         debug printf("%.*s(%u,%u): %s: %.*s at offset %llu being char `%c` ds:`%.*s`\n",
996                      cast(int)path.length, path.ptr,
997                      lc.line + 1, lc.column + 1,
998                      tag.ptr,
999                      cast(int)msg.length, msg.ptr,
1000                      _offset + i,
1001                      peekN(i),
1002                      cast(int)ds.length, ds.ptr);
1003     }
1004 
private:
    size_t _offset;             // current offset in `_input`
    const Input _input;         ///< Input data.
    const string path;         ///< Input file (or null if in-memory).

    Token _token;               // current lookahead token returned by `front`
    bool _endOfFile;            // signals null terminator found
    bool _includeComments;      // emit comment tokens instead of skipping them
    bool _includeWhitespace;    // emit whitespace tokens instead of skipping them
    bool _includeLabelAssignment; // emit `label=` assignments as tokens
    bool _includeListLabelAssignment; // emit `label+=` assignments as tokens
    bool _diagnoseLeftRecursion; ///< Diagnose left-recursion.
1017 }
1018 
/// Kind of AST node.
enum NODE
{
    grammar,                    ///< Grammar definition (name).
    rule                        ///< Grammar rule.
}
1025 
/// Output format used when printing AST (nodes).
enum Layout : ubyte
{
    source,                     ///< Try to mimic original source.
    tree                        ///< Makes AST-structure clear.
}
1032 
enum indentStep = 4;        ///< Indentation size in number of spaces per indentation level.
1034 
/// Indentation and layout state used when pretty-printing AST nodes.
@safe struct Format
{
    uint indentDepth;           ///< Indentation depth.
    Layout layout;              ///< Output layout (source-like or tree-like).
    /// Print `indentDepth` indentation levels to standard output.
    void showIndent() @safe const nothrow @nogc
    {
        showNSpaces(indentDepth);
    }
}
1044 
/// Write `indentDepth` indentation levels worth of spaces to standard output.
void showNSpaces(uint indentDepth) @safe nothrow @nogc
{
    const spaceCount = indentDepth * indentStep;
    foreach (_; 0 .. spaceCount)
        putchar(' ');
}
1050 
/// Append exactly `n` spaces to `sink`.
void showNSpaces(scope ref Output sink, uint n) @safe pure nothrow @nogc
{
    for (uint emitted = 0; emitted < n; ++emitted)
        sink.put(" ");
}
1056 
/// Append `indentDepth` indentation levels (each `indentStep` spaces) to `sink`.
void showNIndents(scope ref Output sink, uint indentDepth) @safe pure nothrow @nogc
{
    // reuse the space-emitting primitive instead of duplicating its loop
    sink.showNSpaces(indentDepth * indentStep);
}
1062 
/// Put `x` into `sink` indented at `indentDepth`.
void iput(T)(scope ref Output sink,
             uint indentDepth, T x) @safe pure nothrow @nogc
if (is(typeof(sink.put(x))))
{
    // reuse the indentation primitive instead of duplicating its loop
    sink.showNIndents(indentDepth);
    sink.put(x);
}
1072 
/// Print `chars` verbatim (no trailing newline) to standard output.
private void showChars(in const(char)[] chars) @trusted
{
    const length = cast(uint)chars.length;
    printf("%.*s", length, chars.ptr);
}
1077 
/// Print `token`'s source text indented according to `fmt`.
private void showToken(Token token, in Format fmt)
{
    fmt.showIndent();
    showChars(token.input);
}
1083 
/** Lower and upper limit of `dchar` count.
 *
 * The default field values (`lower = uint.max`, `upper = 0`) form the
 * identity element for `min`/`max` accumulation (see `dcharCountSpanOf`).
 */
@safe struct DcharCountSpan
{
pure nothrow:
    @disable this();       // be explicit for now as default init is not obvious
    /** Accumulation starting point: the identity for taking `min` of `lower`
     * and `max` of `upper`, matching the default field initializers.
     *
     * Previously this returned `(lower.min, upper.max)` — identical to
     * `full()` — which is absorbing under `min`/`max` and made accumulation
     * via `dcharCountSpanOf` degenerate to `(0, uint.max)`.
     */
    static typeof(this) start() @nogc
    {
        return typeof(this)(this.lower.max,
                            this.upper.min);
    }
    /// Fully unconstrained span: any count from 0 to `uint.max`.
    static typeof(this) full() @nogc
    {
        return typeof(this)(this.lower.min,
                            this.upper.max);
    }
    this(in uint lower, in uint upper) @nogc
    {
        this.lower = lower;
        this.upper = upper;
    }
    this(in size_t lower, in size_t upper) @nogc
    {
        assert(lower <= this.lower.max);
        assert(upper <= this.upper.max);
        this.lower = cast(typeof(this.lower))lower;
        this.upper = cast(typeof(this.upper))upper;
    }
    this(in uint length) @nogc
    {
        this(length, length);
    }
    this(in size_t length) @safe nothrow @nogc
    {
        this(length, length);
    }
    uint lower = uint.max;      ///< Minimum number of `dchar`s.
    uint upper = 0;             ///< Maximum number of `dchar`s (`uint.max` means unbounded).
}
1123 
/// AST node.
private abstract class Node
{
    /// Pretty-print this node to standard output using `fmt`.
    abstract void show(in Format fmt = Format.init) const;
nothrow:
    /// Deep structural equality with `o`.
    abstract bool equals(const Node o) const pure @nogc;
    /// Emit D source code that matches this node into `sink`.
    abstract void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const;
    this() @nogc {}
}
1133 
1134 import nxt.my_mallocator : Mallocator;
1135 
alias NodeArray = DynamicArray!(Node, Mallocator, uint); // `uint` length/capacity is enough for grammar-sized arrays
alias PatternArray = DynamicArray!(Pattern, Mallocator, uint); // `uint` length/capacity is enough for grammar-sized arrays
1138 
/// Returns: `true` iff `a` and `b` have the same length and pairwise-`equals` elements.
bool equalsAll(const scope Node[] a,
               const scope Node[] b) pure nothrow @nogc
{
    if (a.length != b.length)
        return false;
    foreach (const i, const elementA; a)
        if (!elementA.equals(b[i])) // TODO: use `.ptr` if needed
            return false;
    return true;
}
1149 
/// N-ary expression.
@safe abstract class NaryOpPattern : Pattern
{
@safe nothrow:
    this(Token head, PatternArray subs) @nogc
    {
        super(head);
        this.subs = subs.move(); // TODO: remove when compiler does this for us
    }
    /// Deep structural equality: same dynamic type and pairwise-equal `subs`.
    override bool equals(const Node o) const @nogc
    {
        if (this is o)
            return true;
        if (const o_ = cast(const typeof(this))o)
            return equalsAll(this.subs[], o_.subs[]);
        return false;
    }
    /// Construct from a fixed-size array of at least two sub-patterns.
    this(uint n)(Token head, Pattern[n] subs) @nogc if (n >= 2)
    {
        super(head);
        foreach (const Pattern sub; subs)
            this.subs.put(sub);
    }
    PatternArray subs;          ///< Sub-patterns (operands).
}
1175 
/** Sequence.
 *
 * A `Sequence` is empty in case when a rule provides an empty alternative.
 * Such cases `() | ...` should be rewritten to `(...)?` in `makeAlt`.
 */
@safe final class SeqM : NaryOpPattern
{
    /// Print sub-patterns separated by single spaces.
    override void show(in Format fmt = Format.init) const
    {
        fmt.showIndent();
        foreach (const i, const sub; subs)
        {
            if (i)
                putchar(' ');
            sub.show();
        }
    }
nothrow:
    this(Token head) @nogc
    {
        super(head, PatternArray.init);
    }
    this(PatternArray subs) @nogc
    {
        super(Token.init, subs.move());
    }
    this(uint n)(Node[n] subs) if (n >= 2)
    {
        super(subs);
    }
    /// Emit `seq(sub0, sub1, ...)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("seq(");
        foreach (const i, const sub; subs)
        {
            if (i)
                sink.put(", "); // separator
            sub.toMatchInSource(sink, parser);
        }
        sink.put(")");
    }
    /// Element-wise sum of sub-spans, where `uint.max` saturates as "unbounded".
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        assert(!subs.empty);
        // NOTE(review): `result.upper` starts at `uint.max` ("unbounded"),
        // so the summed upper bound below always stays `uint.max` — confirm
        // this is intended rather than starting from 0 like `result.lower`.
        auto result = typeof(return)(0, uint.max);
        foreach (const sub; subs)
        {
            const sublr = sub.dcharCountSpan;
            if (result.lower == uint.max ||
                sublr.lower == uint.max)
                result.lower = uint.max;
            else
                result.lower += sublr.lower;
            if (result.upper == uint.max ||
                sublr.upper == uint.max)
                result.upper = uint.max;
            else
                result.upper += sublr.upper;
        }
        return result;
    }
    /// Structural equality: same head input and pairwise-equal sub-patterns.
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
        {
            if (head.input == that_.head.input)
            {
                foreach (const index, const sub; subs)
                    if (!sub.opEquals(that_.subs[index]))
                        return false;
                return true;
            }
        }
        return false;
    }
}
1252 
/** Build a sequence pattern over `subs`, flattening nested sequences first.
 *
 * Returns the sole sub-pattern directly when only one remains. When
 * `rewriteFlag` is set, warns about `X X*` runs that should be `X+`.
 */
Pattern makeSeq(PatternArray subs,
                const ref GxLexer lexer,
                in bool rewriteFlag = true) nothrow
{
    subs = flattenSubs!SeqM(subs.move());
    if (subs.length == 1)
        return subs[0];
    if (rewriteFlag)
    {
        foreach (const i, const sub; subs)
        {
            if (i + 1 == subs.length) // skip last
                break;
            // a sub followed by the same sub starred (`X X*`) is `X+`
            if (const zom = cast(const GreedyZeroOrMore)subs[i + 1])
                if (zom.sub.equals(sub))
                    lexer.warningAtToken(zom.head, "should be rewritten into `X+`");
        }
    }
    if (subs.empty)
        lexer.warningAtToken(Token(TOK.leftParen, lexer._input[0 .. 0]),
                             "empty sequence");
    return new SeqM(subs.move());
}
1276 
/// ditto, taking a `Pattern[]` slice.
Pattern makeSeq(scope Pattern[] subs,
                const ref GxLexer lexer,
                in bool rewriteFlag = true) nothrow
{
    return makeSeq(PatternArray(subs), lexer, rewriteFlag);
}
1283 
/// ditto, after checking that every `Node` in `subs` is a `Pattern`.
Pattern makeSeq(scope Node[] subs,
                const ref GxLexer lexer,
                in bool rewriteFlag = true) nothrow
{
    return makeSeq(checkedCastSubs(subs, lexer), lexer, rewriteFlag);
}
1290 
/** Downcast each element of `subs` to `Pattern`, diagnosing (fatally via
    `errorAtFront`) any element that is not a `Pattern`.
 */
private PatternArray checkedCastSubs(scope Node[] subs,
                                     const ref GxLexer lexer) nothrow
{
    auto psubs = typeof(return).withLength(subs.length);
    foreach (const i, sub; subs)
    {
        psubs[i] = cast(Pattern)sub;
        if (!psubs[i])
        {
            // show the offending node BEFORE the fatal diagnostic;
            // `errorAtFront` ends in `assert(false)`, so code after it
            // (as in the previous ordering) never runs
            debug sub.show();
            lexer.errorAtFront("non-`Pattern` sub");
        }
    }
    return psubs;
}
1306 
/** Recursively flatten any sub-pattern of branch type `BranchPattern` in
 * `subs` into one flat array, e.g. `(a (b c))` becomes `(a b c)` for `SeqM`.
 */
PatternArray flattenSubs(BranchPattern)(PatternArray subs) pure nothrow @nogc
if (is(BranchPattern : SeqM) ||
    is(BranchPattern : AltM))
{
    typeof(subs) subs_;
    foreach (sub; subs)
    {
        if (auto sub_ = cast(BranchPattern)sub)
            subs_.insertBack(flattenSubs!(BranchPattern)(sub_.subs.move()));
        else
            subs_.insertBack(sub);
    }
    return subs_.move();
}
1321 
/// Rule.
@safe class Rule : Node
{
    /// Print as `name:` followed by the indented root pattern and ` ;`.
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
        showChars(":\n");
        if (root)
            root.show(Format(fmt.indentDepth + 1));
        showChars(" ;\n");
    }
nothrow:
    /// Warn when a leftmost sub-pattern refers back to this rule's own name.
    void diagnoseDirectLeftRecursion(const scope ref GxLexer lexer)
    {
        void checkLeft(const scope Pattern root) @safe nothrow
        {
            if (const alt = cast(const AltM)root) // common case
                foreach (const sub; alt.subs[]) // all alternatives
                    checkLeft(sub);
            else if (const seq = cast(const SeqM)root)
                return checkLeft(seq.subs[0]); // only first in sequence
            else if (const s = cast(const SymbolRef)root)
                if (head.input == s.head.input)
                    lexer.warningAtToken(s.head, "left-recursion");
        }
        checkLeft(root);
    }
    this(Token head, Pattern root, bool skipFlag) @nogc
    {
        this.head = head;
        this.root = root;
        this.skipFlag = skipFlag;
    }
    override bool equals(const Node o) const @nogc
    {
        if (this is o)
            return true;
        if (const o_ = cast(const typeof(this))o)
            return head == o_.head && root.equals(o_.root);
        return false;
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        // dummy
    }
    /// Emit the D matcher member-function source for this rule into `sink`.
    void toMatcherInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.iput(1, `Match `);
        if (head.input != "EOF")
            sink.put(matcherFunctionNamePrefix);
        sink.put(head.input); sink.put("()\n");
        sink.iput(1, "{\n");
        import std.ascii : isUpper;
        // request inlining for lexer (token) rules and fragment rules
        if (head.input[0].isUpper ||
            cast(const FragmentRule)this)
            sink.iput(2, "pragma(inline, true);\n");
        sink.iput(2, `return`);
        if (root)
        {
            sink.put(` `);
            root.toMatchInSource(sink, parser);
        }
        else
            sink.put(` Match.zero()`);
        sink.put(";\n");
        sink.iput(1, "}\n");
    }
    /// Overridden to `true` in `FragmentRule`.
    @property bool isFragmentRule() const @nogc
    {
        return false;
    }
    /** Is a lexer (token) rule (beginning with a capital letter) defining a token type.
     *
     * A lexer rule name starts with an uppercase letter distinguishing it from
     * a parser rule.
     *
     * See_Also: https://github.com/antlr/antlr4/blob/master/doc/lexer-rules.md#lexer-rule-elements
     */
    @property final bool isLexerTokenRule() const @nogc
    {
        import std.ascii : isUpper;
        return (head.input.length &&
                head.input[0].isUpper);
    }
    const Token head;           ///< Name.
    Pattern root;               ///< Root pattern.
    /** Set to `true` if is referenced by a `SymbolRef`.
        Set in `GxParserByStatement.tagReferencedRules`
    */
    bool hasRef = false;
    /** Rule is skipped (ignored). For instance
        WS : [\r\n]+ -> skip ;
     */
    const bool skipFlag;
}
1417 
/** A reusable part of a lexer rule that doesn't match (a token) on its own.

    A consequence is that fragment rules matches are never stored as a separate
    parse tree nodes.

  For example:
  INTEGER: DIGIT+
         | '0' [Xx] HEX_DIGIT+
         ;
  fragment DIGIT: [0-9];
  fragment HEX_DIGIT: [0-9A-Fa-f];

  See_Also: https://sodocumentation.net/antlr/topic/3271/lexer-rules-in-v4#fragments
 */
@safe final class FragmentRule : Rule
{
nothrow:
    this(Token head, Pattern root, bool skipFlag) @nogc
    {
        super(head, root, skipFlag);
    }
    /// Always `true` for fragment rules.
    @property final override bool isFragmentRule() const @nogc
    {
        return true;
    }
}
1444 
/// Alternation pattern: matches any one of its sub-patterns `subs`.
@safe final class AltM : NaryOpPattern
{
    override void show(in Format fmt = Format.init) const
    {
        const wrapFlag = needsWrapping(subs[]);
        if (wrapFlag)
            putchar('(');
        showSubs(fmt);
        if (wrapFlag)
            putchar(')');
    }
    /// Print sub-patterns separated by `|`.
    final void showSubs(in Format fmt) const
    {
        foreach (const i, const sub; subs)
        {
            sub.show(fmt);
            if (i + 1 != subs.length)
            {
                if (fmt.indentDepth)
                    showChars(" |\n");
                else
                    showChars(" | ");
            }
        }
    }
nothrow:
    this(Token head, PatternArray subs) @nogc
    {
        assert(!subs.empty);
        super(head, subs.move());
    }
    this(uint n)(Pattern[n] subs) @nogc if (n >= 2)
    {
        super(subs);
    }
    /// Emit `altK!(...)()` when all subs are single-character literals, else `alt(...)`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        // preprocess
        bool allSubChars = true; // true if all sub-patterns are characters
        foreach (const sub; subs)
        {
            if (const lit = cast(const StrLiteral)sub)
            {
                if (lit.head.input.length != 3) // non-character literal
                {
                    allSubChars = false;
                    break;
                }
            }
            else
            {
                allSubChars = false;
                break;
            }
        }

        // prefix
        if (allSubChars)
        {
            switch (subs.length)
            {
            case 2:
                sink.put("alt2!(");
                break;
            case 3:
                sink.put("alt3!(");
                break;
            case 4:
                sink.put("alt4!(");
                break;
            case 5:
                sink.put("alt5!(");
                break;
            default:
                sink.put("altN!(");
                break;
            }
        }
        else
            sink.put("alt(");

        // iterate subs
        foreach (const i, const sub; subs)
        {
            if (allSubChars)
            {
                if (i)
                    sink.put(","); // separator
                const lsub = cast(const StrLiteral)sub;
                // NOTE(review): `lsub.head.input` always has length 3 here
                // (checked in the preprocess loop above), so this comparison
                // with the length-1 string "'" can never be true — confirm
                // whether a quoted-quote literal spelling was intended.
                if (lsub.head.input == "'")
                    sink.put(`\'`);
                else
                    sink.put(lsub.head.input);
            }
            else
            {
                if (i)
                {
                    sink.put(",\n");
                    sink.showNIndents(2);
                    sink.showNSpaces(11);
                }
                sub.toMatchInSource(sink, parser);
            }
        }

        // postfix
        sink.put(")");
        if (allSubChars)
            sink.put("()");
    }
    /// Combined span: element-wise min of lower bounds and max of upper bounds.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return dcharCountSpanOf(subs[]);
    }
    /// Structural equality: same head input and pairwise-equal sub-patterns.
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
        {
            if (head.input == that_.head.input)
            {
                foreach (const index, const sub; subs)
                    if (!sub.opEquals(that_.subs[index]))
                        return false;
                return true;
            }
        }
        return false;
    }
}
1575 
/** Combined span over alternatives `subs`: the element-wise `min` of lower
    bounds and `max` of upper bounds, or `(0, 0)` for no alternatives.
 */
DcharCountSpan dcharCountSpanOf(const scope Pattern[] subs) @safe pure nothrow @nogc
{
    if (subs.length == 0)
        return typeof(return)(0, 0);
    // Initialize explicitly with the min/max identity (lower = uint.max,
    // upper = 0) so the first sub's span is taken as-is, instead of relying
    // on `DcharCountSpan.start()` whose value was absorbing under min/max
    // and made every accumulated span degenerate.
    auto result = typeof(return)(uint.max, 0u);
    foreach (const sub; subs)
    {
        const sublr = sub.dcharCountSpan;
        result.lower = min(result.lower, sublr.lower);
        result.upper = max(result.upper, sublr.upper);
    }
    return result;
}
1589 
/** Build an alternation pattern over `subs`, flattening nested alternations.
 *
 * Returns `null` when no alternatives remain and the sole sub-pattern when
 * exactly one remains.
 */
Pattern makeAltA(Token head,
                 PatternArray subs,
                 in bool rewriteFlag = true) nothrow
{
    subs = flattenSubs!AltM(subs.move());
    if (subs.length == 0)
        return null;
    if (subs.length == 1)
        return subs[0];
    return new AltM(head, subs.move());
}
1605 
/// ditto, taking a `Pattern[]` slice.
Pattern makeAltM(Token head,
                 Pattern[] subs,
                 in bool rewriteFlag = true) nothrow
{
    return makeAltA(head, PatternArray(subs), rewriteFlag);
}
1612 
/// ditto, taking a fixed-size array of at least two sub-patterns.
Pattern makeAltN(uint n)(Token head,
                         Pattern[n] subs,
                         in bool rewriteFlag = true) nothrow
if (n >= 2)
{
    return makeAltA(head, PatternArray(subs), rewriteFlag);
}
1620 
/// AST node wrapping a single lexer token `head`.
@safe class TokenNode : Node
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
nothrow:
    this(Token head) @nogc
    {
        this.head = head;
    }
    override bool equals(const Node o) const pure @nogc
    {
        if (this is o)
            return true;
        if (const o_ = cast(const typeof(this))o)
            return head == o_.head;
        return false;
    }
    /// Emit `tok(<head input>)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put(`tok(`);
        sink.put(head.input[]);
        sink.put(`)`);
    }
    const Token head;           ///< Wrapped token.
}
1648 
/// Unary match combinator.
@safe abstract class UnaryOpPattern : Pattern
{
    /// Print as `(sub)` followed by this operator's token.
    final override void show(in Format fmt = Format.init) const
    {
        putchar('(');
        sub.show(fmt);
        putchar(')');
        showToken(head, fmt);
    }
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        debug assert(head.input.ptr);
        assert(sub);
        super(head);
        this.sub = sub;
    }
    /// Deep structural equality: same dynamic type, head and operand.
    final override bool equals(const Node o) const
    {
        if (this is o)
            return true;
        if (const o_ = cast(const typeof(this))o)
            return (head == o_.head &&
                    this.sub.equals(o_.sub));
        return false;
    }
    Pattern sub;                ///< Operand pattern.
}
1678 
/// Don't match an instance of type `sub`.
@safe final class NotPattern : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Emit `not(sub)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("not(");
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    /// Same span as the negated operand.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return sub.dcharCountSpan();
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return sub.opEquals(that_.sub);
        return false;
    }
}
1704 
/// Match (greedily) zero or one instances of type `sub`.
@safe final class GreedyZeroOrOne : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Convenience constructor asserting that `sub` is a `Pattern`.
    this(Token head, Node sub) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub);
    }
    /// Emit `gzo(sub)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("gzo(");
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    /// Zero up to the operand's upper bound.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return)(0, sub.dcharCountSpan.upper);
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return sub.opEquals(that_.sub);
        return false;
    }
}
1736 
/// Match (greedily) zero or more instances of type `sub`.
@safe final class GreedyZeroOrMore : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Convenience constructor asserting that `sub` is a `Pattern`.
    this(Token head, Node sub) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub);
    }
    /// Emit `gzm(sub)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("gzm(");
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    /// Fully unconstrained: zero up to unbounded.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return).full();
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return sub.opEquals(that_.sub);
        return false;
    }
}
1768 
/// Match (greedily) one or more instances of type `sub`.
@safe final class GreedyOneOrMore : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Convenience constructor showing `sub` before failing when it isn't a `Pattern`.
    this(Token head, Node sub) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        if (!psub)
        {
            debug sub.show();
            assert(false);
        }
        super(head, psub);
    }
    /// Emit `gom(sub)` into `sink`.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("gom(");
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    /// From the operand's lower bound up to unbounded.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return)(sub.dcharCountSpan.lower,
                              typeof(return).upper.max);
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return sub.opEquals(that_.sub);
        return false;
    }
}
1805 
/// Unary combinator carrying an optional `terminator` pattern (used by the non-greedy combinators).
@safe abstract class TerminatedUnaryOpPattern : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub, Pattern terminator = null) @nogc
    {
        debug assert(head.input.ptr);
        super(head, sub);
        this.terminator = terminator;
    }
    Pattern terminator;         ///< Terminator pattern, or `null` if absent.
}
1817 
/// Match (non-greedily) zero or one instances of type `sub`.
@safe final class NonGreedyZeroOrOne : TerminatedUnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub, Pattern terminator = null) @nogc
    {
        super(head, sub, terminator);
    }
    /// Convenience constructor asserting that `sub` is a `Pattern`.
    this(Token head, Node sub, Pattern terminator = null) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub, terminator);
    }
    /// Emit `nzo(sub[,terminator])` into `sink`, warning when no terminator is set.
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("nzo(");
        sub.toMatchInSource(sink, parser);
        if (terminator)
        {
            sink.put(",");
            terminator.toMatchInSource(sink, parser);
        }
        else
            parser._lexer.warningAtToken(head, "no terminator after non-greedy");
        sink.put(")");
    }
    /// Zero up to the operand's upper bound.
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return)(0, sub.dcharCountSpan.upper);
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
        {
            if (!sub.opEquals(that_.sub))
                return false;
            // `terminator` may be null (constructor default, handled above in
            // `toMatchInSource`); guard before dereferencing for comparison
            if (terminator is null || that_.terminator is null)
                return terminator is that_.terminator;
            return terminator.opEquals(that_.terminator);
        }
        return false;
    }
}
1859 
/// Match (non-greedily) zero or more instances of type `sub`.
@safe final class NonGreedyZeroOrMore : TerminatedUnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub, Pattern terminator = null) @nogc
    {
        debug assert(head.input.ptr);
        super(head, sub, terminator);
    }
    /// Ditto, but downcasts `sub` from `Node` to `Pattern`.
    this(Token head, Node sub, Pattern terminator = null) @nogc
    {
        debug assert(head.input.ptr);
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub, terminator);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("nzm(");
        sub.toMatchInSource(sink, parser);
        debug assert(head.input.ptr);
        if (terminator)
        {
            sink.put(",");
            terminator.toMatchInSource(sink, parser);
        }
        else
        {
            parser._lexer.warningAtToken(head, "no terminator after non-greedy");
        }
        sink.put(")");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        // anywhere from zero to unbounded repetitions of `sub`
        return typeof(return).full();
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
        {
            if (!sub.opEquals(that_.sub))
                return false;
            // Fix: `terminator` is optional (`null` when absent) so guard
            // against a null dereference; two absent terminators are equal,
            // absent vs present is not.
            if (terminator is null || that_.terminator is null)
                return terminator is that_.terminator;
            return terminator.opEquals(that_.terminator);
        }
        return false;
    }
}
1906 
/// Match (non-greedily) one or more instances of type `sub`.
@safe final class NonGreedyOneOrMore : TerminatedUnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub, Pattern terminator = null) @nogc
    {
        super(head, sub, terminator);
    }
    /// Ditto, but downcasts `sub` from `Node` to `Pattern`.
    this(Token head, Node sub, Pattern terminator = null) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub, terminator);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("nom(");
        sub.toMatchInSource(sink, parser);
        if (terminator)
        {
            sink.put(",");
            terminator.toMatchInSource(sink, parser);
        }
        else
            parser._lexer.warningAtToken(head, "no terminator after non-greedy");
        sink.put(")");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        // at least one `sub` match; repetition count is unbounded above
        return typeof(return)(sub.dcharCountSpan.lower,
                              typeof(return).upper.max);
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
        {
            if (!sub.opEquals(that_.sub))
                return false;
            // Fix: `terminator` is optional (`null` when absent) so guard
            // against a null dereference; two absent terminators are equal,
            // absent vs present is not.
            if (terminator is null || that_.terminator is null)
                return terminator is that_.terminator;
            return terminator.opEquals(that_.terminator);
        }
        return false;
    }
}
1949 
/// Match `count` number of instances of type `sub`.
@safe final class GreedyCount : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Ditto, but downcasts `sub` from `Node` to `Pattern`.
    this(Token head, Node sub) @nogc
    {
        Pattern psub = cast(Pattern)sub;
        assert(psub);
        super(head, psub);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("cnt(");
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        // scale `sub`'s span by `count`, keeping the max sentinel
        // (interpreted as "unbounded") saturated instead of multiplying it
        const ss = sub.dcharCountSpan;
        return typeof(return)(ss.lower == ss.lower.max ? ss.lower.max : ss.lower * count,
                              ss.upper == ss.upper.max ? ss.upper.max : ss.upper * count);
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return (count == that_.count &&
                    sub.opEquals(that_.sub));
        return false;
    }
    // NOTE(review): no constructor initializes `count`, so it stays 0 until
    // a caller assigns it after construction — confirm all construction
    // sites do so, otherwise `dcharCountSpan` collapses to (0, 0).
    ulong count;
}
1985 
/// Syntactic predicate over `sub`, emitted as the `syn(...)` combinator.
@safe final class RewriteSyntacticPredicate : UnaryOpPattern
{
nothrow:
    this(Token head, Pattern sub) @nogc
    {
        super(head, sub);
    }
    /// Ditto, but downcasts `sub` from `Node` to `Pattern`.
    this(Token head, Node sub) @nogc
    {
        auto psub = cast(Pattern)sub;
        assert(psub !is null);
        super(head, psub);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put("syn(");       // syntactic-predicate combinator
        sub.toMatchInSource(sink, parser);
        sink.put(")");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        // the predicate consumes exactly what `sub` consumes
        return sub.dcharCountSpan;
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        const rhs = cast(const typeof(this))that;
        return rhs !is null && sub.opEquals(rhs.sub);
    }
}
2016 
/// Symbol referenced by name `head.input`, matched via a call to its
/// generated matcher function.
@safe final class OtherSymbol : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        // Emit `<prefix><name>()`; the special `EOF` symbol is emitted
        // verbatim without the matcher-function prefix.
        const bool isEOF = head.input == "EOF";
        if (!isEOF)
            sink.put(matcherFunctionNamePrefix);
        sink.put(head.input);
        sink.put(`()`);
    }
}
2036 
/// Reference to a symbol (rule) named by `head.input`.
@safe final class SymbolRef : Pattern
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        // Emit the symbol's matcher name (`EOF` verbatim, without prefix).
        // NOTE: unlike `OtherSymbol.toMatchInSource`, no trailing `()` is
        // appended here — presumably the caller emits the call syntax;
        // confirm at use sites.
        if (head.input != "EOF")
            sink.put(matcherFunctionNamePrefix);
        sink.put(head.input);
        if (parser.warnUnknownSymbolFlag &&
            head.input !in parser.rulesByName)
            parser._lexer.warningAtToken(head, "No rule for symbol");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        assert(false);          // span of a referenced rule is not computable here
        // return typeof(return).init;
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return head.input == that_.head.input;
        return false;
    }
}
2069 
/// Sentinel node marking a left-parenthesis token.
@safe final class LeftParenSentinel : TokenNode
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
}
2078 
/// Sentinel node marking a pipe (`|`) token.
@safe final class PipeSentinel : TokenNode
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
}
2087 
/// Sentinel node marking a `..` (range) token.
@safe final class DotDotSentinel : TokenNode
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
}
2096 
/// Sentinel node marking a `~` (negation) token.
@safe final class TildeSentinel : TokenNode
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
}
2105 
/// Match any single character (emitted as `any()`; span is exactly one `dchar`).
@safe final class AnyClass : Pattern
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        sink.put(`any()`);
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return)(1);
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        // any two wildcard nodes compare equal; only the dynamic type matters
        return cast(const typeof(this))that !is null;
    }
}
2128 
/** Returns: `true` iff `x` is a single-character literal: either exactly one
 * plain character, or a two-character backslash escape such as `\n`.
 */
private bool isASCIICharacterLiteral(Input x) pure nothrow @nogc
{
    return (x.length == 1 ||
            (x.length == 2 &&
             x[0] == '\\')); // backslash-escaped character such as `\n`
}
2135 
/** Parse `x` as a Unicode character escape such as `\uABCD`, `\U0001F600`
 * or `\u{10FFFF}` (braces and a `0x`/`0X` prefix are optional) and return
 * its code point value.
 *
 * Returns: the decoded code point, or 0 when `x` is not a single pure
 * Unicode escape (for instance a surrogate-pair string like
 * `'\uD835\uDD38'`, whose second `\` fails the hex-digit scan).
 *
 * NOTE(review): 0 is also returned for a literal U+0000 escape, and no
 * overflow check is done for over-long hex strings — confirm callers are
 * unaffected.
 */
private uint isUnicodeCharacterLiteral(scope Input x) pure nothrow @nogc
{
    if (!x.skipOver('\\'))
        return 0;
    if (!(x.skipOver('u') ||
          x.skipOver('U')))
        return 0;

    x.skipOverAround('{', '}'); // optional

    if (x.skipOver(`0x`) ||     // optional
        x.skipOver(`0X`)) {}

    while (x.length &&
           x[0] == '0')   // trim leading zero
        x = x[1 .. $];

    // accumulate remaining hexadecimal digits
    uint u;
    while (x.length)
    {
        u *= 16;
        import std.ascii : isDigit, isLower, isUpper;
        const c = x[0];
        if (c.isDigit)
            u += c - '0';
        else if (c.isUpper)
            u += c - 'A' + 10;
        else if (c.isLower)
            u += c - 'a' + 10;
        else
            return 0;           // string literal such as '\uD835\uDD38'
        x = x[1 .. $];      // pop front
    }

    return u;
}
2172 
/// Abstract base for all pattern nodes (leaf matchers and combinators).
@safe abstract class Pattern : TokenNode
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
    /// Returns: bounds on the number of `dchar`s this pattern can match.
    abstract DcharCountSpan dcharCountSpan() const pure @nogc;
    /// Returns: `true` iff `that` is structurally equal to `this`.
    abstract bool opEquals(const scope Pattern that) const @nogc;
}
2183 
/// String (or quoted single-character) literal pattern.
@safe final class StrLiteral : Pattern
{
nothrow:
    /// Construct from `head` whose `input` must be `'...'`- or `"..."`-quoted.
    this(Token head) @nogc
    {
        assert(head.input.length >= 2);
        assert((head.input[0] == '\'' &&
                head.input[$-1] == '\'') ||
               (head.input[0] ==  '"' &&
                head.input[$-1] ==  '"'));
        super(head);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        auto inp = unquotedInput; // skipping single-quotes
        if (inp.isASCIICharacterLiteral())
        {
            // single (possibly backslash-escaped) character => `ch('...')`
            sink.put(`ch(`);
            sink.putCharLiteral(inp);
            sink.put(`)`);
        }
        else if (const uvalue = inp.isUnicodeCharacterLiteral())
        {
            // Unicode escape: ASCII code points match as `char`, wider ones as `dchar`
            if (uvalue <= 0x7f)
                sink.put(`ch(`);
            else
                sink.put(`dch(`);
            sink.putCharLiteral(inp);
            sink.put(`)`);
        }
        else
        {
            // multi-character string => `str(...)`; prefer a D raw
            // (backquoted) string unless the literal contains a backquote
            if (inp.canFind('`'))
            {
                sink.put(`str("`);
                sink.putStringLiteralDoubleQuoted(inp);
                sink.put(`")`);
            }
            else
            {
                sink.put("str(`");
                sink.putStringLiteralBackQuoted(inp);
                sink.put("`)");
            }
        }
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        const inp = unquotedInput; // skipping single-quotes
        size_t cnt;
        if (inp.isASCIICharacterLiteral()) // must come first
            cnt = 1;
        else if (const _ = inp.isUnicodeCharacterLiteral()) // must come second
            cnt = 1;
        else if (inp.isASCIIString) // must come third
            cnt = inp.length;       // ASCII: one byte per `dchar`
        else
        {
            // TODO: optimize
            import std.utf : byDchar;
            import std.algorithm.searching : count;
            cnt = inp.byDchar.count;
        }
        return typeof(return)(cnt);
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return head.input == that_.head.input;
        return false;
    }
    /// Returns: `head.input` stripped of its surrounding quote characters.
    Input unquotedInput() const pure scope return @nogc
    {
        return head.input[1 .. $ - 1];
    }
}
2260 
2261 // TODO: avoid linker error when using version defined in `nxt.string_traits`
/** Returns: `true` iff every byte of `input` is a 7-bit ASCII code unit.
 *
 * ASCII spans 0x00–0x7F inclusive; only bytes with the high bit set
 * (UTF-8 lead/continuation bytes) disqualify the string, so no decoding
 * to `dchar` is needed.
 */
private bool isASCIIString(scope const(char)[] input) pure nothrow @nogc
{
    foreach (const e; cast(const(ubyte)[])input) // no decoding to `dchar` needed
        if (e >= 0x80) // fix: was `>= 0x7F`, which wrongly rejected DEL (0x7F) — a valid ASCII character
            return false;
    return true;
}
2269 
/** Emit `inp` into `sink` quoted for use inside a D double-quoted string:
 * double quotes are backslash-escaped and redundant grammar-level `\'`
 * escapes are reduced to a plain single quote.
 */
void putStringLiteralDoubleQuoted(scope ref Output sink,
                                  const scope Input inp) pure nothrow @nogc
{
    size_t i = 0;
    while (i < inp.length)
    {
        const c = inp[i];
        if (c == '"')
            sink.put(`\"`);     // escape double quote for a D string
        else if (c == '\\' &&
                 i + 1 < inp.length &&
                 inp[i + 1] == '\'')
        {
            sink.put('\'');     // `\'` needs no escaping inside `"..."`
            i += 1;             // consume the extra char
        }
        else
            sink.put(c);
        i += 1;
    }
}
2287 
/** Emit `inp` into `sink` quoted for use inside a D raw (backquoted) string.
 *
 * Backquotes are escaped, and grammar-level escapes of `\\`, `'` and `"`
 * are stripped since a raw string needs none of them; any other escape
 * (e.g. `\n`) is kept verbatim as its two characters.
 */
void putStringLiteralBackQuoted(scope ref Output sink,
                                const scope Input inp) pure nothrow @nogc
{
    for (size_t i; i < inp.length; ++i)
    {
        if (inp[i] == '`')
            sink.put("\\`");    // backslash backquote in D raw string
        else if (inp[i] == '\\' &&
                 i + 1 < inp.length)
        {
            if (inp[i + 1] == '\\')
                sink.put('\\'); // strip quoting of backslash in D raw string
            else if (inp[i + 1] == '\'')
                sink.put('\''); // strip quoting of single-quote in D raw string
            else if (inp[i + 1] == '"')
                sink.put('"'); // strip quoting of double-quote in D raw string
            else
            {
                // Fix: previously only the backslash was emitted and the
                // escaped character was skipped, corrupting escapes such as
                // `\n` to a lone `\`. Keep both characters verbatim.
                sink.put(inp[i]);
                sink.put(inp[i + 1]);
            }
            i += 1;             // one extra char
        }
        else
            sink.put(inp[i]);
    }
}
2312 
/// Single character (or Unicode property escape) inside a char alternative.
@safe final class AltCharLiteral : Pattern
{
nothrow:
    this(Token head) @nogc
    {
        super(head);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        if (head.input.startsWith(`\p`) || // https://github.com/antlr/antlr4/pull/1688
            head.input.startsWith(`\P`))
        {
            sink.put(`cc!(`);   // Unicode character-class matcher
        }
        else if (head.input[0] >= 0x80) // non-ASCII UTF-8 sequence
        {
            sink.put(`dch(`);
        }
        else
        {
            const uvalue = head.input.isUnicodeCharacterLiteral();
            // Fix: this previously read `uvalue < 0x00`, which is always
            // false for the unsigned `uvalue`, making the `ch(` branch dead
            // so ASCII literals were matched with the wider `dch(`. Use the
            // same ASCII threshold as `StrLiteral.toMatchInSource`.
            if (uvalue <= 0x7f)
                sink.put(`ch(`);
            else
                sink.put(`dch(`);
        }
        sink.putCharLiteral(head.input);
        sink.put(`)`);
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return DcharCountSpan(1, 1); // always a single `dchar`
        // if (head.input.isASCIICharacterLiteral)
        //     return DcharCountSpan(1, 1);
        // else
        //     return DcharCountSpan(0, uint.max);
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return head.input == that_.head.input;
        return false;
    }
}
2357 
/** Emit D source for the (grammar-scanned) character literal `inp` into
 * `sink`.
 *
 * Handles three forms:
 * - `\u`/`\U` Unicode escapes (braces optional): ASCII code points become a
 *   D `'\u00XX'` char literal, wider ones `(cast(dchar)0x...)`,
 * - `\p`/`\P` Unicode property classes: emitted as a double-quoted property
 *   name (the `cc!(...)` argument),
 * - anything else: a plain D char literal, backslash-quoting `'` and `\`.
 */
void putCharLiteral(scope ref Output sink,
                    scope Input inp) pure nothrow @nogc
{
    if (inp.skipOver(`\u`) ||
        inp.skipOver(`\U`))
    {
        inp.skipOverAround('{', '}');

        // strip leading zeros
        while (inp.length > 2 &&
               inp[0] == '0')
            inp = inp[1 .. $];

        if (inp.length == 2)    // if ASCII
        {
            sink.put(`'\u00`);
            sink.put(inp);
            sink.put('\'');
        }
        else
        {
            sink.put(`(cast(dchar)0x`); // TODO: use `dchar(...)` for valid numbers
            sink.put(inp);
            sink.put(`)`);
        }
    }
    else if (inp.skipOver(`\p`) || // https://github.com/antlr/antlr4/pull/1688
             inp.skipOver(`\P`))
    {
        inp.skipOverAround('{', '}');
        sink.put('"');
        sink.put(inp);
        sink.put('"');
    }
    else
    {
        sink.put(`'`);
        if (inp.length == 1 &&
            (inp[0] == '\'' ||
             inp[0] == '\\'))
            sink.put(`\`);      // need backquoting
        sink.put(inp);
        sink.put(`'`);
    }
}
2403 
version(none)                   // TODO use
/** Factory: construct the narrowest literal node for `head`, whose `input`
 * is expected to be a quoted literal (hence length >= 3).
 */
TokenNode makeLiteral(Token head) pure nothrow
{
    assert(head.input.length >= 3);
    if (head.input[1 .. $-1].isASCIICharacterLiteral)
        return new AltCharLiteral(head); // single (escaped) character
    else
        return new StrLiteral(head);     // multi-character string
}
2413 
/** Returns: `true` iff any node in `subs` is not a plain `TokenNode` and
 * therefore needs wrapping in parentheses when shown.
 */
bool needsWrapping(const scope Node[] subs) @safe pure nothrow @nogc
{
    foreach (const sub; subs)
        if (!cast(const TokenNode)sub)
            return true;        // early exit: one non-token sub suffices
    return false;
}
2422 
/// Binary pattern combinator.
@safe abstract class BinaryOpPattern : Pattern
{
    override void show(in Format fmt = Format.init) const
    {
        // shown infix: `subs[0] OP subs[1]` where OP is the head token text
        fmt.showIndent();
        subs[0].show(fmt);
        putchar(' ');
        showChars(head.input);
        putchar(' ');
        subs[1].show(fmt);
    }
nothrow:
    this(Token head, Pattern[2] subs) @nogc
    {
        assert(subs[0]);
        assert(subs[1]);
        super(head);
        this.subs = subs;
    }
    final override bool equals(const Node o) const @nogc
    {
        if (this is o)
            return true;
        if (const o_ = cast(const typeof(this))o)
            return (head == o_.head &&
                    equalsAll(this.subs[], o_.subs[]));
        return false;
    }
    Pattern[2] subs;            // the two operand patterns; both non-null
}
2454 
/// Match value range between `limits[0]` and `limits[1]`.
@safe final class Range : BinaryOpPattern
{
    override void show(in Format fmt = Format.init) const
    {
        const wrapFlag = needsWrapping(subs[]);
        fmt.showIndent();
        if (wrapFlag)
            putchar('(');
        subs[0].show(fmt);
        showChars(" .. ");
        subs[1].show(fmt);
        if (wrapFlag)
            putchar(')');
    }
nothrow:
    this(Token head, Pattern[2] limits) @nogc
    {
        assert(limits[0]);
        assert(limits[1]);
        super(head, limits);
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        // emitted as `rng(LOWER,UPPER)`; only literal bounds are supported
        sink.put("rng(");

        if (const lower = cast(const StrLiteral)subs[0])
            sink.putCharLiteral(lower.unquotedInput);
        else if (const lower = cast(const AltCharLiteral)subs[0])
            sink.putCharLiteral(lower.head.input);
        else
        {
            // unsupported lower-bound node type: dump diagnostics and abort
            debug writeln("handle sub[0] of type ", typeid(subs[0]).name);
            debug subs[0].show();
            assert(false);
        }

        sink.put(",");

        if (const upper = cast(const StrLiteral)subs[1])
            sink.putCharLiteral(upper.unquotedInput);
        else if (const upper = cast(const AltCharLiteral)subs[1])
            sink.putCharLiteral(upper.head.input);
        else
            assert(false);      // unsupported upper-bound node type

        sink.put(")");
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return dcharCountSpanOf(subs[]);
    }
    final override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return (head.input == that_.head.input &&
                    subs[0].opEquals(that_.subs[0]) &&
                    subs[1].opEquals(that_.subs[1]));
        return false;
    }
 }
2516 
/** Parse the body of the character-alternative token `alt` (a `[...]` char
 * set) into a `Pattern` of single-character and range alternatives.
 *
 * Handles `-` ranges, backslash escapes (`\]`, `\-`, `\\`, and generic
 * `\X`), Unicode property escapes `\p{...}`, Unicode escapes
 * `\u{...}`/`\uXXXX`, and raw multi-byte UTF-8 characters. Errors are
 * reported through `lexer`.
 *
 * Returns: the alternatives combined via `makeAltA` (which may flatten).
 */
Pattern parseCharAltM(const CharAltM alt,
                      const scope ref GxLexer lexer) @safe nothrow
{
    const Input inp = alt.unquotedInput;

    bool inRange;               // true right after a `-` range separator
    PatternArray subs;          // collected alternatives
    for (size_t i; i < inp.length;)
    {
        Input inpi;             // slice of `inp` holding the next literal

        if (inp[i] == '-' &&
            !subs.empty)        // not first character
        {
            // range separator: next literal closes a `Range` with the
            // previously collected literal
            inRange = true;
            i += 1;
            continue;
        }

        if (inp[i] == '\\')
        {
            i += 1;             // skip '\\'
            switch (inp[i])
            {
            case ']':
            case '-':
            case '\\':
                // escaped set metacharacter: keep only the character itself
                inpi = inp[i .. i + 1];
                break;
            case 'p':
                // Unicode property escape `\p{...}`; keep the whole escape
                if (inp[i + 1] != '{')
                    lexer.errorAtToken(Token(alt.head.tok, inp[i + 1 .. $]),
                                       "expected brace");
                const hit = inp[i + 1 .. $].indexOf('}');
                if (hit >= 0)
                {
                    inpi = inp[i - 1 .. i + 1 + hit + 1];
                    i += hit + 1;
                }
                else
                    lexer.errorAtToken(Token(alt.head.tok, inp[i + 1 .. $]),
                                       "incorrect unicode escape sequence, missing matching closing brace '}'");
                break;
            case 'u':
                if (inp[i + 1] == '{')
                {
                    // brace-wrapped code point `\u{...}`
                    const hit = inp[i + 1 .. $].indexOf('}');
                    if (hit >= 0)
                    {
                        inpi = inp[i - 1 .. i + 1 + hit + 1];
                        i += hit + 1;
                    }
                    else
                        lexer.errorAtToken(Token(alt.head.tok, inp[i + 1 .. $]),
                                           "incorrect unicode escape sequence, missing matching closing brace '}'");
                }
                else
                {
                    /* Unicode code point `\u....` where `....` is the hexadecimal
                       number of the code point you want to match. */
                    import std.ascii : isHexDigit;
                    if (i + 5 > inp.length &&
                        !(inp[i + 1].isHexDigit &&
                          inp[i + 2].isHexDigit &&
                          inp[i + 3].isHexDigit &&
                          inp[i + 4].isHexDigit))
                        lexer.errorAtToken(Token(alt.head.tok, inp[i + 1 .. $]),
                                           "incorrect unicode escape sequence");
                    inpi = inp[i - 1 .. i + 5]; // `\uXXXX` incl. backslash
                    i += 4;
                }
                break;
            default:
                // any other escape: keep backslash plus character verbatim
                inpi = inp[i - 1 .. i + 1];
                break;
            }
            i += 1;
        }
        else if (inp[i] >= 0x80)
        {
            // multi-byte UTF-8 sequence: decode to find its byte length
            import std.typecons : Yes;
            import std.utf : decode;
            const replacementChar = cast(dchar)0x110000;
            const i0 = i;
            const ch = decode!(Yes.useReplacementDchar)(inp, i);
            if (ch == replacementChar)
                lexer.errorAtToken(alt.head, "invalid UTF-sequence `" ~ inp[i0 .. $] ~ "`");
            inpi = inp[i0 .. i];
        }
        else
            inpi = inp[i .. ++i]; // plain single ASCII character

        auto lit = new AltCharLiteral(Token(TOK.literal, inpi));
        if (inRange)
            subs.insertBack(new Range(Token.init, [subs.backPop(), lit]));
        else
            subs.insertBack(lit);
        inRange = false;
    }
    return makeAltA(alt.head, subs.move()); // potentially flatten
}
2618 
/// Character-alternatives pattern scanned from a `[...]` token.
@safe final class CharAltM : Pattern
{
nothrow:

    this(Token head) @nogc
    {
        super(head);
    }

    /// Returns: `head.input` stripped of its surrounding `[` `]` brackets.
    Input unquotedInput() const @nogc
    {
        Input inp = head.input;
        assert(inp.skipOverAround('[', ']')); // trim
        return inp;
    }

    /** Emit matcher source. Sets containing `-` go through
     * `toMatchRangeInSource` and are wrapped in `alt(...)` when there is
     * more than one alternative; plain sets become one compile-time
     * `altN!('a', 'b', ...)()` call over char literals.
     */
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const
    {
        Input inp = unquotedInput;

        // TODO use `switch` `case` ranges

        if (inp.canFind('-')) // check that range is not backquoted
        {
            size_t altCount;
            const asink = toMatchRangeInSource(inp, altCount);
            if (altCount >= 2)
                sink.put("alt(");
            sink.put(asink[]);
            if (altCount >= 2)
                sink.put(")");
            return;
        }

        sink.put("altN!(");
        for (size_t i; i < inp.length;)
        {
            if (i)
                sink.put(", "); // separator

            sink.put('\'');     // prefix

            // contents:
            if (inp[i] == '\\')
            {
                i += 1;
                switch (inp[i])
                {
                case ']':
                case '-':
                    sink.put(inp[i]); // for instance: `\]` => `]`
                    break;
                case '\\':
                    sink.put(`\\`); // `\\` => `\\`
                    break;
                default:
                    sink.put('\\');
                    if (inp[i] == 'u')
                    {
                        // fixed-width `\uXXXX` escape: validate and copy verbatim
                        import std.ascii : isHexDigit;
                        if (i + 5 > inp.length &&
                            !(inp[i + 1].isHexDigit &&
                              inp[i + 2].isHexDigit &&
                              inp[i + 3].isHexDigit &&
                              inp[i + 4].isHexDigit))
                            parser._lexer.errorAtToken(Token(head.tok, inp[i + 1 .. $]), "incorrect unicode escape sequence");
                        sink.put(inp[i .. i + 5]);
                        i += 4;
                    }
                    else
                        sink.put(inp[i]);
                    break;
                }
            }
            else if (inp[i] == '\'')
                sink.put(`\'`);
            else
                sink.put(inp[i]);
            i += 1;

            sink.put('\'');     // suffix
        }
        sink.put(")()");
    }

    /** Build the comma-separated argument list of `rng(...)`/`ch(...)`
     * alternatives for a char set that contains ranges.
     *
     * Params:
     *     input = unquoted set contents
     *     altCount = receives the number of emitted alternatives
     * Returns: the rendered argument list.
     *
     * NOTE(review): the `\uXXXX-\uXXXX` branch assumes fixed 6-character
     * escapes on both sides (hence the 13-byte window) — confirm scanner
     * normalizes escapes to that width.
     */
    private Output toMatchRangeInSource(Input input,
                                        out size_t altCount) const // alt count
    {
        typeof(return) sink;       // argument sink
        for (size_t i; i < input.length; ++altCount)
        {
            if (i)
                sink.put(", "); // separator

            if (i + 3 <= input.length &&
                input[i] != '\\' &&
                input[i + 1] == '-') // such as: `a-z`
            {
                sink.put("rng('");
                sink.put(input[i]),
                sink.put("', '");
                sink.put(input[i + 2]),
                sink.put("')");
                i += 3;
            }
            else if (i + 13 <= input.length &&
                     input[i] == '\\' &&
                     input[i + 1] == 'u' &&
                     input[i + 5] != '\\' &&
                     input[i + 6] == '-') // such as: `\u0021-\u0031`
            {
                sink.put("rng('");
                sink.put(input[i .. i + 6]),
                sink.put("', '");
                sink.put(input[i + 7 .. i + 7 + 6]),
                sink.put("')");
                i += 13;
            }
            else
            {
                // single (possibly escaped) character alternative
                sink.put("ch('");
                if (input[i] == '\'')
                    sink.put(`\'`); // escaped single quote
                else if (input[i] == '\\')
                {
                    i += 1;                 // skip '\\'
                    switch (input[i])
                    {
                    case ']':
                    case '-':
                        sink.put(input[i]); // for instance: `\]` => `]`
                        break;
                    case '\\':
                        sink.put(`\\`); // `\\` => `\\`
                        break;
                    default:
                        sink.put('\\');
                        sink.put(input[i]);
                        break;
                    }
                }
                else
                    sink.put(input[i]);
                i += 1;
                sink.put("')");
            }
        }
        return sink;
    }
    override DcharCountSpan dcharCountSpan() const pure @nogc
    {
        return typeof(return)(1); // a char alternative matches exactly one `dchar`
    }
    override bool opEquals(const scope Pattern that) const @nogc
    {
        if (auto that_ = cast(const typeof(this))that)
            return head.input == that_.head.input;
        return false;
    }
}
2779 
/// Single-line comment wrapped as a syntax tree node.
@safe final class LineComment : TokenNode
{
    this(Token head) @nogc nothrow
    {
        super(head);
    }
}
2788 
/// Block (multi-line) comment wrapped as a syntax tree node.
@safe final class BlockComment : TokenNode
{
    this(Token head) @nogc nothrow
    {
        super(head);
    }
}
2797 
/// Combined grammar declaration introducing the grammar named `name`.
@safe final class Grammar : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        // Prints the `grammar` keyword token followed by "NAME;\n".
        showToken(head, fmt);
        putchar(' ');
        showChars(name);
        showChars(";\n");
    }
    this(Token head, Input name) @nogc nothrow
    {
        super(head);
        this.name = name;
    }
    final override bool equals(const Node o) const nothrow
    {
        if (this is o)
            return true;
        const that = cast(const typeof(this))o;
        return (that !is null &&
                head == that.head &&
                name == that.name);
    }
    Input name;                 ///< Name of the grammar.
}
2825 
/// `lexer grammar` declaration named `name`.
@safe final class LexerGrammar : TokenNode
{
    this(Token head, Input name) @nogc nothrow
    {
        super(head);
        this.name = name;
    }
    Input name;                 ///< Name of the lexer grammar.
}
2837 
/** `parser grammar` declaration named `name`.
 *
 * See_Also: https://theantlrguy.atlassian.net/wiki/spaces/ANTLR3/pages/2687210/Quick+Starter+on+Parser+Grammars+-+No+Past+Experience+Required
 */
@safe final class ParserGrammar : TokenNode
{
    this(Token head, Input name) @nogc nothrow
    {
        super(head);
        this.name = name;
    }
    Input name;                 ///< Name of the parser grammar.
}
2852 
/// `import` statement listing the grammar modules in `moduleNames`.
@safe final class Import : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
        putchar(' ');
        // Comma-separate the imported module names.
        foreach (const i, const moduleName; moduleNames)
        {
            if (i != 0)
                putchar(',');
            showChars(moduleName);
        }
        putchar(';');
        putchar('\n');
    }
    this(Token head, DynamicArray!(Input) moduleNames) @nogc nothrow
    {
        super(head);
        move(moduleNames, this.moduleNames); // take ownership without copying
    }
    DynamicArray!(Input) moduleNames; ///< Names of the imported grammar modules.
}
2877 
/// Lexical `mode` declaration named `name`.
@safe final class Mode : TokenNode
{
    this(Token head, Input name) @nogc nothrow
    {
        super(head);
        this.name = name;
    }
    Input name;                 ///< Name of the mode.
}
2888 
/// `options {...}` specification; `code` holds the raw options action block.
@safe final class Options : TokenNode
{
nothrow:
    this(Token head, Token code) @nogc
    {
        super(head);
        this.code = code;
    }
    Input name;                 // NOTE(review): never assigned by this class (ctor only sets `code`) — looks vestigial; confirm no external writer before removing
    Token code;                 // action token carrying the options text
}
2900 
/// `header` specification with optional `name` and its action `code`.
@safe final class Header : TokenNode
{
    this(Token head, Token name, Token code) @nogc nothrow
    {
        super(head);
        this.name = name;
        this.code = code;
    }
    Token name;                 ///< Optional header name (may be `Token.init`).
    Token code;                 ///< Action token carrying the header body.
}
2913 
/// `scope NAME { ... }`: named scope carrying an action block.
@safe final class ScopeSymbolAction : TokenNode
{
    this(Token head,
         Input name,
         Token code) @nogc nothrow
    {
        super(head);
        this.name = name;
        this.code = code;
    }
    Input name;                 ///< Scope name.
    Token code;                 ///< Action token carrying the scope body.
}
2928 
/// `scope NAME;`: named scope reference without a body.
@safe final class ScopeSymbol : TokenNode
{
    this(Token head,
         Input name) @nogc nothrow
    {
        super(head);
        this.name = name;
    }
    Input name;                 ///< Scope name.
}
2940 
/// `scope { ... }`: anonymous scope carrying an action block.
@safe final class ScopeAction : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
    this(Token head,
         Token code) @nogc nothrow
    {
        super(head);
        this.code = code;
    }
    Token code;                 ///< Action token carrying the scope body.
}
2956 
/// Attribute symbol followed by its action block `code`.
@safe final class AttributeSymbol : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
    this(Token head, Token code) @nogc nothrow
    {
        super(head);
        this.code = code;
    }
    Token code;                 ///< Action token following the symbol.
}
2971 
/// Bare action block (`{ ... }`) wrapped as a syntax tree node.
@safe final class Action : TokenNode
{
    this(Token head) @nogc nothrow
    {
        super(head);
    }
}
2980 
/// Action symbol followed by its action block `code`.
@safe final class ActionSymbol : TokenNode
{
    override void show(in Format fmt = Format.init) const
    {
        showToken(head, fmt);
    }
    this(Token head, Token code) @nogc nothrow
    {
        super(head);
        this.code = code;
    }
    Token code;                 ///< Action token following the symbol.
}
2995 
/// `channels { ... }` specification holding its action block `code`.
@safe final class Channels : TokenNode
{
    this(Token head, Token code) @nogc nothrow
    {
        super(head);
        this.code = code;
    }
    Token code;                 ///< Action token carrying the channel list.
}
3006 
/// `tokens { ... }` specification holding its action block `code`.
@safe final class Tokens : TokenNode
{
    this(Token head, Token code) @nogc nothrow
    {
        super(head);
        this.code = code;
    }
    override void toMatchInSource(scope ref Output sink, const scope GxParserByStatement parser) const nothrow
    {
        // Intentionally empty: a tokens-spec emits no matcher code.
    }
    Token code;                 ///< Action token carrying the token list.
}
3020 
/// `class NAME (extends BASE)?` declaration.
@safe final class Class : TokenNode
{
    this(Token head, Input name, Input baseName) @nogc nothrow
    {
        super(head);
        this.name = name;
        this.baseName = baseName;
    }
    Input name;                 ///< Class name.
    Input baseName;             ///< Base class name.
}
3033 
alias Imports = DynamicArray!(Import, Mallocator, uint); ///< Malloc-backed, `uint`-indexed array of `Import` nodes.
alias Rules = DynamicArray!(Rule, Mallocator, uint); ///< Malloc-backed, `uint`-indexed array of `Rule` nodes.
alias SymbolRefs = DynamicArray!(SymbolRef, Mallocator, uint); ///< Malloc-backed, `uint`-indexed array of `SymbolRef` nodes.
3037 
3038 /** Gx parser with range interface over all statements.
3039  *
3040  * See: `ANTLRv4Parser.g4`
3041  */
3042 @safe class GxParserByStatement
3043 {
3044     this(Input input,
3045          const string path = null,
3046          in bool includeComments = false)
3047     {
3048         _lexer = GxLexer(input, path, includeComments);
3049         if (!_lexer.empty)
3050             _front = nextFront();
3051     }
3052 
    /// Returns: `true` iff all statements have been consumed (range interface).
    @property bool empty() const nothrow scope @nogc
    {
        version(D_Coverage) {} else version(Do_Inline) version(LDC) pragma(inline, true);
        return _front is null;  // `popFront` nulls `_front` at end of input
    }
3058 
    /// Returns: the current statement node. Must not be called when `empty`.
    inout(Node) front() inout scope return
    {
        version(D_Coverage) {} else version(Do_Inline) pragma(inline, true);
        assert(!empty);
        return _front;
    }
3065 
3066     void popFront()
3067     {
3068         version(D_Coverage) {} else version(Do_Inline) version(LDC) pragma(inline, true);
3069         assert(!empty);
3070         if (_lexer.empty)
3071             _front = null;      // make `this` empty
3072         else
3073             _front = nextFront();
3074     }
3075 
3076     private Rule makeRule(Token name,
3077                           in bool isFragmentRule,
3078                           ActionSymbol actionSymbol = null,
3079                           Action action = null)
3080     {
3081         _lexer.popFrontEnforce(TOK.colon, "no colon");
3082 
3083         bool skipFlag;
3084 
3085         static if (useStaticTempArrays)
3086             FixedArray!(Pattern, 100) alts;
3087         else
3088             PatternArray alts;
3089 
3090         while (_lexer.front.tok != TOK.semicolon)
3091         {
3092             size_t parentDepth = 0;
3093 
3094             // temporary node sequence stack
3095             static if (useStaticTempArrays)
3096                 FixedArray!(Node, 70) tseq; // doesn't speed up that much
3097             else
3098                 NodeArray tseq;
3099 
3100             void seqPutCheck(Node last)
3101             {
3102                 if (last is null)
3103                     return _lexer.warningAtToken(name, "empty sequence");
3104                 if (!_lexer.empty &&
3105                     _lexer.front.tok == TOK.dotdot)
3106                     return tseq.put(last); // ... has higher prescedence
3107                 if (!tseq.empty)
3108                 {
3109                     if (auto dotdot = cast(DotDotSentinel)tseq.back) // binary operator
3110                     {
3111                         tseq.popBack(); // pop `DotDotSentinel`
3112                         return seqPutCheck(new Range(dotdot.head,
3113                                                      [cast(Pattern)tseq.backPop(), cast(Pattern)last]));
3114                     }
3115                     if (auto tilde = cast(TildeSentinel)tseq.back) // prefix unary operator
3116                     {
3117                         tseq.popBack(); // pop `TildeSentinel`
3118                         return seqPutCheck(new NotPattern(tilde.head,
3119                                                           cast(Pattern)last));
3120                     }
3121                     if (auto ng = cast(TerminatedUnaryOpPattern)tseq.back)
3122                     {
3123                         Pattern lastp = cast(Pattern)last;
3124                         assert(lastp);
3125                         ng.terminator = lastp;
3126                     }
3127                 }
3128                 return tseq.put(last);
3129             }
3130 
3131             while ((parentDepth != 0 ||
3132                     _lexer.front.tok != TOK.pipe) &&
3133                    _lexer.front.tok != TOK.semicolon)
3134             {
3135                 // TODO: use static array with length being number of tokens till `TOK.pipe`
3136                 const head = _lexer.frontPop();
3137 
3138                 void groupLastSeq() @safe nothrow
3139                 {
3140                     // find backwards index `ih` in `tseq` at '(' or '|'. TODO: reuse `lastIndexOf`
3141                     size_t ih = tseq.length;
3142                     foreach_reverse (const i, const e; tseq)
3143                     {
3144                         if (auto sym = cast(const PipeSentinel)e)
3145                         {
3146                             ih = i;
3147                             break;
3148                         }
3149                         else if (auto sym = cast(const LeftParenSentinel)e)
3150                         {
3151                             ih = i;
3152                             break;
3153                         }
3154                     }
3155                     if (ih == tseq.length)
3156                         _lexer.errorAtToken(head, "missing left-hand side");
3157                     Pattern nseq = makeSeq(tseq[ih + 1 .. $], _lexer);
3158                     tseq.popBackN(tseq.length - (ih + 1)); // exclude op sentinel
3159                     tseq.insertBack(nseq);                 // put it back
3160                 }
3161 
3162                 switch (head.tok)
3163                 {
3164                 case TOK.symbol:
3165                     if (head.input == "options")
3166                         auto _ = makeRuleOptions(head, true);
3167                     else
3168                     {
3169                         if (_lexer.front.tok == TOK.colon)
3170                         {
3171                             _lexer.popFront();
3172                             continue; // skip element label: SYMBOL '.'. See_Also: https://www.antlr2.org/doc/metalang.html section "Element Labels"
3173                         }
3174                         auto sref = new SymbolRef(head);
3175                         seqPutCheck(sref);
3176                         symbolRefs.insertBack(sref);
3177                     }
3178                     break;
3179                 case TOK.literal:
3180                     seqPutCheck(new StrLiteral(head));
3181                     break;
3182                 case TOK.qmark:
3183                     if (tseq.empty)
3184                         _lexer.errorAtToken(head, "missing left-hand side");
3185                     Pattern node;
3186                     if (auto oom = cast(GreedyOneOrMore)tseq.back)
3187                     {
3188                         // See_Also: https://stackoverflow.com/questions/64706408/rewriting-x-as-x-in-antlr-grammars
3189                         node = new GreedyZeroOrMore(head, oom.sub);
3190                         tseq.popBack();
3191                         _lexer.warningAtToken(head, "read `(X+)?` as `X*`");
3192                     }
3193                     else
3194                         node = new GreedyZeroOrOne(head, tseq.backPop());
3195                     seqPutCheck(node);
3196                     break;
3197                 case TOK.star:
3198                     if (tseq.empty)
3199                         _lexer.errorAtToken(head, "missing left-hand side");
3200                     seqPutCheck(new GreedyZeroOrMore(head, tseq.backPop()));
3201                     break;
3202                 case TOK.plus:
3203                     if (tseq.empty)
3204                         _lexer.errorAtToken(head, "missing left-hand side");
3205                     seqPutCheck(new GreedyOneOrMore(head, tseq.backPop()));
3206                     break;
3207                 case TOK.qmarkQmark:
3208                     if (tseq.empty)
3209                         _lexer.errorAtToken(head, "missing left-hand side");
3210                     seqPutCheck(new NonGreedyZeroOrOne(head, tseq.backPop()));
3211                     break;
3212                 case TOK.starQmark:
3213                     if (tseq.empty)
3214                         _lexer.errorAtToken(head, "missing left-hand side");
3215                     seqPutCheck(new NonGreedyZeroOrMore(head, tseq.backPop()));
3216                     break;
3217                 case TOK.plusQmark:
3218                     if (tseq.empty)
3219                         _lexer.errorAtToken(head, "missing left-hand side");
3220                     seqPutCheck(new NonGreedyOneOrMore(head, tseq.backPop()));
3221                     break;
3222                 case TOK.rewriteSyntacticPredicate:
3223                     if (tseq.empty)
3224                         _lexer.errorAtToken(head, "missing left-hand side");
3225                     seqPutCheck(new RewriteSyntacticPredicate(head, tseq.backPop()));
3226                     break;
3227                 case TOK.tilde:
3228                     tseq.put(new TildeSentinel(head));
3229                     break;
3230                 case TOK.pipe:
3231                     if (tseq.empty)
3232                     {
3233                         _lexer.warningAtFront("missing left-hand side");
3234                         continue;
3235                     }
3236                     else if (const symbol = cast(LeftParenSentinel)tseq.back)
3237                     {
3238                         _lexer.warningAtToken(symbol.head, "missing left-hand side");
3239                         continue;
3240                     }
3241                     groupLastSeq();
3242                     tseq.put(new PipeSentinel(head));
3243                     break;
3244                 case TOK.dotdot:
3245                     tseq.put(new DotDotSentinel(head));
3246                     break;
3247                 case TOK.wildcard:
3248                     seqPutCheck(new AnyClass(head));
3249                     break;
3250                 case TOK.brackets:
3251                     seqPutCheck(parseCharAltM(new CharAltM(head), _lexer));
3252                     break;
3253                 case TOK.hash:
3254                 case TOK.rewrite:
3255                     // ignore `head`
3256                     if (_lexer.front == Token(TOK.symbol, "skip"))
3257                     {
3258                         skipFlag = true;
3259                         _lexer.popFront();
3260                     }
3261                     while (_lexer.front.tok != TOK.pipe &&
3262                            _lexer.front.tok != TOK.semicolon)
3263                     {
3264                         _lexer.warningAtFront("TODO: use rewrite argument");
3265                         _lexer.popFront(); // ignore for now
3266                     }
3267                     break;
3268                 case TOK.leftParen:
3269                     parentDepth += 1;
3270                     tseq.put(new LeftParenSentinel(head));
3271                     break;
3272                 case TOK.rightParen:
3273                     parentDepth -= 1;
3274 
3275                     groupLastSeq();
3276 
3277                     // find matching '(' if any
3278                     size_t si = tseq.length; // left paren sentinel index
3279                     LeftParenSentinel ss;    // left parent index Symbol
3280                     foreach_reverse (const i, Node node; tseq[])
3281                     {
3282                         if (auto lp = cast(LeftParenSentinel)node)
3283                         {
3284                             si = i;
3285                             ss = lp;
3286                             break;
3287                         }
3288                     }
3289 
3290                     PatternArray asubs; // TODO: use stack allocation of length tseq[si .. $].length - number of `PipeSentinel`s
3291                     Token ahead;
3292                     foreach (e; tseq[si + 1.. $])
3293                     {
3294                         if (auto ps = cast(PipeSentinel)e)
3295                         {
3296                             if (ahead != Token.init)
3297                                 ahead = ps.head; // use first '|' as head of alternative below
3298                         }
3299                         else
3300                         {
3301                             Pattern pe = cast(Pattern)e;
3302                             assert(pe);
3303                             asubs.put(pe);
3304                         }
3305                     }
3306                     if (asubs.empty)
3307                     {
3308                         auto nothing = new SeqM(ss.head);
3309                         tseq.popBack(); // pop '('
3310                         seqPutCheck(nothing);
3311                     }
3312                     else
3313                     {
3314                         auto lalt = makeAltA(ahead, asubs.move());
3315                         tseq.popBackN(tseq.length - si);
3316                         Node[1] ssubs = [lalt];
3317                         seqPutCheck(makeSeq(ssubs, _lexer));
3318                     }
3319                     break;
3320                 case TOK.action:
3321                     // ignore action
3322                     _lexer.skipOverTOK(TOK.qmark); // TODO: handle in a more generic way
3323                     break;
3324                 case TOK.labelAssignment:
3325                     // ignore for now: SYMBOL '='
3326                     if (!cast(OtherSymbol)tseq.back)
3327                         _lexer.errorAtFront("non-symbol before label assignment");
3328                     tseq.popBack(); // ignore
3329                     break;
3330                 case TOK.tokenSpecOptions:
3331                     // ignore
3332                     break;
3333                 case TOK.colon:
3334                     // ignore
3335                     _lexer.warningAtFront("ignoring colon with no effect");
3336                     continue;
3337                 case TOK.rootNode:
3338                     /* AST root operator. When generating abstract syntax trees
3339                      * (ASTs), token references suffixed with the "^" root
3340                      * operator force AST nodes to be created and added as the
3341                      * root of the current tree. This symbol is only effective
3342                      * when the buildAST option is set. More information about
3343                      * ASTs is also available. */
3344                     // ignore
3345                     break;
3346                 case TOK.exclamation:
3347                     /* AST exclude operator. When generating abstract syntax
3348                      * trees, token references suffixed with the "!" exclude
3349                      * operator are not included in the AST constructed for that
3350                      * rule. Rule references can also be suffixed with the
3351                      * exclude operator, which implies that, while the tree for
3352                      * the referenced rule is constructed, it is not linked into
3353                      * the tree for the referencing rule. This symbol is only
3354                      * effective when the buildAST option is set. More
3355                      * information about ASTs is also available. */
3356                     // ignore
3357                     break;
3358                 case TOK.semicolon:
3359                     assert(false);
3360                 default:
3361                     _lexer.infoAtFront("TODO: unhandled token type" ~ _lexer.front.to!string);
3362                     seqPutCheck(new OtherSymbol(head));
3363                     break;
3364                 }
3365             }
3366             if (!tseq.length)   // may be empty
3367             {
3368                 // _lexer.warningAtFront("empty rule sequence");
3369             }
3370             static if (useStaticTempArrays)
3371             {
3372                 alts.put(makeSeq(tseq[], _lexer));
3373                 tseq.clear();
3374             }
3375             else
3376             {
3377                 alts.put(makeSeq(tseq[], _lexer)); // TODO: use `tseq.move()` when tseq is a `PatternArray`
3378                 tseq.clear();
3379             }
3380             if (_lexer.front.tok == TOK.pipe)
3381                 _lexer.popFront(); // skip terminator
3382         }
3383 
3384         _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
3385 
3386         // needed for ANTLRv2.g2:
3387         if (!_lexer.empty)
3388         {
3389             // if (_lexer.front == Token(TOK.symbol, "exception"))
3390             //     _lexer.popFront();
3391             // if (_lexer.front == Token(TOK.symbol, "catch"))
3392             //     _lexer.popFront();
3393             if (_lexer.front.tok == TOK.brackets)
3394                 _lexer.popFront();
3395             if (_lexer.front.tok == TOK.action)
3396                 _lexer.popFront();
3397         }
3398 
3399         static if (useStaticTempArrays)
3400         {
3401             Pattern root = alts.length == 1 ? alts.backPop() : makeAltM(Token.init, alts[]);
3402             alts.clear();
3403         }
3404         else
3405             Pattern root = alts.length == 1 ? alts.backPop() : makeAltA(Token.init, alts.move());
3406 
3407         Rule rule = (isFragmentRule
3408                      ? new FragmentRule(name, root, skipFlag)
3409                      : new Rule(name, root, skipFlag));
3410 
3411         if (_lexer._diagnoseLeftRecursion)
3412             rule.diagnoseDirectLeftRecursion(_lexer);
3413 
3414         // insert rule
3415         const warnDuplicateRulePattern = false; // this is normally not an error, for instance, in `oncrpcv2.g4`
3416         if (warnDuplicateRulePattern)
3417             foreach (const existingRule; rules)
3418             {
3419                 if (rule.root.opEquals(existingRule.root))
3420                 {
3421                     _lexer.warningAtToken(rule.head, "rule with same root pattern"); // TODO: error
3422                     _lexer.warningAtToken(existingRule.head, "existing definition here"); // TODO: errorSupplemental
3423                 }
3424             }
3425         rules.insertBack(rule);
3426         rulesByName.update(rule.head.input,        // See_Also: https://dlang.org/spec/hash-map.html#advanced_updating
3427                            {
3428                                return rule;
3429                            },
3430                            (const scope ref Rule existingRule)
3431                            {
3432                                _lexer.warningAtToken(rule.head, "rule with same name already exists"); // TODO: error
3433                                _lexer.warningAtToken(existingRule.head, "existing definition here"); // TODO: errorSupplemental
3434                                return rule;
3435                            });
3436 
3437         return rule;
3438     }
3439 
3440     DynamicArray!(Input) makeArgs(in TOK separator,
3441                                   in TOK terminator)
3442     {
3443         typeof(return) result;
3444         while (true)
3445         {
3446             result.put(_lexer.frontPopEnforce(TOK.symbol).input);
3447             if (_lexer.front.tok != separator)
3448                 break;
3449             _lexer.popFront();
3450         }
3451         _lexer.popFrontEnforce(terminator, "no terminating semicolon");
3452         return result;
3453     }
3454 
3455     AttributeSymbol makeAttributeSymbol(Token head) nothrow
3456     {
3457         return new AttributeSymbol(head, _lexer.frontPopEnforce(TOK.action, "missing action"));
3458     }
3459 
3460     ActionSymbol makeActionSymbol(Token head) nothrow
3461     {
3462         return new ActionSymbol(head, _lexer.frontPopEnforce(TOK.action, "missing action"));
3463     }
3464 
3465     TokenNode makeScope(Token head)
3466     {
3467         if (_lexer.front.tok == TOK.symbol)
3468         {
3469             const symbol = _lexer.frontPop().input;
3470             if (_lexer.front.tok == TOK.action)
3471                 return new ScopeSymbolAction(head, symbol,
3472                                              _lexer.frontPopEnforce(TOK.action, "missing action"));
3473             else
3474             {
3475                 auto result = new ScopeSymbol(head, symbol);
3476                 _lexer.frontPopEnforce(TOK.semicolon,
3477                                           "missing terminating semicolon");
3478                 return result;
3479             }
3480         }
3481         else
3482         {
3483             return new ScopeAction(head,
3484                                    _lexer.frontPopEnforce(TOK.action, "missing action"));
3485         }
3486     }
3487 
3488     Import makeImport(Token head)
3489     {
3490         auto import_ = new Import(head, makeArgs(TOK.comma, TOK.semicolon));
3491         this.imports.put(import_);
3492         return import_;
3493     }
3494 
3495     TokenNode makeClass(Token head)
3496     {
3497         auto result = new Class(head,
3498                                _lexer.frontPopEnforce(TOK.symbol, "missing symbol").input,
3499                                _lexer.skipOverToken(Token(TOK.symbol, "extends")).input ?
3500                                _lexer.frontPop().input :
3501                                null);
3502         _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
3503         return result;
3504     }
3505 
3506     OtherSymbol skipOverOtherSymbol(string symbolIdentifier) return
3507     {
3508         if (_lexer.front == Token(TOK.symbol, symbolIdentifier))
3509             return new typeof(return)(_lexer.frontPop());
3510         return null;
3511     }
3512 
3513     /// Skip over scope if any.
3514     TokenNode skipOverScope()
3515     {
3516         if (_lexer.front == Token(TOK.symbol, "scope"))
3517             return makeScope(_lexer.frontPop());
3518         return null;
3519     }
3520 
3521     Options makeRuleOptions(Token head,
3522                             in bool skipOverColon = false) nothrow
3523     {
3524         version(Do_Inline) version(LDC) pragma(inline, true);
3525         const action = _lexer.frontPopEnforce(TOK.action, "missing action");
3526         if (skipOverColon)
3527             _lexer.skipOverTOK(TOK.colon);
3528         return new Options(head, action);
3529     }
3530 
3531     Options makeTopOptions(Token head) nothrow
3532     {
3533         version(Do_Inline) version(LDC) pragma(inline, true);
3534         const action = _lexer.frontPopEnforce(TOK.action, "missing action");
3535         _lexer.skipOverTOK(TOK.colon); // optionally scoped. See_Also: https://stackoverflow.com/questions/64477446/meaning-of-colon-inside-parenthesises/64477817#64477817
3536         return new Options(head, action);
3537     }
3538 
3539     Channels makeChannels(Token head) nothrow
3540     {
3541         version(Do_Inline) version(LDC) pragma(inline, true);
3542         return new Channels(head, _lexer.frontPopEnforce(TOK.action, "missing action"));
3543     }
3544 
3545     Tokens makeTokens(Token head) nothrow
3546     {
3547         version(Do_Inline) version(LDC) pragma(inline, true);
3548         return new Tokens(head, _lexer.frontPopEnforce(TOK.action, "missing action"));
3549     }
3550 
3551     Header makeHeader(Token head)
3552     {
3553         const name = (_lexer.front.tok == TOK.literal ?
3554                       _lexer.frontPop() :
3555                       Token.init);
3556         const action = _lexer.frontPopEnforce(TOK.action, "missing action");
3557         return new Header(head, name, action);
3558     }
3559 
3560     Mode makeMode(Token head)
3561     {
3562         auto result = new Mode(head, _lexer.frontPop().input);
3563         _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
3564         return result;
3565     }
3566 
3567     Action makeAction(Token head)
3568     {
3569         version(Do_Inline) version(LDC) pragma(inline, true);
3570         return new Action(head);
3571     }
3572 
3573     /// Skip over options if any.
3574     Options skipOverPreRuleOptions()
3575     {
3576         if (_lexer.front == Token(TOK.symbol, "options"))
3577             return makeRuleOptions(_lexer.frontPop());
3578         return null;
3579     }
3580 
3581     bool skipOverExclusion()
3582     {
3583         if (_lexer.front.tok == TOK.exclamation)
3584         {
3585             _lexer.frontPop();
3586             return true;
3587         }
3588         return false;
3589     }
3590 
3591     bool skipOverReturns()
3592     {
3593         if (_lexer.front == Token(TOK.symbol, "returns"))
3594         {
3595             _lexer.frontPop();
3596             return true;
3597         }
3598         return false;
3599     }
3600 
3601     bool skipOverHooks()
3602     {
3603         if (_lexer.front.tok == TOK.brackets)
3604         {
3605             // _lexer.infoAtFront("TODO: use TOK.brackets");
3606             _lexer.frontPop();
3607             return true;
3608         }
3609         return false;
3610     }
3611 
3612     Action skipOverAction()
3613     {
3614         if (_lexer.front.tok == TOK.action)
3615             return makeAction(_lexer.frontPop());
3616         return null;
3617     }
3618 
3619     ActionSymbol skipOverActionSymbol()
3620     {
3621         if (_lexer.front.tok == TOK.actionSymbol)
3622             return makeActionSymbol(_lexer.frontPop);
3623         return null;
3624     }
3625 
    /** Parse a top-level statement starting at symbol token `head`.

        Dispatches on `head` to either a rule definition (the common case), a
        grammar declaration (`grammar`, `lexer grammar`, `parser grammar`) or
        one of the other top-level directives (`options`, `tokens`, `mode`,
        `import`, ...).
     */
    Node makeRuleOrOther(Token head)
    {
        if (_lexer.front.tok == TOK.colon) // normal case
            return makeRule(head, false);  // fast path

        // Grammar declaration: `grammar X;`, `lexer grammar X;` or `parser grammar X;`.
        if (head.input == "lexer" ||
            head.input == "parser" ||
            head.input == "grammar")
        {
            bool lexerFlag;
            bool parserFlag;
            if (head.input == "lexer")
            {
                lexerFlag = true;
                _lexer.popFrontEnforce(TOK.symbol, "expected `grammar` after `lexer`"); // TODO: enforce input grammar
            }
            else if (head.input == "parser")
            {
                parserFlag = true;
                _lexer.popFrontEnforce(TOK.symbol, "expected `grammar` after `parser`"); // TODO: enforce input grammar
            }

            if (lexerFlag)
            {
                // `lexer grammar NAME;` — the popped symbol is the grammar name.
                auto lexerGrammar = new LexerGrammar(head, _lexer.frontPop().input);
                _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
                return this.grammar = lexerGrammar;
            }
            else if (parserFlag)
            {
                // `parser grammar NAME;`
                auto parserGrammar = new ParserGrammar(head, _lexer.frontPop().input);
                _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
                return this.grammar = parserGrammar;
            }
            else
            {
                // Plain `grammar`: may still be a rule literally named `grammar`.
                if (_lexer.front.tok == TOK.colon)
                    return makeRule(head, false);
                else
                {
                    auto grammar = new Grammar(head, _lexer.frontPop().input);
                    _lexer.popFrontEnforce(TOK.semicolon, "no terminating semicolon");
                    this.grammar = grammar;
                    return grammar;
                }
            }
        }

        // Other top-level directives, keyed on the head symbol text.
        switch (head.input)
        {
        case `private`:
            _lexer.frontEnforce(TOK.symbol, "expected symbol after `private`");
            return makeRuleOrOther(_lexer.frontPop); // TODO: set private qualifier
        case `protected`:
            _lexer.frontEnforce(TOK.symbol, "expected symbol after `protected`");
            return makeRuleOrOther(_lexer.frontPop); // TODO: set protected qualifier
        case `channels`:
            return makeChannels(head);
        case `tokens`:
            return makeTokens(head);
        case `options`:
            auto options = makeTopOptions(head);
            optionsSet.insertBack(options);
            return options;
        case `header`:
            return makeHeader(head);
        case `mode`:
            return makeMode(head);
        case `class`:
            return makeClass(head);
        case `scope`:
            return makeScope(head);
        case `import`:
            return makeImport(head);
        case `fragment`: // lexer helper rule, not real token for parser.
            return makeRule(_lexer.frontPop(), true);
        default:
            // Rule with decorations before the colon: skip over any
            // exclusions, `returns`, hooks, `locals`, options, scopes and
            // actions until the colon (or until no skipper makes progress).
            while (_lexer.front.tok != TOK.colon)
            {
                // TODO: use switch
                if (skipOverExclusion()) // TODO: use
                    continue;
                if (skipOverReturns())  // TODO: use
                    continue;
                if (skipOverHooks())    // TODO: use
                    continue;
                if (const _ = skipOverOtherSymbol("locals")) // TODO: use
                    continue;
                if (const _ = skipOverPreRuleOptions()) // TODO: use
                    continue;
                if (const _ = skipOverScope())     // TODO: use
                    continue;
                if (const _ = skipOverAction()) // TODO: use
                    continue;
                if (const _ = skipOverActionSymbol()) // TODO: use
                    continue;
                break;          // no progression so done
            }
            return makeRule(head, false);
        }
    }
3727 
3728     Node nextFront()
3729     {
3730         const head = _lexer.frontPop();
3731         switch (head.tok)
3732         {
3733         case TOK.attributeSymbol:
3734             return makeAttributeSymbol(head);
3735         case TOK.actionSymbol:
3736             return makeActionSymbol(head);
3737         case TOK.blockComment:
3738             return new BlockComment(head);
3739         case TOK.lineComment:
3740             return new LineComment(head);
3741         case TOK.action:
3742             return new Action(head);
3743         case TOK.symbol:
3744             return makeRuleOrOther(head);
3745         default:
3746             _lexer.errorAtFront("TODO: handle");
3747             assert(false);
3748         }
3749     }
3750 
    TokenNode grammar;          ///< Grammar declaration node (set in `makeRuleOrOther`).
    DynamicArray!(Options) optionsSet; ///< Top-level `options` statements (filled in `makeRuleOrOther`).
    Imports imports;            ///< `import` statements.
    Rules rules;                ///< Parsed rules in definition order.
    RulesByName rulesByName;    ///< Rules indexed by rule name.
    SymbolRefs symbolRefs;      ///< All of `SymbolRef` instances.
    bool warnUnknownSymbolFlag; ///< Presumably enables warnings on unknown symbol references — TODO confirm at use site.
private:
    GxLexer _lexer;             ///< Token source for this grammar file.
    Node _front;                ///< Presumably the current front of the statement range — TODO confirm.
    Rule _rootRule;               ///< Root rule for grammar.
}
3763 
/** Convert grammar file `path` to a D parser module name.

    The path is taken relative to `grammarsDirPath`, the extension is
    stripped, each directory separator becomes `__`, dashes become `_`,
    keyword-colliding components are escaped, and `_parser` is appended.

    Params:
        path = path to the grammar file (may start with `~`)
        grammarsDirPath = root directory the module name is made relative to
            (may start with `~`)
    Returns: `path` as module name.
 */
string toPathModuleName(string path,
                        string grammarsDirPath = "~/Work/grammars-v4")
{
    /// Escape directory/file names that collide with D keywords.
    string adjustDirectoryName(const return scope string name) pure nothrow @nogc
    {
        if (name == "asm")      // TODO extend to check if a keyword
            return "asm_";
        return name;
    }

    return path.expandTilde
               .relativePath(grammarsDirPath.expandTilde)
               .stripExtension
               .pathSplitter()
               .map!(_ => adjustDirectoryName(_))
               .joiner("__")    // "/" => "__"
               .substitute('-', '_')
               .to!string ~ "_parser"; // TODO: use lazy ranges that return `char`;
}
3791 
/// Gx file parser: parses one grammar file and generates D parser source from it.
@safe class GxFileParser : GxParserByStatement
{
    /** Read and lex the grammar file at `path`.

        Params:
            path = path to the grammar file (may start with `~`)
            cachedParsersByModuleName = shared cache of already-constructed
                parsers keyed by module name, used to avoid re-parsing
                imported grammars
     */
    this(string path,
         GxFileParser[string] cachedParsersByModuleName)
    {
        Input data = cast(Input)rawReadPath(path.expandTilde); // cast to Input because we don't want to keep all file around:
        super(data, path, false);
        this.cachedParsersByModuleName = cachedParsersByModuleName;
    }

    alias RuleNames = DynamicArray!string; ///< Names of rules already emitted.

    /** Generate the complete D parser module source into `output`.

        Params:
            output = sink the generated source is appended to
            moduleName = set to the module name derived from the grammar path
     */
    void generateParserSourceString(scope ref Output output,
                                    out string moduleName)
    {
        const path = _lexer.path;
        moduleName = path.toPathModuleName();

        output.put("/// Automatically generated from `");
        output.put(path);
        output.put("`.\n");
        output.put("module " ~ moduleName ~ q{;

});
        output.put(parserSourceBegin);
        toMatchers(output);
        output.put(parserSourceEnd);

        rootRule();      // TODO: use this
    }

    /** Get root rule.
     *
     * The root rule is the first non-fragment, non-lexer-token rule that is
     * never referenced by any other rule. Result is cached in `_rootRule`.
     *
     * See_Also: https://github.com/antlr/grammars-v4/issues/2097
     * See_Also: https://stackoverflow.com/questions/29879626/antlr4-how-to-find-the-root-rules-in-a-gramar-which-maybe-used-to-find-the-star
     */
    @property Rule rootRule()
    {
        if (_rootRule)
            return _rootRule;   // cached from a previous call
        retagReferencedRules();
        foreach (Rule rule; rules)
        {
            if (rule.hasRef ||
                rule.skipFlag)
                continue;       // referenced (or skipped) rules cannot be the root
            else if (rule.isFragmentRule)
                _lexer.warningAtToken(rule.head, "unused fragment rule");
            else if (rule.isLexerTokenRule)
                _lexer.warningAtToken(rule.head, "unused (lexical) lexer token rule"); // TODO: don't warn about skipped rules -> skip
            else if (_rootRule)
            {
                _lexer.warningAtToken(rule.head, "second root rule defined");
                _lexer.warningAtToken(_rootRule.head, "  existing root rule defined here");
            }
            else
                _rootRule = rule;
        }
        if (!_rootRule)
            _lexer.warningAtToken(grammar.head, "missing root rule, all rule symbols are referenced (cyclic grammar)");
        return _rootRule;
    }

    /** Retag all referenced rules.
     */
    void retagReferencedRules()
    {
        untagReferencedRules();
        tagReferencedRules();
    }

    /** Untag all rules (as unreferenced).
     */
    void untagReferencedRules() nothrow
    {
        foreach (Rule rule; rulesByName.byValue)
            rule.hasRef = false;
    }

    /** Tag all referenced rules in either `this` or imported modules.
     *
     * For symbols not found locally, each imported module is searched (loaded
     * and cached on demand); a warning is emitted when no module defines the
     * symbol. `EOF` is built-in and never looked up.
     */
    void tagReferencedRules()
    {
        foreach (const symbolRef; symbolRefs)
        {
            if (symbolRef.head.input == "EOF")
                continue;
            if (tryTagReferencedRule(symbolRef))
                continue;
            foreach (const Import import_; imports)
                foreach (const Input moduleName; import_.moduleNames)
                {
                    const string path = _lexer.path;
                    string cwd = path.dirName; // current working directory
                    const string ext = path.extension;
                    typeof(this) sub = findModuleUpwards(cwd, moduleName, ext, cachedParsersByModuleName);
                    //debug writeln("sub: ", sub);
                    if (sub.tryTagReferencedRule(symbolRef))
                        goto done; // found in imported module; skip the warning below
                }
            _lexer.warningAtToken(symbolRef.head, "no symbol named `" ~ symbolRef.head.input ~ "`");
        done:
        }
    }

    /// Tag the local rule named by `symbolRef` as referenced. Returns: `true` on hit.
    bool tryTagReferencedRule(const SymbolRef symbolRef) nothrow @nogc
    {
        debug writeln("path: ", _lexer.path,
                      " symbolRef.head=", symbolRef.head,
                      " rulesByName.length=", rulesByName.length,
                      " rules=", rules.length);
        assert(rulesByName.length);
        if (auto hit = symbolRef.head.input in rulesByName)
        {
            hit.hasRef = true;
            return true;
        }
        return false;
    }

    /// Emit matcher functions for all rules: local first, then imports, then `tokenVocab`.
    void toMatchers(scope ref Output output)
    {
        RuleNames doneRuleNames;
        toMatchersForRules(doneRuleNames, output);
        toMatchersForImports(doneRuleNames, output);
        toMatchersForOptionsTokenVocab(doneRuleNames, output);
    }

    /// Emit matchers for rules of imported module `moduleName` not already in `doneRuleNames`.
    void toMatchersForImportedModule(in const(char)[] moduleName,
                                     scope ref RuleNames doneRuleNames,
                                     scope ref Output output) scope
    {
        const string path = _lexer.path;
        string cwd = path.dirName; // current working directory
        const string ext = path.extension;

        GxFileParser fp_ = findModuleUpwards(cwd, moduleName, ext, cachedParsersByModuleName);

        while (!fp_.empty)      // drain to fully parse the imported module
            fp_.popFront();

        fp_.toMatchersForImports(doneRuleNames, output); // transitive imports

        /** Rules in the “main grammar” override rules from imported
            grammars to implement inheritance.
            See_Also: https://github.com/antlr/antlr4/blob/master/doc/grammars.md#grammar-imports
        */
        bool isOverridden(const scope Rule rule) const @safe pure nothrow @nogc
        {
            return doneRuleNames[].canFind(rule.head.input);
        }

        foreach (const Rule importedRule; fp_.rules)
        {
            if (isOverridden(importedRule)) // if `importedRule` has already been defined
            {
                fp_._lexer.warningAtToken(importedRule.head, "ignoring rule overridden in top grammar");
                continue;
            }
            importedRule.toMatcherInSource(output, this);
            doneRuleNames.put(importedRule.head.input);
        }
    }

    /** Find (and cache) the parser for `moduleName`, looking in `cwd` and
     * then retrying in each parent directory until the filesystem root.
     * Throws: `FileException` when no directory contains the module file.
     */
    private static GxFileParser findModuleUpwards(const string cwd,
                                                  scope const(char)[] moduleName,
                                                  scope const string ext,
                                                  GxFileParser[string] cachedParsersByModuleName)
    {
        if (auto existingParser = moduleName in cachedParsersByModuleName)
        {
            debug writeln("reusing existing parser for module named ", moduleName.idup);
            return *existingParser;
        }
        import std.file : FileException;
        string modulePath;
        () @trusted {
            /* TODO: avoid cast somehow
               https://forum.dlang.org/post/ypnssfszqfuhjremnsqo@forum.dlang.org
            */
            modulePath = cast(string)chainPath(cwd, moduleName ~ ext).array;
        } ();
        try
            return cachedParsersByModuleName[moduleName.to!string] = new GxFileParser(modulePath, cachedParsersByModuleName);
        catch (Exception e)
        {
            const cwdNext = cwd.dirName;
            if (cwdNext == cwd) // stuck at top directory
                throw new FileException("couldn't find module named " ~ moduleName); // TODO: add source of import statement
            return findModuleUpwards(cwdNext, moduleName, ext, cachedParsersByModuleName);
        }
    }

    /// Emit matchers for all local rules, recording their names in `doneRuleNames`.
    void toMatchersForRules(scope ref RuleNames doneRuleNames, scope ref Output output) const scope
    {
        foreach (const Rule rule; rules)
        {
            // rule.show();
            rule.toMatcherInSource(output, this);
            doneRuleNames.put(rule.head.input);
        }
    }

    /// Emit matchers for every module named in the grammar's `import` statements.
    void toMatchersForImports(scope ref RuleNames doneRuleNames, scope ref Output output) scope
    {
        foreach (const import_; imports)
            foreach (const moduleName; import_.moduleNames)
                toMatchersForImportedModule(moduleName, doneRuleNames, output);
    }

    /// Emit matchers for modules referenced via `options { tokenVocab = ...; }`.
    void toMatchersForOptionsTokenVocab(scope ref RuleNames doneRuleNames, scope ref Output output) scope
    {
        foreach (const options; optionsSet[])
        {
            const(char)[] co = options.code.input;

            // Drop leading whitespace from `co`.
            void skipWhitespace()
            {
                import std.algorithm.comparison : among;
                size_t i;
                // NOTE(review): condition looks like it should be `i < co.length` —
                // as written `co[i]` can overrun when the remainder is all whitespace; confirm.
                while (co.length &&
                       co[i].among!(GxLexer.whiteChars))
                    i += 1;
                co = co[i .. $];
            }

            co.skipOverAround('{', '}');

            skipWhitespace();

            // See_Also: https://stackoverflow.com/questions/28829049/antlr4-any-difference-between-import-and-tokenvocab
            if (co.skipOver("tokenVocab"))
            {
                skipWhitespace();
                co.skipOver('=');
                skipWhitespace();
                if (const ix = co.indexOfEither(" ;")) // NOTE(review): treats index 0 as "not found"
                {
                    const module_ = co[0 .. ix];
                    toMatchersForImportedModule(module_, doneRuleNames, output);
                }
            }
        }
    }
    GxFileParser[string] cachedParsersByModuleName; ///< Shared cache of parsers keyed by module name.
}
4039 
/// Leading boilerplate of every generated parser module: the `Input` alias,
/// the `Match` result type and the opening of the `Parser` combinator struct
/// (closed by `parserSourceEnd`). This is emitted verbatim into generated
/// source, so its contents must remain valid D.
static immutable parserSourceBegin =
`alias Input = const(char)[];

@safe struct Match
{
@safe pure nothrow @nogc:
    static Match zero()
    {
        return typeof(return)(0);
    }
    static Match none()
    {
        return typeof(return)(_length.max);
    }
    /// Match length in number of UTF-8 chars or 0 if empty.
    @property uint length()
    {
        return _length;
    }
    bool opCast(U : bool)() const
    {
        return _length != _length.max;
    }
    this(size_t length)
    {
        assert(length <= _length.max);
        this._length = cast(typeof(_length))length;
    }
    const uint _length;                // length == uint.max is no match
}

/// https://forum.dlang.org/post/zcvjwdetohmklaxriswk@forum.dlang.org
version(none) alias Matcher = Match function(lazy Matcher[] matchers...);

@safe struct Parser
{
    Input inp;                  ///< Input.
    size_t off;                 ///< Current offset into inp.

    Match EOF() pure nothrow @nogc
    {
        pragma(inline, true);
        if (inp[off] == '\r' &&
            inp[off + 1] == '\n') // Windows
        {
            off += 2;
            return Match(2);
        }
        if (inp[off] == '\n' || // Unix/Linux
            inp[off] == '\r')   // Mac?
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match any() pure nothrow @nogc
    {
        version(LDC) pragma(inline, true);
        if (off == inp.length)  // TODO:
            return Match.none();
        off += 1;
        return Match(1);
    }

    Match ch(in char x) pure nothrow @nogc
    {
        version(LDC) pragma(inline, true);
        if (off == inp.length)  // TODO:
            return Match.none();
        if (inp[off] == x)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match dch(const dchar x) pure nothrow @nogc
    {
        import std.typecons : Yes;
        import std.utf : encode;
        char[4] ch4;
        const replacementChar = cast(dchar)0x110000;
        const n = encode!(Yes.useReplacementDchar)(ch4, replacementChar);
        if (ch4[0 .. n] == [239, 191, 189]) // encoding of replacementChar
            return Match.none();
        if (off + n > inp.length) // TODO:
            return Match.none();
        if (inp[off .. off + n] == ch4[0 .. n])
        {
            off += n;
            return Match(n);
        }
        return Match.none();
    }

    Match cc(string cclass)() pure nothrow @nogc
    {
        pragma(inline, true);
        off += 1;               // TODO: switch on cclass
        if (off > inp.length)   // TODO:
            return Match.none();
        return Match(1);
    }

    /// Match string x.
    Match str(const scope string x) pure nothrow @nogc
    {
        pragma(inline, true);
        if (off + x.length <= inp.length && // TODO: optimize by using null-sentinel
            inp[off .. off + x.length] == x) // inp[off .. $].startsWith(x)
        {
            off += x.length;
            return Match(x.length);
        }
        return Match.none();
    }

    Match seq(Matchers...)(const scope lazy Matchers matchers)
    {
        const off0 = off;
        static foreach (const matcher; matchers)
        {{                      // scoped
            const match = matcher();
            if (!match)
            {
                off = off0;     // backtrack
                return match;   // propagate failure
            }
        }}
        return Match(off - off0);
    }

    Match alt(Matchers...)(const scope lazy Matchers matchers)
    {
        static foreach (const matcher; matchers)
        {{                      // scoped
            const off0 = off;
            if (const match = matcher())
                return match;
            else
                off = off0;     // backtrack
        }}
        return Match.none();
    }

    Match not(Matcher)(const scope lazy Matcher matcher)
    {
        const off0 = off;
        const match = matcher();
        if (!match)
            return match;
        off = off0;             // backtrack
        return Match.none();
    }

    Match alt2(char a, char b)() pure nothrow @nogc
    {
        pragma(inline, true);
        const x = inp[off];
        if (x == a ||
            x == b)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match alt3(char a, char b, char c)() pure nothrow @nogc
    {
        pragma(inline, true);
        const x = inp[off];
        if (x == a ||
            x == b ||
            x == c)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match alt4(char a, char b, char c, char d)() pure nothrow @nogc
    {
        pragma(inline, true);
        const x = inp[off];
        if (x == a ||
            x == b ||
            x == c ||
            x == d)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match alt5(char a, char b, char c, char d, char e)() pure nothrow @nogc
    {
        pragma(inline, true);
        const x = inp[off];
        if (x == a ||
            x == b ||
            x == c ||
            x == d ||
            x == e)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match altN(chars...)() pure nothrow @nogc // TODO: non-char type in chars
    {
        pragma(inline, true);
        import std.algorithm.comparison : among; // TODO: replace with switch over static foreach to speed up compilation
        const x = inp[off];
        if (x.among!(chars))
        {
            off += 1; // TODO: skip over number of chars needed to encode hit
            return Match(1);
        }
        return Match.none();
    }

    Match rng(in char lower, in char upper) pure nothrow @nogc
    {
        pragma(inline, true);
        const x = inp[off];
        if (lower <= x &&
            x <= upper)
        {
            off += 1;
            return Match(1);
        }
        return Match.none();
    }

    Match rng(in dchar lower, in dchar upper) pure nothrow @nogc
    {
        pragma(inline, true);
        // TODO: decode dchar at inp[off]
        const x = inp[off];
        if (lower <= x &&
            x <= upper)
        {
            off += 1; // TODO: handle dchar at inp[off]
            return Match(1);
        }
        return Match.none();
    }

    Match gzm(Matcher)(const scope lazy Matcher matcher)
    {
        const off0 = off;
        while (true)
        {
            const off1 = off;
            const match = matcher();
            if (!match)
            {
                off = off1;     // backtrack
                break;
            }
        }
        return Match(off - off0);
    }

    Match gzo(Matcher)(const scope lazy Matcher matcher)
    {
        const off0 = off;
        const match = matcher();
        if (!match)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        return Match(off - off0);
    }

    Match gom(Matcher)(const scope lazy Matcher matcher)
    {
        const off0 = off;
        const match0 = matcher;
        if (!match0)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        while (true)
        {
            const off1 = off;
            const match1 = matcher;
            if (!match1)
            {
                off = off1;     // backtrack
                break;
            }
        }
        return Match(off - off0);
    }

    // TODO merge overloads of nzo by using a default type and value for Matcher2
    Match nzo(Matcher1)(const scope lazy Matcher1 matcher)
    {
        const off0 = off;
        off = off0;             // backtrack
        const match = matcher();
        if (!match)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        return Match(off - off0);
    }
    Match nzo(Matcher1, Matcher2)(const scope lazy Matcher1 matcher, const scope lazy Matcher2 terminator)
    {
        const off0 = off;
        if (terminator())
        {
            off = off0;         // backtrack
            return Match.zero(); // done
        }
        off = off0;             // backtrack
        const match = matcher();
        if (!match)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        return Match(off - off0);
    }

    // TODO merge overloads of nzm by using a default type and value for Matcher2
    Match nzm(Matcher1)(const scope lazy Matcher1 matcher)
    {
        const off0 = off;
        while (true)
        {
            const off1 = off;
            off = off1;         // backtrack
            const off2 = off;
            const match = matcher();
            if (!match)
            {
                off = off2;     // backtrack
                break;
            }
        }
        return Match(off - off0);
    }
    Match nzm(Matcher1, Matcher2)(const scope lazy Matcher1 matcher, const scope lazy Matcher2 terminator)
    {
        const off0 = off;
        while (true)
        {
            const off1 = off;
            if (terminator())
            {
                off = off1;     // backtrack
                return Match(off1 - off0); // done
            }
            off = off1;         // backtrack
            const off2 = off;
            const match = matcher();
            if (!match)
            {
                off = off2;     // backtrack
                break;
            }
        }
        return Match(off - off0);
    }

    // TODO merge overloads of nom by using a default type and value for Matcher2
    Match nom(Matcher1)(const scope lazy Matcher1 matcher)
    {
        const off0 = off;
        bool firstFlag;
        while (true)
        {
            const off1 = off;
            off = off1;         // backtrack
            const off2 = off;
            const match = matcher();
            if (!match)
            {
                off = off2;     // backtrack
                break;
            }
            firstFlag = true;
        }
        if (!firstFlag)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        return Match(off - off0);
    }
    Match nom(Matcher1, Matcher2)(const scope lazy Matcher1 matcher, const scope lazy Matcher2 terminator)
    {
        const off0 = off;
        bool firstFlag;
        while (true)
        {
            const off1 = off;
            if (terminator())
            {
                off = off1;     // backtrack
                return Match(off1 - off0); // done
            }
            off = off1;         // backtrack
            const off2 = off;
            const match = matcher();
            if (!match)
            {
                off = off2;     // backtrack
                break;
            }
            firstFlag = true;
        }
        if (!firstFlag)
        {
            off = off0;         // backtrack
            return Match.none();
        }
        return Match(off - off0);
    }

    Match syn(Matcher)(const scope lazy Matcher matcher)
    {
        return Match.zero(); // pass, backtracking is performed by default
    }

`;

/// Trailing boilerplate closing the generated `Parser` struct.
static immutable parserSourceEnd =
`} // struct Parser
`;
4483 
/// Reads and fully parses a single grammar file via a `GxFileParser`.
@safe struct GxFileReader
{
    GxFileParser fp;

    /// Construct a parser for `path` and drain it so all statements are parsed.
    this(string path, GxFileParser[string] cachedParsersByModuleName)
    {
        fp = new GxFileParser(path, cachedParsersByModuleName);
        for (; !fp.empty; fp.popFront()) {}
    }

    /// Generate the parser source, write it under `tempDir` and return its path.
    string createParserSourceFilePath(out string moduleName)
    {
        import std.file : write;
        Output parserSource;
        fp.generateParserSourceString(parserSource, moduleName);
        const grammarPath = fp._lexer.path;
        const parserPath = chainPath(tempDir(), grammarPath.baseName.stripExtension).array ~ "_parser.d";
        write(parserPath, parserSource[]);
        debug writeln("Wrote ", parserPath);
        return parserPath.to!(typeof(return));
    }

    ~this() @nogc {}
}
4508 
/// A source file, wrapping a `std.stdio.File` handle.
@safe struct SourceFile
{
    private File _file;         ///< Underlying file handle.
    alias _file this;           ///< Forward all `File` operations.

    /// Open `name` with mode `stdioOpenmode` (default: binary read).
    this(string name, scope const(char)[] stdioOpenmode = "rb") @safe
    {
        _file = File(name, stdioOpenmode);
    }
}
4518 
/// An object (compiled) file, wrapping a `std.stdio.File` handle.
@safe struct ObjectFile
{
    private File _file;         ///< Underlying file handle.
    alias _file this;           ///< Forward all `File` operations.

    /// Open `name` with mode `stdioOpenmode` (default: binary read).
    this(string name, scope const(char)[] stdioOpenmode = "rb") @safe
    {
        _file = File(name, stdioOpenmode);
    }
}
4528 
/// An executable file, wrapping a `std.stdio.File` handle.
@safe struct ExecutableFile
{
    /// Open `name` with mode `stdioOpenmode` (default: binary read).
    this(string name, scope const(char)[] stdioOpenmode = "rb") @safe
    {
        _file = File(name, stdioOpenmode);
    }
    private File _file; // `private` for consistency with `SourceFile` and `ObjectFile`
    alias _file this;
}
4538 
/// Name of the generated program entry-point function.
static immutable mainName = `main`;

/// Source of the generated entry point; currently a no-op returning success.
static immutable mainSource =
`int ` ~ mainName ~ `(string[] args)
{
	return 0;
}
`;
4547 
/** Create and write the main (entry-point) D source file at `path`,
    importing each of `parserModules` and appending `mainSource`.

    Params:
        path = path of the main file to create
        parserPaths = paths of the generated parser sources (must parallel `parserModules`)
        parserModules = module names the main file imports
    Returns: the written (and closed) `SourceFile`.
 */
SourceFile createMainFile(string path,
                          const string[] parserPaths,
                          const string[] parserModules)
{
    assert(parserPaths.length == parserModules.length);
    auto file = typeof(return)(path, "w");
    // Fix: iterate `parserModules` directly; the old loop indexed over
    // `parserPaths` but never used the path element.
    foreach (const parserModule; parserModules)
    {
        file.write(`import `);
        file.write(parserModule);
        file.write(";\n");
    }

    file.write("\n");           // separator
    file.write(mainSource);
    file.close();
    return file;
}
4566 
/** Build the D source files `parserPaths` with `dmd`.
 *
 *  Params:
 *      parserPaths = generated parser source files to compile
 *      parserModules = module names matching `parserPaths` (used for the
 *                      driver's imports when linking)
 *      linkFlag = when `true`, also generate a driver `main` and link an
 *                 executable; otherwise compile to an object file only
 *  Returns: name of the produced output file (`parser` or `parser.o`).
 */
string buildSourceFiles(const string[] parserPaths,
                        const string[] parserModules,
                        in bool linkFlag = false)
{
    import std.process : execute;

    const mainFilePath = buildPath(tempDir(), "gxmain.d");

    // The driver file is only passed to the compiler when linking, so only
    // generate it in that case (previously it was always written, and the
    // unused locals `mainDirPath`/`mainFile` have been dropped).
    if (linkFlag)
        createMainFile(mainFilePath, parserPaths, parserModules);

    const parserName = "parser";
    const outFile = parserName ~ (linkFlag ? "" : ".o"); // executable when linking, else object file
    const args = (["dmd"] ~
                  (linkFlag ? [] : ["-c"]) ~ // -c: compile only, do not link
                  ["-dip25", "-dip1000", "-vcolumns", "-wi"] ~
                  parserPaths ~
                  (linkFlag ? [mainFilePath] : []) ~
                  ("-of=" ~ outFile));
    writeln("args:", args);
    const dmd = execute(args);
    if (dmd.status == 0)
        writeln("Compilation of ", parserPaths, " successful");
    else
        writeln("Compilation of ", parserPaths, " failed with output:\n",
                dmd.output);
    return outFile;
}
4596 
/// Returns: `true` iff `name` carries the ANTLR4 grammar extension `.g4`.
private bool isGxFilename(const scope char[] name) @safe pure nothrow @nogc
{
    enum ext = `.g4`;
    return name.length >= ext.length && name[$ - ext.length .. $] == ext;
}
4601 
/// Returns: `true` iff `name` is a `.g4` grammar that is not on the skip list
/// of grammars known to be unparsable (yet).
private bool isGxFilenameParsed(const scope char[] name) @safe pure nothrow @nogc
{
    if (!isGxFilename(name))
        return false;
    // Pick specific file:
    // if (name != `Arithmetic.g4`)
    //     return false;
    switch (name)
    {
        // TODO:
    case `Python2.g4`:
    case `Python3.g4`:
    case `AltPython3.g4`:
    case `PythonParser.g4`:
        // TODO:
    case `ResourcePlanParser.g4`:
    case `SelectClauseParser.g4`:
    case `IdentifiersParser.g4`:
        // TODO:
    case `AspectJParser.g4`: // TODO: find rule for `annotationName` in apex.g4
    case `AspectJLexer.g4`:
        // TODO: missing tokens
    case `FromClauseParser.g4`:
    case `TSqlParser.g4`:
    case `informix.g4`:
    case `icon.g4`:
    case `ANTLRv4Parser.g4`:
    case `JPA.g4`: // INT_NUMERAL missing
    case `STParser.g4`:
    case `STGParser.g4`:
        // TODO:
    case `RexxParser.g4`:
    case `RexxLexer.g4`:
    case `StackTrace.g4`:
    case `memcached_protocol.g4`: // skip this crap
        return false;
    default:
        return true;
    }
}
4638 
import std.datetime.stopwatch : StopWatch;
import std.file : dirEntries, SpanMode, getcwd;

/// Compile-time toggle: print per-file progress and timing while walking trees.
enum showProgressFlag = true;
4643 
/// Lex every `.g4` grammar found (breadth-first) under `bcx.rootDirPath`,
/// reporting per-file and total timings to `bcx.outFile`.
void lexAllInDirTree(scope ref BuildCtx bcx) @system
{
    scope StopWatch swTotal;
    swTotal.start();
    foreach (const entry; dirEntries(bcx.rootDirPath, SpanMode.breadth))
    {
        const path = entry.name;
        if (!path.isGxFilename)
            continue;           // skip everything but ANTLR4 grammars
        static if (showProgressFlag)
            bcx.outFile.writeln("Lexing ", tryRelativePath(bcx.rootDirPath, path), " ...");  // TODO: report path relative to current directory
        const data = cast(Input)rawReadPath(path); // exclude from benchmark
        scope StopWatch swFile; // timer for this single grammar
        swFile.start();
        auto lexer = GxLexer(data, path, false);
        while (!lexer.empty)
            lexer.popFront();
        static if (showProgressFlag)
            bcx.outFile.writeln("Lexing ", tryRelativePath(bcx.rootDirPath, path), " took ", swFile.peek());
    }
    bcx.outFile.writeln("Lexing all took ", swTotal.peek());
}
4667 
/// Build Context: options and state shared by the lexing/parsing/build passes.
@safe struct BuildCtx
{
    string rootDirPath;         ///< Root of the directory tree scanned (breadth-first) for grammars.
    File outFile;               ///< Sink for progress and diagnostic output.
    bool buildSingleFlag;       ///< Build each parser separately in a separate compilation.
    bool buildAllFlag;          ///< Build all parsers together in a common single compilation.
    bool lexerFlag;             ///< Flag for separate lexing pass.
    bool parserFlag;            ///< Flag for separate parsing pass.
    private GxFileParser[string] cachedParsersByModuleName; ///< Parser cache keyed by module name.
}
4679 
/** Parse every accepted `.g4` grammar under `bcx.rootDirPath`, generate a D
 *  parser source file for each, and optionally compile them — individually
 *  (`bcx.buildSingleFlag`) and/or all together (`bcx.buildAllFlag`).
 */
void parseAllInDirTree(scope ref BuildCtx bcx) @system
{
    scope StopWatch swAll;
    swAll.start();
    DynamicArray!string parserPaths; ///< Paths to generated parsers in D.
    DynamicArray!string parserModules; ///< Module names, parallel to `parserPaths`.
    foreach (const e; dirEntries(bcx.rootDirPath, SpanMode.breadth))
    {
        const fn = e.name;
        const dn = fn.dirName;
        const bn = fn.baseName;
        if (bn.isGxFilenameParsed)
        {
            const exDirPath = buildPath(dn, "examples"); // examples directory
            import std.file : exists, isDir;
            const showParseExample = false; // currently disabled: example parsing is not implemented yet
            if (showParseExample &&
                exDirPath.exists &&
                exDirPath.isDir)
                foreach (const exf; dirEntries(exDirPath, SpanMode.breadth))
                    bcx.outFile.writeln("TODO: Parse example file: ", exf);
            static if (showProgressFlag)
                bcx.outFile.writeln("Reading ", tryRelativePath(bcx.rootDirPath, fn), " ...");

            scope StopWatch swOne; // timer for this single grammar
            swOne.start();

            // Parse the grammar and emit its D parser source to a temp file.
            auto reader = GxFileReader(fn, bcx.cachedParsersByModuleName);
            string parserModule;
            const parserPath = reader.createParserSourceFilePath(parserModule);
            if (parserPaths[].canFind(parserPath)) // TODO: remove because this should not happen
                bcx.outFile.writeln("Warning: duplicate entry outFile ", parserPath);
            else
            {
                parserPaths.insertBack(parserPath);
                parserModules.insertBack(parserModule);
            }

            // Per-grammar compilation (linked into an executable).
            if (bcx.buildSingleFlag)
                const parseExePath = buildSourceFiles([parserPath], [parserModule], true);

            static if (showProgressFlag)
                bcx.outFile.writeln("Reading ", tryRelativePath(bcx.rootDirPath, fn), " took ", swOne.peek());
        }
    }
    // Common compilation of all generated parsers (object file only by default).
    if (bcx.buildAllFlag)
        const parseExePath = buildSourceFiles(parserPaths[], parserModules[]);
    bcx.outFile.writeln("Reading all took ", swAll.peek());
}
4729 
/// Run the passes enabled in `bcx` over its directory tree.
/// Order matters: the lexing pass (if enabled) runs before the parsing pass.
void doTree(scope ref BuildCtx bcx) @system
{
    if (bcx.lexerFlag) { lexAllInDirTree(bcx); }
    if (bcx.parserFlag) { parseAllInDirTree(bcx); }
}
4737 
/// Returns: `path` rewritten relative to the current working directory when
/// `rootDirPath` lies under it; otherwise `path` unchanged.
string tryRelativePath(scope string rootDirPath,
                       const return scope string path) @safe
{
    const cwd = getcwd();
    if (!rootDirPath.startsWith(cwd))
        return path;            // root is elsewhere; keep the absolute path
    return path.relativePath(cwd);
}
4746 
4747 ///
4748 version(show)
4749 @system unittest
4750 {
4751     BuildCtx bcx = {
4752         rootDirPath : "~/Work/grammars-v4/".expandTilde,
4753         outFile : stdout,
4754         buildSingleFlag : true,
4755         buildAllFlag : true,
4756         lexerFlag : false,
4757         parserFlag : true,
4758     };
4759     doTree(bcx);
4760 }