1 /**
2    File Scanning Engine.
3 
4    Make rich use of Sparse Distributed Representations (SDR) using Hash Digests
5    for relating Data and its Relations/Properties/Meta-Data.
6 
7    See_Also: http://stackoverflow.com/questions/12629749/how-does-grep-run-so-fast
   See_Also: http://www.regular-expressions.info/powergrep.html
9    See_Also: http://ridiculousfish.com/blog/posts/old-age-and-treachery.html
10    See_Also: http://www.olark.com/spw/2011/08/you-can-list-a-directory-with-8-million-files-but-not-with-ls/
11 
   TODO: Make use of parallelism_ex: pmap
13 
14    TODO: Call filterUnderAnyOfPaths using std.algorithm.filter directly on AAs. Use byPair or use AA.get(key, defaultValue)
15          See_Also: http://forum.dlang.org/thread/mailman.75.1392335793.6445.digitalmars-d-learn@puremagic.com
16          See_Also: https://github.com/D-Programming-Language/druntime/pull/574
17 
18    TODO: Count logical lines.
19    TODO: Lexers should be loosely coupled to FKinds instead of Files
20    TODO: Generic Token[] and specific CToken[], CxxToken[]
21 
22    TODO: Don't scan for duplicates inside vc-dirs by default
23 
24    TODO: Assert that files along duplicates path don't include symlinks
25 
26    TODO: Implement FOp.deduplicate
27    TODO: Prevent rescans of duplicates
28 
29    TODO: Defined generalized_specialized_two_way_relationship(kindD, kindDi)
30 
31    TODO: Visualize hits using existingFileHitContext.asH!1 followed by a table:
32          ROW_NR | hit string in <code lang=LANG></code>
33 
34    TODO: Parse and Sort GCC/Clang Compiler Messages on WARN_TYPE FILE:LINE:COL:MSG[WARN_TYPE] and use Collapsable HTML Widgets:
35          http://api.jquerymobile.com/collapsible/
36          when presenting them
37 
38    TODO: Maybe make use of https://github.com/Abscissa/scriptlike
39 
40    TODO: Calculate Tree grams and bist
41 
42    TODO: Get stats of the link itself not the target in SymLink constructors
43 
44    TODO: RegFile with FileContent.text should be decodable to Unicode using
45    either iso-latin1, utf-8, etc. Check std.uni for how to try and decode stuff.
46 
47    TODO: Search for subwords.
48    For example gtk_widget should also match widget_gtk and GtkWidget etc.
49 
50    TODO: Support multi-line keys
51 
52    TODO: Use hash-lookup in txtFKinds.byExt for faster guessing of source file
53    kind. Merge it with binary kind lookup. And check FileContent member of
54    kind to instead determine if it should be scanned or not.
55    Sub-Task: Case-Insensitive Matching of extensions if
56    nothing else passes.
57 
58    TODO: Detect symlinks with duplicate targets and only follow one of them and
59    group them together in visualization
60 
61    TODO: Add addTag, removeTag, etc and interface to fs.d for setting tags:
62    --add-tag=comedy, remove-tag=comedy
63 
64    TODO: If files ends with ~ or .backup assume its a backup file, strip it from
65    end match it again and set backupFlag in FileKind
66 
67    TODO: Acronym match can make use of normal histogram counts. Check denseness
68    of binary histogram (bist) to determine if we should use a sparse or dense
69    histogram.
70 
71    TODO: Activate and test support for ELF and Cxx11 subkinds
72 
73    TODO: Call either File.checkObseleted upon inotify. checkObseleted should remove stuff from hash tables
74    TODO: Integrate logic in clearCStat to RegFile.makeObselete
75    TODO: Upon Dir inotify call invalidate _depth, etc.
76 
77    TODO: Following command: fs.d --color -d ~/ware/emacs -s lispy  -k
78    shows "Skipped PNG file (png) at first extension try".
79    Assure that this logic reuses cache and instead prints something like "Skipped PNG file using cached FKind".
80 
81    TODO: Cache each Dir separately to a file named after SHA1 of its path
82 
83    TODO: Add ASCII kind: Requires optional stream analyzer member of FKind in
84    replacement for magicData. ASCIIFile
85 
86    TODO: Defined NotAnyKind(binaryKinds) and cache it
87 
88    TODO: Create PkZipFile() in Dir.load() when FKind "pkZip Archive" is found.
89    Use std.zip.ZipArchive(void[] from mmfile)
90 
91    TODO: Scan Subversion Dirs with http://pastebin.com/6ZzPvpBj
92 
93    TODO: Change order (binHit || allBHist8Miss) and benchmark
94 
95    TODO: Display modification/access times as:
96    See: http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com
97 
98    TODO: Use User Defined Attributes (UDA): http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com
99    TODO: Use msgPack @nonPacked when needed
100 
101    TODO: Limit lines to terminal width
102 
103    TODO: Create array of (OFFSET, LENGTH) and this in FKind Pattern factory
104    function.  Then for source file extra slice at (OFFSET, LENGTH) and use as
105    input into hash-table from magic (if its a Lit-pattern to)
106 
107    TODO: Verify that "f.tar.z" gets tuple extensions tuple("tar", "z")
108    TODO: Verify that "libc.so.1.2.3" gets tuple extensions tuple("so", "1", "2", "3") and "so" extensions should the be tried
109    TODO: Cache Symbols larger than three characters in a global hash from symbol to path
110 
111    TODO: Benchmark horspool.d and perhaps use instead of std.find
112 
113    TODO: Splitting into keys should not split arguments such as "a b"
114 
115    TODO: Perhaps use http://www.chartjs.org/ to visualize stuff
116 
117    TODO: Make use of @nonPacked in version(msgpack).
118 */
119 module nxt.fs;
120 
121 version = msgpack; // Use msgpack serialization
122 /* version = cerealed; // Use cerealed serialization */
123 
124 import std.stdio: ioFile = File, stdout;
125 import std.typecons: Tuple, tuple;
126 import std.algorithm: find, map, filter, reduce, max, min, uniq, all, joiner;
127 import std.string: representation, chompPrefix;
128 import std.stdio: write, writeln, writefln;
129 import std.path: baseName, dirName, isAbsolute, dirSeparator, extension, buildNormalizedPath, expandTilde, absolutePath;
130 import std.datetime;
131 import std.file: FileException;
132 import std.digest.sha: sha1Of, toHexString;
133 import std.range: repeat, array, empty, cycle, chain;
134 import std.stdint: uint64_t;
135 import std.traits: Unqual, isInstanceOf, isIterable;
136 import std.experimental.allocator;
137 import std.functional: memoize;
138 import std.complex: Complex;
139 
140 import nxt.predicates: isUntouched;
141 
142 import core.memory: GC;
143 import core.exception;
144 
145 import nxt.traits_ex;
146 import nxt.getopt_ex;
147 import nxt.digest_ex;
148 import nxt.algorithm_ex;
149 import nxt.codec;
150 import nxt.csunits;
151 alias Bytes64 = Bytes!ulong;
152 import arsd.terminal : Color;
153 import nxt.symbolic;
154 import nxt.static_bitarray;
155 import nxt.dbgio;
156 import nxt.tempfs;
157 import nxt.rational: Rational;
158 import nxt.ngram;
159 import nxt.notnull;
160 import nxt.pretty;
161 
162 import nxt.geometry;
163 import nxt.random_ex;
164 import nxt.mathml;
165 import nxt.mangling;
166 import nxt.lingua;
167 import nxt.attributes;
168 import nxt.find_ex;
169 
170 import nxt.elf;
171 import nxt.typedoc;
172 import lock_free.rwqueue;
173 
/* NGram Aliases */
/** Bin (bucket) count type requested for histograms.
 * It is not very likely that we are interested in 64-bit precision
 * bucket/bin counts, so pick 32-bit for now. */
alias RequestedBinType = uint;
/// N-gram order passed as the second template argument of `XGram`.
enum NGramOrder = 3;
/// `NGram` over `ubyte` of order 1, instantiated with `Kind.binary`,
/// `Storage.denseStatic`, `Symmetry.ordered` and `void` in the bin-type slot.
alias Bist  = NGram!(ubyte, 1, ngram.Kind.binary, ngram.Storage.denseStatic, ngram.Symmetry.ordered, void, immutable(ubyte)[]);
/// `NGram` over `ubyte` of order `NGramOrder`, instantiated with
/// `Kind.saturated`, `Storage.sparse`, `Symmetry.ordered` and `RequestedBinType`.
alias XGram = NGram!(ubyte, NGramOrder, ngram.Kind.saturated, ngram.Storage.sparse, ngram.Symmetry.ordered, RequestedBinType, immutable(ubyte)[]);
181 
182 /* Need for signal handling */
183 import core.stdc.stdlib;
184 version(linux) import core.sys.posix.sys.stat;
185 version(linux) import core.sys.posix.signal;
186 //version(linux) import std.c.linux.linux;
187 
188 /* TODO: Set global state.
189    http://forum.dlang.org/thread/cu9fgg$28mr$1@digitaldaemon.com
190 */
/** Number of times Ctrl-C has been pressed.
    Shared between threads; `signalHandler` increments it atomically. */
shared uint ctrlC = 0;

/** Exception Describing Process Signal. */
class SignalCaughtException : Exception
{
    /// Number of the caught signal (`int.max` until set by the constructor).
    int signo = int.max;

    /** Construct an exception for signal number `signo`.
        Params:
            signo = number of the signal that was caught.
            file = source file of the throw site (defaults to the caller's).
            line = source line of the throw site (defaults to the caller's).
    */
    this(int signo, string file = __FILE__, size_t line = __LINE__ ) @safe {
        import std.conv: to;
        // Forward file/line so that Throwable.file and Throwable.line describe
        // the throw site instead of this constructor.
        super(`Signal number ` ~ to!string(signo) ~ ` at ` ~ file ~ `:` ~ to!string(line),
              file, line);
        // Assign after the base constructor has run.
        this.signo = signo;
    }
}
203 
/** Process signal handler.
    Atomically increments the shared counter `ctrlC` when `signo` is `SIGINT`
    (interrupt from the keyboard, Ctrl-C). All other signals are ignored.
    Params:
        signo = number of the delivered signal.
*/
void signalHandler(int signo)
{
    import core.atomic: atomicOp;
    import core.stdc.signal: SIGINT;
    if (signo == SIGINT)
    {
        // A selective import does not bring the fully-qualified name
        // `core.atomic` into scope, so call the imported symbol directly.
        atomicOp!`+=`(ctrlC, 1);
    }
    // throw new SignalCaughtException(signo);
}
213 
/// Signal handler type: pointer to a function taking the signal number.
alias signalHandler_t = void function(int);
/** Binding to a C-linkage function `signal` that takes a signal number and a
    handler and returns a handler.
    NOTE(review): `signalHandler_t` is declared without `extern (C)`, so the
    handler pointer has D linkage while being passed to C code — confirm this
    is intended on the supported platforms. */
extern (C) signalHandler_t signal(int signal, signalHandler_t handler);
216 
217 version(msgpack)
218 {
219     import msgpack;
220 }
221 version(cerealed)
222 {
223     /* import cerealed.cerealiser; */
224     /* import cerealed.decerealiser; */
225     /* import cerealed.cereal; */
226 }
227 
/** File Content Type Code.

    Members without an explicit initializer are numbered implicitly, so their
    values depend on declaration order.
*/
enum FileContent
{
    unknown,
    binaryUnknown,
    binary,
    text,
    textASCII,
    text8Bit,
    document,
    spreadsheet,
    database,
    tagsDatabase,
    image,
    imageIcon,
    audio,
    sound = audio,              // alias: same value as `audio`
    music = audio,              // alias: same value as `audio`

    modemData,                  // implicit value continues from `music`, i.e. `audio + 1`
    imageModemFax1BPP, // One bit per pixel
    voiceModem,

    video,
    movie,
    media,
    sourceCode,
    scriptCode,
    buildSystemCode,
    byteCode,
    machineCode,
    versionControl,
    numericalData,
    archive,
    compressed,
    cache,
    binaryCache,
    firmware,
    spellCheckWordList,
    font,
    performanceBenchmark,
    fingerprint,
}
271 
/** How File Kinds are detected.

    Selects which of the name and (magic) content tests a file must pass to
    be considered of a given kind.
*/
enum FileKindDetection
{
    equalsParentPathDirsAndName, // Parenting path file name must match
    equalsName, // Only name must match
    equalsNameAndContents, // Both name and contents must match
    equalsNameOrContents, // Either name or contents must match
    equalsContents, // Only contents must match
    equalsWhatsGiven, // All information defined must match
}
282 
/** Key Scan (Search) Context.

    Commented-out members are contexts that are not enabled.
*/
enum ScanContext
{
    /* code, */
    /* comment, */
    /* string, */

    /* word, */
    /* symbol, */

    dirName,     // Name of directory being scanned
    dir = dirName, // alias of `dirName`

    fileName,    // Name of file being scanned
    name = fileName, // alias of `fileName`

    regularFilename,    // Name of file being scanned
    symlinkName, // Name of symbolic link being scanned

    fileContent, // Contents of file being scanned
    content = fileContent, // alias of `fileContent`

    /* modTime, */
    /* accessTime, */
    /* xattr, */
    /* size, */

    all,
    standard = all, // alias of `all`
}
313 
/** Where duplicates may lie relative to `topDirs`. */
enum DuplicatesContext
{
    internal, // All duplicates must lie inside topDirs
    external, // At least one duplicate must lie inside
    // topDirs. Others may lie outside
}
320 
/** File Operation Type Code.

    Several members are aliases that share the value of the preceding
    operation.
*/
enum FOp
{
    none,

    checkSyntax,                // Check syntax
    lint = checkSyntax,         // Check syntax alias

    build, // Project-Wide Build
    compile, // Compile
    byteCompile, // Byte compile
    run, // Run (Execute)
    execute = run, // alias of `run`

    preprocess, // Preprocess C/C++/Objective-C (using cpp)
    cpp = preprocess, // alias of `preprocess`

    /* VCS Operations */
    vcStatus,
    vcs = vcStatus, // alias of `vcStatus`

    deduplicate, // Deduplicate Files using hardlinks and Dirs using Symlink
}
344 
/** Directory Operation Type Code.
    Currently has a single member.
*/
enum DirOp
{
    /* VCS Operations */
    vcStatus,
}
351 
/** Shell Command.
    Represented as a plain `string`; used as the command half of
    `FKind.operations`.
 */
alias ShCmd = string; // Just simply a string for now.
355 
/** Pair of Delimiters.
    Used to describe, for example, comment and string delimiter syntax.
 */
struct Delim
{
    string intro;  /// Opening delimiter.
    string finish; /// Closing delimiter. Defaults to end of line if not defined.

    /// Construct from an opening delimiter only; `finish` is left at its `.init` (`null`).
    this(string intro)
    {
        this(intro, string.init);
    }

    /// Construct from an opening and a closing delimiter.
    this(string intro, string finish)
    {
        this.finish = finish;
        this.intro = intro;
    }
}
374 
/* Comment Delimiters */
/// Default comment syntax: `#` with no closing delimiter.
enum defaultCommentDelims = [Delim(`#`)];
/// C-style comment syntax: block comments and `//` with no closing delimiter.
enum cCommentDelims = [Delim(`/*`, `*/`),
                       Delim(`//`)];
/// D comment syntax: `/+ +/` followed by everything in `cCommentDelims`.
enum dCommentDelims = [Delim(`/+`, `+/`)] ~ cCommentDelims;

/* String Delimiters */
/// Default string syntax: double quote, single quote and backtick, each given as `intro` only.
enum defaultStringDelims = [Delim(`"`),
                            Delim(`'`),
                            Delim("`")];
/// Python string syntax: triple double quote listed first, then the default three.
enum pythonStringDelims = [Delim(`"""`),
                           Delim(`"`),
                           Delim(`'`),
                           Delim("`")];
389 
/** File Kind.

    Describes how files of one kind are recognized (base-name pattern,
    extensions, magic data, detection mode) together with language-related
    meta-data and the files found to be of this kind.
 */
class FKind
{
    /** Construct a file kind.

        Params:
            kindName_ = nick name of the kind.
            baseNaming_ = base-name matcher: a `string` (wrapped in `lit`), an
                          array of strings (combined as alternatives via
                          `alt`), or a `Patt` used as is. For any other `T`
                          `baseNaming` is left `null`.
            exts_ = typical extensions.
            magicData = magic data: `ubyte[]`, `string` or `void[]` (wrapped
                        in `lit`), or anything assignable to `Patt`.
            magicOffset = offset at which `magicData` is matched.
            refPattern_ = reference pattern: `ubyte[]`, `string` or `void[]`.
            keywords_ = keywords.
            strings_ = string delimiter syntax.
            comments_ = comment delimiter syntax.
            content_ = content type code.
            detection_ = detection mode. `equalsWhatsGiven` is relaxed to
                         `equalsName` for source and script code.
            lang_ = language, if any.
            superKind = kind that files must also be of (checked first by `ofKind1`).
            subKinds = sub-kinds.
            description = documenting description.
            wikip = Wikipedia URL.
    */
    this(T, MagicData, RefPattern)(string kindName_,
                                   T baseNaming_,
                                   const string[] exts_,
                                   MagicData magicData, size_t magicOffset = 0,
                                   RefPattern refPattern_ = RefPattern.init,
                                   const string[] keywords_ = [],

                                   Delim[] strings_ = [],

                                   Delim[] comments_ = [],

                                   FileContent content_ = FileContent.unknown,
                                   FileKindDetection detection_ = FileKindDetection.equalsWhatsGiven,
                                   Lang lang_ = Lang.unknown,

                                   FKind superKind = null,
                                   FKind[] subKinds = [],
                                   string description = null,
                                   string wikip = null) @trusted pure
    {
        this.kindName = kindName_;

        // Basename
        static if (is(T == string))
        {
            this.baseNaming = lit(baseNaming_);
        }
        else static if (isArrayOf!(T, string))
        {
            // TODO: Move to a factory function strs(x)
            auto alt_ = alt();
            foreach (ext; baseNaming_)  // add each string as an alternative
            {
                alt_ ~= lit(ext);
            }
            this.baseNaming = alt_;
        }
        else static if (is(T == Patt))
        {
            this.baseNaming = baseNaming_;
        }
        // any other T (for example typeof(null)) leaves baseNaming null

        this.exts = exts_;

        import std.traits: isAssignable;
        static      if (is(MagicData == ubyte[])) { this.magicData = lit(magicData) ; }
        else static if (is(MagicData == string)) { this.magicData = lit(magicData.representation.dup); }
        else static if (is(MagicData == void[])) { this.magicData = lit(cast(ubyte[])magicData); }
        else static if (isAssignable!(Patt, MagicData)) { this.magicData = magicData; }
        else static assert(0, `Cannot handle MagicData being type ` ~ MagicData.stringof);

        this.magicOffset = magicOffset;

        static      if (is(RefPattern == ubyte[])) { this.refPattern = refPattern_; }
        else static if (is(RefPattern == string)) { this.refPattern = refPattern_.representation.dup; }
        else static if (is(RefPattern == void[])) { this.refPattern = (cast(ubyte[])refPattern_).dup; }
        else static assert(0, `Cannot handle RefPattern being type ` ~ RefPattern.stringof);

        this.keywords = keywords_;

        this.strings = strings_;
        this.comments = comments_;

        this.content = content_;

        if ((content_ == FileContent.sourceCode ||
             content_ == FileContent.scriptCode) &&
            detection_ == FileKindDetection.equalsWhatsGiven)
        {
            // relax matching of sourcecode to only need name until we have complete parsers
            this.detection = FileKindDetection.equalsName;
        }
        else
        {
            this.detection = detection_;
        }
        this.lang = lang_;

        this.superKind = superKind;
        this.subKinds = subKinds;
        this.description = description;
        this.wikip = wikip.asURL;
    }

    /// Returns: the kind's nick name.
    override string toString() const @property @trusted pure nothrow { return kindName; }

    /** Returns: Id Unique to matching behaviour of `this` FKind. If match
        behaviour of `this` FKind changes returned id will change.
        The value is computed on first use and then memoized.

        For literal magic data the digest covers extensions, magic bytes,
        magic offset, reference pattern, keywords, content and detection.
        Non-literal or absent magic data cannot be serialized here; in that
        case `kindName` is hashed in its place so that such kinds do not all
        end up with the same id (which would make them overwrite each other
        when indexed by id).
    */
    auto ref const(SHA1Digest) behaviorId() @property @safe /* pure nothrow */
        out(result) { assert(!result.empty); }
    do
    {
        if (_behaviourDigest.empty) // if not yet defined
        {
            ubyte[] bytes;
            const magicLit = cast(Lit)magicData;
            if (magicLit)
            {
                bytes = msgpack.pack(exts, magicLit.bytes, magicOffset, refPattern, keywords, content, detection);
            }
            else
            {
                // TODO: Serialize non-literal magic patterns and baseNaming
                // instead of falling back on the kind name.
                bytes = msgpack.pack(kindName, exts, magicOffset, refPattern, keywords, content, detection);
            }
            _behaviourDigest = bytes.sha1Of;
        }
        return _behaviourDigest;
    }

    string kindName;    // Kind Nick Name.
    string description; // Kind Documenting Description.
    AsURL!string wikip; // Wikipedia URL

    FKind superKind;    // Inherited pattern. For example ELF => ELF core file
    FKind[] subKinds;   // Kinds specializing this one (as passed to the constructor).
    Patt baseNaming;    // Pattern that matches typical file basenames of this Kind. May be null.

    string[] parentPathDirs; // example [`lib`, `firmware`] for `/lib/firmware` or `../lib/firmware`

    const string[] exts;      // Typical Extensions.
    Patt magicData;     // Magic Data.
    size_t magicOffset; // Magic Offset.
    ubyte[] refPattern; // Reference pattern.
    const FileContent content;
    const FileKindDetection detection;
    Lang lang; // Language if any

    // Volatile Statistics:
    private SHA1Digest _behaviourDigest; // Memoized result of behaviorId.
    RegFile[] hitFiles;     // Files of this kind.

    const string[] keywords; // Keywords
    string[] builtins; // Builtin Functions
    Op[] opers; // Language Opers

    /* TODO: Move this to CompLang class */
    Delim[] strings; // String syntax.
    Delim[] comments; // Comment syntax.

    bool machineGenerated; // True if this is a machine generated file.

    Tuple!(FOp, ShCmd)[] operations; // Operation and Corresponding Shell Command
}
541 
/** Set of File Kinds with Internal Hashing.

    `byIndex` holds the kinds in insertion order. The private associative
    arrays index the same kinds by name, extension, literal magic bytes and
    behaviour id, and are maintained by `register`.
*/
class FKinds
{
    /// Append a single `kind` (`kinds ~= kind`) and index it.
    void opOpAssign(string op)(FKind kind) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kind;
        this.register(kind);
    }
    /// Append every kind of `kinds` (`kinds ~= otherKinds`) and index each of them.
    void opOpAssign(string op)(FKinds kinds) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kinds.byIndex;
        foreach (kind; kinds.byIndex)
            this.register(kind);
    }

    /** Index `kind` by name, by each of its extensions and by behaviour id.
        A kind whose magic data is a literal at offset zero is also indexed
        by its magic bytes, and the length of those bytes is recorded.
        Does not touch `byIndex`.
        Returns: `this`, to allow chaining.
    */
    FKinds register(FKind kind) @safe /* pure */
    {
        this.byName[kind.kindName] = kind;
        foreach (const ext; kind.exts)
        {
            this.byExt[ext] ~= kind;
        }
        this.byId[kind.behaviorId] = kind;
        if (kind.magicOffset == 0 && // only if zero-offset for now
            kind.magicData)
        {
            if (const magicLit = cast(Lit)kind.magicData)
            {
                this.byMagic[magicLit.bytes][magicLit.bytes.length] ~= kind;
                _magicLengths ~= magicLit.bytes.length; // add it
            }
        }
        return this;
    }

    /** Rehash Internal AAs and reduce the recorded magic lengths to a sorted
        list without duplicates.
        Returns: `this`, to allow chaining.
        TODO: Change to @safe when https://github.com/D-Programming-Language/druntime/pull/942 has been merged
        TODO: Change to nothrow when uniq becomes nothrow.
    */
    FKinds rehash() @trusted pure /* nothrow */
    {
        import std.algorithm: sort;
        // uniq only collapses adjacent equal elements, so the lengths must be
        // sorted before duplicates are removed.
        _magicLengths.sort();
        _magicLengths = _magicLengths.uniq.array; // remove duplicates
        this.byName.rehash;
        this.byExt.rehash;
        this.byMagic.rehash;
        this.byId.rehash;
        return this;
    }

    FKind[] byIndex; // All kinds in insertion order.
private:
    /* TODO: These are "slaves" under byIndex and should not be modifiable outside
     of this class but their FKind's can mutable.
     */
    FKind[string] byName; // Index by unique name string
    FKind[][string] byExt; // Index by possibly non-unique extension string

    FKind[][size_t][immutable ubyte[]] byMagic; // zero-offset magic byte array => its length => Binary FKind[]
    size_t[] _magicLengths; // List of magic lengths to try as index in byMagic

    FKind[SHA1Digest] byId;    // Index Kinds by their behaviour
}
606 
/** Match `kind` with full filename `full`.
    Params:
        kind = kind whose `baseNaming` pattern is tried.
        full = file name to match.
        six = index forwarded to the pattern's `match`.
    Returns: `true` iff `kind` has a base-name pattern and matching it against
             `full` gives a non-empty result.
*/
bool matchFullName(in FKind kind,
                   const scope string full, size_t six = 0) @safe pure nothrow
{
    if (!kind.baseNaming)
    {
        return false;           // no base-name pattern to match against
    }
    return !kind.baseNaming.match(full, six).empty;
}
614 
/** Match `kind` with file extension `ext`.
    Returns: `true` iff `ext` equals one of `kind.exts`.
*/
bool matchExtension(in FKind kind,
                    const scope string ext) @safe pure nothrow
{
    foreach (const candidate; kind.exts)
    {
        if (candidate == ext)
        {
            return true;
        }
    }
    return false;
}
621 
/** Match `kind` with either the full filename `full` or the extension `ext`.
    Params:
        kind = kind to match against.
        full = file name tried against the kind's base-name pattern.
        six = index forwarded to the base-name match.
        ext = extension tried against the kind's extensions.
    Returns: `true` iff the full name or the extension matches.
*/
bool matchName(in FKind kind,
               const scope string full, size_t six = 0,
               const scope string ext = null) @safe pure nothrow
{
    // Forward six; it was previously dropped so the default 0 was always used.
    return (kind.matchFullName(full, six) ||
            kind.matchExtension(ext));
}
629 
630 import std.range: hasSlicing;
631 
/** Match (Magic) Contents of `kind` with `range`.
    Params:
        kind = kind whose `magicData` is matched at `magicOffset`.
        range = contents to match against.
        regFile = not used by this function.
    Returns: `true` iff match.
*/
bool matchContents(Range)(in FKind kind,
                          in Range range,
                          in RegFile regFile) pure nothrow if (hasSlicing!Range)
{
    const found = kind.magicData.matchU(range, kind.magicOffset);
    if (found.empty)
    {
        return false;
    }
    return true;
}
641 
/** Result of testing a file against a kind.
    `none` is zero, so a `KindHit` used as a condition is true only for a hit.
*/
enum KindHit
{
    none = 0,     // No hit.
    cached = 1,   // Cached hit.
    uncached = 2, // Uncached (fresh) hit.
}
648 
/** Find the first kind in `kinds` that `regFile` is of.

    Tries, in order: the kind id cached in `regFile`, the kinds registered
    for the file's extension, and finally every kind in `kinds.byIndex`.

    Params:
        regFile = file to classify.
        kinds = set of kinds to search.
        collectTypeHits = forwarded to `ofKind`.
    Returns: tuple of hit type, matching kind and index. The index is zero
             for a cached hit or a miss; for an extension hit it indexes the
             per-extension list; otherwise it indexes `kinds.byIndex`. On a
             miss the kind is `null`.
*/
Tuple!(KindHit, FKind, size_t) ofAnyKindIn(NotNull!RegFile regFile,
                                           FKinds kinds,
                                           bool collectTypeHits)
{
    // using kindId
    if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
    {
        if (auto cachedKind = regFile._cstat.kindId in kinds.byId) // single lookup
        {
            return tuple(KindHit.cached,
                         *cachedKind,
                         size_t(0)); // size_t rather than ulong keeps 32-bit targets compiling
        }
    }

    // using extension
    immutable ext = regFile.realExtension; // extension sans dot
    if (!ext.empty)
    {
        if (auto extKinds = ext in kinds.byExt) // single lookup
        {
            foreach (kindIndex, kind; *extKinds)
            {
                auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
                if (hit)
                {
                    return tuple(hit, kind, kindIndex);
                }
            }
        }
    }

    // try all
    foreach (kindIndex, kind; kinds.byIndex) // Iterate each kind
    {
        auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
        if (hit)
        {
            return tuple(hit, kind, kindIndex);
        }
    }

    // no hit
    return tuple(KindHit.none,
                 FKind.init,
                 size_t(0));
}
694 
/** Test whether `regFile` is of `kind`.
    Thin forwarder to `ofKind1`.
    Returns: the `KindHit` reported by `ofKind1`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               NotNull!FKind kind,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    return regFile.ofKind1(kind, collectTypeHits, allFKinds);
}
706 
/** Test whether `regFile` is of the kind named `kindName`.
    Returns: `KindHit.none` if no kind of that name is registered in
             `allFKinds`, otherwise the result of testing against that kind.
*/
KindHit ofKind(NotNull!RegFile regFile,
               string kindName,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    auto namedKind = kindName in allFKinds.byName;
    if (namedKind is null)
    {
        return KindHit.none;    // unknown kind name
    }
    return regFile.ofKind(assumeNotNull(*namedKind),
                          collectTypeHits,
                          allFKinds);
}
722 
/** Helper for ofKind.

    Steps:
    $(OL
      $(LI Return `KindHit.cached` if the kind id stored in `regFile` maps to `kind`.)
      $(LI If `kind` has a `superKind`, require `regFile` to be of it.)
      $(LI Test name and/or contents as selected by `kind.detection`.)
      $(LI On a hit, store `kind.behaviorId` in `regFile` and, if
           `collectTypeHits`, append `regFile` to `kind.hitFiles`.)
    )
    Returns: `KindHit.cached`, `KindHit.uncached` or `KindHit.none`.
*/
KindHit ofKind1(NotNull!RegFile regFile,
                NotNull!FKind kind,
                bool collectTypeHits,
                FKinds allFKinds) /* nothrow */ @trusted
{
    // Try cached first
    if (regFile._cstat.kindId.defined &&
        (regFile._cstat.kindId in allFKinds.byId) && // if kind is known
        allFKinds.byId[regFile._cstat.kindId] is kind)  // if cached kind equals
    {
        return KindHit.cached;
    }

    immutable ext = regFile.realExtension;

    if (kind.superKind)
    {
        immutable baseHit = regFile.ofKind(enforceNotNull(kind.superKind),
                                           collectTypeHits,
                                           allFKinds);
        if (!baseHit)
        {
            return baseHit;     // not of the super kind, so not of this kind either
        }
    }

    bool hit = false;
    final switch (kind.detection)
    {
    case FileKindDetection.equalsParentPathDirsAndName:
        // parentPathDirs must occur as a contiguous run among the names of the parent directories
        hit = (!regFile.parents.map!(a => a.name).find(kind.parentPathDirs).empty &&
               kind.matchName(regFile.name, 0, ext));
        break;
    case FileKindDetection.equalsName:
        hit = kind.matchName(regFile.name, 0, ext);
        break;
    case FileKindDetection.equalsNameAndContents:
        hit = (kind.matchName(regFile.name, 0, ext) &&
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsNameOrContents:
        hit = (kind.matchName(regFile.name, 0, ext) ||
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsContents:
        hit = kind.matchContents(regFile.readOnlyContents, regFile);
        break;
    case FileKindDetection.equalsWhatsGiven:
        // something must be defined
        // (`is(kind.baseNaming)` tested for a type and was always false;
        // a null check is what is meant)
        assert(kind.baseNaming !is null ||
               !kind.exts.empty ||
               kind.magicData !is null);
        hit = ((kind.matchName(regFile.name, 0, ext) &&
                (kind.magicData is null ||
                 kind.matchContents(regFile.readOnlyContents, regFile))));
        break;
    }
    if (hit)
    {
        if (collectTypeHits)
        {
            kind.hitFiles ~= regFile;
        }
        regFile._cstat.kindId = kind.behaviorId;       // store reference in File
    }

    return hit ? KindHit.uncached : KindHit.none;
}
792 
/** Directory Kind.
    Pairs a file name with a kind name.
 */
class DirKind
{
    /** Construct from file name `fn` and kind name `kn`. */
    this(string fn,
         string kn)
    {
        this.fileName = fn;
        this.kindName = kn;
    }

    version(msgpack)
    {
        /** Construct by deserializing from `unpacker`.
            NOTE(review): this wraps `unpacker` in `msgpack.Unpacker(...)`
            before forwarding — confirm that is the intended call. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }
        /** Serialize all fields as one array: first its length, then the fields. */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.beginArray(this.tupleof.length);
            packer.pack(this.tupleof);
        }
        /** Deserialize all fields from an array, in declaration order. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.beginArray;
            unpacker.unpack(this.tupleof);
        }
    }

    string fileName; // Set from constructor argument `fn`.
    string kindName; // Set from constructor argument `kn`.
}
/// Round-trip a tuple of two empty strings through msgpack pack/unpack.
version(msgpack) unittest
{
    auto original = tuple(``, ``);
    auto packed = pack(original);

    Tuple!(string, string) restored;
    packed.unpack(restored);

    assert(original == restored);
}
832 
833 import std.file: DirEntry, getLinkAttributes;
834 import std.datetime: SysTime, Interval;
835 
836 /** File.
837  */
838 class File
839 {
840     this(Dir parent)
841     {
842         this.parent = parent;
843         if (parent) { ++parent.gstats.noFiles; }
844     }
845     this(string name, Dir parent, Bytes64 size,
846          SysTime timeLastModified,
847          SysTime timeLastAccessed)
848     {
849         this.name = name;
850         this.parent = parent;
851         this.size = size;
852         this.timeLastModified = timeLastModified;
853         this.timeLastAccessed = timeLastAccessed;
854         if (parent) { ++parent.gstats.noFiles; }
855     }
856 
857     // The Real Extension without leading dot.
858     string realExtension() @safe pure nothrow const { return name.extension.chompPrefix(`.`); }
859     alias ext = realExtension; // shorthand
860 
861     string toTextual() const @property { return `Any File`; }
862 
863     Bytes64 treeSize() @property @trusted /* @safe pure nothrow */ { return size; }
864 
865     /** Content Digest of Tree under this Directory. */
866     const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
867     {
868         return typeof(return).init; // default to undefined
869     }
870 
871     Face!Color face() const @property @safe pure nothrow { return fileFace; }
872 
873     /** Check if `this` File has been invalidated by `dent`.
874         Returns: `true` iff `this` was obseleted.
875     */
876     bool checkObseleted(ref DirEntry dent) @trusted
877     {
878         // Git-Style Check for Changes (called Decider in SCons Build Tool)
879         bool flag = false;
880         if (dent.size != this.size || // size has changes
881             (dent.timeLastModified != this.timeLastModified) // if current modtime has changed or
882             )
883         {
884             makeObselete;
885             this.timeLastModified = dent.timeLastModified; // use new time
886             this.size = dent.size; // use new time
887             flag = true;
888         }
889         this.timeLastAccessed = dent.timeLastAccessed; // use new time
890         return flag;
891     }
892 
893     void makeObselete() @trusted {}
894     void makeUnObselete() @safe {}
895 
896     /** Returns: Depth of Depth from File System root to this File. */
897     int depth() @property @safe pure nothrow
898     {
899         return parent ? parent.depth + 1 : 0; // NOTE: this is fast because parent is memoized
900     }
901     /** NOTE: Currently not used. */
902     int depthIterative() @property @safe pure
903         out (depth) { debug assert(depth == depth); }
904     do
905     {
906         typeof(return) depth = 0;
907         for (auto curr = dir; curr !is null && !curr.isRoot; depth++)
908         {
909             curr = curr.parent;
910         }
911         return depth;
912     }
913 
914     /** Get Parenting Dirs starting from parent of `this` upto root.
915         Make this even more lazily evaluted.
916     */
917     Dir[] parentsUpwards()
918     {
919         typeof(return) parents; // collected parents
920         for (auto curr = dir; (curr !is null &&
921                                !curr.isRoot); curr = curr.parent)
922         {
923             parents ~= curr;
924         }
925         return parents;
926     }
927     alias dirsDownward = parentsUpwards;
928 
929     /** Get Parenting Dirs starting from file system root downto containing
930         directory of `this`.
931     */
932     auto parents()
933     {
934         return parentsUpwards.retro;
935     }
936     alias dirs = parents;     // SCons style alias
937     alias parentsDownward = parents;
938 
939     bool underAnyDir(alias pred = `a`)()
940     {
941         import std.algorithm: any;
942         import std.functional: unaryFun;
943         return parents.any!(unaryFun!pred);
944     }
945 
946     /** Returns: Path to `this` File.
947         TODO: Reuse parents.
948      */
949     string path() @property @trusted pure out (result) {
950         /* assert(result == pathRecursive); */
951     }
952     do
953     {
954         if (!parent) { return dirSeparator; }
955 
956         size_t pathLength = 1 + name.length; // returned path length
957         Dir[] parents; // collected parents
958 
959         for (auto curr = parent; (curr !is null &&
960                                   !curr.isRoot); curr = curr.parent)
961         {
962             pathLength += 1 + curr.name.length;
963             parents ~= curr;
964         }
965 
966         // build path
967         auto thePath = new char[pathLength];
968         size_t i = 0; // index to thePath
969         import std.range: retro;
970         foreach (currParent_; parents.retro)
971         {
972             immutable parentName = currParent_.name;
973             thePath[i++] = dirSeparator[0];
974             thePath[i .. i + parentName.length] = parentName[];
975             i += parentName.length;
976         }
977         thePath[i++] = dirSeparator[0];
978         thePath[i .. i + name.length] = name[];
979 
980         return thePath;
981     }
982 
983     /** Returns: Path to `this` File.
984         Recursive Heap-active implementation, slower than $(D path()).
985     */
986     string pathRecursive() @property @trusted pure
987     {
988         if (parent)
989         {
990             static if (true)
991             {
992                 import std.path: dirSeparator;
993                 // NOTE: This is more efficient than buildPath(parent.path,
994                 // name) because we can guarantee things about parent.path and
995                 // name
996                 immutable parentPath = parent.isRoot ? `` : parent.pathRecursive;
997                 return parentPath ~ dirSeparator ~ name;
998             }
999             else
1000             {
1001                 import std.path: buildPath;
1002                 return buildPath(parent.pathRecursive, name);
1003             }
1004         }
1005         else
1006         {
1007             return `/`;  // assume root folder with beginning slash
1008         }
1009     }
1010 
1011     version(msgpack)
1012     {
1013         void toMsgpack(Packer)(ref Packer packer) const
1014         {
1015             writeln(`Entering File.toMsgpack `, name);
1016             packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
1017         }
1018         void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
1019         {
1020             long stdTime;
1021             unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
1022             unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
1023         }
1024     }
1025 
1026     Dir parent;               // Reference to parenting directory (or null if this is a root directory)
1027     alias dir = parent;       // SCons style alias
1028 
1029     string name;              // Empty if root directory
1030     Bytes64 size;             // Size of file in bytes
1031     SysTime timeLastModified; // Last modification time
1032     SysTime timeLastAccessed; // Last access time
1033 }
1034 
/** Maps Files to their tags.

    Each registered `File` owns a list of distinct tag strings kept in
    insertion order.
*/
class FileTags
{
    /** Attach `tag` to `file` unless `file` already carries it.
        Returns: `this`, to allow chaining.
    */
    FileTags addTag(File file, const scope string tag) @safe pure /* nothrow */
    {
        if (auto existing = file in _tags)
        {
            // already registered: append only if the tag is new
            if ((*existing).find(tag).empty)
            {
                *existing ~= tag;
            }
        }
        else
        {
            _tags[file] = [tag]; // first tag of this file
        }
        return this;
    }

    /** Detach every occurrence of `tag` from `file`.
        Does nothing when `file` has no tags registered.
        Returns: `this`, to allow chaining.
    */
    FileTags removeTag(File file, string tag) @safe pure
    {
        if (auto existing = file in _tags)
        {
            import std.algorithm: remove;
            *existing = (*existing).remove!(a => a == tag);
        }
        return this;
    }

    /** Returns: Tags of `file`, or `null` if `file` is not registered. */
    auto ref getTags(File file) const @safe pure nothrow
    {
        auto found = file in _tags;
        return found ? *found : null;
    }

    private string[][File] _tags; // Tags for each registered file.
}
1068 
/* Exercise path construction and `FileTags` on real entries below `/etc`. */
version(linux) unittest
{
    GStats gstats = new GStats();
    auto tags = new FileTags();

    // build a minimal tree: root -> /etc -> /etc/passwd
    auto root = assumeNotNull(new Dir(cast(Dir)null, gstats));
    auto etcDir = getDir(root, `/etc`);
    assert(etcDir.path == `/etc`);

    auto passwdEntry = DirEntry(`/etc/passwd`);
    auto passwdFile = getFile(root, `/etc/passwd`, passwdEntry.isDir);
    assert(passwdFile.path == `/etc/passwd`);
    assert(passwdFile.parent == etcDir);
    assert(etcDir.sub(`passwd`) == passwdFile);

    // adding the same tag twice must not duplicate it
    tags.addTag(passwdFile, `Password`);
    tags.addTag(passwdFile, `Password`);
    tags.addTag(passwdFile, `Secret`);
    assert(tags.getTags(passwdFile) == [`Password`, `Secret`]);

    // removal leaves the remaining tags intact
    tags.removeTag(passwdFile, `Password`);
    assert(tags._tags[passwdFile] == [`Secret`]);
}
1092 
/** Symlink Target Status.
 */
enum SymlinkTargetStatus
{
    /// Not determined; initial value of `Symlink._targetStatus`.
    unknown,
    /// Target marked as present.
    present,
    /// Target marked as broken; `Symlink.face` then yields `symlinkBrokenFace`.
    broken,
}
1101 
/** Symbolic link.

    Stores the link's own name and metadata plus the lazily read link
    target and a status flag for that target.
 */
class Symlink : File
{
    /** Create an unnamed symlink under `parent` and count it in
        `parent.gstats.noSymlinks`.
    */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSymlinks;
    }

    /** Create a symlink from directory entry `dent` under `parent`.

        Querying size and times of `dent` may throw; in that case the values
        read so far (defaults otherwise) are used and the target is not read
        eagerly.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        Bytes64 linkSize;
        SysTime mtime, atime;
        bool statSucceeded = true;
        try
        {
            linkSize = dent.size.Bytes64;
            mtime = dent.timeLastModified;
            atime = dent.timeLastAccessed;
        }
        catch (Exception)
        {
            statSucceeded = false; // best effort: keep whatever was read
        }
        // const attrs = getLinkAttributes(dent.name); // attributes of target file
        super(dent.name.baseName, parent, linkSize, mtime, atime);
        if (statSucceeded)
        {
            this.retarget(dent); // read target right away
        }
        ++parent.gstats.noSymlinks;
    }

    /** Returns: `symlinkBrokenFace` if the target is marked broken,
        `symlinkFace` otherwise.
    */
    override Face!Color face() const @property @safe pure nothrow
    {
        if (_targetStatus != SymlinkTargetStatus.broken)
        {
            return symlinkFace;
        }
        return symlinkBrokenFace;
    }

    /** Returns: Human-readable name for this kind of file. */
    override string toTextual() const @property
    {
        return `Symbolic Link`;
    }

    /** Read the link target of `dent` and cache it in `_target`.
        Returns: The freshly read target.
    */
    string retarget(ref DirEntry dent) @trusted
    {
        import std.file: readLink;
        _target = readLink(dent);
        return _target;
    }

    /** Cached/Memoized/Lazy Lookup for target. */
    string target() @property @trusted
    {
        if (!_target)         // if target not yet read
        {
            auto ownDent = DirEntry(path);
            retarget(ownDent); // read and cache it
        }
        return _target;
    }

    /** Returns: Target as absolute normalized path, resolved relative to
        the directory containing this link. Uses the cached `target`; the
        resulting path itself is recomputed on every call.
    */
    string absoluteNormalizedTargetPath() @property @trusted
    {
        import std.path: absolutePath, buildNormalizedPath;
        immutable rawTarget = target;
        immutable linkDir = path.dirName;
        return rawTarget.absolutePath(linkDir).buildNormalizedPath;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack `name`, `size` and the two time stamps (as `stdTime`). */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /** Unpack the fields written by `toMsgpack`, in the same order. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
        }
    }

    string _target;           // cached link target; null until first read
    SymlinkTargetStatus _targetStatus = SymlinkTargetStatus.unknown;
}
1193 
/** Special File (Character or Block Device).

    Size and time stamps are not read from the file system; they are
    stored as zero.
 */
class SpecFile : File
{
    /** Create an unnamed special file under `parent` and count it in
        `parent.gstats.noSpecialFiles`.
    */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSpecialFiles;
    }

    /** Create a special file named after `dent` under `parent`, with zero
        size and zero time stamps, and count it in
        `parent.gstats.noSpecialFiles`.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        super(dent.name.baseName,
              parent,
              0.Bytes64,
              cast(SysTime)0,
              cast(SysTime)0);
        ++parent.gstats.noSpecialFiles;
    }

    /** Returns: `specialFileFace`. */
    override Face!Color face() const @property @safe pure nothrow
    {
        return specialFileFace;
    }

    /** Returns: Human-readable name for this kind of file. */
    override string toTextual() const @property
    {
        return `Special File`;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack `name`, `size` and the two time stamps (as `stdTime`). */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /** Unpack the fields written by `toMsgpack`, in the same order. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
        }
    }
}
1234 
/** Bit (Content) Status. */
enum BitStatus
{
    /// Not yet determined.
    unknown,
    /// No byte of the contents has its topmost bit set (7-bit ASCII).
    bits7,
    /// At least one byte of the contents has its topmost bit set.
    bits8,
}
1242 
/** Regular File.

    Adds to `File` lazily calculated statistics about the file contents
    (`CStat`: SHA-1 content id, bistogram, x-gram, bit status) and a
    memory-mapped view of those contents.
 */
class RegFile : File
{
    /** Create an unnamed regular file under `parent` and count it in
        `parent.gstats.noRegFiles`.
    */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noRegFiles;
    }

    /** Create a regular file from directory entry `dent` under `parent`. */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        this(dent.name.baseName, parent, dent.size.Bytes64,
             dent.timeLastModified, dent.timeLastAccessed);
    }

    /** Create a regular file from explicit metadata and count it in
        `parent.gstats.noRegFiles`.
    */
    this(string name, NotNull!Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        ++parent.gstats.noRegFiles;
    }

    ~this() nothrow @nogc
    {
        _cstat.deallocate(false);
    }

    /** Returns: Human-readable name for this kind of file. */
    override string toTextual() const @property { return `Regular File`; }

    /** Returns: Content Id of `this`.

        Calculated on first use by hashing the contents in chunks of 32
        pages; the mapping of the contents is released again afterwards.
    */
    const(SHA1Digest) contentId() @property @trusted /* @safe pure nothrow */
    {
        if (_cstat._contentId.isUntouched)
        {
            enum doSHA1 = true;
            calculateCStatInChunks(parent.gstats.filesByContentId,
                                   32*pageSize(),
                                   doSHA1);
            freeContents(); // TODO: Call lazily only when open count is too large
        }
        return _cstat._contentId;
    }

    /** Returns: Tree Content Id of `this`, which for a regular file is its
        `contentId`.
    */
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        return contentId;
    }

    /** Returns: `regFileFace`. */
    override Face!Color face() const @property @safe pure nothrow { return regFileFace; }

    /** Returns: SHA-1 of `this` `File` Contents at `src`.

        If no content id is stored yet it is calculated from `src`, stored,
        and `this` is appended to the entry of `filesByContentId` for that
        digest.
    */
    const(SHA1Digest) contId(inout (ubyte[]) src,
                             File[][SHA1Digest] filesByContentId)
        @property pure out(result) { assert(!result.empty); } // must have be defined
    do
    {
        if (_cstat._contentId.empty) // if not yet defined
        {
            _cstat._contentId = src.sha1Of;
            filesByContentId[_cstat._contentId] ~= this;
        }
        return _cstat._contentId;
    }

    /** Returns: Cached/Memoized Binary Histogram of `this` `File`. */
    auto ref bistogram8() @property @safe // ref needed here!
    {
        if (_cstat.bist.empty)
        {
            _cstat.bist.put(readOnlyContents); // memoized calculated
        }
        return _cstat.bist;
    }

    /** Returns: Cached/Memoized XGram of `this` `File`. */
    auto ref xgram() @property @safe // ref needed here!
    {
        if (_cstat.xgram.empty)
        {
            _cstat.xgram.put(readOnlyContents); // memoized calculated
        }
        return _cstat.xgram;
    }

    /** Returns: Cached/Memoized XGram Deep Denseness of `this` `File`. */
    auto ref xgramDeepDenseness() @property @safe
    {
        if (!_cstat._xgramDeepDenseness)
        {
            _cstat._xgramDeepDenseness = xgram.denseness(-1).numerator;
        }
        return Rational!ulong(_cstat._xgramDeepDenseness,
                              _cstat.xgram.noBins);
    }

    /** Returns: true if empty file (zero length). */
    bool empty() @property const @safe { return size == 0; }

    /** Process File in Cache Friendly Chunks.

        Reads the contents once and updates the requested statistics.
        A statistic that is already available is not recalculated, and the
        SHA-1 is also skipped for empty files.

        Params:
            filesByContentId = index that `this` is appended to under its
                               new content id when the SHA-1 is calculated
            chunkSize = number of bytes processed per iteration
            doSHA1 = calculate the content id
            doBist = calculate the bistogram
            doBitStatus = determine whether the contents are 7-bit or 8-bit
    */
    void calculateCStatInChunks(NotNull!File[][SHA1Digest] filesByContentId,
                                size_t chunkSize = 32*pageSize(),
                                bool doSHA1 = false,
                                bool doBist = false,
                                bool doBitStatus = false) @safe
    {
        if (_cstat._contentId.defined || empty) { doSHA1 = false; }
        if (!_cstat.bist.empty) { doBist = false; }
        if (_cstat.bitStatus != BitStatus.unknown) { doBitStatus = false; }

        import std.digest.sha;
        SHA1 sha1;
        if (doSHA1) { sha1.start(); }

        bool isASCII = true;

        if (doSHA1 || doBist || doBitStatus)
        {
            import std.range: chunks;
            foreach (chunk; readOnlyContents.chunks(chunkSize))
            {
                if (doSHA1) { sha1.put(chunk); }
                if (doBist) { _cstat.bist.put(chunk); }
                if (doBitStatus)
                {
                    /* TODO: This can be parallelized using 64-bit wording!
                     * Write automatic parallelizing library for this? */
                    foreach (elt; chunk)
                    {
                        import nxt.bitop_ex: bt;
                        isASCII = isASCII && !elt.bt(7); // ASCII has no topmost bit set
                    }
                }
            }
        }

        if (doBitStatus)
        {
            _cstat.bitStatus = isASCII ? BitStatus.bits7 : BitStatus.bits8;
        }

        if (doSHA1)
        {
            _cstat._contentId = sha1.finish();
            filesByContentId[_cstat._contentId] ~= cast(NotNull!File)assumeNotNull(this); // TODO: Prettier way?
        }
    }

    /** Unregister `this` from `filesByContentId`.

        If the current content id has an entry in `filesByContentId`,
        `this` is removed from that entry and the shortened list is stored
        back into the map. Exactly one occurrence is expected to be removed.
    */
    void clearCStat(File[][SHA1Digest] filesByContentId) @safe nothrow
    {
        // SHA1-digest
        if (auto dups = _cstat._contentId in filesByContentId)
        {
            import std.algorithm: remove;
            immutable n = (*dups).length;
            // `remove` returns the shortened slice; it has to be written back
            // or the map entry keeps its old length with a stale last element
            *dups = (*dups).remove!(a => a is this);
            assert(n == (*dups).length + 1); // exactly one entry must have been removed
        }
    }

    override string toString() @property @trusted
    {
        // import std.traits: fullyQualifiedName;
        // return fullyQualifiedName!(typeof(this)) ~ `(` ~ buildPath(parent.name, name) ~ `)`; // TODO: typenameof
        return (typeof(this)).stringof ~ `(` ~ this.path ~ `)`; // TODO: typenameof
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack.

            Order: name, size, modification time, access time, kind id,
            content id, bistogram flag (+ bistogram), x-gram flag (+ x-gram
            and its deep denseness).
        */
        void toMsgpack(Packer)(ref Packer packer) const {
            /* writeln(`Entering RegFile.toMsgpack `, name); */

            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);

            // CStat: TODO: Group
            packer.pack(_cstat.kindId); // FKind
            packer.pack(_cstat._contentId); // Digest

            // Bist
            immutable bistFlag = !_cstat.bist.empty;
            packer.pack(bistFlag);
            if (bistFlag) { packer.pack(_cstat.bist); }

            // XGram
            immutable xgramFlag = !_cstat.xgram.empty;
            packer.pack(xgramFlag);
            if (xgramFlag)
            {
                /* debug dln("packing xgram. empty:", _cstat.xgram.empty); */
                packer.pack(_cstat.xgram,
                            _cstat._xgramDeepDenseness);
            }

            /*     auto this_ = (cast(RegFile)this); // TODO: Ugly! Is there another way? */
            /*     const tags = this_.parent.gstats.ftags.getTags(this_); */
            /*     immutable tagsFlag = !tags.empty; */
            /*     packer.pack(tagsFlag); */
            /*     debug dln(`Packing tags `, tags, ` of `, this_.path); */
            /*     if (tagsFlag) { packer.pack(tags); } */
        }

        /** Unpack the fields written by `toMsgpack`, in the same order.

            A kind id that is not known to `parent.gstats.allFKinds` is
            reported and forgotten. A set content id registers `this` in
            `parent.gstats.filesByContentId`.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) @trusted
        {
            unpacker.unpack(name, size); // Name, Size

            // Time
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize

            // CStat: TODO: Group
            unpacker.unpack(_cstat.kindId); // FKind
            if (_cstat.kindId.defined &&
                _cstat.kindId !in parent.gstats.allFKinds.byId)
            {
                dln(`warning: kindId `, _cstat.kindId, ` not found for `,
                    path, `, FKinds length `, parent.gstats.allFKinds.byIndex.length);
                _cstat.kindId.reset; // forget it
            }
            unpacker.unpack(_cstat._contentId); // Digest
            if (_cstat._contentId)
            {
                parent.gstats.filesByContentId[_cstat._contentId] ~= cast(NotNull!File)this;
            }

            // Bist
            bool bistFlag; unpacker.unpack(bistFlag);
            if (bistFlag)
            {
                unpacker.unpack(_cstat.bist);
            }

            // XGram
            bool xgramFlag; unpacker.unpack(xgramFlag);
            if (xgramFlag)
            {
                /* if (_cstat.xgram == null) { */
                /*     _cstat.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                /* } */
                /* unpacker.unpack(*_cstat.xgram); */
                unpacker.unpack(_cstat.xgram,
                                _cstat._xgramDeepDenseness);
                /* debug dln(`unpacked xgram. empty:`, _cstat.xgram.empty); */
            }

            // tags
            /* bool tagsFlag; unpacker.unpack(tagsFlag); */
            /* if (tagsFlag) { */
            /*     string[] tags; */
            /*     unpacker.unpack(tags); */
            /* } */
        }

        /** Reset the content statistics when `this` is found to be obselete.
            NOTE(review): this override only exists under `version(msgpack)`;
            confirm that builds without it are meant to keep stale statistics.
        */
        override void makeObselete() @trusted { _cstat.reset(); /* debug dln(`Reset CStat for `, path); */ }
    }

    /** Returns: Read-Only Contents of `this` Regular File.

        The file is memory-mapped on first use and the mapping is kept until
        `freeContents` is called. An empty file is never mapped; an empty
        slice is returned for it instead.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    immutable(ubyte[]) readOnlyContents(string file = __FILE__, int line = __LINE__)() @trusted
    {
        if (_mmfile is null)
        {
            if (size == 0) // munmap fails for empty files
            {
                static assert([] !is null);
                return []; // empty file
            }
            else
            {
                _mmfile = new MmFile(path, MmFile.Mode.read,
                                     mmfile_size, null, pageSize());
                if (parent.gstats.showMMaps)
                {
                    writeln(`Mapped `, path, ` of size `, size);
                }
            }
        }
        return cast(typeof(return))_mmfile[];
    }

    /** Returns: Read-Writable Contents of `this` Regular File.

        The file is memory-mapped in read-write mode on first use. An
        existing mapping is reused as is.
        NOTE(review): a mapping created earlier by `readOnlyContents` is
        read-only but is returned here as mutable — confirm callers never mix
        the two without `freeContents` in between.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    ubyte[] readWriteableContents() @trusted
    {
        if (!_mmfile)
        {
            _mmfile = new MmFile(path, MmFile.Mode.readWrite,
                                 mmfile_size, null, pageSize());
        }
        return cast(typeof(return))_mmfile[];
    }

    /** If needed Free Allocated Contents of `this` Regular File.
        Returns: `true` iff a mapping existed and was released.
    */
    bool freeContents()
    {
        if (_mmfile is null) { return false; }
        // `delete` is deprecated; `destroy` runs the MmFile destructor
        // right away and leaves the memory to the GC
        destroy(_mmfile);
        _mmfile = null;
        return true;
    }

    import std.mmfile;
    private MmFile _mmfile = null;
    private CStat _cstat;     // Statistics about the contents of this RegFile.
}
1560 
/** Traits */

/** True iff `T` is exactly the class `C` or `NotNull!C`. */
private enum isExactlyOrNotNull(T, C) = (is(T == C) || is(T == NotNull!C));

/// True iff `T` is `File` or `NotNull!File`.
enum isFile(T) = isExactlyOrNotNull!(T, File);
/// True iff `T` is `Dir` or `NotNull!Dir`.
enum isDir(T) = isExactlyOrNotNull!(T, Dir);
/// True iff `T` is `Symlink` or `NotNull!Symlink`.
enum isSymlink(T) = isExactlyOrNotNull!(T, Symlink);
/// True iff `T` is `RegFile` or `NotNull!RegFile`.
enum isRegFile(T) = isExactlyOrNotNull!(T, RegFile);
/// True iff `T` is `SpecFile` or `NotNull!SpecFile`.
enum isSpecialFile(T) = isExactlyOrNotNull!(T, SpecFile);

/// True iff `T` matches any of the traits above.
enum isAnyFile(T) = (isFile!T ||
                     isDir!T ||
                     isSymlink!T ||
                     isRegFile!T ||
                     isSpecialFile!T);

/** Return true if T is a class representing File IO. */
enum isFileIO(T) = (isAnyFile!T ||
                    is(T == ioFile));
1576 
/** Contents Statistics of a Regular File.

    All members are derived from the file contents (or from scanning them)
    and are therefore invalid once the contents change; `reset` returns
    every one of them to its initial state.
*/
struct CStat
{
    /** Forget all statistics so that they are recalculated on next use. */
    void reset() @safe nothrow
    {
        kindId[] = 0;
        _contentId[] = 0;
        hitCount = 0;
        bist.reset();
        xgram.reset();
        _xgramDeepDenseness = 0;
        // bit status is derived from the contents as well; leaving it set
        // would suppress its recalculation after the file has changed
        bitStatus = BitStatus.unknown;
        deallocate();
    }

    /** Release resources held by the statistics.

        Currently this only clears `kindId`; `nullify` is unused because
        the x-gram is no longer allocated separately. `@nogc` so that it can
        be called from a `@nogc` destructor.
    */
    void deallocate(bool nullify = true) @trusted nothrow @nogc
    {
        kindId[] = 0;
        /* if (xgram != null) { */
        /*     import core.stdc.stdlib; */
        /*     free(xgram); */
        /*     if (nullify) { */
        /*         xgram = null; */
        /*     } */
        /* } */
    }

    SHA1Digest kindId; // FKind Identifier/Fingerprint of this regular file.
    SHA1Digest _contentId; // Content Identifier/Fingerprint.

    /** Boolean Single Bistogram over file contents. If
        binHist0[cast(ubyte)x] is set then this file contains byte x. Consumes
        32 bytes. */
    Bist bist; // TODO: Put in separate slice std.allocator.

    /** Boolean Pair Bistogram (Digram) over file contents (higher-order statistics).
        If this RegFile contains a sequence of [byte0, bytes1],
        then bit at index byte0 + byte1 * 256 is set in xgram.
    */
    XGram xgram; // TODO: Use slice std.allocator
    private ulong _xgramDeepDenseness = 0; // cached numerator of the x-gram deep denseness; 0 means not yet calculated

    uint64_t hitCount = 0;
    BitStatus bitStatus = BitStatus.unknown; // 7-bit/8-bit classification of the contents
}
1621 
1622 import core.sys.posix.sys.types;
1623 
/** Selects which symlinks are followed.

    NOTE(review): the original comments described `internal` as "outside of
    scanned tree" and `external` as "inside of scanned tree", which
    contradicts the member names. Confirm the intended meaning against the
    code that consumes this enum.
*/
enum SymlinkFollowContext
{
    /// Follow no symlinks.
    none,
    /// Follow only one class of symlinks relative to the scanned tree (see note above).
    internal,
    /// Follow only the other class of symlinks relative to the scanned tree (see note above).
    external,
    /// Follow all symlinks.
    all,
    /// Default choice; same value as `external`.
    standard = external
}
1632 
1633 /** Global Scanner Statistics. */
1634 class GStats
1635 {
1636     NotNull!File[][string] filesByName;    // Potential File Name Duplicates
1637     NotNull!File[][ino_t] filesByInode;    // Potential Link Duplicates
1638     NotNull!File[][SHA1Digest] filesByContentId; // File(s) (Duplicates) Indexed on Contents SHA1.
1639     NotNull!RegFile[][string] elfFilesBySymbol; // File(s) (Duplicates) Indexed on raw unmangled symbol.
1640     FileTags ftags;
1641 
1642     Bytes64[NotNull!File] treeSizesByFile; // Tree sizes.
1643     size_t[NotNull!File] lineCountsByFile; // Line counts.
1644 
1645     // VCS Directories
1646     DirKind[] vcDirKinds;
1647     DirKind[string] vcDirKindsMap;
1648 
1649     // Skipped Directories
1650     DirKind[] skippedDirKinds;
1651     DirKind[string] skippedDirKindsMap;
1652 
1653     FKinds txtFKinds = new FKinds; // Textual
1654     FKinds binFKinds = new FKinds; // Binary (Non-Textual)
1655     FKinds allFKinds = new FKinds; // All
1656     FKinds selFKinds = new FKinds; // User selected
1657 
1658     void loadFileKinds()
1659     {
1660         txtFKinds ~= new FKind("SCons", ["SConstruct", "SConscript"],
1661                                ["scons"],
1662                                [], 0, [], [],
1663                                defaultCommentDelims,
1664                                pythonStringDelims,
1665                                FileContent.buildSystemCode, FileKindDetection.equalsNameAndContents); // TOOD: Inherit Python
1666 
1667         txtFKinds ~= new FKind("Makefile", ["GNUmakefile", "Makefile", "makefile"],
1668                                ["mk", "mak", "makefile", "make", "gnumakefile"], [], 0, [], [],
1669                                defaultCommentDelims,
1670                                defaultStringDelims,
1671                                FileContent.sourceCode, FileKindDetection.equalsName);
1672         txtFKinds ~= new FKind("Automakefile", ["Makefile.am", "makefile.am"],
1673                                ["am"], [], 0, [], [],
1674                                defaultCommentDelims,
1675                                defaultStringDelims,
1676                                FileContent.sourceCode);
1677         txtFKinds ~= new FKind("Autoconffile", ["configure.ac", "configure.in"],
1678                                [], [], 0, [], [],
1679                                defaultCommentDelims,
1680                                defaultStringDelims,
1681                                FileContent.sourceCode);
1682         txtFKinds ~= new FKind("Doxygen", ["Doxyfile"],
1683                                ["doxygen"], [], 0, [], [],
1684                                defaultCommentDelims,
1685                                defaultStringDelims,
1686                                FileContent.sourceCode);
1687 
1688         txtFKinds ~= new FKind("Rake", ["Rakefile"],// TODO: inherit Ruby
1689                                ["mk", "makefile", "make", "gnumakefile"], [], 0, [], [],
1690                                [Delim("#"), Delim("=begin", "=end")],
1691                                defaultStringDelims,
1692                                FileContent.sourceCode, FileKindDetection.equalsName);
1693 
1694         txtFKinds ~= new FKind("HTML", [], ["htm", "html", "shtml", "xhtml"], [], 0, [], [],
1695                                [Delim("<!--", "-->")],
1696                                defaultStringDelims,
1697                                FileContent.text, FileKindDetection.equalsContents); // markup text
1698         txtFKinds ~= new FKind("XML", [], ["xml", "dtd", "xsl", "xslt", "ent", ], [], 0, "<?xml", [],
1699                                [Delim("<!--", "-->")],
1700                                defaultStringDelims,
1701                                FileContent.text, FileKindDetection.equalsContents); // TODO: markup text
1702         txtFKinds ~= new FKind("YAML", [], ["yaml", "yml"], [], 0, [], [],
1703                                defaultCommentDelims,
1704                                defaultStringDelims,
1705                                FileContent.text); // TODO: markup text
1706         txtFKinds ~= new FKind("CSS", [], ["css"], [], 0, [], [],
1707                                [Delim("/*", "*/")],
1708                                defaultStringDelims,
1709                                FileContent.text, FileKindDetection.equalsContents);
1710 
1711         txtFKinds ~= new FKind("Audacity Project", [], ["aup"], [], 0, "<?xml", [],
1712                                defaultCommentDelims,
1713                                defaultStringDelims,
1714                                FileContent.text, FileKindDetection.equalsNameAndContents);
1715 
1716         txtFKinds ~= new FKind("Comma-separated values", [], ["csv"], [], 0, [], [], // TODO: decribe with symbolic
1717                                defaultCommentDelims,
1718                                defaultStringDelims,
1719                                FileContent.text, FileKindDetection.equalsNameAndContents);
1720 
1721         txtFKinds ~= new FKind("Tab-separated values", [], ["tsv"], [], 0, [], [], // TODO: describe with symbolic
1722                                defaultCommentDelims,
1723                                defaultStringDelims,
1724                                FileContent.text, FileKindDetection.equalsNameAndContents);
1725 
1726         static immutable keywordsC = [
1727             "auto", "const", "double", "float", "int", "short", "struct",
1728             "unsigned", "break", "continue", "else", "for", "long", "signed",
1729             "switch", "void", "case", "default", "enum", "goto", "register",
1730             "sizeof", "typedef", "volatile", "char", "do", "extern", "if",
1731             "return", "static", "union", "while",
1732             ];
1733 
1734         /* See_Also: https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B */
1735         auto opersCBasic = [
1736             // Arithmetic
1737             Op("+", OpArity.binary, OpAssoc.LR, 6, "Add"),
1738             Op("-", OpArity.binary, OpAssoc.LR, 6, "Subtract"),
1739             Op("*", OpArity.binary, OpAssoc.LR, 5, "Multiply"),
1740             Op("/", OpArity.binary, OpAssoc.LR, 5, "Divide"),
1741             Op("%", OpArity.binary, OpAssoc.LR, 5, "Remainder/Moduls"),
1742 
1743             Op("+", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary plus"),
1744             Op("-", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary minus"),
1745 
1746             Op("++", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix increment"),
1747             Op("--", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix decrement"),
1748 
1749             Op("++", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix increment"),
1750             Op("--", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix decrement"),
1751 
1752             // Assignment Arithmetic (binary)
1753             Op("=", OpArity.binary, OpAssoc.RL, 16, "Assign"),
1754             Op("+=", OpArity.binary, OpAssoc.RL, 16, "Assignment by sum"),
1755             Op("-=", OpArity.binary, OpAssoc.RL, 16, "Assignment by difference"),
1756             Op("*=", OpArity.binary, OpAssoc.RL, 16, "Assignment by product"),
1757             Op("/=", OpArity.binary, OpAssoc.RL, 16, "Assignment by quotient"),
1758             Op("%=", OpArity.binary, OpAssoc.RL, 16, "Assignment by remainder"),
1759 
1760             Op("&=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise AND"),
1761             Op("|=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise OR"),
1762 
1763             Op("^=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise XOR"),
1764             Op("<<=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise left shift"),
1765             Op(">>=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise right shift"),
1766 
1767             Op("==", OpArity.binary, OpAssoc.LR, 9, "Equal to"),
1768             Op("!=", OpArity.binary, OpAssoc.LR, 9, "Not equal to"),
1769 
1770             Op("<", OpArity.binary, OpAssoc.LR, 8, "Less than"),
1771             Op(">", OpArity.binary, OpAssoc.LR, 8, "Greater than"),
1772             Op("<=", OpArity.binary, OpAssoc.LR, 8, "Less than or equal to"),
1773             Op(">=", OpArity.binary, OpAssoc.LR, 8, "Greater than or equal to"),
1774 
1775             Op("&&", OpArity.binary, OpAssoc.LR, 13, "Logical AND"), // TODO: Convert to math in smallcaps AND
1776             Op("||", OpArity.binary, OpAssoc.LR, 14, "Logical OR"), // TODO: Convert to math in smallcaps OR
1777 
1778             Op("!", OpArity.unaryPrefix, OpAssoc.LR, 3, "Logical NOT"), // TODO: Convert to math in smallcaps NOT
1779 
1780             Op("&", OpArity.binary, OpAssoc.LR, 10, "Bitwise AND"),
1781             Op("^", OpArity.binary, OpAssoc.LR, 11, "Bitwise XOR (exclusive or)"),
1782             Op("|", OpArity.binary, OpAssoc.LR, 12, "Bitwise OR"),
1783 
1784             Op("<<", OpArity.binary, OpAssoc.LR, 7, "Bitwise left shift"),
1785             Op(">>", OpArity.binary, OpAssoc.LR, 7, "Bitwise right shift"),
1786 
1787             Op("~", OpArity.unaryPrefix, OpAssoc.LR, 3, "Bitwise NOT (One's Complement)"),
1788             Op(",", OpArity.binary, OpAssoc.LR, 18, "Comma"),
1789             Op("sizeof", OpArity.unaryPrefix, OpAssoc.LR, 3, "Size-of"),
1790 
1791             Op("->", OpArity.binary, OpAssoc.LR, 2, "Element selection through pointer"),
1792             Op(".", OpArity.binary, OpAssoc.LR, 2, "Element selection by reference"),
1793 
1794             ];
1795 
1796         /* See_Also: https://en.wikipedia.org/wiki/Iso646.h */
1797         auto opersC_ISO646 = [
1798             OpAlias("and", "&&"),
1799             OpAlias("or", "||"),
1800             OpAlias("and_eq", "&="),
1801 
1802             OpAlias("bitand", "&"),
1803             OpAlias("bitor", "|"),
1804 
1805             OpAlias("compl", "~"),
1806             OpAlias("not", "!"),
1807             OpAlias("not_eq", "!="),
1808             OpAlias("or_eq", "|="),
1809             OpAlias("xor", "^"),
1810             OpAlias("xor_eq", "^="),
1811             ];
1812 
1813         auto opersC = opersCBasic /* ~ opersC_ISO646 */;
1814 
1815         auto kindC = new FKind("C", [], ["c", "h"], [], 0, [],
1816                                keywordsC,
1817                                cCommentDelims,
1818                                defaultStringDelims,
1819                                FileContent.sourceCode,
1820                                FileKindDetection.equalsWhatsGiven,
1821                                Lang.c);
1822         txtFKinds ~= kindC;
1823         kindC.operations ~= tuple(FOp.checkSyntax, `gcc -x c -fsyntax-only -c`);
1824         kindC.operations ~= tuple(FOp.checkSyntax, `clang -x c -fsyntax-only -c`);
1825         kindC.operations ~= tuple(FOp.preprocess, `cpp`);
1826         kindC.opers = opersC;
1827 
1828         static immutable keywordsCxx = (keywordsC ~ ["asm", "dynamic_cast", "namespace", "reinterpret_cast", "try",
1829                                                      "bool", "explicit", "new", "static_cast", "typeid",
1830                                                      "catch", "false", "operator", "template", "typename",
1831                                                      "class", "friend", "private", "this", "using",
1832                                                      "const_cast", "inline", "public", "throw", "virtual",
1833                                                      "delete", "mutable", "protected", "true", "wchar_t",
1834                                                      // The following are not essential when
1835                                                      // the standard ASCII character set is
1836                                                      // being used, but they have been added
1837                                                      // to provide more readable alternatives
1838                                                      // for some of the C++ operators, and
1839                                                      // also to facilitate programming with
1840                                                      // character sets that lack characters
1841                                                      // needed by C++.
1842                                                      "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq",
1843                                                      "and_eq", "bitor", "not", "or", "xor", ]).uniq.array;
1844 
1845         auto opersCxx = opersC ~ [
1846             Op("->*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"),
1847             Op(".*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"),
1848             Op("::", OpArity.binary, OpAssoc.none, 1, "Scope resolution"),
1849             Op("typeid", OpArity.unaryPrefix, OpAssoc.LR, 2, "Run-time type information (RTTI))"),
1850             //Op("alignof", OpArity.unaryPrefix, OpAssoc.LR, _, _),
1851             Op("new", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory allocation"),
1852             Op("delete", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"),
1853             Op("delete[]", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"),
1854             /* Op("noexcept", OpArity.unaryPrefix, OpAssoc.none, _, _), */
1855 
1856             Op("dynamic_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1857             Op("reinterpret_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1858             Op("static_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1859             Op("const_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1860 
1861             Op("throw", OpArity.unaryPrefix, OpAssoc.LR, 17, "Throw operator"),
1862             /* Op("catch", OpArity.unaryPrefix, OpAssoc.LR, _, _) */
1863             ];
1864 
1865         static immutable extsCxx = ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"];
1866         auto kindCxx = new FKind("C++", [], extsCxx, [], 0, [],
1867                                  keywordsCxx,
1868                                  cCommentDelims,
1869                                  defaultStringDelims,
1870                                  FileContent.sourceCode,
1871                                  FileKindDetection.equalsWhatsGiven,
1872                                  Lang.cxx);
1873         kindCxx.operations ~= tuple(FOp.checkSyntax, `gcc -x c++ -fsyntax-only -c`);
1874         kindCxx.operations ~= tuple(FOp.checkSyntax, `clang -x c++ -fsyntax-only -c`);
1875         kindCxx.operations ~= tuple(FOp.preprocess, `cpp`);
1876         kindCxx.opers = opersCxx;
1877         txtFKinds ~= kindCxx;
1878         static immutable keywordsCxx11 = keywordsCxx ~ ["alignas", "alignof",
1879                                                         "char16_t", "char32_t",
1880                                                         "constexpr",
1881                                                         "decltype",
1882                                                         "override", "final",
1883                                                         "noexcept", "nullptr",
1884                                                         "auto",
1885                                                         "thread_local",
1886                                                         "static_assert", ];
1887         // TODO: Define as subkind
1888         /* txtFKinds ~= new FKind("C++11", [], ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"], [], 0, [], */
1889         /*                        keywordsCxx11, */
1890         /*                        [Delim("/\*", "*\/"), */
1891         /*                         Delim("//")], */
1892         /*                        defaultStringDelims, */
1893         /*                        FileContent.sourceCode, */
1894         /*                        FileKindDetection.equalsWhatsGiven); */
1895 
1896         /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
1897         static immutable opersCxxMicrosoft = ["__alignof"];
1898 
1899         /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
1900         static immutable keywordsCxxMicrosoft = (keywordsCxx ~ [/* __abstract 2 */
1901                                                      "__asm",
1902                                                      "__assume",
1903                                                      "__based",
1904                                                      /* __box 2 */
1905                                                      "__cdecl",
1906                                                      "__declspec",
1907                                                      /* __delegate 2 */
1908                                                      "__event",
1909                                                      "__except",
1910                                                      "__fastcall",
1911                                                      "__finally",
1912                                                      "__forceinline",
1913                                                      /* __gc 2 */
1914                                                      /* __hook 3 */
1915                                                      "__identifier",
1916                                                      "__if_exists",
1917                                                      "__if_not_exists",
1918                                                      "__inline",
1919                                                      "__int16",
1920                                                      "__int32",
1921                                                      "__int64",
1922                                                      "__int8",
1923                                                      "__interface",
1924                                                      "__leave",
1925                                                      "__m128",
1926                                                      "__m128d",
1927                                                      "__m128i",
1928                                                      "__m64",
1929                                                      "__multiple_inheritance",
1930                                                      /* __nogc 2 */
1931                                                      "__noop",
1932                                                      /* __pin 2 */
1933                                                      /* __property 2 */
1934                                                      "__raise",
1935                                                      /* __sealed 2 */
1936                                                      "__single_inheritance",
1937                                                      "__stdcall",
1938                                                      "__super",
1939                                                      "__thiscall",
1940                                                      "__try",
1941                                                      "__except",
1942                                                      "__finally",
1943                                                      /* __try_cast 2 */
1944                                                      "__unaligned",
1945                                                      /* __unhook 3 */
1946                                                      "__uuidof",
1947                                                      /* __value 2 */
1948                                                      "__virtual_inheritance",
1949                                                      "__w64",
1950                                                      "__wchar_t",
1951                                                      "wchar_t",
1952                                                      "abstract",
1953                                                      "array",
1954                                                      "auto",
1955                                                      "bool",
1956                                                      "break",
1957                                                      "case",
1958                                                      "catch",
1959                                                      "char",
1960                                                      "class",
1961                                                      "const",
1962                                                      "const_cast",
1963                                                      "continue",
1964                                                      "decltype",
1965                                                      "default",
1966                                                      "delegate",
1967                                                      "delete",
1968                                                      /* deprecated 1 */
1969                                                      /* dllexport 1 */
1970                                                      /* dllimport 1 */
1971                                                      "do",
1972                                                      "double",
1973                                                      "dynamic_cast",
1974                                                      "else",
1975                                                      "enum",
1976                                                      "enum class",
1977                                                      "enum struct",
1978                                                      "event",
1979                                                      "explicit",
1980                                                      "extern",
1981                                                      "false",
1982                                                      "finally",
1983                                                      "float",
1984                                                      "for",
1985                                                      "for each",
1986                                                      "in",
1987                                                      "friend",
1988                                                      "friend_as",
1989                                                      "gcnew",
1990                                                      "generic",
1991                                                      "goto",
1992                                                      "if",
1993                                                      "initonly",
1994                                                      "inline",
1995                                                      "int",
1996                                                      "interface class",
1997                                                      "interface struct",
1998                                                      "interior_ptr",
1999                                                      "literal",
2000                                                      "long",
2001                                                      "mutable",
2002                                                      /* naked 1 */
2003                                                      "namespace",
2004                                                      "new",
2005                                                      "new",
2006                                                      /* noinline 1 */
2007                                                      /* noreturn 1 */
2008                                                      /* nothrow 1 */
2009                                                      /* novtable 1 */
2010                                                      "nullptr",
2011                                                      "operator",
2012                                                      "private",
2013                                                      "property",
2014                                                      /* property 1 */
2015                                                      "protected",
2016                                                      "public",
2017                                                      "ref class",
2018                                                      "ref struct",
2019                                                      "register",
2020                                                      "reinterpret_cast",
2021                                                      "return",
2022                                                      "safecast",
2023                                                      "sealed",
2024                                                      /* selectany 1 */
2025                                                      "short",
2026                                                      "signed",
2027                                                      "sizeof",
2028                                                      "static",
2029                                                      "static_assert",
2030                                                      "static_cast",
2031                                                      "struct",
2032                                                      "switch",
2033                                                      "template",
2034                                                      "this",
2035                                                      /* thread 1 */
2036                                                      "throw",
2037                                                      "true",
2038                                                      "try",
2039                                                      "typedef",
2040                                                      "typeid",
2041                                                      "typeid",
2042                                                      "typename",
2043                                                      "union",
2044                                                      "unsigned",
2045                                                      "using" /* declaration */,
2046                                                      "using" /* directive */,
2047                                                      /* uuid 1 */
2048                                                      "value class",
2049                                                      "value struct",
2050                                                      "virtual",
2051                                                      "void",
2052                                                      "volatile",
2053                                                      "while"]).uniq.array;
2054 
2055         static immutable xattrCxxMicrosoft = [];
2056 
2057         static immutable keywordsNewObjectiveC = ["id",
2058                                                   "in",
2059                                                   "out", // Returned by reference
2060                                                   "inout", // Argument is used both to provide information and to get information back
2061                                                   "bycopy",
2062                                                   "byref", "oneway", "self",
2063                                                   "super", "@interface", "@end",
2064                                                   "@implementation", "@end",
2065                                                   "@interface", "@end",
2066                                                   "@implementation", "@end",
2067                                                   "@protoco", "@end", "@class" ];
2068 
2069         static immutable keywordsObjectiveC = keywordsC ~ keywordsNewObjectiveC;
2070         txtFKinds ~= new FKind("Objective-C", [], ["m", "h"], [], 0, [],
2071                                keywordsObjectiveC,
2072                                cCommentDelims,
2073                                defaultStringDelims,
2074                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven,
2075                                Lang.objectiveC);
2076 
2077         static immutable keywordsObjectiveCxx = keywordsCxx ~ keywordsNewObjectiveC;
2078         txtFKinds ~= new FKind("Objective-C++", [], ["mm", "h"], [], 0, [],
2079                                keywordsObjectiveCxx,
2080                                defaultCommentDelims,
2081                                defaultStringDelims,
2082                                FileContent.sourceCode,
2083                                FileKindDetection.equalsWhatsGiven,
2084                                Lang.objectiveCxx);
2085 
2086         static immutable keywordsSwift = ["break", "class", "continue", "default", "do", "else", "for", "func", "if", "import",
2087                               "in", "let", "return", "self", "struct", "super", "switch", "unowned", "var", "weak", "while",
2088                               "mutating", "extension"];
2089         auto opersOverflowSwift = opersC ~ [Op("&+"), Op("&-"), Op("&*"), Op("&/"), Op("&%")];
2090         auto builtinsSwift = ["print", "println"];
2091         auto kindSwift = new FKind("Swift", [], ["swift"], [], 0, [],
2092                                    keywordsSwift,
2093                                    cCommentDelims,
2094                                    defaultStringDelims,
2095                                    FileContent.sourceCode,
2096                                    FileKindDetection.equalsWhatsGiven,
2097                                    Lang.swift);
2098         kindSwift.builtins = builtinsSwift;
2099         kindSwift.opers = opersOverflowSwift;
2100         txtFKinds ~= kindSwift;
2101 
2102         static immutable keywordsCSharp = ["if"]; // TODO: Add keywords
2103         txtFKinds ~= new FKind("C#", [], ["cs"], [], 0, [], keywordsCSharp,
2104                                cCommentDelims,
2105                                defaultStringDelims,
2106                                FileContent.sourceCode,
2107                                FileKindDetection.equalsWhatsGiven,
2108                                Lang.cSharp);
2109 
2110         static immutable keywordsOCaml = ["and", "as", "assert", "begin", "class",
2111                                           "constraint", "do", "done", "downto", "else",
2112                                           "end", "exception", "external", "false", "for",
2113                                           "fun", "function", "functor", "if", "in",
2114                                           "include", "inherit", "inherit!", "initializer",
2115                                           "lazy", "let", "match", "method", "method!",
2116                                           "module", "mutable", "new", "object", "of",
2117                                           "open", "or",
2118                                           "private", "rec", "sig", "struct", "then", "to",
2119                                           "true", "try", "type",
2120                                           "val", "val!", "virtual",
2121                                           "when", "while", "with"];
2122         txtFKinds ~= new FKind("OCaml", [], ["ocaml"], [], 0, [], keywordsOCaml,
2123                                [Delim("(*", "*)")],
2124                                defaultStringDelims,
2125                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2126 
2127         txtFKinds ~= new FKind("Parrot", [], ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg", ], [], 0, [], keywordsOCaml,
2128                                [Delim("#"),
2129                                 Delim("^=", // TODO: Needs beginning of line instead of ^
2130                                       "=cut")],
2131                                defaultStringDelims,
2132                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2133 
2134         static immutable keywordsProlog = [];
2135         txtFKinds ~= new FKind("Prolog", [], ["pl", "pro", "P"], [], 0, [], keywordsProlog,
2136                                [],
2137                                [],
2138                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2139 
2140         auto opersD = [
2141             // Arithmetic
2142             Op("+", OpArity.binary, OpAssoc.LR, 10*2, "Add"),
2143             Op("-", OpArity.binary, OpAssoc.LR, 10*2, "Subtract"),
2144             Op("~", OpArity.binary, OpAssoc.LR, 10*2, "Concatenate"),
2145 
2146             Op("*", OpArity.binary, OpAssoc.LR, 11*2, "Multiply"),
2147             Op("/", OpArity.binary, OpAssoc.LR, 11*2, "Divide"),
2148             Op("%", OpArity.binary, OpAssoc.LR, 11*2, "Remainder/Moduls"),
2149 
2150             Op("++", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix increment"),
2151             Op("--", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix decrement"),
2152 
2153             Op("^^", OpArity.binary, OpAssoc.RL, 13*2, "Power"),
2154 
2155             Op("++", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix increment"),
2156             Op("--", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix decrement"),
2157             Op("&", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Address off"),
2158             Op("*", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Pointer Dereference"),
2159             Op("+", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Plus"),
2160             Op("-", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Minus"),
2161             Op("!", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Logical NOT"), // TODO: Convert to math in smallcaps NOT
2162             Op("~", OpArity.unaryPrefix, OpAssoc.LR, 12*2, "Bitwise NOT (One's Complement)"),
2163 
2164             // Bit shift
2165             Op("<<", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise left shift"),
2166             Op(">>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise right shift"),
2167 
2168             // Comparison
2169             Op("==", OpArity.binary, OpAssoc.LR, 6*2, "Equal to"),
2170             Op("!=", OpArity.binary, OpAssoc.LR, 6*2, "Not equal to"),
2171             Op("<", OpArity.binary, OpAssoc.LR, 6*2, "Less than"),
2172             Op(">", OpArity.binary, OpAssoc.LR, 6*2, "Greater than"),
2173             Op("<=", OpArity.binary, OpAssoc.LR, 6*2, "Less than or equal to"),
2174             Op(">=", OpArity.binary, OpAssoc.LR, 6*2, "Greater than or equal to"),
2175             Op("in", OpArity.binary, OpAssoc.LR, 6*2, "In"),
2176             Op("!in", OpArity.binary, OpAssoc.LR, 6*2, "Not In"),
2177             Op("is", OpArity.binary, OpAssoc.LR, 6*2, "Is"),
2178             Op("!is", OpArity.binary, OpAssoc.LR, 6*2, "Not Is"),
2179 
2180             Op("&", OpArity.binary, OpAssoc.LR, 8*2, "Bitwise AND"),
2181             Op("^", OpArity.binary, OpAssoc.LR, 7*2, "Bitwise XOR (exclusive or)"),
2182             Op("|", OpArity.binary, OpAssoc.LR, 6*2, "Bitwise OR"),
2183 
2184             Op("&&", OpArity.binary, OpAssoc.LR, 5*2, "Logical AND"), // TODO: Convert to math in smallcaps AND
2185             Op("||", OpArity.binary, OpAssoc.LR, 4*2, "Logical OR"), // TODO: Convert to math in smallcaps OR
2186 
2187             // Assignment Arithmetic (binary)
2188             Op("=", OpArity.binary, OpAssoc.RL, 2*2, "Assign"),
2189             Op("+=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by sum"),
2190             Op("-=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by difference"),
2191             Op("*=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by product"),
2192             Op("/=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by quotient"),
2193             Op("%=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by remainder"),
2194             Op("&=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise AND"),
2195             Op("|=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise OR"),
2196             Op("^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise XOR"),
2197             Op("<<=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise left shift"),
2198             Op(">>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise right shift"),
2199 
2200             Op(",", OpArity.binary, OpAssoc.LR, 1*2, "Comma"),
2201             Op("..", OpArity.binary, OpAssoc.LR, cast(int)(0*2), "Range separator"),
2202             ];
2203 
2204         enum interpretersForD = ["rdmd",
2205                                  "gdmd"];
2206         auto magicForD = shebangLine(alt(lit("rdmd"),
2207                                          lit("gdmd")));
2208 
2209         static immutable keywordsD = [`@property`, `@safe`, `@trusted`, `@system`, `@disable`, `abstract`, `alias`, `align`, `asm`, `assert`, `auto`, `body`, `bool`, `break`, `byte`, `case`, `cast`, `catch`,
2210                                       `cdouble`, `cent`, `cfloat`, `char`, `class`, `const`, `continue`, `creal`, `dchar`, `debug`, `default`, `delegate`, `delete`, `deprecated`,
2211                                       `do`, `double`, `else`, `enum`, `export`, `extern`, `false`, `final`, `finally`, `float`, `for`, `foreach`, `foreach_reverse`,
2212                                       `function`, `goto`, `idouble`, `if`, `ifloat`, `immutable`, `import`, `in`, `inout`, `int`, `interface`, `invariant`, `ireal`,
2213                                       `is`, `lazy`, `long`, `macro`, `mixin`, `module`, `new`, `nothrow`, `null`, `out`, `override`, `package`, `pragma`, `private`,
2214                                       `protected`, `public`, `pure`, `real`, `ref`, `return`, `scope`, `shared`, `short`, `static`, `struct`, `super`, `switch`,
2215                                       `synchronized`, `template`, `this`, `throw`, `true`, `try`, `typedef`, `typeid`, `typeof`, `ubyte`, `ucent`, `uint`, `ulong`,
2216                                       `union`, `unittest`, `ushort`, `version`, `void`, `volatile`, `wchar`, `while`, `with`, `__gshared`,
2217                                       `__thread`, `__traits`,
2218                                       `string`, `wstring`, `dstring`, `size_t`, `hash_t`, `ptrdiff_t`, `equals_`]; // aliases
2219 
2220         static immutable builtinsD = [`toString`, `toHash`, `opCmp`, `opEquals`,
2221                           `opUnary`, `opBinary`, `opApply`, `opCall`, `opAssign`, `opIndexAssign`, `opSliceAssign`, `opOpAssign`,
2222                           `opIndex`, `opSlice`, `opDispatch`,
2223                           `toString`, `toHash`, `opCmp`, `opEquals`, `Monitor`, `factory`, `classinfo`, `vtbl`, `offset`, `getHash`, `equals`, `compare`, `tsize`, `swap`, `next`, `init`, `flags`, `offTi`, `destroy`, `postblit`, `toString`, `toHash`,
2224                           `factory`, `classinfo`, `Throwable`, `Exception`, `Error`, `capacity`, `reserve`, `assumeSafeAppend`, `clear`,
2225                           `ModuleInfo`, `ClassInfo`, `MemberInfo`, `TypeInfo`];
2226 
2227         static immutable propertiesD = [`sizeof`, `stringof`, `mangleof`, `nan`, `init`, `alignof`, `max`, `min`, `infinity`, `epsilon`, `mant_dig`, ``,
2228                             `max_10_exp`, `max_exp`, `min_10_exp`, `min_exp`, `min_normal`, `re`, `im`];
2229 
2230         static immutable specialsD = [`__FILE__`, `__LINE__`, `__DATE__`, `__EOF__`, `__TIME__`, `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `#line`];
2231 
2232         auto kindDInterface = new FKind("D Interface", [], ["di"],
2233                                         magicForD, 0,
2234                                         [],
2235                                         keywordsD,
2236                                         dCommentDelims,
2237                                         defaultStringDelims,
2238                                         FileContent.sourceCode,
2239                                         FileKindDetection.equalsNameOrContents,
2240                                         Lang.d);
2241         kindDInterface.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2242         kindDInterface.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO: Include paths
2243         txtFKinds ~= kindDInterface;
2244 
2245         auto kindDDoc = new FKind("D Documentation", [], ["dd"],
2246                                   magicForD, 0,
2247                                   [],
2248                                   keywordsD,
2249                                   dCommentDelims,
2250                                   defaultStringDelims,
2251                                   FileContent.sourceCode,
2252                                   FileKindDetection.equalsNameOrContents);
2253         txtFKinds ~= kindDDoc;
2254 
2255         auto kindD = new FKind("D", [], ["d", "di"],
2256                                magicForD, 0,
2257                                [],
2258                                keywordsD,
2259                                dCommentDelims,
2260                                defaultStringDelims,
2261                                FileContent.sourceCode,
2262                                FileKindDetection.equalsNameOrContents,
2263                                Lang.d);
2264         kindD.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2265         kindD.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO: Include paths
2266         txtFKinds ~= kindD;
2267 
2268         auto kindDi = new FKind("D Interface", [], ["di"],
2269                                 magicForD, 0,
2270                                 [],
2271                                 keywordsD,
2272                                 dCommentDelims,
2273                                 defaultStringDelims,
2274                                 FileContent.sourceCode,
2275                                 FileKindDetection.equalsNameOrContents,
2276                                 Lang.d);
2277         kindDi.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2278         kindDi.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO: Include paths
2279         txtFKinds ~= kindDi;
2280 
2281         static immutable keywordsRust = ["as", "box", "break", "continue", "crate",
2282                                          "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in",
2283                                          "let", "loop", "match", "mod", "mut", "priv", "proc", "pub", "ref",
2284                                          "return", "self", "static", "struct", "super", "true", "trait",
2285                                          "type", "unsafe", "use", "while"];
2286 
2287         auto kindRust = new FKind("Rust", [], ["rs"],
2288                                   [], 0,
2289                                   [],
2290                                   keywordsRust,
2291                                   cCommentDelims,
2292                                   defaultStringDelims,
2293                                   FileContent.sourceCode,
2294                                   FileKindDetection.equalsNameOrContents,
2295                                   Lang.rust);
2296         txtFKinds ~= kindRust;
2297 
2298         static immutable keywordsFortran77 = ["if", "else"];
2299         // TODO: Support .h files but require it to contain some Fortran-specific or be parseable.
2300         auto kindFortan = new FKind("Fortran", [], ["f", "fortran", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"], [], 0, [], keywordsFortran77,
2301                                     [Delim("^C")], // TODO: Need beginning of line instead ^. seq(bol(), alt(lit('C'), lit('c'))); // TODO: Add chars chs("cC");
2302                                     defaultStringDelims,
2303                                     FileContent.sourceCode,
2304                                     FileKindDetection.equalsNameOrContents,
2305                                     Lang.fortran);
2306         kindFortan.operations ~= tuple(FOp.checkSyntax, `gcc -x fortran -fsyntax-only`);
2307         txtFKinds ~= kindFortan;
2308 
2309         // Ada
2310         import nxt.ada_defs;
2311         static immutable keywordsAda83 = ada_defs.keywords83;
2312         static immutable keywordsAda95 = keywordsAda83 ~ ada_defs.keywordsNew95;
2313         static immutable keywordsAda2005 = keywordsAda95 ~ ada_defs.keywordsNew2005;
2314         static immutable keywordsAda2012 = keywordsAda2005 ~ ada_defs.keywordsNew2012;
2315         static immutable extsAda = ["ada", "adb", "ads"];
2316         txtFKinds ~= new FKind("Ada 82", [], extsAda, [], 0, [], keywordsAda83,
2317                                [Delim("--")],
2318                                defaultStringDelims,
2319                                FileContent.sourceCode);
2320         txtFKinds ~= new FKind("Ada 95", [], extsAda, [], 0, [], keywordsAda95,
2321                                [Delim("--")],
2322                                defaultStringDelims,
2323                                FileContent.sourceCode);
2324         txtFKinds ~= new FKind("Ada 2005", [], extsAda, [], 0, [], keywordsAda2005,
2325                                [Delim("--")],
2326                                defaultStringDelims,
2327                                FileContent.sourceCode);
2328         txtFKinds ~= new FKind("Ada 2012", [], extsAda, [], 0, [], keywordsAda2012,
2329                                [Delim("--")],
2330                                defaultStringDelims,
2331                                FileContent.sourceCode);
2332         txtFKinds ~= new FKind("Ada", [], extsAda, [], 0, [], keywordsAda2012,
2333                                [Delim("--")],
2334                                defaultStringDelims,
2335                                FileContent.sourceCode);
2336 
2337         auto aliKind = new FKind("Ada Library File", [], ["ali"], [], 0, `V "GNAT Lib v`, [],
2338                                  [], // N/A
2339                                  defaultStringDelims,
2340                                  FileContent.fingerprint); // TODO: Parse version following magic tag?
2341         aliKind.machineGenerated = true;
2342         txtFKinds ~= aliKind;
2343 
2344         txtFKinds ~= new FKind("Pascal", [], ["pas", "pascal"], [], 0, [], [],
2345                                [Delim("(*", "*)"),// Old-Style
2346                                 Delim("{", "}"),// Turbo Pascal
2347                                 Delim("//")],// Delphi
2348                                defaultStringDelims,
2349                                FileContent.sourceCode, FileKindDetection.equalsContents);
2350         txtFKinds ~= new FKind("Delphi", [], ["pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj"],
2351                                [], 0, [], [],
2352                                [Delim("//")],
2353                                defaultStringDelims,
2354                                FileContent.sourceCode, FileKindDetection.equalsContents);
2355 
2356         txtFKinds ~= new FKind("Objective-C", [], ["m"], [], 0, [], [],
2357                                cCommentDelims,
2358                                defaultStringDelims,
2359                                FileContent.sourceCode);
2360 
2361         static immutable keywordsPython = ["and", "del", "for", "is", "raise", "assert", "elif", "from", "lambda", "return",
2362                                "break", "else", "global", "not", "try", "class", "except", "if", "or", "while",
2363                                "continue", "exec", "import", "pass", "yield", "def", "finally", "in", "print"];
2364 
2365         // Scripting
2366 
2367         auto kindPython = new FKind("Python", [], ["py"],
2368                                     shebangLine(lit("python")), 0, [],
2369                                     keywordsPython,
2370                                     defaultCommentDelims,
2371                                     pythonStringDelims,
2372                                     FileContent.scriptCode);
2373         txtFKinds ~= kindPython;
2374 
2375         txtFKinds ~= new FKind("Ruby", [], ["rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec", ],
2376                                shebangLine(lit("ruby")), 0,
2377                                [], [],
2378                                [Delim("#"), Delim("=begin", "=end")],
2379                                defaultStringDelims,
2380                                FileContent.scriptCode);
2381 
2382         txtFKinds ~= new FKind("Scala", [], ["scala", ],
2383                                shebangLine(lit("scala")), 0,
2384                                [], [],
2385                                cCommentDelims,
2386                                defaultStringDelims,
2387                                FileContent.scriptCode);
2388         txtFKinds ~= new FKind("Scheme", [], ["scm", "ss"],
2389                                [], 0,
2390                                [], [],
2391                                [Delim(";")],
2392                                defaultStringDelims,
2393                                FileContent.scriptCode);
2394 
2395         txtFKinds ~= new FKind("Smalltalk", [], ["st"], [], 0, [], [],
2396                                [Delim("\"", "\"")],
2397                                defaultStringDelims,
2398                                FileContent.sourceCode);
2399 
2400         txtFKinds ~= new FKind("Perl", [], ["pl", "pm", "pm6", "pod", "t", "psgi", ],
2401                                shebangLine(lit("perl")), 0,
2402                                [], [],
2403                                defaultCommentDelims,
2404                                defaultStringDelims,
2405                                FileContent.scriptCode);
2406         txtFKinds ~= new FKind("PHP", [], ["php", "phpt", "php3", "php4", "php5", "phtml", ],
2407                                shebangLine(lit("php")), 0,
2408                                [], [],
2409                                defaultCommentDelims ~ cCommentDelims,
2410                                defaultStringDelims,
2411                                FileContent.scriptCode);
2412         txtFKinds ~= new FKind("Plone", [], ["pt", "cpt", "metadata", "cpy", "py", ], [], 0, [], [],
2413                                defaultCommentDelims,
2414                                defaultStringDelims,
2415                                FileContent.scriptCode);
2416 
2417         txtFKinds ~= new FKind("Shell", [], ["sh"],
2418                                shebangLine(lit("sh")), 0,
2419                                [], [],
2420                                defaultCommentDelims,
2421                                defaultStringDelims,
2422                                FileContent.scriptCode);
2423         txtFKinds ~= new FKind("Bash", [], ["bash"],
2424                                shebangLine(lit("bash")), 0,
2425                                [], [],
2426                                defaultCommentDelims,
2427                                defaultStringDelims,
2428                                FileContent.scriptCode);
2429         txtFKinds ~= new FKind("Zsh", [], ["zsh"],
2430                                shebangLine(lit("zsh")), 0,
2431                                [], [],
2432                                defaultCommentDelims,
2433                                defaultStringDelims,
2434                                FileContent.scriptCode);
2435 
2436         txtFKinds ~= new FKind("Batch", [], ["bat", "cmd"], [], 0, [], [],
2437                                [Delim("REM")],
2438                                defaultStringDelims,
2439                                FileContent.scriptCode);
2440 
2441         txtFKinds ~= new FKind("TCL", [], ["tcl", "itcl", "itk", ], [], 0, [], [],
2442                                defaultCommentDelims,
2443                                defaultStringDelims,
2444                                FileContent.scriptCode);
2445         txtFKinds ~= new FKind("Tex", [], ["tex", "cls", "sty", ], [], 0, [], [],
2446                                [Delim("%")],
2447                                defaultStringDelims,
2448                                FileContent.scriptCode);
2449         txtFKinds ~= new FKind("TT", [], ["tt", "tt2", "ttml", ], [], 0, [], [],
2450                                defaultCommentDelims,
2451                                defaultStringDelims,
2452                                FileContent.scriptCode);
2453         txtFKinds ~= new FKind("Viz Basic", [], ["bas", "cls", "frm", "ctl", "vb", "resx", ], [], 0, [], [],
2454                                [Delim("'")],
2455                                defaultStringDelims,
2456                                FileContent.scriptCode);
2457 
2458         txtFKinds ~= new FKind("Verilog", [], ["v", "vh", "sv"], [], 0, [], [],
2459                                cCommentDelims,
2460                                defaultStringDelims,
2461                                FileContent.scriptCode);
2462         txtFKinds ~= new FKind("VHDL", [], ["vhd", "vhdl"], [], 0, [], [],
2463                                [Delim("--")],
2464                                defaultStringDelims,
2465                                FileContent.scriptCode);
2466 
2467         txtFKinds ~= new FKind("Clojure", [], ["clj"], [], 0, [], [],
2468                                [Delim(";")],
2469                                defaultStringDelims,
2470                                FileContent.sourceCode);
2471         txtFKinds ~= new FKind("Go", [], ["go"], [], 0, [], [],
2472                                cCommentDelims,
2473                                defaultStringDelims,
2474                                FileContent.sourceCode);
2475 
2476         auto kindJava = new FKind("Java", [], ["java", "properties"], [], 0, [], [],
2477                                   cCommentDelims,
2478                                   defaultStringDelims,
2479                                   FileContent.sourceCode);
2480         txtFKinds ~= kindJava;
2481         kindJava.operations ~= tuple(FOp.byteCompile, `javac`);
2482 
2483         txtFKinds ~= new FKind("Groovy", [], ["groovy", "gtmpl", "gpp", "grunit"], [], 0, [], [],
2484                                cCommentDelims,
2485                                defaultStringDelims,
2486                                FileContent.sourceCode);
2487         txtFKinds ~= new FKind("Haskell", [], ["hs", "lhs"], [], 0, [], [],
2488                                [Delim("--}"),
2489                                 Delim("{-", "-}")],
2490                                defaultStringDelims,
2491                                FileContent.sourceCode);
2492 
2493         static immutable keywordsJavascript = ["break", "case", "catch", "continue", "debugger", "default", "delete",
2494                                                "do", "else", "finally", "for", "function", "if", "in", "instanceof",
2495                                                "new", "return", "switch", "this", "throw", "try", "typeof", "var",
2496                                                "void", "while", "with" ];
2497         txtFKinds ~= new FKind("JavaScript", [], ["js"],
2498                                [], 0, [],
2499                                keywordsJavascript,
2500                                cCommentDelims,
2501                                defaultStringDelims,
2502                                FileContent.scriptCode);
2503         txtFKinds ~= new FKind("JavaScript Object Notation",
2504                                [], ["json"],
2505                                [], 0, [], [],
2506                                [], // N/A
2507                                defaultStringDelims,
2508                                FileContent.sourceCode);
2509 
2510         auto dubFKind = new FKind("DUB",
2511                                   ["dub.json"], ["json"],
2512                                   [], 0, [], [],
2513                                   [], // N/A
2514                                   defaultStringDelims,
2515                                   FileContent.scriptCode);
2516         txtFKinds ~= dubFKind;
2517         dubFKind.operations ~= tuple(FOp.build, `dub`);
2518 
2519         // TODO: Inherit XML
2520         txtFKinds ~= new FKind("JSP", [], ["jsp", "jspx", "jhtm", "jhtml"], [], 0, [], [],
2521                                [Delim("<!--", "--%>"), // XML
2522                                 Delim("<%--", "--%>")],
2523                                defaultStringDelims,
2524                                FileContent.scriptCode);
2525 
2526         txtFKinds ~= new FKind("ActionScript", [], ["as", "mxml"], [], 0, [], [],
2527                                cCommentDelims, // N/A
2528                                defaultStringDelims,
2529                                FileContent.scriptCode);
2530 
2531         txtFKinds ~= new FKind("LUA", [], ["lua"], [], 0, [], [],
2532                                [Delim("--")],
2533                                defaultStringDelims,
2534                                FileContent.scriptCode);
2535         txtFKinds ~= new FKind("Mason", [], ["mas", "mhtml", "mpl", "mtxt"], [], 0, [], [],
2536                                [], // TODO: Need symbolic
2537                                defaultStringDelims,
2538                                FileContent.scriptCode);
2539 
2540         txtFKinds ~= new FKind("CFMX", [], ["cfc", "cfm", "cfml"], [], 0, [], [],
2541                                [], // N/A
2542                                defaultStringDelims,
2543                                FileContent.scriptCode);
2544 
2545         // Simulation
2546         static immutable keywordsModelica = ["algorithm", "discrete", "false", "loop", "pure",
2547                                              "and", "each", "final", "model", "record",
2548                                              "annotation", "else", "flow", "not", "redeclare",
2549                                              "elseif", "for", "operator", "replaceable",
2550                                              "block", "elsewhen", "function", "or", "return",
2551                                              "break", "encapsulated", "if", "outer", "stream",
2552                                              "class", "end", "import", "output", "then",
2553                                              "connect", "enumeration", "impure", "package", "true",
2554                                              "connector", "equation", "in", "parameter", "type",
2555                                              "constant", "expandable", "initial", "partial", "when",
2556                                              "constrainedby", "extends", "inner", "protected", "while",
2557                                              "der", "external", "input", "public", "within"];
2558         auto kindModelica = new FKind("Modelica", [], ["mo"], [], 0, [],
2559                                       keywordsModelica,
2560                                       cCommentDelims,
2561                                       defaultStringDelims,
2562                                       FileContent.sourceCode,
2563                                       FileKindDetection.equalsWhatsGiven,
2564                                       Lang.modelica);
2565 
2566         // Numerical Computing
2567 
2568         txtFKinds ~= new FKind("Matlab", [], ["m"], [], 0, [], [],
2569                                [Delim("%{", "}%"), // TODO: Prio 1
2570                                 Delim("%")], // TODO: Prio 2
2571                                defaultStringDelims,
2572                                FileContent.sourceCode);
2573         auto kindOctave = new FKind("Octave", [], ["m"], [], 0, [], [],
2574                                     [Delim("%{", "}%"), // TODO: Prio 1
2575                                      Delim("%"),
2576                                      Delim("#")],
2577                                     defaultStringDelims,
2578                                     FileContent.sourceCode);
2579         txtFKinds ~= kindOctave;
2580         kindOctave.operations ~= tuple(FOp.byteCompile, `octave`);
2581 
2582         txtFKinds ~= new FKind("Julia", [], ["jl"], [], 0, [], [],
2583                                defaultCommentDelims,
2584                                defaultStringDelims,
2585                                FileContent.sourceCode); // ((:execute "julia") (:evaluate "julia -e"))
2586 
2587         txtFKinds ~= new FKind("Erlang", [], ["erl", "hrl"], [], 0, [], [],
2588                                [Delim("%")],
2589                                defaultStringDelims,
2590                                FileContent.sourceCode);
2591 
2592         auto magicForElisp = seq(shebangLine(lit("emacs")),
2593                                  ws(),
2594                                  lit("--script"));
2595         auto kindElisp = new FKind("Emacs-Lisp", [],
2596                                    ["el", "lisp"],
2597                                    magicForElisp, 0, // Script Execution
2598                                    [], [],
2599                                    [Delim(";")],
2600                                    defaultStringDelims,
2601                                    FileContent.sourceCode);
2602         kindElisp.operations ~= tuple(FOp.byteCompile, `emacs -batch -f batch-byte-compile`);
2603         kindElisp.operations ~= tuple(FOp.byteCompile, `emacs --script`);
2604         /* kindELisp.moduleName = "(provide 'MODULE_NAME)"; */
2605         /* kindELisp.moduleImport = "(require 'MODULE_NAME)"; */
2606         txtFKinds ~= kindElisp;
2607 
2608         txtFKinds ~= new FKind("Lisp", [], ["lisp", "lsp"], [], 0, [], [],
2609                                [Delim(";")],
2610                                defaultStringDelims,
2611                                FileContent.sourceCode);
2612         txtFKinds ~= new FKind("PostScript", [], ["ps", "postscript"], [], 0, "%!", [],
2613                                [Delim("%")],
2614                                defaultStringDelims,
2615                                FileContent.sourceCode);
2616 
2617         txtFKinds ~= new FKind("CMake", [], ["cmake"], [], 0, [], [],
2618                                defaultCommentDelims,
2619                                defaultStringDelims,
2620                                FileContent.sourceCode);
2621 
2622         // http://stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
2623         txtFKinds ~= new FKind("Pure ASCII", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
2624                                [], // N/A
2625                                defaultStringDelims,
2626                                FileContent.textASCII); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126
2627         txtFKinds ~= new FKind("8-Bit Text", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
2628                                [], // N/A
2629                                defaultStringDelims,
2630                                FileContent.text8Bit); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126 or 128–255
2631 
2632         txtFKinds ~= new FKind("Assembler", [], ["asm", "s"], [], 0, [], [],
2633                                [], // N/A
2634                                defaultStringDelims,
2635                                FileContent.sourceCode);
2636 
2637         // https://en.wikipedia.org/wiki/Diff
2638         auto diffKind = new FKind("Diff", [], ["diff", "patch"],
2639                                   "diff", 0,
2640                                   [], [],
2641                                   [], // N/A
2642                                   defaultStringDelims,
2643                                   FileContent.text);
2644         txtFKinds ~= diffKind;
2645         diffKind.wikip = "https://en.wikipedia.org/wiki/Diff";
2646 
2647         auto pemCertKind = new FKind(`PEM certificate`, [], [`cert`],
2648                                      `-----BEGIN CERTIFICATE-----`, 0,
2649                                      [], [],
2650                                      [], // N/A
2651                                      [], // N/A
2652                                      FileContent.text,
2653                                      FileKindDetection.equalsContents);
2654         txtFKinds ~= pemCertKind;
2655 
2656         auto pemCertReqKind = new FKind(`PEM certificate request`, [], [`cert`],
2657                                         `-----BEGIN CERTIFICATE REQ`, 0,
2658                                         [], [],
2659                                         [], // N/A
2660                                         [], // N/A
2661                                         FileContent.text,
2662                                         FileKindDetection.equalsContents);
2663         txtFKinds ~= pemCertReqKind;
2664 
2665         auto pemRSAPrivateKeyKind = new FKind(`PEM RSA private key`, [], [`cert`],
2666                                               `-----BEGIN RSA PRIVATE`, 0,
2667                                               [], [],
2668                                               [], // N/A
2669                                               [], // N/A
2670                                               FileContent.text,
2671                                               FileKindDetection.equalsContents);
2672         txtFKinds ~= pemRSAPrivateKeyKind;
2673 
2674         auto pemDSAPrivateKeyKind = new FKind(`PEM DSA private key`, [], [`cert`],
2675                                               `-----BEGIN DSA PRIVATE`, 0,
2676                                               [], [],
2677                                               [], // N/A
2678                                               [], // N/A
2679                                               FileContent.text,
2680                                               FileKindDetection.equalsContents);
2681         txtFKinds ~= pemDSAPrivateKeyKind;
2682 
2683         auto pemECPrivateKeyKind = new FKind(`PEM EC private key`, [], [`cert`],
2684                                               `-----BEGIN EC PRIVATE`, 0,
2685                                               [], [],
2686                                               [], // N/A
2687                                               [], // N/A
2688                                               FileContent.text,
2689                                               FileKindDetection.equalsContents);
2690         txtFKinds ~= pemECPrivateKeyKind;
2691 
2692         // Binaries
2693 
2694         static immutable extsELF = ["o", "so", "ko", "os", "out", "bin", "x", "elf", "axf", "prx", "puff", "none"]; // ELF file extensions
2695 
2696         auto elfKind = new FKind("ELF",
2697                                  [], extsELF, x"7F 45 4C 46", 0, [], [],
2698                                  [], // N/A
2699                                  [], // N/A
2700                                  FileContent.machineCode,
2701                                  FileKindDetection.equalsContents);
2702         elfKind.wikip = "https://en.wikipedia.org/wiki/Executable_and_Linkable_Format";
2703         binFKinds ~= elfKind;
2704         /* auto extsExeELF = ["out", "bin", "x", "elf", ]; // ELF file extensions */
2705         /* auto elfExeKind  = new FKind("ELF executable",    [], extsExeELF,  [0x2, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2706         /* auto elfSOKind   = new FKind("ELF shared object", [], ["so", "ko"],  [0x3, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2707         /* auto elfCoreKind = new FKind("ELF core file",     [], ["core"], [0x4, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2708         /* binFKinds ~= elfExeKind; */
2709         /* elfKind.subKinds ~= elfSOKind; */
2710         /* elfKind.subKinds ~= elfCoreKind; */
2711         /* elfKind.subKinds ~= elfKind; */
2712 
2713         // TODO: Specialize to not steal results from file's magics.
2714         auto linuxFirmwareKind = new FKind("Linux Firmware",
2715                                  [], ["bin", "ucode", "dat", "sbcf", "fw"], [], 0, [], [],
2716                                  [], // N/A
2717                                  [], // N/A
2718                                  FileContent.binaryUnknown,
2719                                  FileKindDetection.equalsParentPathDirsAndName);
2720         linuxFirmwareKind.parentPathDirs = ["lib", "firmware"];
2721         binFKinds ~= linuxFirmwareKind;
2722 
2723         // TODO: Specialize to not steal results from file's magics.
2724         auto linuxHwDbKind = new FKind("Linux Hardware Database Index",
2725                                        "hwdb.bin", ["bin"], "KSLPHHRH", 0, [], [],
2726                                        [], // N/A
2727                                        [], // N/A
2728                                        FileContent.binaryUnknown,
2729                                        FileKindDetection.equalsNameAndContents);
2730         binFKinds ~= linuxHwDbKind;
2731 
2732         // Executables
2733         binFKinds ~= new FKind("Mach-O", [], ["o"], x"CE FA ED FE", 0, [], [],
2734                                [], // N/A
2735                                [], // N/A
2736                                FileContent.machineCode, FileKindDetection.equalsContents);
2737 
2738         binFKinds ~= new FKind("modules.symbols.bin", [], ["bin"],
2739                                cast(ubyte[])[0xB0, 0x07, 0xF4, 0x57, 0x00, 0x02, 0x00, 0x01, 0x20], 0, [], [],
2740                                [], // N/A
2741                                [], // N/A
2742                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
2743 
2744         auto kindCOFF = new FKind("COFF/i386/32", [], ["o"], x"4C 01", 0, [], [],
2745                                   [], // N/A
2746                                   [], // N/A
2747                                   FileContent.machineCode, FileKindDetection.equalsContents);
2748         kindCOFF.description = "Common Object File Format";
2749         binFKinds ~= kindCOFF;
2750 
2751         auto kindPECOFF = new FKind("PE/COFF", [], ["cpl", "exe", "dll", "ocx", "sys", "scr", "drv", "obj"],
2752                                     "PE\0\0", 0x60, // And ("MZ") at offset 0x0
2753                                     [], [],
2754                                     [], // N/A
2755                                     [], // N/A
2756                                     FileContent.machineCode, FileKindDetection.equalsContents);
2757         kindPECOFF.description = "COFF Portable Executable";
2758         binFKinds ~= kindPECOFF;
2759 
2760         auto kindDOSMZ = new FKind("DOS-MZ", [], ["exe", "dll"], "MZ", 0, [], [],
2761                                    [], // N/A
2762                                    [], // N/A
2763                                    FileContent.machineCode);
2764         kindDOSMZ.description = "MS-DOS, OS/2 or MS Windows executable";
2765         binFKinds ~= kindDOSMZ;
2766 
2767         // Caches
2768         binFKinds ~= new FKind("ld.so.cache", [], ["cache"], "ld.so-", 0, [], [],
2769                                [], // N/A
2770                                [], // N/A
2771                                FileContent.binaryCache);
2772 
2773         // Profile Data
2774         binFKinds ~= new FKind("perf benchmark data", [], ["data"], "PERFILE2h", 0, [], [],
2775                                [], // N/A
2776                                [], // N/A
2777                                FileContent.performanceBenchmark);
2778 
2779         // Images
2780         binFKinds ~= new FKind("GIF87a", [], ["gif"], "GIF87a", 0, [], [],
2781                                [], // N/A
2782                                [], // N/A
2783                                FileContent.image);
2784         binFKinds ~= new FKind("GIF89a", [], ["gif"], "GIF89a", 0, [], [],
2785                                [], // N/A
2786                                [], // N/A
2787                                FileContent.image);
2788         auto extJPEG = ["jpeg", "jpg", "j2k", "jpeg2000"];
2789         binFKinds ~= new FKind("JPEG", [], extJPEG, x"FF D8", 0, [], [],
2790                                [], // N/A
2791                                [], // N/A
2792                                FileContent.image); // TODO: Support ends with [0xFF, 0xD9]
2793         binFKinds ~= new FKind("JPEG/JFIF", [], extJPEG, x"FF D8", 0, [], [],
2794                                [], // N/A
2795                                [], // N/A
2796                                FileContent.image); // TODO: Support ends with ['J','F','I','F', 0x00]
2797         binFKinds ~= new FKind("JPEG/Exif", [], extJPEG, x"FF D8", 0, [], [],
2798                                [], // N/A
2799                                [], // N/A
2800                                FileContent.image); // TODO: Support contains ['E','x','i','f', 0x00] followed by metadata
2801 
2802         binFKinds ~= new FKind("Pack200-Compressed Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [],
2803                                [], // N/A
2804                                [], // N/A
2805                                FileContent.machineCode);
2806 
2807         binFKinds ~= new FKind("JRun Server Application", [], ["jsa"],
2808                                cast(ubyte[])[0xa2,0xab,0x0b,0xf0,
2809                                              0x01,0x00,0x00,0x00,
2810                                              0x00,0x00,0x20,0x00], 0, [], [],
2811                                [], // N/A
2812                                [], // N/A
2813                                FileContent.machineCode);
2814 
2815         binFKinds ~= new FKind("PNG", [], ["png"],
2816                                cast(ubyte[])[137, 80, 78, 71, 13, 10, 26, 10], 0, [], [],
2817                                [], // N/A
2818                                [], // N/A
2819                                FileContent.image);
2820 
2821         auto icnsKind = new FKind("Apple Icon Image", [], ["icns"],
2822                                   "icns", 0, [], [],
2823                                   [], // N/A
2824                                   [], // N/A
2825                                   FileContent.imageIcon);
2826         icnsKind.wikip = "https://en.wikipedia.org/wiki/Apple_Icon_Image_format";
2827         binFKinds ~= icnsKind;
2828         // TODO: read with http://icns.sourceforge.net/
2829 
2830         auto kindPDF = new FKind("PDF", [], ["pdf"], "%PDF", 0, [], [],
2831                                  [], // N/A
2832                                  [], // N/A
2833                                  FileContent.document);
2834         kindPDF.description = "Portable Document Format";
2835         binFKinds ~= kindPDF;
2836 
2837         auto kindMarkdownFmt = new FKind("Markdown", [], ["md", "markdown"],
2838                                          [], 0,
2839                                          [], [],
2840                                          [], // N/A
2841                                          defaultStringDelims,
2842                                          FileContent.binaryCache);
2843         kindMarkdownFmt.wikip = "https://en.wikipedia.org/wiki/Markdown";
2844         binFKinds ~= kindMarkdownFmt;
2845 
2846         auto kindAsciiDocFmt = new FKind("AsciiDoc", [], ["ad", "adoc", "asciidoc"],
2847                                          [], 0,
2848                                          [], [],
2849                                          [], // N/A
2850                                          defaultStringDelims,
2851                                          FileContent.binaryCache);
2852         binFKinds ~= kindAsciiDocFmt;
2853 
2854         auto kindLatexPDFFmt = new FKind("LaTeX PDF Format", [], ["fmt"],
2855                                          cast(ubyte[])['W','2','T','X',
2856                                                        0x00,0x00,0x00,0x08,
2857                                                        0x70,0x64,0x66,0x74,
2858                                                        0x65,0x78], 0, [], [],
2859                                          [], // N/A
2860                                          defaultStringDelims,
2861                                          FileContent.binaryCache);
2862         binFKinds ~= kindLatexPDFFmt;
2863 
2864         binFKinds ~= new FKind("Microsoft Office Document", [], ["doc", "docx", "xls", "ppt"], x"D0 CF 11 E0", 0, [], [],
2865                                [], // N/A
2866                                defaultStringDelims,
2867                                FileContent.document);
2868 
2869         // Fonts
2870 
2871         auto kindTTF = new FKind("TrueType Font", [], ["ttf"], x"00 01 00 00 00", 0, [], [],
2872                                  [], // N/A
2873                                  defaultStringDelims,
2874                                  FileContent.font);
2875         binFKinds ~= kindTTF;
2876 
2877         auto kindTTCF = new FKind("TrueType/OpenType Font Collection", [], ["ttc"], "ttcf", 0, [], [],
2878                                   [], // N/A
2879                                   defaultStringDelims,
2880                                   FileContent.font);
2881         binFKinds ~= kindTTCF;
2882 
2883         auto kindWOFF = new FKind("Web Open Font", [], ["woff"], "wOFF", 0, [], [],
2884                                   [], // N/A
2885                                   defaultStringDelims,
2886                                   FileContent.font); // TODO: container for kindSFNT
2887         binFKinds ~= kindWOFF;
2888 
2889         auto kindSFNT = new FKind("Spline Font", [], ["sfnt"], "sfnt", 0, [], [],
2890                                   [], // N/A
2891                                   defaultStringDelims,
2892                                   FileContent.font); // TODO: container for Sfnt
2893         binFKinds ~= kindSFNT;
2894 
2895         // Audio
2896 
2897         binFKinds ~= new FKind("MIDI", [], ["mid", "midi"], "MThd", 0, [], [],
2898                                [], // N/A
2899                                defaultStringDelims,
2900                                FileContent.audio, FileKindDetection.equalsNameAndContents);
2901 
2902         // Au
2903         auto auKind = new FKind("Au", [], ["au", "snd"], ".snd", 0, [], [],
2904                                 [], // N/A
2905                                 defaultStringDelims,
2906                                 FileContent.audio, FileKindDetection.equalsNameAndContents);
2907         auKind.wikip = "https://en.wikipedia.org/wiki/Au_file_format";
2908         binFKinds ~= auKind;
2909 
2910         binFKinds ~= new FKind("Ogg", [], ["ogg", "oga", "ogv"],
2911                                cast(ubyte[])[0x4F,0x67,0x67,0x53,
2912                                              0x00,0x02,0x00,0x00,
2913                                              0x00,0x00,0x00,0x00,
2914                                              0x00, 0x00], 0, [], [],
2915                                [], // N/A
2916                                defaultStringDelims,
2917                                FileContent.media);
2918 
2919         // TODO: Support RIFF....WAVEfmt using symbolic seq(lit("RIFF"), any(4), lit("WAVEfmt"))
2920         binFKinds ~= new FKind("WAV", [], ["wav", "wave"], "RIFF", 0, [], [],
2921                                [], // N/A
2922                                defaultStringDelims,
2923                                FileContent.audio, FileKindDetection.equalsContents);
2924 
2925         // Archives
2926 
2927         auto kindBSDAr = new FKind("BSD Archive", [], ["a", "ar"], "!<arch>\n", 0, [], [],
2928                                    [], // N/A
2929                                    defaultStringDelims,
2930                                    FileContent.archive, FileKindDetection.equalsContents);
2931         kindBSDAr.description = "BSD 4.4 and Mac OSX Archive";
2932         binFKinds ~= kindBSDAr;
2933 
2934         binFKinds ~= new FKind("GNU tar Archive", [], ["tar"], "ustar\040\040\0", 257, [], [],
2935                                [], // N/A
2936                                defaultStringDelims,
2937                                FileContent.archive, FileKindDetection.equalsContents); // TODO: Specialized Derivation of "POSIX tar Archive"
2938         binFKinds ~= new FKind("POSIX tar Archive", [], ["tar"], "ustar\0", 257, [], [],
2939                                [], // N/A
2940                                defaultStringDelims,
2941                                FileContent.archive, FileKindDetection.equalsContents);
2942 
2943         binFKinds ~= new FKind("pkZip Archive", [], ["zip", "jar", "pptx", "docx", "xlsx"], "PK\003\004", 0, [], [],
2944                                [], // N/A
2945                                defaultStringDelims,
2946                                FileContent.archive, FileKindDetection.equalsContents);
2947         binFKinds ~= new FKind("pkZip Archive (empty)", [], ["zip", "jar"], "PK\005\006", 0, [], [],
2948                                [], // N/A
2949                                defaultStringDelims,
2950                                FileContent.archive, FileKindDetection.equalsContents);
2951 
2952         binFKinds ~= new FKind("PAK file", [], ["pak"], cast(ubyte[])[0x40, 0x00, 0x00, 0x00,
2953                                                                       0x4a, 0x12, 0x00, 0x00,
2954                                                                       0x01, 0x2d, 0x23, 0xcb,
2955                                                                       0x6d, 0x00, 0x00, 0x2f], 0, [], [],
2956                                [], // N/A
2957                                defaultStringDelims,
2958                                FileContent.spellCheckWordList,
2959                                FileKindDetection.equalsNameAndContents);
2960 
2961         binFKinds ~= new FKind("LZW-Compressed", [], ["z", "tar.z"], x"1F 9D", 0, [], [],
2962                                [], // N/A
2963                                defaultStringDelims,
2964                                FileContent.compressed);
2965         binFKinds ~= new FKind("LZH-Compressed", [], ["z", "tar.z"], x"1F A0", 0, [], [],
2966                                [], // N/A
2967                                defaultStringDelims,
2968                                FileContent.compressed);
2969 
2970         binFKinds ~= new FKind("CompressedZ", [], ["z"], "\037\235", 0, [], [],
2971                                [], // N/A
2972                                defaultStringDelims,
2973                                FileContent.compressed);
2974         binFKinds ~= new FKind("GNU-Zip (gzip)", [], ["tgz", "gz", "gzip", "dz"], "\037\213", 0, [], [],
2975                                [], // N/A
2976                                defaultStringDelims,
2977                                FileContent.compressed);
2978         binFKinds ~= new FKind("BZip", [], ["bz2", "bz", "tbz2", "bzip2"], "BZh", 0, [], [],
2979                                [], // N/A
2980                                defaultStringDelims,
2981                                FileContent.compressed);
2982         binFKinds ~= new FKind("XZ/7-Zip", [], ["xz", "txz", "7z", "t7z", "lzma", "tlzma", "lz", "tlz"],
2983                                cast(ubyte[])[0xFD, '7', 'z', 'X', 'Z', 0x00], 0, [], [],
2984                                [], // N/A
2985                                defaultStringDelims,
2986                                FileContent.compressed);
2987         binFKinds ~= new FKind("LZX", [], ["lzx"], "LZX", 0, [], [],
2988                                [], // N/A
2989                                defaultStringDelims,
2990                                FileContent.compressed);
2991         binFKinds ~= new FKind("SZip", [], ["szip"], "SZ\x0a\4", 0, [], [],
2992                                [], // N/A
2993                                defaultStringDelims,
2994                                FileContent.compressed);
2995 
2996         binFKinds ~= new FKind("Git Bundle", [], ["bundle"], "# v2 git bundle", 0, [], [],
2997                                [], // N/A
2998                                defaultStringDelims,
2999                                FileContent.versionControl);
3000 
3001         binFKinds ~= new FKind("Emacs-Lisp Bytes Code", [], ["elc"], ";ELC\27\0\0\0", 0, [], [],
3002                                [], // N/A
3003                                defaultStringDelims,
3004                                FileContent.byteCode, FileKindDetection.equalsContents);
3005         binFKinds ~= new FKind("Python Bytes Code", [], ["pyc"], x"0D 0A", 2, [], [],
3006                                [], // N/A
3007                                defaultStringDelims,
3008                                FileContent.byteCode, FileKindDetection.equalsNameAndContents); // TODO: Handle versions at src[0..2]
3009 
3010         binFKinds ~= new FKind("Zshell Wordcode", [], ["zwc"], x"07 06 05 04", 0, [], [],
3011                                [], // N/A
3012                                defaultStringDelims,
3013                                FileContent.byteCode);
3014 
3015         binFKinds ~= new FKind("Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [],
3016                                [], // N/A
3017                                defaultStringDelims,
3018                                FileContent.byteCode, FileKindDetection.equalsContents);
3019         binFKinds ~= new FKind("Java KeyStore", [], [], x"FE ED FE ED", 0, [], [],
3020                                [], // N/A
3021                                defaultStringDelims,
3022                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
3023         binFKinds ~= new FKind("Java JCE KeyStore", [], [], x"CE CE CE CE", 0, [], [],
3024                                [], // N/A
3025                                defaultStringDelims,
3026                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
3027 
3028         binFKinds ~= new FKind("LLVM Bitcode", [], ["bc"], "BC", 0, [], [],
3029                                [], // N/A
3030                                defaultStringDelims,
3031                                FileContent.byteCode, FileKindDetection.equalsNameAndContents);
3032 
3033         binFKinds ~= new FKind("MATLAB MAT", [], ["mat"], "MATLAB 5.0 MAT-file", 0, [], [],
3034                                [], // N/A
3035                                defaultStringDelims,
3036                                FileContent.numericalData, FileKindDetection.equalsContents);
3037 
3038         auto hdf4Kind = new FKind("HDF4", [], ["hdf", "h4", "hdf4", "he4"], x"0E 03 13 01", 0, [], [],
3039                                   [], // N/A
3040                                   defaultStringDelims,
3041                                   FileContent.numericalData);
3042         binFKinds ~= hdf4Kind;
3043         hdf4Kind.description = "Hierarchical Data Format version 4";
3044 
3045         auto hdf5Kind = new FKind("HDF5", "Hierarchical Data Format version 5", ["hdf", "h5", "hdf5", "he5"], x"89 48 44 46 0D 0A 1A 0A", 0, [], [],
3046                                   [], // N/A
3047                                   defaultStringDelims,
3048                                   FileContent.numericalData);
3049         binFKinds ~= hdf5Kind;
3050         hdf5Kind.description = "Hierarchical Data Format version 5";
3051 
3052         auto numpyKind = new FKind("NUMPY", "NUMPY", ["npy", "numpy"], x"93 4E 55 4D 50 59", 0, [], [],
3053                                   [], // N/A
3054                                   defaultStringDelims,
3055                                   FileContent.numericalData);
3056         binFKinds ~= numpyKind;
3057 
3058         binFKinds ~= new FKind("GNU GLOBAL Database", ["GTAGS", "GRTAGS", "GPATH", "GSYMS"], [], "b1\5\0", 0, [], [],
3059                                [], // N/A
3060                                defaultStringDelims,
3061                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3062 
3063         // SQLite
3064         static immutable extsSQLite = ["sql", "sqlite", "sqlite3"];
3065         binFKinds ~= new FKind("MySQL table definition file", [], extsSQLite, x"FE 01", 0, [], [],
3066                                [], // N/A
3067                                defaultStringDelims,
3068                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3069         binFKinds ~= new FKind("MySQL MyISAM index file", [], extsSQLite, x"FE FE 07", 0, [], [],
3070                                [], // N/A
3071                                defaultStringDelims,
3072                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3073         binFKinds ~= new FKind("MySQL MyISAM compressed data file", [], extsSQLite, x"FE FE 08", 0, [], [],
3074                                [], // N/A
3075                                defaultStringDelims,
3076                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3077         binFKinds ~= new FKind("MySQL Maria index file", [], extsSQLite, x"FF FF FF", 0, [], [],
3078                                [], // N/A
3079                                defaultStringDelims,
3080                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3081         binFKinds ~= new FKind("MySQL Maria compressed data file", [], extsSQLite, x"FF FF FF", 0, [], [],
3082                                [], // N/A
3083                                defaultStringDelims,
3084                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3085         binFKinds ~= new FKind("SQLite format 3", [], extsSQLite , "SQLite format 3", 0, [], [],
3086                                [], // N/A
3087                                defaultStringDelims,
3088                                FileContent.tagsDatabase, FileKindDetection.equalsContents); // TODO: Why is this detected at 49:th try?
3089 
3090         binFKinds ~= new FKind("Vim swap", [], ["swo"], [], 0, "b0VIM ", [],
3091                                [], // N/A
3092                                defaultStringDelims,
3093                                FileContent.binaryCache);
3094 
3095         binFKinds ~= new FKind("PCH", "(GCC) Precompiled header", ["pch", "gpch"], "gpch", 0, [], [],
3096                                [], // N/A
3097                                defaultStringDelims,
3098                                FileContent.cache);
3099 
3100         binFKinds ~= new FKind("Firmware", [], ["fw"], cast(ubyte[])[], 0, [], [],
3101                                [], // N/A
3102                                defaultStringDelims,
3103                                FileContent.cache, FileKindDetection.equalsName); // TODO: Add check for binary contents and that some parenting directory is named "firmware"
3104 
3105         binFKinds ~= new FKind("LibreOffice or OpenOffice RDB", [], ["rdb"],
3106                                cast(ubyte[])[0x43,0x53,0x4d,0x48,
3107                                              0x4a,0x2d,0xd0,0x26,
3108                                              0x00,0x02,0x00,0x00,
3109                                              0x00,0x02,0x00,0x02], 0, [], [],
3110                                [], // N/A
3111                                defaultStringDelims,
3112                                FileContent.database, FileKindDetection.equalsName); // TODO: Add check for binary contents and that some parenting directory is named "firmware"
3113 
3114         binFKinds ~= new FKind("sconsign", [], ["sconsign", "sconsign.dblite", "dblite"], x"7d 71 01 28", 0, [], [],
3115                                [], // N/A
3116                                defaultStringDelims,
3117                                FileContent.cache, FileKindDetection.equalsNameAndContents);
3118 
3119         binFKinds ~= new FKind("GnuPG (GPG) key public ring", [], ["gpg"], x"99 01", 0, [], [],
3120                                [], // N/A
3121                                defaultStringDelims,
3122                                FileContent.binary, FileKindDetection.equalsNameOrContents);
3123         binFKinds ~= new FKind("GnuPG (GPG) encrypted data", [], [], x"85 02", 0, [], [],
3124                                [], // N/A
3125                                defaultStringDelims,
3126                                FileContent.binary, FileKindDetection.equalsContents);
3127         binFKinds ~= new FKind("GNUPG (GPG) key trust database", [], [], "\001gpg", 0, [], [],
3128                                [], // N/A
3129                                defaultStringDelims,
3130                                FileContent.binary, FileKindDetection.equalsContents);
3131 
3132         binFKinds ~= new FKind("aspell word list (rowl)", [], ["rws"], "aspell default speller rowl ", 0, [], [],
3133                                [], // N/A
3134                                defaultStringDelims,
3135                                FileContent.spellCheckWordList, FileKindDetection.equalsNameAndContents);
3136 
3137         binFKinds ~= new FKind("DS_Store", ".DS_Store", [], "Mac OS X Desktop Services Store ", 0, [], [],
3138                                [], // N/A
3139                                [],
3140                                FileContent.binary, FileKindDetection.equalsName);
3141 
3142         /* Fax image created in the CCITT Group 3 compressed format, which is
3143          * used for digital transmission of fax data and supports 1 bit per
3144          * pixel
3145          */
3146         binFKinds ~= new FKind("CCITT Group 3 compressed format", [], // TODO: Altenative name: Digifax-G3, G3 Fax
3147                                ["g3", "G3"],
3148                                "PC Research, Inc", 0, [], [],
3149                                [], // N/A
3150                                [],
3151                                FileContent.imageModemFax1BPP, FileKindDetection.equalsContents);
3152 
3153         binFKinds ~= new FKind("Raw Modem Data version 1", [],
3154                                ["rmd1"],
3155                                "RMD1", 0, [], [],
3156                                [], // N/A
3157                                [],
3158                                FileContent.modemData, FileKindDetection.equalsContents);
3159 
3160         binFKinds ~= new FKind("Portable voice format 1", [],
3161                                ["pvf1"],
3162                                "PVF1\n", 0, [], [],
3163                                [], // N/A
3164                                [],
3165                                FileContent.voiceModem, FileKindDetection.equalsContents);
3166 
3167         binFKinds ~= new FKind("Portable voice format 2", [],
3168                                ["pvf2"],
3169                                "PVF2\n", 0, [], [],
3170                                [], // N/A
3171                                [],
3172                                FileContent.voiceModem, FileKindDetection.equalsContents);
3173 
3174         allFKinds ~= txtFKinds;
3175         allFKinds ~= binFKinds;
3176 
3177         assert(allFKinds.byIndex.length ==
3178                (txtFKinds.byIndex.length +
3179                 binFKinds.byIndex.length));
3180 
3181         assert(allFKinds.byId.length ==
3182                (txtFKinds.byId.length +
3183                 binFKinds.byId.length));
3184 
3185         txtFKinds.rehash;
3186         binFKinds.rehash;
3187         allFKinds.rehash;
3188     }
3189 
3190     // Code
3191 
3192     // Interpret Command Line
3193     /** Register the known version-control directory kinds and the additional directory kinds that are
3194      *  skipped, then fill both lookup maps keyed on `fileName`. Version-control kinds are skipped too. */
3195     void loadDirKinds()
3196     {
3197         vcDirKinds ~= new DirKind(".git", "Git");
3198         vcDirKinds ~= new DirKind(".svn", "Subversion (Svn)");
3199         vcDirKinds ~= new DirKind(".bzr", "Bazaar (Bzr)");
3200         vcDirKinds ~= new DirKind("RCS", "RCS"); // listed exactly once: a repeat would be appended twice to both kind lists
3201         vcDirKinds ~= new DirKind("CVS", "CVS");
3202         vcDirKinds ~= new DirKind("MCVS", "MCVS");
3203         vcDirKinds ~= new DirKind(".hg", "Mercurial (Hg)");
3204         vcDirKinds ~= new DirKind("SCCS", "SCCS");
3205         vcDirKinds ~= new DirKind(".wact", "WACT");
3206         vcDirKinds ~= new DirKind("_MTN", "Monotone");
3207         vcDirKinds ~= new DirKind("_darcs", "Darcs");
3208         vcDirKinds ~= new DirKind("{arch}", "Arch");
3209 
3210         skippedDirKinds ~= vcDirKinds; // every version-control directory kind is a skipped kind as well
3211 
3212         foreach (kind; vcDirKinds)
3213         {
3214             vcDirKindsMap[kind.fileName] = kind;
3215         }
3216         vcDirKindsMap.rehash;
3217 
3218         skippedDirKinds ~= new DirKind(".trash",  "Trash");
3219         skippedDirKinds ~= new DirKind(".undo",  "Undo");
3220         skippedDirKinds ~= new DirKind(".deps",  "Dependencies");
3221         skippedDirKinds ~= new DirKind(".backups",  "Backups");
3222         skippedDirKinds ~= new DirKind(".autom4te.cache",  "Automake Cache");
3223 
3224         foreach (kind; skippedDirKinds) { skippedDirKindsMap[kind.fileName] = kind; }
3225         skippedDirKindsMap.rehash;
3226     }
3227 
3228     ScanContext scanContext = ScanContext.standard;
3229     KeyStrictness keyStrictness = KeyStrictness.standard;
3230     // Duplicate-related switches; all start disabled.
3231     bool showNameDups = false;
3232     bool showTreeContentDups = false;
3233     bool showFileContentDups = false;
3234     bool showELFSymbolDups = false;
3235     bool linkContentDups = false;
3236     // Symlink-related settings; reporting of broken symlinks and of symlink cycles starts enabled.
3237     bool showLinkDups = false;
3238     SymlinkFollowContext followSymlinks = SymlinkFollowContext.external;
3239     bool showBrokenSymlinks = true;
3240     bool showSymlinkCycles = true;
3241     // Further `show*` switches; all start disabled.
3242     bool showAnyDups = false;
3243     bool showMMaps = false;
3244     bool showUsage = false;
3245     bool showSHA1 = false;
3246     bool showLineCounts = false;
3247     // Entry counters, all starting at zero. `noDirs` is presumably the counter Dir's constructor bumps via `++gstats.noDirs` — confirm.
3248     uint64_t noFiles = 0;
3249     uint64_t noRegFiles = 0;
3250     uint64_t noSymlinks = 0;
3251     uint64_t noSpecialFiles = 0;
3252     uint64_t noDirs = 0;
3253     // NOTE(review): by their names these count only the entries actually scanned — confirm where they are incremented.
3254     uint64_t noScannedFiles = 0;
3255     uint64_t noScannedRegFiles = 0;
3256     uint64_t noScannedSymlinks = 0;
3257     uint64_t noScannedSpecialFiles = 0;
3258     uint64_t noScannedDirs = 0;
3259     // Denseness accumulators: two `Rational!ulong` values constructed from (0, 1), plus a count starting at zero.
3260     auto shallowDensenessSum = Rational!ulong(0, 1);
3261     auto deepDensenessSum = Rational!ulong(0, 1);
3262     uint64_t densenessCount = 0;
3263     // Selected `FOp`; starts as `FOp.none`.
3264     FOp fOp = FOp.none;
3265     // Switches for how the search key is interpreted (judging by the names — confirm); all start disabled.
3266     bool keyAsWord = false;
3267     bool keyAsSymbol = false;
3268     bool keyAsAcronym = false;
3269     bool keyAsExact = false;
3270 
3271     bool showTree = false;
3272     // Output-related switches; all start disabled.
3273     bool useHTML = false;
3274     bool browseOutput = false;
3275     bool collectTypeHits = false;
3276     bool colorFlag = false;
3277     // NOTE(review): -1 looks like a "no limit" sentinel — confirm against the code that reads `scanDepth`.
3278     int scanDepth = -1;
3279 
3280     bool demangleELF = true;
3281 
3282     bool recache = false;
3283 
3284     bool useNGrams = false;
3285 
3286     PathFormat pathFormat = PathFormat.relative;
3287 
3288     DirSorting subsSorting = DirSorting.onTimeLastModified;
3289     BuildType buildType = BuildType.none;
3290     DuplicatesContext duplicatesContext = DuplicatesContext.internal;
3291 
3292     Dir[] topDirs;
3293     Dir rootDir;
3294 }
3295 
3296 struct Results // Plain bundle of hit counters and byte totals; no field has an explicit initializer.
3297 {
3298     size_t numTotalHits; /// Total number of hits.
3299     size_t numFilesWithHits; /// Number of files that have hits.
3300     Bytes64 noBytesTotal; /// Total number of bytes.
3301     Bytes64 noBytesTotalContents; /// Total number of contents bytes.
3302     Bytes64 noBytesScanned; /// Number of bytes scanned.
3303     Bytes64 noBytesSkipped; /// Number of bytes skipped.
3304     Bytes64 noBytesUnreadable; /// Number of bytes unreadable.
3305 }
3306 
version(cerealed)
{
    /** (De)serialize `systime` through `cereal` as its raw `stdTime` value.

        The value is round-tripped via `cereal.grain`; `systime` is only
        rebuilt when the resulting value is non-zero, so a zero leaves it
        untouched.

        NOTE(review): the template parameter `T` is not used in the signature
        and therefore cannot be deduced from the arguments - confirm how this
        overload is meant to be instantiated.
    */
    void grain(T)(ref Cereal cereal, ref SysTime systime)
    {
        auto ticks = systime.stdTime;
        cereal.grain(ticks);
        if (ticks == 0)
        {
            return; // nothing to restore
        }
        systime = SysTime(ticks);
    }
}
3319 
/** Directory Sorting Order.

    A creation-time ordering (`onTimeCreated`) is deliberately left out: per
    the original author's note it is Windows only; Linux stores the value on
    ext4 but has no standard interface for it yet (it will probably be called
    xstat()).
 */
enum DirSorting
{
    onTimeLastModified, /// Order entries on their last-modified time.
    onTimeLastAccessed, /// Order entries on their last-accessed time.
    onSize,             /// Order entries on their size.
    onNothing,          /// Keep entries unsorted.
}
3331 
/** Kind of build to perform. */
enum BuildType
{
    /// Don't compile.
    none,
    /// Compile with debug symbols.
    devel,
    /// Compile without debug symbols and optimizations
    /// (NOTE(review): presumably "with optimizations" is meant - confirm).
    release,
    /// Default build type; an alias of `devel`.
    standard = devel,
}
3339 
/** How file paths are presented. */
enum PathFormat
{
    absolute, /// Present the full path.
    relative, /// Present the path relative to the scanned top directory.
}
3345 
/** Directory node of the cached file system tree.

    Keeps its entries in `_subs` (keyed by basename) which are loaded lazily
    from disk by `load`, and memoizes tree size, tree content digest and
    depth.
 */
class Dir : File
{
    /** Construct an unnamed directory below `parent`.

        Used both for the file system root and as a blank object that is
        filled in afterwards by deserialization. Increments `gstats.noDirs`
        when `gstats` is given.
    */
    this(Dir parent = null, GStats gstats = null)
    {
        super(parent);
        this._gstats = gstats;
        if (gstats) { ++gstats.noDirs; }
    }

    /** Construct the file system root directory from `root_path`, which must be "/". */
    this(string root_path, GStats gstats)
        in { assert(root_path == "/"); assert(gstats); }
    do
    {
        auto rootDent = DirEntry(root_path);
        Dir rootParent = null;
        this(rootDent, rootParent, gstats);
    }

    /** Construct from directory entry `dent` below `parent`. */
    this(ref DirEntry dent, Dir parent, GStats gstats)
        in { assert(gstats); }
    do
    {
        this(dent.name.baseName, parent, dent.size.Bytes64, dent.timeLastModified, dent.timeLastAccessed, gstats);
    }

    /** Construct from explicit attributes. Increments `gstats.noDirs` when `gstats` is given. */
    this(string name, Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed,
         GStats gstats = null)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        this._gstats = gstats;
        if (gstats) { ++gstats.noDirs; }
    }

    override string toTextual() const @property { return "Directory"; }

    /** Returns: Size of the tree rooted at `this`: own size plus the tree
        sizes of all entries. Computed on first use and memoized in `_treeSize`.
    */
    override Bytes64 treeSize() @property @trusted /* @safe nothrow */
    {
        if (_treeSize.isUntouched)
        {
            _treeSize = (this.size +
                         reduce!"a+b"(0.Bytes64,
                                      subs.byValue.map!"a.treeSize")); // recurse!
        }
        return _treeSize.get.bytes;
    }

    /** Returns: Directory Tree Content Id of `this`.

        Computed on first use from the content ids of all entries. A
        directory with a non-zero tree size is also registered in
        `gstats.filesByContentId`, when statistics are available.
    */
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe nothrow */
    {
        if (_treeContentId.isUntouched)
        {
            _treeContentId = subs.byValue.map!"a.treeContentId".sha1Of; // TODO: join loops for calculating treeSize
            assert(_treeContentId, "Zero tree content digest");
            if (treeSize() != 0)
            {
                // gstats() may be null for a tree without statistics; guard
                // it the same way updateStats() does instead of crashing.
                auto lGS = gstats();
                if (lGS)
                {
                    lGS.filesByContentId[_treeContentId] ~= assumeNotNull(cast(File)this); // TODO: Avoid cast when DMD and NotNull is fixed
                }
            }
        }
        return _treeContentId;
    }

    override Face!Color face() const @property @safe pure nothrow { return dirFace; }

    /** Return true if `this` is a file system root directory. */
    bool isRoot() @property @safe const pure nothrow { return !parent; }

    /** Set the statistics object of `this`. Returns: the new value. */
    GStats gstats(GStats gstats) @property @safe pure /* nothrow */ {
        return this._gstats = gstats;
    }

    /** Returns: Statistics object of `this`; when unset it is inherited
        (and cached) from the nearest parent, and may still be null.
    */
    GStats gstats() @property @safe nothrow
    {
        if (!_gstats && this.parent)
        {
            _gstats = this.parent.gstats();
        }
        return _gstats;
    }

    /** Returns: Depth from File System root to this File (root is 0). */
    override int depth() @property @safe nothrow
    {
        if (_depth == -1)       // not yet calculated
        {
            _depth = parent ? parent.depth + 1 : 0; // memoized depth
        }
        return _depth;
    }

    /** Scan `this` recursively for a non-directory file with basename `name`.

        A direct entry that is not a directory wins. Otherwise (no direct
        entry, or the direct entry is itself a directory) all sub-directories
        are searched.

        Returns: The first hit or `null` if none was found.
        TODO: Reuse range based algorithm this.tree(depthFirst|breadFirst)
     */
    File find(string name) @property
    {
        auto subs_ = subs();
        if (name in subs_)
        {
            auto hit = subs_[name];
            Dir hitDir = cast(Dir)hit;
            if (!hitDir) // direct hit that is not a directory
                return hit;
            // A directory named `name` must not end the search; fall through
            // and descend into the sub-directories.
        }
        foreach (sub; subs_)
        {
            Dir subDir = cast(Dir)sub;
            if (subDir)
            {
                auto hit = subDir.find(name);
                if (hit) // found somewhere below subDir
                    return hit;
            }
        }
        return null;
    }

    /** Append Tree Statistics.

        For regular files, widens the modification and access time intervals
        of `this` with the times of `subDent`.
    */
    void addTreeStatsFromSub(F)(NotNull!F subFile, ref DirEntry subDent)
    {
        if (subDent.isFile)
        {
            /* _treeSize += subDent.size.Bytes64; */
            // dbg("Updating ", _treeSize, " of ", path);

            /** TODO: Move these overloads to std.datetime */
            auto ref min(in SysTime a, in SysTime b) @trusted pure nothrow { return (a < b ? a : b); }
            auto ref max(in SysTime a, in SysTime b) @trusted pure nothrow { return (a > b ? a : b); }

            const lastMod = subDent.timeLastModified;
            _timeModifiedInterval = Interval!SysTime(min(lastMod, _timeModifiedInterval.begin),
                                                     max(lastMod, _timeModifiedInterval.end));
            const lastAcc = subDent.timeLastAccessed;
            _timeAccessedInterval = Interval!SysTime(min(lastAcc, _timeAccessedInterval.begin),
                                                     max(lastAcc, _timeAccessedInterval.end));
        }
    }

    /** Update Statistics for Sub-File `subFile` with `subDent` of `this` Dir.

        Registers the file by name when name duplicates are requested, and by
        inode when link duplicates are requested and the regular file has at
        least two hard links. Does nothing without a statistics object.
    */
    void updateStats(F)(NotNull!F subFile, ref DirEntry subDent, bool isRegFile)
    {
        auto lGS = gstats();
        if (lGS)
        {
            if (lGS.showNameDups/*  && */
                /* !subFile.underAnyDir!(a => a.name in lGS.skippedDirKindsMap) */)
            {
                lGS.filesByName[subFile.name] ~= cast(NotNull!File)subFile;
            }
            if (lGS.showLinkDups &&
                isRegFile)
            {
                import core.sys.posix.sys.stat;
                immutable stat_t stat = subDent.statBuf();
                if (stat.st_nlink >= 2)
                {
                    lGS.filesByInode[stat.st_ino] ~= cast(NotNull!File)subFile;
                }
            }
        }
    }

    /** Load Contents of `this` Directory from Disk using DirEntries.

        Entries already known under the same basename are reused; new ones
        are constructed according to their type. `depth` is currently unused.

        Returns: `true` iff Dir was updated (reread) from disk.
    */
    bool load(int depth = 0, bool force = false)
    {
        import std.range: empty;
        if (!_obseleteDir && // already loaded
            !force)          // and not forced reload
        {
            return false;    // signal already scanned
        }

        // dbg("Zeroing ", _treeSize, " of ", path);
        _treeSize.reset; // this.size;
        auto oldSubs = _subs;
        _subs.reset;
        assert(_subs.length == 0); // TODO: Remove when verified

        import std.file: dirEntries, SpanMode;
        auto entries = dirEntries(path, SpanMode.shallow, false); // false: skip symlinks
        foreach (dent; entries)
        {
            immutable basename = dent.name.baseName;
            File sub = null;
            if (basename in oldSubs)
            {
                sub = oldSubs[basename]; // reuse from previous cache
            }
            else
            {
                bool isRegFile = false;
                if (dent.isSymlink)
                {
                    sub = new Symlink(dent, assumeNotNull(this));
                }
                else if (dent.isDir)
                {
                    sub = new Dir(dent, this, gstats);
                }
                else if (dent.isFile)
                {
                    // TODO: Delay construction of and specific files such as
                    // CFile, ELFFile, after FKind-recognition has been made.
                    sub = new RegFile(dent, assumeNotNull(this));
                    isRegFile = true;
                }
                else
                {
                    sub = new SpecFile(dent, assumeNotNull(this));
                }
                updateStats(enforceNotNull(sub), dent, isRegFile);
            }
            auto nnsub = enforceNotNull(sub);
            addTreeStatsFromSub(nnsub, dent);
            _subs[basename] = nnsub;
        }
        _subs.rehash;           // optimize hash for faster lookups

        _obseleteDir = false;
        return true;
    }

    /** Force a reread of `this` from disk. */
    bool reload(int depth = 0) { return load(depth, true); }
    alias sync = reload;

    /** Returns: Entries of `this` keyed by basename, loading them first if needed. */
    /* TODO: Can we get make this const to the outside world perhaps using inout? */
    ref NotNull!File[string] subs() @property { load(); return _subs; }

    /** Returns: Entries of `this` as an array ordered according to `sorted`.
        All orderings except `onNothing` are descending (largest/latest first).
    */
    NotNull!File[] subsSorted(DirSorting sorted = DirSorting.onTimeLastModified) @property
    {
        load();
        auto ssubs = _subs.values;
        /* TODO: Use radix sort to speed things up. */
        final switch (sorted)
        {
            /* case DirSorting.onTimeCreated: */
            /*     break; */
        case DirSorting.onTimeLastModified:
            ssubs.sort!((a, b) => (a.timeLastModified >
                                   b.timeLastModified));
            break;
        case DirSorting.onTimeLastAccessed:
            ssubs.sort!((a, b) => (a.timeLastAccessed >
                                   b.timeLastAccessed));
            break;
        case DirSorting.onSize:
            ssubs.sort!((a, b) => (a.size >
                                   b.size));
            break;
        case DirSorting.onNothing:
            break;
        }
        return ssubs;
    }

    /** Returns: Entry with basename `sub_name` or `null` if there is none. */
    File sub(Name)(Name sub_name)
    {
        load();
        return (sub_name in _subs) ? _subs[sub_name] : null;
    }

    /** Returns: `sub` if an entry with its basename exists in `this`, `null` otherwise.

        `_subs` is keyed by basename, so the lookup has to use `sub.name`
        (looking up the full path could never match).
    */
    File sub(File sub)
    {
        load();
        if (sub is null) { return null; }
        return (sub.name in _subs) !is null ? sub : null;
    }

    version(cerealed)
    {
        /** (De)serialize name, size and last-modified time through `cereal`. */
        void accept(Cereal cereal)
        {
            auto stdTime = timeLastModified.stdTime;
            cereal.grain(name, size, stdTime);
            timeLastModified = SysTime(stdTime);
        }
    }
    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Serialize `this` and, recursively, all its entries to `packer`.

            Layout: own attributes, number of entries, (if any entries) the
            forward-difference encoded entry times, then for every entry its
            class name followed by the entry itself.
        */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln("Entering Dir.toMsgpack ", this.name); */
            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime,
                        kind);

            // Contents
            /* TODO: serialize map of polymorphic objects using
             * packer.packArray(_subs) and type trait lookup up all child-classes of
             * File */
            packer.pack(_subs.length);

            if (_subs.length >= 1)
            {
                auto diffsLastModified = _subs.byValue.map!"a.timeLastModified.stdTime".encodeForwardDifference;
                auto diffsLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime".encodeForwardDifference;
                /* auto timesLastModified = _subs.byValue.map!"a.timeLastModified.stdTime"; */
                /* auto timesLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime"; */

                packer.pack(diffsLastModified, diffsLastAccessed);

                /* debug dbg(this.name, " sub.length: ", _subs.length); */
                /* debug dbg(name, " modified diffs: ", diffsLastModified.pack.length); */
                /* debug dbg(name, " accessed diffs: ", diffsLastAccessed.pack.length); */
                /* debug dbg(name, " modified: ", timesLastModified.array.pack.length); */
                /* debug dbg(name, " accessed: ", timesLastAccessed.array.pack.length); */
            }

            foreach (sub; _subs)
            {
                if        (const regFile = cast(RegFile)sub)
                {
                    packer.pack("RegFile");
                    regFile.toMsgpack(packer);
                }
                else if (const dir = cast(Dir)sub)
                {
                    packer.pack("Dir");
                    dir.toMsgpack(packer);
                }
                else if (const symlink = cast(Symlink)sub)
                {
                    packer.pack("Symlink");
                    symlink.toMsgpack(packer);
                }
                else if (const special = cast(SpecFile)sub)
                {
                    packer.pack("SpecFile");
                    special.toMsgpack(packer);
                }
                else
                {
                    immutable subClassName = sub.classinfo.name;
                    assert(0, "Unknown sub File class " ~ subClassName); // TODO: Exception
                }
            }
        }

        /** Deserialize `this` and, recursively, all its entries from `unpacker`.

            Mirrors the layout written by `toMsgpack`. Entries whose path
            raises a `FileException` while being checked against the disk are
            silently dropped.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize

            /* dbg("before:", path, " ", size, " ", timeLastModified, " ", timeLastAccessed); */

            // FKind
            if (!kind) { kind = null; } // placeholder until the TODO below is resolved
            unpacker.unpack(kind); /* TODO: kind = new DirKind(unpacker); */
            /* dbg("after:", path); */

            _treeSize.reset; // this.size;

            // Contents
            /* TODO: unpacker.unpack(_subs); */
            immutable noPreviousSubs = _subs.length == 0;
            size_t subs_length; unpacker.unpack(subs_length); // TODO: Functionize to unpacker.unpack!size_t()

            ForwardDifferenceCode!(long[]) diffsLastModified,
                diffsLastAccessed;
            if (subs_length >= 1)
            {
                unpacker.unpack(diffsLastModified, diffsLastAccessed);
                /* auto x = diffsLastModified.decodeForwardDifference; */
            }

            foreach (ix; 0..subs_length) // repeat for subs_length times
            {
                string subClassName; unpacker.unpack(subClassName); // TODO: Functionize
                File sub = null;
                try
                {
                    switch (subClassName)
                    {
                    default:
                        assert(0, "Unknown File parent class " ~ subClassName); // TODO: Exception
                    case "Dir":
                        auto subDir = new Dir(this, gstats);
                        unpacker.unpack(subDir); sub = subDir;
                        auto subDent = DirEntry(sub.path);
                        subDir.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        addTreeStatsFromSub(assumeNotNull(subDir), subDent);
                        break;
                    case "RegFile":
                        auto subRegFile = new RegFile(assumeNotNull(this));
                        unpacker.unpack(subRegFile); sub = subRegFile;
                        auto subDent = DirEntry(sub.path);
                        subRegFile.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        updateStats(assumeNotNull(subRegFile), subDent, true);
                        addTreeStatsFromSub(assumeNotNull(subRegFile), subDent);
                        break;
                    case "Symlink":
                        auto subSymlink = new Symlink(assumeNotNull(this));
                        unpacker.unpack(subSymlink); sub = subSymlink;
                        break;
                    case "SpecFile":
                        // Named subSpecFile so the local does not shadow the SpecFile type.
                        auto subSpecFile = new SpecFile(assumeNotNull(this));
                        unpacker.unpack(subSpecFile); sub = subSpecFile;
                        break;
                    }
                    if (noPreviousSubs ||
                        !(sub.name in _subs))
                    {
                        _subs[sub.name] = enforceNotNull(sub);
                    }
                    /* dbg("Unpacked Dir sub ", sub.path, " of type ", subClassName); */
                } catch (FileException) { // this may be a too generic exception
                    /* dbg(sub.path, " is not accessible anymore"); */
                }
            }

        }
    }

    /** Flag `this` as out of date and drop memoized size and time intervals. */
    override void makeObselete() @trusted
    {
        _obseleteDir = true;
        _treeSize.reset;
        _timeModifiedInterval.reset;
        _timeAccessedInterval.reset;
    }
    /** Flag `this` as up to date. */
    override void makeUnObselete() @safe
    {
        _obseleteDir = false;
    }

    private NotNull!File[string] _subs; // Directory contents
    DirKind kind;               // Kind of this directory
    uint64_t hitCount = 0;
    private int _depth = -1;            // Memoized Depth
    private bool _obseleteDir = true;  // Flags that this is obselete
    GStats _gstats = null;

    /* TODO: Reuse Span and span in Phobos. (Span!T).init should be (T.max, T.min) */
    Interval!SysTime _timeModifiedInterval;
    Interval!SysTime _timeAccessedInterval;

    Nullable!(size_t, size_t.max) _treeSize; // Size of tree with this directory as root.
    /* TODO: Make this work instead: */
    /* import std.typecons: Nullable; */
    /* Nullable!(Bytes64, Bytes64.max) _treeSize; // Size of tree with this directory as root. */

    SHA1Digest _treeContentId;
}
3802 
/** Tree size of `file`, memoized externally in `cache`.

    A non-directory contributes its own size. A directory contributes its own
    size plus the tree sizes of all entries; that total is looked up in, or
    stored into, `cache`.

    Is it possible to make get any of @safe pure nothrow?
 */
Bytes64 treeSizeMemoized(NotNull!File file, Bytes64[File] cache) @trusted /* nothrow */
{
    typeof(return) total = file.size;

    auto dir = cast(Dir)file;
    if (!dir)
    {
        return total;           // plain file: nothing to accumulate
    }

    if (file in cache)
    {
        total = cache[file];    // already calculated
        return total;
    }

    foreach (entry; dir.subs.byValue)
    {
        total += treeSizeMemoized(entry, cache);
    }
    cache[file] = total;
    return total;
}
3826 
/** Save File System Tree Cache under Directory `rootDir`.

    Serializes `rootDir` with the serializer selected at compile time
    (msgpack or cerealed; an empty array if neither is enabled), writes the
    bytes to `cacheFile` and reports size and duration through `viz`.

    Returns: Serialized Byte Array.
*/
const(ubyte[]) saveRootDirTree(Viz viz,
                               Dir rootDir, string cacheFile) @trusted
{
    // Imported for every build version so that `cacheFile.write(data)` below
    // always is the file write, no matter which serializer is selected.
    import std.file: write;

    immutable tic = Clock.currTime;
    version(msgpack)
    {
        const data = rootDir.pack();
    }
    else version(cerealed)
    {
        auto enc = new Cerealiser(); // encoder
        enc ~= rootDir;
        auto data = enc.bytes;
    }
    else
    {
        ubyte[] data;
    }
    cacheFile.write(data);
    immutable toc = Clock.currTime;

    viz.ppln("Cache Write".asH!2,
             "Wrote tree cache of size ",
             data.length.Bytes64, " to ",
             cacheFile.asPath,
             " in ",
             shortDurationString(toc - tic));

    return data;
}
3861 
/** Load File System Tree Cache from `cacheFile`.

    Reads the file, unpacks it (msgpack builds only) into a fresh root `Dir`
    bound to `gstats`, and reports size, duration and file counts through
    `viz`.

    Returns: Root Directory of Loaded Tree, or `null` when reading
    `cacheFile` raised a `FileException`.
*/
Dir loadRootDirTree(Viz viz,
                    string cacheFile, GStats gstats) @trusted
{
    import std.file: read;

    immutable startTime = Clock.currTime;
    try
    {
        const rawCache = read(cacheFile);

        auto loadedRoot = new Dir(cast(Dir)null, gstats);
        version(msgpack)
        {
            unpack(cast(ubyte[])rawCache, loadedRoot); /* Dir rootDir = new Dir(cast(const(ubyte)[])data); */
        }
        immutable endTime = Clock.currTime;

        viz.pp("Cache Read".asH!2,
               "Read cache of size ",
               rawCache.length.Bytes64, " from ",
               cacheFile.asPath,
               " in ",
               shortDurationString(endTime - startTime), " containing",
               asUList(asItem(gstats.noDirs, " Dirs,"),
                       asItem(gstats.noRegFiles, " Regular Files,"),
                       asItem(gstats.noSymlinks, " Symbolic Links,"),
                       asItem(gstats.noSpecialFiles, " Special Files,"),
                       asItem("totalling ", gstats.noFiles + 1, " Files")));

        // Sanity check: the per-type counters must add up to noFiles + 1.
        assert(gstats.noDirs +
               gstats.noRegFiles +
               gstats.noSymlinks +
               gstats.noSpecialFiles == gstats.noFiles + 1);
        return loadedRoot;
    }
    catch (FileException)
    {
        viz.ppln("Failed to read cache from ", cacheFile);
        return null;
    }
}
3905 
/** Look up every name in `topDirNames` below `rootDir`.

    Names that cannot be resolved are reported through `dbg` and left out.

    Returns: The directories that were found, in input order.
*/
Dir[] getDirs(NotNull!Dir rootDir, string[] topDirNames)
{
    Dir[] found;
    foreach (wantedName; topDirNames)
    {
        if (Dir hit = getDir(rootDir, wantedName))
        {
            found ~= hit;
        }
        else
        {
            dbg("Directory " ~ wantedName ~ " is missing");
        }
    }
    return found;
}
3924 
/** (Cached) Lookup of File `filePath`.

    Params:
        rootDir  = root of the cached tree to search in
        filePath = path of the file (or directory) to look up
        isDir    = when true, `filePath` is looked up as a directory
        tolerant = currently unused

    Returns: The file if present under `rootDir`, `null` otherwise.
 */
File getFile(NotNull!Dir rootDir, string filePath,
             bool isDir = false,
             bool tolerant = false) @trusted
{
    if (isDir)
    {
        return getDir(rootDir, filePath);
    }

    const parentPath = filePath.dirName;
    auto parentDir = getDir(rootDir, parentPath);
    if (!parentDir)
    {
        // parentDir is null here, so report the path that was looked up
        // rather than dereferencing it.
        dbg("Directory " ~ parentPath ~ " doesn't exist");
        return null;
    }

    auto hit = parentDir.sub(filePath.baseName);
    if (hit)
    {
        return hit;
    }

    dbg("File path " ~ filePath ~ " doesn't exist. TODO: Query user to instead find it under "
        ~ parentDir.path);
    // The result is intentionally not returned yet (see TODO above).
    parentDir.find(filePath.baseName);
    return null;
}
3957 
import std.path: isRooted;

/** (Cached) Lookup of Directory `dirPath`.

    Walks `dirPath` component by component from `rootDir`. A component that
    is a symlink to a directory is resolved by a nested lookup of its target;
    every followed symlink is recorded in `followedSymlinks` so that a cycle
    is detected instead of recursing forever.

    Params:
        rootDir          = root of the cached tree
        dirPath          = rooted path of the directory to look up
        dent             = directory entry of `dirPath` (currently unused)
        followedSymlinks = symlinks followed so far; appended to

    Returns: Dir if present under rootDir, null otherwise.
    TODO: Make use of dent
*/
Dir getDir(NotNull!Dir rootDir, string dirPath, ref DirEntry dent,
           ref Symlink[] followedSymlinks) @trusted
    in { assert(dirPath.isRooted); }
do
{
    import std.algorithm: canFind;
    import std.range: drop;
    import std.path: pathSplitter;

    Dir currDir = rootDir;

    foreach (part; dirPath.pathSplitter().drop(1)) // all but first
    {
        auto sub = currDir.sub(part);
        if        (auto subDir = cast(Dir)sub)
        {
            currDir = subDir;
        }
        else if (auto subSymlink = cast(Symlink)sub)
        {
            auto subDent = DirEntry(subSymlink.absoluteNormalizedTargetPath);
            if (subDent.isDir)
            {
                // Membership test. Testing the range returned by find() for
                // truthiness does not tell whether the symlink was found.
                if (followedSymlinks.canFind(subSymlink))
                {
                    dbg("Infinite recursion in ", subSymlink);
                    return null;
                }
                followedSymlinks ~= subSymlink;
                currDir = getDir(rootDir, subSymlink.absoluteNormalizedTargetPath, subDent, followedSymlinks);
                if (!currDir)
                {
                    return null; // target is not present under rootDir
                }
            }
            else
            {
                dbg("Loaded path " ~ dirPath ~ " is not a directory");
                return null;
            }
        }
        else
        {
            return null;
        }
    }
    return currDir;
}
4005 
/** (Cached) Lookup of Directory `dirPath`.

    Convenience overload that starts with an empty list of followed symlinks.

    Returns: Dir if present under `rootDir`; `null` if it is absent or a
    `FileException` was raised during the lookup.
*/
Dir getDir(NotNull!Dir rootDir, string dirPath) @trusted
{
    try
    {
        Symlink[] visitedLinks;
        auto startDent = DirEntry(dirPath);
        return getDir(rootDir, dirPath, startDent, visitedLinks);
    }
    catch (FileException)
    {
        dbg("Exception getting Dir");
        return null;
    }
}
unittest {
    /* auto tmp = tempfile("/tmp/fsfile"); */
}
4024 
/// Size constant for memory mapped files; currently zero (a former value was 100*1024).
enum ulong mmfile_size = 0UL;
4026 
/** Returns: Memory page size in bytes.

    On Linux the value is queried from the C runtime; on every other
    platform a fixed 4096 is returned.
*/
auto pageSize() @trusted
{
    version(linux)
    {
        import core.sys.posix.sys.shm: queryPageSize = __getpagesize;
        return queryPageSize();
    }
    else
    {
        enum fallbackPageSize = 4096;
        return fallbackPageSize;
    }
}
4039 
/** How strictly a search key has to match. */
enum KeyStrictness
{
    /// Match the key exactly.
    exact,
    /// Match the key as an acronym.
    acronym,
    /// Match either exactly or as an acronym.
    eitherExactOrAcronym,
    /// Default strictness; an alias of `eitherExactOrAcronym`.
    standard = eitherExactOrAcronym,
}
4047 
/** Language Operator Associativity. */
enum OpAssoc
{
    none, /// No associativity.
    LR,   /// Left-to-Right
    RL,   /// Right-to-Left
}
4053 
/** Language Operator Arity. */
enum OpArity
{
    /// Arity is not known.
    unknown,
    /// One argument, operator written after it.
    unaryPostfix,
    /// One argument, operator written before it.
    unaryPrefix,
    /// Two arguments.
    binary,
    /// Three arguments.
    ternary,
}
4063 
/** Language Operator.

    Plain record of an operator's spelling, description, associativity,
    precedence and arity. `overloadable` is not set by the constructor.
 */
struct Op
{
    /** Construct operator `op`; everything but the spelling is optional.

        Note that `prec` is taken as a signed `byte` but stored in an
        unsigned field.
    */
    this(string op,
         OpArity arity = OpArity.unknown,
         OpAssoc assoc = OpAssoc.none,
         byte prec = -1,
         string desc = [])
    {
        // Assigned in field declaration order.
        this.op    = op;
        this.desc  = desc;
        this.assoc = assoc;
        this.prec  = prec;
        this.arity = arity;
    }

    /** Make `this` an alias of `opOrig`.

        Currently only returns a copy of `this`; `opOrig` is not recorded.
    */
    Op aliasOf(string opOrig)
    {
        // TODO: set relation in map from op to opOrig
        return this;
    }

    string op;          /// Operator. TODO: Optimize this storage using a value type?
    string desc;        /// Description
    OpAssoc assoc;      /// Associativity
    ubyte prec;         /// Precedence
    OpArity arity;      /// Arity
    bool overloadable;  /// Overloadable
}
4092 
/** Language Operator Alias: pairs an operator spelling with the operator it stands for. */
struct OpAlias
{
    /// Alias spelling.
    string op;
    /// Spelling of the operator being aliased.
    string opOrigin;

    /** Construct alias `op` of operator `opOrigin`. */
    this(string op, string opOrigin)
    {
        this.opOrigin = opOrigin;
        this.op = op;
    }
}
4104 
/** Look up the kind of `regFile` in `kindsById` using the kind id stored in
    its `_cstat`.

    Returns: The registered kind or `null` if the id is not present.
*/
FKind tryLookupKindIn(RegFile regFile,
                      FKind[SHA1Digest] kindsById)
{
    immutable kindId = regFile._cstat.kindId;
    if (auto registered = kindId in kindsById)
    {
        return *registered;
    }
    return null;
}
4118 
/** Name under which `theFile` is presented.

    Returns: "./" followed by the file's name when relative paths are
    requested and exactly one top directory is being scanned; the file's
    path otherwise.
*/
string displayedFilename(AnyFile)(GStats gstats,
                                  AnyFile theFile) @safe pure
{
    if (gstats.pathFormat == PathFormat.relative &&
        gstats.topDirs.length == 1)
    {
        return "./" ~ theFile.name;
    }
    return theFile.path;
}
4127 
4128 /** File System Scanner. */
4129 class Scanner(Term)
4130 {
4131     this(string[] args, ref Term term)
4132     {
4133         prepare(args, term);
4134     }
4135 
4136     SysTime _currTime;
4137     import std.getopt;
4138     import std.string: toLower, toUpper, startsWith, CaseSensitive;
4139     import std.mmfile;
4140     import std.stdio: writeln, stdout, stderr, stdin, popen;
4141     import std.algorithm: find, count, countUntil, min, splitter;
4142     import std.range: join;
4143     import std.conv: to;
4144 
4145     import core.sys.posix.sys.mman;
4146     import core.sys.posix.pwd: passwd, getpwuid_r;
4147     version(linux)
4148     {
4149         // import core.sys.linux.sys.inotify;
4150         import core.sys.linux.sys.xattr;
4151     }
4152     import core.sys.posix.unistd: getuid, getgid;
4153     import std.file: read, FileException, exists, getcwd;
4154     import std.range: retro;
4155     import std.exception: ErrnoException;
4156     import core.sys.posix.sys.stat: stat_t, S_IRUSR, S_IRGRP, S_IROTH;
4157 
4158     uint64_t _hitsCountTotal = 0;
4159 
4160     Symlink[] _brokenSymlinks;
4161 
4162     bool _beVerbose = false;
4163     bool _caseFold = false;
4164     bool _showSkipped = false;
4165     bool listTxtFKinds = false;
4166     bool listBinFKinds = false;
4167     string selFKindNames;
4168     string[] _topDirNames;
4169     string[] addTags;
4170     string[] removeTags;
4171 
4172     private
4173     {
4174         GStats gstats = new GStats();
4175 
4176         string _cacheFile = "~/.cache/fs-root.msgpack";
4177 
4178         uid_t _uid;
4179         gid_t _gid;
4180     }
4181 
4182     ioFile outFile;
4183 
4184     string[] keys; // Keys to scan.
4185     typeof(keys.map!bistogramOverRepresentation) keysBists;
4186     typeof(keys.map!(sparseUIntNGramOverRepresentation!NGramOrder)) keysXGrams;
4187     Bist keysBistsUnion;
4188     XGram keysXGramsUnion;
4189 
4190     string selFKindsNote;
4191 
4192     void prepare(string[] args, ref Term term)
4193     {
4194         _scanChunkSize = 32*pageSize;
4195         gstats.loadFileKinds;
4196         gstats.loadDirKinds;
4197 
4198         bool helpPrinted = getoptEx("FS --- File System Scanning Utility in D.\n" ~
4199                                     "Usage: fs { --switches } [KEY]...\n" ~
4200                                     "Note that scanning for multiple KEYs is possible.\nIf so hits are highlighted in different colors!\n" ~
4201                                     "Sample calls: \n" ~
4202                                     "  fdo.d --color -d /lib/modules/3.13.0-24-generic/kernel/drivers/staging --browse --duplicates --recache lirc\n" ~
4203                                     "  fdo.d --color -d /etc -s --tree --usage -l --duplicates stallman\n"
4204                                     "  fdo.d --color -d /etc -d /var --acronym sttccc\n"
4205                                     "  fdo.d --color -d /etc -d /var --acronym dktp\n"
4206                                     "  fdo.d --color -d /etc -d /var --acronym tms sttc prc dtp xsr\n" ~
4207                                     "  fdo.d --color -d /etc min max delta\n" ~
4208                                     "  fdo.d --color -d /etc if elif return len --duplicates --sort=onSize\n" ~
4209                                     "  fdo.d --color -k -d /bin alpha\n" ~
4210                                     "  fdo.d --color -d /lib -k linus" ~
4211                                     "  fdo.d --color -d /etc --symbol alpha beta gamma delta" ~
4212                                     "  fdo.d --color -d /var/spool/postfix/dev " ~
4213                                     "  fdo.d --color -d /etc alpha" ~
4214                                     "  fdo.d --color -d ~/Work/dmd  --browse xyz --duplicates --do=preprocess",
4215 
4216                                     args,
4217                                     std.getopt.config.caseInsensitive,
4218 
4219                                     "verbose|v", "\tVerbose",  &_beVerbose,
4220 
4221                                     "color|C", "\tColorize Output" ~ defaultDoc(gstats.colorFlag),  &gstats.colorFlag,
4222                                     "types|T", "\tComma separated list (CSV) of file types/kinds to scan" ~ defaultDoc(selFKindNames), &selFKindNames,
4223                                     "list-textual-kinds", "\tList registered textual types/kinds" ~ defaultDoc(listTxtFKinds), &listTxtFKinds,
4224                                     "list-binary-kinds", "\tList registered binary types/kinds" ~ defaultDoc(listBinFKinds), &listBinFKinds,
4225                                     "group-types|G", "\tCollect and group file types found" ~ defaultDoc(gstats.collectTypeHits), &gstats.collectTypeHits,
4226 
4227                                     "i", "\tCase-Fold, Case-Insensitive" ~ defaultDoc(_caseFold), &_caseFold,
4228                                     "k", "\tShow Skipped Directories and Files" ~ defaultDoc(_showSkipped), &_showSkipped,
4229                                     "d", "\tRoot Directory(s) of tree(s) to scan, defaulted to current directory" ~ defaultDoc(_topDirNames), &_topDirNames,
4230                                     "depth", "\tDepth of tree to scan, defaulted to unlimited (-1) depth" ~ defaultDoc(gstats.scanDepth), &gstats.scanDepth,
4231 
4232                                     // Contexts
4233                                     "context|x", "\tComma Separated List of Contexts. Either: " ~ enumDoc!ScanContext, &gstats.scanContext,
4234 
4235                                     "word|w", "\tSearch for key as a complete Word (A Letter followed by more Letters and Digits)." ~ defaultDoc(gstats.keyAsWord), &gstats.keyAsWord,
4236                                     "symbol|ident|id|s", "\tSearch for key as a complete Symbol (Identifier)" ~ defaultDoc(gstats.keyAsSymbol), &gstats.keyAsSymbol,
4237                                     "acronym|a", "\tSearch for key as an acronym (relaxed)" ~ defaultDoc(gstats.keyAsAcronym), &gstats.keyAsAcronym,
4238                                     "exact", "\tSearch for key only with exact match (strict)" ~ defaultDoc(gstats.keyAsExact), &gstats.keyAsExact,
4239 
4240                                     "name-duplicates|snd", "\tDetect & Show file name duplicates" ~ defaultDoc(gstats.showNameDups), &gstats.showNameDups,
4241                                     "hardlink-duplicates|inode-duplicates|shd", "\tDetect & Show multiple links to same inode" ~ defaultDoc(gstats.showLinkDups), &gstats.showLinkDups,
4242                                     "file-content-duplicates|scd", "\tDetect & Show file contents duplicates" ~ defaultDoc(gstats.showFileContentDups), &gstats.showFileContentDups,
4243                                     "tree-content-duplicates", "\tDetect & Show directory tree contents duplicates" ~ defaultDoc(gstats.showTreeContentDups), &gstats.showTreeContentDups,
4244 
4245                                     "elf-symbol-duplicates", "\tDetect & Show ELF Symbol Duplicates" ~ defaultDoc(gstats.showELFSymbolDups), &gstats.showELFSymbolDups,
4246 
4247                                     "duplicates|D", "\tDetect & Show file name and contents duplicates" ~ defaultDoc(gstats.showAnyDups), &gstats.showAnyDups,
4248                                     "duplicates-context", "\tDuplicates Detection Context. Either: " ~ enumDoc!DuplicatesContext, &gstats.duplicatesContext,
4249                                     "hardlink-content-duplicates", "\tConvert all content duplicates into hardlinks (common inode) if they reside on the same file system" ~ defaultDoc(gstats.linkContentDups), &gstats.linkContentDups,
4250 
4251                                     "usage", "\tShow disk usage (tree size) of scanned directories" ~ defaultDoc(gstats.showUsage), &gstats.showUsage,
4252                                     "count-lines", "\tShow line counts of scanned files" ~ defaultDoc(gstats.showLineCounts), &gstats.showLineCounts,
4253 
4254                                     "sha1", "\tShow SHA1 content digests" ~ defaultDoc(gstats.showSHA1), &gstats.showSHA1,
4255 
4256                                     "mmaps", "\tShow when files are memory mapped (mmaped)" ~ defaultDoc(gstats.showMMaps), &gstats.showMMaps,
4257 
4258                                     "follow-symlinks|f", "\tFollow symbolic links" ~ defaultDoc(gstats.followSymlinks), &gstats.followSymlinks,
4259                                     "broken-symlinks|l", "\tDetect & Show broken symbolic links (target is non-existing file) " ~ defaultDoc(gstats.showBrokenSymlinks), &gstats.showBrokenSymlinks,
4260                                     "show-symlink-cycles|l", "\tDetect & Show symbolic links cycles" ~ defaultDoc(gstats.showSymlinkCycles), &gstats.showSymlinkCycles,
4261 
4262                                     "add-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(addTags), &addTags,
4263                                     "remove-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(removeTags), &removeTags,
4264 
4265                                     "tree|W", "\tShow Scanned Tree and Followed Symbolic Links" ~ defaultDoc(gstats.showTree), &gstats.showTree,
4266                                     "sort|S", "\tDirectory contents sorting order. Either: " ~ enumDoc!DirSorting, &gstats.subsSorting,
4267                                     "build", "\tBuild Source Code. Either: " ~ enumDoc!BuildType, &gstats.buildType,
4268 
4269                                     "path-format", "\tFormat of paths. Either: " ~ enumDoc!PathFormat ~ "." ~ defaultDoc(gstats.pathFormat), &gstats.pathFormat,
4270 
4271                                     "cache-file|F", "\tFile System Tree Cache File" ~ defaultDoc(_cacheFile), &_cacheFile,
4272                                     "recache", "\tSkip initial load of cache from disk" ~ defaultDoc(gstats.recache), &gstats.recache,
4273 
4274                                     "do", "\tOperation to perform on matching files. Either: " ~ enumDoc!FOp, &gstats.fOp,
4275 
4276                                     "demangle-elf", "\tDemangle ELF files.", &gstats.demangleELF,
4277 
4278                                     "use-ngrams", "\tUse NGrams to cache statistics and thereby speed up search" ~ defaultDoc(gstats.useNGrams), &gstats.useNGrams,
4279 
4280                                     "html|H", "\tFormat output as HTML" ~ defaultDoc(gstats.useHTML), &gstats.useHTML,
4281                                     "browse|B", ("\tFormat output as HTML to a temporary file" ~
4282                                                  defaultDoc(_cacheFile) ~
4283                                                  " and open it with default Web browser" ~
4284                                                  defaultDoc(gstats.browseOutput)), &gstats.browseOutput,
4285 
4286                                     "author", "\tPrint name of\n"~"\tthe author",
4287                                     delegate() { writeln("Per Nordlöw"); }
4288             );
4289 
4290         if (gstats.showAnyDups)
4291         {
4292             gstats.showNameDups = true;
4293             gstats.showLinkDups = true;
4294             gstats.showFileContentDups = true;
4295             gstats.showTreeContentDups = true;
4296             gstats.showELFSymbolDups = true;
4297         }
4298         if (helpPrinted)
4299             return;
4300 
4301         _cacheFile = std.path.expandTilde(_cacheFile);
4302 
4303         if (_topDirNames.empty)
4304         {
4305             _topDirNames = ["."];
4306         }
4307         if (_topDirNames == ["."])
4308         {
4309             gstats.pathFormat = PathFormat.relative;
4310         }
4311         else
4312         {
4313             gstats.pathFormat = PathFormat.absolute;
4314         }
4315         foreach (ref topName; _topDirNames)
4316         {
4317             if (topName ==  ".")
4318             {
4319                 topName = topName.absolutePath.buildNormalizedPath;
4320             }
4321             else
4322             {
4323                 topName = topName.expandTilde.buildNormalizedPath;
4324             }
4325         }
4326 
4327         // Output Handling
4328         if (gstats.browseOutput)
4329         {
4330             gstats.useHTML = true;
4331             immutable ext = gstats.useHTML ? "html" : "results.txt";
4332             import std.uuid: randomUUID;
4333             outFile = ioFile("/tmp/fs-" ~ randomUUID.toString() ~
4334                              "." ~ ext,
4335                              "w");
4336             /* popen("gnome-open " ~ outFile.name); */
4337             popen("firefox -new-tab " ~ outFile.name);
4338         }
4339         else
4340         {
4341             outFile = stdout;
4342         }
4343 
4344         auto cwd = getcwd();
4345 
4346         foreach (arg; args[1..$])
4347         {
4348             if (!arg.startsWith("-")) // if argument not a flag
4349             {
4350                 keys ~= arg;
4351             }
4352         }
4353 
4354         // Calc stats
4355         keysBists = keys.map!bistogramOverRepresentation;
4356         keysXGrams = keys.map!(sparseUIntNGramOverRepresentation!NGramOrder);
4357         keysBistsUnion = reduce!"a | b"(typeof(keysBists.front).init, keysBists);
4358         keysXGramsUnion = reduce!"a + b"(typeof(keysXGrams.front).init, keysXGrams);
4359 
4360         auto viz = new Viz(outFile,
4361                            &term,
4362                            gstats.showTree,
4363                            gstats.useHTML ? VizForm.HTML : VizForm.textAsciiDocUTF8,
4364                            gstats.colorFlag,
4365                            !gstats.useHTML, // only use if HTML
4366                            true, // TODO: Only set if in debug mode
4367             );
4368 
4369         if (gstats.useNGrams &&
4370             (!keys.empty) &&
4371             keysXGramsUnion.empty)
4372         {
4373             gstats.useNGrams = false;
4374             viz.ppln("Keys must be at least of length " ~
4375                      to!string(NGramOrder + 1) ~
4376                      " in order for " ~
4377                      keysXGrams[0].typeName ~
4378                      " to be calculated");
4379         }
4380 
4381         // viz.ppln("<meta http-equiv=\"refresh\" content=\"1\"/>"); // refresh every second
4382 
4383         if (selFKindNames)
4384         {
4385             foreach (lang; selFKindNames.splitterASCIIAmong!(","))
4386             {
4387                 if      (lang         in gstats.allFKinds.byName) // try exact match
4388                 {
4389                     gstats.selFKinds ~= gstats.allFKinds.byName[lang];
4390                 }
4391                 else if (lang.toLower in gstats.allFKinds.byName) // else try all in lower case
4392                 {
4393                     gstats.selFKinds ~= gstats.allFKinds.byName[lang.toLower];
4394                 }
4395                 else if (lang.toUpper in gstats.allFKinds.byName) // else try all in upper case
4396                 {
4397                     gstats.selFKinds ~= gstats.allFKinds.byName[lang.toUpper];
4398                 }
4399                 else
4400                 {
4401                     writeln("warning: Language ", lang, " not registered");
4402                 }
4403             }
4404             if (gstats.selFKinds.byIndex.empty)
4405             {
4406                 writeln("warning: None of the languages ", to!string(selFKindNames), " are registered. Defaulting to all file types.");
4407                 gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
4408             }
4409             else
4410             {
4411                 gstats.selFKinds.rehash;
4412             }
4413         }
4414         else
4415         {
4416             gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
4417         }
4418 
4419         // Keys
4420         auto commaedKeys = keys.joiner(",");
4421         const keysPluralExt = keys.length >= 2 ? "s" : "";
4422         string commaedKeysString = to!string(commaedKeys);
4423         if (keys)
4424         {
4425             selFKindsNote = " in " ~ (gstats.selFKinds == gstats.allFKinds ?
4426                                       "all " :
4427                                       gstats.selFKinds.byIndex.map!(a => a.kindName).join(",") ~ "-") ~ "files";
4428             immutable underNote = " under \"" ~ (_topDirNames.reduce!"a ~ ',' ~ b") ~ "\"";
4429             const exactNote = gstats.keyAsExact ? "exact " : "";
4430             string asNote;
4431             if (gstats.keyAsAcronym)
4432             {
4433                 asNote = (" as " ~ exactNote ~
4434                           (gstats.keyAsWord ? "word" : "symbol") ~
4435                           " acronym" ~ keysPluralExt);
4436             }
4437             else if (gstats.keyAsSymbol)
4438             {
4439                 asNote = " as " ~ exactNote ~ "symbol" ~ keysPluralExt;
4440             }
4441             else if (gstats.keyAsWord)
4442             {
4443                 asNote = " as " ~ exactNote ~ "word" ~ keysPluralExt;
4444             }
4445             else
4446             {
4447                 asNote = "";
4448             }
4449 
4450             const title = ("Searching for \"" ~ commaedKeysString ~ "\"" ~
4451                            " case-" ~ (_caseFold ? "in" : "") ~"sensitively"
4452                            ~asNote ~selFKindsNote ~underNote);
4453             if (viz.form == VizForm.HTML) // only needed for HTML output
4454             {
4455                 viz.ppln(faze(title, titleFace));
4456             }
4457 
4458             viz.pp(asH!1("Searching for \"", commaedKeysString, "\"",
4459                          " case-", (_caseFold ? "in" : ""), "sensitively",
4460                          asNote, selFKindsNote,
4461                          " under ", _topDirNames.map!(a => a.asPath)));
4462         }
4463 
4464         if (listTxtFKinds)
4465         {
4466             viz.pp("Textual (Source) Kinds".asH!2,
4467                    gstats.txtFKinds.byIndex.asTable);
4468         }
4469 
4470         if (listBinFKinds)
4471         {
4472             viz.pp("Binary Kinds".asH!2,
4473                    gstats.binFKinds.byIndex.asTable);
4474         }
4475 
4476         /* binFKinds.asTable, */
4477 
4478         if (_showSkipped)
4479         {
4480             viz.pp("Skipping files of type".asH!2,
4481                    asUList(gstats.binFKinds.byIndex.map!(a => asItem(a.kindName.asBold,
4482                                                                      ": ",
4483                                                                      asCSL(a.exts.map!(b => b.asCode))))));
4484             viz.pp("Skipping directories of type".asH!2,
4485                    asUList(gstats.skippedDirKinds.map!(a => asItem(a.kindName.asBold,
4486                                                                    ": ",
4487                                                                    a.fileName.asCode))));
4488         }
4489 
4490         // if (key && key == key.toLower()) { // if search key is all lowercase
4491         //     _caseFold = true;               // we do case-insensitive search like in Emacs
4492         // }
4493 
4494         _uid = getuid;
4495         _gid = getgid;
4496 
4497         // Setup root directory
4498         if (!gstats.recache)
4499         {
4500             GC.disable;
4501             gstats.rootDir = loadRootDirTree(viz, _cacheFile, gstats);
4502             GC.enable;
4503         }
4504         if (!gstats.rootDir) // if first time
4505         {
4506             gstats.rootDir = new Dir("/", gstats); // filesystem root directory. TODO: Make this uncopyable?
4507         }
4508 
4509         // Scan for exact key match
4510         gstats.topDirs = getDirs(enforceNotNull(gstats.rootDir), _topDirNames);
4511 
4512         _currTime = Clock.currTime;
4513 
4514         GC.disable;
4515         scanTopDirs(viz, commaedKeysString);
4516         GC.enable;
4517 
4518         GC.disable;
4519         saveRootDirTree(viz, gstats.rootDir, _cacheFile);
4520         GC.enable;
4521 
4522         // Print statistics
4523         showStats(viz);
4524     }
4525 
4526     void scanTopDirs(Viz viz,
4527                      string commaedKeysString)
4528     {
4529         viz.pp("Results".asH!2);
4530         if (gstats.topDirs)
4531         {
4532             foreach (topIndex, topDir; gstats.topDirs)
4533             {
4534                 scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
4535                 if (ctrlC)
4536                 {
4537                     auto restDirs = gstats.topDirs[topIndex + 1..$];
4538                     if (!restDirs.empty)
4539                     {
4540                         debug dbg("Ctrl-C pressed: Skipping search of " ~ to!string(restDirs));
4541                         break;
4542                     }
4543                 }
4544             }
4545 
4546             viz.pp("Summary".asH!2);
4547 
4548             if ((gstats.noScannedFiles - gstats.noScannedDirs) == 0)
4549             {
4550                 viz.ppln("No files with any content found");
4551             }
4552             else
4553             {
4554                 // Scan for acronym key match
4555                 if (keys && _hitsCountTotal == 0)  // if keys given but no hit found
4556                 {
4557                     auto keysString = (keys.length >= 2 ? "s" : "") ~ " \"" ~ commaedKeysString;
4558                     if (gstats.keyAsAcronym)
4559                     {
4560                         viz.ppln(("No acronym matches for key" ~ keysString ~ `"` ~
4561                                   (gstats.keyAsSymbol ? " as symbol" : "") ~
4562                                   " found in files of type"));
4563                     }
4564                     else if (!gstats.keyAsExact)
4565                     {
4566                         viz.ppln(("No exact matches for key" ~ keysString ~ `"` ~
4567                                   (gstats.keyAsSymbol ? " as symbol" : "") ~
4568                                   " found" ~ selFKindsNote ~
4569                                   ". Relaxing scan to" ~ (gstats.keyAsSymbol ? " symbol" : "") ~ " acronym match."));
4570                         gstats.keyAsAcronym = true;
4571 
4572                         foreach (topDir; gstats.topDirs)
4573                         {
4574                             scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
4575                         }
4576                     }
4577                 }
4578             }
4579         }
4580 
4581         assert(gstats.noScannedDirs +
4582                gstats.noScannedRegFiles +
4583                gstats.noScannedSymlinks +
4584                gstats.noScannedSpecialFiles == gstats.noScannedFiles);
4585     }
4586 
4587     version(linux)
4588     {
4589         @trusted bool readable(in stat_t stat, uid_t uid, gid_t gid, ref string msg)
4590         {
4591             immutable mode = stat.st_mode;
4592             immutable ok = ((stat.st_uid == uid) && (mode & S_IRUSR) ||
4593                             (stat.st_gid == gid) && (mode & S_IRGRP) ||
4594                             (mode & S_IROTH));
4595             if (!ok)
4596             {
4597                 msg = " is not readable by you, but only by";
4598                 bool can = false; // someone can access
4599                 if (mode & S_IRUSR)
4600                 {
4601                     can = true;
4602                     msg ~= " user id " ~ to!string(stat.st_uid);
4603 
4604                     // Lookup user name from user id
4605                     passwd pw;
4606                     passwd* pw_ret;
4607                     immutable size_t bufsize = 16384;
4608                     char* buf = cast(char*)core.stdc.stdlib.malloc(bufsize);
4609                     getpwuid_r(stat.st_uid, &pw, buf, bufsize, &pw_ret);
4610                     if (pw_ret != null)
4611                     {
4612                         string userName;
4613                         {
4614                             size_t n = 0;
4615                             while (pw.pw_name[n] != 0)
4616                             {
4617                                 userName ~= pw.pw_name[n];
4618                                 n++;
4619                             }
4620                         }
4621                         msg ~= " (" ~ userName ~ ")";
4622 
4623                         // string realName;
4624                         // {
4625                         //     size_t n = 0;
4626                         //     while (pw.pw_gecos[n] != 0)
4627                         //     {
4628                         //         realName ~= pw.pw_gecos[n];
4629                         //         n++;
4630                         //     }
4631                         // }
4632                     }
4633                     core.stdc.stdlib.free(buf);
4634 
4635                 }
4636                 if (mode & S_IRGRP)
4637                 {
4638                     can = true;
4639                     if (msg != "")
4640                     {
4641                         msg ~= " or";
4642                     }
4643                     msg ~= " group id " ~ to!string(stat.st_gid);
4644                 }
4645                 if (!can)
4646                 {
4647                     msg ~= " root";
4648                 }
4649             }
4650             return ok;
4651         }
4652     }
4653 
    // Result counters; handleError() adds the size of each unreadable entry
    // to results.noBytesUnreadable.
    Results results;
4655 
4656     void handleError(F)(Viz viz,
4657                         NotNull!F file, bool isDir, size_t subIndex)
4658     {
4659         auto dent = DirEntry(file.path);
4660         immutable stat_t stat = dent.statBuf;
4661         string msg;
4662         if (!readable(stat, _uid, _gid, msg))
4663         {
4664             results.noBytesUnreadable += dent.size;
4665             if (_showSkipped)
4666             {
4667                 if (gstats.showTree)
4668                 {
4669                     auto parentDir = file.parent;
4670                     immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
4671                     viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
4672                 }
4673                 viz.ppln(file,
4674                          ":  ", isDir ? "Directory" : "File",
4675                          faze(msg, warnFace));
4676             }
4677         }
4678     }
4679 
4680     void printSkipped(Viz viz,
4681                       NotNull!RegFile regFile,
4682                       size_t subIndex,
4683                       const NotNull!FKind kind, KindHit kindhit,
4684                       const string skipCause)
4685     {
4686         auto parentDir = regFile.parent;
4687         if (_showSkipped)
4688         {
4689             if (gstats.showTree)
4690             {
4691                 immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
4692                 viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
4693             }
4694             viz.pp(horizontalRuler,
4695                    asH!3(regFile,
4696                          ": Skipped ", kind, " file",
4697                          skipCause));
4698         }
4699     }
4700 
    // Scan chunk size; prepare() initializes it to 32*pageSize.
    size_t _scanChunkSize;
4702 
4703     KindHit isSelectedFKind(NotNull!RegFile regFile) @safe /* nothrow */
4704     {
4705         typeof(return) kindHit = KindHit.none;
4706         FKind hitKind;
4707 
4708         // Try cached kind first
4709         // First Try with kindId as try
4710         if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
4711         {
4712             if (regFile._cstat.kindId in gstats.selFKinds.byId)
4713             {
4714                 hitKind = gstats.selFKinds.byId[regFile._cstat.kindId];
4715                 kindHit = KindHit.cached;
4716                 return kindHit;
4717             }
4718         }
4719 
4720         immutable ext = regFile.realExtension;
4721 
4722         // Try with hash table first
4723         if (!ext.empty && // if file has extension and
4724             ext in gstats.selFKinds.byExt) // and extensions may match specified included files
4725         {
4726             auto possibleKinds = gstats.selFKinds.byExt[ext];
4727             foreach (kind; possibleKinds)
4728             {
4729                 auto nnKind = enforceNotNull(kind);
4730                 immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
4731                 if (hit)
4732                 {
4733                     hitKind = nnKind;
4734                     kindHit = hit;
4735                     break;
4736                 }
4737             }
4738         }
4739 
4740         if (!hitKind) // if no hit yet
4741         {
4742             // blindly try the rest
4743             foreach (kind; gstats.selFKinds.byIndex)
4744             {
4745                 auto nnKind = enforceNotNull(kind);
4746                 immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
4747                 if (hit)
4748                 {
4749                     hitKind = nnKind;
4750                     kindHit = hit;
4751                     break;
4752                 }
4753             }
4754         }
4755 
4756         return kindHit;
4757     }
4758 
4759     /** Search for Keys `keys` in Source `src`.
4760      */
4761     size_t scanForKeys(Source, Keys)(Viz viz,
4762                                      NotNull!Dir topDir,
4763                                      NotNull!File theFile,
4764                                      NotNull!Dir parentDir,
4765                                      ref Symlink[] fromSymlinks,
4766                                      in Source src,
4767                                      in Keys keys,
4768                                      in bool[] bistHits = [],
4769                                      ScanContext ctx = ScanContext.standard)
4770     {
4771         bool anyFileHit = false; // will become true if any hit in this file
4772 
4773         typeof(return) hitCount = 0;
4774 
4775         import std.ascii: newline;
4776 
4777         auto thisFace = stdFace;
4778         if (gstats.colorFlag)
4779         {
4780             if (ScanContext.fileName)
4781             {
4782                 thisFace = fileFace;
4783             }
4784         }
4785 
4786         size_t nL = 0; // line counter
4787         foreach (line; src.splitterASCIIAmong!(newline))
4788         {
4789             auto rest = cast(string)line; // rest of line as a string
4790 
4791             bool anyLineHit = false; // will become true if any hit on current line
4792             // Hit search loop
4793             while (!rest.empty)
4794             {
4795                 // Find any key
4796 
4797                 /* TODO: Convert these to a range. */
4798                 ptrdiff_t offKB = -1;
4799                 ptrdiff_t offKE = -1;
4800 
4801                 foreach (uint ix, key; keys) // TODO: Call variadic-find instead to speed things up.
4802                 {
4803                     /* Bistogram Discardal */
4804                     if ((!bistHits.empty) &&
4805                         !bistHits[ix]) // if neither exact nor acronym match possible
4806                     {
4807                         continue; // try next key
4808                     }
4809 
4810                     /* dbg("key:", key, " line:", line); */
4811                     ptrdiff_t[] acronymOffsets;
4812                     if (gstats.keyAsAcronym) // acronym search
4813                     {
4814                         auto hit = (cast(immutable ubyte[])rest).findAcronymAt(key,
4815                                                                                gstats.keyAsSymbol ? FindContext.inSymbol : FindContext.inWord);
4816                         if (!hit[0].empty)
4817                         {
4818                             acronymOffsets = hit[1];
4819                             offKB = hit[1][0];
4820                             offKE = hit[1][$-1] + 1;
4821                         }
4822                     }
4823                     else
4824                     { // normal search
4825                         import std.string: indexOf;
4826                         offKB = rest.indexOf(key,
4827                                              _caseFold ? CaseSensitive.no : CaseSensitive.yes); // hit begin offset
4828                         offKE = offKB + key.length; // hit end offset
4829                     }
4830 
4831                     if (offKB >= 0) // if hit
4832                     {
4833                         if (!gstats.showTree && ctx == ScanContext.fileName)
4834                         {
4835                             viz.pp(parentDir, dirSeparator);
4836                         }
4837 
4838                         // Check Context
4839                         if ((gstats.keyAsSymbol && !isSymbolASCII(rest, offKB, offKE)) ||
4840                             (gstats.keyAsWord   && !isWordASCII  (rest, offKB, offKE)))
4841                         {
4842                             rest = rest[offKE..$]; // move forward in line
4843                             continue;
4844                         }
4845 
4846                         if (ctx == ScanContext.fileContent &&
4847                             !anyLineHit) // if this is first hit
4848                         {
4849                             if (viz.form == VizForm.HTML)
4850                             {
4851                                 if (!anyFileHit)
4852                                 {
4853                                     viz.pp(horizontalRuler,
4854                                            displayedFilename(gstats, theFile).asPath.asH!3);
4855                                     viz.ppTagOpen(`table`, `border=1`);
4856                                     anyFileHit = true;
4857                                 }
4858                             }
4859                             else
4860                             {
4861                                 if (gstats.showTree)
4862                                 {
4863                                     viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ "├" ~ "─ ");
4864                                 }
4865                                 else
4866                                 {
4867                                     foreach (fromSymlink; fromSymlinks)
4868                                     {
4869                                         viz.pp(fromSymlink,
4870                                                " modified ",
4871                                                faze(shortDurationString(_currTime - fromSymlink.timeLastModified),
4872                                                     timeFace),
4873                                                " ago",
4874                                                " -> ");
4875                                     }
4876                                     // show file path/name
4877                                     viz.pp(displayedFilename(gstats, theFile).asPath); // show path
4878                                 }
4879                             }
4880 
4881                             // show line:column
4882                             if (viz.form == VizForm.HTML)
4883                             {
4884                                 viz.ppTagOpen("tr");
4885                                 viz.pp(to!string(nL+1).asCell,
4886                                        to!string(offKB+1).asCell);
4887                                 viz.ppTagOpen("td");
4888                                 viz.ppTagOpen("code");
4889                             }
4890                             else
4891                             {
4892                                 viz.pp(faze(":" ~ to!string(nL+1) ~ ":" ~ to!string(offKB+1) ~ ":",
4893                                             contextFace));
4894                             }
4895                             anyLineHit = true;
4896                         }
4897 
4898                         // show content prefix
4899                         viz.pp(faze(to!string(rest[0..offKB]), thisFace));
4900 
4901                         // show hit part
4902                         if (!acronymOffsets.empty)
4903                         {
4904                             foreach (aIndex, currOff; acronymOffsets) // TODO: Reuse std.algorithm: zip or lockstep? Or create a new kind say named conv.
4905                             {
4906                                 // context before
4907                                 if (aIndex >= 1)
4908                                 {
4909                                     immutable prevOff = acronymOffsets[aIndex-1];
4910                                     if (prevOff + 1 < currOff) // at least one letter in between
4911                                     {
4912                                         viz.pp(asCtx(ix, to!string(rest[prevOff + 1 .. currOff])));
4913                                     }
4914                                 }
4915                                 // hit letter
4916                                 viz.pp(asHit(ix, to!string(rest[currOff])));
4917                             }
4918                         }
4919                         else
4920                         {
4921                             viz.pp(asHit(ix, to!string(rest[offKB..offKE])));
4922                         }
4923 
4924                         rest = rest[offKE..$]; // move forward in line
4925 
4926                         hitCount++; // increase hit count
4927                         parentDir.hitCount++;
4928                         _hitsCountTotal++;
4929 
4930                         goto foundHit;
4931                     }
4932                 }
4933             foundHit:
4934                 if (offKB == -1) { break; }
4935             }
4936 
4937             // finalize line
4938             if (anyLineHit)
4939             {
4940                 // show final context suffix
4941                 viz.ppln(faze(rest, thisFace));
4942                 if (viz.form == VizForm.HTML)
4943                 {
4944                     viz.ppTagClose("code");
4945                     viz.ppTagClose("td");
4946                     viz.pplnTagClose("tr");
4947                 }
4948             }
4949             nL++;
4950         }
4951 
4952         if (gstats.showLineCounts)
4953         {
4954             gstats.lineCountsByFile[theFile] = nL;
4955         }
4956 
4957         if (anyFileHit)
4958         {
4959             viz.pplnTagClose("table");
4960         }
4961 
4962         // Previous solution
4963         // version(none)
4964         // {
4965         //     ptrdiff_t offHit = 0;
4966         //     foreach (ix, key; keys)
4967         //     {
4968         //         scope immutable hit1 = src.find(key); // single key hit
4969         //         offHit = hit1.ptr - src.ptr;
4970         //         if (!hit1.empty)
4971         //         {
4972         //             scope immutable src0 = src[0..offHit]; // src beforce hi
4973         //             immutable rowHit = count(src0, newline);
4974         //             immutable colHit = src0.retro.countUntil(newline); // count backwards till beginning of rowHit
4975         //             immutable offBOL = offHit - colHit;
4976         //             immutable cntEOL = src[offHit..$].countUntil(newline); // count forwards to end of rowHit
4977         //             immutable offEOL = (cntEOL == -1 ? // if no hit
4978         //                                 src.length :   // end of file
4979         //                                 offHit + cntEOL); // normal case
4980         //             viz.pp(faze(asPath(gstats.useHTML, dent.name), pathFace));
4981         //             viz.ppln(":", rowHit + 1,
4982         //                                                                               ":", colHit + 1,
4983         //                                                                               ":", cast(string)src[offBOL..offEOL]);
4984         //         }
4985         //     }
4986         // }
4987 
4988         // switch (keys.length)
4989         // {
4990         // default:
4991         //     break;
4992         // case 0:
4993         //     break;
4994         // case 1:
4995         //     immutable hit1 = src.find(keys[0]);
4996         //     if (!hit1.empty)
4997         //     {
4998         //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit1.length);
4999         //     }
5000         //     break;
5001         // // case 2:
5002         // //     immutable hit2 = src.find(keys[0], keys[1]); // find two keys
5003         // //     if (!hit2[0].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit2[0].length); }
5004         // //     if (!hit2[1].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit2[1].length); }
5005         // //     break;
5006         // // case 3:
5007         // //     immutable hit3 = src.find(keys[0], keys[1], keys[2]); // find two keys
5008         // //     if (!hit3.empty)
5009         //        {
5010         // //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit1.length);
5011         // //     }
5012         // //     break;
5013         // }
5014         return hitCount;
5015     }
5016 
5017     /** Process Regular File `theRegFile`. */
5018     void processRegFile(Viz viz,
5019                         NotNull!Dir topDir,
5020                         NotNull!RegFile theRegFile,
5021                         NotNull!Dir parentDir,
5022                         const string[] keys,
5023                         ref Symlink[] fromSymlinks,
5024                         size_t subIndex,
5025                         GStats gstats)
5026     {
5027         scanRegFile(viz,
5028                     topDir,
5029                     theRegFile,
5030                     parentDir,
5031                     keys,
5032                     fromSymlinks,
5033                     subIndex);
5034 
5035         // check for operations
5036         // TODO: Reuse isSelectedFKind instead of this
5037         immutable ext = theRegFile.realExtension;
5038         if (ext in gstats.selFKinds.byExt)
5039         {
5040             auto matchingFKinds = gstats.selFKinds.byExt[ext];
5041             foreach (kind; matchingFKinds)
5042             {
5043                 const hit = kind.operations.find!(a => a[0] == gstats.fOp);
5044                 if (!hit.empty)
5045                 {
5046                     const fOp = hit.front;
5047                     const cmd = fOp[1]; // command string
5048                     import std.process: spawnProcess;
5049                     import std.algorithm: splitter;
5050                     dbg("TODO: Performing operation ", to!string(cmd),
5051                         " on ", theRegFile.path,
5052                         " by calling it using ", cmd);
5053                     auto pid = spawnProcess(cmd.splitterASCIIAmong!(" ").array ~ [theRegFile.path]);
5054                 }
5055             }
5056         }
5057     }
5058 
5059     /** Scan `elfFile` for ELF Symbols. */
5060     void scanELFFile(Viz viz,
5061                      NotNull!RegFile elfFile,
5062                      const string[] keys,
5063                      GStats gstats)
5064     {
5065         import nxt.elfdoc: sectionNameExplanations;
5066         /* TODO: Add mouse hovering help for sectionNameExplanations[section] */
5067         dbg("before: ", elfFile);
5068         ELF decoder = ELF.fromFile(elfFile._mmfile);
5069         dbg("after: ", elfFile);
5070 
5071         /* foreach (section; decoder.sections) */
5072         /* { */
5073         /*     if (section.name.length) */
5074         /*     { */
5075         /*         /\* auto sst = section.StringTable; *\/ */
5076         /*         //writeln("ELF Section named ", section.name); */
5077         /*     } */
5078         /* } */
5079 
5080         /* const sectionNames = [".symtab"/\* , ".strtab", ".dynsym" *\/];    // TODO: These two other sections causes range exceptions. */
5081         /* foreach (sectionName; sectionNames) */
5082         /* { */
5083         /*     auto sts = decoder.getSection(sectionName); */
5084         /*     if (!sts.isNull) */
5085         /*     { */
5086         /*         SymbolTable symtab = SymbolTable(sts); */
5087         /*         // TODO: Use range: auto symbolsDemangled = symtab.symbols.map!(sym => demangler(sym.name).decodeSymbol); */
5088         /*         foreach (sym; symtab.symbols) // you can add filters here */
5089         /*         { */
5090         /*             if (gstats.demangleELF) */
5091         /*             { */
5092         /*                 const hit = demangler(sym.name).decodeSymbol; */
5093         /*             } */
5094         /*             else */
5095         /*             { */
5096         /*                 writeln("?: ", sym.name); */
5097         /*             } */
5098         /*         } */
5099         /*     } */
5100         /* } */
5101 
5102         auto sst = decoder.getSymbolsStringTable;
5103         if (!sst.isNull)
5104         {
5105             import nxt.algorithm_ex: findFirstOfAnyInOrder;
5106             import std.range : tee;
5107 
5108             auto scan = (sst.strings
5109                             .filter!(raw => !raw.empty) // skip empty raw string
5110                             .tee!(raw => gstats.elfFilesBySymbol[raw.idup] ~= elfFile) // WARNING: needs raw.idup here because we can't rever to raw
5111                             .map!(raw => demangler(raw).decodeSymbol)
5112                             .filter!(demangling => (!keys.empty && // don't show anything if no keys given
5113                                                     demangling.unmangled.findFirstOfAnyInOrder(keys)[1]))); // I love D :)
5114 
5115             if (!scan.empty &&
5116                 `ELF` in gstats.selFKinds.byName) // if user selected ELF file show them
5117             {
5118                 viz.pp(horizontalRuler,
5119                        displayedFilename(gstats, elfFile).asPath.asH!3,
5120                        asH!4(`ELF Symbol Strings Table (`, `.strtab`.asCode, `)`),
5121                        scan.asTable);
5122             }
5123         }
5124     }
5125 
5126     /** Search for Keys `keys` in Regular File `theRegFile`. */
5127     void scanRegFile(Viz viz,
5128                      NotNull!Dir topDir,
5129                      NotNull!RegFile theRegFile,
5130                      NotNull!Dir parentDir,
5131                      const string[] keys,
5132                      ref Symlink[] fromSymlinks,
5133                      size_t subIndex)
5134     {
5135         results.noBytesTotal += theRegFile.size;
5136         results.noBytesTotalContents += theRegFile.size;
5137 
5138         // Scan name
5139         if ((gstats.scanContext == ScanContext.all ||
5140              gstats.scanContext == ScanContext.fileName ||
5141              gstats.scanContext == ScanContext.regularFilename) &&
5142             !keys.empty)
5143         {
5144             immutable hitCountInName = scanForKeys(viz,
5145                                                    topDir, cast(NotNull!File)theRegFile, parentDir,
5146                                                    fromSymlinks,
5147                                                    theRegFile.name, keys, [], ScanContext.fileName);
5148         }
5149 
5150         // Scan Contents
5151         if ((gstats.scanContext == ScanContext.all ||
5152              gstats.scanContext == ScanContext.fileContent) &&
5153             (gstats.showFileContentDups ||
5154              gstats.showELFSymbolDups ||
5155              !keys.empty) &&
5156             theRegFile.size != 0)        // non-empty file
5157         {
5158             // immutable upTo = size_t.max;
5159 
5160             // TODO: Flag for readText
5161             try
5162             {
5163                 ++gstats.noScannedRegFiles;
5164                 ++gstats.noScannedFiles;
5165 
5166                 // ELF Symbols
5167                 if (gstats.showELFSymbolDups &&
5168                     theRegFile.ofKind(`ELF`, gstats.collectTypeHits, gstats.allFKinds))
5169                 {
5170                     scanELFFile(viz, theRegFile, keys, gstats);
5171                 }
5172 
5173                 // Check included kinds first because they are fast.
5174                 KindHit incKindHit = isSelectedFKind(theRegFile);
5175                 if (!gstats.selFKinds.byIndex.empty && // TODO: Do we really need this one?
5176                     !incKindHit)
5177                 {
5178                     return;
5179                 }
5180 
5181                 // Super-Fast Key-File Bistogram Discardal. TODO: Trim scale factor to optimal value.
5182                 enum minFileSize = 256; // minimum size of file for discardal.
5183                 immutable bool doBist = theRegFile.size > minFileSize;
5184                 immutable bool doNGram = (gstats.useNGrams &&
5185                                           (!gstats.keyAsSymbol) &&
5186                                           theRegFile.size > minFileSize);
5187                 immutable bool doBitStatus = true;
5188 
5189                 // Chunked Calculation of CStat in one pass. TODO: call async.
5190                 theRegFile.calculateCStatInChunks(gstats.filesByContentId,
5191                                                   _scanChunkSize,
5192                                                   gstats.showFileContentDups,
5193                                                   doBist,
5194                                                   doBitStatus);
5195 
5196                 // Match Bist of Keys with BistX of File
5197                 bool[] bistHits;
5198                 bool noBistMatch = false;
5199                 if (doBist)
5200                 {
5201                     const theHist = theRegFile.bistogram8;
5202                     auto hitsHist = keysBists.map!(a =>
5203                                                    ((a.value & theHist.value) ==
5204                                                     a.value)); // TODO: Functionize to x.subsetOf(y) or reuse std.algorithm: setDifference or similar
5205                     bistHits = hitsHist.map!`a == true`.array;
5206                     noBistMatch = hitsHist.all!`a == false`;
5207                 }
5208                 /* int kix = 0; */
5209                 /* foreach (hit; bistHits) { if (!hit) { debug dbg(`Assert key ` ~ keys[kix] ~ ` not in file ` ~ theRegFile.path); } ++kix; } */
5210 
5211                 bool allXGramsMiss = false;
5212                 if (doNGram)
5213                 {
5214                     ulong keysXGramUnionMatch = keysXGramsUnion.matchDenser(theRegFile.xgram);
5215                     debug dbg(theRegFile.path,
5216                               ` sized `, theRegFile.size, ` : `,
5217                               keysXGramsUnion.length, `, `,
5218                               theRegFile.xgram.length,
5219                               ` gave match:`, keysXGramUnionMatch);
5220                     allXGramsMiss = keysXGramUnionMatch == 0;
5221                 }
5222 
5223                 auto binHit = theRegFile.ofAnyKindIn(gstats.binFKinds,
5224                                                      gstats.collectTypeHits);
5225                 const binKindHit = binHit[0];
5226                 if (binKindHit)
5227                 {
5228                     import nxt.numerals: toOrdinal;
5229                     const nnKind = binHit[1].enforceNotNull;
5230                     const kindIndex = binHit[2];
5231                     if (_showSkipped)
5232                     {
5233                         if (gstats.showTree)
5234                         {
5235                             immutable intro = subIndex == parentDir.subs.length - 1 ? `└` : `├`;
5236                             viz.pp(`│  `.repeat(parentDir.depth + 1).join(``) ~ intro ~ `─ `);
5237                         }
5238                         viz.ppln(theRegFile, `: Skipped `, nnKind, ` file at `,
5239                                  toOrdinal(kindIndex + 1), ` blind try`);
5240                     }
5241                     final switch (binKindHit)
5242                     {
5243                         case KindHit.none:
5244                             break;
5245                         case KindHit.cached:
5246                             printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
5247                                          ` using cached KindId`);
5248                             break;
5249                         case KindHit.uncached:
5250                             printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
5251                                          ` at ` ~ toOrdinal(kindIndex + 1) ~ ` extension try`);
5252                             break;
5253                     }
5254                 }
5255 
5256                 if (binKindHit != KindHit.none ||
5257                     noBistMatch ||
5258                     allXGramsMiss) // or no hits possible. TODO: Maybe more efficient to do histogram discardal first
5259                 {
5260                     results.noBytesSkipped += theRegFile.size;
5261                 }
5262                 else
5263                 {
5264                     // Search if not Binary
5265 
5266                     // If Source file is ok
5267                     auto src = theRegFile.readOnlyContents[];
5268 
5269                     results.noBytesScanned += theRegFile.size;
5270 
5271                     if (keys)
5272                     {
5273                         // Fast discardal of files with no match
5274                         bool fastOk = true;
5275                         if (!_caseFold) { // if no relaxation of search
5276                             if (gstats.keyAsAcronym) // if no relaxation of search
5277                             {
5278                                 /* TODO: Reuse findAcronym in algorith_ex. */
5279                             }
5280                             else // if no relaxation of search
5281                             {
5282                                 switch (keys.length)
5283                                 {
5284                                 default: break;
5285                                 case 1: immutable hit1 = src.find(keys[0]); fastOk = !hit1.empty; break;
5286                                     // case 2: immutable hit2 = src.find(keys[0], keys[1]); fastOk = !hit2[0].empty; break;
5287                                     // case 3: immutable hit3 = src.find(keys[0], keys[1], keys[2]); fastOk = !hit3[0].empty; break;
5288                                     // case 4: immutable hit4 = src.find(keys[0], keys[1], keys[2], keys[3]); fastOk = !hit4[0].empty; break;
5289                                     // case 5: immutable hit5 = src.find(keys[0], keys[1], keys[2], keys[3], keys[4]); fastOk = !hit5[0].empty; break;
5290                                 }
5291                             }
5292                         }
5293 
5294                         // TODO: Continue search from hit1, hit2 etc.
5295 
5296                         if (fastOk)
5297                         {
5298                             foreach (tag; addTags) gstats.ftags.addTag(theRegFile, tag);
5299                             foreach (tag; removeTags) gstats.ftags.removeTag(theRegFile, tag);
5300 
5301                             if (theRegFile.size >= 8192)
5302                             {
5303                                 /* if (theRegFile.xgram == null) { */
5304                                 /*     theRegFile.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
5305                                 /* } */
5306                                 /* (*theRegFile.xgram).put(src); */
5307                                 /* theRegFile.xgram.put(src); */
5308                                 /* foreach (lix, ub0; line) { // for each ubyte in line */
5309                                 /*     if (lix + 1 < line.length) { */
5310                                 /*         immutable ub1 = line[lix + 1]; */
5311                                 /*         immutable dix = (cast(ushort)ub0 | */
5312                                 /*                          cast(ushort)ub1*256); */
5313                                 /*         (*theRegFile.xgram)[dix] = true; */
5314                                 /*     } */
5315                                 /* } */
5316                                 auto shallowDenseness = theRegFile.bistogram8.denseness;
5317                                 auto deepDenseness = theRegFile.xgramDeepDenseness;
5318                                 // assert(deepDenseness >= 1);
5319                                 gstats.shallowDensenessSum += shallowDenseness;
5320                                 gstats.deepDensenessSum += deepDenseness;
5321                                 ++gstats.densenessCount;
5322                                 /* dbg(theRegFile.path, `:`, theRegFile.size, */
5323                                 /*     `, length:`, theRegFile.xgram.length, */
5324                                 /*     `, deepDenseness:`, deepDenseness); */
5325                             }
5326 
5327                             theRegFile._cstat.hitCount = scanForKeys(viz,
5328                                                                      topDir, cast(NotNull!File)theRegFile, parentDir,
5329                                                                      fromSymlinks,
5330                                                                      src, keys, bistHits,
5331                                                                      ScanContext.fileContent);
5332                         }
5333                     }
5334                 }
5335 
5336             }
5337             catch (FileException)
5338             {
5339                 handleError(viz, theRegFile, false, subIndex);
5340             }
5341             catch (ErrnoException)
5342             {
5343                 handleError(viz, theRegFile, false, subIndex);
5344             }
5345             theRegFile.freeContents; // TODO: Call lazily only when open count is too large
5346         }
5347     }
5348 
5349     /** Scan Symlink `symlink` at `parentDir` for `keys`
5350         Put results in `results`. */
5351     void scanSymlink(Viz viz,
5352                      NotNull!Dir topDir,
5353                      NotNull!Symlink theSymlink,
5354                      NotNull!Dir parentDir,
5355                      const string[] keys,
5356                      ref Symlink[] fromSymlinks)
5357     {
5358         // check for symlink cycles
5359         if (!fromSymlinks.find(theSymlink).empty)
5360         {
5361             if (gstats.showSymlinkCycles)
5362             {
5363                 import std.range: back;
5364                 viz.ppln(`Cycle of symbolic links: `,
5365                          fromSymlinks.asPath,
5366                          ` -> `,
5367                          fromSymlinks.back.target);
5368             }
5369             return;
5370         }
5371 
5372         // Scan name
5373         if ((gstats.scanContext == ScanContext.all ||
5374              gstats.scanContext == ScanContext.fileName ||
5375              gstats.scanContext == ScanContext.symlinkName) &&
5376             !keys.empty)
5377         {
5378             scanForKeys(viz,
5379                         topDir, cast(NotNull!File)theSymlink, enforceNotNull(theSymlink.parent),
5380                         fromSymlinks,
5381                         theSymlink.name, keys, [], ScanContext.fileName);
5382         }
5383 
5384         // try {
5385         //     results.noBytesTotal += dent.size;
5386         // } catch (Exception)
5387         //   {
5388         //     dbg(`Couldn't get size of `,  dir.name);
5389         // }
5390         if (gstats.followSymlinks == SymlinkFollowContext.none) { return; }
5391 
5392         import std.range: popBackN;
5393         fromSymlinks ~= theSymlink;
5394         immutable targetPath = theSymlink.absoluteNormalizedTargetPath;
5395         if (targetPath.exists)
5396         {
5397             theSymlink._targetStatus = SymlinkTargetStatus.present;
5398             if (_topDirNames.all!(a => !targetPath.startsWith(a))) { // if target path lies outside of all rootdirs
5399                 auto targetDent = DirEntry(targetPath);
5400                 auto targetFile = getFile(enforceNotNull(gstats.rootDir), targetPath, targetDent.isDir);
5401 
5402                 if (gstats.showTree)
5403                 {
5404                     viz.ppln(`│  `.repeat(parentDir.depth + 1).join(``) ~ `├` ~ `─ `,
5405                              theSymlink,
5406                              ` modified `,
5407                              faze(shortDurationString(_currTime - theSymlink.timeLastModified),
5408                                   timeFace),
5409                              ` ago`, ` -> `,
5410                              targetFile.asPath,
5411                              faze(` outside of ` ~ (_topDirNames.length == 1 ? `tree ` : `all trees `),
5412                                   infoFace),
5413                              gstats.topDirs.asPath,
5414                              faze(` is followed`, infoFace));
5415                 }
5416 
5417                 ++gstats.noScannedSymlinks;
5418                 ++gstats.noScannedFiles;
5419 
5420                 if      (auto targetRegFile = cast(RegFile)targetFile)
5421                 {
5422                     processRegFile(viz, topDir, assumeNotNull(targetRegFile), parentDir, keys, fromSymlinks, 0, gstats);
5423                 }
5424                 else if (auto targetDir = cast(Dir)targetFile)
5425                 {
5426                     scanDir(viz, topDir, assumeNotNull(targetDir), keys, fromSymlinks);
5427                 }
5428                 else if (auto targetSymlink = cast(Symlink)targetFile) // target is a Symlink
5429                 {
5430                     scanSymlink(viz, topDir,
5431                                 assumeNotNull(targetSymlink),
5432                                 enforceNotNull(targetSymlink.parent),
5433                                 keys, fromSymlinks);
5434                 }
5435             }
5436         }
5437         else
5438         {
5439             theSymlink._targetStatus = SymlinkTargetStatus.broken;
5440 
5441             if (gstats.showBrokenSymlinks)
5442             {
5443                 _brokenSymlinks ~= theSymlink;
5444 
5445                 foreach (ix, fromSymlink; fromSymlinks)
5446                 {
5447                     if (gstats.showTree && ix == 0)
5448                     {
5449                         immutable intro = `├`;
5450                         viz.pp(`│  `.repeat(theSymlink.parent.depth + 1).join(``) ~ intro ~ `─ `,
5451                                theSymlink);
5452                     }
5453                     else
5454                     {
5455                         viz.pp(fromSymlink);
5456                     }
5457                     viz.pp(` -> `);
5458                 }
5459 
5460                 viz.ppln(faze(theSymlink.target, missingSymlinkTargetFace),
5461                          faze(` is missing`, warnFace));
5462             }
5463         }
5464         fromSymlinks.popBackN(1);
5465     }
5466 
5467     /** Scan Directory `parentDir` for `keys`. */
5468     void scanDir(Viz viz,
5469                  NotNull!Dir topDir,
5470                  NotNull!Dir theDir,
5471                  const string[] keys,
5472                  Symlink[] fromSymlinks = [],
5473                  int maxDepth = -1)
5474     {
5475         if (theDir.isRoot)  { results.reset; }
5476 
5477         // scan in directory name
5478         if ((gstats.scanContext == ScanContext.all ||
5479              gstats.scanContext == ScanContext.fileName ||
5480              gstats.scanContext == ScanContext.dirName) &&
5481             !keys.empty)
5482         {
5483             scanForKeys(viz,
5484                         topDir,
5485                         cast(NotNull!File)theDir,
5486                         enforceNotNull(theDir.parent),
5487                         fromSymlinks,
5488                         theDir.name, keys, [], ScanContext.fileName);
5489         }
5490 
5491         try
5492         {
5493             size_t subIndex = 0;
5494             if (gstats.showTree)
5495             {
5496                 immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
5497 
5498                 viz.pp(`│  `.repeat(theDir.depth).join(``) ~ intro ~
5499                        `─ `, theDir, ` modified `,
5500                        faze(shortDurationString(_currTime -
5501                                                 theDir.timeLastModified),
5502                             timeFace),
5503                        ` ago`);
5504 
5505                 if (gstats.showUsage)
5506                 {
5507                     viz.pp(` of Tree-Size `, theDir.treeSize);
5508                 }
5509 
5510                 if (gstats.showSHA1)
5511                 {
5512                     viz.pp(` with Tree-Content-Id `, theDir.treeContentId);
5513                 }
5514                 viz.ppendl;
5515             }
5516 
5517             ++gstats.noScannedDirs;
5518             ++gstats.noScannedFiles;
5519 
5520             auto subsSorted = theDir.subsSorted(gstats.subsSorting);
5521             foreach (key, sub; subsSorted)
5522             {
5523                 /* TODO: Functionize to scanFile */
5524                 if (auto regFile = cast(RegFile)sub)
5525                 {
5526                     processRegFile(viz, topDir, assumeNotNull(regFile), theDir, keys, fromSymlinks, subIndex, gstats);
5527                 }
5528                 else if (auto subDir = cast(Dir)sub)
5529                 {
5530                     if (maxDepth == -1 || // if either all levels or
5531                         maxDepth >= 1) { // levels left
5532                         if (sub.name in gstats.skippedDirKindsMap) // if sub should be skipped
5533                         {
5534                             if (_showSkipped)
5535                             {
5536                                 if (gstats.showTree)
5537                                 {
5538                                     immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
5539                                     viz.pp(`│  `.repeat(theDir.depth + 1).join(``) ~ intro ~ `─ `);
5540                                 }
5541 
5542                                 viz.pp(subDir,
5543                                        ` modified `,
5544                                        faze(shortDurationString(_currTime -
5545                                                                 subDir.timeLastModified),
5546                                             timeFace),
5547                                        ` ago`,
5548                                        faze(`: Skipped Directory of type `, infoFace),
5549                                        gstats.skippedDirKindsMap[sub.name].kindName);
5550                             }
5551                         }
5552                         else
5553                         {
5554                             scanDir(viz, topDir,
5555                                     assumeNotNull(subDir),
5556                                     keys,
5557                                     fromSymlinks,
5558                                     maxDepth >= 0 ? --maxDepth : maxDepth);
5559                         }
5560                     }
5561                 }
5562                 else if (auto subSymlink = cast(Symlink)sub)
5563                 {
5564                     scanSymlink(viz, topDir, assumeNotNull(subSymlink), theDir, keys, fromSymlinks);
5565                 }
5566                 else
5567                 {
5568                     if (gstats.showTree) { viz.ppendl; }
5569                 }
5570                 ++subIndex;
5571 
5572                 if (ctrlC)
5573                 {
5574                     viz.ppln(`Ctrl-C pressed: Aborting scan of `, theDir);
5575                     break;
5576                 }
5577             }
5578 
5579             if (gstats.showTreeContentDups)
5580             {
5581                 theDir.treeContentId; // better to put this after file scan for now
5582             }
5583         }
5584         catch (FileException)
5585         {
5586             handleError(viz, theDir, true, 0);
5587         }
5588     }
5589 
5590     /** Filter out `files` that lie under any of the directories `dirPaths`. */
5591     F[] filterUnderAnyOfPaths(F)(F[] files,
5592                                  string[] dirPaths)
5593     {
5594         import std.algorithm: any;
5595         import std.array: array;
5596         auto dupFilesUnderAnyTopDirName = (files
5597                                            .filter!(dupFile =>
5598                                                     dirPaths.any!(dirPath =>
5599                                                                   dupFile.path.startsWith(dirPath)))
5600                                            .array // evaluate to array to get .length below
5601             );
5602         F[] hits;
5603         final switch (gstats.duplicatesContext)
5604         {
5605         case DuplicatesContext.internal:
5606             if (dupFilesUnderAnyTopDirName.length >= 2)
5607                 hits = dupFilesUnderAnyTopDirName;
5608             break;
5609         case DuplicatesContext.external:
5610             if (dupFilesUnderAnyTopDirName.length >= 1)
5611                 hits = files;
5612             break;
5613         }
5614         return hits;
5615     }
5616 
5617     /** Show Statistics. */
5618     void showContentDups(Viz viz)
5619     {
5620         import std.meta : AliasSeq;
5621         foreach (ix, kind; AliasSeq!(RegFile, Dir))
5622         {
5623             immutable typeName = ix == 0 ? `Regular File` : `Directory Tree`;
5624             viz.pp((typeName ~ ` Content Duplicates`).asH!2);
5625             foreach (digest, dupFiles; gstats.filesByContentId)
5626             {
5627                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5628                 if (dupFilesOk.length >= 2) // non-empty file/directory
5629                 {
5630                     auto firstDup = cast(kind)dupFilesOk[0];
5631                     if (firstDup)
5632                     {
5633                         static if (is(kind == RegFile))
5634                         {
5635                             if (firstDup._cstat.kindId)
5636                             {
5637                                 if (firstDup._cstat.kindId in gstats.allFKinds.byId)
5638                                 {
5639                                     viz.pp(asH!3(gstats.allFKinds.byId[firstDup._cstat.kindId],
5640                                                  ` files sharing digest `, digest, ` of size `, firstDup.treeSize));
5641                                 }
5642                                 else
5643                                 {
5644                                     dbg(firstDup.path ~ ` kind Id ` ~ to!string(firstDup._cstat.kindId) ~
5645                                         ` could not be found in allFKinds.byId`);
5646                                 }
5647                             }
5648                             viz.pp(asH!3((firstDup._cstat.bitStatus == BitStatus.bits7) ? `ASCII File` : typeName,
5649                                          `s sharing digest `, digest, ` of size `, firstDup.treeSize));
5650                         }
5651                         else
5652                         {
5653                             viz.pp(asH!3(typeName, `s sharing digest `, digest, ` of size `, firstDup.size));
5654                         }
5655 
5656                         viz.pp(asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5657                     }
5658                 }
5659             }
5660         }
5661     }
5662 
5663     /** Show Statistics. */
5664     void showStats(Viz viz)
5665     {
5666         /* Duplicates */
5667 
5668         if (gstats.showNameDups)
5669         {
5670             viz.pp(`Name Duplicates`.asH!2);
5671             foreach (digest, dupFiles; gstats.filesByName)
5672             {
5673                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5674                 if (!dupFilesOk.empty)
5675                 {
5676                     viz.pp(asH!3(`Files with same name `,
5677                                  faze(dupFilesOk[0].name, fileFace)),
5678                            asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5679                 }
5680             }
5681         }
5682 
5683         if (gstats.showLinkDups)
5684         {
5685             viz.pp(`Inode Duplicates (Hardlinks)`.asH!2);
5686             foreach (inode, dupFiles; gstats.filesByInode)
5687             {
5688                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5689                 if (dupFilesOk.length >= 2)
5690                 {
5691                     viz.pp(asH!3(`Files with same inode ` ~ to!string(inode) ~
5692                                  ` (hardlinks): `),
5693                            asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5694                 }
5695             }
5696         }
5697 
5698         if (gstats.showFileContentDups)
5699         {
5700             showContentDups(viz);
5701         }
5702 
5703         if (gstats.showELFSymbolDups &&
5704             !keys.empty) // don't show anything if no keys where given
5705         {
5706             viz.pp(`ELF Symbol Duplicates`.asH!2);
5707             foreach (raw, dupFiles; gstats.elfFilesBySymbol)
5708             {
5709                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5710                 if (dupFilesOk.length >= 2)
5711                 {
5712                     const demangling = demangler(raw).decodeSymbol;
5713                     if (demangling.unmangled.findFirstOfAnyInOrder(keys)[1])
5714                     {
5715                         viz.pp(asH!3(`ELF Files with same symbol ` ~ to!string(raw)),
5716                                asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5717                     }
5718                 }
5719             }
5720         }
5721 
5722         /* Broken Symlinks */
5723         if (gstats.showBrokenSymlinks &&
5724             !_brokenSymlinks.empty)
5725         {
5726             viz.pp(`Broken Symlinks `.asH!2,
5727                    asUList(_brokenSymlinks.map!(x => x.asPath.asItem)));
5728         }
5729 
5730         /* Counts */
5731         viz.pp(`Scanned Types`.asH!2,
5732                /* asUList(asItem(gstats.noScannedDirs, ` Dirs, `), */
5733                /*         asItem(gstats.noScannedRegFiles, ` Regular Files, `), */
5734                /*         asItem(gstats.noScannedSymlinks, ` Symbolic Links, `), */
5735                /*         asItem(gstats.noScannedSpecialFiles, ` Special Files, `), */
5736                /*         asItem(`totalling `, gstats.noScannedFiles, ` Files`) // on extra because of lack of root */
5737                /*     ) */
5738                asTable(asRow(asCell(asBold(`Scan Count`)),
5739                              asCell(asBold(`File Type`))),
5740                        asRow(asCell(gstats.noScannedDirs),
5741                              asCell(asItalic(`Dirs`))),
5742                        asRow(asCell(gstats.noScannedRegFiles),
5743                              asCell(asItalic(`Regular Files`))),
5744                        asRow(asCell(gstats.noScannedSymlinks),
5745                              asCell(asItalic(`Symbolic Links`))),
5746                        asRow(asCell(gstats.noScannedSpecialFiles),
5747                              asCell(asItalic(`Special Files`))),
5748                        asRow(asCell(gstats.noScannedFiles),
5749                              asCell(asItalic(`Files`)))
5750                    )
5751             );
5752 
5753         if (gstats.densenessCount)
5754         {
5755             viz.pp(`Histograms`.asH!2,
5756                    asUList(asItem(`Average Byte Bistogram (Binary Histogram) Denseness `,
5757                                   cast(real)(100*gstats.shallowDensenessSum / gstats.densenessCount), ` Percent`),
5758                            asItem(`Average Byte `, NGramOrder, `-Gram Denseness `,
5759                                   cast(real)(100*gstats.deepDensenessSum / gstats.densenessCount), ` Percent`)));
5760         }
5761 
5762         viz.pp(`Scanned Bytes`.asH!2,
5763                asUList(asItem(`Scanned `, results.noBytesScanned),
5764                        asItem(`Skipped `, results.noBytesSkipped),
5765                        asItem(`Unreadable `, results.noBytesUnreadable),
5766                        asItem(`Total Contents `, results.noBytesTotalContents),
5767                        asItem(`Total `, results.noBytesTotal),
5768                        asItem(`Total number of hits `, results.numTotalHits),
5769                        asItem(`Number of Files with hits `, results.numFilesWithHits)));
5770 
5771         viz.pp(`Some Math`.asH!2);
5772 
5773         {
5774             struct Stat
5775             {
5776                 particle2f particle;
5777                 point2r point;
5778                 vec2r velocity;
5779                 vec2r acceleration;
5780                 mat2 rotation;
5781                 Rational!uint ratInt;
5782                 Vector!(Rational!int, 4) ratIntVec;
5783                 Vector!(float, 2, true) normFloatVec2;
5784                 Vector!(float, 3, true) normFloatVec3;
5785                 Point!(Rational!int, 4) ratIntPoint;
5786             }
5787 
5788             /* Vector!(Complex!float, 4) complexVec; */
5789 
5790             viz.ppln(`A number: `, 1.2e10);
5791             viz.ppln(`Randomize particle2f as TableNr0: `, randomInstanceOf!particle2f.asTableNr0);
5792 
5793             alias Stats3 = Stat[3];
5794             auto stats = new Stat[3];
5795             randomize(stats);
5796             viz.ppln(`A ` ~ typeof(stats).stringof, `: `, stats.randomize.asTable);
5797 
5798             {
5799                 auto x = randomInstanceOf!Stats3;
5800                 foreach (ref e; x)
5801                 {
5802                     e.velocity *= 1e9;
5803                 }
5804                 viz.ppln(`Some Stats: `,
5805                          x.asTable);
5806             }
5807         }
5808 
5809 
5810     }
5811 }
5812 
/** Set up signal handling and run a scan.

    On Linux, `signalHandler` is registered for SIGABRT, SIGTERM, SIGQUIT and
    SIGINT. A `Terminal` is then created with `ConsoleOutputType.linear` and
    passed, together with `args`, to the constructor of `Scanner!Terminal`.

    Params:
        args = arguments forwarded to the `Scanner` constructor.
*/
void scanner(string[] args)
{
    version(linux)
    {
        // Register signalHandler for each of these signals, in this order.
        foreach (immutable signum; [SIGABRT, SIGTERM, SIGQUIT, SIGINT])
        {
            signal(signum, &signalHandler);
        }
    }

    auto terminal = Terminal(ConsoleOutputType.linear);

    // The constructed object is not used further here.
    auto fileScanner = new Scanner!Terminal(args, terminal);
}