1 #!/usr/bin/env rdmd-dev
2 
3 /**
4    File Scanning Engine.
5 
6    Make rich use of Sparse Distributed Representations (SDR) using Hash Digests
7    for relating Data and its Relations/Properties/Meta-Data.
8 
9    See_Also: http://stackoverflow.com/questions/12629749/how-does-grep-run-so-fast
10    See_Also: http://www.regular-expressions.info/powergrep.html
11    See_Also: http://ridiculousfish.com/blog/posts/old-age-and-treachery.html
12    See_Also: http://www.olark.com/spw/2011/08/you-can-list-a-directory-with-8-million-files-but-not-with-ls/
13 
14    TODO Make use of parallelism_ex: pmap
15 
16    TODO Call filterUnderAnyOfPaths using std.algorithm.filter directly on AAs. Use byPair or use AA.get(key, defaultValue)
17          See_Also: http://forum.dlang.org/thread/mailman.75.1392335793.6445.digitalmars-d-learn@puremagic.com
18          See_Also: https://github.com/D-Programming-Language/druntime/pull/574
19 
20    TODO Count logical lines.
21    TODO Lexers should be loosely coupled to FKinds instead of Files
22    TODO Generic Token[] and specific CToken[], CxxToken[]
23 
24    TODO Don't scan for duplicates inside vc-dirs by default
25 
26    TODO Assert that files along duplicates path don't include symlinks
27 
28    TODO Implement FOp.deduplicate
29    TODO Prevent rescans of duplicates
30 
31    TODO Define generalized_specialized_two_way_relationship(kindD, kindDi)
32 
33    TODO Visualize hits using existingFileHitContext.asH!1 followed by a table:
34          ROW_NR | hit string in <code lang=LANG></code>
35 
36    TODO Parse and Sort GCC/Clang Compiler Messages on WARN_TYPE FILE:LINE:COL:MSG[WARN_TYPE] and use Collapsable HTML Widgets:
37          http://api.jquerymobile.com/collapsible/
38          when presenting them
39 
40    TODO Maybe make use of https://github.com/Abscissa/scriptlike
41 
42    TODO Calculate Tree grams and bist
43 
44    TODO Get stats of the link itself not the target in SymLink constructors
45 
46    TODO RegFile with FileContent.text should be decodable to Unicode using
47    either iso-latin1, utf-8, etc. Check std.uni for how to try and decode stuff.
48 
49    TODO Search for subwords.
50    For example gtk_widget should also match widget_gtk and GtkWidget etc.
51 
52    TODO Support multi-line keys
53 
54    TODO Use hash-lookup in txtFKinds.byExt for faster guessing of source file
55    kind. Merge it with binary kind lookup. And check FileContent member of
56    kind to instead determine if it should be scanned or not.
57    Sub-Task: Case-Insensitive Matching of extensions if
58    nothing else passes.
59 
60    TODO Detect symlinks with duplicate targets and only follow one of them and
61    group them together in visualization
62 
63    TODO Add addTag, removeTag, etc and interface to fs.d for setting tags:
64    --add-tag=comedy, remove-tag=comedy
65 
66    TODO If a file ends with ~ or .backup assume it's a backup file, strip it from
67    end match it again and set backupFlag in FileKind
68 
69    TODO Acronym match can make use of normal histogram counts. Check denseness
70    of binary histogram (bist) to determine if we should use a sparse or dense
71    histogram.
72 
73    TODO Activate and test support for ELF and Cxx11 subkinds
74 
75    TODO Call either File.checkObseleted upon inotify. checkObseleted should remove stuff from hash tables
76    TODO Integrate logic in clearCStat to RegFile.makeObselete
77    TODO Upon Dir inotify call invalidate _depth, etc.
78 
79    TODO Following command: fs.d --color -d ~/ware/emacs -s lispy  -k
80    shows "Skipped PNG file (png) at first extension try".
81    Assure that this logic reuses cache and instead prints something like "Skipped PNG file using cached FKind".
82 
83    TODO Cache each Dir separately to a file named after SHA1 of its path
84 
85    TODO Add ASCII kind: Requires optional stream analyzer member of FKind in
86    replacement for magicData. ASCIIFile
87 
88    TODO Define NotAnyKind(binaryKinds) and cache it
89 
90    TODO Create PkZipFile() in Dir.load() when FKind "pkZip Archive" is found.
91    Use std.zip.ZipArchive(void[] from mmfile)
92 
93    TODO Scan Subversion Dirs with http://pastebin.com/6ZzPvpBj
94 
95    TODO Change order (binHit || allBHist8Miss) and benchmark
96 
97    TODO Display modification/access times as:
98    See: http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com
99 
100    TODO Use User Defined Attributes (UDA): http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com
101    TODO Use msgPack @nonPacked when needed
102 
103    TODO Limit lines to terminal width
104 
105    TODO Create array of (OFFSET, LENGTH) and this in FKind Pattern factory
106    function.  Then for source file extra slice at (OFFSET, LENGTH) and use as
107    input into hash-table from magic (if it's a Lit-pattern too)
108 
109    TODO Verify that "f.tar.z" gets tuple extensions tuple("tar", "z")
110    TODO Verify that "libc.so.1.2.3" gets tuple extensions tuple("so", "1", "2", "3") and "so" extensions should then be tried
111    TODO Cache Symbols larger than three characters in a global hash from symbol to path
112 
113    TODO Benchmark horspool.d and perhaps use instead of std.find
114 
115    TODO Splitting into keys should not split arguments such as "a b"
116 
117    TODO Perhaps use http://www.chartjs.org/ to visualize stuff
118 
119    TODO Make use of @nonPacked in version(msgpack).
120 */
121 module nxt.fs;
122 
123 version = msgpack; // Use msgpack serialization
124 /* version = cerealed; // Use cerealed serialization */
125 
import std.stdio: ioFile = File, stdout;
import std.typecons: Tuple, tuple;
import std.algorithm: find, map, filter, reduce, max, min, uniq, all, joiner;
import std.string: representation, chompPrefix;
import std.stdio: write, writeln, writefln;
import std.path: baseName, dirName, isAbsolute, dirSeparator, extension, buildNormalizedPath, expandTilde, absolutePath;
import std.datetime;
import std.file: FileException;
import std.digest.sha: sha1Of, toHexString;
import std.range: repeat, array, empty, cycle, chain;
import std.stdint: uint64_t;
import std.traits: Unqual, isInstanceOf, isIterable;
import std.experimental.allocator;
import std.functional: memoize;
import std.complex: Complex;
141 
142 import nxt.predicates: isUntouched;
143 
144 import core.memory: GC;
145 import core.exception;
146 
147 import nxt.traits_ex;
148 import nxt.getopt_ex;
149 import nxt.digest_ex;
150 import nxt.algorithm_ex;
151 import nxt.codec;
152 import nxt.csunits;
153 alias Bytes64 = Bytes!ulong;
154 import arsd.terminal : Color;
155 import nxt.symbolic;
156 import nxt.static_bitarray;
157 import nxt.dbgio;
158 import nxt.tempfs;
159 import nxt.rational: Rational;
160 import nxt.ngram;
161 import nxt.notnull;
162 import nxt.pretty;
163 
164 import nxt.geometry;
165 import nxt.random_ex;
166 import nxt.mathml;
167 import nxt.mangling;
168 import nxt.lingua;
169 import nxt.attributes;
170 import nxt.find_ex;
171 
172 import nxt.elf;
173 import nxt.typedoc;
174 import lock_free.rwqueue;
175 
176 /* NGram Aliases */
177 /** Not very likely that we are interested in histograms 64-bit precision
178  * Bucket/Bin Counts so pick 32-bit for now. */
179 alias RequestedBinType = uint;
180 enum NGramOrder = 3;
181 alias Bist  = NGram!(ubyte, 1, ngram.Kind.binary, ngram.Storage.denseStatic, ngram.Symmetry.ordered, void, immutable(ubyte)[]);
182 alias XGram = NGram!(ubyte, NGramOrder, ngram.Kind.saturated, ngram.Storage.sparse, ngram.Symmetry.ordered, RequestedBinType, immutable(ubyte)[]);
183 
184 /* Need for signal handling */
185 import core.stdc.stdlib;
186 version(linux) import core.sys.posix.sys.stat;
187 version(linux) import core.sys.posix.signal;
188 //version(linux) import std.c.linux.linux;
189 
190 /* TODO Set global state.
191    http://forum.dlang.org/thread/cu9fgg$28mr$1@digitaldaemon.com
192 */
/// Number of times Ctrl-C has been pressed.
shared uint ctrlC = 0;

/** Exception Describing Process Signal. */
class SignalCaughtException : Exception
{
    /// Number of the signal this exception describes (`int.max` until constructed).
    int signo = int.max;

    /** Construct an exception describing signal number `signo`.

        Params:
            signo = signal number
            file = source file reported in the message and stored in the exception
            line = source line reported in the message and stored in the exception
    */
    this(int signo, string file = __FILE__, size_t line = __LINE__) @safe
    {
        import std.conv : to;
        // forward file and line so that `Throwable.file`/`line` point at the
        // caller instead of at this constructor
        super(`Signal number ` ~ to!string(signo) ~ ` at ` ~ file ~ `:` ~ to!string(line),
              file, line);
        this.signo = signo;
    }
}
205 
/** Process signal handler.

    Atomically increments `ctrlC` each time `SIGINT` is delivered; every other
    signal number is ignored.

    Params:
        signo = number of the delivered signal
*/
void signalHandler(int signo)
{
    import core.atomic : atomicOp;
    import core.stdc.signal : SIGINT;

    if (signo == SIGINT)
    {
        // `atomicOp` is selectively imported, so it is referenced unqualified
        atomicOp!`+=`(ctrlC, 1);
    }
    // throw new SignalCaughtException(signo);
}
215 
/// Function pointer type of a signal handler taking the signal number.
alias signalHandler_t = void function(int);

/// Declaration of the C-linkage `signal` function: takes a signal number and a
/// handler and returns a handler.
extern (C) signalHandler_t signal(int signal, signalHandler_t handler);
218 
219 version(msgpack)
220 {
221     import msgpack;
222 }
223 version(cerealed)
224 {
225     /* import cerealed.cerealiser; */
226     /* import cerealed.decerealiser; */
227     /* import cerealed.cereal; */
228 }
229 
/** File Content Type Code.

    Members without an explicit value are numbered consecutively, so their
    order must not be changed.
 */
enum FileContent
{
    unknown,
    binaryUnknown,
    binary,

    text,
    textASCII,
    text8Bit,

    document,
    spreadsheet,
    database,
    tagsDatabase,

    image,
    imageIcon,

    audio,
    sound = audio,              /// alias of `audio`
    music = audio,              /// alias of `audio`

    modemData,
    imageModemFax1BPP,          /// one bit per pixel
    voiceModem,

    video,
    movie,
    media,

    sourceCode,
    scriptCode,
    buildSystemCode,
    byteCode,
    machineCode,

    versionControl,
    numericalData,
    archive,
    compressed,
    cache,
    binaryCache,
    firmware,
    spellCheckWordList,
    font,
    performanceBenchmark,
    fingerprint,
}
273 
/** How File Kinds are detected. */
enum FileKindDetection
{
    /// Parenting path file name must match
    equalsParentPathDirsAndName,

    /// Only name must match
    equalsName,

    /// Both name and contents must match
    equalsNameAndContents,

    /// Either name or contents must match
    equalsNameOrContents,

    /// Only contents must match
    equalsContents,

    /// All information defined must match
    equalsWhatsGiven,
}
284 
/** Key Scan (Search) Context. */
enum ScanContext
{
    /* Not yet enabled: code, comment, string, word, symbol */

    /// Name of directory being scanned
    dirName,
    dir = dirName,              /// alias of `dirName`

    /// Name of file being scanned
    fileName,
    name = fileName,            /// alias of `fileName`

    /// Name of file being scanned
    regularFileName,

    /// Name of symbolic link being scanned
    symlinkName,

    /// Contents of file being scanned
    fileContent,
    content = fileContent,      /// alias of `fileContent`

    /* Not yet enabled: modTime, accessTime, xattr, size */

    all,
    standard = all,             /// alias of `all`
}
315 
/** Where duplicates are allowed to lie relative to `topDirs`. */
enum DuplicatesContext
{
    /// All duplicates must lie inside topDirs
    internal,

    /// At least one duplicate lies inside topDirs. Others may lie outside
    external,
}
322 
/** File Operation Type Code. */
enum FOp
{
    none,

    /// Check syntax
    checkSyntax,
    lint = checkSyntax,         /// alias of `checkSyntax`

    /// Project-Wide Build
    build,

    /// Compile
    compile,

    /// Byte compile
    byteCompile,

    /// Run (Execute)
    run,
    execute = run,              /// alias of `run`

    /// Preprocess C/C++/Objective-C (using cpp)
    preprocess,
    cpp = preprocess,           /// alias of `preprocess`

    /* VCS Operations */
    vcStatus,
    vcs = vcStatus,             /// alias of `vcStatus`

    /// Deduplicate Files using hardlinks and Dirs using Symlink
    deduplicate,
}
346 
/** Directory Operation Type Code. */
enum DirOp
{
    vcStatus, /// VCS operation
}
353 
354 /** Shell Command.
355  */
356 alias ShCmd = string; // Just simply a string for now.
357 
/** Pair of Delimiters.
    Used to describe, for example, comment and string delimiter syntax.
 */
struct Delim
{
    string intro;  /// Introducing delimiter.
    string finish; /// Finishing delimiter. Defaults to end of line if not defined.

    /// Construct from the introducing delimiter only, leaving `finish` undefined.
    this(string intro)
    {
        this.finish = null;
        this.intro = intro;
    }

    /// Construct from both the introducing and the finishing delimiter.
    this(string intro, string finish)
    {
        this.finish = finish;
        this.intro = intro;
    }
}
376 
/* Comment Delimiters */
enum defaultCommentDelims = [Delim(`#`)];
enum cCommentDelims = [Delim(`/*`, `*/`), Delim(`//`)];
enum dCommentDelims = [Delim(`/+`, `+/`)] ~ cCommentDelims;

/* String Delimiters */
enum defaultStringDelims = [Delim(`"`), Delim(`'`), Delim("`")];
// the triple quote is listed first, followed by the default delimiters
enum pythonStringDelims = [Delim(`"""`)] ~ defaultStringDelims;
391 
/** File Kind.
 */
class FKind
{
    /** Construct a file kind.

        Params:
            kindName_ = nick name of the kind
            baseNaming_ = basename matcher given as a `string` (literal), an
                          array of `string`s (alternatives) or a `Patt`; any
                          other type leaves `baseNaming` unset
            exts_ = typical extensions
            magicData = magic data given as `ubyte[]`, `string`, `void[]` or
                        anything assignable to `Patt`
            magicOffset = offset of the magic data
            refPattern_ = reference pattern given as `ubyte[]`, `string` or `void[]`
            keywords_ = keywords
            strings_ = string syntax
            comments_ = comment syntax
            content_ = content type
            detection_ = detection mode; `equalsWhatsGiven` is relaxed to
                         `equalsName` for source and script code
            lang_ = language if any
            superKind = kind whose pattern is inherited
            subKinds = kinds inheriting the pattern of this kind
            description = documenting description
            wikip = Wikipedia URL
    */
    this(T, MagicData, RefPattern)(string kindName_,
                                   T baseNaming_,
                                   const string[] exts_,
                                   MagicData magicData, size_t magicOffset = 0,
                                   RefPattern refPattern_ = RefPattern.init,
                                   const string[] keywords_ = [],

                                   Delim[] strings_ = [],

                                   Delim[] comments_ = [],

                                   FileContent content_ = FileContent.unknown,
                                   FileKindDetection detection_ = FileKindDetection.equalsWhatsGiven,
                                   Lang lang_ = Lang.unknown,

                                   FKind superKind = null,
                                   FKind[] subKinds = [],
                                   string description = null,
                                   string wikip = null) @trusted pure
    {
        this.kindName = kindName_;

        // Basename
        static if (is(T == string))
        {
            this.baseNaming = lit(baseNaming_);
        }
        else static if (isArrayOf!(T, string))
        {
            // TODO Move to a factory function strs(x)
            auto alt_ = alt();
            foreach (ext; baseNaming_)  // add each string as an alternative
            {
                alt_ ~= lit(ext);
            }
            this.baseNaming = alt_;
        }
        else static if (is(T == Patt))
        {
            this.baseNaming = baseNaming_;
        }

        this.exts = exts_;

        import std.traits: isAssignable;
        static      if (is(MagicData == ubyte[])) { this.magicData = lit(magicData) ; }
        else static if (is(MagicData == string)) { this.magicData = lit(magicData.representation.dup); }
        else static if (is(MagicData == void[])) { this.magicData = lit(cast(ubyte[])magicData); }
        else static if (isAssignable!(Patt, MagicData)) { this.magicData = magicData; }
        else static assert(0, `Cannot handle MagicData being type ` ~ MagicData.stringof);

        this.magicOffset = magicOffset;

        static      if (is(RefPattern == ubyte[])) { this.refPattern = refPattern_; }
        else static if (is(RefPattern == string)) { this.refPattern = refPattern_.representation.dup; }
        else static if (is(RefPattern == void[])) { this.refPattern = (cast(ubyte[])refPattern_).dup; }
        else static assert(0, `Cannot handle RefPattern being type ` ~ RefPattern.stringof);

        this.keywords = keywords_;

        this.strings = strings_;
        this.comments = comments_;

        this.content = content_;

        if ((content_ == FileContent.sourceCode ||
             content_ == FileContent.scriptCode) &&
            detection_ == FileKindDetection.equalsWhatsGiven)
        {
            // relax matching of sourcecode to only need name until we have complete parsers
            this.detection = FileKindDetection.equalsName;
        }
        else
        {
            this.detection = detection_;
        }
        this.lang = lang_;

        this.superKind = superKind;
        this.subKinds = subKinds;
        this.description = description;
        this.wikip = wikip.asURL;
    }

    /// Returns: the nick name of this kind.
    override string toString() const @property @trusted pure nothrow { return kindName; }

    /** Returns: Id Unique to matching behaviour of `this` FKind. If match
        behaviour of `this` FKind changes returned id will change.
        value is memoized.
    */
    auto ref const(SHA1Digest) behaviorId() @property @safe /* pure nothrow */
        out(result) { assert(!result.empty); }
    do
    {
        if (_behaviourDigest.empty) // if not yet defined
        {
            ubyte[] bytes;
            if (const magicLit = cast(Lit)magicData)
            {
                bytes = msgpack.pack(exts, magicLit.bytes, magicOffset, refPattern, keywords, content, detection);
            }
            else
            {
                // No literal magic data (null or a non-literal pattern): hash
                // the remaining fields together with the kind name. Hashing an
                // empty buffer here would give every such kind the very same
                // id and make them overwrite each other when indexed by id.
                bytes = msgpack.pack(kindName, exts, magicOffset, refPattern, keywords, content, detection);
            }
            _behaviourDigest = bytes.sha1Of;
        }
        return _behaviourDigest;
    }

    string kindName;    // Kind Nick Name.
    string description; // Kind Documenting Description.
    AsURL!string wikip; // Wikipedia URL

    FKind superKind;    // Inherited pattern. For example ELF => ELF core file
    FKind[] subKinds;   // Kinds inheriting the pattern of this kind.
    Patt baseNaming;    // Pattern that matches typical file basenames of this Kind. May be null.

    string[] parentPathDirs; // example [`lib`, `firmware`] for `/lib/firmware` or `../lib/firmware`

    const string[] exts;      // Typical Extensions.
    Patt magicData;     // Magic Data.
    size_t magicOffset; // Magic Offset.
    ubyte[] refPattern; // Reference pattern.
    const FileContent content;
    const FileKindDetection detection;
    Lang lang; // Language if any

    // Volatile Statistics:
    private SHA1Digest _behaviourDigest; // memoized result of behaviorId
    RegFile[] hitFiles;     // Files of this kind.

    const string[] keywords; // Keywords
    string[] builtins; // Builtin Functions
    Op[] opers; // Language Opers

    /* TODO Move this to CompLang class */
    Delim[] strings; // String syntax.
    Delim[] comments; // Comment syntax.

    bool machineGenerated; // True if this is a machine generated file.

    Tuple!(FOp, ShCmd)[] operations; // Operation and Corresponding Shell Command
}
543 
/** Set of File Kinds with Internal Hashing. */
class FKinds
{
    /// Append `kind` and register it in all internal indexes.
    void opOpAssign(string op)(FKind kind) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kind;
        this.register(kind);
    }

    /// Append all kinds of `kinds` and register each in all internal indexes.
    void opOpAssign(string op)(FKinds kinds) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kinds.byIndex;
        foreach (kind; kinds.byIndex)
            this.register(kind);
    }

    /** Register `kind` in the name, extension, id and magic indexes.

        Magic data is only indexed when it is a literal at offset zero.

        Returns: `this`.
    */
    FKinds register(FKind kind) @safe /* pure */
    {
        this.byName[kind.kindName] = kind;
        foreach (const ext; kind.exts)
        {
            this.byExt[ext] ~= kind;
        }
        this.byId[kind.behaviorId] = kind;
        if (kind.magicOffset == 0 && // only if zero-offset for now
            kind.magicData)
        {
            if (const magicLit = cast(Lit)kind.magicData)
            {
                this.byMagic[magicLit.bytes][magicLit.bytes.length] ~= kind;
                _magicLengths ~= magicLit.bytes.length; // add it
            }
        }
        return this;
    }

    /** Rehash Internal AAs and normalize the list of magic lengths.
        TODO Change to @safe when https://github.com/D-Programming-Language/druntime/pull/942 has been merged
        TODO Change to nothrow when uniq becomes nothrow.

        Returns: `this`.
    */
    FKinds rehash() @trusted pure /* nothrow */
    {
        import std.algorithm: sort;
        // `uniq` only drops adjacent duplicates, so the lengths must be
        // sorted before, not after, duplicates are removed
        _magicLengths = _magicLengths.sort().uniq.array;
        this.byName.rehash;
        this.byExt.rehash;
        this.byMagic.rehash;
        this.byId.rehash;
        return this;
    }

    FKind[] byIndex;
private:
    /* TODO These are "slaves" under byIndex and should not be modifiable outside
     of this class but their FKind's can mutable.
     */
    FKind[string] byName; // Index by unique name string
    FKind[][string] byExt; // Index by possibly non-unique extension string

    FKind[][size_t][immutable ubyte[]] byMagic; // zero-offset magic byte array => its length => Binary FKind[]
    size_t[] _magicLengths; // List of magic lengths to try as index in byMagic

    FKind[SHA1Digest] byId;    // Index Kinds by their behaviour
}
608 
/** Match `kind` with full filename `full`.

    Params:
        kind = kind whose `baseNaming` pattern is used
        full = full file name
        six = forwarded as second argument to the pattern match

    Returns: `true` iff `kind` has a `baseNaming` pattern and it matches.
*/
bool matchFullName(in FKind kind,
                   const scope string full, size_t six = 0) @safe pure nothrow
{
    if (kind.baseNaming is null) // nothing to match against
    {
        return false;
    }
    return !kind.baseNaming.match(full, six).empty;
}
616 
/** Match `kind` with file extension `ext`.

    Returns: `true` iff `ext` equals one of the extensions of `kind`.
*/
bool matchExtension(in FKind kind,
                    const scope string ext) @safe pure nothrow
{
    foreach (const candidate; kind.exts)
    {
        if (candidate == ext)
        {
            return true;
        }
    }
    return false;
}
623 
/** Match `kind` with either full filename `full` or file extension `ext`.

    Params:
        kind = kind to match against
        full = full file name
        six = forwarded to `matchFullName`
        ext = file extension

    Returns: `true` iff the full name or the extension matches.
*/
bool matchName(in FKind kind,
               const scope string full, size_t six = 0,
               const scope string ext = null) @safe pure nothrow
{
    // `six` was previously accepted but silently dropped
    return (kind.matchFullName(full, six) ||
            kind.matchExtension(ext));
}
631 
632 import std.range: hasSlicing;
633 
/** Match (Magic) Contents of `kind` with `range`.

    Params:
        kind = kind whose `magicData` is matched at `magicOffset`
        range = contents to match against
        regFile = file the contents belong to (currently unused)

    Returns: `true` iff match. A kind without magic data never matches.
*/
bool matchContents(Range)(in FKind kind,
                          in Range range,
                          in RegFile regFile) pure nothrow if (hasSlicing!Range)
{
    if (kind.magicData is null) // avoid dereferencing undefined magic data
    {
        return false;
    }
    const hit = kind.magicData.matchU(range, kind.magicOffset);
    return (!hit.empty);
}
643 
/** Result of matching a file against a kind.

    `none` is zero so that a hit can be tested as a boolean.
*/
enum KindHit
{
    /// No hit.
    none = 0,

    /// Cached hit.
    cached = 1,

    /// Uncached (fresh) hit.
    uncached = 2,
}
650 
/** Find the first kind in `kinds` that `regFile` is of.

    Tries, in order, the kind id cached in `regFile`, the kinds registered for
    the extension of `regFile` and finally every kind in `kinds`.

    Params:
        regFile = file to classify
        kinds = kinds to try
        collectTypeHits = forwarded to `ofKind`

    Returns: tuple of hit type, matching kind (`null` if none) and the index
    of the kind within the list it was found in (`kinds.byExt[ext]` for an
    extension hit, `kinds.byIndex` otherwise; zero for cached or no hit).
*/
Tuple!(KindHit, FKind, size_t) ofAnyKindIn(NotNull!RegFile regFile,
                                           FKinds kinds,
                                           bool collectTypeHits)
{
    // build results through the declared return type so that the index is a
    // `size_t` on every target (a `0UL` literal is not on 32-bit)
    alias Result = typeof(return);

    // using kindId
    if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
    {
        if (auto cachedKind = regFile._cstat.kindId in kinds.byId) // single lookup
        {
            return Result(KindHit.cached, *cachedKind, 0);
        }
    }

    // using extension
    immutable ext = regFile.realExtension; // extension sans dot
    if (!ext.empty)
    {
        if (auto extKinds = ext in kinds.byExt) // single lookup
        {
            foreach (kindIndex, kind; *extKinds)
            {
                auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
                if (hit)
                {
                    return Result(hit, kind, kindIndex);
                }
            }
        }
    }

    // try all
    foreach (kindIndex, kind; kinds.byIndex) // Iterate each kind
    {
        auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
        if (hit)
        {
            return Result(hit, kind, kindIndex);
        }
    }

    // no hit
    return Result(KindHit.none, FKind.init, 0);
}
696 
/** Check if `regFile` is of type `kind`.

    Thin wrapper that forwards all arguments to `ofKind1`.

    Returns: the hit reported by `ofKind1`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               NotNull!FKind kind,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    return regFile.ofKind1(kind, collectTypeHits, allFKinds);
}
708 
/** Check if `regFile` is of the kind named `kindName`.

    Returns: `KindHit.none` if no kind named `kindName` is registered in
    `allFKinds`, otherwise the hit for that kind.
*/
KindHit ofKind(NotNull!RegFile regFile,
               string kindName,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    auto namedKind = kindName in allFKinds.byName;
    if (namedKind is null) // unknown kind name
    {
        return KindHit.none;
    }
    return regFile.ofKind(assumeNotNull(*namedKind),
                          collectTypeHits,
                          allFKinds);
}
724 
/** Helper for ofKind.

    Checks, in order: the kind id cached in `regFile`, the super kind of
    `kind` (which must hit if defined) and finally the name and/or contents
    of `regFile` according to `kind.detection`.

    On a fresh hit the behaviour id of `kind` is stored in `regFile` and, if
    `collectTypeHits` is set, `regFile` is appended to `kind.hitFiles`.

    Returns: `KindHit.cached`, `KindHit.uncached` or `KindHit.none`.
*/
KindHit ofKind1(NotNull!RegFile regFile,
                NotNull!FKind kind,
                bool collectTypeHits,
                FKinds allFKinds) /* nothrow */ @trusted
{
    // Try cached first
    if (regFile._cstat.kindId.defined &&
        (regFile._cstat.kindId in allFKinds.byId) && // if kind is known
        allFKinds.byId[regFile._cstat.kindId] is kind)  // if cached kind equals
    {
        return KindHit.cached;
    }

    immutable ext = regFile.realExtension;

    if (kind.superKind)
    {
        immutable baseHit = regFile.ofKind(enforceNotNull(kind.superKind),
                                           collectTypeHits,
                                           allFKinds);
        if (!baseHit) // super kind must match for this kind to match
        {
            return baseHit;
        }
    }

    bool hit = false;
    final switch (kind.detection)
    {
    case FileKindDetection.equalsParentPathDirsAndName:
        hit = (!regFile.parents.map!(a => a.name).find(kind.parentPathDirs).empty && // I love D :)
               kind.matchName(regFile.name, 0, ext));
        break;
    case FileKindDetection.equalsName:
        hit = kind.matchName(regFile.name, 0, ext);
        break;
    case FileKindDetection.equalsNameAndContents:
        hit = (kind.matchName(regFile.name, 0, ext) &&
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsNameOrContents:
        hit = (kind.matchName(regFile.name, 0, ext) ||
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsContents:
        hit = kind.matchContents(regFile.readOnlyContents, regFile);
        break;
    case FileKindDetection.equalsWhatsGiven:
        // something must be defined; note that `is(kind.baseNaming)` would be
        // a compile-time type test that is always false for a value
        assert(kind.baseNaming !is null ||
               !kind.exts.empty ||
               !(kind.magicData is null));
        hit = ((kind.matchName(regFile.name, 0, ext) &&
                (kind.magicData is null ||
                 kind.matchContents(regFile.readOnlyContents, regFile))));
        break;
    }
    if (hit)
    {
        if (collectTypeHits)
        {
            kind.hitFiles ~= regFile;
        }
        regFile._cstat.kindId = kind.behaviorId;       // store reference in File
    }

    return hit ? KindHit.uncached : KindHit.none;
}
794 
/** Directory Kind.
 */
class DirKind
{
    string fileName; /// File name given at construction.
    string kindName; /// Kind name given at construction.

    /// Construct from file name `fn` and kind name `kn`.
    this(string fn,
         string kn)
    {
        fileName = fn;
        kindName = kn;
    }

    version(msgpack)
    {
        /// Construct by unpacking all fields from `unpacker`.
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /// Pack all fields, in declaration order, as one array into `packer`.
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.beginArray(this.tupleof.length);
            packer.pack(this.tupleof);
        }

        /// Unpack all fields, in declaration order, from an array in `unpacker`.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.beginArray;
            unpacker.unpack(this.tupleof);
        }
    }
}
// Round-trip a pair of empty strings through msgpack.
version(msgpack) unittest
{
    auto original = tuple(``, ``);
    auto packed = pack(original);

    Tuple!(string, string) restored;
    packed.unpack(restored);

    assert(original == restored);
}
834 
835 import std.file: DirEntry, getLinkAttributes;
836 import std.datetime: SysTime, Interval;
837 
838 /** File.
839  */
840 class File
841 {
842     this(Dir parent)
843     {
844         this.parent = parent;
845         if (parent) { ++parent.gstats.noFiles; }
846     }
847     this(string name, Dir parent, Bytes64 size,
848          SysTime timeLastModified,
849          SysTime timeLastAccessed)
850     {
851         this.name = name;
852         this.parent = parent;
853         this.size = size;
854         this.timeLastModified = timeLastModified;
855         this.timeLastAccessed = timeLastAccessed;
856         if (parent) { ++parent.gstats.noFiles; }
857     }
858 
859     // The Real Extension without leading dot.
860     string realExtension() @safe pure nothrow const { return name.extension.chompPrefix(`.`); }
861     alias ext = realExtension; // shorthand
862 
863     string toTextual() const @property { return `Any File`; }
864 
865     Bytes64 treeSize() @property @trusted /* @safe pure nothrow */ { return size; }
866 
867     /** Content Digest of Tree under this Directory. */
868     const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
869     {
870         return typeof(return).init; // default to undefined
871     }
872 
873     Face!Color face() const @property @safe pure nothrow { return fileFace; }
874 
875     /** Check if `this` File has been invalidated by `dent`.
876         Returns: `true` iff `this` was obseleted.
877     */
878     bool checkObseleted(ref DirEntry dent) @trusted
879     {
880         // Git-Style Check for Changes (called Decider in SCons Build Tool)
881         bool flag = false;
882         if (dent.size != this.size || // size has changes
883             (dent.timeLastModified != this.timeLastModified) // if current modtime has changed or
884             )
885         {
886             makeObselete;
887             this.timeLastModified = dent.timeLastModified; // use new time
888             this.size = dent.size; // use new time
889             flag = true;
890         }
891         this.timeLastAccessed = dent.timeLastAccessed; // use new time
892         return flag;
893     }
894 
895     void makeObselete() @trusted {}
896     void makeUnObselete() @safe {}
897 
898     /** Returns: Depth of Depth from File System root to this File. */
899     int depth() @property @safe pure nothrow
900     {
901         return parent ? parent.depth + 1 : 0; // NOTE: this is fast because parent is memoized
902     }
903     /** NOTE: Currently not used. */
904     int depthIterative() @property @safe pure
905         out (depth) { debug assert(depth == depth); }
906     do
907     {
908         typeof(return) depth = 0;
909         for (auto curr = dir; curr !is null && !curr.isRoot; depth++)
910         {
911             curr = curr.parent;
912         }
913         return depth;
914     }
915 
916     /** Get Parenting Dirs starting from parent of `this` upto root.
917         Make this even more lazily evaluted.
918     */
919     Dir[] parentsUpwards()
920     {
921         typeof(return) parents; // collected parents
922         for (auto curr = dir; (curr !is null &&
923                                !curr.isRoot); curr = curr.parent)
924         {
925             parents ~= curr;
926         }
927         return parents;
928     }
929     alias dirsDownward = parentsUpwards;
930 
931     /** Get Parenting Dirs starting from file system root downto containing
932         directory of `this`.
933     */
934     auto parents()
935     {
936         return parentsUpwards.retro;
937     }
938     alias dirs = parents;     // SCons style alias
939     alias parentsDownward = parents;
940 
941     bool underAnyDir(alias pred = `a`)()
942     {
943         import std.algorithm: any;
944         import std.functional: unaryFun;
945         return parents.any!(unaryFun!pred);
946     }
947 
    /** Returns: Path to `this` File.

        A file without parent yields `dirSeparator` alone. Otherwise the path
        is assembled in a single pre-sized buffer as: separator + name for each
        collected ancestor (top-most first), followed by separator + `name`.
        Ancestors are collected until one is null or reports `isRoot`; that
        one is not part of the path.

        TODO Reuse parents.
     */
    string path() @property @trusted pure out (result) {
        /* assert(result == pathRecursive); */
    }
    do
    {
        if (!parent) { return dirSeparator; } // no parent: this is the root

        // One separator character plus the own name; grows per ancestor below.
        size_t pathLength = 1 + name.length; // returned path length
        Dir[] parents; // collected parents

        // First pass: walk upwards to measure the final length and remember the ancestors.
        for (auto curr = parent; (curr !is null &&
                                  !curr.isRoot); curr = curr.parent)
        {
            pathLength += 1 + curr.name.length;
            parents ~= curr;
        }

        // build path
        // Second pass: fill the exactly-sized buffer from the top-most ancestor downwards.
        auto thePath = new char[pathLength];
        size_t i = 0; // index to thePath
        import std.range: retro;
        foreach (currParent_; parents.retro)
        {
            immutable parentName = currParent_.name;
            thePath[i++] = dirSeparator[0]; // only the first separator character is used
            thePath[i .. i + parentName.length] = parentName[];
            i += parentName.length;
        }
        // Finally the separator and the name of `this` itself.
        thePath[i++] = dirSeparator[0];
        thePath[i .. i + name.length] = name[];

        return thePath;
    }
984 
985     /** Returns: Path to `this` File.
986         Recursive Heap-active implementation, slower than $(D path()).
987     */
988     string pathRecursive() @property @trusted pure
989     {
990         if (parent)
991         {
992             static if (true)
993             {
994                 import std.path: dirSeparator;
995                 // NOTE: This is more efficient than buildPath(parent.path,
996                 // name) because we can guarantee things about parent.path and
997                 // name
998                 immutable parentPath = parent.isRoot ? `` : parent.pathRecursive;
999                 return parentPath ~ dirSeparator ~ name;
1000             }
1001             else
1002             {
1003                 import std.path: buildPath;
1004                 return buildPath(parent.pathRecursive, name);
1005             }
1006         }
1007         else
1008         {
1009             return `/`;  // assume root folder with beginning slash
1010         }
1011     }
1012 
1013     version(msgpack)
1014     {
1015         void toMsgpack(Packer)(ref Packer packer) const
1016         {
1017             writeln(`Entering File.toMsgpack `, name);
1018             packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
1019         }
1020         void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
1021         {
1022             long stdTime;
1023             unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
1024             unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize
1025         }
1026     }
1027 
    Dir parent;               // Reference to parenting directory (or null if this is a root directory)
    alias dir = parent;       // SCons style alias for `parent`

    string name;              // Name of this file within its parent; empty if root directory
    Bytes64 size;             // Size of file in bytes
    SysTime timeLastModified; // Last modification time
    SysTime timeLastAccessed; // Last access time
1035 }
1036 
/** Maps Files to their tags.

    Every registered `File` is associated with a list of tag strings. `addTag`
    never stores the same tag twice for one file, so each list holds distinct
    tags in the order they were first added.
*/
class FileTags
{
    /** Attach `tag` to `file` unless `file` already carries it.

        `tag` is stored inside this object, so it is intentionally not a
        `scope` parameter.

        Returns: `this`, to allow call chaining.
    */
    FileTags addTag(File file, const string tag) @safe pure /* nothrow */
    {
        import std.algorithm.searching : canFind;
        if (auto tags = file in _tags) // single hash lookup
        {
            if (!(*tags).canFind(tag))
            {
                *tags ~= tag; // add it
            }
        }
        else
        {
            _tags[file] = [tag];
        }
        return this;
    }

    /** Detach every occurrence of `tag` from `file`; a no-op when `file` is
        not registered. The entry for `file` is kept even if it becomes empty.

        Returns: `this`, to allow call chaining.
    */
    FileTags removeTag(File file, string tag) @safe pure
    {
        if (auto tags = file in _tags) // single hash lookup
        {
            import std.algorithm: remove;
            *tags = (*tags).remove!(a => a == tag);
        }
        return this;
    }

    /** Returns: Tags registered for `file`, or `null` when there are none. */
    auto ref getTags(File file) const @safe pure nothrow
    {
        auto tags = file in _tags; // single hash lookup
        return tags ? *tags : null;
    }

    private string[][File] _tags; // Tags for each registered file.
}
1070 
// Exercises path construction of Dir/File nodes and the FileTags bookkeeping
// against `/etc/passwd` (hence Linux only).
version(linux) unittest
{
    auto ftags = new FileTags();

    GStats gstats = new GStats();

    // A root directory is a Dir without parent.
    auto root = assumeNotNull(new Dir(cast(Dir)null, gstats));
    auto etc = getDir(root, `/etc`);
    assert(etc.path == `/etc`);

    auto dent = DirEntry(`/etc/passwd`);
    auto passwd = getFile(root, `/etc/passwd`, dent.isDir);
    assert(passwd.path == `/etc/passwd`);
    assert(passwd.parent == etc);
    assert(etc.sub(`passwd`) == passwd);

    // Adding the same tag twice must not create a duplicate.
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Secret`);
    assert(ftags.getTags(passwd) == [`Password`, `Secret`]);
    // Removing a tag leaves the other tags untouched.
    ftags.removeTag(passwd, `Password`);
    assert(ftags._tags[passwd] == [`Secret`]);
}
1094 
/** Symlink Target Status.

    Stored per `Symlink` in `_targetStatus`, where it defaults to `unknown`.
    `Symlink.face` picks a dedicated face when the status is `broken`.
 */
enum SymlinkTargetStatus
{
    unknown, // not determined (default)
    present, // presumably: target exists -- NOTE(review): no code in view sets this, confirm
    broken,  // target is considered broken; rendered with `symlinkBrokenFace`
}
1103 
/** Symlink.

    A `File` that additionally remembers the link target it reads with
    `std.file.readLink` and a `SymlinkTargetStatus`. Every constructed instance
    increments `parent.gstats.noSymlinks`.
 */
class Symlink : File
{
    /** Construct an empty symlink node under `parent`. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSymlinks;
    }

    /** Construct from directory entry `dent` under `parent`.

        Size and timestamps are taken from `dent`; if querying them throws,
        they keep their default values and the link target is not read here
        (it can still be read later through `target`).
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        Bytes64 linkSize;
        SysTime mtime, atime;
        bool statSucceeded = true;
        try
        {
            linkSize = dent.size.Bytes64;
            mtime = dent.timeLastModified;
            atime = dent.timeLastAccessed;
        }
        catch (Exception)
        {
            statSucceeded = false;
        }
        // const attrs = getLinkAttributes(dent.name); // attributes of target file
        // super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        super(dent.name.baseName, parent, linkSize, mtime, atime);
        if (statSucceeded)
        {
            this.retarget(dent); // trigger lazy load
        }
        ++parent.gstats.noSymlinks;
    }

    /** Returns: `symlinkBrokenFace` when the target status is `broken`,
        otherwise `symlinkFace`.
    */
    override Face!Color face() const @property @safe pure nothrow
    {
        if (_targetStatus != SymlinkTargetStatus.broken)
        {
            return symlinkFace;
        }
        return symlinkBrokenFace;
    }

    /** Returns: Human-readable name of this kind of file. */
    override string toTextual() const @property
    {
        return `Symbolic Link`;
    }

    /** Read the link target of `dent` and remember it.
        Returns: the freshly read target.
    */
    string retarget(ref DirEntry dent) @trusted
    {
        import std.file: readLink;
        _target = readLink(dent);
        return _target;
    }

    /** Cached/Memoized/Lazy Lookup for target. */
    string target() @property @trusted
    {
        if (_target) // already read
        {
            return _target;
        }
        auto targetDent = DirEntry(path);
        return retarget(targetDent); // read it
    }

    /** Lookup for target as absolute normalized path, resolved relative to
        the directory containing this link. Uses the memoized `target`.
    */
    string absoluteNormalizedTargetPath() @property @trusted
    {
        import std.path: absolutePath, buildNormalizedPath;
        string linkTarget = target;
        return buildNormalizedPath(absolutePath(linkTarget, path.dirName));
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack name, size and timestamps (as `stdTime`). */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /** Unpack name, size and timestamps in the order `toMsgpack` wrote them. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            // TODO Functionize
            unpacker.unpack(stdTime);
            timeLastModified = SysTime(stdTime);
            unpacker.unpack(stdTime);
            timeLastAccessed = SysTime(stdTime);
        }
    }

    string _target; // memoized link target; null until first read
    SymlinkTargetStatus _targetStatus = SymlinkTargetStatus.unknown;
}
1195 
/** Special File (Character or Block Device).

    Carries no size or timestamps of its own when built from a directory
    entry. Every constructed instance increments
    `parent.gstats.noSpecialFiles`.
 */
class SpecFile : File
{
    /** Construct an empty special-file node under `parent`. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSpecialFiles;
    }

    /** Construct from directory entry `dent` under `parent`; only the base
        name is taken from `dent`, size and times are zeroed.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        ++parent.gstats.noSpecialFiles;
    }

    /** Returns: Face used when showing special files. */
    override Face!Color face() const @property @safe pure nothrow
    {
        return specialFileFace;
    }

    /** Returns: Human-readable name of this kind of file. */
    override string toTextual() const @property
    {
        return `Special File`;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack name, size and timestamps (as `stdTime`). */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /** Unpack name, size and timestamps in the order `toMsgpack` wrote them. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            // TODO Functionize
            unpacker.unpack(stdTime);
            timeLastModified = SysTime(stdTime);
            unpacker.unpack(stdTime);
            timeLastAccessed = SysTime(stdTime);
        }
    }
}
1236 
/** Bit (Content) Status.

    Records whether any byte of a regular file has its top-most bit (bit 7)
    set, as determined by `RegFile.calculateCStatInChunks`.
*/
enum BitStatus
{
    unknown, // not yet determined (default)
    bits7,   // no byte has bit 7 set, i.e. contents are pure ASCII
    bits8,   // at least one byte has bit 7 set
}
1244 
/** Regular File.

    Adds lazily memory-mapped access to the file contents and memoized content
    statistics (`CStat`: SHA-1 digest, byte histogram, x-gram, 7/8-bit status)
    on top of `File`. Every constructed instance increments
    `parent.gstats.noRegFiles`.
 */
class RegFile : File
{
    /** Construct an empty regular-file node under `parent`. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noRegFiles;
    }

    /** Construct from directory entry `dent` under `parent`. */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        this(dent.name.baseName, parent, dent.size.Bytes64,
             dent.timeLastModified, dent.timeLastAccessed);
    }

    /** Construct from explicit attributes. */
    this(string name, NotNull!Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        ++parent.gstats.noRegFiles;
    }

    ~this() @nogc
    {
        _cstat.deallocate(false);
    }

    /** Returns: Human-readable name of this kind of file. */
    override string toTextual() const @property { return `Regular File`; }

    /** Returns: Content Id of `this`.

        Computed on first use by hashing the contents in chunks of 32 pages;
        the mapping is released again right afterwards.
    */
    const(SHA1Digest) contentId() @property @trusted /* @safe pure nothrow */
    {
        if (_cstat._contentId.isUntouched)
        {
            enum doSHA1 = true;
            calculateCStatInChunks(parent.gstats.filesByContentId,
                                   32*pageSize(),
                                   doSHA1);
            freeContents(); // TODO Call lazily only when open count is too large
        }
        return _cstat._contentId;
    }

    /** Returns: Tree Content Id of `this`, which for a regular file is its `contentId`. */
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        return contentId;
    }

    /** Returns: Face used when showing regular files. */
    override Face!Color face() const @property @safe pure nothrow { return regFileFace; }

    /** Returns: SHA-1 of `this` `File` Contents at `src`.

        When no digest is stored yet, it is computed from `src` and `this` is
        appended to `filesByContentId` under that digest.
    */
    const(SHA1Digest) contId(inout (ubyte[]) src,
                             File[][SHA1Digest] filesByContentId)
        @property pure out(result) { assert(!result.empty); } // must have be defined
    do
    {
        if (_cstat._contentId.empty) // if not yet defined
        {
            _cstat._contentId = src.sha1Of;
            filesByContentId[_cstat._contentId] ~= this;
        }
        return _cstat._contentId;
    }

    /** Returns: Cached/Memoized Binary Histogram of `this` `File`. */
    auto ref bistogram8() @property @safe // ref needed here!
    {
        if (_cstat.bist.empty)
        {
            _cstat.bist.put(readOnlyContents); // memoized calculated
        }
        return _cstat.bist;
    }

    /** Returns: Cached/Memoized XGram of `this` `File`. */
    auto ref xgram() @property @safe // ref needed here!
    {
        if (_cstat.xgram.empty)
        {
            _cstat.xgram.put(readOnlyContents); // memoized calculated
        }
        return _cstat.xgram;
    }

    /** Returns: Cached/Memoized XGram Deep Denseness of `this` `File`,
        as the ratio of the memoized numerator to the number of x-gram bins.
    */
    auto ref xgramDeepDenseness() @property @safe
    {
        if (!_cstat._xgramDeepDenseness)
        {
            _cstat._xgramDeepDenseness = xgram.denseness(-1).numerator;
        }
        return Rational!ulong(_cstat._xgramDeepDenseness,
                              _cstat.xgram.noBins);
    }

    /** Returns: true if empty file (zero length). */
    bool empty() @property const @safe { return size == 0; }

    /** Process File in Cache Friendly Chunks.

        Makes a single pass over the contents and updates whichever statistics
        were requested and are not already known.

        Params:
            filesByContentId = index that `this` is appended to once its SHA-1 is known
            chunkSize = number of bytes handed to the accumulators at a time
            doSHA1 = compute the content digest (skipped if already defined or file is empty)
            doBist = accumulate the byte histogram (skipped if already non-empty)
            doBitStatus = determine 7/8-bit status (skipped if already known)
    */
    void calculateCStatInChunks(NotNull!File[][SHA1Digest] filesByContentId,
                                size_t chunkSize = 32*pageSize(),
                                bool doSHA1 = false,
                                bool doBist = false,
                                bool doBitStatus = false) @safe
    {
        // Drop requests for statistics that are already available.
        if (_cstat._contentId.defined || empty) { doSHA1 = false; }
        if (!_cstat.bist.empty) { doBist = false; }
        if (_cstat.bitStatus != BitStatus.unknown) { doBitStatus = false; }

        import std.digest.sha;
        SHA1 sha1;
        if (doSHA1) { sha1.start(); }

        bool isASCII = true;

        if (doSHA1 || doBist || doBitStatus)
        {
            import std.range: chunks;
            foreach (chunk; readOnlyContents.chunks(chunkSize))
            {
                if (doSHA1) { sha1.put(chunk); }
                if (doBist) { _cstat.bist.put(chunk); }
                if (doBitStatus)
                {
                    /* TODO This can be parallelized using 64-bit wording!
                     * Write automatic parallelizing library for this? */
                    foreach (elt; chunk)
                    {
                        import nxt.bitop_ex: bt;
                        isASCII = isASCII && !elt.bt(7); // ASCII has no topmost bit set
                    }
                }
            }
        }

        if (doBitStatus)
        {
            _cstat.bitStatus = isASCII ? BitStatus.bits7 : BitStatus.bits8;
        }

        if (doSHA1)
        {
            _cstat._contentId = sha1.finish();
            filesByContentId[_cstat._contentId] ~= cast(NotNull!File)assumeNotNull(this); // TODO Prettier way?
        }
    }

    /** Clear/Reset Contents Statistics of `this` `File`.

        Removes `this` from the list stored in `filesByContentId` under its
        current content digest. The shortened list is written back into the
        associative array, so no stale trailing element is left behind.
    */
    void clearCStat(File[][SHA1Digest] filesByContentId) @safe nothrow
    {
        // SHA1-digest
        if (auto dups = _cstat._contentId in filesByContentId)
        {
            import std.algorithm: remove;
            immutable n = (*dups).length;
            // `remove` compacts in place and returns the shortened slice;
            // it has to be stored back through the pointer into the AA.
            *dups = (*dups).remove!(a => a is this);
            assert(n == (*dups).length + 1); // exactly one entry (`this`) must have been removed
        }
    }

    override string toString() @property @trusted
    {
        // import std.traits: fullyQualifiedName;
        // return fullyQualifiedName!(typeof(this)) ~ `(` ~ buildPath(parent.name, name) ~ `)`; // TODO typenameof
        return (typeof(this)).stringof ~ `(` ~ this.path ~ `)`; // TODO typenameof
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack. */
        void toMsgpack(Packer)(ref Packer packer) const {
            /* writeln(`Entering RegFile.toMsgpack `, name); */

            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);

            // CStat: TODO Group
            packer.pack(_cstat.kindId); // FKind
            packer.pack(_cstat._contentId); // Digest

            // Bist
            immutable bistFlag = !_cstat.bist.empty;
            packer.pack(bistFlag);
            if (bistFlag) { packer.pack(_cstat.bist); }

            // XGram
            immutable xgramFlag = !_cstat.xgram.empty;
            packer.pack(xgramFlag);
            if (xgramFlag)
            {
                /* debug dln("packing xgram. empty:", _cstat.xgram.empty); */
                packer.pack(_cstat.xgram,
                            _cstat._xgramDeepDenseness);
            }

            /*     auto this_ = (cast(RegFile)this); // TODO Ugly! Is there another way? */
            /*     const tags = this_.parent.gstats.ftags.getTags(this_); */
            /*     immutable tagsFlag = !tags.empty; */
            /*     packer.pack(tagsFlag); */
            /*     debug dln(`Packing tags `, tags, ` of `, this_.path); */
            /*     if (tagsFlag) { packer.pack(tags); } */
        }

        /** Unpack, in the same order `toMsgpack` packs. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) @trusted
        {
            unpacker.unpack(name, size); // Name, Size

            // Time
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize

            // CStat: TODO Group
            unpacker.unpack(_cstat.kindId); // FKind
            if (_cstat.kindId.defined &&
                _cstat.kindId !in parent.gstats.allFKinds.byId)
            {
                dln(`warning: kindId `, _cstat.kindId, ` not found for `,
                    path, `, FKinds length `, parent.gstats.allFKinds.byIndex.length);
                _cstat.kindId.reset; // forget it
            }
            unpacker.unpack(_cstat._contentId); // Digest
            if (_cstat._contentId)
            {
                parent.gstats.filesByContentId[_cstat._contentId] ~= cast(NotNull!File)this;
            }

            // Bist
            bool bistFlag; unpacker.unpack(bistFlag);
            if (bistFlag)
            {
                unpacker.unpack(_cstat.bist);
            }

            // XGram
            bool xgramFlag; unpacker.unpack(xgramFlag);
            if (xgramFlag)
            {
                /* if (_cstat.xgram == null) { */
                /*     _cstat.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                /* } */
                /* unpacker.unpack(*_cstat.xgram); */
                unpacker.unpack(_cstat.xgram,
                                _cstat._xgramDeepDenseness);
                /* debug dln(`unpacked xgram. empty:`, _cstat.xgram.empty); */
            }

            // tags
            /* bool tagsFlag; unpacker.unpack(tagsFlag); */
            /* if (tagsFlag) { */
            /*     string[] tags; */
            /*     unpacker.unpack(tags); */
            /* } */
        }

        // NOTE(review): this override only exists in `version(msgpack)` builds;
        // confirm that other builds are meant to keep stale statistics on change.
        override void makeObselete() @trusted { _cstat.reset(); /* debug dln(`Reset CStat for `, path); */ }
    }

    /** Returns: Read-Only Contents of `this` Regular File.

        The file is memory-mapped on first use and the mapping is kept until
        `freeContents` is called. Empty files are never mapped; an empty slice
        is returned for them instead.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    immutable(ubyte[]) readOnlyContents(string file = __FILE__, int line = __LINE__)() @trusted
    {
        if (_mmfile is null)
        {
            if (size == 0) // munmap fails for empty files
            {
                static assert([] !is null);
                return []; // empty file
            }
            else
            {
                _mmfile = new MmFile(path, MmFile.Mode.read,
                                     mmfile_size, null, pageSize());
                if (parent.gstats.showMMaps)
                {
                    writeln(`Mapped `, path, ` of size `, size);
                }
            }
        }
        return cast(typeof(return))_mmfile[];
    }

    /** Returns: Read-Writable Contents of `this` Regular File.

        Maps the file in read-write mode unless a mapping already exists, in
        which case that existing mapping is reused as is.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    ubyte[] readWriteableContents() @trusted
    {
        if (!_mmfile)
        {
            _mmfile = new MmFile(path, MmFile.Mode.readWrite,
                                 mmfile_size, null, pageSize());
        }
        return cast(typeof(return))_mmfile[];
    }

    /** If needed Free Allocated Contents of `this` Regular File.

        Returns: `true` iff a mapping existed and was released.
    */
    bool freeContents()
    {
        if (_mmfile)
        {
            // Run the MmFile destructor right away to release the mapping;
            // `destroy` replaces the deprecated `delete` operator.
            destroy(_mmfile);
            _mmfile = null;
            return true;
        }
        return false;
    }

    import std.mmfile;
    private MmFile _mmfile = null; // current mapping of the contents, or null
    private CStat _cstat;     // Statistics about the contents of this RegFile.
}
1562 
/** Traits: each is `true` iff `T` is exactly the named class or the `NotNull`
    wrapper of it (sub-classes do not match). */
enum isFile(T) = is(T == File) || is(T == NotNull!File);
/// ditto
enum isDir(T) = is(T == Dir) || is(T == NotNull!Dir);
/// ditto
enum isSymlink(T) = is(T == Symlink) || is(T == NotNull!Symlink);
/// ditto
enum isRegFile(T) = is(T == RegFile) || is(T == NotNull!RegFile);
/// ditto
enum isSpecialFile(T) = is(T == SpecFile) || is(T == NotNull!SpecFile);

/** Return true if T satisfies any of the file-node traits above. */
enum isAnyFile(T) = isFile!T || isDir!T || isSymlink!T || isRegFile!T || isSpecialFile!T;

/** Return true if T is a class representing File IO. */
enum isFileIO(T) = isAnyFile!T || is(T == ioFile);
1578 
/** Contents Statistics of a Regular File.

    Holds everything that is derived from the bytes of a file and therefore
    has to be forgotten when the file changes: kind and content digests,
    histograms, hit count and 7/8-bit status.
*/
struct CStat
{
    /** Forget all statistics so that they are recomputed on next use.

        This includes `bitStatus`: statistics routines skip work whose result
        is already known, so a stale value would never be refreshed.
    */
    void reset() @safe nothrow
    {
        kindId[] = 0;
        _contentId[] = 0;
        hitCount = 0;
        bist.reset();
        xgram.reset();
        _xgramDeepDenseness = 0;
        bitStatus = BitStatus.unknown;
        deallocate();
    }

    /** Release resources held by the statistics. Currently this only clears
        `kindId`; `nullify` is unused while the x-gram is stored by value.

        `@nogc` so that it may be called from `@nogc` destructors.
    */
    void deallocate(bool nullify = true) @trusted nothrow @nogc
    {
        kindId[] = 0;
        /* if (xgram != null) { */
        /*     import core.stdc.stdlib; */
        /*     free(xgram); */
        /*     if (nullify) { */
        /*         xgram = null; */
        /*     } */
        /* } */
    }

    SHA1Digest kindId; // FKind Identifier/Fingerprint of this regular file.
    SHA1Digest _contentId; // Content Identifier/Fingerprint.

    /** Boolean Single Bistogram over file contents. If
        binHist0[cast(ubyte)x] is set then this file contains byte x. Consumes
        32 bytes. */
    Bist bist; // TODO Put in separate slice std.allocator.

    /** Boolean Pair Bistogram (Digram) over file contents (higher-order statistics).
        If this RegFile contains a sequence of [byte0, bytes1],
        then bit at index byte0 + byte1 * 256 is set in xgram.
    */
    XGram xgram; // TODO Use slice std.allocator
    private ulong _xgramDeepDenseness = 0; // memoized numerator of the x-gram deep denseness; 0 means not yet computed

    uint64_t hitCount = 0;
    BitStatus bitStatus = BitStatus.unknown; // whether contents are 7-bit or 8-bit, once determined
}
1623 
1624 import core.sys.posix.sys.types;
1625 
// Policy selecting which symbolic links get followed.
// NOTE(review): the original comments on `internal` and `external` read as if
// swapped; the wording below follows the member names -- confirm against the code
// that consumes this enum.
enum SymlinkFollowContext
{
    none,                       // Follow no symlinks
    internal,                   // presumably: follow only symlinks pointing inside of scanned tree
    external,                   // presumably: follow only symlinks pointing outside of scanned tree
    all,                        // Follow all symlinks
    standard = external         // default policy; same value as `external`
}
1634 
1635 /** Global Scanner Statistics. */
1636 class GStats
1637 {
1638     NotNull!File[][string] filesByName;    // Potential File Name Duplicates
1639     NotNull!File[][ino_t] filesByInode;    // Potential Link Duplicates
1640     NotNull!File[][SHA1Digest] filesByContentId; // File(s) (Duplicates) Indexed on Contents SHA1.
1641     NotNull!RegFile[][string] elfFilesBySymbol; // File(s) (Duplicates) Indexed on raw unmangled symbol.
1642     FileTags ftags;
1643 
1644     Bytes64[NotNull!File] treeSizesByFile; // Tree sizes.
1645     size_t[NotNull!File] lineCountsByFile; // Line counts.
1646 
1647     // VCS Directories
1648     DirKind[] vcDirKinds;
1649     DirKind[string] vcDirKindsMap;
1650 
1651     // Skipped Directories
1652     DirKind[] skippedDirKinds;
1653     DirKind[string] skippedDirKindsMap;
1654 
1655     FKinds txtFKinds = new FKinds; // Textual
1656     FKinds binFKinds = new FKinds; // Binary (Non-Textual)
1657     FKinds allFKinds = new FKinds; // All
1658     FKinds selFKinds = new FKinds; // User selected
1659 
1660     void loadFileKinds()
1661     {
1662         txtFKinds ~= new FKind("SCons", ["SConstruct", "SConscript"],
1663                                ["scons"],
1664                                [], 0, [], [],
1665                                defaultCommentDelims,
1666                                pythonStringDelims,
1667                                FileContent.buildSystemCode, FileKindDetection.equalsNameAndContents); // TOOD: Inherit Python
1668 
1669         txtFKinds ~= new FKind("Makefile", ["GNUmakefile", "Makefile", "makefile"],
1670                                ["mk", "mak", "makefile", "make", "gnumakefile"], [], 0, [], [],
1671                                defaultCommentDelims,
1672                                defaultStringDelims,
1673                                FileContent.sourceCode, FileKindDetection.equalsName);
1674         txtFKinds ~= new FKind("Automakefile", ["Makefile.am", "makefile.am"],
1675                                ["am"], [], 0, [], [],
1676                                defaultCommentDelims,
1677                                defaultStringDelims,
1678                                FileContent.sourceCode);
1679         txtFKinds ~= new FKind("Autoconffile", ["configure.ac", "configure.in"],
1680                                [], [], 0, [], [],
1681                                defaultCommentDelims,
1682                                defaultStringDelims,
1683                                FileContent.sourceCode);
1684         txtFKinds ~= new FKind("Doxygen", ["Doxyfile"],
1685                                ["doxygen"], [], 0, [], [],
1686                                defaultCommentDelims,
1687                                defaultStringDelims,
1688                                FileContent.sourceCode);
1689 
1690         txtFKinds ~= new FKind("Rake", ["Rakefile"],// TODO inherit Ruby
1691                                ["mk", "makefile", "make", "gnumakefile"], [], 0, [], [],
1692                                [Delim("#"), Delim("=begin", "=end")],
1693                                defaultStringDelims,
1694                                FileContent.sourceCode, FileKindDetection.equalsName);
1695 
1696         txtFKinds ~= new FKind("HTML", [], ["htm", "html", "shtml", "xhtml"], [], 0, [], [],
1697                                [Delim("<!--", "-->")],
1698                                defaultStringDelims,
1699                                FileContent.text, FileKindDetection.equalsContents); // markup text
1700         txtFKinds ~= new FKind("XML", [], ["xml", "dtd", "xsl", "xslt", "ent", ], [], 0, "<?xml", [],
1701                                [Delim("<!--", "-->")],
1702                                defaultStringDelims,
1703                                FileContent.text, FileKindDetection.equalsContents); // TODO markup text
1704         txtFKinds ~= new FKind("YAML", [], ["yaml", "yml"], [], 0, [], [],
1705                                defaultCommentDelims,
1706                                defaultStringDelims,
1707                                FileContent.text); // TODO markup text
1708         txtFKinds ~= new FKind("CSS", [], ["css"], [], 0, [], [],
1709                                [Delim("/*", "*/")],
1710                                defaultStringDelims,
1711                                FileContent.text, FileKindDetection.equalsContents);
1712 
1713         txtFKinds ~= new FKind("Audacity Project", [], ["aup"], [], 0, "<?xml", [],
1714                                defaultCommentDelims,
1715                                defaultStringDelims,
1716                                FileContent.text, FileKindDetection.equalsNameAndContents);
1717 
1718         txtFKinds ~= new FKind("Comma-separated values", [], ["csv"], [], 0, [], [], // TODO decribe with symbolic
1719                                defaultCommentDelims,
1720                                defaultStringDelims,
1721                                FileContent.text, FileKindDetection.equalsNameAndContents);
1722 
1723         txtFKinds ~= new FKind("Tab-separated values", [], ["tsv"], [], 0, [], [], // TODO describe with symbolic
1724                                defaultCommentDelims,
1725                                defaultStringDelims,
1726                                FileContent.text, FileKindDetection.equalsNameAndContents);
1727 
1728         static immutable keywordsC = [
1729             "auto", "const", "double", "float", "int", "short", "struct",
1730             "unsigned", "break", "continue", "else", "for", "long", "signed",
1731             "switch", "void", "case", "default", "enum", "goto", "register",
1732             "sizeof", "typedef", "volatile", "char", "do", "extern", "if",
1733             "return", "static", "union", "while",
1734             ];
1735 
1736         /* See_Also: https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B */
1737         auto opersCBasic = [
1738             // Arithmetic
1739             Op("+", OpArity.binary, OpAssoc.LR, 6, "Add"),
1740             Op("-", OpArity.binary, OpAssoc.LR, 6, "Subtract"),
1741             Op("*", OpArity.binary, OpAssoc.LR, 5, "Multiply"),
1742             Op("/", OpArity.binary, OpAssoc.LR, 5, "Divide"),
1743             Op("%", OpArity.binary, OpAssoc.LR, 5, "Remainder/Moduls"),
1744 
1745             Op("+", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary plus"),
1746             Op("-", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary minus"),
1747 
1748             Op("++", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix increment"),
1749             Op("--", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix decrement"),
1750 
1751             Op("++", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix increment"),
1752             Op("--", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix decrement"),
1753 
1754             // Assignment Arithmetic (binary)
1755             Op("=", OpArity.binary, OpAssoc.RL, 16, "Assign"),
1756             Op("+=", OpArity.binary, OpAssoc.RL, 16, "Assignment by sum"),
1757             Op("-=", OpArity.binary, OpAssoc.RL, 16, "Assignment by difference"),
1758             Op("*=", OpArity.binary, OpAssoc.RL, 16, "Assignment by product"),
1759             Op("/=", OpArity.binary, OpAssoc.RL, 16, "Assignment by quotient"),
1760             Op("%=", OpArity.binary, OpAssoc.RL, 16, "Assignment by remainder"),
1761 
1762             Op("&=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise AND"),
1763             Op("|=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise OR"),
1764 
1765             Op("^=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise XOR"),
1766             Op("<<=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise left shift"),
1767             Op(">>=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise right shift"),
1768 
1769             Op("==", OpArity.binary, OpAssoc.LR, 9, "Equal to"),
1770             Op("!=", OpArity.binary, OpAssoc.LR, 9, "Not equal to"),
1771 
1772             Op("<", OpArity.binary, OpAssoc.LR, 8, "Less than"),
1773             Op(">", OpArity.binary, OpAssoc.LR, 8, "Greater than"),
1774             Op("<=", OpArity.binary, OpAssoc.LR, 8, "Less than or equal to"),
1775             Op(">=", OpArity.binary, OpAssoc.LR, 8, "Greater than or equal to"),
1776 
1777             Op("&&", OpArity.binary, OpAssoc.LR, 13, "Logical AND"), // TODO Convert to math in smallcaps AND
1778             Op("||", OpArity.binary, OpAssoc.LR, 14, "Logical OR"), // TODO Convert to math in smallcaps OR
1779 
1780             Op("!", OpArity.unaryPrefix, OpAssoc.LR, 3, "Logical NOT"), // TODO Convert to math in smallcaps NOT
1781 
1782             Op("&", OpArity.binary, OpAssoc.LR, 10, "Bitwise AND"),
1783             Op("^", OpArity.binary, OpAssoc.LR, 11, "Bitwise XOR (exclusive or)"),
1784             Op("|", OpArity.binary, OpAssoc.LR, 12, "Bitwise OR"),
1785 
1786             Op("<<", OpArity.binary, OpAssoc.LR, 7, "Bitwise left shift"),
1787             Op(">>", OpArity.binary, OpAssoc.LR, 7, "Bitwise right shift"),
1788 
1789             Op("~", OpArity.unaryPrefix, OpAssoc.LR, 3, "Bitwise NOT (One's Complement)"),
1790             Op(",", OpArity.binary, OpAssoc.LR, 18, "Comma"),
1791             Op("sizeof", OpArity.unaryPrefix, OpAssoc.LR, 3, "Size-of"),
1792 
1793             Op("->", OpArity.binary, OpAssoc.LR, 2, "Element selection through pointer"),
1794             Op(".", OpArity.binary, OpAssoc.LR, 2, "Element selection by reference"),
1795 
1796             ];
1797 
1798         /* See_Also: https://en.wikipedia.org/wiki/Iso646.h */
1799         auto opersC_ISO646 = [
1800             OpAlias("and", "&&"),
1801             OpAlias("or", "||"),
1802             OpAlias("and_eq", "&="),
1803 
1804             OpAlias("bitand", "&"),
1805             OpAlias("bitor", "|"),
1806 
1807             OpAlias("compl", "~"),
1808             OpAlias("not", "!"),
1809             OpAlias("not_eq", "!="),
1810             OpAlias("or_eq", "|="),
1811             OpAlias("xor", "^"),
1812             OpAlias("xor_eq", "^="),
1813             ];
1814 
1815         auto opersC = opersCBasic /* ~ opersC_ISO646 */;
1816 
1817         auto kindC = new FKind("C", [], ["c", "h"], [], 0, [],
1818                                keywordsC,
1819                                cCommentDelims,
1820                                defaultStringDelims,
1821                                FileContent.sourceCode,
1822                                FileKindDetection.equalsWhatsGiven,
1823                                Lang.c);
1824         txtFKinds ~= kindC;
1825         kindC.operations ~= tuple(FOp.checkSyntax, `gcc -x c -fsyntax-only -c`);
1826         kindC.operations ~= tuple(FOp.checkSyntax, `clang -x c -fsyntax-only -c`);
1827         kindC.operations ~= tuple(FOp.preprocess, `cpp`);
1828         kindC.opers = opersC;
1829 
1830         static immutable keywordsCxx = (keywordsC ~ ["asm", "dynamic_cast", "namespace", "reinterpret_cast", "try",
1831                                                      "bool", "explicit", "new", "static_cast", "typeid",
1832                                                      "catch", "false", "operator", "template", "typename",
1833                                                      "class", "friend", "private", "this", "using",
1834                                                      "const_cast", "inline", "public", "throw", "virtual",
1835                                                      "delete", "mutable", "protected", "true", "wchar_t",
1836                                                      // The following are not essential when
1837                                                      // the standard ASCII character set is
1838                                                      // being used, but they have been added
1839                                                      // to provide more readable alternatives
1840                                                      // for some of the C++ operators, and
1841                                                      // also to facilitate programming with
1842                                                      // character sets that lack characters
1843                                                      // needed by C++.
1844                                                      "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq",
1845                                                      "and_eq", "bitor", "not", "or", "xor", ]).uniq.array;
1846 
1847         auto opersCxx = opersC ~ [
1848             Op("->*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"),
1849             Op(".*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"),
1850             Op("::", OpArity.binary, OpAssoc.none, 1, "Scope resolution"),
1851             Op("typeid", OpArity.unaryPrefix, OpAssoc.LR, 2, "Run-time type information (RTTI))"),
1852             //Op("alignof", OpArity.unaryPrefix, OpAssoc.LR, _, _),
1853             Op("new", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory allocation"),
1854             Op("delete", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"),
1855             Op("delete[]", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"),
1856             /* Op("noexcept", OpArity.unaryPrefix, OpAssoc.none, _, _), */
1857 
1858             Op("dynamic_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1859             Op("reinterpret_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1860             Op("static_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1861             Op("const_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"),
1862 
1863             Op("throw", OpArity.unaryPrefix, OpAssoc.LR, 17, "Throw operator"),
1864             /* Op("catch", OpArity.unaryPrefix, OpAssoc.LR, _, _) */
1865             ];
1866 
1867         static immutable extsCxx = ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"];
1868         auto kindCxx = new FKind("C++", [], extsCxx, [], 0, [],
1869                                  keywordsCxx,
1870                                  cCommentDelims,
1871                                  defaultStringDelims,
1872                                  FileContent.sourceCode,
1873                                  FileKindDetection.equalsWhatsGiven,
1874                                  Lang.cxx);
1875         kindCxx.operations ~= tuple(FOp.checkSyntax, `gcc -x c++ -fsyntax-only -c`);
1876         kindCxx.operations ~= tuple(FOp.checkSyntax, `clang -x c++ -fsyntax-only -c`);
1877         kindCxx.operations ~= tuple(FOp.preprocess, `cpp`);
1878         kindCxx.opers = opersCxx;
1879         txtFKinds ~= kindCxx;
1880         static immutable keywordsCxx11 = keywordsCxx ~ ["alignas", "alignof",
1881                                                         "char16_t", "char32_t",
1882                                                         "constexpr",
1883                                                         "decltype",
1884                                                         "override", "final",
1885                                                         "noexcept", "nullptr",
1886                                                         "auto",
1887                                                         "thread_local",
1888                                                         "static_assert", ];
1889         // TODO Define as subkind
1890         /* txtFKinds ~= new FKind("C++11", [], ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"], [], 0, [], */
1891         /*                        keywordsCxx11, */
1892         /*                        [Delim("/\*", "*\/"), */
1893         /*                         Delim("//")], */
1894         /*                        defaultStringDelims, */
1895         /*                        FileContent.sourceCode, */
1896         /*                        FileKindDetection.equalsWhatsGiven); */
1897 
1898         /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
1899         static immutable opersCxxMicrosoft = ["__alignof"];
1900 
1901         /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
1902         static immutable keywordsCxxMicrosoft = (keywordsCxx ~ [/* __abstract 2 */
1903                                                      "__asm",
1904                                                      "__assume",
1905                                                      "__based",
1906                                                      /* __box 2 */
1907                                                      "__cdecl",
1908                                                      "__declspec",
1909                                                      /* __delegate 2 */
1910                                                      "__event",
1911                                                      "__except",
1912                                                      "__fastcall",
1913                                                      "__finally",
1914                                                      "__forceinline",
1915                                                      /* __gc 2 */
1916                                                      /* __hook 3 */
1917                                                      "__identifier",
1918                                                      "__if_exists",
1919                                                      "__if_not_exists",
1920                                                      "__inline",
1921                                                      "__int16",
1922                                                      "__int32",
1923                                                      "__int64",
1924                                                      "__int8",
1925                                                      "__interface",
1926                                                      "__leave",
1927                                                      "__m128",
1928                                                      "__m128d",
1929                                                      "__m128i",
1930                                                      "__m64",
1931                                                      "__multiple_inheritance",
1932                                                      /* __nogc 2 */
1933                                                      "__noop",
1934                                                      /* __pin 2 */
1935                                                      /* __property 2 */
1936                                                      "__raise",
1937                                                      /* __sealed 2 */
1938                                                      "__single_inheritance",
1939                                                      "__stdcall",
1940                                                      "__super",
1941                                                      "__thiscall",
1942                                                      "__try",
1943                                                      "__except",
1944                                                      "__finally",
1945                                                      /* __try_cast 2 */
1946                                                      "__unaligned",
1947                                                      /* __unhook 3 */
1948                                                      "__uuidof",
1949                                                      /* __value 2 */
1950                                                      "__virtual_inheritance",
1951                                                      "__w64",
1952                                                      "__wchar_t",
1953                                                      "wchar_t",
1954                                                      "abstract",
1955                                                      "array",
1956                                                      "auto",
1957                                                      "bool",
1958                                                      "break",
1959                                                      "case",
1960                                                      "catch",
1961                                                      "char",
1962                                                      "class",
1963                                                      "const",
1964                                                      "const_cast",
1965                                                      "continue",
1966                                                      "decltype",
1967                                                      "default",
1968                                                      "delegate",
1969                                                      "delete",
1970                                                      /* deprecated 1 */
1971                                                      /* dllexport 1 */
1972                                                      /* dllimport 1 */
1973                                                      "do",
1974                                                      "double",
1975                                                      "dynamic_cast",
1976                                                      "else",
1977                                                      "enum",
1978                                                      "enum class"
1979                                                      "enum struct"
1980                                                      "event",
1981                                                      "explicit",
1982                                                      "extern",
1983                                                      "false",
1984                                                      "finally",
1985                                                      "float",
1986                                                      "for",
1987                                                      "for each",
1988                                                      "in",
1989                                                      "friend",
1990                                                      "friend_as",
1991                                                      "gcnew",
1992                                                      "generic",
1993                                                      "goto",
1994                                                      "if",
1995                                                      "initonly",
1996                                                      "inline",
1997                                                      "int",
1998                                                      "interface class",
1999                                                      "interface struct",
2000                                                      "interior_ptr",
2001                                                      "literal",
2002                                                      "long",
2003                                                      "mutable",
2004                                                      /* naked 1 */
2005                                                      "namespace",
2006                                                      "new",
2007                                                      "new",
2008                                                      /* noinline 1 */
2009                                                      /* noreturn 1 */
2010                                                      /* nothrow 1 */
2011                                                      /* novtable 1 */
2012                                                      "nullptr",
2013                                                      "operator",
2014                                                      "private",
2015                                                      "property",
2016                                                      /* property 1 */
2017                                                      "protected",
2018                                                      "public",
2019                                                      "ref class",
2020                                                      "ref struct",
2021                                                      "register",
2022                                                      "reinterpret_cast",
2023                                                      "return",
2024                                                      "safecast",
2025                                                      "sealed",
2026                                                      /* selectany 1 */
2027                                                      "short",
2028                                                      "signed",
2029                                                      "sizeof",
2030                                                      "static",
2031                                                      "static_assert",
2032                                                      "static_cast",
2033                                                      "struct",
2034                                                      "switch",
2035                                                      "template",
2036                                                      "this",
2037                                                      /* thread 1 */
2038                                                      "throw",
2039                                                      "true",
2040                                                      "try",
2041                                                      "typedef",
2042                                                      "typeid",
2043                                                      "typeid",
2044                                                      "typename",
2045                                                      "union",
2046                                                      "unsigned",
2047                                                      "using" /* declaration */,
2048                                                      "using" /* directive */,
2049                                                      /* uuid 1 */
2050                                                      "value class",
2051                                                      "value struct",
2052                                                      "virtual",
2053                                                      "void",
2054                                                      "volatile",
2055                                                      "while"]).uniq.array;
2056 
2057         static immutable xattrCxxMicrosoft = [];
2058 
2059         static immutable keywordsNewObjectiveC = ["id",
2060                                                   "in",
2061                                                   "out", // Returned by reference
2062                                                   "inout", // Argument is used both to provide information and to get information back
2063                                                   "bycopy",
2064                                                   "byref", "oneway", "self",
2065                                                   "super", "@interface", "@end",
2066                                                   "@implementation", "@end",
2067                                                   "@interface", "@end",
2068                                                   "@implementation", "@end",
2069                                                   "@protoco", "@end", "@class" ];
2070 
2071         static immutable keywordsObjectiveC = keywordsC ~ keywordsNewObjectiveC;
2072         txtFKinds ~= new FKind("Objective-C", [], ["m", "h"], [], 0, [],
2073                                keywordsObjectiveC,
2074                                cCommentDelims,
2075                                defaultStringDelims,
2076                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven,
2077                                Lang.objectiveC);
2078 
2079         static immutable keywordsObjectiveCxx = keywordsCxx ~ keywordsNewObjectiveC;
2080         txtFKinds ~= new FKind("Objective-C++", [], ["mm", "h"], [], 0, [],
2081                                keywordsObjectiveCxx,
2082                                defaultCommentDelims,
2083                                defaultStringDelims,
2084                                FileContent.sourceCode,
2085                                FileKindDetection.equalsWhatsGiven,
2086                                Lang.objectiveCxx);
2087 
2088         static immutable keywordsSwift = ["break", "class", "continue", "default", "do", "else", "for", "func", "if", "import",
2089                               "in", "let", "return", "self", "struct", "super", "switch", "unowned", "var", "weak", "while",
2090                               "mutating", "extension"];
2091         auto opersOverflowSwift = opersC ~ [Op("&+"), Op("&-"), Op("&*"), Op("&/"), Op("&%")];
2092         auto builtinsSwift = ["print", "println"];
2093         auto kindSwift = new FKind("Swift", [], ["swift"], [], 0, [],
2094                                    keywordsSwift,
2095                                    cCommentDelims,
2096                                    defaultStringDelims,
2097                                    FileContent.sourceCode,
2098                                    FileKindDetection.equalsWhatsGiven,
2099                                    Lang.swift);
2100         kindSwift.builtins = builtinsSwift;
2101         kindSwift.opers = opersOverflowSwift;
2102         txtFKinds ~= kindSwift;
2103 
2104         static immutable keywordsCSharp = ["if"]; // TODO Add keywords
2105         txtFKinds ~= new FKind("C#", [], ["cs"], [], 0, [], keywordsCSharp,
2106                                cCommentDelims,
2107                                defaultStringDelims,
2108                                FileContent.sourceCode,
2109                                FileKindDetection.equalsWhatsGiven,
2110                                Lang.cSharp);
2111 
2112         static immutable keywordsOCaml = ["and", "as", "assert", "begin", "class",
2113                                           "constraint", "do", "done", "downto", "else",
2114                                           "end", "exception", "external", "false", "for",
2115                                           "fun", "function", "functor", "if", "in",
2116                                           "include", "inherit", "inherit!", "initializer"
2117                                           "lazy", "let", "match", "method", "method!",
2118                                           "module", "mutable", "new", "object", "of",
2119                                           "open", "or",
2120                                           "private", "rec", "sig", "struct", "then", "to",
2121                                           "true", "try", "type",
2122                                           "val", "val!", "virtual",
2123                                           "when", "while", "with"];
2124         txtFKinds ~= new FKind("OCaml", [], ["ocaml"], [], 0, [], keywordsOCaml,
2125                                [Delim("(*", "*)")],
2126                                defaultStringDelims,
2127                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2128 
2129         txtFKinds ~= new FKind("Parrot", [], ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg", ], [], 0, [], keywordsOCaml,
2130                                [Delim("#"),
2131                                 Delim("^=", // TODO Needs beginning of line instead of ^
2132                                       "=cut")],
2133                                defaultStringDelims,
2134                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2135 
2136         static immutable keywordsProlog = [];
2137         txtFKinds ~= new FKind("Prolog", [], ["pl", "pro", "P"], [], 0, [], keywordsProlog,
2138                                [],
2139                                [],
2140                                FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);
2141 
2142         auto opersD = [
2143             // Arithmetic
2144             Op("+", OpArity.binary, OpAssoc.LR, 10*2, "Add"),
2145             Op("-", OpArity.binary, OpAssoc.LR, 10*2, "Subtract"),
2146             Op("~", OpArity.binary, OpAssoc.LR, 10*2, "Concatenate"),
2147 
2148             Op("*", OpArity.binary, OpAssoc.LR, 11*2, "Multiply"),
2149             Op("/", OpArity.binary, OpAssoc.LR, 11*2, "Divide"),
2150             Op("%", OpArity.binary, OpAssoc.LR, 11*2, "Remainder/Moduls"),
2151 
2152             Op("++", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix increment"),
2153             Op("--", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix decrement"),
2154 
2155             Op("^^", OpArity.binary, OpAssoc.RL, 13*2, "Power"),
2156 
2157             Op("++", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix increment"),
2158             Op("--", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix decrement"),
2159             Op("&", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Address off"),
2160             Op("*", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Pointer Dereference"),
2161             Op("+", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Plus"),
2162             Op("-", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Minus"),
2163             Op("!", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Logical NOT"), // TODO Convert to math in smallcaps NOT
2164             Op("~", OpArity.unaryPrefix, OpAssoc.LR, 12*2, "Bitwise NOT (One's Complement)"),
2165 
2166             // Bit shift
2167             Op("<<", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise left shift"),
2168             Op(">>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise right shift"),
2169 
2170             // Comparison
2171             Op("==", OpArity.binary, OpAssoc.LR, 6*2, "Equal to"),
2172             Op("!=", OpArity.binary, OpAssoc.LR, 6*2, "Not equal to"),
2173             Op("<", OpArity.binary, OpAssoc.LR, 6*2, "Less than"),
2174             Op(">", OpArity.binary, OpAssoc.LR, 6*2, "Greater than"),
2175             Op("<=", OpArity.binary, OpAssoc.LR, 6*2, "Less than or equal to"),
2176             Op(">=", OpArity.binary, OpAssoc.LR, 6*2, "Greater than or equal to"),
2177             Op("in", OpArity.binary, OpAssoc.LR, 6*2, "In"),
2178             Op("!in", OpArity.binary, OpAssoc.LR, 6*2, "Not In"),
2179             Op("is", OpArity.binary, OpAssoc.LR, 6*2, "Is"),
2180             Op("!is", OpArity.binary, OpAssoc.LR, 6*2, "Not Is"),
2181 
2182             Op("&", OpArity.binary, OpAssoc.LR, 8*2, "Bitwise AND"),
2183             Op("^", OpArity.binary, OpAssoc.LR, 7*2, "Bitwise XOR (exclusive or)"),
2184             Op("|", OpArity.binary, OpAssoc.LR, 6*2, "Bitwise OR"),
2185 
2186             Op("&&", OpArity.binary, OpAssoc.LR, 5*2, "Logical AND"), // TODO Convert to math in smallcaps AND
2187             Op("||", OpArity.binary, OpAssoc.LR, 4*2, "Logical OR"), // TODO Convert to math in smallcaps OR
2188 
2189             // Assignment Arithmetic (binary)
2190             Op("=", OpArity.binary, OpAssoc.RL, 2*2, "Assign"),
2191             Op("+=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by sum"),
2192             Op("-=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by difference"),
2193             Op("*=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by product"),
2194             Op("/=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by quotient"),
2195             Op("%=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by remainder"),
2196             Op("&=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise AND"),
2197             Op("|=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise OR"),
2198             Op("^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise XOR"),
2199             Op("<<=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise left shift"),
2200             Op(">>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise right shift"),
2201 
2202             Op(",", OpArity.binary, OpAssoc.LR, 1*2, "Comma"),
2203             Op("..", OpArity.binary, OpAssoc.LR, cast(int)(0*2), "Range separator"),
2204             ];
2205 
2206         enum interpretersForD = ["rdmd",
2207                                  "gdmd"];
2208         auto magicForD = shebangLine(alt(lit("rdmd"),
2209                                          lit("gdmd")));
2210 
2211         static immutable keywordsD = [`@property`, `@safe`, `@trusted`, `@system`, `@disable`, `abstract`, `alias`, `align`, `asm`, `assert`, `auto`, `body`, `bool`, `break`, `byte`, `case`, `cast`, `catch`,
2212                                       `cdouble`, `cent`, `cfloat`, `char`, `class`, `const`, `continue`, `creal`, `dchar`, `debug`, `default`, `delegate`, `delete`, `deprecated`,
2213                                       `do`, `double`, `else`, `enum`, `export`, `extern`, `false`, `final`, `finally`, `float`, `for`, `foreach`, `foreach_reverse`,
2214                                       `function`, `goto`, `idouble`, `if`, `ifloat`, `immutable`, `import`, `in`, `inout`, `int`, `interface`, `invariant`, `ireal`,
2215                                       `is`, `lazy`, `long`, `macro`, `mixin`, `module`, `new`, `nothrow`, `null`, `out`, `override`, `package`, `pragma`, `private`,
2216                                       `protected`, `public`, `pure`, `real`, `ref`, `return`, `scope`, `shared`, `short`, `static`, `struct`, `super`, `switch`,
2217                                       `synchronized`, `template`, `this`, `throw`, `true`, `try`, `typedef`, `typeid`, `typeof`, `ubyte`, `ucent`, `uint`, `ulong`,
2218                                       `union`, `unittest`, `ushort`, `version`, `void`, `volatile`, `wchar`, `while`, `with`, `__gshared`,
2219                                       `__thread`, `__traits`,
2220                                       `string`, `wstring`, `dstring`, `size_t`, `hash_t`, `ptrdiff_t`, `equals_`]; // aliases
2221 
2222         static immutable builtinsD = [`toString`, `toHash`, `opCmp`, `opEquals`,
2223                           `opUnary`, `opBinary`, `opApply`, `opCall`, `opAssign`, `opIndexAssign`, `opSliceAssign`, `opOpAssign`,
2224                           `opIndex`, `opSlice`, `opDispatch`,
2225                           `toString`, `toHash`, `opCmp`, `opEquals`, `Monitor`, `factory`, `classinfo`, `vtbl`, `offset`, `getHash`, `equals`, `compare`, `tsize`, `swap`, `next`, `init`, `flags`, `offTi`, `destroy`, `postblit`, `toString`, `toHash`,
2226                           `factory`, `classinfo`, `Throwable`, `Exception`, `Error`, `capacity`, `reserve`, `assumeSafeAppend`, `clear`,
2227                           `ModuleInfo`, `ClassInfo`, `MemberInfo`, `TypeInfo`];
2228 
2229         static immutable propertiesD = [`sizeof`, `stringof`, `mangleof`, `nan`, `init`, `alignof`, `max`, `min`, `infinity`, `epsilon`, `mant_dig`, ``,
2230                             `max_10_exp`, `max_exp`, `min_10_exp`, `min_exp`, `min_normal`, `re`, `im`];
2231 
2232         static immutable specialsD = [`__FILE__`, `__LINE__`, `__DATE__`, `__EOF__`, `__TIME__`, `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `#line`];
2233 
2234         auto kindDInterface = new FKind("D Interface", [], ["di"],
2235                                         magicForD, 0,
2236                                         [],
2237                                         keywordsD,
2238                                         dCommentDelims,
2239                                         defaultStringDelims,
2240                                         FileContent.sourceCode,
2241                                         FileKindDetection.equalsNameOrContents,
2242                                         Lang.d);
2243         kindDInterface.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2244         kindDInterface.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
2245         txtFKinds ~= kindDInterface;
2246 
2247         auto kindDDoc = new FKind("D Documentation", [], ["dd"],
2248                                   magicForD, 0,
2249                                   [],
2250                                   keywordsD,
2251                                   dCommentDelims,
2252                                   defaultStringDelims,
2253                                   FileContent.sourceCode,
2254                                   FileKindDetection.equalsNameOrContents);
2255         txtFKinds ~= kindDDoc;
2256 
2257         auto kindD = new FKind("D", [], ["d", "di"],
2258                                magicForD, 0,
2259                                [],
2260                                keywordsD,
2261                                dCommentDelims,
2262                                defaultStringDelims,
2263                                FileContent.sourceCode,
2264                                FileKindDetection.equalsNameOrContents,
2265                                Lang.d);
2266         kindD.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2267         kindD.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
2268         txtFKinds ~= kindD;
2269 
2270         auto kindDi = new FKind("D Interface", [], ["di"],
2271                                 magicForD, 0,
2272                                 [],
2273                                 keywordsD,
2274                                 dCommentDelims,
2275                                 defaultStringDelims,
2276                                 FileContent.sourceCode,
2277                                 FileKindDetection.equalsNameOrContents,
2278                                 Lang.d);
2279         kindDi.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
2280         kindDi.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
2281         txtFKinds ~= kindDi;
2282 
2283         static immutable keywordsRust = ["as", "box", "break", "continue", "crate",
2284                                          "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in",
2285                                          "let", "loop", "match", "mod", "mut", "priv", "proc", "pub", "ref",
2286                                          "return", "self", "static", "struct", "super", "true", "trait",
2287                                          "type", "unsafe", "use", "while"];
2288 
2289         auto kindRust = new FKind("Rust", [], ["rs"],
2290                                   [], 0,
2291                                   [],
2292                                   keywordsRust,
2293                                   cCommentDelims,
2294                                   defaultStringDelims,
2295                                   FileContent.sourceCode,
2296                                   FileKindDetection.equalsNameOrContents,
2297                                   Lang.rust);
2298         txtFKinds ~= kindRust;
2299 
2300         static immutable keywordsFortran77 = ["if", "else"];
2301         // TODO Support .h files but require it to contain some Fortran-specific or be parseable.
2302         auto kindFortan = new FKind("Fortran", [], ["f", "fortran", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"], [], 0, [], keywordsFortran77,
2303                                     [Delim("^C")], // TODO Need beginning of line instead ^. seq(bol(), alt(lit('C'), lit('c'))); // TODO Add chars chs("cC");
2304                                     defaultStringDelims,
2305                                     FileContent.sourceCode,
2306                                     FileKindDetection.equalsNameOrContents,
2307                                     Lang.fortran);
2308         kindFortan.operations ~= tuple(FOp.checkSyntax, `gcc -x fortran -fsyntax-only`);
2309         txtFKinds ~= kindFortan;
2310 
2311         // Ada
2312         import nxt.ada_defs;
2313         static immutable keywordsAda83 = ada_defs.keywords83;
2314         static immutable keywordsAda95 = keywordsAda83 ~ ada_defs.keywordsNew95;
2315         static immutable keywordsAda2005 = keywordsAda95 ~ ada_defs.keywordsNew2005;
2316         static immutable keywordsAda2012 = keywordsAda2005 ~ ada_defs.keywordsNew2012;
2317         static immutable extsAda = ["ada", "adb", "ads"];
2318         txtFKinds ~= new FKind("Ada 82", [], extsAda, [], 0, [], keywordsAda83,
2319                                [Delim("--")],
2320                                defaultStringDelims,
2321                                FileContent.sourceCode);
2322         txtFKinds ~= new FKind("Ada 95", [], extsAda, [], 0, [], keywordsAda95,
2323                                [Delim("--")],
2324                                defaultStringDelims,
2325                                FileContent.sourceCode);
2326         txtFKinds ~= new FKind("Ada 2005", [], extsAda, [], 0, [], keywordsAda2005,
2327                                [Delim("--")],
2328                                defaultStringDelims,
2329                                FileContent.sourceCode);
2330         txtFKinds ~= new FKind("Ada 2012", [], extsAda, [], 0, [], keywordsAda2012,
2331                                [Delim("--")],
2332                                defaultStringDelims,
2333                                FileContent.sourceCode);
2334         txtFKinds ~= new FKind("Ada", [], extsAda, [], 0, [], keywordsAda2012,
2335                                [Delim("--")],
2336                                defaultStringDelims,
2337                                FileContent.sourceCode);
2338 
2339         auto aliKind = new FKind("Ada Library File", [], ["ali"], [], 0, `V "GNAT Lib v`, [],
2340                                  [], // N/A
2341                                  defaultStringDelims,
2342                                  FileContent.fingerprint); // TODO Parse version following magic tag?
2343         aliKind.machineGenerated = true;
2344         txtFKinds ~= aliKind;
2345 
2346         txtFKinds ~= new FKind("Pascal", [], ["pas", "pascal"], [], 0, [], [],
2347                                [Delim("(*", "*)"),// Old-Style
2348                                 Delim("{", "}"),// Turbo Pascal
2349                                 Delim("//")],// Delphi
2350                                defaultStringDelims,
2351                                FileContent.sourceCode, FileKindDetection.equalsContents);
2352         txtFKinds ~= new FKind("Delphi", [], ["pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj"],
2353                                [], 0, [], [],
2354                                [Delim("//")],
2355                                defaultStringDelims,
2356                                FileContent.sourceCode, FileKindDetection.equalsContents);
2357 
2358         txtFKinds ~= new FKind("Objective-C", [], ["m"], [], 0, [], [],
2359                                cCommentDelims,
2360                                defaultStringDelims,
2361                                FileContent.sourceCode);
2362 
2363         static immutable keywordsPython = ["and", "del", "for", "is", "raise", "assert", "elif", "from", "lambda", "return",
2364                                "break", "else", "global", "not", "try", "class", "except", "if", "or", "while",
2365                                "continue", "exec", "import", "pass", "yield", "def", "finally", "in", "print"];
2366 
2367         // Scripting
2368 
2369         auto kindPython = new FKind("Python", [], ["py"],
2370                                     shebangLine(lit("python")), 0, [],
2371                                     keywordsPython,
2372                                     defaultCommentDelims,
2373                                     pythonStringDelims,
2374                                     FileContent.scriptCode);
2375         txtFKinds ~= kindPython;
2376 
2377         txtFKinds ~= new FKind("Ruby", [], ["rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec", ],
2378                                shebangLine(lit("ruby")), 0,
2379                                [], [],
2380                                [Delim("#"), Delim("=begin", "=end")],
2381                                defaultStringDelims,
2382                                FileContent.scriptCode);
2383 
2384         txtFKinds ~= new FKind("Scala", [], ["scala", ],
2385                                shebangLine(lit("scala")), 0,
2386                                [], [],
2387                                cCommentDelims,
2388                                defaultStringDelims,
2389                                FileContent.scriptCode);
2390         txtFKinds ~= new FKind("Scheme", [], ["scm", "ss"],
2391                                [], 0,
2392                                [], [],
2393                                [Delim(";")],
2394                                defaultStringDelims,
2395                                FileContent.scriptCode);
2396 
2397         txtFKinds ~= new FKind("Smalltalk", [], ["st"], [], 0, [], [],
2398                                [Delim("\"", "\"")],
2399                                defaultStringDelims,
2400                                FileContent.sourceCode);
2401 
2402         txtFKinds ~= new FKind("Perl", [], ["pl", "pm", "pm6", "pod", "t", "psgi", ],
2403                                shebangLine(lit("perl")), 0,
2404                                [], [],
2405                                defaultCommentDelims,
2406                                defaultStringDelims,
2407                                FileContent.scriptCode);
2408         txtFKinds ~= new FKind("PHP", [], ["php", "phpt", "php3", "php4", "php5", "phtml", ],
2409                                shebangLine(lit("php")), 0,
2410                                [], [],
2411                                defaultCommentDelims ~ cCommentDelims,
2412                                defaultStringDelims,
2413                                FileContent.scriptCode);
2414         txtFKinds ~= new FKind("Plone", [], ["pt", "cpt", "metadata", "cpy", "py", ], [], 0, [], [],
2415                                defaultCommentDelims,
2416                                defaultStringDelims,
2417                                FileContent.scriptCode);
2418 
2419         txtFKinds ~= new FKind("Shell", [], ["sh"],
2420                                shebangLine(lit("sh")), 0,
2421                                [], [],
2422                                defaultCommentDelims,
2423                                defaultStringDelims,
2424                                FileContent.scriptCode);
2425         txtFKinds ~= new FKind("Bash", [], ["bash"],
2426                                shebangLine(lit("bash")), 0,
2427                                [], [],
2428                                defaultCommentDelims,
2429                                defaultStringDelims,
2430                                FileContent.scriptCode);
2431         txtFKinds ~= new FKind("Zsh", [], ["zsh"],
2432                                shebangLine(lit("zsh")), 0,
2433                                [], [],
2434                                defaultCommentDelims,
2435                                defaultStringDelims,
2436                                FileContent.scriptCode);
2437 
2438         txtFKinds ~= new FKind("Batch", [], ["bat", "cmd"], [], 0, [], [],
2439                                [Delim("REM")],
2440                                defaultStringDelims,
2441                                FileContent.scriptCode);
2442 
2443         txtFKinds ~= new FKind("TCL", [], ["tcl", "itcl", "itk", ], [], 0, [], [],
2444                                defaultCommentDelims,
2445                                defaultStringDelims,
2446                                FileContent.scriptCode);
2447         txtFKinds ~= new FKind("Tex", [], ["tex", "cls", "sty", ], [], 0, [], [],
2448                                [Delim("%")],
2449                                defaultStringDelims,
2450                                FileContent.scriptCode);
2451         txtFKinds ~= new FKind("TT", [], ["tt", "tt2", "ttml", ], [], 0, [], [],
2452                                defaultCommentDelims,
2453                                defaultStringDelims,
2454                                FileContent.scriptCode);
2455         txtFKinds ~= new FKind("Viz Basic", [], ["bas", "cls", "frm", "ctl", "vb", "resx", ], [], 0, [], [],
2456                                [Delim("'")],
2457                                defaultStringDelims,
2458                                FileContent.scriptCode);
2459 
2460         txtFKinds ~= new FKind("Verilog", [], ["v", "vh", "sv"], [], 0, [], [],
2461                                cCommentDelims,
2462                                defaultStringDelims,
2463                                FileContent.scriptCode);
2464         txtFKinds ~= new FKind("VHDL", [], ["vhd", "vhdl"], [], 0, [], [],
2465                                [Delim("--")],
2466                                defaultStringDelims,
2467                                FileContent.scriptCode);
2468 
2469         txtFKinds ~= new FKind("Clojure", [], ["clj"], [], 0, [], [],
2470                                [Delim(";")],
2471                                defaultStringDelims,
2472                                FileContent.sourceCode);
2473         txtFKinds ~= new FKind("Go", [], ["go"], [], 0, [], [],
2474                                cCommentDelims,
2475                                defaultStringDelims,
2476                                FileContent.sourceCode);
2477 
2478         auto kindJava = new FKind("Java", [], ["java", "properties"], [], 0, [], [],
2479                                   cCommentDelims,
2480                                   defaultStringDelims,
2481                                   FileContent.sourceCode);
2482         txtFKinds ~= kindJava;
2483         kindJava.operations ~= tuple(FOp.byteCompile, `javac`);
2484 
2485         txtFKinds ~= new FKind("Groovy", [], ["groovy", "gtmpl", "gpp", "grunit"], [], 0, [], [],
2486                                cCommentDelims,
2487                                defaultStringDelims,
2488                                FileContent.sourceCode);
2489         txtFKinds ~= new FKind("Haskell", [], ["hs", "lhs"], [], 0, [], [],
2490                                [Delim("--}"),
2491                                 Delim("{-", "-}")],
2492                                defaultStringDelims,
2493                                FileContent.sourceCode);
2494 
2495         static immutable keywordsJavascript = ["break", "case", "catch", "continue", "debugger", "default", "delete",
2496                                                "do", "else", "finally", "for", "function", "if", "in", "instanceof",
2497                                                "new", "return", "switch", "this", "throw", "try", "typeof", "var",
2498                                                "void", "while", "with" ];
2499         txtFKinds ~= new FKind("JavaScript", [], ["js"],
2500                                [], 0, [],
2501                                keywordsJavascript,
2502                                cCommentDelims,
2503                                defaultStringDelims,
2504                                FileContent.scriptCode);
2505         txtFKinds ~= new FKind("JavaScript Object Notation",
2506                                [], ["json"],
2507                                [], 0, [], [],
2508                                [], // N/A
2509                                defaultStringDelims,
2510                                FileContent.sourceCode);
2511 
2512         auto dubFKind = new FKind("DUB",
2513                                   ["dub.json"], ["json"],
2514                                   [], 0, [], [],
2515                                   [], // N/A
2516                                   defaultStringDelims,
2517                                   FileContent.scriptCode);
2518         txtFKinds ~= dubFKind;
2519         dubFKind.operations ~= tuple(FOp.build, `dub`);
2520 
2521         // TODO Inherit XML
2522         txtFKinds ~= new FKind("JSP", [], ["jsp", "jspx", "jhtm", "jhtml"], [], 0, [], [],
2523                                [Delim("<!--", "--%>"), // XML
2524                                 Delim("<%--", "--%>")],
2525                                defaultStringDelims,
2526                                FileContent.scriptCode);
2527 
2528         txtFKinds ~= new FKind("ActionScript", [], ["as", "mxml"], [], 0, [], [],
2529                                cCommentDelims, // N/A
2530                                defaultStringDelims,
2531                                FileContent.scriptCode);
2532 
2533         txtFKinds ~= new FKind("LUA", [], ["lua"], [], 0, [], [],
2534                                [Delim("--")],
2535                                defaultStringDelims,
2536                                FileContent.scriptCode);
2537         txtFKinds ~= new FKind("Mason", [], ["mas", "mhtml", "mpl", "mtxt"], [], 0, [], [],
2538                                [], // TODO Need symbolic
2539                                defaultStringDelims,
2540                                FileContent.scriptCode);
2541 
2542         txtFKinds ~= new FKind("CFMX", [], ["cfc", "cfm", "cfml"], [], 0, [], [],
2543                                [], // N/A
2544                                defaultStringDelims,
2545                                FileContent.scriptCode);
2546 
2547         // Simulation
2548         static immutable keywordsModelica = ["algorithm", "discrete", "false", "loop", "pure",
2549                                              "and", "each", "final", "model", "record",
2550                                              "annotation", "else", "flow", "not", "redeclare",
2551                                              "elseif", "for", "operator", "replaceable",
2552                                              "block", "elsewhen", "function", "or", "return",
2553                                              "break", "encapsulated", "if", "outer", "stream",
2554                                              "class", "end", "import", "output", "then",
2555                                              "connect", "enumeration", "impure", "package", "true",
2556                                              "connector", "equation", "in", "parameter", "type",
2557                                              "constant", "expandable", "initial", "partial", "when",
2558                                              "constrainedby", "extends", "inner", "protected", "while",
2559                                              "der", "external", "input", "public", "within"];
2560         auto kindModelica = new FKind("Modelica", [], ["mo"], [], 0, [],
2561                                       keywordsModelica,
2562                                       cCommentDelims,
2563                                       defaultStringDelims,
2564                                       FileContent.sourceCode,
2565                                       FileKindDetection.equalsWhatsGiven,
2566                                       Lang.modelica);
2567 
2568         // Numerical Computing
2569 
2570         txtFKinds ~= new FKind("Matlab", [], ["m"], [], 0, [], [],
2571                                [Delim("%{", "}%"), // TODO Prio 1
2572                                 Delim("%")], // TODO Prio 2
2573                                defaultStringDelims,
2574                                FileContent.sourceCode);
2575         auto kindOctave = new FKind("Octave", [], ["m"], [], 0, [], [],
2576                                     [Delim("%{", "}%"), // TODO Prio 1
2577                                      Delim("%"),
2578                                      Delim("#")],
2579                                     defaultStringDelims,
2580                                     FileContent.sourceCode);
2581         txtFKinds ~= kindOctave;
2582         kindOctave.operations ~= tuple(FOp.byteCompile, `octave`);
2583 
2584         txtFKinds ~= new FKind("Julia", [], ["jl"], [], 0, [], [],
2585                                defaultCommentDelims,
2586                                defaultStringDelims,
2587                                FileContent.sourceCode); // ((:execute "julia") (:evaluate "julia -e"))
2588 
2589         txtFKinds ~= new FKind("Erlang", [], ["erl", "hrl"], [], 0, [], [],
2590                                [Delim("%")],
2591                                defaultStringDelims,
2592                                FileContent.sourceCode);
2593 
2594         auto magicForElisp = seq(shebangLine(lit("emacs")),
2595                                  ws(),
2596                                  lit("--script"));
2597         auto kindElisp = new FKind("Emacs-Lisp", [],
2598                                    ["el", "lisp"],
2599                                    magicForElisp, 0, // Script Execution
2600                                    [], [],
2601                                    [Delim(";")],
2602                                    defaultStringDelims,
2603                                    FileContent.sourceCode);
2604         kindElisp.operations ~= tuple(FOp.byteCompile, `emacs -batch -f batch-byte-compile`);
2605         kindElisp.operations ~= tuple(FOp.byteCompile, `emacs --script`);
2606         /* kindELisp.moduleName = "(provide 'MODULE_NAME)"; */
2607         /* kindELisp.moduleImport = "(require 'MODULE_NAME)"; */
2608         txtFKinds ~= kindElisp;
2609 
2610         txtFKinds ~= new FKind("Lisp", [], ["lisp", "lsp"], [], 0, [], [],
2611                                [Delim(";")],
2612                                defaultStringDelims,
2613                                FileContent.sourceCode);
2614         txtFKinds ~= new FKind("PostScript", [], ["ps", "postscript"], [], 0, "%!", [],
2615                                [Delim("%")],
2616                                defaultStringDelims,
2617                                FileContent.sourceCode);
2618 
2619         txtFKinds ~= new FKind("CMake", [], ["cmake"], [], 0, [], [],
2620                                defaultCommentDelims,
2621                                defaultStringDelims,
2622                                FileContent.sourceCode);
2623 
2624         // http://stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
2625         txtFKinds ~= new FKind("Pure ASCII", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
2626                                [], // N/A
2627                                defaultStringDelims,
2628                                FileContent.textASCII); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126
2629         txtFKinds ~= new FKind("8-Bit Text", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
2630                                [], // N/A
2631                                defaultStringDelims,
2632                                FileContent.text8Bit); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126 or 128–255
2633 
2634         txtFKinds ~= new FKind("Assembler", [], ["asm", "s"], [], 0, [], [],
2635                                [], // N/A
2636                                defaultStringDelims,
2637                                FileContent.sourceCode);
2638 
2639         // https://en.wikipedia.org/wiki/Diff
2640         auto diffKind = new FKind("Diff", [], ["diff", "patch"],
2641                                   "diff", 0,
2642                                   [], [],
2643                                   [], // N/A
2644                                   defaultStringDelims,
2645                                   FileContent.text);
2646         txtFKinds ~= diffKind;
2647         diffKind.wikip = "https://en.wikipedia.org/wiki/Diff";
2648 
2649         auto pemCertKind = new FKind(`PEM certificate`, [], [`cert`],
2650                                      `-----BEGIN CERTIFICATE-----`, 0,
2651                                      [], [],
2652                                      [], // N/A
2653                                      [], // N/A
2654                                      FileContent.text,
2655                                      FileKindDetection.equalsContents);
2656         txtFKinds ~= pemCertKind;
2657 
2658         auto pemCertReqKind = new FKind(`PEM certificate request`, [], [`cert`],
2659                                         `-----BEGIN CERTIFICATE REQ`, 0,
2660                                         [], [],
2661                                         [], // N/A
2662                                         [], // N/A
2663                                         FileContent.text,
2664                                         FileKindDetection.equalsContents);
2665         txtFKinds ~= pemCertReqKind;
2666 
2667         auto pemRSAPrivateKeyKind = new FKind(`PEM RSA private key`, [], [`cert`],
2668                                               `-----BEGIN RSA PRIVATE`, 0,
2669                                               [], [],
2670                                               [], // N/A
2671                                               [], // N/A
2672                                               FileContent.text,
2673                                               FileKindDetection.equalsContents);
2674         txtFKinds ~= pemRSAPrivateKeyKind;
2675 
2676         auto pemDSAPrivateKeyKind = new FKind(`PEM DSA private key`, [], [`cert`],
2677                                               `-----BEGIN DSA PRIVATE`, 0,
2678                                               [], [],
2679                                               [], // N/A
2680                                               [], // N/A
2681                                               FileContent.text,
2682                                               FileKindDetection.equalsContents);
2683         txtFKinds ~= pemDSAPrivateKeyKind;
2684 
2685         auto pemECPrivateKeyKind = new FKind(`PEM EC private key`, [], [`cert`],
2686                                               `-----BEGIN EC PRIVATE`, 0,
2687                                               [], [],
2688                                               [], // N/A
2689                                               [], // N/A
2690                                               FileContent.text,
2691                                               FileKindDetection.equalsContents);
2692         txtFKinds ~= pemECPrivateKeyKind;
2693 
2694         // Binaries
2695 
2696         static immutable extsELF = ["o", "so", "ko", "os", "out", "bin", "x", "elf", "axf", "prx", "puff", "none"]; // ELF file extensions
2697 
2698         auto elfKind = new FKind("ELF",
2699                                  [], extsELF, x"7F 45 4C 46", 0, [], [],
2700                                  [], // N/A
2701                                  [], // N/A
2702                                  FileContent.machineCode,
2703                                  FileKindDetection.equalsContents);
2704         elfKind.wikip = "https://en.wikipedia.org/wiki/Executable_and_Linkable_Format";
2705         binFKinds ~= elfKind;
2706         /* auto extsExeELF = ["out", "bin", "x", "elf", ]; // ELF file extensions */
2707         /* auto elfExeKind  = new FKind("ELF executable",    [], extsExeELF,  [0x2, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2708         /* auto elfSOKind   = new FKind("ELF shared object", [], ["so", "ko"],  [0x3, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2709         /* auto elfCoreKind = new FKind("ELF core file",     [], ["core"], [0x4, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
2710         /* binFKinds ~= elfExeKind; */
2711         /* elfKind.subKinds ~= elfSOKind; */
2712         /* elfKind.subKinds ~= elfCoreKind; */
2713         /* elfKind.subKinds ~= elfKind; */
2714 
2715         // TODO Specialize to not steal results from file's magics.
2716         auto linuxFirmwareKind = new FKind("Linux Firmware",
2717                                  [], ["bin", "ucode", "dat", "sbcf", "fw"], [], 0, [], [],
2718                                  [], // N/A
2719                                  [], // N/A
2720                                  FileContent.binaryUnknown,
2721                                  FileKindDetection.equalsParentPathDirsAndName);
2722         linuxFirmwareKind.parentPathDirs = ["lib", "firmware"];
2723         binFKinds ~= linuxFirmwareKind;
2724 
2725         // TODO Specialize to not steal results from file's magics.
2726         auto linuxHwDbKind = new FKind("Linux Hardware Database Index",
2727                                        "hwdb.bin", ["bin"], "KSLPHHRH", 0, [], [],
2728                                        [], // N/A
2729                                        [], // N/A
2730                                        FileContent.binaryUnknown,
2731                                        FileKindDetection.equalsNameAndContents);
2732         binFKinds ~= linuxHwDbKind;
2733 
2734         // Executables
2735         binFKinds ~= new FKind("Mach-O", [], ["o"], x"CE FA ED FE", 0, [], [],
2736                                [], // N/A
2737                                [], // N/A
2738                                FileContent.machineCode, FileKindDetection.equalsContents);
2739 
2740         binFKinds ~= new FKind("modules.symbols.bin", [], ["bin"],
2741                                cast(ubyte[])[0xB0, 0x07, 0xF4, 0x57, 0x00, 0x02, 0x00, 0x01, 0x20], 0, [], [],
2742                                [], // N/A
2743                                [], // N/A
2744                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
2745 
2746         auto kindCOFF = new FKind("COFF/i386/32", [], ["o"], x"4C 01", 0, [], [],
2747                                   [], // N/A
2748                                   [], // N/A
2749                                   FileContent.machineCode, FileKindDetection.equalsContents);
2750         kindCOFF.description = "Common Object File Format";
2751         binFKinds ~= kindCOFF;
2752 
2753         auto kindPECOFF = new FKind("PE/COFF", [], ["cpl", "exe", "dll", "ocx", "sys", "scr", "drv", "obj"],
2754                                     "PE\0\0", 0x60, // And ("MZ") at offset 0x0
2755                                     [], [],
2756                                     [], // N/A
2757                                     [], // N/A
2758                                     FileContent.machineCode, FileKindDetection.equalsContents);
2759         kindPECOFF.description = "COFF Portable Executable";
2760         binFKinds ~= kindPECOFF;
2761 
2762         auto kindDOSMZ = new FKind("DOS-MZ", [], ["exe", "dll"], "MZ", 0, [], [],
2763                                    [], // N/A
2764                                    [], // N/A
2765                                    FileContent.machineCode);
2766         kindDOSMZ.description = "MS-DOS, OS/2 or MS Windows executable";
2767         binFKinds ~= kindDOSMZ;
2768 
2769         // Caches
2770         binFKinds ~= new FKind("ld.so.cache", [], ["cache"], "ld.so-", 0, [], [],
2771                                [], // N/A
2772                                [], // N/A
2773                                FileContent.binaryCache);
2774 
2775         // Profile Data
2776         binFKinds ~= new FKind("perf benchmark data", [], ["data"], "PERFILE2h", 0, [], [],
2777                                [], // N/A
2778                                [], // N/A
2779                                FileContent.performanceBenchmark);
2780 
2781         // Images
2782         binFKinds ~= new FKind("GIF87a", [], ["gif"], "GIF87a", 0, [], [],
2783                                [], // N/A
2784                                [], // N/A
2785                                FileContent.image);
2786         binFKinds ~= new FKind("GIF89a", [], ["gif"], "GIF89a", 0, [], [],
2787                                [], // N/A
2788                                [], // N/A
2789                                FileContent.image);
2790         auto extJPEG = ["jpeg", "jpg", "j2k", "jpeg2000"];
2791         binFKinds ~= new FKind("JPEG", [], extJPEG, x"FF D8", 0, [], [],
2792                                [], // N/A
2793                                [], // N/A
2794                                FileContent.image); // TODO Support ends with [0xFF, 0xD9]
2795         binFKinds ~= new FKind("JPEG/JFIF", [], extJPEG, x"FF D8", 0, [], [],
2796                                [], // N/A
2797                                [], // N/A
2798                                FileContent.image); // TODO Support ends with ['J','F','I','F', 0x00]
2799         binFKinds ~= new FKind("JPEG/Exif", [], extJPEG, x"FF D8", 0, [], [],
2800                                [], // N/A
2801                                [], // N/A
2802                                FileContent.image); // TODO Support contains ['E','x','i','f', 0x00] followed by metadata
2803 
2804         binFKinds ~= new FKind("Pack200-Compressed Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [],
2805                                [], // N/A
2806                                [], // N/A
2807                                FileContent.machineCode);
2808 
2809         binFKinds ~= new FKind("JRun Server Application", [], ["jsa"],
2810                                cast(ubyte[])[0xa2,0xab,0x0b,0xf0,
2811                                              0x01,0x00,0x00,0x00,
2812                                              0x00,0x00,0x20,0x00], 0, [], [],
2813                                [], // N/A
2814                                [], // N/A
2815                                FileContent.machineCode);
2816 
2817         binFKinds ~= new FKind("PNG", [], ["png"],
2818                                cast(ubyte[])[137, 80, 78, 71, 13, 10, 26, 10], 0, [], [],
2819                                [], // N/A
2820                                [], // N/A
2821                                FileContent.image);
2822 
2823         auto icnsKind = new FKind("Apple Icon Image", [], ["icns"],
2824                                   "icns", 0, [], [],
2825                                   [], // N/A
2826                                   [], // N/A
2827                                   FileContent.imageIcon);
2828         icnsKind.wikip = "https://en.wikipedia.org/wiki/Apple_Icon_Image_format";
2829         binFKinds ~= icnsKind;
2830         // TODO read with http://icns.sourceforge.net/
2831 
2832         auto kindPDF = new FKind("PDF", [], ["pdf"], "%PDF", 0, [], [],
2833                                  [], // N/A
2834                                  [], // N/A
2835                                  FileContent.document);
2836         kindPDF.description = "Portable Document Format";
2837         binFKinds ~= kindPDF;
2838 
2839         auto kindMarkdownFmt = new FKind("Markdown", [], ["md", "markdown"],
2840                                          [], 0,
2841                                          [], [],
2842                                          [], // N/A
2843                                          defaultStringDelims,
2844                                          FileContent.binaryCache);
2845         kindMarkdownFmt.wikip = "https://en.wikipedia.org/wiki/Markdown";
2846         binFKinds ~= kindMarkdownFmt;
2847 
2848         auto kindAsciiDocFmt = new FKind("AsciiDoc", [], ["ad", "adoc", "asciidoc"],
2849                                          [], 0,
2850                                          [], [],
2851                                          [], // N/A
2852                                          defaultStringDelims,
2853                                          FileContent.binaryCache);
2854         binFKinds ~= kindAsciiDocFmt;
2855 
2856         auto kindLatexPDFFmt = new FKind("LaTeX PDF Format", [], ["fmt"],
2857                                          cast(ubyte[])['W','2','T','X',
2858                                                        0x00,0x00,0x00,0x08,
2859                                                        0x70,0x64,0x66,0x74,
2860                                                        0x65,0x78], 0, [], [],
2861                                          [], // N/A
2862                                          defaultStringDelims,
2863                                          FileContent.binaryCache);
2864         binFKinds ~= kindLatexPDFFmt;
2865 
2866         binFKinds ~= new FKind("Microsoft Office Document", [], ["doc", "docx", "xls", "ppt"], x"D0 CF 11 E0", 0, [], [],
2867                                [], // N/A
2868                                defaultStringDelims,
2869                                FileContent.document);
2870 
2871         // Fonts
2872 
2873         auto kindTTF = new FKind("TrueType Font", [], ["ttf"], x"00 01 00 00 00", 0, [], [],
2874                                  [], // N/A
2875                                  defaultStringDelims,
2876                                  FileContent.font);
2877         binFKinds ~= kindTTF;
2878 
2879         auto kindTTCF = new FKind("TrueType/OpenType Font Collection", [], ["ttc"], "ttcf", 0, [], [],
2880                                   [], // N/A
2881                                   defaultStringDelims,
2882                                   FileContent.font);
2883         binFKinds ~= kindTTCF;
2884 
2885         auto kindWOFF = new FKind("Web Open Font", [], ["woff"], "wOFF", 0, [], [],
2886                                   [], // N/A
2887                                   defaultStringDelims,
2888                                   FileContent.font); // TODO container for kindSFNT
2889         binFKinds ~= kindWOFF;
2890 
2891         auto kindSFNT = new FKind("Spline Font", [], ["sfnt"], "sfnt", 0, [], [],
2892                                   [], // N/A
2893                                   defaultStringDelims,
2894                                   FileContent.font); // TODO container for Sfnt
2895         binFKinds ~= kindSFNT;
2896 
2897         // Audio
2898 
2899         binFKinds ~= new FKind("MIDI", [], ["mid", "midi"], "MThd", 0, [], [],
2900                                [], // N/A
2901                                defaultStringDelims,
2902                                FileContent.audio, FileKindDetection.equalsNameAndContents);
2903 
2904         // Au
2905         auto auKind = new FKind("Au", [], ["au", "snd"], ".snd", 0, [], [],
2906                                 [], // N/A
2907                                 defaultStringDelims,
2908                                 FileContent.audio, FileKindDetection.equalsNameAndContents);
2909         auKind.wikip = "https://en.wikipedia.org/wiki/Au_file_format";
2910         binFKinds ~= auKind;
2911 
2912         binFKinds ~= new FKind("Ogg", [], ["ogg", "oga", "ogv"],
2913                                cast(ubyte[])[0x4F,0x67,0x67,0x53,
2914                                              0x00,0x02,0x00,0x00,
2915                                              0x00,0x00,0x00,0x00,
2916                                              0x00, 0x00], 0, [], [],
2917                                [], // N/A
2918                                defaultStringDelims,
2919                                FileContent.media);
2920 
2921         // TODO Support RIFF....WAVEfmt using symbolic seq(lit("RIFF"), any(4), lit("WAVEfmt"))
2922         binFKinds ~= new FKind("WAV", [], ["wav", "wave"], "RIFF", 0, [], [],
2923                                [], // N/A
2924                                defaultStringDelims,
2925                                FileContent.audio, FileKindDetection.equalsContents);
2926 
2927         // Archives
2928 
2929         auto kindBSDAr = new FKind("BSD Archive", [], ["a", "ar"], "!<arch>\n", 0, [], [],
2930                                    [], // N/A
2931                                    defaultStringDelims,
2932                                    FileContent.archive, FileKindDetection.equalsContents);
2933         kindBSDAr.description = "BSD 4.4 and Mac OSX Archive";
2934         binFKinds ~= kindBSDAr;
2935 
2936         binFKinds ~= new FKind("GNU tar Archive", [], ["tar"], "ustar\040\040\0", 257, [], [],
2937                                [], // N/A
2938                                defaultStringDelims,
2939                                FileContent.archive, FileKindDetection.equalsContents); // TODO Specialized Derivation of "POSIX tar Archive"
2940         binFKinds ~= new FKind("POSIX tar Archive", [], ["tar"], "ustar\0", 257, [], [],
2941                                [], // N/A
2942                                defaultStringDelims,
2943                                FileContent.archive, FileKindDetection.equalsContents);
2944 
2945         binFKinds ~= new FKind("pkZip Archive", [], ["zip", "jar", "pptx", "docx", "xlsx"], "PK\003\004", 0, [], [],
2946                                [], // N/A
2947                                defaultStringDelims,
2948                                FileContent.archive, FileKindDetection.equalsContents);
2949         binFKinds ~= new FKind("pkZip Archive (empty)", [], ["zip", "jar"], "PK\005\006", 0, [], [],
2950                                [], // N/A
2951                                defaultStringDelims,
2952                                FileContent.archive, FileKindDetection.equalsContents);
2953 
2954         binFKinds ~= new FKind("PAK file", [], ["pak"], cast(ubyte[])[0x40, 0x00, 0x00, 0x00,
2955                                                                       0x4a, 0x12, 0x00, 0x00,
2956                                                                       0x01, 0x2d, 0x23, 0xcb,
2957                                                                       0x6d, 0x00, 0x00, 0x2f], 0, [], [],
2958                                [], // N/A
2959                                defaultStringDelims,
2960                                FileContent.spellCheckWordList,
2961                                FileKindDetection.equalsNameAndContents);
2962 
2963         binFKinds ~= new FKind("LZW-Compressed", [], ["z", "tar.z"], x"1F 9D", 0, [], [],
2964                                [], // N/A
2965                                defaultStringDelims,
2966                                FileContent.compressed);
2967         binFKinds ~= new FKind("LZH-Compressed", [], ["z", "tar.z"], x"1F A0", 0, [], [],
2968                                [], // N/A
2969                                defaultStringDelims,
2970                                FileContent.compressed);
2971 
2972         binFKinds ~= new FKind("CompressedZ", [], ["z"], "\037\235", 0, [], [],
2973                                [], // N/A
2974                                defaultStringDelims,
2975                                FileContent.compressed);
2976         binFKinds ~= new FKind("GNU-Zip (gzip)", [], ["tgz", "gz", "gzip", "dz"], "\037\213", 0, [], [],
2977                                [], // N/A
2978                                defaultStringDelims,
2979                                FileContent.compressed);
2980         binFKinds ~= new FKind("BZip", [], ["bz2", "bz", "tbz2", "bzip2"], "BZh", 0, [], [],
2981                                [], // N/A
2982                                defaultStringDelims,
2983                                FileContent.compressed);
2984         binFKinds ~= new FKind("XZ/7-Zip", [], ["xz", "txz", "7z", "t7z", "lzma", "tlzma", "lz", "tlz"],
2985                                cast(ubyte[])[0xFD, '7', 'z', 'X', 'Z', 0x00], 0, [], [],
2986                                [], // N/A
2987                                defaultStringDelims,
2988                                FileContent.compressed);
2989         binFKinds ~= new FKind("LZX", [], ["lzx"], "LZX", 0, [], [],
2990                                [], // N/A
2991                                defaultStringDelims,
2992                                FileContent.compressed);
2993         binFKinds ~= new FKind("SZip", [], ["szip"], "SZ\x0a\4", 0, [], [],
2994                                [], // N/A
2995                                defaultStringDelims,
2996                                FileContent.compressed);
2997 
2998         binFKinds ~= new FKind("Git Bundle", [], ["bundle"], "# v2 git bundle", 0, [], [],
2999                                [], // N/A
3000                                defaultStringDelims,
3001                                FileContent.versionControl);
3002 
3003         binFKinds ~= new FKind("Emacs-Lisp Bytes Code", [], ["elc"], ";ELC\27\0\0\0", 0, [], [],
3004                                [], // N/A
3005                                defaultStringDelims,
3006                                FileContent.byteCode, FileKindDetection.equalsContents);
3007         binFKinds ~= new FKind("Python Bytes Code", [], ["pyc"], x"0D 0A", 2, [], [],
3008                                [], // N/A
3009                                defaultStringDelims,
3010                                FileContent.byteCode, FileKindDetection.equalsNameAndContents); // TODO Handle versions at src[0..2]
3011 
3012         binFKinds ~= new FKind("Zshell Wordcode", [], ["zwc"], x"07 06 05 04", 0, [], [],
3013                                [], // N/A
3014                                defaultStringDelims,
3015                                FileContent.byteCode);
3016 
3017         binFKinds ~= new FKind("Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [],
3018                                [], // N/A
3019                                defaultStringDelims,
3020                                FileContent.byteCode, FileKindDetection.equalsContents);
3021         binFKinds ~= new FKind("Java KeyStore", [], [], x"FE ED FE ED", 0, [], [],
3022                                [], // N/A
3023                                defaultStringDelims,
3024                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
3025         binFKinds ~= new FKind("Java JCE KeyStore", [], [], x"CE CE CE CE", 0, [], [],
3026                                [], // N/A
3027                                defaultStringDelims,
3028                                FileContent.binaryUnknown, FileKindDetection.equalsContents);
3029 
3030         binFKinds ~= new FKind("LLVM Bitcode", [], ["bc"], "BC", 0, [], [],
3031                                [], // N/A
3032                                defaultStringDelims,
3033                                FileContent.byteCode, FileKindDetection.equalsNameAndContents);
3034 
3035         binFKinds ~= new FKind("MATLAB MAT", [], ["mat"], "MATLAB 5.0 MAT-file", 0, [], [],
3036                                [], // N/A
3037                                defaultStringDelims,
3038                                FileContent.numericalData, FileKindDetection.equalsContents);
3039 
3040         auto hdf4Kind = new FKind("HDF4", [], ["hdf", "h4", "hdf4", "he4"], x"0E 03 13 01", 0, [], [],
3041                                   [], // N/A
3042                                   defaultStringDelims,
3043                                   FileContent.numericalData);
3044         binFKinds ~= hdf4Kind;
3045         hdf4Kind.description = "Hierarchical Data Format version 4";
3046 
3047         auto hdf5Kind = new FKind("HDF5", "Hierarchical Data Format version 5", ["hdf", "h5", "hdf5", "he5"], x"89 48 44 46 0D 0A 1A 0A", 0, [], [],
3048                                   [], // N/A
3049                                   defaultStringDelims,
3050                                   FileContent.numericalData);
3051         binFKinds ~= hdf5Kind;
3052         hdf5Kind.description = "Hierarchical Data Format version 5";
3053 
3054         auto numpyKind = new FKind("NUMPY", "NUMPY", ["npy", "numpy"], x"93 4E 55 4D 50 59", 0, [], [],
3055                                   [], // N/A
3056                                   defaultStringDelims,
3057                                   FileContent.numericalData);
3058         binFKinds ~= numpyKind;
3059 
3060         binFKinds ~= new FKind("GNU GLOBAL Database", ["GTAGS", "GRTAGS", "GPATH", "GSYMS"], [], "b1\5\0", 0, [], [],
3061                                [], // N/A
3062                                defaultStringDelims,
3063                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3064 
        // SQL databases (MySQL and SQLite)
3066         static immutable extsSQLite = ["sql", "sqlite", "sqlite3"];
3067         binFKinds ~= new FKind("MySQL table definition file", [], extsSQLite, x"FE 01", 0, [], [],
3068                                [], // N/A
3069                                defaultStringDelims,
3070                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3071         binFKinds ~= new FKind("MySQL MyISAM index file", [], extsSQLite, x"FE FE 07", 0, [], [],
3072                                [], // N/A
3073                                defaultStringDelims,
3074                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3075         binFKinds ~= new FKind("MySQL MyISAM compressed data file", [], extsSQLite, x"FE FE 08", 0, [], [],
3076                                [], // N/A
3077                                defaultStringDelims,
3078                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3079         binFKinds ~= new FKind("MySQL Maria index file", [], extsSQLite, x"FF FF FF", 0, [], [],
3080                                [], // N/A
3081                                defaultStringDelims,
3082                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3083         binFKinds ~= new FKind("MySQL Maria compressed data file", [], extsSQLite, x"FF FF FF", 0, [], [],
3084                                [], // N/A
3085                                defaultStringDelims,
3086                                FileContent.tagsDatabase, FileKindDetection.equalsContents);
3087         binFKinds ~= new FKind("SQLite format 3", [], extsSQLite , "SQLite format 3", 0, [], [],
3088                                [], // N/A
3089                                defaultStringDelims,
3090                                FileContent.tagsDatabase, FileKindDetection.equalsContents); // TODO Why is this detected at 49:th try?
3091 
3092         binFKinds ~= new FKind("Vim swap", [], ["swo"], [], 0, "b0VIM ", [],
3093                                [], // N/A
3094                                defaultStringDelims,
3095                                FileContent.binaryCache);
3096 
3097         binFKinds ~= new FKind("PCH", "(GCC) Precompiled header", ["pch", "gpch"], "gpch", 0, [], [],
3098                                [], // N/A
3099                                defaultStringDelims,
3100                                FileContent.cache);
3101 
3102         binFKinds ~= new FKind("Firmware", [], ["fw"], cast(ubyte[])[], 0, [], [],
3103                                [], // N/A
3104                                defaultStringDelims,
3105                                FileContent.cache, FileKindDetection.equalsName); // TODO Add check for binary contents and that some parenting directory is named "firmware"
3106 
3107         binFKinds ~= new FKind("LibreOffice or OpenOffice RDB", [], ["rdb"],
3108                                cast(ubyte[])[0x43,0x53,0x4d,0x48,
3109                                              0x4a,0x2d,0xd0,0x26,
3110                                              0x00,0x02,0x00,0x00,
3111                                              0x00,0x02,0x00,0x02], 0, [], [],
3112                                [], // N/A
3113                                defaultStringDelims,
3114                                FileContent.database, FileKindDetection.equalsName); // TODO Add check for binary contents and that some parenting directory is named "firmware"
3115 
3116         binFKinds ~= new FKind("sconsign", [], ["sconsign", "sconsign.dblite", "dblite"], x"7d 71 01 28", 0, [], [],
3117                                [], // N/A
3118                                defaultStringDelims,
3119                                FileContent.cache, FileKindDetection.equalsNameAndContents);
3120 
3121         binFKinds ~= new FKind("GnuPG (GPG) key public ring", [], ["gpg"], x"99 01", 0, [], [],
3122                                [], // N/A
3123                                defaultStringDelims,
3124                                FileContent.binary, FileKindDetection.equalsNameOrContents);
3125         binFKinds ~= new FKind("GnuPG (GPG) encrypted data", [], [], x"85 02", 0, [], [],
3126                                [], // N/A
3127                                defaultStringDelims,
3128                                FileContent.binary, FileKindDetection.equalsContents);
3129         binFKinds ~= new FKind("GNUPG (GPG) key trust database", [], [], "\001gpg", 0, [], [],
3130                                [], // N/A
3131                                defaultStringDelims,
3132                                FileContent.binary, FileKindDetection.equalsContents);
3133 
3134         binFKinds ~= new FKind("aspell word list (rowl)", [], ["rws"], "aspell default speller rowl ", 0, [], [],
3135                                [], // N/A
3136                                defaultStringDelims,
3137                                FileContent.spellCheckWordList, FileKindDetection.equalsNameAndContents);
3138 
3139         binFKinds ~= new FKind("DS_Store", ".DS_Store", [], "Mac OS X Desktop Services Store ", 0, [], [],
3140                                [], // N/A
3141                                [],
3142                                FileContent.binary, FileKindDetection.equalsName);
3143 
3144         /* Fax image created in the CCITT Group 3 compressed format, which is
3145          * used for digital transmission of fax data and supports 1 bit per
3146          * pixel
3147          */
3148         binFKinds ~= new FKind("CCITT Group 3 compressed format", [], // TODO Altenative name: Digifax-G3, G3 Fax
3149                                ["g3", "G3"],
3150                                "PC Research, Inc", 0, [], [],
3151                                [], // N/A
3152                                [],
3153                                FileContent.imageModemFax1BPP, FileKindDetection.equalsContents);
3154 
3155         binFKinds ~= new FKind("Raw Modem Data version 1", [],
3156                                ["rmd1"],
3157                                "RMD1", 0, [], [],
3158                                [], // N/A
3159                                [],
3160                                FileContent.modemData, FileKindDetection.equalsContents);
3161 
3162         binFKinds ~= new FKind("Portable voice format 1", [],
3163                                ["pvf1"],
3164                                "PVF1\n", 0, [], [],
3165                                [], // N/A
3166                                [],
3167                                FileContent.voiceModem, FileKindDetection.equalsContents);
3168 
3169         binFKinds ~= new FKind("Portable voice format 2", [],
3170                                ["pvf2"],
3171                                "PVF2\n", 0, [], [],
3172                                [], // N/A
3173                                [],
3174                                FileContent.voiceModem, FileKindDetection.equalsContents);
3175 
3176         allFKinds ~= txtFKinds;
3177         allFKinds ~= binFKinds;
3178 
3179         assert(allFKinds.byIndex.length ==
3180                (txtFKinds.byIndex.length +
3181                 binFKinds.byIndex.length));
3182 
3183         assert(allFKinds.byId.length ==
3184                (txtFKinds.byId.length +
3185                 binFKinds.byId.length));
3186 
3187         txtFKinds.rehash;
3188         binFKinds.rehash;
3189         allFKinds.rehash;
3190     }
3191 
3192     // Code
3193 
3194     // Interpret Command Line
3195     void loadDirKinds()
3196     {
3197         vcDirKinds ~= new DirKind(".git", "Git");
3198         vcDirKinds ~= new DirKind(".svn", "Subversion (Svn)");
3199         vcDirKinds ~= new DirKind(".bzr", "Bazaar (Bzr)");
3200         vcDirKinds ~= new DirKind("RCS", "RCS");
3201         vcDirKinds ~= new DirKind("CVS", "CVS");
3202         vcDirKinds ~= new DirKind("MCVS", "MCVS");
3203         vcDirKinds ~= new DirKind("RCS", "RCS");
3204         vcDirKinds ~= new DirKind(".hg", "Mercurial (Hg)");
3205         vcDirKinds ~= new DirKind("SCCS", "SCCS");
3206         vcDirKinds ~= new DirKind(".wact", "WACT");
3207         vcDirKinds ~= new DirKind("_MTN", "Monotone");
3208         vcDirKinds ~= new DirKind("_darcs", "Darcs");
3209         vcDirKinds ~= new DirKind("{arch}", "Arch");
3210 
3211         skippedDirKinds ~= vcDirKinds;
3212 
3213         DirKind[string] vcDirKindsMap_;
3214         foreach (kind; vcDirKinds)
3215         {
3216             vcDirKindsMap[kind.fileName] = kind;
3217         }
3218         vcDirKindsMap.rehash;
3219 
3220         skippedDirKinds ~= new DirKind(".trash",  "Trash");
3221         skippedDirKinds ~= new DirKind(".undo",  "Undo");
3222         skippedDirKinds ~= new DirKind(".deps",  "Dependencies");
3223         skippedDirKinds ~= new DirKind(".backups",  "Backups");
3224         skippedDirKinds ~= new DirKind(".autom4te.cache",  "Automake Cache");
3225 
3226         foreach (kind; skippedDirKinds) { skippedDirKindsMap[kind.fileName] = kind; }
3227         skippedDirKindsMap.rehash;
3228     }
3229 
    // Scan configuration, initialised to the `standard` member of each enum.
    ScanContext scanContext = ScanContext.standard;
    KeyStrictness keyStrictness = KeyStrictness.standard;

    // Duplicate-related switches, all off by default.
    // NOTE(review): presumably toggled from command-line options - confirm.
    bool showNameDups = false;
    bool showTreeContentDups = false;
    bool showFileContentDups = false;
    bool showELFSymbolDups = false;
    bool linkContentDups = false;

    // Symbolic-link handling; broken links and cycles are reported by default.
    bool showLinkDups = false;
    SymlinkFollowContext followSymlinks = SymlinkFollowContext.external;
    bool showBrokenSymlinks = true;
    bool showSymlinkCycles = true;

    // Further output switches, all off by default.
    bool showAnyDups = false;
    bool showMMaps = false;
    bool showUsage = false;
    bool showSHA1 = false;
    bool showLineCounts = false;

    // Counters per file category, starting at zero. `noDirs` is incremented
    // by the `Dir` constructors whenever they are given a non-null stats object.
    uint64_t noFiles = 0;
    uint64_t noRegFiles = 0;
    uint64_t noSymlinks = 0;
    uint64_t noSpecialFiles = 0;
    uint64_t noDirs = 0;

    // Counters for scanned entries, same categories as above.
    // NOTE(review): the exact increment sites are outside this view.
    uint64_t noScannedFiles = 0;
    uint64_t noScannedRegFiles = 0;
    uint64_t noScannedSymlinks = 0;
    uint64_t noScannedSpecialFiles = 0;
    uint64_t noScannedDirs = 0;

    // Running denseness sums kept as exact rationals (initially 0/1), plus
    // the number of samples accumulated.
    auto shallowDensenessSum = Rational!ulong(0, 1);
    auto deepDensenessSum = Rational!ulong(0, 1);
    uint64_t densenessCount = 0;

    // Requested file operation; none by default.
    FOp fOp = FOp.none;

    // Key interpretation switches, all off by default.
    bool keyAsWord = false;
    bool keyAsSymbol = false;
    bool keyAsAcronym = false;
    bool keyAsExact = false;

    bool showTree = false;

    // Output presentation switches, all off by default.
    bool useHTML = false;
    bool browseOutput = false;
    bool collectTypeHits = false;
    bool colorFlag = false;

    // NOTE(review): -1 presumably means "no depth limit" - confirm against users.
    int scanDepth = -1;

    bool demangleELF = true;

    bool recache = false;

    bool useNGrams = false;

    // Paths are formatted relative by default.
    PathFormat pathFormat = PathFormat.relative;

    // Defaults: sort sub-entries on last-modified time, do not build, and
    // use the `internal` duplicates context.
    DirSorting subsSorting = DirSorting.onTimeLastModified;
    BuildType buildType = BuildType.none;
    DuplicatesContext duplicatesContext = DuplicatesContext.internal;

    // Top-level directories and the file-system root directory object.
    // NOTE(review): where these are populated is outside this view.
    Dir[] topDirs;
    Dir rootDir;
3296 }
3297 
/** Aggregated hit and byte counters. */
struct Results
{
    // Hit counters, in order: total number of hits, number of files with hits.
    size_t numTotalHits,
           numFilesWithHits;

    // Byte counters, in order: bytes total, contents bytes total,
    // bytes scanned, bytes skipped, bytes unreadable.
    Bytes64 noBytesTotal,
            noBytesTotalContents,
            noBytesScanned,
            noBytesSkipped,
            noBytesUnreadable;
}
3308 
version(cerealed)
{
    /** Pass a `SysTime` through `cereal` by way of its `stdTime` value.

        The `stdTime` of `systime` is handed to `cereal.grain`; afterwards
        `systime` is rebuilt from that value unless the value is zero.

        NOTE(review): `T` does not appear in the parameter list, so it cannot
        be deduced from call arguments and must be supplied explicitly -
        confirm how this overload is meant to be instantiated.
    */
    void grain(T)(ref Cereal cereal, ref SysTime systime)
    {
        auto ticks = systime.stdTime;
        cereal.grain(ticks);

        // A zero value leaves `systime` untouched.
        if (ticks == 0)
        {
            return;
        }
        systime = SysTime(ticks);
    }
}
3321 
/** Directory Sorting Order. */
enum DirSorting
{
    // `onTimeCreated` is deliberately left out: it is Windows only. Linux
    // stores the creation time on ext4 but no standard interface exists yet;
    // it will probably be called xstat().
    onTimeLastModified, /// Order on time of last modification.
    onTimeLastAccessed, /// Order on time of last access.
    onSize,             /// Order on size.
    onNothing,          /// Keep whatever order the container yields.
}
3333 
/** Build Type. */
enum BuildType
{
    /// Don't compile.
    none,
    /// Compile with debug symbols.
    devel,
    /// Compile without debugs symbols and optimizations.
    release,
    /// Default build type, an alias of `devel`.
    standard = devel,
}
3341 
/** Path Format. */
enum PathFormat
{
    absolute, /// Absolute paths.
    relative, /// Relative paths.
}
3347 
3348 /** Dir.
3349  */
3350 class Dir : File
3351 {
3352     /** Construct File System Root Directory. */
3353     this(Dir parent = null, GStats gstats = null)
3354     {
3355         super(parent);
3356         this._gstats = gstats;
3357         if (gstats) { ++gstats.noDirs; }
3358     }
3359 
3360     this(string root_path, GStats gstats)
3361         in { assert(root_path == "/"); assert(gstats); }
3362     do
3363     {
3364         auto rootDent = DirEntry(root_path);
3365         Dir rootParent = null;
3366         this(rootDent, rootParent, gstats);
3367     }
3368 
3369     this(ref DirEntry dent, Dir parent, GStats gstats)
3370         in { assert(gstats); }
3371     do
3372     {
3373         this(dent.name.baseName, parent, dent.size.Bytes64, dent.timeLastModified, dent.timeLastAccessed, gstats);
3374     }
3375 
3376     this(string name, Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed,
3377          GStats gstats = null)
3378     {
3379         super(name, parent, size, timeLastModified, timeLastAccessed);
3380         this._gstats = gstats;
3381         if (gstats) { ++gstats.noDirs; }
3382     }
3383 
3384     override string toTextual() const @property { return "Directory"; }
3385 
3386     override Bytes64 treeSize() @property @trusted /* @safe nothrow */
3387     {
3388         if (_treeSize.isUntouched)
3389         {
3390             _treeSize = (this.size +
3391                          reduce!"a+b"(0.Bytes64,
3392                                       subs.byValue.map!"a.treeSize")); // recurse!
3393         }
3394         return _treeSize.get.bytes;
3395     }
3396 
3397     /** Returns: Directory Tree Content Id of `this`. */
3398     override const(SHA1Digest) treeContentId() @property @trusted /* @safe nothrow */
3399     {
3400         if (_treeContentId.isUntouched)
3401         {
3402             _treeContentId = subs.byValue.map!"a.treeContentId".sha1Of; // TODO join loops for calculating treeSize
3403             assert(_treeContentId, "Zero tree content digest");
3404             if (treeSize() != 0)
3405             {
3406                 gstats.filesByContentId[_treeContentId] ~= assumeNotNull(cast(File)this); // TODO Avoid cast when DMD and NotNull is fixed
3407             }
3408         }
3409         return _treeContentId;
3410     }
3411 
3412     override Face!Color face() const @property @safe pure nothrow { return dirFace; }
3413 
3414     /** Return true if `this` is a file system root directory. */
3415     bool isRoot() @property @safe const pure nothrow { return !parent; }
3416 
3417     GStats gstats(GStats gstats) @property @safe pure /* nothrow */ {
3418         return this._gstats = gstats;
3419     }
3420     GStats gstats() @property @safe nothrow
3421     {
3422         if (!_gstats && this.parent)
3423         {
3424             _gstats = this.parent.gstats();
3425         }
3426         return _gstats;
3427     }
3428 
3429     /** Returns: Depth of Depth from File System root to this File. */
3430     override int depth() @property @safe nothrow
3431     {
3432         if (_depth ==- 1)
3433         {
3434             _depth = parent ? parent.depth + 1 : 0; // memoized depth
3435         }
3436         return _depth;
3437     }
3438 
3439     /** Scan `this` recursively for a non-diretory file with basename `name`.
3440         TODO Reuse range based algorithm this.tree(depthFirst|breadFirst)
3441      */
3442     File find(string name) @property
3443     {
3444         auto subs_ = subs();
3445         if (name in subs_)
3446         {
3447             auto hit = subs_[name];
3448             Dir hitDir = cast(Dir)hit;
3449             if (!hitDir) // if not a directory
3450                 return hit;
3451         }
3452         else
3453         {
3454             foreach (sub; subs_)
3455             {
3456                 Dir subDir = cast(Dir)sub;
3457                 if (subDir)
3458                 {
3459                     auto hit = subDir.find(name);
3460                     if (hit) // if not a directory
3461                         return hit;
3462                 }
3463             }
3464         }
3465         return null;
3466     }
3467 
3468     /** Append Tree Statistics. */
3469     void addTreeStatsFromSub(F)(NotNull!F subFile, ref DirEntry subDent)
3470     {
3471         if (subDent.isFile)
3472         {
3473             /* _treeSize += subDent.size.Bytes64; */
3474             // dbg("Updating ", _treeSize, " of ", path);
3475 
3476             /** TODO Move these overloads to std.datetime */
3477             auto ref min(in SysTime a, in SysTime b) @trusted pure nothrow { return (a < b ? a : b); }
3478             auto ref max(in SysTime a, in SysTime b) @trusted pure nothrow { return (a > b ? a : b); }
3479 
3480             const lastMod = subDent.timeLastModified;
3481             _timeModifiedInterval = Interval!SysTime(min(lastMod, _timeModifiedInterval.begin),
3482                                                      max(lastMod, _timeModifiedInterval.end));
3483             const lastAcc = subDent.timeLastAccessed;
3484             _timeAccessedInterval = Interval!SysTime(min(lastAcc, _timeAccessedInterval.begin),
3485                                                      max(lastAcc, _timeAccessedInterval.end));
3486         }
3487     }
3488 
3489     /** Update Statistics for Sub-File `sub` with `subDent` of `this` Dir. */
3490     void updateStats(F)(NotNull!F subFile, ref DirEntry subDent, bool isRegFile)
3491     {
3492         auto lGS = gstats();
3493         if (lGS)
3494         {
3495             if (lGS.showNameDups/*  && */
3496                 /* !subFile.underAnyDir!(a => a.name in lGS.skippedDirKindsMap) */)
3497             {
3498                 lGS.filesByName[subFile.name] ~= cast(NotNull!File)subFile;
3499             }
3500             if (lGS.showLinkDups &&
3501                 isRegFile)
3502             {
3503                 import core.sys.posix.sys.stat;
3504                 immutable stat_t stat = subDent.statBuf();
3505                 if (stat.st_nlink >= 2)
3506                 {
3507                     lGS.filesByInode[stat.st_ino] ~= cast(NotNull!File)subFile;
3508                 }
3509             }
3510         }
3511     }
3512 
3513     /** Load Contents of `this` Directory from Disk using DirEntries.
3514         Returns: `true` iff Dir was updated (reread) from disk.
3515     */
3516     bool load(int depth = 0, bool force = false)
3517     {
3518         import std.range: empty;
3519         if (!_obseleteDir && // already loaded
3520             !force)          // and not forced reload
3521         {
3522             return false;    // signal already scanned
3523         }
3524 
3525         // dbg("Zeroing ", _treeSize, " of ", path);
3526         _treeSize.reset; // this.size;
3527         auto oldSubs = _subs;
3528         _subs.reset;
3529         assert(_subs.length == 0); // TODO Remove when verified
3530 
3531         import std.file: dirEntries, SpanMode;
3532         auto entries = dirEntries(path, SpanMode.shallow, false); // false: skip symlinks
3533         foreach (dent; entries)
3534         {
3535             immutable basename = dent.name.baseName;
3536             File sub = null;
3537             if (basename in oldSubs)
3538             {
3539                 sub = oldSubs[basename]; // reuse from previous cache
3540             }
3541             else
3542             {
3543                 bool isRegFile = false;
3544                 if (dent.isSymlink)
3545                 {
3546                     sub = new Symlink(dent, assumeNotNull(this));
3547                 }
3548                 else if (dent.isDir)
3549                 {
3550                     sub = new Dir(dent, this, gstats);
3551                 }
3552                 else if (dent.isFile)
3553                 {
3554                     // TODO Delay construction of and specific files such as
3555                     // CFile, ELFFile, after FKind-recognition has been made.
3556                     sub = new RegFile(dent, assumeNotNull(this));
3557                     isRegFile = true;
3558                 }
3559                 else
3560                 {
3561                     sub = new SpecFile(dent, assumeNotNull(this));
3562                 }
3563                 updateStats(enforceNotNull(sub), dent, isRegFile);
3564             }
3565             auto nnsub = enforceNotNull(sub);
3566             addTreeStatsFromSub(nnsub, dent);
3567             _subs[basename] = nnsub;
3568         }
3569         _subs.rehash;           // optimize hash for faster lookups
3570 
3571         _obseleteDir = false;
3572         return true;
3573     }
3574 
3575     bool reload(int depth = 0) { return load(depth, true); }
3576     alias sync = reload;
3577 
3578     /* TODO Can we get make this const to the outside world perhaps using inout? */
3579     ref NotNull!File[string] subs() @property { load(); return _subs; }
3580 
3581     NotNull!File[] subsSorted(DirSorting sorted = DirSorting.onTimeLastModified) @property
3582     {
3583         load();
3584         auto ssubs = _subs.values;
3585         /* TODO Use radix sort to speed things up. */
3586         final switch (sorted)
3587         {
3588             /* case DirSorting.onTimeCreated: */
3589             /*     break; */
3590         case DirSorting.onTimeLastModified:
3591             ssubs.sort!((a, b) => (a.timeLastModified >
3592                                    b.timeLastModified));
3593             break;
3594         case DirSorting.onTimeLastAccessed:
3595             ssubs.sort!((a, b) => (a.timeLastAccessed >
3596                                    b.timeLastAccessed));
3597             break;
3598         case DirSorting.onSize:
3599             ssubs.sort!((a, b) => (a.size >
3600                                    b.size));
3601             break;
3602         case DirSorting.onNothing:
3603             break;
3604         }
3605         return ssubs;
3606     }
3607 
3608     File sub(Name)(Name sub_name)
3609     {
3610         load();
3611         return (sub_name in _subs) ? _subs[sub_name] : null;
3612     }
3613     File sub(File sub)
3614     {
3615         load();
3616         return (sub.path in _subs) != null ? sub : null;
3617     }
3618 
3619     version(cerealed)
3620     {
3621         void accept(Cereal cereal)
3622         {
3623             auto stdTime = timeLastModified.stdTime;
3624             cereal.grain(name, size, stdTime);
3625             timeLastModified = SysTime(stdTime);
3626         }
3627     }
3628     version(msgpack)
3629     {
3630         /** Construct from msgpack `unpacker`.  */
3631         this(Unpacker)(ref Unpacker unpacker)
3632         {
3633             fromMsgpack(msgpack.Unpacker(unpacker));
3634         }
3635 
3636         void toMsgpack(Packer)(ref Packer packer) const
3637         {
3638             /* writeln("Entering Dir.toMsgpack ", this.name); */
3639             packer.pack(name, size,
3640                         timeLastModified.stdTime,
3641                         timeLastAccessed.stdTime,
3642                         kind);
3643 
3644             // Contents
3645             /* TODO serialize map of polymorphic objects using
3646              * packer.packArray(_subs) and type trait lookup up all child-classes of
3647              * File */
3648             packer.pack(_subs.length);
3649 
3650             if (_subs.length >= 1)
3651             {
3652                 auto diffsLastModified = _subs.byValue.map!"a.timeLastModified.stdTime".encodeForwardDifference;
3653                 auto diffsLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime".encodeForwardDifference;
3654                 /* auto timesLastModified = _subs.byValue.map!"a.timeLastModified.stdTime"; */
3655                 /* auto timesLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime"; */
3656 
3657                 packer.pack(diffsLastModified, diffsLastAccessed);
3658 
3659                 /* debug dbg(this.name, " sub.length: ", _subs.length); */
3660                 /* debug dbg(name, " modified diffs: ", diffsLastModified.pack.length); */
3661                 /* debug dbg(name, " accessed diffs: ", diffsLastAccessed.pack.length); */
3662                 /* debug dbg(name, " modified: ", timesLastModified.array.pack.length); */
3663                 /* debug dbg(name, " accessed: ", timesLastAccessed.array.pack.length); */
3664             }
3665 
3666             foreach (sub; _subs)
3667             {
3668                 if        (const regFile = cast(RegFile)sub)
3669                 {
3670                     packer.pack("RegFile");
3671                     regFile.toMsgpack(packer);
3672                 }
3673                 else if (const dir = cast(Dir)sub)
3674                 {
3675                     packer.pack("Dir");
3676                     dir.toMsgpack(packer);
3677                 }
3678                 else if (const symlink = cast(Symlink)sub)
3679                 {
3680                     packer.pack("Symlink");
3681                     symlink.toMsgpack(packer);
3682                 }
3683                 else if (const special = cast(SpecFile)sub)
3684                 {
3685                     packer.pack("SpecFile");
3686                     special.toMsgpack(packer);
3687                 }
3688                 else
3689                 {
3690                     immutable subClassName = sub.classinfo.name;
3691                     assert(0, "Unknown sub File class " ~ subClassName); // TODO Exception
3692                 }
3693             }
3694         }
3695 
3696         void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
3697         {
3698             unpacker.unpack(name, size);
3699 
3700             long stdTime;
3701             unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
3702             unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize
3703 
3704             /* dbg("before:", path, " ", size, " ", timeLastModified, " ", timeLastAccessed); */
3705 
3706             // FKind
3707             if (!kind) { kind = null; }
3708             unpacker.unpack(kind); /* TODO kind = new DirKind(unpacker); */
3709             /* dbg("after:", path); */
3710 
3711             _treeSize.reset; // this.size;
3712 
3713             // Contents
3714             /* TODO unpacker.unpack(_subs); */
3715             immutable noPreviousSubs = _subs.length == 0;
3716             size_t subs_length; unpacker.unpack(subs_length); // TODO Functionize to unpacker.unpack!size_t()
3717 
3718             ForwardDifferenceCode!(long[]) diffsLastModified,
3719                 diffsLastAccessed;
3720             if (subs_length >= 1)
3721             {
3722                 unpacker.unpack(diffsLastModified, diffsLastAccessed);
3723                 /* auto x = diffsLastModified.decodeForwardDifference; */
3724             }
3725 
3726             foreach (ix; 0..subs_length) // repeat for subs_length times
3727             {
3728                 string subClassName; unpacker.unpack(subClassName); // TODO Functionize
3729                 File sub = null;
3730                 try
3731                 {
3732                     switch (subClassName)
3733                     {
3734                     default:
3735                         assert(0, "Unknown File parent class " ~ subClassName); // TODO Exception
3736                     case "Dir":
3737                         auto subDir = new Dir(this, gstats);
3738                         unpacker.unpack(subDir); sub = subDir;
3739                         auto subDent = DirEntry(sub.path);
3740                         subDir.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
3741                         addTreeStatsFromSub(assumeNotNull(subDir), subDent);
3742                         break;
3743                     case "RegFile":
3744                         auto subRegFile = new RegFile(assumeNotNull(this));
3745                         unpacker.unpack(subRegFile); sub = subRegFile;
3746                         auto subDent = DirEntry(sub.path);
3747                         subRegFile.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
3748                         updateStats(assumeNotNull(subRegFile), subDent, true);
3749                         addTreeStatsFromSub(assumeNotNull(subRegFile), subDent);
3750                         break;
3751                     case "Symlink":
3752                         auto subSymlink = new Symlink(assumeNotNull(this));
3753                         unpacker.unpack(subSymlink); sub = subSymlink;
3754                         break;
3755                     case "SpecFile":
3756                         auto SpecFile = new SpecFile(assumeNotNull(this));
3757                         unpacker.unpack(SpecFile); sub = SpecFile;
3758                         break;
3759                     }
3760                     if (noPreviousSubs ||
3761                         !(sub.name in _subs))
3762                     {
3763                         _subs[sub.name] = enforceNotNull(sub);
3764                     }
3765                     /* dbg("Unpacked Dir sub ", sub.path, " of type ", subClassName); */
3766                 } catch (FileException) { // this may be a too generic exception
3767                     /* dbg(sub.path, " is not accessible anymore"); */
3768                 }
3769             }
3770 
3771         }
3772     }
3773 
3774     override void makeObselete() @trusted
3775     {
3776         _obseleteDir = true;
3777         _treeSize.reset;
3778         _timeModifiedInterval.reset;
3779         _timeAccessedInterval.reset;
3780     }
3781     override void makeUnObselete() @safe
3782     {
3783         _obseleteDir = false;
3784     }
3785 
3786     private NotNull!File[string] _subs; // Directory contents
3787     DirKind kind;               // Kind of this directory
3788     uint64_t hitCount = 0;
3789     private int _depth = -1;            // Memoized Depth
3790     private bool _obseleteDir = true;  // Flags that this is obselete
3791     GStats _gstats = null;
3792 
3793     /* TODO Reuse Span and span in Phobos. (Span!T).init should be (T.max, T.min) */
3794     Interval!SysTime _timeModifiedInterval;
3795     Interval!SysTime _timeAccessedInterval;
3796 
3797     Nullable!(size_t, size_t.max) _treeSize; // Size of tree with this directory as root.
3798     /* TODO Make this work instead: */
3799     /* import std.typecons: Nullable; */
3800     /* Nullable!(Bytes64, Bytes64.max) _treeSize; // Size of tree with this directory as root. */
3801 
3802     SHA1Digest _treeContentId;
3803 }
3804 
/** Externally Directory Memoized Calculation of Tree Size.

    For a non-directory the result is its own size. For a directory the result
    is taken from `cache` if present; otherwise it is its own size plus the
    memoized tree sizes of all its subs, and that sum is stored in `cache`.

    NOTE(review): `cache` is passed by value. If the caller hands in an
    associative array that is still null, entries added here presumably do not
    become visible to the caller - confirm whether `ref` was intended.

    Is it possible to make get any of @safe pure nothrow?
 */
Bytes64 treeSizeMemoized(NotNull!File file, Bytes64[File] cache) @trusted /* nothrow */
{
    typeof(return) total = file.size;

    auto dir = cast(Dir)file;
    if (!dir)
    {
        return total; // plain file: nothing to sum up
    }

    if (auto known = file in cache)
    {
        return *known;
    }

    foreach (sub; dir.subs.byValue)
    {
        total += treeSizeMemoized(sub, cache);
    }
    cache[file] = total;
    return total;
}
3828 
/** Save File System Tree Cache under Directory `rootDir`.

    Serializes `rootDir` with msgpack or cerealed, depending on the active
    version, writes the bytes to `cacheFile` and reports size and duration
    through `viz`. Without any serializer version an empty array is written.

    Returns: Serialized Byte Array.
*/
const(ubyte[]) saveRootDirTree(Viz viz,
                               Dir rootDir, string cacheFile) @trusted
{
    // Imported for all version branches: `cacheFile.write(data)` below must
    // resolve to std.file.write whichever serializer is compiled in.
    import std.file: write;

    immutable tic = Clock.currTime;
    version(msgpack)
    {
        const data = rootDir.pack();
    }
    else version(cerealed)
    {
        auto enc = new Cerealiser(); // encoder
        enc ~= rootDir;
        auto data = enc.bytes;
    }
    else
    {
        ubyte[] data;
    }
    cacheFile.write(data);
    immutable toc = Clock.currTime;

    viz.ppln("Cache Write".asH!2,
             "Wrote tree cache of size ",
             data.length.Bytes64, " to ",
             cacheFile.asPath,
             " in ",
             shortDurationString(toc - tic));

    return data;
}
3863 
/** Load File System Tree Cache from `cacheFile`.

    Reads the whole file, unpacks it (msgpack version only) into a fresh
    parentless `Dir` bound to `gstats` and reports size, duration and file
    counts through `viz`.

    Returns: Root Directory of Loaded Tree, or null if reading `cacheFile`
    threw a `FileException`.
*/
Dir loadRootDirTree(Viz viz,
                    string cacheFile, GStats gstats) @trusted
{
    import std.file: read;

    immutable started = Clock.currTime;
    try
    {
        const data = read(cacheFile);

        auto rootDir = new Dir(cast(Dir)null, gstats);
        version(msgpack)
        {
            unpack(cast(ubyte[])data, rootDir); /* Dir rootDir = new Dir(cast(const(ubyte)[])data); */
        }
        immutable finished = Clock.currTime;

        // The totals are reported as noFiles + 1, and the assert below expects
        // the four per-kind counters to add up to exactly that.
        viz.pp("Cache Read".asH!2,
               "Read cache of size ",
               data.length.Bytes64, " from ",
               cacheFile.asPath,
               " in ",
               shortDurationString(finished - started), " containing",
               asUList(asItem(gstats.noDirs, " Dirs,"),
                       asItem(gstats.noRegFiles, " Regular Files,"),
                       asItem(gstats.noSymlinks, " Symbolic Links,"),
                       asItem(gstats.noSpecialFiles, " Special Files,"),
                       asItem("totalling ", gstats.noFiles + 1, " Files")));

        immutable countedByKind = (gstats.noDirs +
                                   gstats.noRegFiles +
                                   gstats.noSymlinks +
                                   gstats.noSpecialFiles);
        assert(countedByKind == gstats.noFiles + 1);
        return rootDir;
    }
    catch (FileException)
    {
        viz.ppln("Failed to read cache from ", cacheFile);
        return null;
    }
}
3907 
/** Lookup every path in `topDirNames` under `rootDir`.

    Returns: the directories that were found, in the order given; a debug
    message is emitted for each one that is missing.
 */
Dir[] getDirs(NotNull!Dir rootDir, string[] topDirNames)
{
    Dir[] found;
    foreach (wanted; topDirNames)
    {
        if (Dir hit = getDir(rootDir, wanted))
        {
            found ~= hit;
        }
        else
        {
            dbg("Directory " ~ wanted ~ " is missing");
        }
    }
    return found;
}
3926 
/** (Cached) Lookup of File `filePath`.

    With `isDir` set the lookup is delegated to `getDir`. Otherwise the parent
    directory of `filePath` is looked up and asked for the entry named after
    the basename of `filePath`. `tolerant` is currently not used.

    Returns: the file, or null if it or its parent directory does not exist.
 */
File getFile(NotNull!Dir rootDir, string filePath,
             bool isDir = false,
             bool tolerant = false) @trusted
{
    if (isDir)
    {
        return getDir(rootDir, filePath);
    }

    immutable parentPath = filePath.dirName;
    auto parentDir = getDir(rootDir, parentPath);
    if (!parentDir)
    {
        // parentDir is null here, so report the path that was looked up
        // instead of dereferencing it.
        dbg("Directory " ~ parentPath ~ " doesn't exist");
        return null;
    }

    auto hit = parentDir.sub(filePath.baseName);
    if (hit)
    {
        return hit;
    }

    dbg("File path " ~ filePath ~ " doesn't exist. TODO Query user to instead find it under "
        ~ parentDir.path);
    // NOTE(review): the result of this recursive search is discarded (see the
    // TODO in the message above); kept as is to preserve behavior.
    parentDir.find(filePath.baseName);
    return null;
}
3959 
import std.path: isRooted;

/** (Cached) Lookup of Directory `dirPath`.

    Walks `dirPath` part by part starting at `rootDir`. A part that is a
    symbolic link to a directory is followed by restarting the lookup on the
    link target; every followed link is recorded in `followedSymlinks`, and
    meeting the same link again aborts the lookup.

    Params:
        rootDir = directory representing the first part of `dirPath`
        dirPath = rooted path to look up
        dent = currently unused
        followedSymlinks = symbolic links followed so far; appended to

    Returns: Dir if present under rootDir, null otherwise.
    Throws: whatever `DirEntry` throws for a link target that cannot be read.
    TODO Make use of dent
*/
Dir getDir(NotNull!Dir rootDir, string dirPath, ref DirEntry dent,
           ref Symlink[] followedSymlinks) @trusted
    in { assert(dirPath.isRooted); }
do
{
    import std.algorithm: canFind;
    import std.range: drop;
    import std.path: pathSplitter;

    Dir currDir = rootDir;

    foreach (part; dirPath.pathSplitter().drop(1)) // all but first
    {
        auto sub = currDir.sub(part);
        if        (auto subDir = cast(Dir)sub)
        {
            currDir = subDir;
        }
        else if (auto subSymlink = cast(Symlink)sub)
        {
            auto subDent = DirEntry(subSymlink.absoluteNormalizedTargetPath);
            if (!subDent.isDir)
            {
                dbg("Loaded path " ~ dirPath ~ " is not a directory");
                return null;
            }

            // canFind yields a real bool. Testing the slice returned by find()
            // is also true for a failed search in a non-empty array, which
            // flagged every second symlink on a path as a loop.
            if (followedSymlinks.canFind(subSymlink))
            {
                dbg("Infinite recursion in ", subSymlink);
                return null;
            }
            followedSymlinks ~= subSymlink;

            currDir = getDir(rootDir, subSymlink.absoluteNormalizedTargetPath, subDent, followedSymlinks);
            if (!currDir)
            {
                return null; // link target is not present under rootDir
            }
        }
        else
        {
            return null;
        }
    }
    return currDir;
}
4007 
/** (Cached) Lookup of Directory `dirPath`.

    Convenience overload that starts with an empty list of followed symbolic
    links.

    Returns: Dir if present under rootDir; null otherwise, including when a
    `FileException` is thrown during the lookup.
 */
Dir getDir(NotNull!Dir rootDir, string dirPath) @trusted
{
    Symlink[] visitedLinks;
    try
    {
        auto startDent = DirEntry(dirPath);
        return getDir(rootDir, dirPath, startDent, visitedLinks);
    }
    catch (FileException)
    {
        dbg("Exception getting Dir");
    }
    return null;
}
4023 unittest {
4024     /* auto tmp = tempfile("/tmp/fsfile"); */
4025 }
4026 
/// Currently 0; the previously tried value was 100*1024.
enum ulong mmfile_size = 0;
4028 
/** Returns: Size of a memory page in bytes, as an `int`.

    On POSIX systems the value is queried with `sysconf(_SC_PAGESIZE)`, the
    documented interface, rather than the glibc-internal `__getpagesize`.
    If the query fails, or on non-POSIX systems, 4096 is returned.
 */
auto pageSize() @trusted
{
    enum int fallbackPageSize = 4096;
    version(Posix)
    {
        import core.sys.posix.unistd: sysconf, _SC_PAGESIZE;
        immutable queried = sysconf(_SC_PAGESIZE);
        // sysconf reports failure as -1; never hand that on as a size.
        return queried > 0 ? cast(int)queried : fallbackPageSize;
    }
    else
    {
        return fallbackPageSize;
    }
}
4041 
/** Key Strictness. */
enum KeyStrictness
{
    /// Exact.
    exact,
    /// Acronym.
    acronym,
    /// Either exact or acronym.
    eitherExactOrAcronym,
    /// Default strictness, an alias of `eitherExactOrAcronym`.
    standard = eitherExactOrAcronym,
}
4049 
/** Language Operator Associativity. */
enum OpAssoc
{
    none, /// No associativity.
    LR,   /// Left-to-Right
    RL,   /// Right-to-Left
}
4055 
/** Language Operator Arity. */
enum OpArity
{
    /// Arity not known.
    unknown,
    /// 1-arguments, operator after its operand.
    unaryPostfix,
    /// 1-arguments, operator before its operand.
    unaryPrefix,
    /// 2-arguments
    binary,
    /// 3-arguments
    ternary,
}
4065 
/** Language Operator. */
struct Op
{
    /** Construct operator `op` with optional arity, associativity, precedence
        and description. `overloadable` is left at its default.

        NOTE(review): `prec` is a signed parameter stored in an unsigned
        member, so the default of -1 presumably ends up as 255 - confirm that
        this is the intended "unknown" marker.
     */
    this(string op,
         OpArity arity = OpArity.unknown,
         OpAssoc assoc = OpAssoc.none,
         byte prec = -1,
         string desc = [])
    {
        this.op = op;
        this.desc = desc;
        this.assoc = assoc;
        this.prec = prec;
        this.arity = arity;
    }

    /** Make `this` an alias of `opOrig`.

        Currently only returns a copy of `this`; `opOrig` is not recorded.
     */
    Op aliasOf(string opOrig)
    {
        // TODO set relation in map from op to opOrig
        return this;
    }

    /// Operator. TODO Optimize this storage using a value type?
    string op;
    /// Description
    string desc;
    /// Associativity
    OpAssoc assoc;
    /// Precedence
    ubyte prec;
    /// Arity
    OpArity arity;
    /// Overloadable
    bool overloadable;
}
4094 
/** Language Operator Alias. */
struct OpAlias
{
    /** Construct the alias `op` for the operator `opOrigin`. */
    this(string op, string opOrigin)
    {
        this.opOrigin = opOrigin;
        this.op = op;
    }

    /// The alias.
    string op;
    /// The operator that `op` stands for.
    string opOrigin;
}
4106 
/** Lookup the kind of `regFile` in `kindsById`, using the kind id stored in
    the file's `_cstat`.

    Returns: the registered kind, or null if the id is not a key of
    `kindsById`.
 */
FKind tryLookupKindIn(RegFile regFile,
                      FKind[SHA1Digest] kindsById)
{
    immutable id = regFile._cstat.kindId;
    if (auto known = id in kindsById)
    {
        return *known;
    }
    return null;
}
4120 
/** Returns: Name under which `theFile` is displayed.

    That is "./" followed by the file's name when relative paths are requested
    and exactly one top directory is set in `gstats`; otherwise the file's
    path.
 */
string displayedFileName(AnyFile)(GStats gstats,
                                  AnyFile theFile) @safe pure
{
    immutable showRelative = (gstats.pathFormat == PathFormat.relative &&
                              gstats.topDirs.length == 1);
    if (showRelative)
    {
        return "./" ~ theFile.name;
    }
    return theFile.path;
}
4129 
4130 /** File System Scanner. */
4131 class Scanner(Term)
4132 {
4133     this(string[] args, ref Term term)
4134     {
4135         prepare(args, term);
4136     }
4137 
4138     SysTime _currTime;
4139     import std.getopt;
4140     import std..string: toLower, toUpper, startsWith, CaseSensitive;
4141     import std.mmfile;
4142     import std.stdio: writeln, stdout, stderr, stdin, popen;
4143     import std.algorithm: find, count, countUntil, min, splitter;
4144     import std.range: join;
4145     import std.conv: to;
4146 
4147     import core.sys.posix.sys.mman;
4148     import core.sys.posix.pwd: passwd, getpwuid_r;
4149     version(linux)
4150     {
4151         // import core.sys.linux.sys.inotify;
4152         import core.sys.linux.sys.xattr;
4153     }
4154     import core.sys.posix.unistd: getuid, getgid;
4155     import std.file: read, FileException, exists, getcwd;
4156     import std.range: retro;
4157     import std.exception: ErrnoException;
4158     import core.sys.posix.sys.stat: stat_t, S_IRUSR, S_IRGRP, S_IROTH;
4159 
4160     uint64_t _hitsCountTotal = 0;
4161 
4162     Symlink[] _brokenSymlinks;
4163 
4164     bool _beVerbose = false;
4165     bool _caseFold = false;
4166     bool _showSkipped = false;
4167     bool listTxtFKinds = false;
4168     bool listBinFKinds = false;
4169     string selFKindNames;
4170     string[] _topDirNames;
4171     string[] addTags;
4172     string[] removeTags;
4173 
4174     private
4175     {
4176         GStats gstats = new GStats();
4177 
4178         string _cacheFile = "~/.cache/fs-root.msgpack";
4179 
4180         uid_t _uid;
4181         gid_t _gid;
4182     }
4183 
4184     ioFile outFile;
4185 
4186     string[] keys; // Keys to scan.
4187     typeof(keys.map!bistogramOverRepresentation) keysBists;
4188     typeof(keys.map!(sparseUIntNGramOverRepresentation!NGramOrder)) keysXGrams;
4189     Bist keysBistsUnion;
4190     XGram keysXGramsUnion;
4191 
4192     string selFKindsNote;
4193 
4194     void prepare(string[] args, ref Term term)
4195     {
4196         _scanChunkSize = 32*pageSize;
4197         gstats.loadFileKinds;
4198         gstats.loadDirKinds;
4199 
4200         bool helpPrinted = getoptEx("FS --- File System Scanning Utility in D.\n" ~
4201                                     "Usage: fs { --switches } [KEY]...\n" ~
4202                                     "Note that scanning for multiple KEYs is possible.\nIf so hits are highlighted in different colors!\n" ~
4203                                     "Sample calls: \n" ~
4204                                     "  fdo.d --color -d /lib/modules/3.13.0-24-generic/kernel/drivers/staging --browse --duplicates --recache lirc\n" ~
4205                                     "  fdo.d --color -d /etc -s --tree --usage -l --duplicates stallman\n"
4206                                     "  fdo.d --color -d /etc -d /var --acronym sttccc\n"
4207                                     "  fdo.d --color -d /etc -d /var --acronym dktp\n"
4208                                     "  fdo.d --color -d /etc -d /var --acronym tms sttc prc dtp xsr\n" ~
4209                                     "  fdo.d --color -d /etc min max delta\n" ~
4210                                     "  fdo.d --color -d /etc if elif return len --duplicates --sort=onSize\n" ~
4211                                     "  fdo.d --color -k -d /bin alpha\n" ~
4212                                     "  fdo.d --color -d /lib -k linus" ~
4213                                     "  fdo.d --color -d /etc --symbol alpha beta gamma delta" ~
4214                                     "  fdo.d --color -d /var/spool/postfix/dev " ~
4215                                     "  fdo.d --color -d /etc alpha" ~
4216                                     "  fdo.d --color -d ~/Work/dmd  --browse xyz --duplicates --do=preprocess",
4217 
4218                                     args,
4219                                     std.getopt.config.caseInsensitive,
4220 
4221                                     "verbose|v", "\tVerbose",  &_beVerbose,
4222 
4223                                     "color|C", "\tColorize Output" ~ defaultDoc(gstats.colorFlag),  &gstats.colorFlag,
4224                                     "types|T", "\tComma separated list (CSV) of file types/kinds to scan" ~ defaultDoc(selFKindNames), &selFKindNames,
4225                                     "list-textual-kinds", "\tList registered textual types/kinds" ~ defaultDoc(listTxtFKinds), &listTxtFKinds,
4226                                     "list-binary-kinds", "\tList registered binary types/kinds" ~ defaultDoc(listBinFKinds), &listBinFKinds,
4227                                     "group-types|G", "\tCollect and group file types found" ~ defaultDoc(gstats.collectTypeHits), &gstats.collectTypeHits,
4228 
4229                                     "i", "\tCase-Fold, Case-Insensitive" ~ defaultDoc(_caseFold), &_caseFold,
4230                                     "k", "\tShow Skipped Directories and Files" ~ defaultDoc(_showSkipped), &_showSkipped,
4231                                     "d", "\tRoot Directory(s) of tree(s) to scan, defaulted to current directory" ~ defaultDoc(_topDirNames), &_topDirNames,
4232                                     "depth", "\tDepth of tree to scan, defaulted to unlimited (-1) depth" ~ defaultDoc(gstats.scanDepth), &gstats.scanDepth,
4233 
4234                                     // Contexts
4235                                     "context|x", "\tComma Separated List of Contexts. Either: " ~ enumDoc!ScanContext, &gstats.scanContext,
4236 
4237                                     "word|w", "\tSearch for key as a complete Word (A Letter followed by more Letters and Digits)." ~ defaultDoc(gstats.keyAsWord), &gstats.keyAsWord,
4238                                     "symbol|ident|id|s", "\tSearch for key as a complete Symbol (Identifier)" ~ defaultDoc(gstats.keyAsSymbol), &gstats.keyAsSymbol,
4239                                     "acronym|a", "\tSearch for key as an acronym (relaxed)" ~ defaultDoc(gstats.keyAsAcronym), &gstats.keyAsAcronym,
4240                                     "exact", "\tSearch for key only with exact match (strict)" ~ defaultDoc(gstats.keyAsExact), &gstats.keyAsExact,
4241 
4242                                     "name-duplicates|snd", "\tDetect & Show file name duplicates" ~ defaultDoc(gstats.showNameDups), &gstats.showNameDups,
4243                                     "hardlink-duplicates|inode-duplicates|shd", "\tDetect & Show multiple links to same inode" ~ defaultDoc(gstats.showLinkDups), &gstats.showLinkDups,
4244                                     "file-content-duplicates|scd", "\tDetect & Show file contents duplicates" ~ defaultDoc(gstats.showFileContentDups), &gstats.showFileContentDups,
4245                                     "tree-content-duplicates", "\tDetect & Show directory tree contents duplicates" ~ defaultDoc(gstats.showTreeContentDups), &gstats.showTreeContentDups,
4246 
4247                                     "elf-symbol-duplicates", "\tDetect & Show ELF Symbol Duplicates" ~ defaultDoc(gstats.showELFSymbolDups), &gstats.showELFSymbolDups,
4248 
4249                                     "duplicates|D", "\tDetect & Show file name and contents duplicates" ~ defaultDoc(gstats.showAnyDups), &gstats.showAnyDups,
4250                                     "duplicates-context", "\tDuplicates Detection Context. Either: " ~ enumDoc!DuplicatesContext, &gstats.duplicatesContext,
4251                                     "hardlink-content-duplicates", "\tConvert all content duplicates into hardlinks (common inode) if they reside on the same file system" ~ defaultDoc(gstats.linkContentDups), &gstats.linkContentDups,
4252 
4253                                     "usage", "\tShow disk usage (tree size) of scanned directories" ~ defaultDoc(gstats.showUsage), &gstats.showUsage,
4254                                     "count-lines", "\tShow line counts of scanned files" ~ defaultDoc(gstats.showLineCounts), &gstats.showLineCounts,
4255 
4256                                     "sha1", "\tShow SHA1 content digests" ~ defaultDoc(gstats.showSHA1), &gstats.showSHA1,
4257 
4258                                     "mmaps", "\tShow when files are memory mapped (mmaped)" ~ defaultDoc(gstats.showMMaps), &gstats.showMMaps,
4259 
4260                                     "follow-symlinks|f", "\tFollow symbolic links" ~ defaultDoc(gstats.followSymlinks), &gstats.followSymlinks,
4261                                     "broken-symlinks|l", "\tDetect & Show broken symbolic links (target is non-existing file) " ~ defaultDoc(gstats.showBrokenSymlinks), &gstats.showBrokenSymlinks,
4262                                     "show-symlink-cycles|l", "\tDetect & Show symbolic links cycles" ~ defaultDoc(gstats.showSymlinkCycles), &gstats.showSymlinkCycles,
4263 
4264                                     "add-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(addTags), &addTags,
4265                                     "remove-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(removeTags), &removeTags,
4266 
4267                                     "tree|W", "\tShow Scanned Tree and Followed Symbolic Links" ~ defaultDoc(gstats.showTree), &gstats.showTree,
4268                                     "sort|S", "\tDirectory contents sorting order. Either: " ~ enumDoc!DirSorting, &gstats.subsSorting,
4269                                     "build", "\tBuild Source Code. Either: " ~ enumDoc!BuildType, &gstats.buildType,
4270 
4271                                     "path-format", "\tFormat of paths. Either: " ~ enumDoc!PathFormat ~ "." ~ defaultDoc(gstats.pathFormat), &gstats.pathFormat,
4272 
4273                                     "cache-file|F", "\tFile System Tree Cache File" ~ defaultDoc(_cacheFile), &_cacheFile,
4274                                     "recache", "\tSkip initial load of cache from disk" ~ defaultDoc(gstats.recache), &gstats.recache,
4275 
4276                                     "do", "\tOperation to perform on matching files. Either: " ~ enumDoc!FOp, &gstats.fOp,
4277 
4278                                     "demangle-elf", "\tDemangle ELF files.", &gstats.demangleELF,
4279 
4280                                     "use-ngrams", "\tUse NGrams to cache statistics and thereby speed up search" ~ defaultDoc(gstats.useNGrams), &gstats.useNGrams,
4281 
4282                                     "html|H", "\tFormat output as HTML" ~ defaultDoc(gstats.useHTML), &gstats.useHTML,
4283                                     "browse|B", ("\tFormat output as HTML to a temporary file" ~
4284                                                  defaultDoc(_cacheFile) ~
4285                                                  " and open it with default Web browser" ~
4286                                                  defaultDoc(gstats.browseOutput)), &gstats.browseOutput,
4287 
4288                                     "author", "\tPrint name of\n"~"\tthe author",
4289                                     delegate() { writeln("Per Nordlöw"); }
4290             );
4291 
4292         if (gstats.showAnyDups)
4293         {
4294             gstats.showNameDups = true;
4295             gstats.showLinkDups = true;
4296             gstats.showFileContentDups = true;
4297             gstats.showTreeContentDups = true;
4298             gstats.showELFSymbolDups = true;
4299         }
4300         if (helpPrinted)
4301             return;
4302 
4303         _cacheFile = std.path.expandTilde(_cacheFile);
4304 
4305         if (_topDirNames.empty)
4306         {
4307             _topDirNames = ["."];
4308         }
4309         if (_topDirNames == ["."])
4310         {
4311             gstats.pathFormat = PathFormat.relative;
4312         }
4313         else
4314         {
4315             gstats.pathFormat = PathFormat.absolute;
4316         }
4317         foreach (ref topName; _topDirNames)
4318         {
4319             if (topName ==  ".")
4320             {
4321                 topName = topName.absolutePath.buildNormalizedPath;
4322             }
4323             else
4324             {
4325                 topName = topName.expandTilde.buildNormalizedPath;
4326             }
4327         }
4328 
4329         // Output Handling
4330         if (gstats.browseOutput)
4331         {
4332             gstats.useHTML = true;
4333             immutable ext = gstats.useHTML ? "html" : "results.txt";
4334             import std.uuid: randomUUID;
4335             outFile = ioFile("/tmp/fs-" ~ randomUUID.toString() ~
4336                              "." ~ ext,
4337                              "w");
4338             /* popen("gnome-open " ~ outFile.name); */
4339             popen("firefox -new-tab " ~ outFile.name);
4340         }
4341         else
4342         {
4343             outFile = stdout;
4344         }
4345 
4346         auto cwd = getcwd();
4347 
4348         foreach (arg; args[1..$])
4349         {
4350             if (!arg.startsWith("-")) // if argument not a flag
4351             {
4352                 keys ~= arg;
4353             }
4354         }
4355 
4356         // Calc stats
4357         keysBists = keys.map!bistogramOverRepresentation;
4358         keysXGrams = keys.map!(sparseUIntNGramOverRepresentation!NGramOrder);
4359         keysBistsUnion = reduce!"a | b"(typeof(keysBists.front).init, keysBists);
4360         keysXGramsUnion = reduce!"a + b"(typeof(keysXGrams.front).init, keysXGrams);
4361 
4362         auto viz = new Viz(outFile,
4363                            &term,
4364                            gstats.showTree,
4365                            gstats.useHTML ? VizForm.HTML : VizForm.textAsciiDocUTF8,
4366                            gstats.colorFlag,
4367                            !gstats.useHTML, // only use if HTML
4368                            true, // TODO Only set if in debug mode
4369             );
4370 
4371         if (gstats.useNGrams &&
4372             (!keys.empty) &&
4373             keysXGramsUnion.empty)
4374         {
4375             gstats.useNGrams = false;
4376             viz.ppln("Keys must be at least of length " ~
4377                      to!string(NGramOrder + 1) ~
4378                      " in order for " ~
4379                      keysXGrams[0].typeName ~
4380                      " to be calculated");
4381         }
4382 
4383         // viz.ppln("<meta http-equiv=\"refresh\" content=\"1\"/>"); // refresh every second
4384 
4385         if (selFKindNames)
4386         {
4387             foreach (lang; selFKindNames.splitterASCIIAmong!(","))
4388             {
4389                 if      (lang         in gstats.allFKinds.byName) // try exact match
4390                 {
4391                     gstats.selFKinds ~= gstats.allFKinds.byName[lang];
4392                 }
4393                 else if (lang.toLower in gstats.allFKinds.byName) // else try all in lower case
4394                 {
4395                     gstats.selFKinds ~= gstats.allFKinds.byName[lang.toLower];
4396                 }
4397                 else if (lang.toUpper in gstats.allFKinds.byName) // else try all in upper case
4398                 {
4399                     gstats.selFKinds ~= gstats.allFKinds.byName[lang.toUpper];
4400                 }
4401                 else
4402                 {
4403                     writeln("warning: Language ", lang, " not registered");
4404                 }
4405             }
4406             if (gstats.selFKinds.byIndex.empty)
4407             {
4408                 writeln("warning: None of the languages ", to!string(selFKindNames), " are registered. Defaulting to all file types.");
4409                 gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
4410             }
4411             else
4412             {
4413                 gstats.selFKinds.rehash;
4414             }
4415         }
4416         else
4417         {
4418             gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
4419         }
4420 
4421         // Keys
4422         auto commaedKeys = keys.joiner(",");
4423         const keysPluralExt = keys.length >= 2 ? "s" : "";
4424         string commaedKeysString = to!string(commaedKeys);
4425         if (keys)
4426         {
4427             selFKindsNote = " in " ~ (gstats.selFKinds == gstats.allFKinds ?
4428                                       "all " :
4429                                       gstats.selFKinds.byIndex.map!(a => a.kindName).join(",") ~ "-") ~ "files";
4430             immutable underNote = " under \"" ~ (_topDirNames.reduce!"a ~ ',' ~ b") ~ "\"";
4431             const exactNote = gstats.keyAsExact ? "exact " : "";
4432             string asNote;
4433             if (gstats.keyAsAcronym)
4434             {
4435                 asNote = (" as " ~ exactNote ~
4436                           (gstats.keyAsWord ? "word" : "symbol") ~
4437                           " acronym" ~ keysPluralExt);
4438             }
4439             else if (gstats.keyAsSymbol)
4440             {
4441                 asNote = " as " ~ exactNote ~ "symbol" ~ keysPluralExt;
4442             }
4443             else if (gstats.keyAsWord)
4444             {
4445                 asNote = " as " ~ exactNote ~ "word" ~ keysPluralExt;
4446             }
4447             else
4448             {
4449                 asNote = "";
4450             }
4451 
4452             const title = ("Searching for \"" ~ commaedKeysString ~ "\"" ~
4453                            " case-" ~ (_caseFold ? "in" : "") ~"sensitively"
4454                            ~asNote ~selFKindsNote ~underNote);
4455             if (viz.form == VizForm.HTML) // only needed for HTML output
4456             {
4457                 viz.ppln(faze(title, titleFace));
4458             }
4459 
4460             viz.pp(asH!1("Searching for \"", commaedKeysString, "\"",
4461                          " case-", (_caseFold ? "in" : ""), "sensitively",
4462                          asNote, selFKindsNote,
4463                          " under ", _topDirNames.map!(a => a.asPath)));
4464         }
4465 
4466         if (listTxtFKinds)
4467         {
4468             viz.pp("Textual (Source) Kinds".asH!2,
4469                    gstats.txtFKinds.byIndex.asTable);
4470         }
4471 
4472         if (listBinFKinds)
4473         {
4474             viz.pp("Binary Kinds".asH!2,
4475                    gstats.binFKinds.byIndex.asTable);
4476         }
4477 
4478         /* binFKinds.asTable, */
4479 
4480         if (_showSkipped)
4481         {
4482             viz.pp("Skipping files of type".asH!2,
4483                    asUList(gstats.binFKinds.byIndex.map!(a => asItem(a.kindName.asBold,
4484                                                                      ": ",
4485                                                                      asCSL(a.exts.map!(b => b.asCode))))));
4486             viz.pp("Skipping directories of type".asH!2,
4487                    asUList(gstats.skippedDirKinds.map!(a => asItem(a.kindName.asBold,
4488                                                                    ": ",
4489                                                                    a.fileName.asCode))));
4490         }
4491 
4492         // if (key && key == key.toLower()) { // if search key is all lowercase
4493         //     _caseFold = true;               // we do case-insensitive search like in Emacs
4494         // }
4495 
4496         _uid = getuid;
4497         _gid = getgid;
4498 
4499         // Setup root directory
4500         if (!gstats.recache)
4501         {
4502             GC.disable;
4503             gstats.rootDir = loadRootDirTree(viz, _cacheFile, gstats);
4504             GC.enable;
4505         }
4506         if (!gstats.rootDir) // if first time
4507         {
4508             gstats.rootDir = new Dir("/", gstats); // filesystem root directory. TODO Make this uncopyable?
4509         }
4510 
4511         // Scan for exact key match
4512         gstats.topDirs = getDirs(enforceNotNull(gstats.rootDir), _topDirNames);
4513 
4514         _currTime = Clock.currTime;
4515 
4516         GC.disable;
4517         scanTopDirs(viz, commaedKeysString);
4518         GC.enable;
4519 
4520         GC.disable;
4521         saveRootDirTree(viz, gstats.rootDir, _cacheFile);
4522         GC.enable;
4523 
4524         // Print statistics
4525         showStats(viz);
4526     }
4527 
4528     void scanTopDirs(Viz viz,
4529                      string commaedKeysString)
4530     {
4531         viz.pp("Results".asH!2);
4532         if (gstats.topDirs)
4533         {
4534             foreach (topIndex, topDir; gstats.topDirs)
4535             {
4536                 scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
4537                 if (ctrlC)
4538                 {
4539                     auto restDirs = gstats.topDirs[topIndex + 1..$];
4540                     if (!restDirs.empty)
4541                     {
4542                         debug dbg("Ctrl-C pressed: Skipping search of " ~ to!string(restDirs));
4543                         break;
4544                     }
4545                 }
4546             }
4547 
4548             viz.pp("Summary".asH!2);
4549 
4550             if ((gstats.noScannedFiles - gstats.noScannedDirs) == 0)
4551             {
4552                 viz.ppln("No files with any content found");
4553             }
4554             else
4555             {
4556                 // Scan for acronym key match
4557                 if (keys && _hitsCountTotal == 0)  // if keys given but no hit found
4558                 {
4559                     auto keysString = (keys.length >= 2 ? "s" : "") ~ " \"" ~ commaedKeysString;
4560                     if (gstats.keyAsAcronym)
4561                     {
4562                         viz.ppln(("No acronym matches for key" ~ keysString ~ `"` ~
4563                                   (gstats.keyAsSymbol ? " as symbol" : "") ~
4564                                   " found in files of type"));
4565                     }
4566                     else if (!gstats.keyAsExact)
4567                     {
4568                         viz.ppln(("No exact matches for key" ~ keysString ~ `"` ~
4569                                   (gstats.keyAsSymbol ? " as symbol" : "") ~
4570                                   " found" ~ selFKindsNote ~
4571                                   ". Relaxing scan to" ~ (gstats.keyAsSymbol ? " symbol" : "") ~ " acronym match."));
4572                         gstats.keyAsAcronym = true;
4573 
4574                         foreach (topDir; gstats.topDirs)
4575                         {
4576                             scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
4577                         }
4578                     }
4579                 }
4580             }
4581         }
4582 
4583         assert(gstats.noScannedDirs +
4584                gstats.noScannedRegFiles +
4585                gstats.noScannedSymlinks +
4586                gstats.noScannedSpecialFiles == gstats.noScannedFiles);
4587     }
4588 
4589     version(linux)
4590     {
4591         @trusted bool readable(in stat_t stat, uid_t uid, gid_t gid, ref string msg)
4592         {
4593             immutable mode = stat.st_mode;
4594             immutable ok = ((stat.st_uid == uid) && (mode & S_IRUSR) ||
4595                             (stat.st_gid == gid) && (mode & S_IRGRP) ||
4596                             (mode & S_IROTH));
4597             if (!ok)
4598             {
4599                 msg = " is not readable by you, but only by";
4600                 bool can = false; // someone can access
4601                 if (mode & S_IRUSR)
4602                 {
4603                     can = true;
4604                     msg ~= " user id " ~ to!string(stat.st_uid);
4605 
4606                     // Lookup user name from user id
4607                     passwd pw;
4608                     passwd* pw_ret;
4609                     immutable size_t bufsize = 16384;
4610                     char* buf = cast(char*)core.stdc.stdlib.malloc(bufsize);
4611                     getpwuid_r(stat.st_uid, &pw, buf, bufsize, &pw_ret);
4612                     if (pw_ret != null)
4613                     {
4614                         string userName;
4615                         {
4616                             size_t n = 0;
4617                             while (pw.pw_name[n] != 0)
4618                             {
4619                                 userName ~= pw.pw_name[n];
4620                                 n++;
4621                             }
4622                         }
4623                         msg ~= " (" ~ userName ~ ")";
4624 
4625                         // string realName;
4626                         // {
4627                         //     size_t n = 0;
4628                         //     while (pw.pw_gecos[n] != 0)
4629                         //     {
4630                         //         realName ~= pw.pw_gecos[n];
4631                         //         n++;
4632                         //     }
4633                         // }
4634                     }
4635                     core.stdc.stdlib.free(buf);
4636 
4637                 }
4638                 if (mode & S_IRGRP)
4639                 {
4640                     can = true;
4641                     if (msg != "")
4642                     {
4643                         msg ~= " or";
4644                     }
4645                     msg ~= " group id " ~ to!string(stat.st_gid);
4646                 }
4647                 if (!can)
4648                 {
4649                     msg ~= " root";
4650                 }
4651             }
4652             return ok;
4653         }
4654     }
4655 
4656     Results results;
4657 
4658     void handleError(F)(Viz viz,
4659                         NotNull!F file, bool isDir, size_t subIndex)
4660     {
4661         auto dent = DirEntry(file.path);
4662         immutable stat_t stat = dent.statBuf;
4663         string msg;
4664         if (!readable(stat, _uid, _gid, msg))
4665         {
4666             results.noBytesUnreadable += dent.size;
4667             if (_showSkipped)
4668             {
4669                 if (gstats.showTree)
4670                 {
4671                     auto parentDir = file.parent;
4672                     immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
4673                     viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
4674                 }
4675                 viz.ppln(file,
4676                          ":  ", isDir ? "Directory" : "File",
4677                          faze(msg, warnFace));
4678             }
4679         }
4680     }
4681 
4682     void printSkipped(Viz viz,
4683                       NotNull!RegFile regFile,
4684                       size_t subIndex,
4685                       const NotNull!FKind kind, KindHit kindhit,
4686                       const string skipCause)
4687     {
4688         auto parentDir = regFile.parent;
4689         if (_showSkipped)
4690         {
4691             if (gstats.showTree)
4692             {
4693                 immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
4694                 viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
4695             }
4696             viz.pp(horizontalRuler,
4697                    asH!3(regFile,
4698                          ": Skipped ", kind, " file",
4699                          skipCause));
4700         }
4701     }
4702 
4703     size_t _scanChunkSize;
4704 
4705     KindHit isSelectedFKind(NotNull!RegFile regFile) @safe /* nothrow */
4706     {
4707         typeof(return) kindHit = KindHit.none;
4708         FKind hitKind;
4709 
4710         // Try cached kind first
4711         // First Try with kindId as try
4712         if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
4713         {
4714             if (regFile._cstat.kindId in gstats.selFKinds.byId)
4715             {
4716                 hitKind = gstats.selFKinds.byId[regFile._cstat.kindId];
4717                 kindHit = KindHit.cached;
4718                 return kindHit;
4719             }
4720         }
4721 
4722         immutable ext = regFile.realExtension;
4723 
4724         // Try with hash table first
4725         if (!ext.empty && // if file has extension and
4726             ext in gstats.selFKinds.byExt) // and extensions may match specified included files
4727         {
4728             auto possibleKinds = gstats.selFKinds.byExt[ext];
4729             foreach (kind; possibleKinds)
4730             {
4731                 auto nnKind = enforceNotNull(kind);
4732                 immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
4733                 if (hit)
4734                 {
4735                     hitKind = nnKind;
4736                     kindHit = hit;
4737                     break;
4738                 }
4739             }
4740         }
4741 
4742         if (!hitKind) // if no hit yet
4743         {
4744             // blindly try the rest
4745             foreach (kind; gstats.selFKinds.byIndex)
4746             {
4747                 auto nnKind = enforceNotNull(kind);
4748                 immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
4749                 if (hit)
4750                 {
4751                     hitKind = nnKind;
4752                     kindHit = hit;
4753                     break;
4754                 }
4755             }
4756         }
4757 
4758         return kindHit;
4759     }
4760 
4761     /** Search for Keys `keys` in Source `src`.
4762      */
4763     size_t scanForKeys(Source, Keys)(Viz viz,
4764                                      NotNull!Dir topDir,
4765                                      NotNull!File theFile,
4766                                      NotNull!Dir parentDir,
4767                                      ref Symlink[] fromSymlinks,
4768                                      in Source src,
4769                                      in Keys keys,
4770                                      in bool[] bistHits = [],
4771                                      ScanContext ctx = ScanContext.standard)
4772     {
4773         bool anyFileHit = false; // will become true if any hit in this file
4774 
4775         typeof(return) hitCount = 0;
4776 
4777         import std.ascii: newline;
4778 
4779         auto thisFace = stdFace;
4780         if (gstats.colorFlag)
4781         {
4782             if (ScanContext.fileName)
4783             {
4784                 thisFace = fileFace;
4785             }
4786         }
4787 
4788         size_t nL = 0; // line counter
4789         foreach (line; src.splitterASCIIAmong!(newline))
4790         {
4791             auto rest = cast(string)line; // rest of line as a string
4792 
4793             bool anyLineHit = false; // will become true if any hit on current line
4794             // Hit search loop
4795             while (!rest.empty)
4796             {
4797                 // Find any key
4798 
4799                 /* TODO Convert these to a range. */
4800                 ptrdiff_t offKB = -1;
4801                 ptrdiff_t offKE = -1;
4802 
4803                 foreach (uint ix, key; keys) // TODO Call variadic-find instead to speed things up.
4804                 {
4805                     /* Bistogram Discardal */
4806                     if ((!bistHits.empty) &&
4807                         !bistHits[ix]) // if neither exact nor acronym match possible
4808                     {
4809                         continue; // try next key
4810                     }
4811 
4812                     /* dbg("key:", key, " line:", line); */
4813                     ptrdiff_t[] acronymOffsets;
4814                     if (gstats.keyAsAcronym) // acronym search
4815                     {
4816                         auto hit = (cast(immutable ubyte[])rest).findAcronymAt(key,
4817                                                                                gstats.keyAsSymbol ? FindContext.inSymbol : FindContext.inWord);
4818                         if (!hit[0].empty)
4819                         {
4820                             acronymOffsets = hit[1];
4821                             offKB = hit[1][0];
4822                             offKE = hit[1][$-1] + 1;
4823                         }
4824                     }
4825                     else
4826                     { // normal search
4827                         import std..string: indexOf;
4828                         offKB = rest.indexOf(key,
4829                                              _caseFold ? CaseSensitive.no : CaseSensitive.yes); // hit begin offset
4830                         offKE = offKB + key.length; // hit end offset
4831                     }
4832 
4833                     if (offKB >= 0) // if hit
4834                     {
4835                         if (!gstats.showTree && ctx == ScanContext.fileName)
4836                         {
4837                             viz.pp(parentDir, dirSeparator);
4838                         }
4839 
4840                         // Check Context
4841                         if ((gstats.keyAsSymbol && !isSymbolASCII(rest, offKB, offKE)) ||
4842                             (gstats.keyAsWord   && !isWordASCII  (rest, offKB, offKE)))
4843                         {
4844                             rest = rest[offKE..$]; // move forward in line
4845                             continue;
4846                         }
4847 
4848                         if (ctx == ScanContext.fileContent &&
4849                             !anyLineHit) // if this is first hit
4850                         {
4851                             if (viz.form == VizForm.HTML)
4852                             {
4853                                 if (!anyFileHit)
4854                                 {
4855                                     viz.pp(horizontalRuler,
4856                                            displayedFileName(gstats, theFile).asPath.asH!3);
4857                                     viz.ppTagOpen(`table`, `border=1`);
4858                                     anyFileHit = true;
4859                                 }
4860                             }
4861                             else
4862                             {
4863                                 if (gstats.showTree)
4864                                 {
4865                                     viz.pp("│  ".repeat(parentDir.depth + 1).join("") ~ "├" ~ "─ ");
4866                                 }
4867                                 else
4868                                 {
4869                                     foreach (fromSymlink; fromSymlinks)
4870                                     {
4871                                         viz.pp(fromSymlink,
4872                                                " modified ",
4873                                                faze(shortDurationString(_currTime - fromSymlink.timeLastModified),
4874                                                     timeFace),
4875                                                " ago",
4876                                                " -> ");
4877                                     }
4878                                     // show file path/name
4879                                     viz.pp(displayedFileName(gstats, theFile).asPath); // show path
4880                                 }
4881                             }
4882 
4883                             // show line:column
4884                             if (viz.form == VizForm.HTML)
4885                             {
4886                                 viz.ppTagOpen("tr");
4887                                 viz.pp(to!string(nL+1).asCell,
4888                                        to!string(offKB+1).asCell);
4889                                 viz.ppTagOpen("td");
4890                                 viz.ppTagOpen("code");
4891                             }
4892                             else
4893                             {
4894                                 viz.pp(faze(":" ~ to!string(nL+1) ~ ":" ~ to!string(offKB+1) ~ ":",
4895                                             contextFace));
4896                             }
4897                             anyLineHit = true;
4898                         }
4899 
4900                         // show content prefix
4901                         viz.pp(faze(to!string(rest[0..offKB]), thisFace));
4902 
4903                         // show hit part
4904                         if (!acronymOffsets.empty)
4905                         {
4906                             foreach (aIndex, currOff; acronymOffsets) // TODO Reuse std.algorithm: zip or lockstep? Or create a new kind say named conv.
4907                             {
4908                                 // context before
4909                                 if (aIndex >= 1)
4910                                 {
4911                                     immutable prevOff = acronymOffsets[aIndex-1];
4912                                     if (prevOff + 1 < currOff) // at least one letter in between
4913                                     {
4914                                         viz.pp(asCtx(ix, to!string(rest[prevOff + 1 .. currOff])));
4915                                     }
4916                                 }
4917                                 // hit letter
4918                                 viz.pp(asHit(ix, to!string(rest[currOff])));
4919                             }
4920                         }
4921                         else
4922                         {
4923                             viz.pp(asHit(ix, to!string(rest[offKB..offKE])));
4924                         }
4925 
4926                         rest = rest[offKE..$]; // move forward in line
4927 
4928                         hitCount++; // increase hit count
4929                         parentDir.hitCount++;
4930                         _hitsCountTotal++;
4931 
4932                         goto foundHit;
4933                     }
4934                 }
4935             foundHit:
4936                 if (offKB == -1) { break; }
4937             }
4938 
4939             // finalize line
4940             if (anyLineHit)
4941             {
4942                 // show final context suffix
4943                 viz.ppln(faze(rest, thisFace));
4944                 if (viz.form == VizForm.HTML)
4945                 {
4946                     viz.ppTagClose("code");
4947                     viz.ppTagClose("td");
4948                     viz.pplnTagClose("tr");
4949                 }
4950             }
4951             nL++;
4952         }
4953 
4954         if (gstats.showLineCounts)
4955         {
4956             gstats.lineCountsByFile[theFile] = nL;
4957         }
4958 
4959         if (anyFileHit)
4960         {
4961             viz.pplnTagClose("table");
4962         }
4963 
4964         // Previous solution
4965         // version(none)
4966         // {
4967         //     ptrdiff_t offHit = 0;
4968         //     foreach (ix, key; keys)
4969         //     {
4970         //         scope immutable hit1 = src.find(key); // single key hit
4971         //         offHit = hit1.ptr - src.ptr;
4972         //         if (!hit1.empty)
4973         //         {
4974         //             scope immutable src0 = src[0..offHit]; // src beforce hi
4975         //             immutable rowHit = count(src0, newline);
4976         //             immutable colHit = src0.retro.countUntil(newline); // count backwards till beginning of rowHit
4977         //             immutable offBOL = offHit - colHit;
4978         //             immutable cntEOL = src[offHit..$].countUntil(newline); // count forwards to end of rowHit
4979         //             immutable offEOL = (cntEOL == -1 ? // if no hit
4980         //                                 src.length :   // end of file
4981         //                                 offHit + cntEOL); // normal case
4982         //             viz.pp(faze(asPath(gstats.useHTML, dent.name), pathFace));
4983         //             viz.ppln(":", rowHit + 1,
4984         //                                                                               ":", colHit + 1,
4985         //                                                                               ":", cast(string)src[offBOL..offEOL]);
4986         //         }
4987         //     }
4988         // }
4989 
4990         // switch (keys.length)
4991         // {
4992         // default:
4993         //     break;
4994         // case 0:
4995         //     break;
4996         // case 1:
4997         //     immutable hit1 = src.find(keys[0]);
4998         //     if (!hit1.empty)
4999         //     {
5000         //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit1.length);
5001         //     }
5002         //     break;
5003         // // case 2:
5004         // //     immutable hit2 = src.find(keys[0], keys[1]); // find two keys
5005         // //     if (!hit2[0].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit2[0].length); }
5006         // //     if (!hit2[1].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit2[1].length); }
5007         // //     break;
5008         // // case 3:
5009         // //     immutable hit3 = src.find(keys[0], keys[1], keys[2]); // find two keys
5010         // //     if (!hit3.empty)
5011         //        {
5012         // //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit1.length);
5013         // //     }
5014         // //     break;
5015         // }
5016         return hitCount;
5017     }
5018 
5019     /** Process Regular File `theRegFile`. */
5020     void processRegFile(Viz viz,
5021                         NotNull!Dir topDir,
5022                         NotNull!RegFile theRegFile,
5023                         NotNull!Dir parentDir,
5024                         const string[] keys,
5025                         ref Symlink[] fromSymlinks,
5026                         size_t subIndex,
5027                         GStats gstats)
5028     {
5029         scanRegFile(viz,
5030                     topDir,
5031                     theRegFile,
5032                     parentDir,
5033                     keys,
5034                     fromSymlinks,
5035                     subIndex);
5036 
5037         // check for operations
5038         // TODO Reuse isSelectedFKind instead of this
5039         immutable ext = theRegFile.realExtension;
5040         if (ext in gstats.selFKinds.byExt)
5041         {
5042             auto matchingFKinds = gstats.selFKinds.byExt[ext];
5043             foreach (kind; matchingFKinds)
5044             {
5045                 const hit = kind.operations.find!(a => a[0] == gstats.fOp);
5046                 if (!hit.empty)
5047                 {
5048                     const fOp = hit.front;
5049                     const cmd = fOp[1]; // command string
5050                     import std.process: spawnProcess;
5051                     import std.algorithm: splitter;
5052                     dbg("TODO Performing operation ", to!string(cmd),
5053                         " on ", theRegFile.path,
5054                         " by calling it using ", cmd);
5055                     auto pid = spawnProcess(cmd.splitterASCIIAmong!(" ").array ~ [theRegFile.path]);
5056                 }
5057             }
5058         }
5059     }
5060 
    /** Scan `elfFile` for ELF Symbols.
     *
     * Decodes `elfFile._mmfile` as ELF and, if the decoder provides a symbols
     * string table, builds a lazy range pipeline over its strings that
     * $(UL
     *   $(LI skips empty strings,)
     *   $(LI appends `elfFile` to `gstats.elfFilesBySymbol` under each raw string,)
     *   $(LI demangles each string, and)
     *   $(LI keeps only demanglings whose unmangled name hits one of `keys`.)
     * )
     * The remaining hits are printed as a table under a heading with the file
     * name, but only when there is at least one hit and `ELF` is a key of
     * `gstats.selFKinds.byName`.
     *
     * NOTE(review): the registration in `gstats.elfFilesBySymbol` is a side
     * effect inside a lazy range, so it presumably only happens for the
     * elements that `scan.empty` and `scan.asTable` actually traverse -
     * confirm that this is intended.
     *
     * Params:
     *   viz = output sink the table is printed to
     *   elfFile = file to decode
     *   keys = search keys; nothing is shown when empty
     *   gstats = settings and statistics (`elfFilesBySymbol`, `selFKinds`)
     */
    void scanELFFile(Viz viz,
                     NotNull!RegFile elfFile,
                     const string[] keys,
                     GStats gstats)
    {
        import nxt.elfdoc: sectionNameExplanations;
        /* TODO Add mouse hovering help for sectionNameExplanations[section] */
        dbg("before: ", elfFile);
        ELF decoder = ELF.fromFile(elfFile._mmfile);
        dbg("after: ", elfFile);

        /* foreach (section; decoder.sections) */
        /* { */
        /*     if (section.name.length) */
        /*     { */
        /*         /\* auto sst = section.StringTable; *\/ */
        /*         //writeln("ELF Section named ", section.name); */
        /*     } */
        /* } */

        /* const sectionNames = [".symtab"/\* , ".strtab", ".dynsym" *\/];    // TODO These two other sections causes range exceptions. */
        /* foreach (sectionName; sectionNames) */
        /* { */
        /*     auto sts = decoder.getSection(sectionName); */
        /*     if (!sts.isNull) */
        /*     { */
        /*         SymbolTable symtab = SymbolTable(sts); */
        /*         // TODO Use range: auto symbolsDemangled = symtab.symbols.map!(sym => demangler(sym.name).decodeSymbol); */
        /*         foreach (sym; symtab.symbols) // you can add filters here */
        /*         { */
        /*             if (gstats.demangleELF) */
        /*             { */
        /*                 const hit = demangler(sym.name).decodeSymbol; */
        /*             } */
        /*             else */
        /*             { */
        /*                 writeln("?: ", sym.name); */
        /*             } */
        /*         } */
        /*     } */
        /* } */

        auto sst = decoder.getSymbolsStringTable;
        if (!sst.isNull) // only if a symbols string table was found
        {
            import nxt.algorithm_ex: findFirstOfAnyInOrder;
            import std.range : tee;

            // Lazy pipeline: nothing below runs until `scan` is traversed.
            auto scan = (sst.strings
                            .filter!(raw => !raw.empty) // skip empty raw string
                            .tee!(raw => gstats.elfFilesBySymbol[raw.idup] ~= elfFile) // WARNING: needs raw.idup here because we can't refer to raw afterwards
                            .map!(raw => demangler(raw).decodeSymbol)
                            .filter!(demangling => (!keys.empty && // don't show anything if no keys given
                                                    demangling.unmangled.findFirstOfAnyInOrder(keys)[1]))); // element [1] presumably tells whether any key was found - confirm

            if (!scan.empty &&
                `ELF` in gstats.selFKinds.byName) // if user selected ELF file show them
            {
                viz.pp(horizontalRuler,
                       displayedFileName(gstats, elfFile).asPath.asH!3,
                       asH!4(`ELF Symbol Strings Table (`, `.strtab`.asCode, `)`),
                       scan.asTable);
            }
        }
    }
5127 
5128     /** Search for Keys `keys` in Regular File `theRegFile`. */
5129     void scanRegFile(Viz viz,
5130                      NotNull!Dir topDir,
5131                      NotNull!RegFile theRegFile,
5132                      NotNull!Dir parentDir,
5133                      const string[] keys,
5134                      ref Symlink[] fromSymlinks,
5135                      size_t subIndex)
5136     {
5137         results.noBytesTotal += theRegFile.size;
5138         results.noBytesTotalContents += theRegFile.size;
5139 
5140         // Scan name
5141         if ((gstats.scanContext == ScanContext.all ||
5142              gstats.scanContext == ScanContext.fileName ||
5143              gstats.scanContext == ScanContext.regularFileName) &&
5144             !keys.empty)
5145         {
5146             immutable hitCountInName = scanForKeys(viz,
5147                                                    topDir, cast(NotNull!File)theRegFile, parentDir,
5148                                                    fromSymlinks,
5149                                                    theRegFile.name, keys, [], ScanContext.fileName);
5150         }
5151 
5152         // Scan Contents
5153         if ((gstats.scanContext == ScanContext.all ||
5154              gstats.scanContext == ScanContext.fileContent) &&
5155             (gstats.showFileContentDups ||
5156              gstats.showELFSymbolDups ||
5157              !keys.empty) &&
5158             theRegFile.size != 0)        // non-empty file
5159         {
5160             // immutable upTo = size_t.max;
5161 
5162             // TODO Flag for readText
5163             try
5164             {
5165                 ++gstats.noScannedRegFiles;
5166                 ++gstats.noScannedFiles;
5167 
5168                 // ELF Symbols
5169                 if (gstats.showELFSymbolDups &&
5170                     theRegFile.ofKind(`ELF`, gstats.collectTypeHits, gstats.allFKinds))
5171                 {
5172                     scanELFFile(viz, theRegFile, keys, gstats);
5173                 }
5174 
5175                 // Check included kinds first because they are fast.
5176                 KindHit incKindHit = isSelectedFKind(theRegFile);
5177                 if (!gstats.selFKinds.byIndex.empty && // TODO Do we really need this one?
5178                     !incKindHit)
5179                 {
5180                     return;
5181                 }
5182 
5183                 // Super-Fast Key-File Bistogram Discardal. TODO Trim scale factor to optimal value.
5184                 enum minFileSize = 256; // minimum size of file for discardal.
5185                 immutable bool doBist = theRegFile.size > minFileSize;
5186                 immutable bool doNGram = (gstats.useNGrams &&
5187                                           (!gstats.keyAsSymbol) &&
5188                                           theRegFile.size > minFileSize);
5189                 immutable bool doBitStatus = true;
5190 
5191                 // Chunked Calculation of CStat in one pass. TODO call async.
5192                 theRegFile.calculateCStatInChunks(gstats.filesByContentId,
5193                                                   _scanChunkSize,
5194                                                   gstats.showFileContentDups,
5195                                                   doBist,
5196                                                   doBitStatus);
5197 
5198                 // Match Bist of Keys with BistX of File
5199                 bool[] bistHits;
5200                 bool noBistMatch = false;
5201                 if (doBist)
5202                 {
5203                     const theHist = theRegFile.bistogram8;
5204                     auto hitsHist = keysBists.map!(a =>
5205                                                    ((a.value & theHist.value) ==
5206                                                     a.value)); // TODO Functionize to x.subsetOf(y) or reuse std.algorithm: setDifference or similar
5207                     bistHits = hitsHist.map!`a == true`.array;
5208                     noBistMatch = hitsHist.all!`a == false`;
5209                 }
5210                 /* int kix = 0; */
5211                 /* foreach (hit; bistHits) { if (!hit) { debug dbg(`Assert key ` ~ keys[kix] ~ ` not in file ` ~ theRegFile.path); } ++kix; } */
5212 
5213                 bool allXGramsMiss = false;
5214                 if (doNGram)
5215                 {
5216                     ulong keysXGramUnionMatch = keysXGramsUnion.matchDenser(theRegFile.xgram);
5217                     debug dbg(theRegFile.path,
5218                               ` sized `, theRegFile.size, ` : `,
5219                               keysXGramsUnion.length, `, `,
5220                               theRegFile.xgram.length,
5221                               ` gave match:`, keysXGramUnionMatch);
5222                     allXGramsMiss = keysXGramUnionMatch == 0;
5223                 }
5224 
5225                 auto binHit = theRegFile.ofAnyKindIn(gstats.binFKinds,
5226                                                      gstats.collectTypeHits);
5227                 const binKindHit = binHit[0];
5228                 if (binKindHit)
5229                 {
5230                     import nxt.numerals: toOrdinal;
5231                     const nnKind = binHit[1].enforceNotNull;
5232                     const kindIndex = binHit[2];
5233                     if (_showSkipped)
5234                     {
5235                         if (gstats.showTree)
5236                         {
5237                             immutable intro = subIndex == parentDir.subs.length - 1 ? `└` : `├`;
5238                             viz.pp(`│  `.repeat(parentDir.depth + 1).join(``) ~ intro ~ `─ `);
5239                         }
5240                         viz.ppln(theRegFile, `: Skipped `, nnKind, ` file at `,
5241                                  toOrdinal(kindIndex + 1), ` blind try`);
5242                     }
5243                     final switch (binKindHit)
5244                     {
5245                         case KindHit.none:
5246                             break;
5247                         case KindHit.cached:
5248                             printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
5249                                          ` using cached KindId`);
5250                             break;
5251                         case KindHit.uncached:
5252                             printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
5253                                          ` at ` ~ toOrdinal(kindIndex + 1) ~ ` extension try`);
5254                             break;
5255                     }
5256                 }
5257 
5258                 if (binKindHit != KindHit.none ||
5259                     noBistMatch ||
5260                     allXGramsMiss) // or no hits possible. TODO Maybe more efficient to do histogram discardal first
5261                 {
5262                     results.noBytesSkipped += theRegFile.size;
5263                 }
5264                 else
5265                 {
5266                     // Search if not Binary
5267 
5268                     // If Source file is ok
5269                     auto src = theRegFile.readOnlyContents[];
5270 
5271                     results.noBytesScanned += theRegFile.size;
5272 
5273                     if (keys)
5274                     {
5275                         // Fast discardal of files with no match
5276                         bool fastOk = true;
5277                         if (!_caseFold) { // if no relaxation of search
5278                             if (gstats.keyAsAcronym) // if no relaxation of search
5279                             {
5280                                 /* TODO Reuse findAcronym in algorith_ex. */
5281                             }
5282                             else // if no relaxation of search
5283                             {
5284                                 switch (keys.length)
5285                                 {
5286                                 default: break;
5287                                 case 1: immutable hit1 = src.find(keys[0]); fastOk = !hit1.empty; break;
5288                                     // case 2: immutable hit2 = src.find(keys[0], keys[1]); fastOk = !hit2[0].empty; break;
5289                                     // case 3: immutable hit3 = src.find(keys[0], keys[1], keys[2]); fastOk = !hit3[0].empty; break;
5290                                     // case 4: immutable hit4 = src.find(keys[0], keys[1], keys[2], keys[3]); fastOk = !hit4[0].empty; break;
5291                                     // case 5: immutable hit5 = src.find(keys[0], keys[1], keys[2], keys[3], keys[4]); fastOk = !hit5[0].empty; break;
5292                                 }
5293                             }
5294                         }
5295 
5296                         // TODO Continue search from hit1, hit2 etc.
5297 
5298                         if (fastOk)
5299                         {
5300                             foreach (tag; addTags) gstats.ftags.addTag(theRegFile, tag);
5301                             foreach (tag; removeTags) gstats.ftags.removeTag(theRegFile, tag);
5302 
5303                             if (theRegFile.size >= 8192)
5304                             {
5305                                 /* if (theRegFile.xgram == null) { */
5306                                 /*     theRegFile.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
5307                                 /* } */
5308                                 /* (*theRegFile.xgram).put(src); */
5309                                 /* theRegFile.xgram.put(src); */
5310                                 /* foreach (lix, ub0; line) { // for each ubyte in line */
5311                                 /*     if (lix + 1 < line.length) { */
5312                                 /*         immutable ub1 = line[lix + 1]; */
5313                                 /*         immutable dix = (cast(ushort)ub0 | */
5314                                 /*                          cast(ushort)ub1*256); */
5315                                 /*         (*theRegFile.xgram)[dix] = true; */
5316                                 /*     } */
5317                                 /* } */
5318                                 auto shallowDenseness = theRegFile.bistogram8.denseness;
5319                                 auto deepDenseness = theRegFile.xgramDeepDenseness;
5320                                 // assert(deepDenseness >= 1);
5321                                 gstats.shallowDensenessSum += shallowDenseness;
5322                                 gstats.deepDensenessSum += deepDenseness;
5323                                 ++gstats.densenessCount;
5324                                 /* dbg(theRegFile.path, `:`, theRegFile.size, */
5325                                 /*     `, length:`, theRegFile.xgram.length, */
5326                                 /*     `, deepDenseness:`, deepDenseness); */
5327                             }
5328 
5329                             theRegFile._cstat.hitCount = scanForKeys(viz,
5330                                                                      topDir, cast(NotNull!File)theRegFile, parentDir,
5331                                                                      fromSymlinks,
5332                                                                      src, keys, bistHits,
5333                                                                      ScanContext.fileContent);
5334                         }
5335                     }
5336                 }
5337 
5338             }
5339             catch (FileException)
5340             {
5341                 handleError(viz, theRegFile, false, subIndex);
5342             }
5343             catch (ErrnoException)
5344             {
5345                 handleError(viz, theRegFile, false, subIndex);
5346             }
5347             theRegFile.freeContents; // TODO Call lazily only when open count is too large
5348         }
5349     }
5350 
5351     /** Scan Symlink `symlink` at `parentDir` for `keys`
5352         Put results in `results`. */
5353     void scanSymlink(Viz viz,
5354                      NotNull!Dir topDir,
5355                      NotNull!Symlink theSymlink,
5356                      NotNull!Dir parentDir,
5357                      const string[] keys,
5358                      ref Symlink[] fromSymlinks)
5359     {
5360         // check for symlink cycles
5361         if (!fromSymlinks.find(theSymlink).empty)
5362         {
5363             if (gstats.showSymlinkCycles)
5364             {
5365                 import std.range: back;
5366                 viz.ppln(`Cycle of symbolic links: `,
5367                          fromSymlinks.asPath,
5368                          ` -> `,
5369                          fromSymlinks.back.target);
5370             }
5371             return;
5372         }
5373 
5374         // Scan name
5375         if ((gstats.scanContext == ScanContext.all ||
5376              gstats.scanContext == ScanContext.fileName ||
5377              gstats.scanContext == ScanContext.symlinkName) &&
5378             !keys.empty)
5379         {
5380             scanForKeys(viz,
5381                         topDir, cast(NotNull!File)theSymlink, enforceNotNull(theSymlink.parent),
5382                         fromSymlinks,
5383                         theSymlink.name, keys, [], ScanContext.fileName);
5384         }
5385 
5386         // try {
5387         //     results.noBytesTotal += dent.size;
5388         // } catch (Exception)
5389         //   {
5390         //     dbg(`Couldn't get size of `,  dir.name);
5391         // }
5392         if (gstats.followSymlinks == SymlinkFollowContext.none) { return; }
5393 
5394         import std.range: popBackN;
5395         fromSymlinks ~= theSymlink;
5396         immutable targetPath = theSymlink.absoluteNormalizedTargetPath;
5397         if (targetPath.exists)
5398         {
5399             theSymlink._targetStatus = SymlinkTargetStatus.present;
5400             if (_topDirNames.all!(a => !targetPath.startsWith(a))) { // if target path lies outside of all rootdirs
5401                 auto targetDent = DirEntry(targetPath);
5402                 auto targetFile = getFile(enforceNotNull(gstats.rootDir), targetPath, targetDent.isDir);
5403 
5404                 if (gstats.showTree)
5405                 {
5406                     viz.ppln(`│  `.repeat(parentDir.depth + 1).join(``) ~ `├` ~ `─ `,
5407                              theSymlink,
5408                              ` modified `,
5409                              faze(shortDurationString(_currTime - theSymlink.timeLastModified),
5410                                   timeFace),
5411                              ` ago`, ` -> `,
5412                              targetFile.asPath,
5413                              faze(` outside of ` ~ (_topDirNames.length == 1 ? `tree ` : `all trees `),
5414                                   infoFace),
5415                              gstats.topDirs.asPath,
5416                              faze(` is followed`, infoFace));
5417                 }
5418 
5419                 ++gstats.noScannedSymlinks;
5420                 ++gstats.noScannedFiles;
5421 
5422                 if      (auto targetRegFile = cast(RegFile)targetFile)
5423                 {
5424                     processRegFile(viz, topDir, assumeNotNull(targetRegFile), parentDir, keys, fromSymlinks, 0, gstats);
5425                 }
5426                 else if (auto targetDir = cast(Dir)targetFile)
5427                 {
5428                     scanDir(viz, topDir, assumeNotNull(targetDir), keys, fromSymlinks);
5429                 }
5430                 else if (auto targetSymlink = cast(Symlink)targetFile) // target is a Symlink
5431                 {
5432                     scanSymlink(viz, topDir,
5433                                 assumeNotNull(targetSymlink),
5434                                 enforceNotNull(targetSymlink.parent),
5435                                 keys, fromSymlinks);
5436                 }
5437             }
5438         }
5439         else
5440         {
5441             theSymlink._targetStatus = SymlinkTargetStatus.broken;
5442 
5443             if (gstats.showBrokenSymlinks)
5444             {
5445                 _brokenSymlinks ~= theSymlink;
5446 
5447                 foreach (ix, fromSymlink; fromSymlinks)
5448                 {
5449                     if (gstats.showTree && ix == 0)
5450                     {
5451                         immutable intro = `├`;
5452                         viz.pp(`│  `.repeat(theSymlink.parent.depth + 1).join(``) ~ intro ~ `─ `,
5453                                theSymlink);
5454                     }
5455                     else
5456                     {
5457                         viz.pp(fromSymlink);
5458                     }
5459                     viz.pp(` -> `);
5460                 }
5461 
5462                 viz.ppln(faze(theSymlink.target, missingSymlinkTargetFace),
5463                          faze(` is missing`, warnFace));
5464             }
5465         }
5466         fromSymlinks.popBackN(1);
5467     }
5468 
5469     /** Scan Directory `parentDir` for `keys`. */
5470     void scanDir(Viz viz,
5471                  NotNull!Dir topDir,
5472                  NotNull!Dir theDir,
5473                  const string[] keys,
5474                  Symlink[] fromSymlinks = [],
5475                  int maxDepth = -1)
5476     {
5477         if (theDir.isRoot)  { results.reset; }
5478 
5479         // scan in directory name
5480         if ((gstats.scanContext == ScanContext.all ||
5481              gstats.scanContext == ScanContext.fileName ||
5482              gstats.scanContext == ScanContext.dirName) &&
5483             !keys.empty)
5484         {
5485             scanForKeys(viz,
5486                         topDir,
5487                         cast(NotNull!File)theDir,
5488                         enforceNotNull(theDir.parent),
5489                         fromSymlinks,
5490                         theDir.name, keys, [], ScanContext.fileName);
5491         }
5492 
5493         try
5494         {
5495             size_t subIndex = 0;
5496             if (gstats.showTree)
5497             {
5498                 immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
5499 
5500                 viz.pp(`│  `.repeat(theDir.depth).join(``) ~ intro ~
5501                        `─ `, theDir, ` modified `,
5502                        faze(shortDurationString(_currTime -
5503                                                 theDir.timeLastModified),
5504                             timeFace),
5505                        ` ago`);
5506 
5507                 if (gstats.showUsage)
5508                 {
5509                     viz.pp(` of Tree-Size `, theDir.treeSize);
5510                 }
5511 
5512                 if (gstats.showSHA1)
5513                 {
5514                     viz.pp(` with Tree-Content-Id `, theDir.treeContentId);
5515                 }
5516                 viz.ppendl;
5517             }
5518 
5519             ++gstats.noScannedDirs;
5520             ++gstats.noScannedFiles;
5521 
5522             auto subsSorted = theDir.subsSorted(gstats.subsSorting);
5523             foreach (key, sub; subsSorted)
5524             {
5525                 /* TODO Functionize to scanFile */
5526                 if (auto regFile = cast(RegFile)sub)
5527                 {
5528                     processRegFile(viz, topDir, assumeNotNull(regFile), theDir, keys, fromSymlinks, subIndex, gstats);
5529                 }
5530                 else if (auto subDir = cast(Dir)sub)
5531                 {
5532                     if (maxDepth == -1 || // if either all levels or
5533                         maxDepth >= 1) { // levels left
5534                         if (sub.name in gstats.skippedDirKindsMap) // if sub should be skipped
5535                         {
5536                             if (_showSkipped)
5537                             {
5538                                 if (gstats.showTree)
5539                                 {
5540                                     immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
5541                                     viz.pp(`│  `.repeat(theDir.depth + 1).join(``) ~ intro ~ `─ `);
5542                                 }
5543 
5544                                 viz.pp(subDir,
5545                                        ` modified `,
5546                                        faze(shortDurationString(_currTime -
5547                                                                 subDir.timeLastModified),
5548                                             timeFace),
5549                                        ` ago`,
5550                                        faze(`: Skipped Directory of type `, infoFace),
5551                                        gstats.skippedDirKindsMap[sub.name].kindName);
5552                             }
5553                         }
5554                         else
5555                         {
5556                             scanDir(viz, topDir,
5557                                     assumeNotNull(subDir),
5558                                     keys,
5559                                     fromSymlinks,
5560                                     maxDepth >= 0 ? --maxDepth : maxDepth);
5561                         }
5562                     }
5563                 }
5564                 else if (auto subSymlink = cast(Symlink)sub)
5565                 {
5566                     scanSymlink(viz, topDir, assumeNotNull(subSymlink), theDir, keys, fromSymlinks);
5567                 }
5568                 else
5569                 {
5570                     if (gstats.showTree) { viz.ppendl; }
5571                 }
5572                 ++subIndex;
5573 
5574                 if (ctrlC)
5575                 {
5576                     viz.ppln(`Ctrl-C pressed: Aborting scan of `, theDir);
5577                     break;
5578                 }
5579             }
5580 
5581             if (gstats.showTreeContentDups)
5582             {
5583                 theDir.treeContentId; // better to put this after file scan for now
5584             }
5585         }
5586         catch (FileException)
5587         {
5588             handleError(viz, theDir, true, 0);
5589         }
5590     }
5591 
5592     /** Filter out `files` that lie under any of the directories `dirPaths`. */
5593     F[] filterUnderAnyOfPaths(F)(F[] files,
5594                                  string[] dirPaths)
5595     {
5596         import std.algorithm: any;
5597         import std.array: array;
5598         auto dupFilesUnderAnyTopDirName = (files
5599                                            .filter!(dupFile =>
5600                                                     dirPaths.any!(dirPath =>
5601                                                                   dupFile.path.startsWith(dirPath)))
5602                                            .array // evaluate to array to get .length below
5603             );
5604         F[] hits;
5605         final switch (gstats.duplicatesContext)
5606         {
5607         case DuplicatesContext.internal:
5608             if (dupFilesUnderAnyTopDirName.length >= 2)
5609                 hits = dupFilesUnderAnyTopDirName;
5610             break;
5611         case DuplicatesContext.external:
5612             if (dupFilesUnderAnyTopDirName.length >= 1)
5613                 hits = files;
5614             break;
5615         }
5616         return hits;
5617     }
5618 
5619     /** Show Statistics. */
5620     void showContentDups(Viz viz)
5621     {
5622         import std.meta : AliasSeq;
5623         foreach (ix, kind; AliasSeq!(RegFile, Dir))
5624         {
5625             immutable typeName = ix == 0 ? `Regular File` : `Directory Tree`;
5626             viz.pp((typeName ~ ` Content Duplicates`).asH!2);
5627             foreach (digest, dupFiles; gstats.filesByContentId)
5628             {
5629                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5630                 if (dupFilesOk.length >= 2) // non-empty file/directory
5631                 {
5632                     auto firstDup = cast(kind)dupFilesOk[0];
5633                     if (firstDup)
5634                     {
5635                         static if (is(kind == RegFile))
5636                         {
5637                             if (firstDup._cstat.kindId)
5638                             {
5639                                 if (firstDup._cstat.kindId in gstats.allFKinds.byId)
5640                                 {
5641                                     viz.pp(asH!3(gstats.allFKinds.byId[firstDup._cstat.kindId],
5642                                                  ` files sharing digest `, digest, ` of size `, firstDup.treeSize));
5643                                 }
5644                                 else
5645                                 {
5646                                     dbg(firstDup.path ~ ` kind Id ` ~ to!string(firstDup._cstat.kindId) ~
5647                                         ` could not be found in allFKinds.byId`);
5648                                 }
5649                             }
5650                             viz.pp(asH!3((firstDup._cstat.bitStatus == BitStatus.bits7) ? `ASCII File` : typeName,
5651                                          `s sharing digest `, digest, ` of size `, firstDup.treeSize));
5652                         }
5653                         else
5654                         {
5655                             viz.pp(asH!3(typeName, `s sharing digest `, digest, ` of size `, firstDup.size));
5656                         }
5657 
5658                         viz.pp(asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5659                     }
5660                 }
5661             }
5662         }
5663     }
5664 
5665     /** Show Statistics. */
5666     void showStats(Viz viz)
5667     {
5668         /* Duplicates */
5669 
5670         if (gstats.showNameDups)
5671         {
5672             viz.pp(`Name Duplicates`.asH!2);
5673             foreach (digest, dupFiles; gstats.filesByName)
5674             {
5675                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5676                 if (!dupFilesOk.empty)
5677                 {
5678                     viz.pp(asH!3(`Files with same name `,
5679                                  faze(dupFilesOk[0].name, fileFace)),
5680                            asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5681                 }
5682             }
5683         }
5684 
5685         if (gstats.showLinkDups)
5686         {
5687             viz.pp(`Inode Duplicates (Hardlinks)`.asH!2);
5688             foreach (inode, dupFiles; gstats.filesByInode)
5689             {
5690                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5691                 if (dupFilesOk.length >= 2)
5692                 {
5693                     viz.pp(asH!3(`Files with same inode ` ~ to!string(inode) ~
5694                                  ` (hardlinks): `),
5695                            asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5696                 }
5697             }
5698         }
5699 
5700         if (gstats.showFileContentDups)
5701         {
5702             showContentDups(viz);
5703         }
5704 
5705         if (gstats.showELFSymbolDups &&
5706             !keys.empty) // don't show anything if no keys where given
5707         {
5708             viz.pp(`ELF Symbol Duplicates`.asH!2);
5709             foreach (raw, dupFiles; gstats.elfFilesBySymbol)
5710             {
5711                 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
5712                 if (dupFilesOk.length >= 2)
5713                 {
5714                     const demangling = demangler(raw).decodeSymbol;
5715                     if (demangling.unmangled.findFirstOfAnyInOrder(keys)[1])
5716                     {
5717                         viz.pp(asH!3(`ELF Files with same symbol ` ~ to!string(raw)),
5718                                asUList(dupFilesOk.map!(x => x.asPath.asItem)));
5719                     }
5720                 }
5721             }
5722         }
5723 
5724         /* Broken Symlinks */
5725         if (gstats.showBrokenSymlinks &&
5726             !_brokenSymlinks.empty)
5727         {
5728             viz.pp(`Broken Symlinks `.asH!2,
5729                    asUList(_brokenSymlinks.map!(x => x.asPath.asItem)));
5730         }
5731 
5732         /* Counts */
5733         viz.pp(`Scanned Types`.asH!2,
5734                /* asUList(asItem(gstats.noScannedDirs, ` Dirs, `), */
5735                /*         asItem(gstats.noScannedRegFiles, ` Regular Files, `), */
5736                /*         asItem(gstats.noScannedSymlinks, ` Symbolic Links, `), */
5737                /*         asItem(gstats.noScannedSpecialFiles, ` Special Files, `), */
5738                /*         asItem(`totalling `, gstats.noScannedFiles, ` Files`) // on extra because of lack of root */
5739                /*     ) */
5740                asTable(asRow(asCell(asBold(`Scan Count`)),
5741                              asCell(asBold(`File Type`))),
5742                        asRow(asCell(gstats.noScannedDirs),
5743                              asCell(asItalic(`Dirs`))),
5744                        asRow(asCell(gstats.noScannedRegFiles),
5745                              asCell(asItalic(`Regular Files`))),
5746                        asRow(asCell(gstats.noScannedSymlinks),
5747                              asCell(asItalic(`Symbolic Links`))),
5748                        asRow(asCell(gstats.noScannedSpecialFiles),
5749                              asCell(asItalic(`Special Files`))),
5750                        asRow(asCell(gstats.noScannedFiles),
5751                              asCell(asItalic(`Files`)))
5752                    )
5753             );
5754 
5755         if (gstats.densenessCount)
5756         {
5757             viz.pp(`Histograms`.asH!2,
5758                    asUList(asItem(`Average Byte Bistogram (Binary Histogram) Denseness `,
5759                                   cast(real)(100*gstats.shallowDensenessSum / gstats.densenessCount), ` Percent`),
5760                            asItem(`Average Byte `, NGramOrder, `-Gram Denseness `,
5761                                   cast(real)(100*gstats.deepDensenessSum / gstats.densenessCount), ` Percent`)));
5762         }
5763 
5764         viz.pp(`Scanned Bytes`.asH!2,
5765                asUList(asItem(`Scanned `, results.noBytesScanned),
5766                        asItem(`Skipped `, results.noBytesSkipped),
5767                        asItem(`Unreadable `, results.noBytesUnreadable),
5768                        asItem(`Total Contents `, results.noBytesTotalContents),
5769                        asItem(`Total `, results.noBytesTotal),
5770                        asItem(`Total number of hits `, results.numTotalHits),
5771                        asItem(`Number of Files with hits `, results.numFilesWithHits)));
5772 
5773         viz.pp(`Some Math`.asH!2);
5774 
5775         {
5776             struct Stat
5777             {
5778                 particle2f particle;
5779                 point2r point;
5780                 vec2r velocity;
5781                 vec2r acceleration;
5782                 mat2 rotation;
5783                 Rational!uint ratInt;
5784                 Vector!(Rational!int, 4) ratIntVec;
5785                 Vector!(float, 2, true) normFloatVec2;
5786                 Vector!(float, 3, true) normFloatVec3;
5787                 Point!(Rational!int, 4) ratIntPoint;
5788             }
5789 
5790             /* Vector!(Complex!float, 4) complexVec; */
5791 
5792             viz.ppln(`A number: `, 1.2e10);
5793             viz.ppln(`Randomize particle2f as TableNr0: `, randomInstanceOf!particle2f.asTableNr0);
5794 
5795             alias Stats3 = Stat[3];
5796             auto stats = new Stat[3];
5797             randomize(stats);
5798             viz.ppln(`A ` ~ typeof(stats).stringof, `: `, stats.randomize.asTable);
5799 
5800             {
5801                 auto x = randomInstanceOf!Stats3;
5802                 foreach (ref e; x)
5803                 {
5804                     e.velocity *= 1e9;
5805                 }
5806                 viz.ppln(`Some Stats: `,
5807                          x.asTable);
5808             }
5809         }
5810 
5811 
5812     }
5813 }
5814 
/** Install the signal handlers and construct a terminal-backed `Scanner`.
 *
 * On Linux, `SIGABRT`, `SIGTERM`, `SIGQUIT` and `SIGINT` are all routed to
 * `signalHandler`. Afterwards a `Scanner!Terminal` is constructed from `args`
 * and a linear-output `Terminal`; the instance is not used further here, so
 * the work is presumably done by its constructor.
 *
 * Params:
 *   args = command line arguments, forwarded unchanged to the `Scanner`
 */
void scanner(string[] args)
{
    version(linux)
    {
        // Same handler for every signal we want to intercept, registered in
        // this order.
        static immutable interceptedSignals = [SIGABRT, SIGTERM, SIGQUIT, SIGINT];
        foreach (sig; interceptedSignals)
        {
            signal(sig, &signalHandler);
        }
    }

    auto console = Terminal(ConsoleOutputType.linear);
    auto engine = new Scanner!Terminal(args, console);
}