#!/usr/bin/env rdmd-dev

/**
   File Scanning Engine.

   Make rich use of Sparse Distributed Representations (SDR) using Hash Digests
   for relating Data and its Relations/Properties/Meta-Data.

   See_Also: http://stackoverflow.com/questions/12629749/how-does-grep-run-so-fast
   See_Also: http://www.regular-expressions.info/powergrep.html
   See_Also: http://ridiculousfish.com/blog/posts/old-age-and-treachery.html
   See_Also: http://www.olark.com/spw/2011/08/you-can-list-a-directory-with-8-million-files-but-not-with-ls/

   TODO Make use of parallelism_ex: pmap

   TODO Call filterUnderAnyOfPaths using std.algorithm.filter directly on AAs. Use byPair or use AA.get(key, defaultValue)
        See_Also: http://forum.dlang.org/thread/mailman.75.1392335793.6445.digitalmars-d-learn@puremagic.com
        See_Also: https://github.com/D-Programming-Language/druntime/pull/574

   TODO Count logical lines.
   TODO Lexers should be loosely coupled to FKinds instead of Files
   TODO Generic Token[] and specific CToken[], CxxToken[]

   TODO Don't scan for duplicates inside vc-dirs by default

   TODO Assert that files along duplicates path don't include symlinks

   TODO Implement FOp.deduplicate
   TODO Prevent rescans of duplicates

   TODO Defined generalized_specialized_two_way_relationship(kindD, kindDi)

   TODO Visualize hits using existingFileHitContext.asH!1 followed by a table:
        ROW_NR | hit string in <code lang=LANG></code>

   TODO Parse and Sort GCC/Clang Compiler Messages on WARN_TYPE FILE:LINE:COL:MSG[WARN_TYPE] and use Collapsable HTML Widgets:
        http://api.jquerymobile.com/collapsible/
        when presenting them

   TODO Maybe make use of https://github.com/Abscissa/scriptlike

   TODO Calculate Tree grams and bist

   TODO Get stats of the link itself not the target in SymLink constructors

   TODO RegFile with FileContent.text should be decodable to Unicode using
   either iso-latin1, utf-8, etc.
Check std.uni for how to try and decode stuff. 48 49 TODO Search for subwords. 50 For example gtk_widget should also match widget_gtk and GtkWidget etc. 51 52 TODO Support multi-line keys 53 54 TODO Use hash-lookup in txtFKinds.byExt for faster guessing of source file 55 kind. Merge it with binary kind lookup. And check FileContent member of 56 kind to instead determine if it should be scanned or not. 57 Sub-Task: Case-Insensitive Matching of extensions if 58 nothing else passes. 59 60 TODO Detect symlinks with duplicate targets and only follow one of them and 61 group them together in visualization 62 63 TODO Add addTag, removeTag, etc and interface to fs.d for setting tags: 64 --add-tag=comedy, remove-tag=comedy 65 66 TODO If files ends with ~ or .backup assume its a backup file, strip it from 67 end match it again and set backupFlag in FileKind 68 69 TODO Acronym match can make use of normal histogram counts. Check denseness 70 of binary histogram (bist) to determine if we should use a sparse or dense 71 histogram. 72 73 TODO Activate and test support for ELF and Cxx11 subkinds 74 75 TODO Call either File.checkObseleted upon inotify. checkObseleted should remove stuff from hash tables 76 TODO Integrate logic in clearCStat to RegFile.makeObselete 77 TODO Upon Dir inotify call invalidate _depth, etc. 78 79 TODO Following command: fs.d --color -d ~/ware/emacs -s lispy -k 80 shows "Skipped PNG file (png) at first extension try". 81 Assure that this logic reuses cache and instead prints something like "Skipped PNG file using cached FKind". 82 83 TODO Cache each Dir separately to a file named after SHA1 of its path 84 85 TODO Add ASCII kind: Requires optional stream analyzer member of FKind in 86 replacement for magicData. ASCIIFile 87 88 TODO Defined NotAnyKind(binaryKinds) and cache it 89 90 TODO Create PkZipFile() in Dir.load() when FKind "pkZip Archive" is found. 
   Use std.zip.ZipArchive(void[] from mmfile)

   TODO Scan Subversion Dirs with http://pastebin.com/6ZzPvpBj

   TODO Change order (binHit || allBHist8Miss) and benchmark

   TODO Display modification/access times as:
   See: http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com

   TODO Use User Defined Attributes (UDA): http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com
   TODO Use msgPack @nonPacked when needed

   TODO Limit lines to terminal width

   TODO Create array of (OFFSET, LENGTH) and this in FKind Pattern factory
   function. Then for source file extract a slice at (OFFSET, LENGTH) and use as
   input into hash-table from magic (if it's a Lit-pattern too)

   TODO Verify that "f.tar.z" gets tuple extensions tuple("tar", "z")
   TODO Verify that "libc.so.1.2.3" gets tuple extensions tuple("so", "1", "2", "3") and "so" extensions should then be tried
   TODO Cache Symbols larger than three characters in a global hash from symbol to path

   TODO Benchmark horspool.d and perhaps use instead of std.find

   TODO Splitting into keys should not split arguments such as "a b"

   TODO Perhaps use http://www.chartjs.org/ to visualize stuff

   TODO Make use of @nonPacked in version(msgpack).
*/
module nxt.fs;

version = msgpack;              // Use msgpack serialization
/* version = cerealed;              // Use cerealed serialization */

import std.stdio: ioFile = File, stdout;
import std.typecons: Tuple, tuple;
import std.algorithm: find, map, filter, reduce, max, min, uniq, all, joiner;
import std.string: representation, chompPrefix;
import std.stdio: write, writeln, writefln;
import std.path: baseName, dirName, isAbsolute, dirSeparator, extension, buildNormalizedPath, expandTilde, absolutePath;
import std.datetime;
import std.file: FileException;
import std.digest.sha: sha1Of, toHexString;
import std.range: repeat, array, empty, cycle, chain;
import std.stdint: uint64_t;
import std.traits: Unqual, isInstanceOf, isIterable;
import std.experimental.allocator;
import std.functional: memoize;
import std.complex: Complex;

import nxt.predicates: isUntouched;

import core.memory: GC;
import core.exception;

import nxt.traits_ex;
import nxt.getopt_ex;
import nxt.digest_ex;
import nxt.algorithm_ex;
import nxt.codec;
import nxt.csunits;
alias Bytes64 = Bytes!ulong;
import arsd.terminal : Color;
import nxt.symbolic;
import nxt.static_bitarray;
import nxt.dbgio;
import nxt.tempfs;
import nxt.rational: Rational;
import nxt.ngram;
import nxt.notnull;
import nxt.pretty;

import nxt.geometry;
import nxt.random_ex;
import nxt.mathml;
import nxt.mangling;
import nxt.lingua;
import nxt.attributes;
import nxt.find_ex;

import nxt.elf;
import nxt.typedoc;
import lock_free.rwqueue;

/* NGram Aliases */
/** Not very likely that we are interested in histograms 64-bit precision
 * Bucket/Bin Counts so pick 32-bit for now.
*/
alias RequestedBinType = uint;
enum NGramOrder = 3;
alias Bist  = NGram!(ubyte, 1, ngram.Kind.binary, ngram.Storage.denseStatic, ngram.Symmetry.ordered, void, immutable(ubyte)[]);
alias XGram = NGram!(ubyte, NGramOrder, ngram.Kind.saturated, ngram.Storage.sparse, ngram.Symmetry.ordered, RequestedBinType, immutable(ubyte)[]);

/* Need for signal handling */
import core.stdc.stdlib;
version(linux) import core.sys.posix.sys.stat;
version(linux) import core.sys.posix.signal;
//version(linux) import std.c.linux.linux;

/* TODO Set global state.
   http://forum.dlang.org/thread/cu9fgg$28mr$1@digitaldaemon.com
*/

/** Number of times Ctrl-C has been pressed.
    Incremented atomically by `signalHandler`.
*/
shared uint ctrlC = 0;

/** Exception Describing Process Signal. */
class SignalCaughtException : Exception
{
    /// Number of the signal that was caught (`int.max` until constructed).
    int signo = int.max;

    /** Construct from signal number `signo`.
        Params:
            signo = number of the caught signal
            file = source file reported in the message
            line = source line reported in the message
    */
    this(int signo, string file = __FILE__, size_t line = __LINE__ ) @safe {
        this.signo = signo;
        import std.conv: to;
        super(`Signal number ` ~ to!string(signo) ~ ` at ` ~ file ~ `:` ~ to!string(line));
    }
}

/** Process signal handler that counts interrupts.

    Atomically increments `ctrlC` when `signo` is `SIGINT`; every other signal
    is ignored.

    Params:
        signo = number of the delivered signal
*/
void signalHandler(int signo) nothrow @nogc
{
    // A selective import binds only the listed symbols, not the fully
    // qualified `core.atomic.atomicOp`, so the call must be unqualified.
    import core.atomic: atomicOp;
    import core.stdc.signal: SIGINT; // named constant instead of literal 2
    if (signo == SIGINT)
    {
        atomicOp!`+=`(ctrlC, 1);
    }
    // throw new SignalCaughtException(signo);
}

/// Type of handler accepted by `signal`.
alias signalHandler_t = void function(int);
extern (C) signalHandler_t signal(int signal, signalHandler_t handler);

version(msgpack)
{
    import msgpack;
}
version(cerealed)
{
    /* import cerealed.cerealiser; */
    /* import cerealed.decerealiser; */
    /* import cerealed.cereal; */
}

/** File Content Type Code.
*/
enum FileContent
{
    unknown,
    binaryUnknown,
    binary,
    text,
    textASCII,
    text8Bit,
    document,
    spreadsheet,
    database,
    tagsDatabase,
    image,
    imageIcon,
    audio,
    sound = audio,              /// Alias of `audio`.
    music = audio,              /// Alias of `audio`.

    modemData,
    imageModemFax1BPP,          /// One bit per pixel.
    voiceModem,

    video,
    movie,
    media,
    sourceCode,
    scriptCode,
    buildSystemCode,
    byteCode,
    machineCode,
    versionControl,
    numericalData,
    archive,
    compressed,
    cache,
    binaryCache,
    firmware,
    spellCheckWordList,
    font,
    performanceBenchmark,
    fingerprint,
}

/** How File Kinds are detected. */
enum FileKindDetection
{
    equalsParentPathDirsAndName, /// Parenting path file name must match.
    equalsName,                  /// Only name must match.
    equalsNameAndContents,       /// Both name and contents must match.
    equalsNameOrContents,        /// Either name or contents must match.
    equalsContents,              /// Only contents must match.
    equalsWhatsGiven,            /// All information defined must match.
}

/** Key Scan (Search) Context. */
enum ScanContext
{
    /* code, */
    /* comment, */
    /* string, */

    /* word, */
    /* symbol, */

    dirName,                    /// Name of directory being scanned.
    dir = dirName,              /// Alias of `dirName`.

    fileName,                   /// Name of file being scanned.
    name = fileName,            /// Alias of `fileName`.

    regularFileName,            /// Name of regular file being scanned.
    symlinkName,                /// Name of symbolic link being scanned.

    fileContent,                /// Contents of file being scanned.
    content = fileContent,      /// Alias of `fileContent`.

    /* modTime, */
    /* accessTime, */
    /* xattr, */
    /* size, */

    all,
    standard = all,             /// Alias of `all`.
}

/** Where duplicates are allowed to reside relative to `topDirs`. */
enum DuplicatesContext
{
    internal,                   /// All duplicates must lie inside topDirs.
    external,                   /// At least one duplicate lies inside topDirs. Others may lie outside.
}

/** File Operation Type Code.
*/
enum FOp
{
    none,

    checkSyntax,                /// Check syntax.
    lint = checkSyntax,         /// Alias of `checkSyntax`.

    build,                      /// Project-Wide Build.
    compile,                    /// Compile.
    byteCompile,                /// Byte compile.
    run,                        /// Run (Execute).
    execute = run,              /// Alias of `run`.

    preprocess,                 /// Preprocess C/C++/Objective-C (using cpp).
    cpp = preprocess,           /// Alias of `preprocess`.

    /* VCS Operations */
    vcStatus,
    vcs = vcStatus,             /// Alias of `vcStatus`.

    deduplicate,                /// Deduplicate Files using hardlinks and Dirs using Symlink.
}

/** Directory Operation Type Code. */
enum DirOp
{
    /* VCS Operations */
    vcStatus,
}

/** Shell Command.
    Just simply a string for now.
 */
alias ShCmd = string;

/** Pair of Delimiters.
    Used to describe for example comment and string delimiter syntax.
 */
struct Delim
{
    /// Construct a delimiter that starts with `intro`; `finish` is left undefined.
    this(string intro)
    {
        this.intro = intro;
        this.finish = null;
    }

    /// Construct a delimiter pair `intro` .. `finish`.
    this(string intro, string finish)
    {
        this.intro = intro;
        this.finish = finish;
    }

    string intro;               /// Opening delimiter.
    string finish;              /// Closing delimiter. Defaults to end of line if not defined.
}

/* Comment Delimiters */
enum defaultCommentDelims = [Delim(`#`)];
enum cCommentDelims = [Delim(`/*`, `*/`),
                       Delim(`//`)];
enum dCommentDelims = [Delim(`/+`, `+/`)] ~ cCommentDelims;

/* String Delimiters */
enum defaultStringDelims = [Delim(`"`),
                            Delim(`'`),
                            Delim("`")];
// Python additionally has the triple-quote form, which is tried first.
enum pythonStringDelims = [Delim(`"""`)] ~ defaultStringDelims;

/** File Kind.
*/
class FKind
{
    /** Construct a file kind.

        Params:
            kindName_ = nick name of the kind
            baseNaming_ = basename matcher: a `string`, an array of `string`s
                          (alternatives) or a ready-made `Patt`
            exts_ = typical file extensions
            magicData = magic bytes as `ubyte[]`, `string`, `void[]` or a `Patt`
            magicOffset = offset at which `magicData` is matched
            refPattern_ = reference pattern as `ubyte[]`, `string` or `void[]`
            keywords_ = language keywords
            strings_ = string delimiter syntax
            comments_ = comment delimiter syntax
            content_ = content type code
            detection_ = how the kind is detected
            lang_ = language, if any
            superKind = kind this kind specializes
            subKinds = kinds specializing this kind
            description = documenting description
            wikip = Wikipedia URL
    */
    this(T, MagicData, RefPattern)(string kindName_,
                                   T baseNaming_,
                                   const string[] exts_,
                                   MagicData magicData, size_t magicOffset = 0,
                                   RefPattern refPattern_ = RefPattern.init,
                                   const string[] keywords_ = [],

                                   Delim[] strings_ = [],

                                   Delim[] comments_ = [],

                                   FileContent content_ = FileContent.unknown,
                                   FileKindDetection detection_ = FileKindDetection.equalsWhatsGiven,
                                   Lang lang_ = Lang.unknown,

                                   FKind superKind = null,
                                   FKind[] subKinds = [],
                                   string description = null,
                                   string wikip = null) @trusted pure
    {
        this.kindName = kindName_;

        // Basename
        import std.traits: isArray;
        import std.range: ElementType;
        static if (is(T == string))
        {
            this.baseNaming = lit(baseNaming_);
        }
        else static if (isArrayOf!(T, string))
        {
            // TODO Move to a factory function strs(x)
            auto alt_ = alt();
            foreach (ext; baseNaming_) // add each string as an alternative
            {
                alt_ ~= lit(ext);
            }
            this.baseNaming = alt_;
        }
        else static if (is(T == Patt))
        {
            this.baseNaming = baseNaming_;
        }

        this.exts = exts_;

        import std.traits: isAssignable;
        static if (is(MagicData == ubyte[])) { this.magicData = lit(magicData) ; }
        else static if (is(MagicData == string)) { this.magicData = lit(magicData.representation.dup); }
        else static if (is(MagicData == void[])) { this.magicData = lit(cast(ubyte[])magicData); }
        else static if (isAssignable!(Patt, MagicData)) { this.magicData = magicData; }
        else static assert(0, `Cannot handle MagicData being type ` ~ MagicData.stringof);

        this.magicOffset = magicOffset;

        static if (is(RefPattern == ubyte[])) { this.refPattern = refPattern_; }
        else static if (is(RefPattern == string)) { this.refPattern = refPattern_.representation.dup; }
        else static if (is(RefPattern == void[])) { this.refPattern = (cast(ubyte[])refPattern_).dup; }
        else static assert(0, `Cannot handle RefPattern being type ` ~ RefPattern.stringof);

        this.keywords = keywords_;

        this.strings = strings_;
        this.comments = comments_;

        this.content = content_;

        if ((content_ == FileContent.sourceCode ||
             content_ == FileContent.scriptCode) &&
            detection_ == FileKindDetection.equalsWhatsGiven)
        {
            // relax matching of sourcecode to only need name until we have complete parsers
            this.detection = FileKindDetection.equalsName;
        }
        else
        {
            this.detection = detection_;
        }
        this.lang = lang_;

        this.superKind = superKind;
        this.subKinds = subKinds;
        this.description = description;
        this.wikip = wikip.asURL;
    }

    /// Returns: the kind's nick name.
    override string toString() const @property @trusted pure nothrow { return kindName; }

    /** Returns: Id Unique to matching behaviour of `this` FKind. If match
        behaviour of `this` FKind changes returned id will change.
        value is memoized.
    */
    auto ref const(SHA1Digest) behaviorId() @property @safe /* pure nothrow */
        out(result) { assert(!result.empty); }
    do
    {
        if (_behaviourDigest.empty) // if not yet defined
        {
            ubyte[] bytes;
            const magicLit = cast(Lit)magicData;
            if (magicLit)
            {
                bytes = msgpack.pack(exts, magicLit.bytes, magicOffset, refPattern, keywords, content, detection);
            }
            else
            {
                // Without literal magic bytes the remaining match parameters
                // must still be hashed; hashing an empty buffer would give
                // every such kind the same id and make them collide in
                // `FKinds.byId`.
                bytes = msgpack.pack(exts, magicOffset, refPattern, keywords, content, detection);
            }
            _behaviourDigest = bytes.sha1Of;
        }
        return _behaviourDigest;
    }

    string kindName;    // Kind Nick Name.
    string description; // Kind Documenting Description.
    AsURL!string wikip; // Wikipedia URL

    FKind superKind;    // Inherited pattern. For example ELF => ELF core file
    FKind[] subKinds;   // Inherited pattern. For example ELF => ELF core file
    Patt baseNaming;    // Pattern that matches typical file basenames of this Kind. May be null.

    string[] parentPathDirs; // example [`lib`, `firmware`] for `/lib/firmware` or `../lib/firmware`

    const string[] exts;      // Typical Extensions.
    Patt magicData;     // Magic Data.
    size_t magicOffset; // Magic Offset.
    ubyte[] refPattern; // Reference pattern.
    const FileContent content;
    const FileKindDetection detection;
    Lang lang; // Language if any

    // Volatile Statistics:
    private SHA1Digest _behaviourDigest;
    RegFile[] hitFiles;     // Files of this kind.

    const string[] keywords; // Keywords
    string[] builtins; // Builtin Functions
    Op[] opers; // Language Opers

    /* TODO Move this to CompLang class */
    Delim[] strings; // String syntax.
    Delim[] comments; // Comment syntax.

    bool machineGenerated; // True if this is a machine generated file.

    Tuple!(FOp, ShCmd)[] operations; // Operation and Corresponding Shell Command
}

/** Set of File Kinds with Internal Hashing. */
class FKinds
{
    /// Append `kind` and register it in all lookup tables.
    void opOpAssign(string op)(FKind kind) @safe /* pure */ if (op == `~`)
    {
        mixin(`this.byIndex ` ~ op ~ `= kind;`);
        this.register(kind);
    }

    /// Append all kinds of `kinds` and register each of them.
    void opOpAssign(string op)(FKinds kinds) @safe /* pure */ if (op == `~`)
    {
        mixin(`this.byIndex ` ~ op ~ `= kinds.byIndex;`);
        foreach (kind; kinds.byIndex)
            this.register(kind);
    }

    /** Register `kind` in the name, extension, behaviour-id and (for
        zero-offset literal magic) magic-bytes tables.
        Returns: `this`.
    */
    FKinds register(FKind kind) @safe /* pure */
    {
        this.byName[kind.kindName] = kind;
        foreach (const ext; kind.exts)
        {
            this.byExt[ext] ~= kind;
        }
        this.byId[kind.behaviorId] = kind;
        if (kind.magicOffset == 0 && // only if zero-offset for now
            kind.magicData)
        {
            if (const magicLit = cast(Lit)kind.magicData)
            {
                this.byMagic[magicLit.bytes][magicLit.bytes.length] ~= kind;
                _magicLengths ~= magicLit.bytes.length; // add it
            }
        }
        return this;
    }

    /** Rehash Internal AAs and normalize the list of magic lengths.
        TODO Change to @safe when https://github.com/D-Programming-Language/druntime/pull/942 has been merged
        TODO Change to nothrow when uniq becomes nothrow.
        Returns: `this`.
    */
    FKinds rehash() @trusted pure /* nothrow */
    {
        import std.algorithm: sort;
        // `uniq` only collapses *adjacent* equal elements, so the lengths
        // have to be sorted before duplicates are removed.
        _magicLengths.sort();
        _magicLengths = _magicLengths.uniq.array; // remove duplicates
        this.byName.rehash;
        this.byExt.rehash;
        this.byMagic.rehash;
        this.byId.rehash;
        return this;
    }

    FKind[] byIndex;
private:
    /* TODO These are "slaves" under byIndex and should not be modifiable outside
     of this class but their FKind's can mutable.
     */
    FKind[string] byName;       // Index by unique name string
    FKind[][string] byExt;      // Index by possibly non-unique extension string

    FKind[][size_t][immutable ubyte[]] byMagic; // length => zero-offset magic byte array to Binary FKind[]
    size_t[] _magicLengths; // List of magic lengths to try as index in byMagic

    FKind[SHA1Digest] byId;    // Index Kinds by their behaviour
}

/** Match `kind` with full filename `full`, starting at index `six`. */
bool matchFullName(in FKind kind,
                   const scope string full, size_t six = 0) @safe pure nothrow
{
    return (kind.baseNaming &&
            !kind.baseNaming.match(full, six).empty);
}

/** Match `kind` with file extension `ext`. */
bool matchExtension(in FKind kind,
                    const scope string ext) @safe pure nothrow
{
    return !kind.exts.find(ext).empty;
}

/** Match `kind` with either full filename `full` (from index `six`) or
    file extension `ext`.
*/
bool matchName(in FKind kind,
               const scope string full, size_t six = 0,
               const scope string ext = null) @safe pure nothrow
{
    // forward `six`; it used to be silently dropped
    return (kind.matchFullName(full, six) ||
            kind.matchExtension(ext));
}

import std.range: hasSlicing;

/** Match (Magic) Contents of `kind` with `range`.
    Returns: `true` iff match.
*/
bool matchContents(Range)(in FKind kind,
                          in Range range,
                          in RegFile regFile) pure nothrow if (hasSlicing!Range)
{
    // A kind without magic data cannot match on contents; guard against
    // dereferencing a null pattern.
    if (kind.magicData is null)
    {
        return false;
    }
    const hit = kind.magicData.matchU(range, kind.magicOffset);
    return (!hit.empty);
}

/** Result of matching a file against a kind. */
enum KindHit
{
    none = 0,     // No hit.
    cached = 1,   // Cached hit.
    uncached = 2, // Uncached (fresh) hit.
}

/** Find the first kind in `kinds` that `regFile` is of.

    Tries, in order: the kind id cached in `regFile`, the kinds registered for
    the file's extension, and finally every kind in `kinds.byIndex`.

    Params:
        regFile = file to classify
        kinds = candidate kinds
        collectTypeHits = forwarded to `ofKind`

    Returns: tuple of hit status, matching kind (`null` if none) and the index
    of the kind within the list it was found in (0 for cached hits and misses).
*/
Tuple!(KindHit, FKind, size_t) ofAnyKindIn(NotNull!RegFile regFile,
                                           FKinds kinds,
                                           bool collectTypeHits)
{
    // using kindId
    if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
    {
        if (auto cachedKind = regFile._cstat.kindId in kinds.byId) // single lookup
        {
            // construct via typeof(return) so the index is a size_t on every target
            return typeof(return)(KindHit.cached, *cachedKind, 0);
        }
    }

    // using extension
    immutable ext = regFile.realExtension; // extension sans dot
    if (!ext.empty)
    {
        if (auto extKinds = ext in kinds.byExt)
        {
            foreach (kindIndex, kind; *extKinds)
            {
                auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
                if (hit)
                {
                    return typeof(return)(hit, kind, kindIndex);
                }
            }
        }
    }

    // try all
    foreach (kindIndex, kind; kinds.byIndex) // Iterate each kind
    {
        auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
        if (hit)
        {
            return typeof(return)(hit, kind, kindIndex);
        }
    }

    // no hit
    return typeof(return)(KindHit.none, FKind.init, 0);
}

/** Returns: true if file with extension `ext` is of type `kind`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               NotNull!FKind kind,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    immutable hit = regFile.ofKind1(kind,
                                    collectTypeHits,
                                    allFKinds);
    return hit;
}

/** Match `regFile` against the kind registered under `kindName`.
    Returns: `KindHit.none` if no kind named `kindName` exists in `allFKinds`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               string kindName,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    typeof(return) hit;
    if (auto kindPtr = kindName in allFKinds.byName) // single lookup
    {
        auto kind = assumeNotNull(*kindPtr);
        hit = regFile.ofKind(kind,
                             collectTypeHits,
                             allFKinds);
    }
    return hit;
}

/** Helper for ofKind.

    Checks the kind id cached in `regFile` first, then requires a hit on
    `kind.superKind` (if any), and finally matches according to
    `kind.detection`. On a fresh hit the kind's id is stored in `regFile` and,
    if `collectTypeHits` is set, `regFile` is appended to `kind.hitFiles`.
*/
KindHit ofKind1(NotNull!RegFile regFile,
                NotNull!FKind kind,
                bool collectTypeHits,
                FKinds allFKinds) /* nothrow */ @trusted
{
    // Try cached first
    if (regFile._cstat.kindId.defined &&
        (regFile._cstat.kindId in allFKinds.byId) && // if kind is known
        allFKinds.byId[regFile._cstat.kindId] is kind)  // if cached kind equals
    {
        return KindHit.cached;
    }

    immutable ext = regFile.realExtension;

    if (kind.superKind)
    {
        immutable baseHit = regFile.ofKind(enforceNotNull(kind.superKind),
                                           collectTypeHits,
                                           allFKinds);
        if (!baseHit)
        {
            return baseHit;
        }
    }

    bool hit = false;
    final switch (kind.detection)
    {
    case FileKindDetection.equalsParentPathDirsAndName:
        hit = (!regFile.parents.map!(a => a.name).find(kind.parentPathDirs).empty && // I love D :)
               kind.matchName(regFile.name, 0, ext));
        break;
    case FileKindDetection.equalsName:
        hit = kind.matchName(regFile.name, 0, ext);
        break;
    case FileKindDetection.equalsNameAndContents:
        hit = (kind.matchName(regFile.name, 0, ext) &&
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsNameOrContents:
        hit = (kind.matchName(regFile.name, 0, ext) ||
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsContents:
        hit = kind.matchContents(regFile.readOnlyContents, regFile);
        break;
    case FileKindDetection.equalsWhatsGiven:
        // something must be defined
        // NOTE: `is(kind.baseNaming)` is a *type* test and always false for
        // a value; the intended check is a null test on the pattern.
        assert(kind.baseNaming !is null ||
               !kind.exts.empty ||
               !(kind.magicData is null));
        hit = ((kind.matchName(regFile.name, 0, ext) &&
                (kind.magicData is null ||
                 kind.matchContents(regFile.readOnlyContents, regFile))));
        break;
    }
    if (hit)
    {
        if (collectTypeHits)
        {
            kind.hitFiles ~= regFile;
        }
        regFile._cstat.kindId = kind.behaviorId;       // store reference in File
    }

    return hit ? KindHit.uncached : KindHit.none;
}

/** Directory Kind.
 */
class DirKind
{
    /** Construct from file name `fn` and kind name `kn`. */
    this(string fn,
         string kn)
    {
        this.fileName = fn;
        this.kindName = kn;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }
        /// Pack all fields as one msgpack array.
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.beginArray(this.tupleof.length);
            packer.pack(this.tupleof);
        }
        /// Unpack all fields from one msgpack array.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.beginArray;
            unpacker.unpack(this.tupleof);
        }
    }

    string fileName;
    string kindName;
}
version(msgpack) unittest
{
    auto k = tuple(``, ``);
    auto data = pack(k);
    Tuple!(string, string) k_; data.unpack(k_);
    assert(k == k_);
}

import std.file: DirEntry, getLinkAttributes;
import std.datetime: SysTime, Interval;

/** File.
*/
class File
{
    /** Construct an unnamed file under `parent` (may be `null`). */
    this(Dir parent)
    {
        this.parent = parent;
        if (parent) { ++parent.gstats.noFiles; }
    }

    /** Construct a file named `name` under `parent` (may be `null`) with the
        given size and time stamps.
    */
    this(string name, Dir parent, Bytes64 size,
         SysTime timeLastModified,
         SysTime timeLastAccessed)
    {
        this.name = name;
        this.parent = parent;
        this.size = size;
        this.timeLastModified = timeLastModified;
        this.timeLastAccessed = timeLastAccessed;
        if (parent) { ++parent.gstats.noFiles; }
    }

    // The Real Extension without leading dot.
    string realExtension() @safe pure nothrow const { return name.extension.chompPrefix(`.`); }
    alias ext = realExtension; // shorthand

    /// Returns: human readable description of the file type.
    string toTextual() const @property { return `Any File`; }

    /// Returns: size of this file; overridable for tree-like files.
    Bytes64 treeSize() @property @trusted /* @safe pure nothrow */ { return size; }

    /** Content Digest of Tree under this Directory. */
    const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        return typeof(return).init; // default to undefined
    }

    /// Returns: face used when presenting this file.
    Face!Color face() const @property @safe pure nothrow { return fileFace; }

    /** Check if `this` File has been invalidated by `dent`.
        Returns: `true` iff `this` was obseleted.
    */
    bool checkObseleted(ref DirEntry dent) @trusted
    {
        // Git-Style Check for Changes (called Decider in SCons Build Tool)
        bool flag = false;
        if (dent.size != this.size || // size has changes
            (dent.timeLastModified != this.timeLastModified) // if current modtime has changed or
            )
        {
            makeObselete;
            this.timeLastModified = dent.timeLastModified; // use new time
            this.size = dent.size; // use new size
            flag = true;
        }
        this.timeLastAccessed = dent.timeLastAccessed; // use new time
        return flag;
    }

    void makeObselete() @trusted {}
    void makeUnObselete() @safe {}

    /** Returns: Depth from File System root to this File. */
    int depth() @property @safe pure nothrow
    {
        return parent ? parent.depth + 1 : 0; // NOTE: this is fast because parent is memoized
    }
    /** NOTE: Currently not used. */
    int depthIterative() @property @safe pure
        out (depth) { debug assert(depth == depth); }
    do
    {
        typeof(return) depth = 0;
        for (auto curr = dir; curr !is null && !curr.isRoot; depth++)
        {
            curr = curr.parent;
        }
        return depth;
    }

    /** Get Parenting Dirs starting from parent of `this` upto root.
        Make this even more lazily evaluated.
    */
    Dir[] parentsUpwards()
    {
        typeof(return) parents; // collected parents
        for (auto curr = dir; (curr !is null &&
                               !curr.isRoot); curr = curr.parent)
        {
            parents ~= curr;
        }
        return parents;
    }
    alias dirsDownward = parentsUpwards;

    /** Get Parenting Dirs starting from file system root downto containing
        directory of `this`.
    */
    auto parents()
    {
        import std.range: retro; // local import keeps this member self-contained
        return parentsUpwards.retro;
    }
    alias dirs = parents;     // SCons style alias
    alias parentsDownward = parents;

    /// Returns: `true` iff `pred` holds for any parenting directory.
    bool underAnyDir(alias pred = `a`)()
    {
        import std.algorithm: any;
        import std.functional: unaryFun;
        return parents.any!(unaryFun!pred);
    }

    /** Returns: Path to `this` File.
        TODO Reuse parents.
     */
    string path() @property @trusted pure out (result) {
        /* assert(result == pathRecursive); */
    }
    do
    {
        if (!parent) { return dirSeparator; }

        size_t pathLength = 1 + name.length; // returned path length
        Dir[] parents; // collected parents

        for (auto curr = parent; (curr !is null &&
                                  !curr.isRoot); curr = curr.parent)
        {
            pathLength += 1 + curr.name.length;
            parents ~= curr;
        }

        // build path
        auto thePath = new char[pathLength];
        size_t i = 0; // index to thePath
        import std.range: retro;
        foreach (currParent_; parents.retro)
        {
            immutable parentName = currParent_.name;
            thePath[i++] = dirSeparator[0];
            thePath[i .. i + parentName.length] = parentName[];
            i += parentName.length;
        }
        thePath[i++] = dirSeparator[0];
        thePath[i .. i + name.length] = name[];

        return thePath;
    }

    /** Returns: Path to `this` File.
        Recursive Heap-active implementation, slower than $(D path()).
    */
    string pathRecursive() @property @trusted pure
    {
        if (parent)
        {
            static if (true)
            {
                import std.path: dirSeparator;
                // NOTE: This is more efficient than buildPath(parent.path,
                // name) because we can guarantee things about parent.path and
                // name
                immutable parentPath = parent.isRoot ? `` : parent.pathRecursive;
                return parentPath ~ dirSeparator ~ name;
            }
            else
            {
                import std.path: buildPath;
                return buildPath(parent.pathRecursive, name);
            }
        }
        else
        {
            return `/`;  // assume root folder with beginning slash
        }
    }

    version(msgpack)
    {
        /// Pack name, size and both time stamps (as `stdTime`), in that order.
        void toMsgpack(Packer)(ref Packer packer) const
        {
            writeln(`Entering File.toMsgpack `, name);
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }
        /// Unpack the fields in the same order `toMsgpack` wrote them.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            // `toMsgpack` writes name and size first; they must be consumed
            // here too, otherwise the time stamps are read from the wrong
            // position in the stream.
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize
        }
    }

    Dir parent;               // Reference to parenting directory (or null if this is a root directory)
    alias dir = parent;       // SCons style alias

    string name;              // Empty if root directory
    Bytes64 size;             // Size of file in bytes
    SysTime timeLastModified; // Last modification time
    SysTime timeLastAccessed; // Last access time
}

/** Maps Files to their tags.
*/
class FileTags
{
    /** Tag `file` with `tag` unless it already carries that tag.
        Returns: `this`.
    */
    FileTags addTag(File file, const scope string tag) @safe pure /* nothrow */
    {
        if (auto existing = file in _tags)
        {
            if ((*existing).find(tag).empty)
            {
                *existing ~= tag; // add it
            }
        }
        else
        {
            _tags[file] = [tag];
        }
        return this;
    }

    /** Remove every occurrence of `tag` from `file`, if `file` is registered.
        Returns: `this`.
    */
    FileTags removeTag(File file, string tag) @safe pure
    {
        if (auto existing = file in _tags)
        {
            import std.algorithm: remove;
            *existing = (*existing).remove!(a => a == tag);
        }
        return this;
    }

    /// Returns: tags of `file`, or `null` if `file` is not registered.
    auto ref getTags(File file) const @safe pure nothrow
    {
        const found = file in _tags;
        return found ? *found : null;
    }

    private string[][File] _tags; // Tags for each registered file.
}

version(linux) unittest
{
    auto ftags = new FileTags();

    GStats gstats = new GStats();

    auto root = assumeNotNull(new Dir(cast(Dir)null, gstats));
    auto etc = getDir(root, `/etc`);
    assert(etc.path == `/etc`);

    auto dent = DirEntry(`/etc/passwd`);
    auto passwd = getFile(root, `/etc/passwd`, dent.isDir);
    assert(passwd.path == `/etc/passwd`);
    assert(passwd.parent == etc);
    assert(etc.sub(`passwd`) == passwd);

    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Secret`);
    assert(ftags.getTags(passwd) == [`Password`, `Secret`]);
    ftags.removeTag(passwd, `Password`);
    assert(ftags._tags[passwd] == [`Secret`]);
}

/** Symlink Target Status.
 */
enum SymlinkTargetStatus
{
    unknown,
    present,
    broken,
}

/** Symlink.
*/
class Symlink : File
{
    /// Construct an empty symlink under `parent` and count it in the global statistics.
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSymlinks;
    }

    /** Construct from directory entry `dent` under `parent`.

        If reading size or time stamps from `dent` throws, the values read
        so far are kept, the rest stay default-initialized and the link
        target is not read here.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        Bytes64 linkSize;
        SysTime mtime, atime;
        bool statOk = true;
        try
        {
            linkSize = dent.size.Bytes64;
            mtime = dent.timeLastModified;
            atime = dent.timeLastAccessed;
        }
        catch (Exception)
        {
            statOk = false;
        }
        // const attrs = getLinkAttributes(dent.name); // attributes of target file
        // super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        super(dent.name.baseName, parent, linkSize, mtime, atime);
        if (statOk)
        {
            this.retarget(dent); // trigger lazy load
        }
        ++parent.gstats.noSymlinks;
    }

    /// Returns: `symlinkBrokenFace` for a broken target, otherwise `symlinkFace`.
    override Face!Color face() const @property @safe pure nothrow
    {
        return (_targetStatus == SymlinkTargetStatus.broken) ? symlinkBrokenFace : symlinkFace;
    }

    override string toTextual() const @property { return `Symbolic Link`; }

    /// Read the link target of `dent`, cache it in `_target` and return it.
    string retarget(ref DirEntry dent) @trusted
    {
        import std.file: readLink;
        _target = readLink(dent);
        return _target;
    }

    /** Cached/Memoized/Lazy Lookup for target. */
    string target() @property @trusted
    {
        if (_target) // already read
        {
            return _target;
        }
        auto targetDent = DirEntry(path);
        return retarget(targetDent); // read it
    }

    /** Cached/Memoized/Lazy Lookup for target as absolute normalized path. */
    string absoluteNormalizedTargetPath() @property @trusted
    {
        import std.path: absolutePath, buildNormalizedPath;
        immutable baseDir = path.dirName;
        return target.absolutePath(baseDir).buildNormalizedPath;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /// Pack name, size and the two time stamps (as `stdTime`).
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /// Unpack in the order written by `toMsgpack`.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize
        }
    }

    string _target;
    SymlinkTargetStatus _targetStatus = SymlinkTargetStatus.unknown;
}

/** Special File (Character or Block Device).
 */
class SpecFile : File
{
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSpecialFiles;
    }
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        ++parent.gstats.noSpecialFiles;
    }

    override Face!Color face() const @property @safe pure nothrow { return specialFileFace; }

    override string toTextual() const @property { return `Special File`; }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /// Pack name, size and the two time stamps (as `stdTime`).
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /// Unpack in the order written by `toMsgpack`.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize
        }
    }
}

/** Bit (Content) Status. */
enum BitStatus
{
    unknown,
    bits7,
    bits8,
}

/** Regular File.
 */
class RegFile : File
{
    /// Construct an empty regular file under `parent` and count it in the global statistics.
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noRegFiles;
    }

    /// Construct from directory entry `dent`; forwards to the field-wise constructor.
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        this(dent.name.baseName,
             parent,
             dent.size.Bytes64,
             dent.timeLastModified,
             dent.timeLastAccessed);
    }

    /// Construct from explicit field values and count the file in the global statistics.
    this(string name, NotNull!Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        ++parent.gstats.noRegFiles;
    }

    ~this() @nogc
    {
        _cstat.deallocate(false);
    }

    override string toTextual() const @property { return `Regular File`; }

    /** Returns: Content Id of `this`.

        Computed on first use through `calculateCStatInChunks` (with SHA-1
        enabled), after which the mapped contents are released again.
    */
    const(SHA1Digest) contentId() @property @trusted /* @safe pure nothrow */
    {
        if (!_cstat._contentId.isUntouched)
        {
            return _cstat._contentId; // already cached
        }
        enum doSHA1 = true;
        calculateCStatInChunks(parent.gstats.filesByContentId,
                               32*pageSize(),
                               doSHA1);
        freeContents();         // TODO Call lazily only when open count is too large
        return _cstat._contentId;
    }

    /** Returns: Tree Content Id of `this`.
*/
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        // for a regular file the tree id is simply its content id
        return contentId;
    }

    override Face!Color face() const @property @safe pure nothrow { return regFileFace; }

    /** Returns: SHA-1 of `this` `File` Contents at `src`.

        On the first call the digest of `src` is cached and `this` is
        appended to `filesByContentId` under that digest; later calls
        return the cached digest.
    */
    const(SHA1Digest) contId(inout (ubyte[]) src,
                             File[][SHA1Digest] filesByContentId)
        @property pure out(result) { assert(!result.empty); } // must have be defined
    do
    {
        if (!_cstat._contentId.empty) // already defined
        {
            return _cstat._contentId;
        }
        _cstat._contentId = src.sha1Of;
        filesByContentId[_cstat._contentId] ~= this;
        return _cstat._contentId;
    }

    /** Returns: Cached/Memoized Binary Histogram of `this` `File`. */
    auto ref bistogram8() @property @safe // ref needed here!
    {
        if (_cstat.bist.empty) { _cstat.bist.put(readOnlyContents); } // memoized calculated
        return _cstat.bist;
    }

    /** Returns: Cached/Memoized XGram of `this` `File`. */
    auto ref xgram() @property @safe // ref needed here!
    {
        if (_cstat.xgram.empty) { _cstat.xgram.put(readOnlyContents); } // memoized calculated
        return _cstat.xgram;
    }

    /** Returns: Cached/Memoized XGram Deep Denseness of `this` `File`.

        A cached numerator of zero counts as "not yet calculated".
    */
    auto ref xgramDeepDenseness() @property @safe
    {
        if (_cstat._xgramDeepDenseness == 0)
        {
            _cstat._xgramDeepDenseness = xgram.denseness(-1).numerator;
        }
        return Rational!ulong(_cstat._xgramDeepDenseness,
                              _cstat.xgram.noBins);
    }

    /** Returns: true if empty file (zero length). */
    bool empty() @property const @safe { return size == 0; }

    /** Process File in Cache Friendly Chunks.
*/
    void calculateCStatInChunks(NotNull!File[][SHA1Digest] filesByContentId,
                                size_t chunkSize = 32*pageSize(),
                                bool doSHA1 = false,
                                bool doBist = false,
                                bool doBitStatus = false) @safe
    {
        // Switch off every statistic that is already cached (and SHA-1 for
        // empty files as well).
        if (_cstat._contentId.defined || empty) { doSHA1 = false; }
        if (!_cstat.bist.empty) { doBist = false; }
        if (_cstat.bitStatus != BitStatus.unknown) { doBitStatus = false; }

        // nothing left to calculate
        if (!doSHA1 && !doBist && !doBitStatus) { return; }

        import std.digest.sha;
        import std.range: chunks;
        import nxt.bitop_ex: bt;

        SHA1 hasher;
        if (doSHA1) { hasher.start(); }

        bool sevenBitOnly = true;

        // single pass over the contents feeding all requested statistics
        foreach (piece; readOnlyContents.chunks(chunkSize))
        {
            if (doSHA1) { hasher.put(piece); }
            if (doBist) { _cstat.bist.put(piece); }
            if (doBitStatus)
            {
                /* TODO This can be parallelized using 64-bit wording!
                 * Write automatic parallelizing library for this? */
                foreach (octet; piece)
                {
                    if (octet.bt(7)) { sevenBitOnly = false; } // ASCII has no topmost bit set
                }
            }
        }

        if (doBitStatus)
        {
            if (sevenBitOnly)
                _cstat.bitStatus = BitStatus.bits7;
            else
                _cstat.bitStatus = BitStatus.bits8;
        }

        if (doSHA1)
        {
            _cstat._contentId = hasher.finish();
            filesByContentId[_cstat._contentId] ~= cast(NotNull!File)assumeNotNull(this); // TODO Prettier way?
        }
    }

    /** Clear/Reset Contents Statistics of `this` `File`.
*/
    void clearCStat(File[][SHA1Digest] filesByContentId) @safe nothrow
    {
        // SHA1-digest
        if (auto dups = _cstat._contentId in filesByContentId)
        {
            import std.algorithm: remove;
            immutable n = dups.length;
            // `remove` only returns the shortened slice; it must be stored
            // back into the table, otherwise the entry keeps its old length
            // with a stale trailing element.
            *dups = (*dups).remove!(a => a is this);
            assert(n == dups.length + 1); // exactly one entry (this) must have been removed
        }
    }

    override string toString() @property @trusted
    {
        // import std.traits: fullyQualifiedName;
        // return fullyQualifiedName!(typeof(this)) ~ `(` ~ buildPath(parent.name, name) ~ `)`; // TODO typenameof
        return (typeof(this)).stringof ~ `(` ~ this.path ~ `)`; // TODO typenameof
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack.

            Order: name, size, time stamps, kind id, content id, then the
            bistogram and the xgram, each preceded by a presence flag.
        */
        void toMsgpack(Packer)(ref Packer packer) const {
            /* writeln(`Entering RegFile.toMsgpack `, name); */

            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);

            // CStat: TODO Group
            packer.pack(_cstat.kindId); // FKind
            packer.pack(_cstat._contentId); // Digest

            // Bist
            immutable bistFlag = !_cstat.bist.empty;
            packer.pack(bistFlag);
            if (bistFlag) { packer.pack(_cstat.bist); }

            // XGram
            immutable xgramFlag = !_cstat.xgram.empty;
            packer.pack(xgramFlag);
            if (xgramFlag)
            {
                /* debug dln("packing xgram. empty:", _cstat.xgram.empty); */
                packer.pack(_cstat.xgram,
                            _cstat._xgramDeepDenseness);
            }

            /* auto this_ = (cast(RegFile)this); // TODO Ugly! Is there another way? */
            /* const tags = this_.parent.gstats.ftags.getTags(this_); */
            /* immutable tagsFlag = !tags.empty; */
            /* packer.pack(tagsFlag); */
            /* debug dln(`Packing tags `, tags, ` of `, this_.path); */
            /* if (tagsFlag) { packer.pack(tags); } */
        }

        /** Unpack.

            Mirrors `toMsgpack`. A kind id that is not present in
            `parent.gstats.allFKinds.byId` is reported and forgotten; a
            content id that tests true is registered in
            `parent.gstats.filesByContentId`.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) @trusted
        {
            unpacker.unpack(name, size); // Name, Size

            // Time
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize

            // CStat: TODO Group
            unpacker.unpack(_cstat.kindId); // FKind
            if (_cstat.kindId.defined &&
                _cstat.kindId !in parent.gstats.allFKinds.byId)
            {
                dln(`warning: kindId `, _cstat.kindId, ` not found for `,
                    path, `, FKinds length `, parent.gstats.allFKinds.byIndex.length);
                _cstat.kindId.reset; // forget it
            }
            unpacker.unpack(_cstat._contentId); // Digest
            if (_cstat._contentId)
            {
                parent.gstats.filesByContentId[_cstat._contentId] ~= cast(NotNull!File)this;
            }

            // Bist
            bool bistFlag; unpacker.unpack(bistFlag);
            if (bistFlag)
            {
                unpacker.unpack(_cstat.bist);
            }

            // XGram
            bool xgramFlag; unpacker.unpack(xgramFlag);
            if (xgramFlag)
            {
                /* if (_cstat.xgram == null) { */
                /*     _cstat.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                /* } */
                /* unpacker.unpack(*_cstat.xgram); */
                unpacker.unpack(_cstat.xgram,
                                _cstat._xgramDeepDenseness);
                /* debug dln(`unpacked xgram. empty:`, _cstat.xgram.empty); */
            }

            // tags
            /* bool tagsFlag; unpacker.unpack(tagsFlag); */
            /* if (tagsFlag) { */
            /*     string[] tags; */
            /*     unpacker.unpack(tags); */
            /* } */
        }

        override void makeObselete() @trusted { _cstat.reset(); /* debug dln(`Reset CStat for `, path); */ }
    }

    /** Returns: Read-Only Contents of `this` Regular File.

        The file is memory-mapped on first use. A zero-sized file is never
        mapped; an empty slice is returned instead.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    immutable(ubyte[]) readOnlyContents(string file = __FILE__, int line = __LINE__)() @trusted
    {
        if (_mmfile is null)
        {
            if (size == 0) // munmap fails for empty files
            {
                static assert([] !is null);
                return []; // empty file
            }
            else
            {
                _mmfile = new MmFile(path, MmFile.Mode.read,
                                     mmfile_size, null, pageSize());
                if (parent.gstats.showMMaps)
                {
                    writeln(`Mapped `, path, ` of size `, size);
                }
            }
        }
        return cast(typeof(return))_mmfile[];
    }

    /** Returns: Read-Writable Contents of `this` Regular File. */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    ubyte[] readWriteableContents() @trusted
    {
        if (!_mmfile)
        {
            _mmfile = new MmFile(path, MmFile.Mode.readWrite,
                                 mmfile_size, null, pageSize());
        }
        return cast(typeof(return))_mmfile[];
    }

    /** If needed Free Allocated Contents of `this` Regular File.
        Returns: `true` if a mapping existed and was released, `false` otherwise.
    */
    bool freeContents()
    {
        if (_mmfile) {
            // `delete` is deprecated; `destroy` runs the MmFile destructor
            // right away and leaves the memory to the GC.
            destroy(_mmfile); _mmfile = null; return true;
        }
        else { return false; }
    }

    import std.mmfile;
    private MmFile _mmfile = null;
    private CStat _cstat;     // Statistics about the contents of this RegFile.
}

/** Traits */
enum isFile(T) = (is(T == File) || is(T == NotNull!File));
enum isDir(T) = (is(T == Dir) || is(T == NotNull!Dir));
enum isSymlink(T) = (is(T == Symlink) || is(T == NotNull!Symlink));
enum isRegFile(T) = (is(T == RegFile) || is(T == NotNull!RegFile));
enum isSpecialFile(T) = (is(T == SpecFile) || is(T == NotNull!SpecFile));
enum isAnyFile(T) = (isFile!T ||
                     isDir!T ||
                     isSymlink!T ||
                     isRegFile!T ||
                     isSpecialFile!T);

/** Return true if T is a class representing File IO. */
enum isFileIO(T) = (isAnyFile!T ||
                    is(T == ioFile));

/** Contents Statistics of a Regular File. */
struct CStat
{
    /** Forget all cached statistics so that they get recalculated on next use. */
    void reset() @safe nothrow
    {
        kindId[] = 0;
        _contentId[] = 0;
        hitCount = 0;
        bist.reset();
        xgram.reset();
        _xgramDeepDenseness = 0;
        // The 7/8-bit classification is derived from the contents as well;
        // without this, calculateCStatInChunks would keep skipping it.
        bitStatus = BitStatus.unknown;
        deallocate();
    }

    /** Release resources held by `this`.
        Currently this only clears `kindId`; `nullify` is unused while the
        xgram deallocation below is commented out.
    */
    void deallocate(bool nullify = true) @trusted nothrow
    {
        kindId[] = 0;
        /* if (xgram != null) { */
        /*     import core.stdc.stdlib; */
        /*     free(xgram); */
        /*     if (nullify) { */
        /*         xgram = null; */
        /*     } */
        /* } */
    }

    SHA1Digest kindId; // FKind Identifier/Fingerprint of this regular file.
    SHA1Digest _contentId; // Content Identifier/Fingerprint.

    /** Boolean Single Bistogram over file contents. If
        binHist0[cast(ubyte)x] is set then this file contains byte x. Consumes
        32 bytes. */
    Bist bist; // TODO Put in separate slice std.allocator.

    /** Boolean Pair Bistogram (Digram) over file contents (higher-order statistics).
        If this RegFile contains a sequence of [byte0, bytes1],
        then bit at index byte0 + byte1 * 256 is set in xgram.
    */
    XGram xgram; // TODO Use slice std.allocator
    private ulong _xgramDeepDenseness = 0;

    uint64_t hitCount = 0;
    BitStatus bitStatus = BitStatus.unknown;
}

import core.sys.posix.sys.types;

// NOTE(review): the descriptions of `internal` and `external` look swapped
// relative to their names -- confirm against the code that uses this enum.
enum SymlinkFollowContext
{
    none,                       // Follow no symlinks
    internal,                   // Follow only symlinks outside of scanned tree
    external,                   // Follow only symlinks inside of scanned tree
    all,                        // Follow all symlinks
    standard = external
}

/** Global Scanner Statistics. */
class GStats
{
    NotNull!File[][string] filesByName;    // Potential File Name Duplicates
    NotNull!File[][ino_t] filesByInode;    // Potential Link Duplicates
    NotNull!File[][SHA1Digest] filesByContentId; // File(s) (Duplicates) Indexed on Contents SHA1.
    NotNull!RegFile[][string] elfFilesBySymbol; // File(s) (Duplicates) Indexed on raw unmangled symbol.
    FileTags ftags;

    Bytes64[NotNull!File] treeSizesByFile; // Tree sizes.
    size_t[NotNull!File] lineCountsByFile; // Line counts.
1646 1647 // VCS Directories 1648 DirKind[] vcDirKinds; 1649 DirKind[string] vcDirKindsMap; 1650 1651 // Skipped Directories 1652 DirKind[] skippedDirKinds; 1653 DirKind[string] skippedDirKindsMap; 1654 1655 FKinds txtFKinds = new FKinds; // Textual 1656 FKinds binFKinds = new FKinds; // Binary (Non-Textual) 1657 FKinds allFKinds = new FKinds; // All 1658 FKinds selFKinds = new FKinds; // User selected 1659 1660 void loadFileKinds() 1661 { 1662 txtFKinds ~= new FKind("SCons", ["SConstruct", "SConscript"], 1663 ["scons"], 1664 [], 0, [], [], 1665 defaultCommentDelims, 1666 pythonStringDelims, 1667 FileContent.buildSystemCode, FileKindDetection.equalsNameAndContents); // TOOD: Inherit Python 1668 1669 txtFKinds ~= new FKind("Makefile", ["GNUmakefile", "Makefile", "makefile"], 1670 ["mk", "mak", "makefile", "make", "gnumakefile"], [], 0, [], [], 1671 defaultCommentDelims, 1672 defaultStringDelims, 1673 FileContent.sourceCode, FileKindDetection.equalsName); 1674 txtFKinds ~= new FKind("Automakefile", ["Makefile.am", "makefile.am"], 1675 ["am"], [], 0, [], [], 1676 defaultCommentDelims, 1677 defaultStringDelims, 1678 FileContent.sourceCode); 1679 txtFKinds ~= new FKind("Autoconffile", ["configure.ac", "configure.in"], 1680 [], [], 0, [], [], 1681 defaultCommentDelims, 1682 defaultStringDelims, 1683 FileContent.sourceCode); 1684 txtFKinds ~= new FKind("Doxygen", ["Doxyfile"], 1685 ["doxygen"], [], 0, [], [], 1686 defaultCommentDelims, 1687 defaultStringDelims, 1688 FileContent.sourceCode); 1689 1690 txtFKinds ~= new FKind("Rake", ["Rakefile"],// TODO inherit Ruby 1691 ["mk", "makefile", "make", "gnumakefile"], [], 0, [], [], 1692 [Delim("#"), Delim("=begin", "=end")], 1693 defaultStringDelims, 1694 FileContent.sourceCode, FileKindDetection.equalsName); 1695 1696 txtFKinds ~= new FKind("HTML", [], ["htm", "html", "shtml", "xhtml"], [], 0, [], [], 1697 [Delim("<!--", "-->")], 1698 defaultStringDelims, 1699 FileContent.text, FileKindDetection.equalsContents); // markup text 
1700 txtFKinds ~= new FKind("XML", [], ["xml", "dtd", "xsl", "xslt", "ent", ], [], 0, "<?xml", [], 1701 [Delim("<!--", "-->")], 1702 defaultStringDelims, 1703 FileContent.text, FileKindDetection.equalsContents); // TODO markup text 1704 txtFKinds ~= new FKind("YAML", [], ["yaml", "yml"], [], 0, [], [], 1705 defaultCommentDelims, 1706 defaultStringDelims, 1707 FileContent.text); // TODO markup text 1708 txtFKinds ~= new FKind("CSS", [], ["css"], [], 0, [], [], 1709 [Delim("/*", "*/")], 1710 defaultStringDelims, 1711 FileContent.text, FileKindDetection.equalsContents); 1712 1713 txtFKinds ~= new FKind("Audacity Project", [], ["aup"], [], 0, "<?xml", [], 1714 defaultCommentDelims, 1715 defaultStringDelims, 1716 FileContent.text, FileKindDetection.equalsNameAndContents); 1717 1718 txtFKinds ~= new FKind("Comma-separated values", [], ["csv"], [], 0, [], [], // TODO decribe with symbolic 1719 defaultCommentDelims, 1720 defaultStringDelims, 1721 FileContent.text, FileKindDetection.equalsNameAndContents); 1722 1723 txtFKinds ~= new FKind("Tab-separated values", [], ["tsv"], [], 0, [], [], // TODO describe with symbolic 1724 defaultCommentDelims, 1725 defaultStringDelims, 1726 FileContent.text, FileKindDetection.equalsNameAndContents); 1727 1728 static immutable keywordsC = [ 1729 "auto", "const", "double", "float", "int", "short", "struct", 1730 "unsigned", "break", "continue", "else", "for", "long", "signed", 1731 "switch", "void", "case", "default", "enum", "goto", "register", 1732 "sizeof", "typedef", "volatile", "char", "do", "extern", "if", 1733 "return", "static", "union", "while", 1734 ]; 1735 1736 /* See_Also: https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B */ 1737 auto opersCBasic = [ 1738 // Arithmetic 1739 Op("+", OpArity.binary, OpAssoc.LR, 6, "Add"), 1740 Op("-", OpArity.binary, OpAssoc.LR, 6, "Subtract"), 1741 Op("*", OpArity.binary, OpAssoc.LR, 5, "Multiply"), 1742 Op("/", OpArity.binary, OpAssoc.LR, 5, "Divide"), 1743 Op("%", OpArity.binary, 
OpAssoc.LR, 5, "Remainder/Moduls"), 1744 1745 Op("+", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary plus"), 1746 Op("-", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary minus"), 1747 1748 Op("++", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix increment"), 1749 Op("--", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix decrement"), 1750 1751 Op("++", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix increment"), 1752 Op("--", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix decrement"), 1753 1754 // Assignment Arithmetic (binary) 1755 Op("=", OpArity.binary, OpAssoc.RL, 16, "Assign"), 1756 Op("+=", OpArity.binary, OpAssoc.RL, 16, "Assignment by sum"), 1757 Op("-=", OpArity.binary, OpAssoc.RL, 16, "Assignment by difference"), 1758 Op("*=", OpArity.binary, OpAssoc.RL, 16, "Assignment by product"), 1759 Op("/=", OpArity.binary, OpAssoc.RL, 16, "Assignment by quotient"), 1760 Op("%=", OpArity.binary, OpAssoc.RL, 16, "Assignment by remainder"), 1761 1762 Op("&=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise AND"), 1763 Op("|=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise OR"), 1764 1765 Op("^=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise XOR"), 1766 Op("<<=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise left shift"), 1767 Op(">>=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise right shift"), 1768 1769 Op("==", OpArity.binary, OpAssoc.LR, 9, "Equal to"), 1770 Op("!=", OpArity.binary, OpAssoc.LR, 9, "Not equal to"), 1771 1772 Op("<", OpArity.binary, OpAssoc.LR, 8, "Less than"), 1773 Op(">", OpArity.binary, OpAssoc.LR, 8, "Greater than"), 1774 Op("<=", OpArity.binary, OpAssoc.LR, 8, "Less than or equal to"), 1775 Op(">=", OpArity.binary, OpAssoc.LR, 8, "Greater than or equal to"), 1776 1777 Op("&&", OpArity.binary, OpAssoc.LR, 13, "Logical AND"), // TODO Convert to math in smallcaps AND 1778 Op("||", OpArity.binary, OpAssoc.LR, 14, "Logical OR"), // TODO Convert to math in smallcaps OR 1779 1780 Op("!", OpArity.unaryPrefix, OpAssoc.LR, 3, "Logical 
NOT"), // TODO Convert to math in smallcaps NOT 1781 1782 Op("&", OpArity.binary, OpAssoc.LR, 10, "Bitwise AND"), 1783 Op("^", OpArity.binary, OpAssoc.LR, 11, "Bitwise XOR (exclusive or)"), 1784 Op("|", OpArity.binary, OpAssoc.LR, 12, "Bitwise OR"), 1785 1786 Op("<<", OpArity.binary, OpAssoc.LR, 7, "Bitwise left shift"), 1787 Op(">>", OpArity.binary, OpAssoc.LR, 7, "Bitwise right shift"), 1788 1789 Op("~", OpArity.unaryPrefix, OpAssoc.LR, 3, "Bitwise NOT (One's Complement)"), 1790 Op(",", OpArity.binary, OpAssoc.LR, 18, "Comma"), 1791 Op("sizeof", OpArity.unaryPrefix, OpAssoc.LR, 3, "Size-of"), 1792 1793 Op("->", OpArity.binary, OpAssoc.LR, 2, "Element selection through pointer"), 1794 Op(".", OpArity.binary, OpAssoc.LR, 2, "Element selection by reference"), 1795 1796 ]; 1797 1798 /* See_Also: https://en.wikipedia.org/wiki/Iso646.h */ 1799 auto opersC_ISO646 = [ 1800 OpAlias("and", "&&"), 1801 OpAlias("or", "||"), 1802 OpAlias("and_eq", "&="), 1803 1804 OpAlias("bitand", "&"), 1805 OpAlias("bitor", "|"), 1806 1807 OpAlias("compl", "~"), 1808 OpAlias("not", "!"), 1809 OpAlias("not_eq", "!="), 1810 OpAlias("or_eq", "|="), 1811 OpAlias("xor", "^"), 1812 OpAlias("xor_eq", "^="), 1813 ]; 1814 1815 auto opersC = opersCBasic /* ~ opersC_ISO646 */; 1816 1817 auto kindC = new FKind("C", [], ["c", "h"], [], 0, [], 1818 keywordsC, 1819 cCommentDelims, 1820 defaultStringDelims, 1821 FileContent.sourceCode, 1822 FileKindDetection.equalsWhatsGiven, 1823 Lang.c); 1824 txtFKinds ~= kindC; 1825 kindC.operations ~= tuple(FOp.checkSyntax, `gcc -x c -fsyntax-only -c`); 1826 kindC.operations ~= tuple(FOp.checkSyntax, `clang -x c -fsyntax-only -c`); 1827 kindC.operations ~= tuple(FOp.preprocess, `cpp`); 1828 kindC.opers = opersC; 1829 1830 static immutable keywordsCxx = (keywordsC ~ ["asm", "dynamic_cast", "namespace", "reinterpret_cast", "try", 1831 "bool", "explicit", "new", "static_cast", "typeid", 1832 "catch", "false", "operator", "template", "typename", 1833 "class", "friend", 
"private", "this", "using", 1834 "const_cast", "inline", "public", "throw", "virtual", 1835 "delete", "mutable", "protected", "true", "wchar_t", 1836 // The following are not essential when 1837 // the standard ASCII character set is 1838 // being used, but they have been added 1839 // to provide more readable alternatives 1840 // for some of the C++ operators, and 1841 // also to facilitate programming with 1842 // character sets that lack characters 1843 // needed by C++. 1844 "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq", 1845 "and_eq", "bitor", "not", "or", "xor", ]).uniq.array; 1846 1847 auto opersCxx = opersC ~ [ 1848 Op("->*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1849 Op(".*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1850 Op("::", OpArity.binary, OpAssoc.none, 1, "Scope resolution"), 1851 Op("typeid", OpArity.unaryPrefix, OpAssoc.LR, 2, "Run-time type information (RTTI))"), 1852 //Op("alignof", OpArity.unaryPrefix, OpAssoc.LR, _, _), 1853 Op("new", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory allocation"), 1854 Op("delete", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1855 Op("delete[]", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1856 /* Op("noexcept", OpArity.unaryPrefix, OpAssoc.none, _, _), */ 1857 1858 Op("dynamic_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1859 Op("reinterpret_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1860 Op("static_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1861 Op("const_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1862 1863 Op("throw", OpArity.unaryPrefix, OpAssoc.LR, 17, "Throw operator"), 1864 /* Op("catch", OpArity.unaryPrefix, OpAssoc.LR, _, _) */ 1865 ]; 1866 1867 static immutable extsCxx = ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"]; 1868 auto kindCxx = new FKind("C++", [], extsCxx, [], 0, [], 1869 keywordsCxx, 1870 cCommentDelims, 1871 defaultStringDelims, 1872 
FileContent.sourceCode,
                                 FileKindDetection.equalsWhatsGiven,
                                 Lang.cxx);
        kindCxx.operations ~= tuple(FOp.checkSyntax, `gcc -x c++ -fsyntax-only -c`);
        kindCxx.operations ~= tuple(FOp.checkSyntax, `clang -x c++ -fsyntax-only -c`);
        kindCxx.operations ~= tuple(FOp.preprocess, `cpp`);
        kindCxx.opers = opersCxx;
        txtFKinds ~= kindCxx;
        static immutable keywordsCxx11 = keywordsCxx ~ ["alignas", "alignof",
                                                        "char16_t", "char32_t",
                                                        "constexpr",
                                                        "decltype",
                                                        "override", "final",
                                                        "noexcept", "nullptr",
                                                        "auto",
                                                        "thread_local",
                                                        "static_assert", ];
        // TODO Define as subkind
        /* txtFKinds ~= new FKind("C++11", [], ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"], [], 0, [], */
        /*                        keywordsCxx11, */
        /*                        [Delim("/\*", "*\/"), */
        /*                         Delim("//")], */
        /*                        defaultStringDelims, */
        /*                        FileContent.sourceCode, */
        /*                        FileKindDetection.equalsWhatsGiven); */

        /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
        static immutable opersCxxMicrosoft = ["__alignof"];

        /* Keywords of Microsoft C++: the standard C++ keywords plus the
         * vendor extensions below. Names that are commented out are not
         * registered. `uniq` only drops adjacent duplicates.
         *
         * See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */
        static immutable keywordsCxxMicrosoft = (keywordsCxx ~ [/* __abstract 2 */
            "__asm",
            "__assume",
            "__based",
            /* __box 2 */
            "__cdecl",
            "__declspec",
            /* __delegate 2 */
            "__event",
            "__except",
            "__fastcall",
            "__finally",
            "__forceinline",
            /* __gc 2 */
            /* __hook 3 */
            "__identifier",
            "__if_exists",
            "__if_not_exists",
            "__inline",
            "__int16",
            "__int32",
            "__int64",
            "__int8",
            "__interface",
            "__leave",
            "__m128",
            "__m128d",
            "__m128i",
            "__m64",
            "__multiple_inheritance",
            /* __nogc 2 */
            "__noop",
            /* __pin 2 */
            /* __property 2 */
            "__raise",
            /* __sealed 2 */
            "__single_inheritance",
            "__stdcall",
            "__super",
            "__thiscall",
            "__try",
            "__except",
            "__finally",
            /* __try_cast 2 */
            "__unaligned",
            /* __unhook 3 */
            "__uuidof",
            /* __value 2 */
            "__virtual_inheritance",
            "__w64",
            "__wchar_t",
            "wchar_t",
            "abstract",
            "array",
            "auto",
            "bool",
            "break",
            "case",
            "catch",
            "char",
            "class",
            "const",
            "const_cast",
            "continue",
            "decltype",
            "default",
            "delegate",
            "delete",
            /* deprecated 1 */
            /* dllexport 1 */
            /* dllimport 1 */
            "do",
            "double",
            "dynamic_cast",
            "else",
            "enum",
            // The commas after the next two entries were missing, which
            // fused three keywords into one string literal.
            "enum class",
            "enum struct",
            "event",
            "explicit",
            "extern",
            "false",
            "finally",
            "float",
            "for",
            "for each",
            "in",
            "friend",
            "friend_as",
            "gcnew",
            "generic",
            "goto",
            "if",
            "initonly",
            "inline",
            "int",
            "interface class",
            "interface struct",
            "interior_ptr",
            "literal",
            "long",
            "mutable",
            /* naked 1 */
            "namespace",
            "new",
            "new",
            /* noinline 1 */
            /* noreturn 1 */
            /* nothrow 1 */
            /* novtable 1 */
            "nullptr",
            "operator",
            "private",
            "property",
            /* property 1 */
            "protected",
            "public",
            "ref class",
            "ref struct",
            "register",
            "reinterpret_cast",
            "return",
            "safecast",
            "sealed",
            /* selectany 1 */
            "short",
            "signed",
            "sizeof",
            "static",
            "static_assert",
            "static_cast",
            "struct",
            "switch",
            "template",
            "this",
            /* thread 1 */
            "throw",
            "true",
            "try",
            "typedef",
            "typeid",
            "typeid",
            "typename",
            "union",
            "unsigned",
            "using" /* declaration */,
            "using" /* directive */,
            /* uuid 1 */
            "value class",
            "value struct",
            "virtual",
            "void",
            "volatile",
            "while"]).uniq.array;

        static immutable xattrCxxMicrosoft = [];

// ---------------------------------------------------------------------------
// Source-code kinds: Objective-C(++), Swift, C#, OCaml, Parrot, Prolog, D,
// Rust, Fortran and Ada. Each kind is built with `new FKind(...)` and appended
// to `txtFKinds`.
// ---------------------------------------------------------------------------

// Keywords Objective-C adds on top of its base language (C or C++).
// "@protocol" was previously misspelled "@protoco"; repeated entries removed.
static immutable keywordsNewObjectiveC = ["id",
                                          "in",
                                          "out", // Returned by reference
                                          "inout", // Argument is used both to provide information and to get information back
                                          "bycopy",
                                          "byref", "oneway", "self",
                                          "super",
                                          "@interface", "@implementation",
                                          "@protocol", "@class", "@end"];

static immutable keywordsObjectiveC = keywordsC ~ keywordsNewObjectiveC;
txtFKinds ~= new FKind("Objective-C", [], ["m", "h"], [], 0, [],
                       keywordsObjectiveC,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven,
                       Lang.objectiveC);

static immutable keywordsObjectiveCxx = keywordsCxx ~ keywordsNewObjectiveC;
txtFKinds ~= new FKind("Objective-C++", [], ["mm", "h"], [], 0, [],
                       keywordsObjectiveCxx,
                       cCommentDelims, // same C-style comments as the Objective-C kind (was defaultCommentDelims)
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsWhatsGiven,
                       Lang.objectiveCxx);

// Swift: C operators plus the `&`-prefixed overflow operators.
static immutable keywordsSwift = ["break", "class", "continue", "default", "do", "else", "for", "func", "if", "import",
                                  "in", "let", "return", "self", "struct", "super", "switch", "unowned", "var", "weak", "while",
                                  "mutating", "extension"];
auto opersOverflowSwift = opersC ~ [Op("&+"), Op("&-"), Op("&*"), Op("&/"), Op("&%")];
auto builtinsSwift = ["print", "println"];
auto kindSwift = new FKind("Swift", [], ["swift"], [], 0, [],
                           keywordsSwift,
                           cCommentDelims,
                           defaultStringDelims,
                           FileContent.sourceCode,
                           FileKindDetection.equalsWhatsGiven,
                           Lang.swift);
kindSwift.builtins = builtinsSwift;
kindSwift.opers = opersOverflowSwift;
txtFKinds ~= kindSwift;

static immutable keywordsCSharp = ["if"]; // TODO Add keywords
txtFKinds ~= new FKind("C#", [], ["cs"], [], 0, [], keywordsCSharp,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsWhatsGiven,
                       Lang.cSharp);

// A comma was missing between "initializer" and "lazy", which fused them into
// one bogus keyword.
static immutable keywordsOCaml = ["and", "as", "assert", "begin", "class",
                                  "constraint", "do", "done", "downto", "else",
                                  "end", "exception", "external", "false", "for",
                                  "fun", "function", "functor", "if", "in",
                                  "include", "inherit", "inherit!", "initializer",
                                  "lazy", "let", "match", "method", "method!",
                                  "module", "mutable", "new", "object", "of",
                                  "open", "or",
                                  "private", "rec", "sig", "struct", "then", "to",
                                  "true", "try", "type",
                                  "val", "val!", "virtual",
                                  "when", "while", "with"];
txtFKinds ~= new FKind("OCaml", [], ["ocaml"], [], 0, [], keywordsOCaml,
                       [Delim("(*", "*)")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// NOTE(review): Parrot reuses the OCaml keyword list -- looks like a
// copy-paste leftover; confirm before relying on keyword hits for Parrot.
txtFKinds ~= new FKind("Parrot", [], ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg", ], [], 0, [], keywordsOCaml,
                       [Delim("#"),
                        Delim("^=", // TODO Needs beginning of line instead of ^
                              "=cut")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

static immutable keywordsProlog = [];
txtFKinds ~= new FKind("Prolog", [], ["pl", "pro", "P"], [], 0, [], keywordsProlog,
                       [],
                       [],
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Operator table for D. Each entry is
// Op(symbol, arity, associativity, precedence value, description).
auto opersD = [
    // Arithmetic
    Op("+", OpArity.binary, OpAssoc.LR, 10*2, "Add"),
    Op("-", OpArity.binary, OpAssoc.LR, 10*2, "Subtract"),
    Op("~", OpArity.binary, OpAssoc.LR, 10*2, "Concatenate"),

    Op("*", OpArity.binary, OpAssoc.LR, 11*2, "Multiply"),
    Op("/", OpArity.binary, OpAssoc.LR, 11*2, "Divide"),
    Op("%", OpArity.binary, OpAssoc.LR, 11*2, "Remainder/Modulus"),

    Op("++", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix increment"),
    Op("--", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix decrement"),

    Op("^^", OpArity.binary, OpAssoc.RL, 13*2, "Power"),

    Op("++", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix increment"),
    Op("--", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix decrement"),
    Op("&", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Address of"),
    Op("*", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Pointer Dereference"),
    Op("+", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Plus"),
    Op("-", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Minus"),
    Op("!", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Logical NOT"), // TODO Convert to math in smallcaps NOT
    // Was OpAssoc.LR; made right-to-left like every other prefix operator here.
    Op("~", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Bitwise NOT (One's Complement)"),

    // Bit shift
    Op("<<", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise left shift"),
    Op(">>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise right shift"),

    // Comparison
    Op("==", OpArity.binary, OpAssoc.LR, 6*2, "Equal to"),
    Op("!=", OpArity.binary, OpAssoc.LR, 6*2, "Not equal to"),
    Op("<", OpArity.binary, OpAssoc.LR, 6*2, "Less than"),
    Op(">", OpArity.binary, OpAssoc.LR, 6*2, "Greater than"),
    Op("<=", OpArity.binary, OpAssoc.LR, 6*2, "Less than or equal to"),
    Op(">=", OpArity.binary, OpAssoc.LR, 6*2, "Greater than or equal to"),
    Op("in", OpArity.binary, OpAssoc.LR, 6*2, "In"),
    Op("!in", OpArity.binary, OpAssoc.LR, 6*2, "Not In"),
    Op("is", OpArity.binary, OpAssoc.LR, 6*2, "Is"),
    Op("!is", OpArity.binary, OpAssoc.LR, 6*2, "Not Is"),

    Op("&", OpArity.binary, OpAssoc.LR, 8*2, "Bitwise AND"),
    Op("^", OpArity.binary, OpAssoc.LR, 7*2, "Bitwise XOR (exclusive or)"),
    Op("|", OpArity.binary, OpAssoc.LR, 6*2, "Bitwise OR"),

    Op("&&", OpArity.binary, OpAssoc.LR, 5*2, "Logical AND"), // TODO Convert to math in smallcaps AND
    Op("||", OpArity.binary, OpAssoc.LR, 4*2, "Logical OR"), // TODO Convert to math in smallcaps OR

    // Assignment Arithmetic (binary)
    Op("=", OpArity.binary, OpAssoc.RL, 2*2, "Assign"),
    Op("+=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by sum"),
    Op("-=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by difference"),
    Op("*=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by product"),
    Op("/=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by quotient"),
    Op("%=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by remainder"),
    Op("&=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise AND"),
    Op("|=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise OR"),
    Op("^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise XOR"),
    Op("<<=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise left shift"),
    Op(">>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise right shift"),

    Op(",", OpArity.binary, OpAssoc.LR, 1*2, "Comma"),
    Op("..", OpArity.binary, OpAssoc.LR, cast(int)(0*2), "Range separator"),
    ];

// Shebang interpreters that identify a D script.
enum interpretersForD = ["rdmd",
                         "gdmd"];
auto magicForD = shebangLine(alt(lit("rdmd"),
                                 lit("gdmd")));

static immutable keywordsD = [`@property`, `@safe`, `@trusted`, `@system`, `@disable`, `abstract`, `alias`, `align`, `asm`, `assert`, `auto`, `body`, `bool`, `break`, `byte`, `case`, `cast`, `catch`,
                              `cdouble`, `cent`, `cfloat`, `char`, `class`, `const`, `continue`, `creal`, `dchar`, `debug`, `default`, `delegate`, `delete`, `deprecated`,
                              `do`, `double`, `else`, `enum`, `export`, `extern`, `false`, `final`, `finally`, `float`, `for`, `foreach`, `foreach_reverse`,
                              `function`, `goto`, `idouble`, `if`, `ifloat`, `immutable`, `import`, `in`, `inout`, `int`, `interface`, `invariant`, `ireal`,
                              `is`, `lazy`, `long`, `macro`, `mixin`, `module`, `new`, `nothrow`, `null`, `out`, `override`, `package`, `pragma`, `private`,
                              `protected`, `public`, `pure`, `real`, `ref`, `return`, `scope`, `shared`, `short`, `static`, `struct`, `super`, `switch`,
                              `synchronized`, `template`, `this`, `throw`, `true`, `try`, `typedef`, `typeid`, `typeof`, `ubyte`, `ucent`, `uint`, `ulong`,
                              `union`, `unittest`, `ushort`, `version`, `void`, `volatile`, `wchar`, `while`, `with`, `__gshared`,
                              `__thread`, `__traits`,
                              `string`, `wstring`, `dstring`, `size_t`, `hash_t`, `ptrdiff_t`, `equals_`]; // aliases

static immutable builtinsD = [`toString`, `toHash`, `opCmp`, `opEquals`,
                              `opUnary`, `opBinary`, `opApply`, `opCall`, `opAssign`, `opIndexAssign`, `opSliceAssign`, `opOpAssign`,
                              `opIndex`, `opSlice`, `opDispatch`,
                              `toString`, `toHash`, `opCmp`, `opEquals`, `Monitor`, `factory`, `classinfo`, `vtbl`, `offset`, `getHash`, `equals`, `compare`, `tsize`, `swap`, `next`, `init`, `flags`, `offTi`, `destroy`, `postblit`, `toString`, `toHash`,
                              `factory`, `classinfo`, `Throwable`, `Exception`, `Error`, `capacity`, `reserve`, `assumeSafeAppend`, `clear`,
                              `ModuleInfo`, `ClassInfo`, `MemberInfo`, `TypeInfo`];

// `dig` replaces an empty-string entry that used to sit after `mant_dig`; an
// empty property name is meaningless, and `dig` is the floating-point property
// missing from the list.
static immutable propertiesD = [`sizeof`, `stringof`, `mangleof`, `nan`, `init`, `alignof`, `max`, `min`, `infinity`, `epsilon`, `mant_dig`, `dig`,
                                `max_10_exp`, `max_exp`, `min_10_exp`, `min_exp`, `min_normal`, `re`, `im`];

static immutable specialsD = [`__FILE__`, `__LINE__`, `__DATE__`, `__EOF__`, `__TIME__`, `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `#line`];

auto kindDInterface = new FKind("D Interface", [], ["di"],
                                magicForD, 0,
                                [],
                                keywordsD,
                                dCommentDelims,
                                defaultStringDelims,
                                FileContent.sourceCode,
                                FileKindDetection.equalsNameOrContents,
                                Lang.d);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
txtFKinds ~= kindDInterface;

auto kindDDoc = new FKind("D Documentation", [], ["dd"],
                          magicForD, 0,
                          [],
                          keywordsD,
                          dCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode,
                          FileKindDetection.equalsNameOrContents);
txtFKinds ~= kindDDoc;

auto kindD = new FKind("D", [], ["d", "di"],
                       magicForD, 0,
                       [],
                       keywordsD,
                       dCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsNameOrContents,
                       Lang.d);
kindD.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindD.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
txtFKinds ~= kindD;

// NOTE(review): this registers a second "D Interface" kind with the same
// arguments as `kindDInterface` above. Kept as-is because `kindDi` may be
// referenced outside this section; confirm whether one of the two can go.
auto kindDi = new FKind("D Interface", [], ["di"],
                        magicForD, 0,
                        [],
                        keywordsD,
                        dCommentDelims,
                        defaultStringDelims,
                        FileContent.sourceCode,
                        FileKindDetection.equalsNameOrContents,
                        Lang.d);
kindDi.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindDi.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO Include paths
txtFKinds ~= kindDi;

static immutable keywordsRust = ["as", "box", "break", "continue", "crate",
                                 "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in",
                                 "let", "loop", "match", "mod", "mut", "priv", "proc", "pub", "ref",
                                 "return", "self", "static", "struct", "super", "true", "trait",
                                 "type", "unsafe", "use", "while"];

auto kindRust = new FKind("Rust", [], ["rs"],
                          [], 0,
                          [],
                          keywordsRust,
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode,
                          FileKindDetection.equalsNameOrContents,
                          Lang.rust);
txtFKinds ~= kindRust;

static immutable keywordsFortran77 = ["if", "else"];
// TODO Support .h files but require it to contain some Fortran-specific or be parseable.
auto kindFortan = new FKind("Fortran", [], ["f", "fortran", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"], [], 0, [], keywordsFortran77,
                            [Delim("^C")], // TODO Need beginning of line instead ^. seq(bol(), alt(lit('C'), lit('c'))); // TODO Add chars chs("cC");
                            defaultStringDelims,
                            FileContent.sourceCode,
                            FileKindDetection.equalsNameOrContents,
                            Lang.fortran);
kindFortan.operations ~= tuple(FOp.checkSyntax, `gcc -x fortran -fsyntax-only`);
txtFKinds ~= kindFortan;

// Ada
// Each later keyword set is the previous one plus that revision's additions.
import nxt.ada_defs;
static immutable keywordsAda83 = ada_defs.keywords83;
static immutable keywordsAda95 = keywordsAda83 ~ ada_defs.keywordsNew95;
static immutable keywordsAda2005 = keywordsAda95 ~ ada_defs.keywordsNew2005;
static immutable keywordsAda2012 = keywordsAda2005 ~ ada_defs.keywordsNew2012;
static immutable extsAda = ["ada", "adb", "ads"];
// Was named "Ada 82"; it is registered with keywordsAda83, so the name now matches.
txtFKinds ~= new FKind("Ada 83", [], extsAda, [], 0, [], keywordsAda83,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 95", [], extsAda, [], 0, [], keywordsAda95,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2005", [], extsAda, [], 0, [], keywordsAda2005,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2012", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);

auto aliKind = new FKind("Ada Library File", [], ["ali"], [], 0, `V "GNAT Lib v`, [],
                         [], // N/A
                         defaultStringDelims,
                         FileContent.fingerprint); // TODO Parse version following magic tag?
// ---------------------------------------------------------------------------
// Remaining text kinds (Pascal .. PEM) appended to `txtFKinds`, followed by the
// first binary kind (ELF) appended to `binFKinds`.
// ---------------------------------------------------------------------------
aliKind.machineGenerated = true;
txtFKinds ~= aliKind;

txtFKinds ~= new FKind("Pascal", [], ["pas", "pascal"], [], 0, [], [],
                       [Delim("(*", "*)"),// Old-Style
                        Delim("{", "}"),// Turbo Pascal
                        Delim("//")],// Delphi
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);
txtFKinds ~= new FKind("Delphi", [], ["pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj"],
                       [], 0, [], [],
                       [Delim("//")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);

// NOTE(review): a keyword-less "Objective-C" kind for ".m"; confirm it does
// not duplicate another Objective-C registration in this function.
txtFKinds ~= new FKind("Objective-C", [], ["m"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

static immutable keywordsPython = ["and", "del", "for", "is", "raise", "assert", "elif", "from", "lambda", "return",
                                   "break", "else", "global", "not", "try", "class", "except", "if", "or", "while",
                                   "continue", "exec", "import", "pass", "yield", "def", "finally", "in", "print"];

// Scripting

auto kindPython = new FKind("Python", [], ["py"],
                            shebangLine(lit("python")), 0, [],
                            keywordsPython,
                            defaultCommentDelims,
                            pythonStringDelims,
                            FileContent.scriptCode);
txtFKinds ~= kindPython;

txtFKinds ~= new FKind("Ruby", [], ["rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec", ],
                       shebangLine(lit("ruby")), 0,
                       [], [],
                       [Delim("#"), Delim("=begin", "=end")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Scala", [], ["scala", ],
                       shebangLine(lit("scala")), 0,
                       [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Scheme", [], ["scm", "ss"],
                       [], 0,
                       [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Smalltalk", [], ["st"], [], 0, [], [],
                       [Delim("\"", "\"")],
                       defaultStringDelims,
                       FileContent.sourceCode);

txtFKinds ~= new FKind("Perl", [], ["pl", "pm", "pm6", "pod", "t", "psgi", ],
                       shebangLine(lit("perl")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("PHP", [], ["php", "phpt", "php3", "php4", "php5", "phtml", ],
                       shebangLine(lit("php")), 0,
                       [], [],
                       defaultCommentDelims ~ cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Plone", [], ["pt", "cpt", "metadata", "cpy", "py", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Shell", [], ["sh"],
                       shebangLine(lit("sh")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Bash", [], ["bash"],
                       shebangLine(lit("bash")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Zsh", [], ["zsh"],
                       shebangLine(lit("zsh")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Batch", [], ["bat", "cmd"], [], 0, [], [],
                       [Delim("REM")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("TCL", [], ["tcl", "itcl", "itk", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Tex", [], ["tex", "cls", "sty", ], [], 0, [], [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("TT", [], ["tt", "tt2", "ttml", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Viz Basic", [], ["bas", "cls", "frm", "ctl", "vb", "resx", ], [], 0, [], [],
                       [Delim("'")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Verilog", [], ["v", "vh", "sv"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("VHDL", [], ["vhd", "vhdl"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Clojure", [], ["clj"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Go", [], ["go"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

auto kindJava = new FKind("Java", [], ["java", "properties"], [], 0, [], [],
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode);
txtFKinds ~= kindJava;
kindJava.operations ~= tuple(FOp.byteCompile, `javac`);

txtFKinds ~= new FKind("Groovy", [], ["groovy", "gtmpl", "gpp", "grunit"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Haskell", [], ["hs", "lhs"], [], 0, [], [],
                       [Delim("--"), // line comment; was "--}", which is not a Haskell comment opener
                        Delim("{-", "-}")],
                       defaultStringDelims,
                       FileContent.sourceCode);

static immutable keywordsJavascript = ["break", "case", "catch", "continue", "debugger", "default", "delete",
                                       "do", "else", "finally", "for", "function", "if", "in", "instanceof",
                                       "new", "return", "switch", "this", "throw", "try", "typeof", "var",
                                       "void", "while", "with" ];
txtFKinds ~= new FKind("JavaScript", [], ["js"],
                       [], 0, [],
                       keywordsJavascript,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("JavaScript Object Notation",
                       [], ["json"],
                       [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

auto dubFKind = new FKind("DUB",
                          ["dub.json"], ["json"],
                          [], 0, [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.scriptCode);
txtFKinds ~= dubFKind;
dubFKind.operations ~= tuple(FOp.build, `dub`);

// TODO Inherit XML
txtFKinds ~= new FKind("JSP", [], ["jsp", "jspx", "jhtm", "jhtml"], [], 0, [], [],
                       [Delim("<!--", "-->"), // XML comment; terminator was "--%>" (the JSP one)
                        Delim("<%--", "--%>")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("ActionScript", [], ["as", "mxml"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("LUA", [], ["lua"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Mason", [], ["mas", "mhtml", "mpl", "mtxt"], [], 0, [], [],
                       [], // TODO Need symbolic
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("CFMX", [], ["cfc", "cfm", "cfml"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.scriptCode);

// Simulation
static immutable keywordsModelica = ["algorithm", "discrete", "false", "loop", "pure",
                                     "and", "each", "final", "model", "record",
                                     "annotation", "else", "flow", "not", "redeclare",
                                     "elseif", "for", "operator", "replaceable",
                                     "block", "elsewhen", "function", "or", "return",
                                     "break", "encapsulated", "if", "outer", "stream",
                                     "class", "end", "import", "output", "then",
                                     "connect", "enumeration", "impure", "package", "true",
                                     "connector", "equation", "in", "parameter", "type",
                                     "constant", "expandable", "initial", "partial", "when",
                                     "constrainedby", "extends", "inner", "protected", "while",
                                     "der", "external", "input", "public", "within"];
auto kindModelica = new FKind("Modelica", [], ["mo"], [], 0, [],
                              keywordsModelica,
                              cCommentDelims,
                              defaultStringDelims,
                              FileContent.sourceCode,
                              FileKindDetection.equalsWhatsGiven,
                              Lang.modelica);
// The kind was constructed but not appended anywhere in this section, so
// ".mo" files were not matched by it; register it like every other kind.
txtFKinds ~= kindModelica;

// Numerical Computing

// Block comments close with "%}" (the terminator was previously written "}%").
txtFKinds ~= new FKind("Matlab", [], ["m"], [], 0, [], [],
                       [Delim("%{", "%}"), // TODO Prio 1
                        Delim("%")], // TODO Prio 2
                       defaultStringDelims,
                       FileContent.sourceCode);
auto kindOctave = new FKind("Octave", [], ["m"], [], 0, [], [],
                            [Delim("%{", "%}"), // TODO Prio 1
                             Delim("%"),
                             Delim("#")],
                            defaultStringDelims,
                            FileContent.sourceCode);
txtFKinds ~= kindOctave;
kindOctave.operations ~= tuple(FOp.byteCompile, `octave`);

txtFKinds ~= new FKind("Julia", [], ["jl"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode); // ((:execute "julia") (:evaluate "julia -e"))

txtFKinds ~= new FKind("Erlang", [], ["erl", "hrl"], [], 0, [], [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

auto magicForElisp = seq(shebangLine(lit("emacs")),
                         ws(),
                         lit("--script"));
auto kindElisp = new FKind("Emacs-Lisp", [],
                           ["el", "lisp"],
                           magicForElisp, 0, // Script Execution
                           [], [],
                           [Delim(";")],
                           defaultStringDelims,
                           FileContent.sourceCode);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs -batch -f batch-byte-compile`);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs --script`);
/* kindELisp.moduleName = "(provide 'MODULE_NAME)"; */
/* kindELisp.moduleImport = "(require 'MODULE_NAME)"; */
txtFKinds ~= kindElisp;

txtFKinds ~= new FKind("Lisp", [], ["lisp", "lsp"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("PostScript", [], ["ps", "postscript"], [], 0, "%!", [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

txtFKinds ~= new FKind("CMake", [], ["cmake"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

// http://stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
txtFKinds ~= new FKind("Pure ASCII", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.textASCII); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126
txtFKinds ~= new FKind("8-Bit Text", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.text8Bit); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126 or 128–255

txtFKinds ~= new FKind("Assembler", [], ["asm", "s"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

// https://en.wikipedia.org/wiki/Diff
auto diffKind = new FKind("Diff", [], ["diff", "patch"],
                          "diff", 0,
                          [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.text);
txtFKinds ~= diffKind;
diffKind.wikip = "https://en.wikipedia.org/wiki/Diff";

// PEM kinds: all share the `cert` extension and are told apart by their
// "-----BEGIN ..." header text (detection is equalsContents).
auto pemCertKind = new FKind(`PEM certificate`, [], [`cert`],
                             `-----BEGIN CERTIFICATE-----`, 0,
                             [], [],
                             [], // N/A
                             [], // N/A
                             FileContent.text,
                             FileKindDetection.equalsContents);
txtFKinds ~= pemCertKind;

auto pemCertReqKind = new FKind(`PEM certificate request`, [], [`cert`],
                                `-----BEGIN CERTIFICATE REQ`, 0,
                                [], [],
                                [], // N/A
                                [], // N/A
                                FileContent.text,
                                FileKindDetection.equalsContents);
txtFKinds ~= pemCertReqKind;

auto pemRSAPrivateKeyKind = new FKind(`PEM RSA private key`, [], [`cert`],
                                      `-----BEGIN RSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
                                      FileContent.text,
                                      FileKindDetection.equalsContents);
txtFKinds ~= pemRSAPrivateKeyKind;

auto pemDSAPrivateKeyKind = new FKind(`PEM DSA private key`, [], [`cert`],
                                      `-----BEGIN DSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
                                      FileContent.text,
                                      FileKindDetection.equalsContents);
txtFKinds ~= pemDSAPrivateKeyKind;

auto pemECPrivateKeyKind = new FKind(`PEM EC private key`, [], [`cert`],
                                     `-----BEGIN EC PRIVATE`, 0,
                                     [], [],
                                     [], // N/A
                                     [], // N/A
                                     FileContent.text,
                                     FileKindDetection.equalsContents);
txtFKinds ~= pemECPrivateKeyKind;

// Binaries

static immutable extsELF = ["o", "so", "ko", "os", "out", "bin", "x", "elf", "axf", "prx", "puff", "none"]; // ELF file extensions

auto elfKind = new FKind("ELF",
                         [], extsELF, x"7F 45 4C 46", 0, [], [],
                         [], // N/A
                         [], // N/A
                         FileContent.machineCode,
                         FileKindDetection.equalsContents);
elfKind.wikip = "https://en.wikipedia.org/wiki/Executable_and_Linkable_Format";
binFKinds ~= elfKind;
/* auto extsExeELF = ["out", "bin", "x", "elf", ]; // ELF file extensions */
/* auto elfExeKind = new FKind("ELF executable", [], extsExeELF, [0x2, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfSOKind = new FKind("ELF shared object", [], ["so", "ko"], [0x3, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfCoreKind = new FKind("ELF core file", [], ["core"], [0x4, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* binFKinds ~= elfExeKind; */
/* elfKind.subKinds ~= elfSOKind; */
/* elfKind.subKinds ~= elfCoreKind; */
/* elfKind.subKinds ~= elfKind; */

// TODO Specialize to not steal results from file's magics.
// Linux-specific binary kinds, then executable/object formats. Every kind is
// appended to `binFKinds` in the same order as before; FKind objects are
// created with `new`, so setting a field after the append still affects the
// registered object.
auto linuxFirmwareKind = new FKind(
    "Linux Firmware",
    [],
    ["bin", "ucode", "dat", "sbcf", "fw"],
    [], 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.binaryUnknown,
    FileKindDetection.equalsParentPathDirsAndName);
binFKinds ~= linuxFirmwareKind;
linuxFirmwareKind.parentPathDirs = ["lib", "firmware"];

// TODO Specialize to not steal results from file's magics.
auto linuxHwDbKind = new FKind(
    "Linux Hardware Database Index",
    "hwdb.bin",
    ["bin"],
    "KSLPHHRH", 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.binaryUnknown,
    FileKindDetection.equalsNameAndContents);
binFKinds ~= linuxHwDbKind;

// Executables
binFKinds ~= new FKind(
    "Mach-O",
    [],
    ["o"],
    x"CE FA ED FE", 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.machineCode,
    FileKindDetection.equalsContents);

binFKinds ~= new FKind(
    "modules.symbols.bin",
    [],
    ["bin"],
    cast(ubyte[])[0xB0, 0x07, 0xF4, 0x57, 0x00, 0x02, 0x00, 0x01, 0x20], 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.binaryUnknown,
    FileKindDetection.equalsContents);

auto kindCOFF = new FKind(
    "COFF/i386/32",
    [],
    ["o"],
    x"4C 01", 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.machineCode,
    FileKindDetection.equalsContents);
binFKinds ~= kindCOFF;
kindCOFF.description = "Common Object File Format";

auto kindPECOFF = new FKind(
    "PE/COFF",
    [],
    ["cpl", "exe", "dll", "ocx", "sys", "scr", "drv", "obj"],
    "PE\0\0", 0x60, // And ("MZ") at offset 0x0
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.machineCode,
    FileKindDetection.equalsContents);
binFKinds ~= kindPECOFF;
kindPECOFF.description = "COFF Portable Executable";

auto kindDOSMZ = new FKind(
    "DOS-MZ",
    [],
    ["exe", "dll"],
    "MZ", 0,
    [], [],
    [], // no comment delimiters
    [], // no string delimiters
    FileContent.machineCode);
binFKinds ~= kindDOSMZ;
kindDOSMZ.description = "MS-DOS, OS/2 or MS Windows executable";

// Caches
        binFKinds ~= new FKind("ld.so.cache", [], ["cache"], "ld.so-", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.binaryCache);

        // Profile Data
        binFKinds ~= new FKind("perf benchmark data", [], ["data"], "PERFILE2h", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.performanceBenchmark);

        // Images
        binFKinds ~= new FKind("GIF87a", [], ["gif"], "GIF87a", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image);
        binFKinds ~= new FKind("GIF89a", [], ["gif"], "GIF89a", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image);

        // NOTE(review): the three JPEG kinds below currently share identical
        // extensions and magic; they only become distinguishable once the
        // TODOs on each line are implemented.
        auto extJPEG = ["jpeg", "jpg", "j2k", "jpeg2000"];
        binFKinds ~= new FKind("JPEG", [], extJPEG, x"FF D8", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image); // TODO Support ends with [0xFF, 0xD9]
        binFKinds ~= new FKind("JPEG/JFIF", [], extJPEG, x"FF D8", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image); // TODO Support ends with ['J','F','I','F', 0x00]
        binFKinds ~= new FKind("JPEG/Exif", [], extJPEG, x"FF D8", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image); // TODO Support contains ['E','x','i','f', 0x00] followed by metadata

        // Pack200 archives start with the magic 0xCAFED00D. The previous entry
        // used the Java class-file magic (CA FE BA BE) together with the
        // "class" extension, duplicating the "Java Bytes Code" kind that is
        // registered further down.
        binFKinds ~= new FKind("Pack200-Compressed Java Bytes Code", [], ["pack"], x"CA FE D0 0D", 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.machineCode);

        binFKinds ~= new FKind("JRun Server Application", [], ["jsa"],
                               cast(ubyte[])[0xa2,0xab,0x0b,0xf0,
                                             0x01,0x00,0x00,0x00,
                                             0x00,0x00,0x20,0x00], 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.machineCode);

        binFKinds ~= new FKind("PNG", [], ["png"],
                               cast(ubyte[])[137, 80, 78, 71, 13, 10, 26, 10], 0, [], [],
                               [], // N/A
                               [], // N/A
                               FileContent.image);

        auto icnsKind = new FKind("Apple Icon Image", [], ["icns"],
                                  "icns", 0, [], [],
                                  [], // N/A
                                  [], // N/A
                                  FileContent.imageIcon);
        icnsKind.wikip = "https://en.wikipedia.org/wiki/Apple_Icon_Image_format";
        binFKinds ~= icnsKind;
        // TODO read with http://icns.sourceforge.net/

        auto kindPDF = new FKind("PDF", [], ["pdf"], "%PDF", 0, [], [],
                                 [], // N/A
                                 [], // N/A
                                 FileContent.document);
        kindPDF.description = "Portable Document Format";
        binFKinds ~= kindPDF;

        // NOTE(review): Markdown and AsciiDoc are registered among the binary
        // kinds with FileContent.binaryCache and no magic -- looks like a
        // copy-paste from the LaTeX format entry below; confirm intent.
        auto kindMarkdownFmt = new FKind("Markdown", [], ["md", "markdown"],
                                         [], 0,
                                         [], [],
                                         [], // N/A
                                         defaultStringDelims,
                                         FileContent.binaryCache);
        kindMarkdownFmt.wikip = "https://en.wikipedia.org/wiki/Markdown";
        binFKinds ~= kindMarkdownFmt;

        auto kindAsciiDocFmt = new FKind("AsciiDoc", [], ["ad", "adoc", "asciidoc"],
                                         [], 0,
                                         [], [],
                                         [], // N/A
                                         defaultStringDelims,
                                         FileContent.binaryCache);
        binFKinds ~= kindAsciiDocFmt;

        auto kindLatexPDFFmt = new FKind("LaTeX PDF Format", [], ["fmt"],
                                         cast(ubyte[])['W','2','T','X',
                                                       0x00,0x00,0x00,0x08,
                                                       0x70,0x64,0x66,0x74,
                                                       0x65,0x78], 0, [], [],
                                         [], // N/A
                                         defaultStringDelims,
                                         FileContent.binaryCache);
        binFKinds ~= kindLatexPDFFmt;

        binFKinds ~= new FKind("Microsoft Office Document", [], ["doc", "docx", "xls", "ppt"], x"D0 CF 11 E0", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.document);

        // Fonts

        auto kindTTF = new FKind("TrueType Font", [], ["ttf"], x"00 01 00 00 00", 0, [], [],
                                 [], // N/A
                                 defaultStringDelims,
                                 FileContent.font);
        binFKinds ~= kindTTF;

        auto kindTTCF = new FKind("TrueType/OpenType Font Collection", [], ["ttc"], "ttcf", 0, [], [],
                                  [], // N/A
                                  defaultStringDelims,
                                  FileContent.font);
        binFKinds ~= kindTTCF;

        auto kindWOFF = new FKind("Web Open Font", [], ["woff"], "wOFF", 0, [], [],
                                  [], // N/A
                                  defaultStringDelims,
                                  FileContent.font); // TODO container for kindSFNT
        binFKinds ~= kindWOFF;

        auto kindSFNT = new FKind("Spline Font", [], ["sfnt"], "sfnt", 0, [], [],
                                  [], // N/A
                                  defaultStringDelims,
                                  FileContent.font); // TODO container for Sfnt
        binFKinds ~= kindSFNT;

        // Audio

        binFKinds ~= new FKind("MIDI", [], ["mid", "midi"], "MThd", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.audio, FileKindDetection.equalsNameAndContents);

        // Au
        auto auKind = new FKind("Au", [], ["au", "snd"], ".snd", 0, [], [],
                                [], // N/A
                                defaultStringDelims,
                                FileContent.audio, FileKindDetection.equalsNameAndContents);
        auKind.wikip = "https://en.wikipedia.org/wiki/Au_file_format";
        binFKinds ~= auKind;

        binFKinds ~= new FKind("Ogg", [], ["ogg", "oga", "ogv"],
                               cast(ubyte[])[0x4F,0x67,0x67,0x53,
                                             0x00,0x02,0x00,0x00,
                                             0x00,0x00,0x00,0x00,
                                             0x00, 0x00], 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.media);

        // TODO Support RIFF....WAVEfmt using symbolic seq(lit("RIFF"), any(4), lit("WAVEfmt"))
        binFKinds ~= new FKind("WAV", [], ["wav", "wave"], "RIFF", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.audio, FileKindDetection.equalsContents);

        // Archives

        auto kindBSDAr = new FKind("BSD Archive", [], ["a", "ar"], "!<arch>\n", 0, [], [],
                                   [], // N/A
                                   defaultStringDelims,
                                   FileContent.archive, FileKindDetection.equalsContents);
        kindBSDAr.description = "BSD 4.4 and Mac OSX Archive";
        binFKinds ~= kindBSDAr;

        // Both tar flavours carry their "ustar" signature at byte offset 257.
        binFKinds ~= new FKind("GNU tar Archive", [], ["tar"], "ustar\040\040\0", 257, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.archive, FileKindDetection.equalsContents); // TODO Specialized Derivation of "POSIX tar Archive"
        binFKinds ~= new FKind("POSIX tar Archive", [], ["tar"], "ustar\0", 257, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.archive, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("pkZip Archive", [], ["zip", "jar", "pptx", "docx", "xlsx"], "PK\003\004", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.archive, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("pkZip Archive (empty)", [], ["zip", "jar"], "PK\005\006", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.archive, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("PAK file", [], ["pak"], cast(ubyte[])[0x40, 0x00, 0x00, 0x00,
                                                                     0x4a, 0x12, 0x00, 0x00,
                                                                     0x01, 0x2d, 0x23, 0xcb,
                                                                     0x6d, 0x00, 0x00, 0x2f], 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.spellCheckWordList,
                               FileKindDetection.equalsNameAndContents);

        binFKinds ~= new FKind("LZW-Compressed", [], ["z", "tar.z"], x"1F 9D", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("LZH-Compressed", [], ["z", "tar.z"], x"1F A0", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);

        // NOTE(review): "\037\235" is the same byte pair as x"1F 9D" used by
        // "LZW-Compressed" above, so this kind overlaps with it.
        binFKinds ~= new FKind("CompressedZ", [], ["z"], "\037\235", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("GNU-Zip (gzip)", [], ["tgz", "gz", "gzip", "dz"], "\037\213", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("BZip", [], ["bz2", "bz", "tbz2", "bzip2"], "BZh", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("XZ/7-Zip", [], ["xz", "txz", "7z", "t7z", "lzma", "tlzma", "lz", "tlz"],
                               cast(ubyte[])[0xFD, '7', 'z', 'X', 'Z', 0x00], 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("LZX", [], ["lzx"], "LZX", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);
        binFKinds ~= new FKind("SZip", [], ["szip"], "SZ\x0a\4", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.compressed);

        binFKinds ~= new FKind("Git Bundle", [], ["bundle"], "# v2 git bundle", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.versionControl);

        // Byte code
        binFKinds ~= new FKind("Emacs-Lisp Bytes Code", [], ["elc"], ";ELC\27\0\0\0", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.byteCode, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("Python Bytes Code", [], ["pyc"], x"0D 0A", 2, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.byteCode, FileKindDetection.equalsNameAndContents); // TODO Handle versions at src[0..2]

        binFKinds ~= new FKind("Zshell Wordcode", [], ["zwc"], x"07 06 05 04", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.byteCode);

        binFKinds ~= new FKind("Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.byteCode, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("Java KeyStore", [], [], x"FE ED FE ED", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binaryUnknown, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("Java JCE KeyStore", [], [], x"CE CE CE CE", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binaryUnknown, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("LLVM Bitcode", [], ["bc"], "BC", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.byteCode, FileKindDetection.equalsNameAndContents);

        // Numerical data
        binFKinds ~= new FKind("MATLAB MAT", [], ["mat"], "MATLAB 5.0 MAT-file", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.numericalData, FileKindDetection.equalsContents);

        auto hdf4Kind = new FKind("HDF4", [], ["hdf", "h4", "hdf4", "he4"], x"0E 03 13 01", 0, [], [],
                                  [], // N/A
                                  defaultStringDelims,
                                  FileContent.numericalData);
        binFKinds ~= hdf4Kind;
hdf4Kind.description = "Hierarchical Data Format version 4";

        // NOTE(review): the second constructor argument is used for base file
        // names elsewhere in this function ("hwdb.bin", ".DS_Store", the
        // GTAGS list). HDF5, NUMPY and PCH below pass what looks like a
        // description or the kind name there instead -- confirm against the
        // FKind constructor and replace with [] if that is unintended.
        auto hdf5Kind = new FKind("HDF5", "Hierarchical Data Format version 5", ["hdf", "h5", "hdf5", "he5"], x"89 48 44 46 0D 0A 1A 0A", 0, [], [],
                                  [], // N/A
                                  defaultStringDelims,
                                  FileContent.numericalData);
        binFKinds ~= hdf5Kind;
        hdf5Kind.description = "Hierarchical Data Format version 5";

        auto numpyKind = new FKind("NUMPY", "NUMPY", ["npy", "numpy"], x"93 4E 55 4D 50 59", 0, [], [],
                                   [], // N/A
                                   defaultStringDelims,
                                   FileContent.numericalData);
        binFKinds ~= numpyKind;

        binFKinds ~= new FKind("GNU GLOBAL Database", ["GTAGS", "GRTAGS", "GPATH", "GSYMS"], [], "b1\5\0", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);

        // SQLite
        // NOTE(review): the MySQL kinds reuse the SQLite extension list; they
        // are detected with equalsContents.
        static immutable extsSQLite = ["sql", "sqlite", "sqlite3"];
        binFKinds ~= new FKind("MySQL table definition file", [], extsSQLite, x"FE 01", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("MySQL MyISAM index file", [], extsSQLite, x"FE FE 07", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("MySQL MyISAM compressed data file", [], extsSQLite, x"FE FE 08", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);
        // NOTE(review): the two Maria kinds below have identical magic
        // (FF FF FF), so they cannot be told apart by contents; verify the
        // signatures against the MariaDB/file(1) magic definitions.
        binFKinds ~= new FKind("MySQL Maria index file", [], extsSQLite, x"FF FF FF", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("MySQL Maria compressed data file", [], extsSQLite, x"FF FF FF", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("SQLite format 3", [], extsSQLite , "SQLite format 3", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.tagsDatabase, FileKindDetection.equalsContents); // TODO Why is this detected at 49:th try?

        binFKinds ~= new FKind("Vim swap", [], ["swo"], [], 0, "b0VIM ", [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binaryCache);

        binFKinds ~= new FKind("PCH", "(GCC) Precompiled header", ["pch", "gpch"], "gpch", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.cache);

        binFKinds ~= new FKind("Firmware", [], ["fw"], cast(ubyte[])[], 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.cache, FileKindDetection.equalsName); // TODO Add check for binary contents and that some parenting directory is named "firmware"

        binFKinds ~= new FKind("LibreOffice or OpenOffice RDB", [], ["rdb"],
                               cast(ubyte[])[0x43,0x53,0x4d,0x48,
                                             0x4a,0x2d,0xd0,0x26,
                                             0x00,0x02,0x00,0x00,
                                             0x00,0x02,0x00,0x02], 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.database, FileKindDetection.equalsName); // TODO Add check for binary contents (magic above is not consulted with equalsName)

        binFKinds ~= new FKind("sconsign", [], ["sconsign", "sconsign.dblite", "dblite"], x"7d 71 01 28", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.cache, FileKindDetection.equalsNameAndContents);

        binFKinds ~= new FKind("GnuPG (GPG) key public ring", [], ["gpg"], x"99 01", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binary, FileKindDetection.equalsNameOrContents);
        binFKinds ~= new FKind("GnuPG (GPG) encrypted data", [], [], x"85 02", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binary, FileKindDetection.equalsContents);
        binFKinds ~= new FKind("GNUPG (GPG) key trust database", [], [], "\001gpg", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.binary, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("aspell word list (rowl)", [], ["rws"], "aspell default speller rowl ", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.spellCheckWordList, FileKindDetection.equalsNameAndContents);

        binFKinds ~= new FKind("DS_Store", ".DS_Store", [], "Mac OS X Desktop Services Store ", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.binary, FileKindDetection.equalsName);

        /* Fax image created in the CCITT Group 3 compressed format, which is
         * used for digital transmission of fax data and supports 1 bit per
         * pixel
         */
        binFKinds ~= new FKind("CCITT Group 3 compressed format", [], // TODO Alternative name: Digifax-G3, G3 Fax
                               ["g3", "G3"],
                               "PC Research, Inc", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.imageModemFax1BPP, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Raw Modem Data version 1", [],
                               ["rmd1"],
                               "RMD1", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.modemData, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Portable voice format 1", [],
                               ["pvf1"],
                               "PVF1\n", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.voiceModem, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Portable voice format 2", [],
                               ["pvf2"],
                               "PVF2\n", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.voiceModem, FileKindDetection.equalsContents);

        // Merge text and binary kinds into the combined registry.
        allFKinds ~= txtFKinds;
        allFKinds ~= binFKinds;

        // Sanity checks: the merged registry must hold exactly the union.
        assert(allFKinds.byIndex.length ==
               (txtFKinds.byIndex.length +
                binFKinds.byIndex.length));

        assert(allFKinds.byId.length ==
               (txtFKinds.byId.length +
                binFKinds.byId.length));

        txtFKinds.rehash;
        binFKinds.rehash;
        allFKinds.rehash;
    }

    /** Register the known directory kinds.
     *
     * Fills `vcDirKinds` with version-control directories, appends those plus
     * a few cache/backup directories to `skippedDirKinds`, and builds the
     * name-keyed lookup tables `vcDirKindsMap` and `skippedDirKindsMap`
     * (keyed on `DirKind.fileName`).
     */
    void loadDirKinds()
    {
        vcDirKinds ~= new DirKind(".git",  "Git");
        vcDirKinds ~= new DirKind(".svn",  "Subversion (Svn)");
        vcDirKinds ~= new DirKind(".bzr",  "Bazaar (Bzr)");
        vcDirKinds ~= new DirKind("RCS",  "RCS"); // was registered twice; once is enough
        vcDirKinds ~= new DirKind("CVS",  "CVS");
        vcDirKinds ~= new DirKind("MCVS",  "MCVS");
        vcDirKinds ~= new DirKind(".hg",  "Mercurial (Hg)");
        vcDirKinds ~= new DirKind("SCCS",  "SCCS");
        vcDirKinds ~= new DirKind(".wact",  "WACT");
        vcDirKinds ~= new DirKind("_MTN",  "Monotone");
        vcDirKinds ~= new DirKind("_darcs",  "Darcs");
        vcDirKinds ~= new DirKind("{arch}",  "Arch");

        // Every version-control directory is also a skipped directory.
        skippedDirKinds ~= vcDirKinds;

        // The former unused local `vcDirKindsMap_` has been dropped; the loop
        // has always filled `vcDirKindsMap`.
        foreach (kind; vcDirKinds)
        {
            vcDirKindsMap[kind.fileName] = kind;
        }
        vcDirKindsMap.rehash;

        skippedDirKinds ~= new DirKind(".trash",  "Trash");
        skippedDirKinds ~= new DirKind(".undo",  "Undo");
        skippedDirKinds ~= new DirKind(".deps",  "Dependencies");
        skippedDirKinds ~= new DirKind(".backups",  "Backups");
        skippedDirKinds ~= new DirKind(".autom4te.cache",  "Automake Cache");

        foreach (kind; skippedDirKinds)
        {
            skippedDirKindsMap[kind.fileName] = kind;
        }
        skippedDirKindsMap.rehash;
    }

    // Scan options and their defaults

    ScanContext scanContext = ScanContext.standard;
    KeyStrictness keyStrictness = KeyStrictness.standard;

    bool showNameDups = false;
    bool showTreeContentDups = false;
    bool showFileContentDups = false;
    bool showELFSymbolDups = false;
    bool linkContentDups = false;

    bool showLinkDups = false;
    SymlinkFollowContext followSymlinks = SymlinkFollowContext.external;
    bool showBrokenSymlinks = true;
    bool showSymlinkCycles = true;

    bool showAnyDups = false;
    bool showMMaps = false;
    bool showUsage = false;
    bool showSHA1 = false;
    bool showLineCounts = false;

    // Counters
    uint64_t noFiles = 0;
    uint64_t noRegFiles = 0;
    uint64_t noSymlinks = 0;
    uint64_t noSpecialFiles = 0;
uint64_t noDirs = 0; // incremented by the Dir constructors

    uint64_t noScannedFiles = 0;
    uint64_t noScannedRegFiles = 0;
    uint64_t noScannedSymlinks = 0;
    uint64_t noScannedSpecialFiles = 0;
    uint64_t noScannedDirs = 0;

    auto shallowDensenessSum = Rational!ulong(0, 1);
    auto deepDensenessSum = Rational!ulong(0, 1);
    uint64_t densenessCount = 0;

    FOp fOp = FOp.none;

    bool keyAsWord = false;
    bool keyAsSymbol = false;
    bool keyAsAcronym = false;
    bool keyAsExact = false;

    bool showTree = false;

    bool useHTML = false;
    bool browseOutput = false;
    bool collectTypeHits = false;
    bool colorFlag = false;

    int scanDepth = -1;

    bool demangleELF = true;

    bool recache = false;

    bool useNGrams = false;

    PathFormat pathFormat = PathFormat.relative;

    DirSorting subsSorting = DirSorting.onTimeLastModified;
    BuildType buildType = BuildType.none;
    DuplicatesContext duplicatesContext = DuplicatesContext.internal;

    Dir[] topDirs;
    Dir rootDir;
}

/** Accumulated scan results. */
struct Results
{
    size_t numTotalHits; // Number of total hits.
    size_t numFilesWithHits; // Number of files with hits
    Bytes64 noBytesTotal; // Number of bytes total.
    Bytes64 noBytesTotalContents; // Number of contents bytes total.
    Bytes64 noBytesScanned; // Number of bytes scanned.
    Bytes64 noBytesSkipped; // Number of bytes skipped.
    Bytes64 noBytesUnreadable; // Number of bytes unreadable.
}

version(cerealed)
{
    /** (De)serialize `systime` through `cereal` via its `stdTime` value.
     *
     * `systime` is only overwritten when the grained value is non-zero.
     */
    void grain(T)(ref Cereal cereal, ref SysTime systime)
    {
        auto stdTime = systime.stdTime;
        cereal.grain(stdTime);
        if (stdTime != 0)
        {
            systime = SysTime(stdTime);
        }
    }
}

/** Directory Sorting Order. */
enum DirSorting
{
    /* onTimeCreated, /\* Windows only. Currently stored in Linux on ext4 but no */
    /*                 * standard interface exists yet, it will probably be called */
    /*                 * xstat(). *\/ */
    onTimeLastModified,
    onTimeLastAccessed,
    onSize,
    onNothing,
}

/** Build configuration. */
enum BuildType
{
    none,    // Don't compile
    devel,   // Compile with debug symbols
    release, // Compile without debug symbols and optimizations
    standard = devel,
}

/** How paths are formatted. */
enum PathFormat
{
    absolute,
    relative,
}

/** Directory node of the scanned file tree.
 *
 * Sub-entries are loaded lazily through `load()` and stored in `_subs`,
 * keyed by base name.
 */
class Dir : File
{
    /** Construct File System Root Directory. */
    this(Dir parent = null, GStats gstats = null)
    {
        super(parent);
        this._gstats = gstats;
        if (gstats) { ++gstats.noDirs; }
    }

    /** Construct the root directory from `root_path`, which must be "/". */
    this(string root_path, GStats gstats)
        in { assert(root_path == "/"); assert(gstats); }
    do
    {
        auto rootDent = DirEntry(root_path);
        Dir rootParent = null;
        this(rootDent, rootParent, gstats);
    }

    /** Construct from directory entry `dent` located under `parent`. */
    this(ref DirEntry dent, Dir parent, GStats gstats)
        in { assert(gstats); }
    do
    {
        this(dent.name.baseName, parent, dent.size.Bytes64, dent.timeLastModified, dent.timeLastAccessed, gstats);
    }

    /** Construct from explicit attributes; counts itself in `gstats.noDirs` when given. */
    this(string name, Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed,
         GStats gstats = null)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        this._gstats = gstats;
        if (gstats) { ++gstats.noDirs; }
    }

    override string toTextual() const @property { return "Directory"; }

    /** Returns: Size of `this` plus the tree sizes of all sub-entries (memoized). */
    override Bytes64 treeSize() @property @trusted /* @safe nothrow */
    {
        if (_treeSize.isUntouched)
        {
            _treeSize = (this.size +
                         reduce!"a+b"(0.Bytes64,
                                      subs.byValue.map!"a.treeSize")); // recurse!
        }
        return _treeSize.get.bytes;
    }

    /** Returns: Directory Tree Content Id of `this`. */
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe nothrow */
    {
        if (_treeContentId.isUntouched)
        {
            _treeContentId = subs.byValue.map!"a.treeContentId".sha1Of; // TODO join loops for calculating treeSize
            assert(_treeContentId, "Zero tree content digest");
            if (treeSize() != 0)
            {
                gstats.filesByContentId[_treeContentId] ~= assumeNotNull(cast(File)this); // TODO Avoid cast when DMD and NotNull is fixed
            }
        }
        return _treeContentId;
    }

    override Face!Color face() const @property @safe pure nothrow { return dirFace; }

    /** Return true if `this` is a file system root directory. */
    bool isRoot() @property @safe const pure nothrow { return !parent; }

    /** Set the statistics object of `this`. */
    GStats gstats(GStats gstats) @property @safe pure /* nothrow */ {
        return this._gstats = gstats;
    }
    /** Returns: Statistics object, inherited (and cached) from `parent` when unset. */
    GStats gstats() @property @safe nothrow
    {
        if (!_gstats && this.parent)
        {
            _gstats = this.parent.gstats();
        }
        return _gstats;
    }

    /** Returns: Depth from File System root to this File. */
    override int depth() @property @safe nothrow
    {
        if (_depth == -1)
        {
            _depth = parent ? parent.depth + 1 : 0; // memoized depth
        }
        return _depth;
    }

    /** Scan `this` recursively for a non-directory file with basename `name`.

        A direct non-directory sub-entry named `name` wins. Otherwise all
        sub-directories are searched; this now also happens when a
        sub-directory itself is called `name` (previously the search stopped
        there and returned `null`).

        Returns: The first hit, or `null` if nothing was found.

        TODO Reuse range based algorithm this.tree(depthFirst|breadFirst)
     */
    File find(string name) @property
    {
        auto subs_ = subs();
        if (name in subs_)
        {
            auto hit = subs_[name];
            Dir hitDir = cast(Dir)hit;
            if (!hitDir) // if not a directory
                return hit;
        }
        foreach (sub; subs_)
        {
            Dir subDir = cast(Dir)sub;
            if (subDir)
            {
                auto hit = subDir.find(name);
                if (hit) // found somewhere below subDir
                    return hit;
            }
        }
        return null;
    }

    /** Append Tree Statistics.

        For regular files, widens the modified/accessed time intervals of
        `this` so that they include the times of `subDent`.
     */
    void addTreeStatsFromSub(F)(NotNull!F subFile, ref DirEntry subDent)
    {
        if (subDent.isFile)
        {
            /* _treeSize += subDent.size.Bytes64; */
            // dbg("Updating ", _treeSize, " of ", path);

            /** TODO Move these overloads to std.datetime */
            auto ref min(in SysTime a, in SysTime b) @trusted pure nothrow { return (a < b ? a : b); }
            auto ref max(in SysTime a, in SysTime b) @trusted pure nothrow { return (a > b ? a : b); }

            const lastMod = subDent.timeLastModified;
            _timeModifiedInterval = Interval!SysTime(min(lastMod, _timeModifiedInterval.begin),
                                                     max(lastMod, _timeModifiedInterval.end));
            const lastAcc = subDent.timeLastAccessed;
            _timeAccessedInterval = Interval!SysTime(min(lastAcc, _timeAccessedInterval.begin),
                                                     max(lastAcc, _timeAccessedInterval.end));
        }
    }

    /** Update Statistics for Sub-File `subFile` with `subDent` of `this` Dir.

        Registers `subFile` by name when name duplicates are requested, and by
        inode when link duplicates are requested and it is a regular file with
        at least two hard links.
     */
    void updateStats(F)(NotNull!F subFile, ref DirEntry subDent, bool isRegFile)
    {
        auto lGS = gstats();
        if (lGS)
        {
            if (lGS.showNameDups/* && */
                /* !subFile.underAnyDir!(a => a.name in lGS.skippedDirKindsMap) */)
            {
                lGS.filesByName[subFile.name] ~= cast(NotNull!File)subFile;
            }
            if (lGS.showLinkDups &&
                isRegFile)
            {
                import core.sys.posix.sys.stat;
                immutable stat_t stat = subDent.statBuf();
                if (stat.st_nlink >= 2)
                {
                    lGS.filesByInode[stat.st_ino] ~= cast(NotNull!File)subFile;
                }
            }
        }
    }

    /** Load Contents of `this` Directory from Disk using DirEntries.

        Entries already present under the same base name are reused; new ones
        are constructed as `Symlink`, `Dir`, `RegFile` or `SpecFile`.

        Params:
            depth = currently unused by this method
            force = reload even if `this` is not flagged obsolete

        Returns: `true` iff Dir was updated (reread) from disk.
     */
    bool load(int depth = 0, bool force = false)
    {
        import std.range: empty;
        if (!_obseleteDir && // already loaded
            !force)          // and not forced reload
        {
            return false;    // signal already scanned
        }

        // dbg("Zeroing ", _treeSize, " of ", path);
        _treeSize.reset; // this.size;
        auto oldSubs = _subs;
        _subs.reset;
        assert(_subs.length == 0); // TODO Remove when verified

        import std.file: dirEntries, SpanMode;
        auto entries = dirEntries(path, SpanMode.shallow, false); // false: skip symlinks
        foreach (dent; entries)
        {
            immutable basename = dent.name.baseName;
            File sub = null;
            if (basename in oldSubs)
            {
                sub = oldSubs[basename]; // reuse from previous cache
            }
            else
            {
                bool isRegFile = false;
                if (dent.isSymlink)
                {
                    sub = new Symlink(dent, assumeNotNull(this));
                }
                else if (dent.isDir)
                {
                    sub = new Dir(dent, this, gstats);
                }
                else if (dent.isFile)
                {
                    // TODO Delay construction of and specific files such as
                    // CFile, ELFFile, after FKind-recognition has been made.
                    sub = new RegFile(dent, assumeNotNull(this));
                    isRegFile = true;
                }
                else
                {
                    sub = new SpecFile(dent, assumeNotNull(this));
                }
                updateStats(enforceNotNull(sub), dent, isRegFile);
            }
            auto nnsub = enforceNotNull(sub);
            addTreeStatsFromSub(nnsub, dent);
            _subs[basename] = nnsub;
        }
        _subs.rehash;           // optimize hash for faster lookups

        _obseleteDir = false;
        return true;
    }

    /** Force a reload from disk. */
    bool reload(int depth = 0) { return load(depth, true); }
    alias sync = reload;

    /* TODO Can we get make this const to the outside world perhaps using inout? */
    /** Returns: Sub-entries keyed by base name, loading them first if needed. */
    ref NotNull!File[string] subs() @property { load(); return _subs; }

    /** Returns: Sub-entries as an array ordered according to `sorted`
        (descending for the time and size orders). */
    NotNull!File[] subsSorted(DirSorting sorted = DirSorting.onTimeLastModified) @property
    {
        load();
        auto ssubs = _subs.values;
        /* TODO Use radix sort to speed things up. */
        final switch (sorted)
        {
            /* case DirSorting.onTimeCreated: */
            /*     break; */
        case DirSorting.onTimeLastModified:
            ssubs.sort!((a, b) => (a.timeLastModified >
                                   b.timeLastModified));
            break;
        case DirSorting.onTimeLastAccessed:
            ssubs.sort!((a, b) => (a.timeLastAccessed >
                                   b.timeLastAccessed));
            break;
        case DirSorting.onSize:
            ssubs.sort!((a, b) => (a.size >
                                   b.size));
            break;
        case DirSorting.onNothing:
            break;
        }
        return ssubs;
    }

    /** Returns: Sub-entry named `sub_name`, or `null` if there is none. */
    File sub(Name)(Name sub_name)
    {
        load();
        return (sub_name in _subs) ? _subs[sub_name] : null;
    }

    /** Returns: `sub` if an entry with its name is registered in `this`, otherwise `null`.

        `_subs` is keyed by base name (see `load`), so the lookup uses
        `sub.name`; the previous lookup on `sub.path` used a different key.
     */
    File sub(File sub)
    {
        load();
        if (sub is null) { return null; }
        return (sub.name in _subs) != null ? sub : null;
    }

    version(cerealed)
    {
        void accept(Cereal cereal)
        {
            auto stdTime = timeLastModified.stdTime;
            cereal.grain(name, size, stdTime);
            timeLastModified = SysTime(stdTime);
        }
    }
    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.  */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Serialize `this` and, recursively, all of its sub-entries.

            Layout: name, size, modified/accessed std-times, kind, number of
            subs, forward-difference coded sub times (only when there are
            subs), then each sub preceded by its class name.
         */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln("Entering Dir.toMsgpack ", this.name); */
            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime,
                        kind);

            // Contents
            /* TODO serialize map of polymorphic objects using
             * packer.packArray(_subs) and type trait lookup up all child-classes of
             * File */
            packer.pack(_subs.length);

            if (_subs.length >= 1)
            {
                auto diffsLastModified = _subs.byValue.map!"a.timeLastModified.stdTime".encodeForwardDifference;
                auto diffsLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime".encodeForwardDifference;
                /* auto timesLastModified = _subs.byValue.map!"a.timeLastModified.stdTime"; */
                /* auto timesLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime"; */

                packer.pack(diffsLastModified, diffsLastAccessed);

                /* debug dbg(this.name, " sub.length: ", _subs.length); */
                /* debug dbg(name, " modified diffs: ", diffsLastModified.pack.length); */
                /* debug dbg(name, " accessed diffs: ", diffsLastAccessed.pack.length); */
                /* debug dbg(name, " modified: ", timesLastModified.array.pack.length); */
                /* debug dbg(name, " accessed: ", timesLastAccessed.array.pack.length); */
            }

            foreach (sub; _subs)
            {
                if (const regFile = cast(RegFile)sub)
                {
                    packer.pack("RegFile");
                    regFile.toMsgpack(packer);
                }
                else if (const dir = cast(Dir)sub)
                {
                    packer.pack("Dir");
                    dir.toMsgpack(packer);
                }
                else if (const symlink = cast(Symlink)sub)
                {
                    packer.pack("Symlink");
                    symlink.toMsgpack(packer);
                }
                else if (const special = cast(SpecFile)sub)
                {
                    packer.pack("SpecFile");
                    special.toMsgpack(packer);
                }
                else
                {
                    immutable subClassName = sub.classinfo.name;
                    assert(0, "Unknown sub File class " ~ subClassName); // TODO Exception
                }
            }
        }

        /** Deserialize `this` and its sub-entries (inverse of `toMsgpack`).

            Directories and regular files are re-checked against the disk via
            `DirEntry`; a `FileException` raised for a sub-entry makes that
            entry be skipped.
         */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO Functionize

            /* dbg("before:", path, " ", size, " ", timeLastModified, " ", timeLastAccessed); */

            // FKind (a former no-op `if (!kind) { kind = null; }` was removed here)
            unpacker.unpack(kind); /* TODO kind = new DirKind(unpacker); */
            /* dbg("after:", path); */

            _treeSize.reset; // this.size;

            // Contents
            /* TODO unpacker.unpack(_subs); */
            immutable noPreviousSubs = _subs.length == 0;
            size_t subs_length; unpacker.unpack(subs_length); // TODO Functionize to unpacker.unpack!size_t()

            ForwardDifferenceCode!(long[]) diffsLastModified,
                diffsLastAccessed;
            if (subs_length >= 1)
            {
                unpacker.unpack(diffsLastModified, diffsLastAccessed);
                /* auto x = diffsLastModified.decodeForwardDifference; */
            }

            foreach (ix; 0..subs_length) // repeat for subs_length times
            {
                string subClassName; unpacker.unpack(subClassName); // TODO Functionize
                File sub = null;
                try
                {
                    switch (subClassName)
                    {
                    default:
                        assert(0, "Unknown File parent class " ~ subClassName); // TODO Exception
                    case "Dir":
                        auto subDir = new Dir(this, gstats);
                        unpacker.unpack(subDir); sub = subDir;
                        auto subDent = DirEntry(sub.path);
                        subDir.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        addTreeStatsFromSub(assumeNotNull(subDir), subDent);
                        break;
                    case "RegFile":
                        auto subRegFile = new RegFile(assumeNotNull(this));
                        unpacker.unpack(subRegFile); sub = subRegFile;
                        auto subDent = DirEntry(sub.path);
                        subRegFile.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        updateStats(assumeNotNull(subRegFile), subDent, true);
                        addTreeStatsFromSub(assumeNotNull(subRegFile), subDent);
                        break;
                    case "Symlink":
                        auto subSymlink = new Symlink(assumeNotNull(this));
                        unpacker.unpack(subSymlink); sub = subSymlink;
                        break;
                    case "SpecFile":
                        // Local renamed from `SpecFile`, which shadowed the
                        // class being instantiated.
                        auto subSpecFile = new SpecFile(assumeNotNull(this));
                        unpacker.unpack(subSpecFile); sub = subSpecFile;
                        break;
                    }
                    if (noPreviousSubs ||
                        !(sub.name in _subs))
                    {
                        _subs[sub.name] = enforceNotNull(sub);
                    }
                    /* dbg("Unpacked Dir sub ", sub.path, " of type ", subClassName); */
                } catch (FileException) { // this may be a too generic exception
                    /* dbg(sub.path, " is not accessible anymore"); */
                }
            }

        }
    }

    /** Flag `this` as obsolete and drop memoized size and time intervals. */
    override void makeObselete() @trusted
    {
        _obseleteDir = true;
        _treeSize.reset;
        _timeModifiedInterval.reset;
        _timeAccessedInterval.reset;
    }
    override void makeUnObselete() @safe
    {
        _obseleteDir = false;
    }

    private NotNull!File[string] _subs; // Directory contents
    DirKind kind;               // Kind of this directory
    uint64_t hitCount = 0;
    private int _depth = -1;            // Memoized Depth
    private bool _obseleteDir = true;  // Flags that this is obselete
    GStats _gstats = null;

    /* TODO Reuse Span and span in Phobos. (Span!T).init should be (T.max, T.min) */
    Interval!SysTime _timeModifiedInterval;
    Interval!SysTime _timeAccessedInterval;

    Nullable!(size_t, size_t.max) _treeSize; // Size of tree with this directory as root.
    /* TODO Make this work instead: */
    /* import std.typecons: Nullable; */
    /* Nullable!(Bytes64, Bytes64.max) _treeSize; // Size of tree with this directory as root. */

    SHA1Digest _treeContentId;
}

/** Externally Directory Memoized Calculation of Tree Size.
    Is it possible to make get any of @safe pure nothrow?
*/
Bytes64 treeSizeMemoized(NotNull!File file, Bytes64[File] cache) @trusted /* nothrow */
{
    typeof(return) sum = file.size;
    if (auto dir = cast(Dir)file)
    {
        if (auto cached = file in cache) // single lookup instead of `in` followed by index
        {
            sum = *cached;
        }
        else
        {
            foreach (sub; dir.subs.byValue)
            {
                sum += treeSizeMemoized(sub, cache);
            }
            // NOTE(review): `cache` is passed by value; if the caller hands in
            // a null associative array this insertion may not be visible to
            // the caller -- confirm against call sites.
            cache[file] = sum;
        }
    }
    return sum;
}

/** Save File System Tree Cache under Directory `rootDir`.

    Serializes `rootDir` (msgpack or cerealed, depending on the active
    `version`; an empty array otherwise), writes the bytes to `cacheFile` and
    reports size and elapsed time through `viz`.

    Returns: Serialized Byte Array.
*/
const(ubyte[]) saveRootDirTree(Viz viz,
                               Dir rootDir, string cacheFile) @trusted
{
    // Needed by every `version` branch below, so it must not live inside
    // `version(msgpack)` only.
    import std.file: write;

    immutable tic = Clock.currTime;
    version(msgpack)
    {
        const data = rootDir.pack();
    }
    else version(cerealed)
    {
        auto enc = new Cerealiser(); // encoder
        enc ~= rootDir;
        auto data = enc.bytes;
    }
    else
    {
        ubyte[] data;
    }
    cacheFile.write(data);
    immutable toc = Clock.currTime;

    viz.ppln("Cache Write".asH!2,
             "Wrote tree cache of size ",
             data.length.Bytes64, " to ",
             cacheFile.asPath,
             " in ",
             shortDurationString(toc - tic));

    return data;
}

/** Load File System Tree Cache from `cacheFile`.
    Returns: Root Directory of Loaded Tree.
*/
Dir loadRootDirTree(Viz viz,
                    string cacheFile, GStats gstats) @trusted
{
    import std.file: read;

    immutable started = Clock.currTime;
    try
    {
        const data = read(cacheFile);

        // Parent-less directory that the unpacker fills in
        auto loadedRoot = new Dir(cast(Dir)null, gstats);
        version(msgpack)
        {
            unpack(cast(ubyte[])data, loadedRoot); /* Dir rootDir = new Dir(cast(const(ubyte)[])data); */
        }
        immutable finished = Clock.currTime;

        viz.pp("Cache Read".asH!2,
               "Read cache of size ",
               data.length.Bytes64, " from ",
               cacheFile.asPath,
               " in ",
               shortDurationString(finished - started), " containing",
               asUList(asItem(gstats.noDirs, " Dirs,"),
                       asItem(gstats.noRegFiles, " Regular Files,"),
                       asItem(gstats.noSymlinks, " Symbolic Links,"),
                       asItem(gstats.noSpecialFiles, " Special Files,"),
                       asItem("totalling ", gstats.noFiles + 1, " Files")));

        // Per-kind counters must add up to the total (plus one)
        assert(gstats.noDirs +
               gstats.noRegFiles +
               gstats.noSymlinks +
               gstats.noSpecialFiles == gstats.noFiles + 1);
        return loadedRoot;
    }
    catch (FileException)
    {
        viz.ppln("Failed to read cache from ", cacheFile);
        return null;
    }
}

/** Look up each of `topDirNames` under `rootDir`.
    Names that cannot be resolved are reported through `dbg` and left out.
    Returns: The directories that were found, in the order given.
*/
Dir[] getDirs(NotNull!Dir rootDir, string[] topDirNames)
{
    Dir[] found;
    foreach (name; topDirNames)
    {
        if (auto dir = getDir(rootDir, name))
        {
            found ~= dir;
        }
        else
        {
            dbg("Directory " ~ name ~ " is missing");
        }
    }
    return found;
}

/** (Cached) Lookup of File `filePath`.
*/
File getFile(NotNull!Dir rootDir, string filePath,
             bool isDir = false,
             bool tolerant = false) @trusted
{
    if (isDir)
    {
        return getDir(rootDir, filePath);
    }

    auto parentDir = getDir(rootDir, filePath.dirName);
    if (!parentDir)
    {
        // `parentDir` is null here, so report the path that was looked up
        // instead of dereferencing it.
        dbg("Directory " ~ filePath.dirName ~ " doesn't exist");
        return null;
    }

    auto hit = parentDir.sub(filePath.baseName);
    if (hit)
    {
        return hit;
    }

    dbg("File path " ~ filePath ~ " doesn't exist. TODO Query user to instead find it under "
        ~ parentDir.path);
    parentDir.find(filePath.baseName);
    return null;
}

import std.path: isRooted;

/** (Cached) Lookup of Directory `dirPath`.

    Walks the components of `dirPath` (all but the first) down from `rootDir`.
    A symbolic link whose target is a directory is resolved by a recursive
    lookup of its target; every link followed is appended to
    `followedSymlinks` and a link seen twice aborts the lookup.

    Returns: Dir if present under rootDir, null otherwise.
    TODO Make use of dent
*/
Dir getDir(NotNull!Dir rootDir, string dirPath, ref DirEntry dent,
           ref Symlink[] followedSymlinks) @trusted
in { assert(dirPath.isRooted); }
do
{
    import std.algorithm: canFind;
    import std.range: drop;
    import std.path: pathSplitter;

    Dir currDir = rootDir;

    foreach (part; dirPath.pathSplitter().drop(1)) // all but first
    {
        auto sub = currDir.sub(part);
        if (auto subDir = cast(Dir)sub)
        {
            currDir = subDir;
        }
        else if (auto subSymlink = cast(Symlink)sub)
        {
            auto subDent = DirEntry(subSymlink.absoluteNormalizedTargetPath);
            if (!subDent.isDir)
            {
                dbg("Loaded path " ~ dirPath ~ " is not a directory");
                return null;
            }
            // `find` returns a slice, and a non-null empty slice is true in a
            // boolean context; test membership explicitly instead.
            if (followedSymlinks.canFind(subSymlink))
            {
                dbg("Infinite recursion in ", subSymlink);
                return null;
            }
            followedSymlinks ~= subSymlink;
            currDir = getDir(rootDir, subSymlink.absoluteNormalizedTargetPath, subDent, followedSymlinks);
            if (!currDir) // target not present in tree: stop instead of dereferencing null
            {
                return null;
            }
        }
        else
        {
            return null;
        }
    }
    return currDir;
}

/** (Cached) Lookup of Directory `dirPath`.
    Returns: Dir if found, null if not found or if a `FileException` was
    raised during the lookup.
*/
Dir getDir(NotNull!Dir rootDir, string dirPath) @trusted
{
    Symlink[] followedSymlinks;
    try
    {
        auto dirDent = DirEntry(dirPath);
        return getDir(rootDir, dirPath, dirDent, followedSymlinks);
    }
    catch (FileException)
    {
        dbg("Exception getting Dir");
        return null;
    }
}
unittest {
    /* auto tmp = tempfile("/tmp/fsfile"); */
}

enum ulong mmfile_size = 0; // 100*1024

/** Memory page size: queried from the system on Linux, 4096 elsewhere. */
auto pageSize() @trusted
{
    version(linux)
    {
        import core.sys.posix.sys.shm: __getpagesize;
        return __getpagesize();
    }
    else
    {
        return 4096;
    }
}

/** How strictly a search key has to match. */
enum KeyStrictness
{
    exact,
    acronym,
    eitherExactOrAcronym,
    standard = eitherExactOrAcronym,
}

/** Language Operator Associativity. */
enum OpAssoc { none,
               LR, // Left-to-Right
               RL, // Right-to-Left
}

/** Language Operator Arity. */
enum OpArity
{
    unknown,
    unaryPostfix, // 1-arguments
    unaryPrefix, // 1-arguments
    binary, // 2-arguments
    ternary, // 3-arguments
}

/** Language Operator. */
struct Op
{
    /** Construct operator `op`; `overloadable` is left at its default. */
    this(string op,
         OpArity arity = OpArity.unknown,
         OpAssoc assoc = OpAssoc.none,
         byte prec = -1,
         string desc = [])
    {
        this.op = op;
        this.arity = arity;
        this.assoc = assoc;
        this.prec = prec;
        this.desc = desc;
    }
    /** Make `this` an alias of `opOrig`.
        Currently only returns a copy of `this`; `opOrig` is not recorded. */
    Op aliasOf(string opOrig)
    {
        // TODO set relation in map from op to opOrig
        return this;
    }
    string op; // Operator. TODO Optimize this storage using a value type?
    string desc; // Description
    OpAssoc assoc; // Associativity
    ubyte prec; // Precedence
    OpArity arity; // Arity
    bool overloadable; // Overloadable
}

/** Language Operator Alias.
*/
struct OpAlias
{
    this(string op, string opOrigin)
    {
        this.op = op;
        this.opOrigin = opOrigin;
    }
    string op;
    string opOrigin;
}

/** Look up the kind of `regFile` by its cached kind id.
    Returns: The entry of `kindsById` stored under `regFile._cstat.kindId`,
    or null if there is none.
*/
FKind tryLookupKindIn(RegFile regFile,
                      FKind[SHA1Digest] kindsById)
{
    immutable id = regFile._cstat.kindId;
    if (auto hit = id in kindsById) // single hash lookup
    {
        return *hit;
    }
    return null;
}

/** Name under which `theFile` is shown.
    Returns: "./" followed by the file name when paths are relative and
    exactly one top directory is scanned, the file's full path otherwise.
*/
string displayedFileName(AnyFile)(GStats gstats,
                                  AnyFile theFile) @safe pure
{
    return ((gstats.pathFormat == PathFormat.relative &&
             gstats.topDirs.length == 1) ?
            "./" ~ theFile.name :
            theFile.path);
}

/** File System Scanner. */
class Scanner(Term)
{
    this(string[] args, ref Term term)
    {
        prepare(args, term);
    }

    SysTime _currTime;
    import std.getopt;
    import std.string: toLower, toUpper, startsWith, CaseSensitive;
    import std.mmfile;
    import std.stdio: writeln, stdout, stderr, stdin, popen;
    import std.algorithm: find, count, countUntil, min, splitter;
    import std.range: join;
    import std.conv: to;

    import core.sys.posix.sys.mman;
    import core.sys.posix.pwd: passwd, getpwuid_r;
    version(linux)
    {
        // import core.sys.linux.sys.inotify;
        import core.sys.linux.sys.xattr;
    }
    import core.sys.posix.unistd: getuid, getgid;
    import std.file: read, FileException, exists, getcwd;
    import std.range: retro;
    import std.exception: ErrnoException;
    import core.sys.posix.sys.stat: stat_t, S_IRUSR, S_IRGRP, S_IROTH;

    uint64_t _hitsCountTotal = 0;

    Symlink[] _brokenSymlinks;

    bool _beVerbose = false;
    bool _caseFold = false;
    bool _showSkipped = false;
    bool listTxtFKinds = false;
    bool listBinFKinds = false;
    string selFKindNames;
    string[] _topDirNames;
    string[] addTags;
    string[] removeTags;

    private
    {
        GStats
gstats = new GStats();

        string _cacheFile = "~/.cache/fs-root.msgpack";

        uid_t _uid;
        gid_t _gid;
    }

    ioFile outFile;

    string[] keys; // Keys to scan.
    typeof(keys.map!bistogramOverRepresentation) keysBists;
    typeof(keys.map!(sparseUIntNGramOverRepresentation!NGramOrder)) keysXGrams;
    Bist keysBistsUnion;
    XGram keysXGramsUnion;

    string selFKindsNote;

    /** Parse the command line `args`, set up output and run the scan.

        Steps, in order: load the registered file and directory kinds, parse
        the options, normalize the top directory names, open the output
        (standard output, or a temporary HTML file opened in a browser when
        browsing is requested), collect the search keys (every remaining
        argument not starting with "-"), precompute their statistics, resolve
        the selected file kinds, print the headings, load the tree cache
        unless recaching is requested, scan the top directories, save the
        cache and finally show statistics.

        Returns early, right after option parsing, if help was printed.
    */
    void prepare(string[] args, ref Term term)
    {
        _scanChunkSize = 32*pageSize;
        gstats.loadFileKinds;
        gstats.loadDirKinds;

        // NOTE(review): the short option `l` is declared for both
        // "broken-symlinks" and "show-symlink-cycles" below -- confirm which
        // one is intended to own it.
        bool helpPrinted = getoptEx("FS --- File System Scanning Utility in D.\n" ~
                                    "Usage: fs { --switches } [KEY]...\n" ~
                                    "Note that scanning for multiple KEYs is possible.\nIf so hits are highlighted in different colors!\n" ~
                                    "Sample calls: \n" ~
                                    " fdo.d --color -d /lib/modules/3.13.0-24-generic/kernel/drivers/staging --browse --duplicates --recache lirc\n" ~
                                    " fdo.d --color -d /etc -s --tree --usage -l --duplicates stallman\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym sttccc\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym dktp\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym tms sttc prc dtp xsr\n" ~
                                    " fdo.d --color -d /etc min max delta\n" ~
                                    " fdo.d --color -d /etc if elif return len --duplicates --sort=onSize\n" ~
                                    " fdo.d --color -k -d /bin alpha\n" ~
                                    " fdo.d --color -d /lib -k linus\n" ~
                                    " fdo.d --color -d /etc --symbol alpha beta gamma delta\n" ~
                                    " fdo.d --color -d /var/spool/postfix/dev\n" ~
                                    " fdo.d --color -d /etc alpha\n" ~
                                    " fdo.d --color -d ~/Work/dmd --browse xyz --duplicates --do=preprocess",

                                    args,
                                    std.getopt.config.caseInsensitive,

                                    "verbose|v", "\tVerbose",  &_beVerbose,

                                    "color|C", "\tColorize Output" ~ defaultDoc(gstats.colorFlag),  &gstats.colorFlag,
                                    "types|T", "\tComma separated list (CSV) of file types/kinds to scan" ~ defaultDoc(selFKindNames), &selFKindNames,
                                    "list-textual-kinds", "\tList registered textual types/kinds" ~ defaultDoc(listTxtFKinds), &listTxtFKinds,
                                    "list-binary-kinds", "\tList registered binary types/kinds" ~ defaultDoc(listBinFKinds), &listBinFKinds,
                                    "group-types|G", "\tCollect and group file types found" ~ defaultDoc(gstats.collectTypeHits), &gstats.collectTypeHits,

                                    "i", "\tCase-Fold, Case-Insensitive" ~ defaultDoc(_caseFold), &_caseFold,
                                    "k", "\tShow Skipped Directories and Files" ~ defaultDoc(_showSkipped), &_showSkipped,
                                    "d", "\tRoot Directory(s) of tree(s) to scan, defaulted to current directory" ~ defaultDoc(_topDirNames), &_topDirNames,
                                    "depth", "\tDepth of tree to scan, defaulted to unlimited (-1) depth" ~ defaultDoc(gstats.scanDepth), &gstats.scanDepth,

                                    // Contexts
                                    "context|x", "\tComma Separated List of Contexts. Either: " ~ enumDoc!ScanContext, &gstats.scanContext,

                                    "word|w", "\tSearch for key as a complete Word (A Letter followed by more Letters and Digits)." ~ defaultDoc(gstats.keyAsWord), &gstats.keyAsWord,
                                    "symbol|ident|id|s", "\tSearch for key as a complete Symbol (Identifier)" ~ defaultDoc(gstats.keyAsSymbol), &gstats.keyAsSymbol,
                                    "acronym|a", "\tSearch for key as an acronym (relaxed)" ~ defaultDoc(gstats.keyAsAcronym), &gstats.keyAsAcronym,
                                    "exact", "\tSearch for key only with exact match (strict)" ~ defaultDoc(gstats.keyAsExact), &gstats.keyAsExact,

                                    "name-duplicates|snd", "\tDetect & Show file name duplicates" ~ defaultDoc(gstats.showNameDups), &gstats.showNameDups,
                                    "hardlink-duplicates|inode-duplicates|shd", "\tDetect & Show multiple links to same inode" ~ defaultDoc(gstats.showLinkDups), &gstats.showLinkDups,
                                    "file-content-duplicates|scd", "\tDetect & Show file contents duplicates" ~ defaultDoc(gstats.showFileContentDups), &gstats.showFileContentDups,
                                    "tree-content-duplicates", "\tDetect & Show directory tree contents duplicates" ~ defaultDoc(gstats.showTreeContentDups), &gstats.showTreeContentDups,

                                    "elf-symbol-duplicates", "\tDetect & Show ELF Symbol Duplicates" ~ defaultDoc(gstats.showELFSymbolDups), &gstats.showELFSymbolDups,

                                    "duplicates|D", "\tDetect & Show file name and contents duplicates" ~ defaultDoc(gstats.showAnyDups), &gstats.showAnyDups,
                                    "duplicates-context", "\tDuplicates Detection Context. Either: " ~ enumDoc!DuplicatesContext, &gstats.duplicatesContext,
                                    "hardlink-content-duplicates", "\tConvert all content duplicates into hardlinks (common inode) if they reside on the same file system" ~ defaultDoc(gstats.linkContentDups), &gstats.linkContentDups,

                                    "usage", "\tShow disk usage (tree size) of scanned directories" ~ defaultDoc(gstats.showUsage), &gstats.showUsage,
                                    "count-lines", "\tShow line counts of scanned files" ~ defaultDoc(gstats.showLineCounts), &gstats.showLineCounts,

                                    "sha1", "\tShow SHA1 content digests" ~ defaultDoc(gstats.showSHA1), &gstats.showSHA1,

                                    "mmaps", "\tShow when files are memory mapped (mmaped)" ~ defaultDoc(gstats.showMMaps), &gstats.showMMaps,

                                    "follow-symlinks|f", "\tFollow symbolic links" ~ defaultDoc(gstats.followSymlinks), &gstats.followSymlinks,
                                    "broken-symlinks|l", "\tDetect & Show broken symbolic links (target is non-existing file) " ~ defaultDoc(gstats.showBrokenSymlinks), &gstats.showBrokenSymlinks,
                                    "show-symlink-cycles|l", "\tDetect & Show symbolic links cycles" ~ defaultDoc(gstats.showSymlinkCycles), &gstats.showSymlinkCycles,

                                    "add-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(addTags), &addTags,
                                    "remove-tag", "\tRemove tag string(s) from matching files" ~ defaultDoc(removeTags), &removeTags,

                                    "tree|W", "\tShow Scanned Tree and Followed Symbolic Links" ~ defaultDoc(gstats.showTree), &gstats.showTree,
                                    "sort|S", "\tDirectory contents sorting order. Either: " ~ enumDoc!DirSorting, &gstats.subsSorting,
                                    "build", "\tBuild Source Code. Either: " ~ enumDoc!BuildType, &gstats.buildType,

                                    "path-format", "\tFormat of paths. Either: " ~ enumDoc!PathFormat ~ "." ~ defaultDoc(gstats.pathFormat), &gstats.pathFormat,

                                    "cache-file|F", "\tFile System Tree Cache File" ~ defaultDoc(_cacheFile), &_cacheFile,
                                    "recache", "\tSkip initial load of cache from disk" ~ defaultDoc(gstats.recache), &gstats.recache,

                                    "do", "\tOperation to perform on matching files. Either: " ~ enumDoc!FOp, &gstats.fOp,

                                    "demangle-elf", "\tDemangle ELF files.", &gstats.demangleELF,

                                    "use-ngrams", "\tUse NGrams to cache statistics and thereby speed up search" ~ defaultDoc(gstats.useNGrams), &gstats.useNGrams,

                                    "html|H", "\tFormat output as HTML" ~ defaultDoc(gstats.useHTML), &gstats.useHTML,
                                    "browse|B", ("\tFormat output as HTML to a temporary file" ~
                                                 defaultDoc(_cacheFile) ~
                                                 " and open it with default Web browser" ~
                                                 defaultDoc(gstats.browseOutput)), &gstats.browseOutput,

                                    "author", "\tPrint name of\n"~"\tthe author",
                                    delegate() { writeln("Per Nordlöw"); }
            );

        // --duplicates is a shorthand for all individual duplicate detectors
        if (gstats.showAnyDups)
        {
            gstats.showNameDups = true;
            gstats.showLinkDups = true;
            gstats.showFileContentDups = true;
            gstats.showTreeContentDups = true;
            gstats.showELFSymbolDups = true;
        }
        if (helpPrinted)
            return;

        _cacheFile = std.path.expandTilde(_cacheFile);

        if (_topDirNames.empty)
        {
            _topDirNames = ["."];
        }
        if (_topDirNames == ["."])
        {
            gstats.pathFormat = PathFormat.relative;
        }
        else
        {
            gstats.pathFormat = PathFormat.absolute;
        }
        foreach (ref topName; _topDirNames)
        {
            if (topName ==  ".")
            {
                topName = topName.absolutePath.buildNormalizedPath;
            }
            else
            {
                topName = topName.expandTilde.buildNormalizedPath;
            }
        }

        // Output Handling
        if (gstats.browseOutput)
        {
            gstats.useHTML = true;
            immutable ext = gstats.useHTML ? "html" : "results.txt";
            import std.uuid: randomUUID;
            outFile = ioFile("/tmp/fs-" ~ randomUUID.toString() ~
                             "." ~ ext,
                             "w");
            /* popen("gnome-open " ~ outFile.name); */
            popen("firefox -new-tab " ~ outFile.name);
        }
        else
        {
            outFile = stdout;
        }

        // Every remaining non-flag argument is a search key
        foreach (arg; args[1..$])
        {
            if (!arg.startsWith("-")) // if argument not a flag
            {
                keys ~= arg;
            }
        }

        // Calc stats
        keysBists = keys.map!bistogramOverRepresentation;
        keysXGrams = keys.map!(sparseUIntNGramOverRepresentation!NGramOrder);
        keysBistsUnion = reduce!"a | b"(typeof(keysBists.front).init, keysBists);
        keysXGramsUnion = reduce!"a + b"(typeof(keysXGrams.front).init, keysXGrams);

        auto viz = new Viz(outFile,
                           &term,
                           gstats.showTree,
                           gstats.useHTML ? VizForm.HTML : VizForm.textAsciiDocUTF8,
                           gstats.colorFlag,
                           !gstats.useHTML, // only use if HTML
                           true, // TODO Only set if in debug mode
            );

        if (gstats.useNGrams &&
            (!keys.empty) &&
            keysXGramsUnion.empty)
        {
            gstats.useNGrams = false;
            viz.ppln("Keys must be at least of length " ~
                     to!string(NGramOrder + 1) ~
                     " in order for " ~
                     keysXGrams[0].typeName ~
                     " to be calculated");
        }

        // viz.ppln("<meta http-equiv=\"refresh\" content=\"1\"/>"); // refresh every second

        if (selFKindNames)
        {
            foreach (lang; selFKindNames.splitterASCIIAmong!(","))
            {
                if      (lang         in gstats.allFKinds.byName) // try exact match
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang];
                }
                else if (lang.toLower in gstats.allFKinds.byName) // else try all in lower case
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang.toLower];
                }
                else if (lang.toUpper in gstats.allFKinds.byName) // else try all in upper case
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang.toUpper];
                }
                else
                {
                    writeln("warning: Language ", lang, " not registered");
                }
            }
            if (gstats.selFKinds.byIndex.empty)
            {
                writeln("warning: None of the languages ", to!string(selFKindNames), " are registered. Defaulting to all file types.");
                gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
            }
            else
            {
                gstats.selFKinds.rehash;
            }
        }
        else
        {
            gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
        }

        // Keys
        auto commaedKeys = keys.joiner(",");
        const keysPluralExt = keys.length >= 2 ? "s" : "";
        string commaedKeysString = to!string(commaedKeys);
        if (keys)
        {
            selFKindsNote = " in " ~ (gstats.selFKinds == gstats.allFKinds ?
                                      "all " :
                                      gstats.selFKinds.byIndex.map!(a => a.kindName).join(",") ~ "-") ~ "files";
            immutable underNote = " under \"" ~ (_topDirNames.reduce!"a ~ ',' ~ b") ~ "\"";
            const exactNote = gstats.keyAsExact ? "exact " : "";
            string asNote;
            if (gstats.keyAsAcronym)
            {
                asNote = (" as " ~ exactNote ~
                          (gstats.keyAsWord ? "word" : "symbol") ~
                          " acronym" ~ keysPluralExt);
            }
            else if (gstats.keyAsSymbol)
            {
                asNote = " as " ~ exactNote ~ "symbol" ~ keysPluralExt;
            }
            else if (gstats.keyAsWord)
            {
                asNote = " as " ~ exactNote ~ "word" ~ keysPluralExt;
            }
            else
            {
                asNote = "";
            }

            const title = ("Searching for \"" ~ commaedKeysString ~ "\"" ~
                           " case-" ~ (_caseFold ? "in" : "") ~"sensitively"
                           ~asNote ~selFKindsNote ~underNote);
            if (viz.form == VizForm.HTML) // only needed for HTML output
            {
                viz.ppln(faze(title, titleFace));
            }

            viz.pp(asH!1("Searching for \"", commaedKeysString, "\"",
                         " case-", (_caseFold ? "in" : ""), "sensitively",
                         asNote, selFKindsNote,
                         " under ", _topDirNames.map!(a => a.asPath)));
        }

        if (listTxtFKinds)
        {
            viz.pp("Textual (Source) Kinds".asH!2,
                   gstats.txtFKinds.byIndex.asTable);
        }

        if (listBinFKinds)
        {
            viz.pp("Binary Kinds".asH!2,
                   gstats.binFKinds.byIndex.asTable);
        }

        /* binFKinds.asTable, */

        if (_showSkipped)
        {
            viz.pp("Skipping files of type".asH!2,
                   asUList(gstats.binFKinds.byIndex.map!(a => asItem(a.kindName.asBold,
                                                                     ": ",
                                                                     asCSL(a.exts.map!(b => b.asCode))))));
            viz.pp("Skipping directories of type".asH!2,
                   asUList(gstats.skippedDirKinds.map!(a => asItem(a.kindName.asBold,
                                                                   ": ",
                                                                   a.fileName.asCode))));
        }

        // if (key && key == key.toLower()) { // if search key is all lowercase
        //     _caseFold = true;               // we do case-insensitive search like in Emacs
        // }

        _uid = getuid;
        _gid = getgid;

        // Setup root directory
        if (!gstats.recache)
        {
            GC.disable;
            scope(exit) GC.enable; // re-enable even if loading throws
            gstats.rootDir = loadRootDirTree(viz, _cacheFile, gstats);
        }
        if (!gstats.rootDir) // if first time
        {
            gstats.rootDir = new Dir("/", gstats); // filesystem root directory. TODO Make this uncopyable?
        }

        // Scan for exact key match
        gstats.topDirs = getDirs(enforceNotNull(gstats.rootDir), _topDirNames);

        _currTime = Clock.currTime;

        {
            GC.disable;
            scope(exit) GC.enable; // re-enable even if scanning throws
            scanTopDirs(viz, commaedKeysString);
        }

        {
            GC.disable;
            scope(exit) GC.enable; // re-enable even if saving throws
            saveRootDirTree(viz, gstats.rootDir, _cacheFile);
        }

        // Print statistics
        showStats(viz);
    }

    void scanTopDirs(Viz viz,
                     string commaedKeysString)
    {
        viz.pp("Results".asH!2);
        if (gstats.topDirs)
        {
            foreach (topIndex, topDir; gstats.topDirs)
            {
                scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
                if (ctrlC)
                {
                    auto restDirs = gstats.topDirs[topIndex + 1..$];
                    if (!restDirs.empty)
                    {
                        debug dbg("Ctrl-C pressed: Skipping search of " ~ to!string(restDirs));
                        break;
                    }
                }
            }

            viz.pp("Summary".asH!2);

            if ((gstats.noScannedFiles - gstats.noScannedDirs) == 0)
            {
                viz.ppln("No files with any content found");
            }
            else
            {
                // Scan for acronym key match
                if (keys && _hitsCountTotal == 0)  // if keys given but no hit found
                {
                    auto keysString = (keys.length >= 2 ? "s" : "") ~ " \"" ~ commaedKeysString;
                    if (gstats.keyAsAcronym)
                    {
                        viz.ppln(("No acronym matches for key" ~ keysString ~ `"` ~
                                  (gstats.keyAsSymbol ? " as symbol" : "") ~
                                  " found in files of type"));
                    }
                    else if (!gstats.keyAsExact)
                    {
                        viz.ppln(("No exact matches for key" ~ keysString ~ `"` ~
                                  (gstats.keyAsSymbol ? " as symbol" : "") ~
                                  " found" ~ selFKindsNote ~
                                  ". Relaxing scan to" ~ (gstats.keyAsSymbol ?
" symbol" : "") ~ " acronym match."));
                        gstats.keyAsAcronym = true;

                        foreach (topDir; gstats.topDirs)
                        {
                            scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
                        }
                    }
                }
            }
        }

        assert(gstats.noScannedDirs +
               gstats.noScannedRegFiles +
               gstats.noScannedSymlinks +
               gstats.noScannedSpecialFiles == gstats.noScannedFiles);
    }

    version(linux)
    {
        /** Check whether the file described by `stat` is readable by user
            `uid` in group `gid`, going by its owner, group and other read
            permission bits.

            When it is not, `msg` is set to an explanation naming who can read
            it: the owning user (with login name when it can be looked up),
            the owning group, or "root" if neither has read permission.  `msg`
            is left untouched when the file is readable.

            Returns: true if readable.
        */
        @trusted bool readable(in stat_t stat, uid_t uid, gid_t gid, ref string msg)
        {
            immutable mode = stat.st_mode;
            immutable ok = ((stat.st_uid == uid) && (mode & S_IRUSR) ||
                            (stat.st_gid == gid) && (mode & S_IRGRP) ||
                            (mode & S_IROTH));
            if (!ok)
            {
                msg = " is not readable by you, but only by";
                bool can = false; // someone can access
                if (mode & S_IRUSR)
                {
                    can = true;
                    msg ~= " user id " ~ to!string(stat.st_uid);

                    // Lookup user name from user id.  A stack buffer replaces
                    // the former unchecked malloc/free pair.
                    passwd pw;
                    passwd* pw_ret;
                    char[16384] buf = void;
                    if (getpwuid_r(stat.st_uid, &pw, buf.ptr, buf.length, &pw_ret) == 0 &&
                        pw_ret !is null)
                    {
                        // to!string copies the NUL-terminated name out of `buf`
                        msg ~= " (" ~ to!string(pw.pw_name) ~ ")";
                    }
                }
                if (mode & S_IRGRP)
                {
                    if (can) // join with "or" only when a user clause precedes
                    {
                        msg ~= " or";
                    }
                    can = true;
                    msg ~= " group id " ~ to!string(stat.st_gid);
                }
                if (!can)
                {
                    msg ~= " root";
                }
            }
            return ok;
        }
    }

    Results results;

    /** Account for and, if skipped entries are shown, report that `file`
        could not be read.

        Nothing happens when `readable` considers the file readable.
        Otherwise its size is added to `results.noBytesUnreadable` and, when
        `_showSkipped` is set, a line explaining who can read it is printed,
        preceded by tree guides when the tree is shown.
    */
    void handleError(F)(Viz viz,
                        NotNull!F file, bool isDir, size_t subIndex)
    {
        auto dent = DirEntry(file.path);
        immutable stat_t stat = dent.statBuf;
        string msg;
        if (!readable(stat, _uid, _gid, msg))
        {
            results.noBytesUnreadable += dent.size;
            if (_showSkipped)
            {
                if (gstats.showTree)
                {
                    auto parentDir = file.parent;
                    // last entry of its directory gets a closing corner
                    immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
                    viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
                }
                viz.ppln(file,
                         ": ", isDir ? "Directory" : "File",
                         faze(msg, warnFace));
            }
        }
    }

    /** Report, when `_showSkipped` is set, that `regFile` of kind `kind` was
        skipped because of `skipCause`; does nothing otherwise. */
    void printSkipped(Viz viz,
                      NotNull!RegFile regFile,
                      size_t subIndex,
                      const NotNull!FKind kind, KindHit kindhit,
                      const string skipCause)
    {
        auto parentDir = regFile.parent;
        if (_showSkipped)
        {
            if (gstats.showTree)
            {
                // last entry of its directory gets a closing corner
                immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
                viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
            }
            viz.pp(horizontalRuler,
                   asH!3(regFile,
                         ": Skipped ", kind, " file",
                         skipCause));
        }
    }

    size_t _scanChunkSize;

    /** Determine whether `regFile` is of one of the selected file kinds.

        Tries, in order: the kind id cached on the file, the selected kinds
        registered for the file's extension, and finally every selected kind.

        Returns: `KindHit.cached` for a cached hit, the hit reported by
        `ofKind` for the first matching kind, `KindHit.none` otherwise.
    */
    KindHit isSelectedFKind(NotNull!RegFile regFile) @safe /* nothrow */
    {
        typeof(return) kindHit = KindHit.none;
        FKind hitKind;

        // Try cached kind first
        // First Try with kindId as try
        if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
        {
            if (regFile._cstat.kindId in gstats.selFKinds.byId)
            {
                hitKind = gstats.selFKinds.byId[regFile._cstat.kindId];
                kindHit = KindHit.cached;
                return kindHit;
            }
        }

        immutable ext = regFile.realExtension;

        // Try with hash table first
        if (!ext.empty && // if file has extension and
            ext in gstats.selFKinds.byExt) // and extensions may match specified included files
        {
            auto possibleKinds = gstats.selFKinds.byExt[ext];
            foreach (kind; possibleKinds)
            {
                auto nnKind = enforceNotNull(kind);
                immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
                if (hit)
                {
                    hitKind = nnKind;
                    kindHit = hit;
                    break;
                }
            }
        }

        if (!hitKind) // if no hit yet
        {
            // blindly try the rest
            foreach (kind; gstats.selFKinds.byIndex)
            {
                auto nnKind = enforceNotNull(kind);
                immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
                if (hit)
                {
                    hitKind = nnKind;
                    kindHit = hit;
                    break;
                }
            }
        }

        return kindHit;
    }

    /** Search for Keys `keys` in Source `src`.
     */
    size_t scanForKeys(Source, Keys)(Viz viz,
                                     NotNull!Dir topDir,
                                     NotNull!File theFile,
                                     NotNull!Dir parentDir,
                                     ref Symlink[] fromSymlinks,
                                     in Source src,
                                     in Keys keys,
                                     in bool[] bistHits = [],
                                     ScanContext ctx = ScanContext.standard)
    {
        bool anyFileHit = false; // will become true if any hit in this file

        typeof(return) hitCount = 0;

        import std.ascii: newline;

        auto thisFace = stdFace;
        if (gstats.colorFlag)
        {
            if (ScanContext.fileName)
            {
                thisFace = fileFace;
            }
        }

        size_t nL = 0; // line counter
        foreach (line; src.splitterASCIIAmong!(newline))
        {
            auto rest = cast(string)line; // rest of line as a string

            bool anyLineHit = false; // will become true if any hit on current line
            // Hit search loop
            while (!rest.empty)
            {
                // Find any key

                /* TODO Convert these to a range. */
                ptrdiff_t offKB = -1;
                ptrdiff_t offKE = -1;

                foreach (uint ix, key; keys) // TODO Call variadic-find instead to speed things up.
4804 { 4805 /* Bistogram Discardal */ 4806 if ((!bistHits.empty) && 4807 !bistHits[ix]) // if neither exact nor acronym match possible 4808 { 4809 continue; // try next key 4810 } 4811 4812 /* dbg("key:", key, " line:", line); */ 4813 ptrdiff_t[] acronymOffsets; 4814 if (gstats.keyAsAcronym) // acronym search 4815 { 4816 auto hit = (cast(immutable ubyte[])rest).findAcronymAt(key, 4817 gstats.keyAsSymbol ? FindContext.inSymbol : FindContext.inWord); 4818 if (!hit[0].empty) 4819 { 4820 acronymOffsets = hit[1]; 4821 offKB = hit[1][0]; 4822 offKE = hit[1][$-1] + 1; 4823 } 4824 } 4825 else 4826 { // normal search 4827 import std..string: indexOf; 4828 offKB = rest.indexOf(key, 4829 _caseFold ? CaseSensitive.no : CaseSensitive.yes); // hit begin offset 4830 offKE = offKB + key.length; // hit end offset 4831 } 4832 4833 if (offKB >= 0) // if hit 4834 { 4835 if (!gstats.showTree && ctx == ScanContext.fileName) 4836 { 4837 viz.pp(parentDir, dirSeparator); 4838 } 4839 4840 // Check Context 4841 if ((gstats.keyAsSymbol && !isSymbolASCII(rest, offKB, offKE)) || 4842 (gstats.keyAsWord && !isWordASCII (rest, offKB, offKE))) 4843 { 4844 rest = rest[offKE..$]; // move forward in line 4845 continue; 4846 } 4847 4848 if (ctx == ScanContext.fileContent && 4849 !anyLineHit) // if this is first hit 4850 { 4851 if (viz.form == VizForm.HTML) 4852 { 4853 if (!anyFileHit) 4854 { 4855 viz.pp(horizontalRuler, 4856 displayedFileName(gstats, theFile).asPath.asH!3); 4857 viz.ppTagOpen(`table`, `border=1`); 4858 anyFileHit = true; 4859 } 4860 } 4861 else 4862 { 4863 if (gstats.showTree) 4864 { 4865 viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ "├" ~ "─ "); 4866 } 4867 else 4868 { 4869 foreach (fromSymlink; fromSymlinks) 4870 { 4871 viz.pp(fromSymlink, 4872 " modified ", 4873 faze(shortDurationString(_currTime - fromSymlink.timeLastModified), 4874 timeFace), 4875 " ago", 4876 " -> "); 4877 } 4878 // show file path/name 4879 viz.pp(displayedFileName(gstats, theFile).asPath); // show path 4880 
} 4881 } 4882 4883 // show line:column 4884 if (viz.form == VizForm.HTML) 4885 { 4886 viz.ppTagOpen("tr"); 4887 viz.pp(to!string(nL+1).asCell, 4888 to!string(offKB+1).asCell); 4889 viz.ppTagOpen("td"); 4890 viz.ppTagOpen("code"); 4891 } 4892 else 4893 { 4894 viz.pp(faze(":" ~ to!string(nL+1) ~ ":" ~ to!string(offKB+1) ~ ":", 4895 contextFace)); 4896 } 4897 anyLineHit = true; 4898 } 4899 4900 // show content prefix 4901 viz.pp(faze(to!string(rest[0..offKB]), thisFace)); 4902 4903 // show hit part 4904 if (!acronymOffsets.empty) 4905 { 4906 foreach (aIndex, currOff; acronymOffsets) // TODO Reuse std.algorithm: zip or lockstep? Or create a new kind say named conv. 4907 { 4908 // context before 4909 if (aIndex >= 1) 4910 { 4911 immutable prevOff = acronymOffsets[aIndex-1]; 4912 if (prevOff + 1 < currOff) // at least one letter in between 4913 { 4914 viz.pp(asCtx(ix, to!string(rest[prevOff + 1 .. currOff]))); 4915 } 4916 } 4917 // hit letter 4918 viz.pp(asHit(ix, to!string(rest[currOff]))); 4919 } 4920 } 4921 else 4922 { 4923 viz.pp(asHit(ix, to!string(rest[offKB..offKE]))); 4924 } 4925 4926 rest = rest[offKE..$]; // move forward in line 4927 4928 hitCount++; // increase hit count 4929 parentDir.hitCount++; 4930 _hitsCountTotal++; 4931 4932 goto foundHit; 4933 } 4934 } 4935 foundHit: 4936 if (offKB == -1) { break; } 4937 } 4938 4939 // finalize line 4940 if (anyLineHit) 4941 { 4942 // show final context suffix 4943 viz.ppln(faze(rest, thisFace)); 4944 if (viz.form == VizForm.HTML) 4945 { 4946 viz.ppTagClose("code"); 4947 viz.ppTagClose("td"); 4948 viz.pplnTagClose("tr"); 4949 } 4950 } 4951 nL++; 4952 } 4953 4954 if (gstats.showLineCounts) 4955 { 4956 gstats.lineCountsByFile[theFile] = nL; 4957 } 4958 4959 if (anyFileHit) 4960 { 4961 viz.pplnTagClose("table"); 4962 } 4963 4964 // Previous solution 4965 // version(none) 4966 // { 4967 // ptrdiff_t offHit = 0; 4968 // foreach (ix, key; keys) 4969 // { 4970 // scope immutable hit1 = src.find(key); // single key hit 4971 // 
offHit = hit1.ptr - src.ptr; 4972 // if (!hit1.empty) 4973 // { 4974 // scope immutable src0 = src[0..offHit]; // src beforce hi 4975 // immutable rowHit = count(src0, newline); 4976 // immutable colHit = src0.retro.countUntil(newline); // count backwards till beginning of rowHit 4977 // immutable offBOL = offHit - colHit; 4978 // immutable cntEOL = src[offHit..$].countUntil(newline); // count forwards to end of rowHit 4979 // immutable offEOL = (cntEOL == -1 ? // if no hit 4980 // src.length : // end of file 4981 // offHit + cntEOL); // normal case 4982 // viz.pp(faze(asPath(gstats.useHTML, dent.name), pathFace)); 4983 // viz.ppln(":", rowHit + 1, 4984 // ":", colHit + 1, 4985 // ":", cast(string)src[offBOL..offEOL]); 4986 // } 4987 // } 4988 // } 4989 4990 // switch (keys.length) 4991 // { 4992 // default: 4993 // break; 4994 // case 0: 4995 // break; 4996 // case 1: 4997 // immutable hit1 = src.find(keys[0]); 4998 // if (!hit1.empty) 4999 // { 5000 // viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit1.length); 5001 // } 5002 // break; 5003 // // case 2: 5004 // // immutable hit2 = src.find(keys[0], keys[1]); // find two keys 5005 // // if (!hit2[0].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit2[0].length); } 5006 // // if (!hit2[1].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit2[1].length); } 5007 // // break; 5008 // // case 3: 5009 // // immutable hit3 = src.find(keys[0], keys[1], keys[2]); // find two keys 5010 // // if (!hit3.empty) 5011 // { 5012 // // viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit1.length); 5013 // // } 5014 // // break; 5015 // } 5016 return hitCount; 5017 } 5018 5019 /** Process Regular File `theRegFile`. 
*/
    void processRegFile(Viz viz,
                        NotNull!Dir topDir,
                        NotNull!RegFile theRegFile,
                        NotNull!Dir parentDir,
                        const string[] keys,
                        ref Symlink[] fromSymlinks,
                        size_t subIndex,
                        GStats gstats)
    {
        // Scan the file's name and contents for `keys` first.
        scanRegFile(viz,
                    topDir,
                    theRegFile,
                    parentDir,
                    keys,
                    fromSymlinks,
                    subIndex);

        // check for operations
        // TODO Reuse isSelectedFKind instead of this
        immutable ext = theRegFile.realExtension;
        if (auto matchingFKinds = ext in gstats.selFKinds.byExt) // one lookup instead of `in` followed by index
        {
            foreach (kind; *matchingFKinds)
            {
                // each operation is indexed as [0] = operation id, [1] = command string
                const hit = kind.operations.find!(a => a[0] == gstats.fOp);
                if (hit.empty) { continue; }

                const cmd = hit.front[1]; // command string
                dbg("TODO Performing operation ", to!string(cmd),
                    " on ", theRegFile.path,
                    " by calling it using ", cmd);

                // The child is never waited for, so spawn it detached; a
                // plain spawn whose Pid is dropped leaves a zombie process
                // behind for every operation performed.
                import std.process : Config, spawnProcess;
                spawnProcess(cmd.splitterASCIIAmong!(" ").array ~ [theRegFile.path],
                             null, // no extra environment variables
                             Config.detached);
            }
        }
    }

    /** Scan `elfFile` for ELF Symbols.

        Every non-empty entry of the symbols string table is registered in
        `gstats.elfFilesBySymbol`. Entries whose demangled name matches any
        of `keys` are shown as a table if the user has selected the `ELF`
        file kind.

        Params:
            viz = visualization output
            elfFile = file to decode as ELF
            keys = search keys; nothing is shown if empty
            gstats = global statistics and settings
    */
    void scanELFFile(Viz viz,
                     NotNull!RegFile elfFile,
                     const string[] keys,
                     GStats gstats)
    {
        import nxt.elfdoc: sectionNameExplanations;
        /* TODO Add mouse hovering help for sectionNameExplanations[section] */
        dbg("before: ", elfFile);
        ELF decoder = ELF.fromFile(elfFile._mmfile);
        dbg("after: ", elfFile);

        /* foreach (section; decoder.sections) */
        /* { */
        /*     if (section.name.length) */
        /*     { */
        /*         /\* auto sst = section.StringTable; *\/ */
        /*         //writeln("ELF Section named ", section.name); */
        /*     } */
        /* } */

        /* const sectionNames = [".symtab"/\* , ".strtab", ".dynsym" *\/];    // TODO These two other sections causes range exceptions. */
        /* foreach (sectionName; sectionNames) */
        /* { */
        /*     auto sts = decoder.getSection(sectionName); */
        /*     if (!sts.isNull) */
        /*     { */
        /*         SymbolTable symtab = SymbolTable(sts); */
        /*         // TODO Use range: auto symbolsDemangled = symtab.symbols.map!(sym => demangler(sym.name).decodeSymbol); */
        /*         foreach (sym; symtab.symbols) // you can add filters here */
        /*         { */
        /*             if (gstats.demangleELF) */
        /*             { */
        /*                 const hit = demangler(sym.name).decodeSymbol; */
        /*             } */
        /*             else */
        /*             { */
        /*                 writeln("?: ", sym.name); */
        /*             } */
        /*         } */
        /*     } */
        /* } */

        auto sst = decoder.getSymbolsStringTable;
        if (sst.isNull) { return; }

        import nxt.algorithm_ex: findFirstOfAnyInOrder;

        // Register symbols eagerly. Doing this as a side effect inside the
        // lazy display pipeline made the registration depend on how far
        // that pipeline happened to be consumed: it stopped at the first
        // hit whenever the table was not printed.
        foreach (raw; sst.strings)
        {
            if (!raw.empty) // skip empty raw string
            {
                gstats.elfFilesBySymbol[raw.idup] ~= elfFile; // WARNING: needs raw.idup here because we can't refer to raw
            }
        }

        // Nothing is shown unless keys are given and the user selected ELF files.
        if (keys.empty ||
            !(`ELF` in gstats.selFKinds.byName))
        {
            return;
        }

        // Side-effect free view of the demangled symbols matching any key.
        auto scan = (sst.strings
                     .filter!(raw => !raw.empty) // skip empty raw string
                     .map!(raw => demangler(raw).decodeSymbol)
                     .filter!(demangling => demangling.unmangled.findFirstOfAnyInOrder(keys)[1]));

        if (!scan.empty)
        {
            viz.pp(horizontalRuler,
                   displayedFileName(gstats, elfFile).asPath.asH!3,
                   asH!4(`ELF Symbol Strings Table (`, `.strtab`.asCode, `)`),
                   scan.asTable);
        }
    }

    /** Search for Keys `keys` in Regular File `theRegFile`.
*/
    void scanRegFile(Viz viz,
                     NotNull!Dir topDir,
                     NotNull!RegFile theRegFile,
                     NotNull!Dir parentDir,
                     const string[] keys,
                     ref Symlink[] fromSymlinks,
                     size_t subIndex)
    {
        results.noBytesTotal += theRegFile.size;
        results.noBytesTotalContents += theRegFile.size;

        // Scan name
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.regularFileName) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir, cast(NotNull!File)theRegFile, parentDir,
                        fromSymlinks,
                        theRegFile.name, keys, [], ScanContext.fileName);
        }

        // Scan Contents
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileContent) &&
            (gstats.showFileContentDups ||
             gstats.showELFSymbolDups ||
             !keys.empty) &&
            theRegFile.size != 0)        // non-empty file
        {
            // Release the contents on every way out of this block. The
            // early return below and exceptions other than the two caught
            // ones used to skip the release.
            // TODO Call lazily only when open count is too large
            scope(exit) theRegFile.freeContents;

            // immutable upTo = size_t.max;

            // TODO Flag for readText
            try
            {
                ++gstats.noScannedRegFiles;
                ++gstats.noScannedFiles;

                // ELF Symbols
                if (gstats.showELFSymbolDups &&
                    theRegFile.ofKind(`ELF`, gstats.collectTypeHits, gstats.allFKinds))
                {
                    scanELFFile(viz, theRegFile, keys, gstats);
                }

                // Check included kinds first because they are fast.
                KindHit incKindHit = isSelectedFKind(theRegFile);
                if (!gstats.selFKinds.byIndex.empty && // TODO Do we really need this one?
                    !incKindHit)
                {
                    return;
                }

                // Super-Fast Key-File Bistogram Discardal. TODO Trim scale factor to optimal value.
                enum minFileSize = 256; // minimum size of file for discardal.
                immutable bool doBist = theRegFile.size > minFileSize;
                immutable bool doNGram = (gstats.useNGrams &&
                                          (!gstats.keyAsSymbol) &&
                                          theRegFile.size > minFileSize);
                immutable bool doBitStatus = true;

                // Chunked Calculation of CStat in one pass. TODO call async.
                theRegFile.calculateCStatInChunks(gstats.filesByContentId,
                                                  _scanChunkSize,
                                                  gstats.showFileContentDups,
                                                  doBist,
                                                  doBitStatus);

                // Match Bist of Keys with BistX of File
                bool[] bistHits;
                bool noBistMatch = false;
                if (doBist)
                {
                    const theHist = theRegFile.bistogram8;
                    // a key can only occur if all its bits are set in the file's bistogram
                    auto hitsHist = keysBists.map!(a =>
                                                   ((a.value & theHist.value) ==
                                                    a.value)); // TODO Functionize to x.subsetOf(y) or reuse std.algorithm: setDifference or similar
                    bistHits = hitsHist.map!`a == true`.array;
                    noBistMatch = hitsHist.all!`a == false`;
                }
                /* int kix = 0; */
                /* foreach (hit; bistHits) { if (!hit) { debug dbg(`Assert key ` ~ keys[kix] ~ ` not in file ` ~ theRegFile.path); } ++kix; } */

                bool allXGramsMiss = false;
                if (doNGram)
                {
                    ulong keysXGramUnionMatch = keysXGramsUnion.matchDenser(theRegFile.xgram);
                    debug dbg(theRegFile.path,
                              ` sized `, theRegFile.size, ` : `,
                              keysXGramsUnion.length, `, `,
                              theRegFile.xgram.length,
                              ` gave match:`, keysXGramUnionMatch);
                    allXGramsMiss = keysXGramUnionMatch == 0;
                }

                // binHit is indexed as [0] = kind hit, [1] = matched kind, [2] = index of that kind
                auto binHit = theRegFile.ofAnyKindIn(gstats.binFKinds,
                                                     gstats.collectTypeHits);
                const binKindHit = binHit[0];
                if (binKindHit)
                {
                    import nxt.numerals: toOrdinal;
                    const nnKind = binHit[1].enforceNotNull;
                    const kindIndex = binHit[2];
                    if (_showSkipped)
                    {
                        if (gstats.showTree)
                        {
                            immutable intro = subIndex == parentDir.subs.length - 1 ? `└` : `├`;
                            viz.pp(`│ `.repeat(parentDir.depth + 1).join(``) ~ intro ~ `─ `);
                        }
                        viz.ppln(theRegFile, `: Skipped `, nnKind, ` file at `,
                                 toOrdinal(kindIndex + 1), ` blind try`);
                    }
                    final switch (binKindHit)
                    {
                    case KindHit.none:
                        break;
                    case KindHit.cached:
                        printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
                                     ` using cached KindId`);
                        break;
                    case KindHit.uncached:
                        printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
                                     ` at ` ~ toOrdinal(kindIndex + 1) ~ ` extension try`);
                        break;
                    }
                }

                if (binKindHit != KindHit.none ||
                    noBistMatch ||
                    allXGramsMiss) // or no hits possible. TODO Maybe more efficient to do histogram discardal first
                {
                    results.noBytesSkipped += theRegFile.size;
                }
                else
                {
                    // Search if not Binary

                    // If Source file is ok
                    auto src = theRegFile.readOnlyContents[];

                    results.noBytesScanned += theRegFile.size;

                    if (keys)
                    {
                        // Fast discardal of files with no match
                        bool fastOk = true;
                        if (!_caseFold) // if no relaxation of search
                        {
                            if (gstats.keyAsAcronym) // if no relaxation of search
                            {
                                /* TODO Reuse findAcronym in algorith_ex. */
                            }
                            else // if no relaxation of search
                            {
                                switch (keys.length)
                                {
                                default: break;
                                case 1: immutable hit1 = src.find(keys[0]); fastOk = !hit1.empty; break;
                                    // case 2: immutable hit2 = src.find(keys[0], keys[1]); fastOk = !hit2[0].empty; break;
                                    // case 3: immutable hit3 = src.find(keys[0], keys[1], keys[2]); fastOk = !hit3[0].empty; break;
                                    // case 4: immutable hit4 = src.find(keys[0], keys[1], keys[2], keys[3]); fastOk = !hit4[0].empty; break;
                                    // case 5: immutable hit5 = src.find(keys[0], keys[1], keys[2], keys[3], keys[4]); fastOk = !hit5[0].empty; break;
                                }
                            }
                        }

                        // TODO Continue search from hit1, hit2 etc.

                        if (fastOk)
                        {
                            foreach (tag; addTags) gstats.ftags.addTag(theRegFile, tag);
                            foreach (tag; removeTags) gstats.ftags.removeTag(theRegFile, tag);

                            if (theRegFile.size >= 8192)
                            {
                                /* if (theRegFile.xgram == null) { */
                                /*     theRegFile.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                                /* } */
                                /* (*theRegFile.xgram).put(src); */
                                /* theRegFile.xgram.put(src); */
                                /* foreach (lix, ub0; line) { // for each ubyte in line */
                                /*     if (lix + 1 < line.length) { */
                                /*         immutable ub1 = line[lix + 1]; */
                                /*         immutable dix = (cast(ushort)ub0 | */
                                /*                          cast(ushort)ub1*256); */
                                /*         (*theRegFile.xgram)[dix] = true; */
                                /*     } */
                                /* } */
                                auto shallowDenseness = theRegFile.bistogram8.denseness;
                                auto deepDenseness = theRegFile.xgramDeepDenseness;
                                // assert(deepDenseness >= 1);
                                gstats.shallowDensenessSum += shallowDenseness;
                                gstats.deepDensenessSum += deepDenseness;
                                ++gstats.densenessCount;
                                /* dbg(theRegFile.path, `:`, theRegFile.size, */
                                /*     `, length:`, theRegFile.xgram.length, */
                                /*     `, deepDenseness:`, deepDenseness); */
                            }

                            theRegFile._cstat.hitCount = scanForKeys(viz,
                                                                     topDir, cast(NotNull!File)theRegFile, parentDir,
                                                                     fromSymlinks,
                                                                     src, keys, bistHits,
                                                                     ScanContext.fileContent);
                        }
                    }
                }

            }
            catch (FileException)
            {
                handleError(viz, theRegFile, false, subIndex);
            }
            catch (ErrnoException)
            {
                handleError(viz, theRegFile, false, subIndex);
            }
        }
    }

    /** Scan Symlink `theSymlink` at `parentDir` for `keys`.
        Put results in `results`.
*/
    void scanSymlink(Viz viz,
                     NotNull!Dir topDir,
                     NotNull!Symlink theSymlink,
                     NotNull!Dir parentDir,
                     const string[] keys,
                     ref Symlink[] fromSymlinks)
    {
        // check for symlink cycles
        if (!fromSymlinks.find(theSymlink).empty)
        {
            if (gstats.showSymlinkCycles)
            {
                import std.range: back;
                viz.ppln(`Cycle of symbolic links: `,
                         fromSymlinks.asPath,
                         ` -> `,
                         fromSymlinks.back.target);
            }
            return;
        }

        // Scan name
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.symlinkName) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir, cast(NotNull!File)theSymlink, enforceNotNull(theSymlink.parent),
                        fromSymlinks,
                        theSymlink.name, keys, [], ScanContext.fileName);
        }

        // try {
        //     results.noBytesTotal += dent.size;
        // } catch (Exception)
        // {
        //     dbg(`Couldn't get size of `,  dir.name);
        // }
        if (gstats.followSymlinks == SymlinkFollowContext.none) { return; }

        import std.range: popBackN;
        // Push this link onto the chain of links being followed. Because
        // `fromSymlinks` is passed by reference, it is popped again on every
        // way out, including exceptions thrown while following the target,
        // so the caller's chain never keeps a stale entry.
        fromSymlinks ~= theSymlink;
        scope(exit) fromSymlinks.popBackN(1);

        immutable targetPath = theSymlink.absoluteNormalizedTargetPath;
        if (targetPath.exists)
        {
            theSymlink._targetStatus = SymlinkTargetStatus.present;
            if (_topDirNames.all!(a => !targetPath.startsWith(a))) // if target path lies outside of all rootdirs
            {
                auto targetDent = DirEntry(targetPath);
                auto targetFile = getFile(enforceNotNull(gstats.rootDir), targetPath, targetDent.isDir);

                if (gstats.showTree)
                {
                    viz.ppln(`│ `.repeat(parentDir.depth + 1).join(``) ~ `├` ~ `─ `,
                             theSymlink,
                             ` modified `,
                             faze(shortDurationString(_currTime - theSymlink.timeLastModified),
                                  timeFace),
                             ` ago`, ` -> `,
                             targetFile.asPath,
                             faze(` outside of ` ~ (_topDirNames.length == 1 ? `tree ` : `all trees `),
                                  infoFace),
                             gstats.topDirs.asPath,
                             faze(` is followed`, infoFace));
                }

                ++gstats.noScannedSymlinks;
                ++gstats.noScannedFiles;

                // dispatch on the dynamic type of the target
                if (auto targetRegFile = cast(RegFile)targetFile)
                {
                    processRegFile(viz, topDir, assumeNotNull(targetRegFile), parentDir, keys, fromSymlinks, 0, gstats);
                }
                else if (auto targetDir = cast(Dir)targetFile)
                {
                    scanDir(viz, topDir, assumeNotNull(targetDir), keys, fromSymlinks);
                }
                else if (auto targetSymlink = cast(Symlink)targetFile) // target is a Symlink
                {
                    scanSymlink(viz, topDir,
                                assumeNotNull(targetSymlink),
                                enforceNotNull(targetSymlink.parent),
                                keys, fromSymlinks);
                }
            }
        }
        else
        {
            theSymlink._targetStatus = SymlinkTargetStatus.broken;

            if (gstats.showBrokenSymlinks)
            {
                _brokenSymlinks ~= theSymlink;

                // print the chain of links leading to the missing target
                foreach (ix, fromSymlink; fromSymlinks)
                {
                    if (gstats.showTree && ix == 0)
                    {
                        immutable intro = `├`;
                        viz.pp(`│ `.repeat(theSymlink.parent.depth + 1).join(``) ~ intro ~ `─ `,
                               theSymlink);
                    }
                    else
                    {
                        viz.pp(fromSymlink);
                    }
                    viz.pp(` -> `);
                }

                viz.ppln(faze(theSymlink.target, missingSymlinkTargetFace),
                         faze(` is missing`, warnFace));
            }
        }
    }

    /** Scan Directory `theDir` for `keys`.
*/
    void scanDir(Viz viz,
                 NotNull!Dir topDir,
                 NotNull!Dir theDir,
                 const string[] keys,
                 Symlink[] fromSymlinks = [],
                 int maxDepth = -1)
    {
        if (theDir.isRoot)  { results.reset; }

        // scan in directory name
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.dirName) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir,
                        cast(NotNull!File)theDir,
                        enforceNotNull(theDir.parent),
                        fromSymlinks,
                        theDir.name, keys, [], ScanContext.fileName);
        }

        try
        {
            size_t subIndex = 0;
            if (gstats.showTree)
            {
                immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;

                viz.pp(`│ `.repeat(theDir.depth).join(``) ~ intro ~
                       `─ `, theDir, ` modified `,
                       faze(shortDurationString(_currTime -
                                                theDir.timeLastModified),
                            timeFace),
                       ` ago`);

                if (gstats.showUsage)
                {
                    viz.pp(` of Tree-Size `, theDir.treeSize);
                }

                if (gstats.showSHA1)
                {
                    viz.pp(` with Tree-Content-Id `, theDir.treeContentId);
                }
                viz.ppendl;
            }

            ++gstats.noScannedDirs;
            ++gstats.noScannedFiles;

            // Depth budget handed to every sub-directory. It is computed
            // once so that all siblings get the same budget; decrementing
            // `maxDepth` in place per sibling made later siblings shallower
            // and eventually skipped them. -1 means unlimited depth.
            immutable subMaxDepth = maxDepth >= 0 ? maxDepth - 1 : maxDepth;

            auto subsSorted = theDir.subsSorted(gstats.subsSorting);
            foreach (key, sub; subsSorted)
            {
                /* TODO Functionize to scanFile */
                if (auto regFile = cast(RegFile)sub)
                {
                    processRegFile(viz, topDir, assumeNotNull(regFile), theDir, keys, fromSymlinks, subIndex, gstats);
                }
                else if (auto subDir = cast(Dir)sub)
                {
                    if (maxDepth == -1 || // if either all levels or
                        maxDepth >= 1)    // levels left
                    {
                        if (sub.name in gstats.skippedDirKindsMap) // if sub should be skipped
                        {
                            if (_showSkipped)
                            {
                                if (gstats.showTree)
                                {
                                    immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
                                    viz.pp(`│ `.repeat(theDir.depth + 1).join(``) ~ intro ~ `─ `);
                                }

                                viz.pp(subDir,
                                       ` modified `,
                                       faze(shortDurationString(_currTime -
                                                                subDir.timeLastModified),
                                            timeFace),
                                       ` ago`,
                                       faze(`: Skipped Directory of type `, infoFace),
                                       gstats.skippedDirKindsMap[sub.name].kindName);
                            }
                        }
                        else
                        {
                            scanDir(viz, topDir,
                                    assumeNotNull(subDir),
                                    keys,
                                    fromSymlinks,
                                    subMaxDepth);
                        }
                    }
                }
                else if (auto subSymlink = cast(Symlink)sub)
                {
                    scanSymlink(viz, topDir, assumeNotNull(subSymlink), theDir, keys, fromSymlinks);
                }
                else
                {
                    if (gstats.showTree) { viz.ppendl; }
                }
                ++subIndex;

                if (ctrlC)
                {
                    viz.ppln(`Ctrl-C pressed: Aborting scan of `, theDir);
                    break;
                }
            }

            if (gstats.showTreeContentDups)
            {
                theDir.treeContentId; // better to put this after file scan for now
            }
        }
        catch (FileException)
        {
            handleError(viz, theDir, true, 0);
        }
    }

    /** Select duplicates among `files` with respect to the directories `dirPaths`.

        A file counts as lying under a directory if its path starts with
        that directory path (plain string prefix test).

        Returns: depending on `gstats.duplicatesContext`:
        $(UL
        $(LI `internal`: the files under any of `dirPaths`, if there are at
             least two of them)
        $(LI `external`: all of `files`, if at least one of them lies under
             any of `dirPaths`)
        )
        and an empty array otherwise.
    */
    F[] filterUnderAnyOfPaths(F)(F[] files,
                                 string[] dirPaths)
    {
        import std.algorithm: any;
        import std.array: array;
        auto dupFilesUnderAnyTopDirName = (files
                                           .filter!(dupFile =>
                                                    dirPaths.any!(dirPath =>
                                                                  dupFile.path.startsWith(dirPath)))
                                           .array // evaluate to array to get .length below
            );
        F[] hits;
        final switch (gstats.duplicatesContext)
        {
        case DuplicatesContext.internal:
            if (dupFilesUnderAnyTopDirName.length >= 2)
                hits = dupFilesUnderAnyTopDirName;
            break;
        case DuplicatesContext.external:
            if (dupFilesUnderAnyTopDirName.length >= 1)
                hits = files;
            break;
        }
        return hits;
    }

    /** Show Content Duplicates of Regular Files and Directory Trees.
*/
    void showContentDups(Viz viz)
    {
        import std.meta : AliasSeq;

        // Unrolled at compile time: first pass handles regular files, the
        // second one directory trees.
        foreach (kindIx, Kind; AliasSeq!(RegFile, Dir))
        {
            immutable typeName = (kindIx != 0) ? `Directory Tree` : `Regular File`;
            viz.pp(asH!2(typeName ~ ` Content Duplicates`));

            foreach (contentId, candidates; gstats.filesByContentId)
            {
                auto dups = filterUnderAnyOfPaths(candidates, _topDirNames);
                if (dups.length < 2) // need at least two files to have a duplicate
                {
                    continue;
                }

                // only groups whose first member is of the current kind are shown in this pass
                auto first = cast(Kind)dups[0];
                if (!first)
                {
                    continue;
                }

                static if (is(Kind == RegFile))
                {
                    if (first._cstat.kindId)
                    {
                        if (first._cstat.kindId in gstats.allFKinds.byId)
                        {
                            viz.pp(asH!3(gstats.allFKinds.byId[first._cstat.kindId],
                                         ` files sharing digest `, contentId, ` of size `, first.treeSize));
                        }
                        else
                        {
                            dbg(first.path ~ ` kind Id ` ~ to!string(first._cstat.kindId) ~
                                ` could not be found in allFKinds.byId`);
                        }
                    }
                    // NOTE(review): this generic heading is printed even when the
                    // kind-specific heading above was printed - confirm intended.
                    if (first._cstat.bitStatus == BitStatus.bits7)
                    {
                        viz.pp(asH!3(`ASCII File`,
                                     `s sharing digest `, contentId, ` of size `, first.treeSize));
                    }
                    else
                    {
                        viz.pp(asH!3(typeName,
                                     `s sharing digest `, contentId, ` of size `, first.treeSize));
                    }
                }
                else
                {
                    viz.pp(asH!3(typeName, `s sharing digest `, contentId, ` of size `, first.size));
                }

                viz.pp(asUList(dups.map!(dup => dup.asPath.asItem)));
            }
        }
    }

    /** Show Statistics.
*/
    void showStats(Viz viz)
    {
        /* Duplicates */

        // files grouped by name
        if (gstats.showNameDups)
        {
            viz.pp(asH!2(`Name Duplicates`));
            foreach (nameKey, candidates; gstats.filesByName)
            {
                auto dups = filterUnderAnyOfPaths(candidates, _topDirNames);
                if (dups.empty) { continue; }

                viz.pp(asH!3(`Files with same name `,
                             faze(dups[0].name, fileFace)),
                       asUList(dups.map!(dup => dup.asPath.asItem)));
            }
        }

        // files grouped by inode
        if (gstats.showLinkDups)
        {
            viz.pp(asH!2(`Inode Duplicates (Hardlinks)`));
            foreach (inode, candidates; gstats.filesByInode)
            {
                auto dups = filterUnderAnyOfPaths(candidates, _topDirNames);
                if (dups.length < 2) { continue; }

                viz.pp(asH!3(`Files with same inode ` ~ inode.to!string ~
                             ` (hardlinks): `),
                       asUList(dups.map!(dup => dup.asPath.asItem)));
            }
        }

        // files grouped by content
        if (gstats.showFileContentDups)
        {
            showContentDups(viz);
        }

        // ELF files grouped by symbol
        if (gstats.showELFSymbolDups &&
            !keys.empty) // don't show anything if no keys where given
        {
            viz.pp(asH!2(`ELF Symbol Duplicates`));
            foreach (raw, candidates; gstats.elfFilesBySymbol)
            {
                auto dups = filterUnderAnyOfPaths(candidates, _topDirNames);
                if (dups.length < 2) { continue; }

                // only symbols whose demangled name matches any of the keys
                const demangling = demangler(raw).decodeSymbol;
                if (!demangling.unmangled.findFirstOfAnyInOrder(keys)[1]) { continue; }

                viz.pp(asH!3(`ELF Files with same symbol ` ~ raw.to!string),
                       asUList(dups.map!(dup => dup.asPath.asItem)));
            }
        }

        /* Broken Symlinks */
        if (gstats.showBrokenSymlinks &&
            !_brokenSymlinks.empty)
        {
            viz.pp(asH!2(`Broken Symlinks `),
                   asUList(_brokenSymlinks.map!(link => link.asPath.asItem)));
        }

        /* Counts */
        viz.pp(asH!2(`Scanned Types`),
               /* asUList(asItem(gstats.noScannedDirs, ` Dirs, `), */
               /*         asItem(gstats.noScannedRegFiles, ` Regular Files, `), */
               /*         asItem(gstats.noScannedSymlinks, ` Symbolic Links, `), */
               /*         asItem(gstats.noScannedSpecialFiles, ` Special Files, `), */
               /*         asItem(`totalling `, gstats.noScannedFiles, ` Files`) // on extra because of lack of root */
               /*     ) */
               asTable(asRow(asCell(asBold(`Scan Count`)), asCell(asBold(`File Type`))),
                       asRow(asCell(gstats.noScannedDirs), asCell(asItalic(`Dirs`))),
                       asRow(asCell(gstats.noScannedRegFiles), asCell(asItalic(`Regular Files`))),
                       asRow(asCell(gstats.noScannedSymlinks), asCell(asItalic(`Symbolic Links`))),
                       asRow(asCell(gstats.noScannedSpecialFiles), asCell(asItalic(`Special Files`))),
                       asRow(asCell(gstats.noScannedFiles), asCell(asItalic(`Files`)))));

        /* Histogram denseness, only available if some file was measured */
        if (gstats.densenessCount)
        {
            viz.pp(asH!2(`Histograms`),
                   asUList(asItem(`Average Byte Bistogram (Binary Histogram) Denseness `,
                                  cast(real)(100*gstats.shallowDensenessSum / gstats.densenessCount), ` Percent`),
                           asItem(`Average Byte `, NGramOrder, `-Gram Denseness `,
                                  cast(real)(100*gstats.deepDensenessSum / gstats.densenessCount), ` Percent`)));
        }

        /* Byte counts */
        viz.pp(asH!2(`Scanned Bytes`),
               asUList(asItem(`Scanned `, results.noBytesScanned),
                       asItem(`Skipped `, results.noBytesSkipped),
                       asItem(`Unreadable `, results.noBytesUnreadable),
                       asItem(`Total Contents `, results.noBytesTotalContents),
                       asItem(`Total `, results.noBytesTotal),
                       asItem(`Total number of hits `, results.numTotalHits),
                       asItem(`Number of Files with hits `, results.numFilesWithHits)));

        /* Pretty-printing demo of randomized math types */
        viz.pp(asH!2(`Some Math`));

        {
            struct Stat
            {
                particle2f particle;
                point2r point;
                vec2r velocity;
                vec2r acceleration;
                mat2 rotation;
                Rational!uint ratInt;
                Vector!(Rational!int, 4) ratIntVec;
                Vector!(float, 2, true) normFloatVec2;
                Vector!(float, 3, true) normFloatVec3;
                Point!(Rational!int, 4) ratIntPoint;
            }

            /* Vector!(Complex!float, 4) complexVec; */

            viz.ppln(`A number: `, 1.2e10);
            viz.ppln(`Randomize particle2f as TableNr0: `, randomInstanceOf!particle2f.asTableNr0);

            alias StatTriple = Stat[3];
            auto stats = new Stat[3];
            randomize(stats);
            viz.ppln(`A ` ~ typeof(stats).stringof, `: `, stats.randomize.asTable);

            {
                auto triple = randomInstanceOf!StatTriple;
                foreach (ref stat; triple)
                {
                    stat.velocity *= 1e9;
                }
                viz.ppln(`Some Stats: `, triple.asTable);
            }
        }


    }
} // closes the enclosing aggregate opened earlier in the file

/** Install signal handlers (Linux only), open a linear-mode terminal and
    construct a `Scanner!Terminal` from the command line arguments `args`.
*/
void scanner(string[] args)
{
    // Route abort, termination, quit and interrupt signals to signalHandler
    version(linux)
    {
        foreach (immutable sig; [SIGABRT, SIGTERM, SIGQUIT, SIGINT])
        {
            signal(sig, &signalHandler);
        }
    }


    auto term = Terminal(ConsoleOutputType.linear);
    auto theScanner = new Scanner!Terminal(args, term);
}