1 /** 2 File Scanning Engine. 3 4 Make rich use of Sparse Distributed Representations (SDR) using Hash Digests 5 for relating Data and its Relations/Properties/Meta-Data. 6 7 See_Also: http://stackoverflow.com/questions/12629749/how-does-grep-run-so-fast 8 See_Also: http://www.regular-expressions.info/powergrep.html 9 See_Also: http://ridiculousfish.com/blog/posts/old-age-and-treachery.html 10 See_Also: http://www.olark.com/spw/2011/08/you-can-list-a-directory-with-8-million-files-but-not-with-ls/ 11 12 TODO: Make use of parallelism_ex: pmap 13 14 TODO: Call filterUnderAnyOfPaths using std.algorithm.filter directly on AAs. Use byPair or use AA.get(key, defaultValue) 15 See_Also: http://forum.dlang.org/thread/mailman.75.1392335793.6445.digitalmars-d-learn@puremagic.com 16 See_Also: https://github.com/D-Programming-Language/druntime/pull/574 17 18 TODO: Count logical lines. 19 TODO: Lexers should be loosely coupled to FKinds instead of Files 20 TODO: Generic Token[] and specific CToken[], CxxToken[] 21 22 TODO: Don't scan for duplicates inside vc-dirs by default 23 24 TODO: Assert that files along duplicates path don't include symlinks 25 26 TODO: Implement FOp.deduplicate 27 TODO: Prevent rescans of duplicates 28 29 TODO: Defined generalized_specialized_two_way_relationship(kindD, kindDi) 30 31 TODO: Visualize hits using existingFileHitContext.asH!1 followed by a table: 32 ROW_NR | hit string in <code lang=LANG></code> 33 34 TODO: Parse and Sort GCC/Clang Compiler Messages on WARN_TYPE FILE:LINE:COL:MSG[WARN_TYPE] and use Collapsible HTML Widgets: 35 http://api.jquerymobile.com/collapsible/ 36 when presenting them 37 38 TODO: Maybe make use of https://github.com/Abscissa/scriptlike 39 40 TODO: Calculate Tree grams and bist 41 42 TODO: Get stats of the link itself not the target in SymLink constructors 43 44 TODO: RegFile with FileContent.text should be decodable to Unicode using 45 either iso-latin1, utf-8, etc. Check std.uni for how to try and decode stuff. 
46 47 TODO: Search for subwords. 48 For example gtk_widget should also match widget_gtk and GtkWidget etc. 49 50 TODO: Support multi-line keys 51 52 TODO: Use hash-lookup in txtFKinds.byExt for faster guessing of source file 53 kind. Merge it with binary kind lookup. And check FileContent member of 54 kind to instead determine if it should be scanned or not. 55 Sub-Task: Case-Insensitive Matching of extensions if 56 nothing else passes. 57 58 TODO: Detect symlinks with duplicate targets and only follow one of them and 59 group them together in visualization 60 61 TODO: Add addTag, removeTag, etc and interface to fs.d for setting tags: 62 --add-tag=comedy, remove-tag=comedy 63 64 TODO: If files ends with ~ or .backup assume its a backup file, strip it from 65 end match it again and set backupFlag in FileKind 66 67 TODO: Acronym match can make use of normal histogram counts. Check denseness 68 of binary histogram (bist) to determine if we should use a sparse or dense 69 histogram. 70 71 TODO: Activate and test support for ELF and Cxx11 subkinds 72 73 TODO: Call either File.checkObseleted upon inotify. checkObseleted should remove stuff from hash tables 74 TODO: Integrate logic in clearCStat to RegFile.makeObselete 75 TODO: Upon Dir inotify call invalidate _depth, etc. 76 77 TODO: Following command: fs.d --color -d ~/ware/emacs -s lispy -k 78 shows "Skipped PNG file (png) at first extension try". 79 Assure that this logic reuses cache and instead prints something like "Skipped PNG file using cached FKind". 80 81 TODO: Cache each Dir separately to a file named after SHA1 of its path 82 83 TODO: Add ASCII kind: Requires optional stream analyzer member of FKind in 84 replacement for magicData. ASCIIFile 85 86 TODO: Defined NotAnyKind(binaryKinds) and cache it 87 88 TODO: Create PkZipFile() in Dir.load() when FKind "pkZip Archive" is found. 
89 Use std.zip.ZipArchive(void[] from mmfile) 90 91 TODO: Scan Subversion Dirs with http://pastebin.com/6ZzPvpBj 92 93 TODO: Change order (binHit || allBHist8Miss) and benchmark 94 95 TODO: Display modification/access times as: 96 See: http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com 97 98 TODO: Use User Defined Attributes (UDA): http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com 99 TODO: Use msgPack @nonPacked when needed 100 101 TODO: Limit lines to terminal width 102 103 TODO: Create array of (OFFSET, LENGTH) and this in FKind Pattern factory 104 function. Then for source file extra slice at (OFFSET, LENGTH) and use as 105 input into hash-table from magic (if its a Lit-pattern to) 106 107 TODO: Verify that "f.tar.z" gets tuple extensions tuple("tar", "z") 108 TODO: Verify that "libc.so.1.2.3" gets tuple extensions tuple("so", "1", "2", "3") and "so" extensions should the be tried 109 TODO: Cache Symbols larger than three characters in a global hash from symbol to path 110 111 TODO: Benchmark horspool.d and perhaps use instead of std.find 112 113 TODO: Splitting into keys should not split arguments such as "a b" 114 115 TODO: Perhaps use http://www.chartjs.org/ to visualize stuff 116 117 TODO: Make use of @nonPacked in version (msgpack). 
118 */ 119 module nxt.fse; 120 121 version = msgpack; // Use msgpack serialization 122 /* version = cerealed; // Use cerealed serialization */ 123 124 import std.stdio: ioFile = File, stdout; 125 import std.typecons: Tuple, tuple; 126 import std.algorithm: find, map, filter, reduce, max, min, uniq, all, joiner; 127 import std.string: representation, chompPrefix; 128 import std.stdio: write, writeln, writefln; 129 import std.path: baseName, dirName, isAbsolute, dirSeparator, extension, buildNormalizedPath, expandTilde, absolutePath; 130 import std.datetime; 131 import std.file: FileException; 132 import std.digest.sha: sha1Of, toHexString; 133 import std.range: repeat, array, empty, cycle, chain; 134 import std.stdint: uint64_t; 135 import std.traits: Unqual, isIterable; 136 import std.experimental.allocator; 137 import std.functional: memoize; 138 import std.complex: Complex; 139 140 import nxt.predicates: isUntouched; 141 142 import core.memory: GC; 143 import core.exception; 144 145 import nxt.algorithm_ex; 146 import nxt.attributes; 147 import nxt.codec; 148 import nxt.container.static_bitarray; 149 import nxt.csunits; 150 import nxt.debugio; 151 import nxt.digest_ex; 152 import nxt.elfdoc; 153 import nxt.find_ex; 154 import nxt.geometry; 155 import nxt.getopt_ex; 156 import nxt.lingua; 157 import nxt.mangling; 158 import nxt.mathml; 159 import nxt.notnull; 160 import nxt.random_ex; 161 import nxt.rational: Rational; 162 import nxt.tempfs; 163 import nxt.traits_ex; 164 import nxt.typedoc; 165 166 // import arsd.terminal : Color; 167 // import lock_free.rwqueue; 168 169 alias Bytes64 = Bytes!ulong; 170 171 import symbolic; 172 import ngram; 173 import pretty; 174 175 /* NGram Aliases */ 176 /** Not very likely that we are interested in histograms 64-bit precision 177 * Bucket/Bin Counts so pick 32-bit for now. 
*/
alias RequestedBinType = uint;
enum NGramOrder = 3;
alias Bist  = NGram!(ubyte, 1, ngram.Kind.binary, ngram.Storage.denseStatic, ngram.Symmetry.ordered, void, immutable(ubyte)[]);
alias XGram = NGram!(ubyte, NGramOrder, ngram.Kind.saturated, ngram.Storage.sparse, ngram.Symmetry.ordered, RequestedBinType, immutable(ubyte)[]);

/* Needed for signal handling */
import core.stdc.stdlib;
version (linux) import core.sys.posix.sys.stat;
version (linux) import core.sys.posix.signal;
//version (linux) import std.c.linux.linux;

/* TODO: Set global state.
   http://forum.dlang.org/thread/cu9fgg$28mr$1@digitaldaemon.com
*/

/// Number of times Ctrl-C (SIGINT) has been pressed. Updated atomically by `signalHandler`.
shared uint ctrlC = 0;

/** Exception Describing Process Signal.
 *
 * Carries the signal number in `signo`; the message has the form
 * "Signal number N at FILE:LINE".
 */
class SignalCaughtException : Exception
{
    int signo = int.max;        // signal number, `int.max` until set by the constructor

    this(int signo, string file = __FILE__, size_t line = __LINE__ ) @safe
    {
        this.signo = signo;
        import std.conv: to;
        super(`Signal number ` ~ to!string(signo) ~ ` at ` ~ file ~ `:` ~ to!string(line));
    }
}

/** Process signal handler.
 *
 * Increments `ctrlC` atomically when `signo` is `SIGINT`; every other signal
 * is ignored.
 *
 * Params:
 *     signo = number of the signal being delivered
 */
void signalHandler(int signo)
{
    import core.atomic: atomicOp;
    import core.stdc.signal: SIGINT;
    if (signo == SIGINT)        // was the magic number 2
    {
        // Call the selectively imported symbol directly; the selective import
        // does not provide the fully-qualified `core.atomic` path.
        atomicOp!`+=`(ctrlC, 1);
    }
    // throw new SignalCaughtException(signo);
}

/// Type of handler accepted by the `signal` prototype below.
alias signalHandler_t = void function(int);
/// Prototype of the C runtime's `signal` function.
extern (C) signalHandler_t signal(int signal, signalHandler_t handler);

version (msgpack)
{
    import msgpack;
}
version (cerealed)
{
    /* import cerealed.cerealiser; */
    /* import cerealed.decerealiser; */
    /* import cerealed.cereal; */
}

/** File Content Type Code.
*/
enum FileContent
{
    // Generic classes
    unknown,
    binaryUnknown,
    binary,
    text,
    textASCII,
    text8Bit,

    // Office-like data
    document,
    spreadsheet,
    database,
    tagsDatabase,

    // Pictures and audio
    image,
    imageIcon,
    audio,
    sound = audio,              // alias of `audio`
    music = audio,              // alias of `audio`

    // Modem related data
    modemData,
    imageModemFax1BPP,          // One bit per pixel
    voiceModem,

    // Moving pictures
    video,
    movie,
    media,

    // Code in its various forms
    sourceCode,
    scriptCode,
    buildSystemCode,
    byteCode,
    machineCode,

    // Miscellaneous
    versionControl,
    numericalData,
    archive,
    compressed,
    cache,
    binaryCache,
    firmware,
    spellCheckWordList,
    font,
    performanceBenchmark,
    fingerprint,
}

/** How File Kinds are detected. */
enum FileKindDetection
{
    equalsParentPathDirsAndName, // Parenting path file name must match
    equalsName,                  // Only name must match
    equalsNameAndContents,       // Both name and contents must match
    equalsNameOrContents,        // Either name or contents must match
    equalsContents,              // Only contents must match
    equalsWhatsGiven,            // All information defined must match
}

/** Key Scan (Search) Context. */
enum ScanContext
{
    /* code, */
    /* comment, */
    /* string, */

    /* word, */
    /* symbol, */

    dirName,                    // Name of directory being scanned
    dir = dirName,              // alias of `dirName`

    fileName,                   // Name of file being scanned
    name = fileName,            // alias of `fileName`

    regularFilename,            // Name of regular file being scanned
    symlinkName,                // Name of symbolic link being scanned

    fileContent,                // Contents of file being scanned
    content = fileContent,      // alias of `fileContent`

    /* modTime, */
    /* accessTime, */
    /* xattr, */
    /* size, */

    all,
    standard = all,             // alias of `all`
}

/** Where duplicates are allowed to reside relative to the scanned top directories. */
enum DuplicatesContext
{
    internal,                   // All duplicates must lie inside topDirs
    external,                   // At least one duplicate lies inside
                                // topDirs. Others may lie outside
}

/** File Operation Type Code.
*/
enum FOp
{
    none,

    checkSyntax,                // Check syntax
    lint = checkSyntax,         // Check syntax alias

    build,                      // Project-Wide Build
    compile,                    // Compile
    byteCompile,                // Byte compile
    run,                        // Run (Execute)
    execute = run,

    preprocess,                 // Preprocess C/C++/Objective-C (using cpp)
    cpp = preprocess,

    /* VCS Operations */
    vcStatus,
    vcs = vcStatus,

    deduplicate,                // Deduplicate Files using hardlinks and Dirs using Symlink
}

/** Directory Operation Type Code. */
enum DirOp
{
    /* VCS Operations */
    vcStatus,
}

/** Shell Command.
 */
alias ShCmd = string;           // Just simply a string for now.

/** Pair of Delimiters.
    Used to describe, for example, comment and string delimiter syntax.
 */
struct Delim
{
    /// Delimiter with only an opening token; `finish` is left at its default (null).
    this(string intro)
    {
        this(intro, null);
    }

    /// Delimiter with both an opening (`intro`) and a closing (`finish`) token.
    this(string intro, string finish)
    {
        this.intro = intro;
        this.finish = finish;
    }

    string intro;
    string finish;              // Defaults to end of line if not defined.
}

/* Comment Delimiters */
enum defaultCommentDelims = [Delim(`#`)];
enum cCommentDelims = [Delim(`/*`, `*/`),
                       Delim(`//`)];
enum dCommentDelims = [Delim(`/+`, `+/`)] ~ cCommentDelims;

/* String Delimiters */
enum defaultStringDelims = [Delim(`"`),
                            Delim(`'`),
                            Delim("`")];
enum pythonStringDelims = [Delim(`"""`),
                           Delim(`"`),
                           Delim(`'`),
                           Delim("`")];

/** File Kind.
392 */ 393 class FKind 394 { 395 this(T, MagicData, RefPattern)(string kindName_, 396 T baseNaming_, 397 const string[] exts_, 398 MagicData magicData, size_t magicOffset = 0, 399 RefPattern refPattern_ = RefPattern.init, 400 const string[] keywords_ = [], 401 402 Delim[] strings_ = [], 403 404 Delim[] comments_ = [], 405 406 FileContent content_ = FileContent.unknown, 407 FileKindDetection detection_ = FileKindDetection.equalsWhatsGiven, 408 Lang lang_ = Lang.unknown, 409 410 FKind superKind = null, 411 FKind[] subKinds = [], 412 string description = null, 413 string wikip = null) @trusted pure 414 { 415 this.kindName = kindName_; 416 417 // Basename 418 import std.traits: isArray; 419 import std.range: ElementType; 420 static if (is(T == string)) 421 { 422 this.baseNaming = lit(baseNaming_); 423 } 424 else static if (isArrayOf!(T, string)) 425 { 426 /+ TODO: Move to a factory function strs(x) +/ 427 auto alt_ = alt(); 428 foreach (ext; baseNaming_) // add each string as an alternative 429 { 430 alt_ ~= lit(ext); 431 } 432 this.baseNaming = alt_; 433 } 434 else static if (is(T == Patt)) 435 { 436 this.baseNaming = baseNaming_; 437 } 438 439 this.exts = exts_; 440 441 import std.traits: isAssignable; 442 static if (is(MagicData == ubyte[])) { this.magicData = lit(magicData) ; } 443 else static if (is(MagicData == string)) { this.magicData = lit(magicData.representation.dup); } 444 else static if (is(MagicData == void[])) { this.magicData = lit(cast(ubyte[])magicData); } 445 else static if (isAssignable!(Patt, MagicData)) { this.magicData = magicData; } 446 else static assert(0, `Cannot handle MagicData being type ` ~ MagicData.stringof); 447 448 this.magicOffset = magicOffset; 449 450 static if (is(RefPattern == ubyte[])) { this.refPattern = refPattern_; } 451 else static if (is(RefPattern == string)) { this.refPattern = refPattern_.representation.dup; } 452 else static if (is(RefPattern == void[])) { this.refPattern = (cast(ubyte[])refPattern_).dup; } 453 else 
static assert(0, `Cannot handle RefPattern being type ` ~ RefPattern.stringof); 454 455 this.keywords = keywords_; 456 457 this.strings = strings_; 458 this.comments = comments_; 459 460 this.content = content_; 461 462 if ((content_ == FileContent.sourceCode || 463 content_ == FileContent.scriptCode) && 464 detection_ == FileKindDetection.equalsWhatsGiven) 465 { 466 // relax matching of sourcecode to only need name until we have complete parsers 467 this.detection = FileKindDetection.equalsName; 468 } 469 else 470 { 471 this.detection = detection_; 472 } 473 this.lang = lang_; 474 475 this.superKind = superKind; 476 this.subKinds = subKinds; 477 this.description = description; 478 this.wikip = wikip.asURL; 479 } 480 481 override string toString() const @property @trusted pure nothrow { return kindName; } 482 483 /** Returns: Id Unique to matching behaviour of `this` FKind. If match 484 behaviour of `this` FKind changes returned id will change. 485 value is memoized. 486 */ 487 auto ref const(SHA1Digest) behaviorId() @property @safe /* pure nothrow */ 488 out(result) { assert(!result.empty); } 489 do 490 { 491 if (_behaviourDigest.empty) // if not yet defined 492 { 493 ubyte[] bytes; 494 const magicLit = cast(Lit)magicData; 495 if (magicLit) 496 { 497 bytes = msgpack.pack(exts, magicLit.bytes, magicOffset, refPattern, keywords, content, detection); 498 } 499 else 500 { 501 //dln(`warning: Handle magicData of type `, kindName); 502 } 503 _behaviourDigest = bytes.sha1Of; 504 } 505 return _behaviourDigest; 506 } 507 508 string kindName; // Kind Nick Name. 509 string description; // Kind Documenting Description. 510 AsURL!string wikip; // Wikipedia URL 511 512 FKind superKind; // Inherited pattern. For example ELF => ELF core file 513 FKind[] subKinds; // Inherited pattern. For example ELF => ELF core file 514 Patt baseNaming; // Pattern that matches typical file basenames of this Kind. May be null. 
515 516 string[] parentPathDirs; // example [`lib`, `firmware`] for `/lib/firmware` or `../lib/firmware` 517 518 const string[] exts; // Typical Extensions. 519 Patt magicData; // Magic Data. 520 size_t magicOffset; // Magit Offset. 521 ubyte[] refPattern; // Reference pattern. 522 const FileContent content; 523 const FileKindDetection detection; 524 Lang lang; // Language if any 525 526 // Volatile Statistics: 527 private SHA1Digest _behaviourDigest; 528 RegFile[] hitFiles; // Files of this kind. 529 530 const string[] keywords; // Keywords 531 string[] builtins; // Builtin Functions 532 Op[] opers; // Language Opers 533 534 /* TODO: Move this to CompLang class */ 535 Delim[] strings; // String syntax. 536 Delim[] comments; // Comment syntax. 537 538 bool machineGenerated; // True if this is a machine generated file. 539 540 Tuple!(FOp, ShCmd)[] operations; // Operation and Corresponding Shell Command 541 } 542 543 /** Set of File Kinds with Internal Hashing. */ 544 class FKinds 545 { 546 void opOpAssign(string op)(FKind kind) @safe /* pure */ if (op == `~`) 547 { 548 mixin(`this.byIndex ` ~ op ~ `= kind;`); 549 this.register(kind); 550 } 551 void opOpAssign(string op)(FKinds kinds) @safe /* pure */ if (op == `~`) 552 { 553 mixin(`this.byIndex ` ~ op ~ `= kinds.byIndex;`); 554 foreach (kind; kinds.byIndex) 555 this.register(kind); 556 } 557 558 FKinds register(FKind kind) @safe /* pure */ 559 { 560 this.byName[kind.kindName] = kind; 561 foreach (const ext; kind.exts) 562 { 563 this.byExt[ext] ~= kind; 564 } 565 this.byId[kind.behaviorId] = kind; 566 if (kind.magicOffset == 0 && // only if zero-offset for now 567 kind.magicData) 568 { 569 if (const magicLit = cast(Lit)kind.magicData) 570 { 571 this.byMagic[magicLit.bytes][magicLit.bytes.length] ~= kind; 572 _magicLengths ~= magicLit.bytes.length; // add it 573 } 574 } 575 return this; 576 } 577 578 /** Rehash Internal AAs. 
579 TODO: Change to @safe when https://github.com/D-Programming-Language/druntime/pull/942 has been merged 580 TODO: Change to nothrow when uniq becomes nothrow. 581 */ 582 FKinds rehash() @trusted pure /* nothrow */ 583 { 584 import std.algorithm: sort; 585 _magicLengths = _magicLengths.uniq.array; // remove duplicates 586 _magicLengths.sort(); 587 this.byName.rehash; 588 this.byExt.rehash; 589 this.byMagic.rehash; 590 this.byId.rehash; 591 return this; 592 } 593 594 FKind[] byIndex; 595 private: 596 /* TODO: These are "slaves" under byIndex and should not be modifiable outside 597 of this class but their FKind's can mutable. 598 */ 599 FKind[string] byName; // Index by unique name string 600 FKind[][string] byExt; // Index by possibly non-unique extension string 601 602 FKind[][size_t][immutable ubyte[]] byMagic; // length => zero-offset magic byte array to Binary FKind[] 603 size_t[] _magicLengths; // List of magic lengths to try as index in byMagic 604 605 FKind[SHA1Digest] byId; // Index Kinds by their behaviour 606 } 607 608 /** Match `kind` with full filename `full`. */ 609 bool matchFullName(in FKind kind, 610 const scope string full, size_t six = 0) @safe pure nothrow 611 { 612 return (kind.baseNaming && 613 !kind.baseNaming.matchFirst(full, six).empty); 614 } 615 616 /** Match `kind` with file extension `ext`. */ 617 bool matchExtension(in FKind kind, 618 const scope string ext) @safe pure nothrow 619 { 620 return !kind.exts.find(ext).empty; 621 } 622 623 bool matchName(in FKind kind, 624 const scope string full, size_t six = 0, 625 const scope string ext = null) @safe pure nothrow 626 { 627 return (kind.matchFullName(full) || 628 kind.matchExtension(ext)); 629 } 630 631 import std.range: hasSlicing; 632 633 /** Match (Magic) Contents of `kind` with `range`. 634 Returns: `true` iff match. 
*/
bool matchContents(Range)(in FKind kind,
                          in Range range,
                          in RegFile regFile) pure nothrow if (hasSlicing!Range)
{
    // A kind without magic data cannot match any contents; previously this
    // dereferenced a null pattern.
    if (kind.magicData is null)
    {
        return false;
    }
    const hit = kind.magicData.match(range, kind.magicOffset);
    return (!hit.empty);
}

/** Outcome of a kind lookup. */
enum KindHit
{
    none = 0,     // No hit.
    cached = 1,   // Cached hit.
    uncached = 2, // Uncached (fresh) hit.
}

/** Find the first kind in `kinds` that `regFile` is of.

    Lookup order: the kind id cached in `regFile`, then the kinds registered
    for the file's extension, then every kind in `kinds.byIndex`.

    Returns: tuple of hit status, matching kind (`null` if none) and an index.
    NOTE(review): the index refers to the per-extension list for extension
    hits but to `kinds.byIndex` for hits of the final loop — confirm callers
    expect that.
 */
Tuple!(KindHit, FKind, size_t) ofAnyKindIn(NotNull!RegFile regFile,
                                           FKinds kinds,
                                           bool collectTypeHits)
{
    // using kindId
    if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
    {
        if (auto cachedKind = regFile._cstat.kindId in kinds.byId)
        {
            return typeof(return)(KindHit.cached, *cachedKind, 0);
        }
    }

    // using extension
    immutable ext = regFile.realExtension; // extension sans dot
    if (!ext.empty)
    {
        if (auto extKinds = ext in kinds.byExt)
        {
            foreach (kindIndex, kind; *extKinds)
            {
                auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
                if (hit)
                {
                    return typeof(return)(hit, kind, kindIndex);
                }
            }
        }
    }

    // try all
    foreach (kindIndex, kind; kinds.byIndex) // Iterate each kind
    {
        auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
        if (hit)
        {
            return typeof(return)(hit, kind, kindIndex);
        }
    }

    // no hit; built through typeof(return) so the index is a size_t on every target
    return typeof(return)(KindHit.none, FKind.init, 0);
}

/** Returns: true if file with extension `ext` is of type `kind`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               NotNull!FKind kind,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    return regFile.ofKind1(kind,
                           collectTypeHits,
                           allFKinds);
}

/** Same as above, with the kind looked up by `kindName` in `allFKinds`.
    Returns: `KindHit.none` when no kind of that name is registered.
 */
KindHit ofKind(NotNull!RegFile regFile,
               string kindName,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    typeof(return) hit;
    if (auto namedKind = kindName in allFKinds.byName)
    {
        auto kind = assumeNotNull(*namedKind);
        hit = regFile.ofKind(kind,
                             collectTypeHits,
                             allFKinds);
    }
    return hit;
}

/** Helper for ofKind.

    Checks, in order: the kind id cached in `regFile`, the super kind of
    `kind` (which must match if present) and finally the detection rule of
    `kind`. On a fresh hit the kind id is stored in `regFile` and, if
    `collectTypeHits` is set, `regFile` is appended to `kind.hitFiles`.
 */
KindHit ofKind1(NotNull!RegFile regFile,
                NotNull!FKind kind,
                bool collectTypeHits,
                FKinds allFKinds) /* nothrow */ @trusted
{
    // Try cached first
    if (regFile._cstat.kindId.defined &&
        (regFile._cstat.kindId in allFKinds.byId) && // if kind is known
        allFKinds.byId[regFile._cstat.kindId] is kind)  // if cached kind equals
    {
        return KindHit.cached;
    }

    immutable ext = regFile.realExtension;

    if (kind.superKind)
    {
        immutable baseHit = regFile.ofKind(enforceNotNull(kind.superKind),
                                           collectTypeHits,
                                           allFKinds);
        if (!baseHit)
        {
            return baseHit;
        }
    }

    bool hit = false;
    final switch (kind.detection)
    {
    case FileKindDetection.equalsParentPathDirsAndName:
        hit = (!regFile.parents.map!(a => a.name).find(kind.parentPathDirs).empty && // I love D :)
               kind.matchName(regFile.name, 0, ext));
        break;
    case FileKindDetection.equalsName:
        hit = kind.matchName(regFile.name, 0, ext);
        break;
    case FileKindDetection.equalsNameAndContents:
        hit = (kind.matchName(regFile.name, 0, ext) &&
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsNameOrContents:
        hit = (kind.matchName(regFile.name, 0, ext) ||
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsContents:
        hit = kind.matchContents(regFile.readOnlyContents, regFile);
        break;
    case FileKindDetection.equalsWhatsGiven:
        // something must be defined
        // (`is(kind.baseNaming)` was an is-expression on a value, which is
        // always false; test the reference instead)
        assert(kind.baseNaming !is null ||
               !kind.exts.empty ||
               !(kind.magicData is null));
        hit = ((kind.matchName(regFile.name, 0, ext) &&
                (kind.magicData is null ||
                 kind.matchContents(regFile.readOnlyContents, regFile))));
        break;
    }
    if (hit)
    {
        if (collectTypeHits)
        {
            kind.hitFiles ~= regFile;
        }
        regFile._cstat.kindId = kind.behaviorId;       // store reference in File
    }

    return hit ? KindHit.uncached : KindHit.none;
}

/** Directory Kind.
 */
class DirKind
{
    this(string fn,
         string kn)
    {
        this.fileName = fn;
        this.kindName = kn;
    }

    version (msgpack)
    {
        /// Construct from msgpack `unpacker`.
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }
        /// Pack all fields as one msgpack array.
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.beginArray(this.tupleof.length);
            packer.pack(this.tupleof);
        }
        /// Unpack all fields from one msgpack array.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.beginArray;
            unpacker.unpack(this.tupleof);
        }
    }

    string fileName;
    string kindName;
}
version (msgpack) unittest {
    auto k = tuple(``, ``);
    auto data = pack(k);
    Tuple!(string, string) k_; data.unpack(k_);
    assert(k == k_);
}

import std.file: DirEntry, getLinkAttributes;
import std.datetime: SysTime, Interval;

/** File.
837 */ 838 class File 839 { 840 this(Dir parent) 841 { 842 this.parent = parent; 843 if (parent) { ++parent.gstats.noFiles; } 844 } 845 this(string name, Dir parent, Bytes64 size, 846 SysTime timeLastModified, 847 SysTime timeLastAccessed) 848 { 849 this.name = name; 850 this.parent = parent; 851 this.size = size; 852 this.timeLastModified = timeLastModified; 853 this.timeLastAccessed = timeLastAccessed; 854 if (parent) { ++parent.gstats.noFiles; } 855 } 856 857 // The Real Extension without leading dot. 858 string realExtension() @safe pure nothrow const { return name.extension.chompPrefix(`.`); } 859 alias ext = realExtension; // shorthand 860 861 string toTextual() const @property { return `Any File`; } 862 863 Bytes64 treeSize() @property @trusted /* @safe pure nothrow */ { return size; } 864 865 /** Content Digest of Tree under this Directory. */ 866 const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */ 867 { 868 return typeof(return).init; // default to undefined 869 } 870 871 Face!Color face() const @property @safe pure nothrow { return fileFace; } 872 873 /** Check if `this` File has been invalidated by `dent`. 874 Returns: `true` iff `this` was obseleted. 875 */ 876 bool checkObseleted(ref DirEntry dent) @trusted 877 { 878 // Git-Style Check for Changes (called Decider in SCons Build Tool) 879 bool flag = false; 880 if (dent.size != this.size || // size has changes 881 (dent.timeLastModified != this.timeLastModified) // if current modtime has changed or 882 ) 883 { 884 makeObselete; 885 this.timeLastModified = dent.timeLastModified; // use new time 886 this.size = dent.size; // use new time 887 flag = true; 888 } 889 this.timeLastAccessed = dent.timeLastAccessed; // use new time 890 return flag; 891 } 892 893 void makeObselete() @trusted {} 894 void makeUnObselete() @safe {} 895 896 /** Returns: Depth of Depth from File System root to this File. */ 897 int depth() @property @safe pure nothrow 898 { 899 return parent ? 
parent.depth + 1 : 0; // NOTE: this is fast because parent is memoized 900 } 901 /** NOTE: Currently not used. */ 902 int depthIterative() @property @safe pure 903 out (depth) { debug assert(depth == depth); } 904 do 905 { 906 typeof(return) depth = 0; 907 for (auto curr = dir; curr !is null && !curr.isRoot; depth++) 908 { 909 curr = curr.parent; 910 } 911 return depth; 912 } 913 914 /** Get Parenting Dirs starting from parent of `this` upto root. 915 Make this even more lazily evaluted. 916 */ 917 Dir[] parentsUpwards() 918 { 919 typeof(return) parents; // collected parents 920 for (auto curr = dir; (curr !is null && 921 !curr.isRoot); curr = curr.parent) 922 { 923 parents ~= curr; 924 } 925 return parents; 926 } 927 alias dirsDownward = parentsUpwards; 928 929 /** Get Parenting Dirs starting from file system root downto containing 930 directory of `this`. 931 */ 932 auto parents() 933 { 934 return parentsUpwards.retro; 935 } 936 alias dirs = parents; // SCons style alias 937 alias parentsDownward = parents; 938 939 bool underAnyDir(alias pred = `a`)() 940 { 941 import std.algorithm: any; 942 import std.functional: unaryFun; 943 return parents.any!(unaryFun!pred); 944 } 945 946 /** Returns: Path to `this` File. 947 TODO: Reuse parents. 948 */ 949 string path() @property @trusted pure out (result) { 950 /* assert(result == pathRecursive); */ 951 } 952 do 953 { 954 if (!parent) { return dirSeparator; } 955 956 size_t pathLength = 1 + name.length; // returned path length 957 Dir[] parents; // collected parents 958 959 for (auto curr = parent; (curr !is null && 960 !curr.isRoot); curr = curr.parent) 961 { 962 pathLength += 1 + curr.name.length; 963 parents ~= curr; 964 } 965 966 // build path 967 auto thePath = new char[pathLength]; 968 size_t i = 0; // index to thePath 969 import std.range: retro; 970 foreach (currParent_; parents.retro) 971 { 972 immutable parentName = currParent_.name; 973 thePath[i++] = dirSeparator[0]; 974 thePath[i .. 
i + parentName.length] = parentName[]; 975 i += parentName.length; 976 } 977 thePath[i++] = dirSeparator[0]; 978 thePath[i .. i + name.length] = name[]; 979 980 return thePath; 981 } 982 983 /** Returns: Path to `this` File. 984 Recursive Heap-active implementation, slower than $(D path()). 985 */ 986 string pathRecursive() @property @trusted pure 987 { 988 if (parent) 989 { 990 static if (true) 991 { 992 import std.path: dirSeparator; 993 // NOTE: This is more efficient than buildPath(parent.path, 994 // name) because we can guarantee things about parent.path and 995 // name 996 immutable parentPath = parent.isRoot ? `` : parent.pathRecursive; 997 return parentPath ~ dirSeparator ~ name; 998 } 999 else 1000 { 1001 import std.path: buildPath; 1002 return buildPath(parent.pathRecursive, name); 1003 } 1004 } 1005 else 1006 { 1007 return `/`; // assume root folder with beginning slash 1008 } 1009 } 1010 1011 version (msgpack) 1012 { 1013 void toMsgpack(Packer)(ref Packer packer) const 1014 { 1015 writeln(`Entering File.toMsgpack `, name); 1016 packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime); 1017 } 1018 void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) 1019 { 1020 long stdTime; 1021 unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); /+ TODO: Functionize +/ 1022 unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); /+ TODO: Functionize +/ 1023 } 1024 } 1025 1026 Dir parent; // Reference to parenting directory (or null if this is a root directory) 1027 alias dir = parent; // SCons style alias 1028 1029 string name; // Empty if root directory 1030 Bytes64 size; // Size of file in bytes 1031 SysTime timeLastModified; // Last modification time 1032 SysTime timeLastAccessed; // Last access time 1033 } 1034 1035 /** Maps Files to their tags. 
*/
class FileTags
{
    /** Attach `tag` to `file` unless it is already attached.
        Returns: `this`, to allow chaining.
     */
    FileTags addTag(File file, const scope string tag) @safe pure /* nothrow */
    {
        if (auto existing = file in _tags)
        {
            if ((*existing).find(tag).empty)
            {
                *existing ~= tag; // add it
            }
        }
        else
        {
            _tags[file] = [tag];
        }
        return this;
    }

    /** Detach every occurrence of `tag` from `file`; no-op for unknown files.
        Returns: `this`, to allow chaining.
     */
    FileTags removeTag(File file, string tag) @safe pure
    {
        if (auto existing = file in _tags)
        {
            import std.algorithm: remove;
            *existing = (*existing).remove!(a => a == tag);
        }
        return this;
    }

    /// Returns: tags of `file`, or `null` if `file` has never been tagged.
    auto ref getTags(File file) const @safe pure nothrow
    {
        return file in _tags ? _tags[file] : null;
    }

    private string[][File] _tags; // Tags for each registered file.
}

version (linux) unittest {
    auto ftags = new FileTags();

    GStats gstats = new GStats();

    auto root = assumeNotNull(new Dir(cast(Dir)null, gstats));
    auto etc = getDir(root, `/etc`);
    assert(etc.path == `/etc`);

    auto dent = DirEntry(`/etc/passwd`);
    auto passwd = getFile(root, `/etc/passwd`, dent.isDir);
    assert(passwd.path == `/etc/passwd`);
    assert(passwd.parent == etc);
    assert(etc.sub(`passwd`) == passwd);

    // adding the same tag twice must not duplicate it
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Secret`);
    assert(ftags.getTags(passwd) == [`Password`, `Secret`]);
    ftags.removeTag(passwd, `Password`);
    assert(ftags._tags[passwd] == [`Secret`]);
}

/** Symlink Target Status.
 */
enum SymlinkTargetStatus
{
    unknown,
    present,
    broken,
}

/** Symlink.
*/
class Symlink : File
{
    /** Construct an unnamed symlink under `parent` and count it in the
        global statistics. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSymlinks;
    }

    /** Construct from directory entry `dent` under `parent`.

        Size and timestamps are read from `dent` inside a `try`. If that
        throws, the fields keep their default values, the link target is not
        read, and the link is flagged as `SymlinkTargetStatus.broken` so that
        `face` can present it accordingly.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        Bytes64 sizeBytes;
        SysTime modified, accessed;
        bool ok = true;
        try
        {
            sizeBytes = dent.size.Bytes64;
            modified = dent.timeLastModified;
            accessed = dent.timeLastAccessed;
        }
        catch (Exception)
        {
            ok = false;
        }
        // const attrs = getLinkAttributes(dent.name); // attributes of target file
        // super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        super(dent.name.baseName, parent, sizeBytes, modified, accessed);
        if (ok)
        {
            this.retarget(dent); // trigger lazy load
        }
        else
        {
            // the entry could not be queried: remember that, otherwise the
            // `broken` branch in `face` can never be taken
            _targetStatus = SymlinkTargetStatus.broken;
        }
        ++parent.gstats.noSymlinks;
    }

    /** Returns: `symlinkBrokenFace` for a link flagged as broken,
        `symlinkFace` otherwise. */
    override Face!Color face() const @property @safe pure nothrow
    {
        if (_targetStatus == SymlinkTargetStatus.broken)
            return symlinkBrokenFace;
        else
            return symlinkFace;
    }

    override string toTextual() const @property { return `Symbolic Link`; }

    /** Read the link target of `dent` with `readLink` and cache it.

        Returns: the freshly read target.
    */
    string retarget(ref DirEntry dent) @trusted
    {
        import std.file: readLink;
        return _target = readLink(dent);
    }

    /** Cached/Memoized/Lazy Lookup for target. */
    string target() @property @trusted
    {
        if (!_target)           // if target not yet read
        {
            auto targetDent = DirEntry(path);
            return retarget(targetDent); // read it
        }
        return _target;
    }

    /** Cached/Memoized/Lazy Lookup for target as absolute normalized path.

        The target is made absolute relative to the directory containing the
        link and then normalized.
    */
    string absoluteNormalizedTargetPath() @property @trusted
    {
        import std.path: absolutePath, buildNormalizedPath;
        return target.absolutePath(path.dirName).buildNormalizedPath;
    }

    version (msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack `name`, `size` and both timestamps (as `stdTime`) into `packer`. */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.pack(name,
                        size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);
        }

        /** Unpack `name`, `size`, modification time and access time, in that
            order, from `unpacker`. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            // TODO: Functionize the timestamp unpacking
            long modifiedStdTime;
            unpacker.unpack(modifiedStdTime);
            timeLastModified = SysTime(modifiedStdTime);

            long accessedStdTime;
            unpacker.unpack(accessedStdTime);
            timeLastAccessed = SysTime(accessedStdTime);
        }
    }

    string _target;             // cached link target, filled by `retarget`
    SymlinkTargetStatus _targetStatus = SymlinkTargetStatus.unknown;
}

/** Special File (Character or Block Device).
 */
class SpecFile : File
{
    /** Construct an unnamed special file under `parent` and count it in the
        global statistics. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSpecialFiles;
    }

    /** Construct from directory entry `dent` under `parent`.

        Only the base name is taken from `dent`; the size is set to zero and
        both timestamps are cast from `0`.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        super(dent.name.baseName,
              parent,
              0.Bytes64,
              cast(SysTime)0,
              cast(SysTime)0);
        ++parent.gstats.noSpecialFiles;
    }

    override Face!Color face() const @property @safe pure nothrow
    {
        return specialFileFace;
    }

    override string toTextual() const @property
    {
        return `Special File`;
    }

    version (msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack `name`, `size` and both timestamps (as `stdTime`) into `packer`. */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.pack(name,
                        size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);
        }

        /** Unpack `name`, `size`, modification time and access time, in that
            order, from `unpacker`. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            // TODO: Functionize the timestamp unpacking
            long modifiedStdTime;
            unpacker.unpack(modifiedStdTime);
            timeLastModified = SysTime(modifiedStdTime);

            long accessedStdTime;
            unpacker.unpack(accessedStdTime);
            timeLastAccessed = SysTime(accessedStdTime);
        }
    }
}

/** Bit (Content) Status. */
enum BitStatus
{
    unknown,
    bits7,
    bits8,
}

/** Regular File.
 */
class RegFile : File
{
    /** Construct an unnamed regular file under `parent` and count it in the
        global statistics. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noRegFiles;
    }

    /** Construct from directory entry `dent` under `parent`, taking base
        name, size and both timestamps from `dent`. */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        this(dent.name.baseName,
             parent,
             dent.size.Bytes64,
             dent.timeLastModified,
             dent.timeLastAccessed);
    }

    /** Construct from explicit attributes and count the file in the global
        statistics. */
    this(string name, NotNull!Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        ++parent.gstats.noRegFiles;
    }

    ~this() nothrow @nogc
    {
        _cstat.deallocate(false);
    }

    override string toTextual() const @property
    {
        return `Regular File`;
    }

    /** Returns: Content Id of `this`.

        While the stored id is still untouched, the contents are hashed with
        `calculateCStatInChunks` (SHA-1 enabled, chunks of `32*pageSize()`)
        and then released again with `freeContents`.
    */
    const(SHA1Digest) contentId() @property @trusted /* @safe pure nothrow */
    {
        if (!_cstat._contentId.isUntouched)
        {
            return _cstat._contentId; // already calculated
        }

        enum doSHA1 = true;
        calculateCStatInChunks(parent.gstats.filesByContentId,
                               32*pageSize(),
                               doSHA1);
        freeContents(); /+ TODO: Call lazily only when open count is too large +/

        return _cstat._contentId;
    }

    /** Returns: Tree Content Id of `this`.
*/
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        // a regular file has no children, so its tree id is its content id
        return contentId;
    }

    override Face!Color face() const @property @safe pure nothrow
    {
        return regFileFace;
    }

    /** Returns: SHA-1 of `this` `File` Contents at `src`.

        If no id is stored yet, it is computed from `src` with `sha1Of` and
        `this` is appended to the entry for that id in `filesByContentId`.
        The out-contract requires the returned id to be non-empty.
    */
    const(SHA1Digest) contId(inout (ubyte[]) src,
                             File[][SHA1Digest] filesByContentId)
        @property pure out(result) { assert(!result.empty); } // must have be defined
    do
    {
        if (!_cstat._contentId.empty)
        {
            return _cstat._contentId; // already defined
        }

        _cstat._contentId = src.sha1Of;
        filesByContentId[_cstat._contentId] ~= this;
        return _cstat._contentId;
    }

    /** Returns: Cached/Memoized Binary Histogram of `this` `File`.

        Filled from `readOnlyContents` on first use.
    */
    auto ref bistogram8() @property @safe // ref needed here!
    {
        immutable isCached = !_cstat.bist.empty;
        if (!isCached)
        {
            _cstat.bist.put(readOnlyContents);
        }
        return _cstat.bist;
    }

    /** Returns: Cached/Memoized XGram of `this` `File`.

        Filled from `readOnlyContents` on first use.
    */
    auto ref xgram() @property @safe // ref needed here!
    {
        immutable isCached = !_cstat.xgram.empty;
        if (!isCached)
        {
            _cstat.xgram.put(readOnlyContents);
        }
        return _cstat.xgram;
    }

    /** Returns: Cached/Memoized XGram Deep Denseness of `this` `File`.

        The numerator is cached in `_cstat`; a cached value of zero is
        treated as "not yet calculated" and triggers a recalculation.
    */
    auto ref xgramDeepDenseness() @property @safe
    {
        if (_cstat._xgramDeepDenseness == 0)
        {
            _cstat._xgramDeepDenseness = xgram.denseness(-1).numerator;
        }
        return Rational!ulong(_cstat._xgramDeepDenseness,
                              _cstat.xgram.noBins);
    }

    /** Returns: true if empty file (zero length). */
    bool empty() @property const @safe
    {
        return size == 0;
    }

    /** Process File in Cache Friendly Chunks.
*/
    /* The contents are walked once in pieces of `chunkSize`; each requested
     * statistic is fed from every piece. A statistic that is already cached
     * is skipped, and SHA-1 is additionally skipped for empty files.
     *
     * When SHA-1 was calculated, `this` is appended to the entry for the new
     * id in `filesByContentId`.
     *
     * NOTE(review): `filesByContentId` is taken by value. If the caller's
     * associative array is still null at the time of the call, the insertion
     * at the end probably lands in a table the caller never sees - confirm
     * against the callers.
     */
    void calculateCStatInChunks(NotNull!File[][SHA1Digest] filesByContentId,
                                size_t chunkSize = 32*pageSize(),
                                bool doSHA1 = false,
                                bool doBist = false,
                                bool doBitStatus = false) @safe
    {
        // drop the requests whose results are already known
        if (_cstat._contentId.defined || empty)
        {
            doSHA1 = false;
        }
        if (!_cstat.bist.empty)
        {
            doBist = false;
        }
        if (_cstat.bitStatus != BitStatus.unknown)
        {
            doBitStatus = false;
        }

        import std.digest.sha;
        SHA1 sha1;
        if (doSHA1)
        {
            sha1.start();
        }

        bool isASCII = true;

        immutable anythingToDo = doSHA1 || doBist || doBitStatus;
        if (anythingToDo)
        {
            import std.range: chunks;
            foreach (chunk; readOnlyContents.chunks(chunkSize))
            {
                if (doSHA1)
                {
                    sha1.put(chunk);
                }
                if (doBist)
                {
                    _cstat.bist.put(chunk);
                }
                if (doBitStatus)
                {
                    /* TODO: This can be parallelized using 64-bit wording!
                     * Write automatic parallelizing library for this? */
                    foreach (elt; chunk)
                    {
                        import nxt.bitop_ex: bt;
                        // ASCII has no topmost bit set
                        if (isASCII && elt.bt(7))
                        {
                            isASCII = false;
                        }
                    }
                }
            }
        }

        if (doBitStatus)
        {
            if (isASCII)
            {
                _cstat.bitStatus = BitStatus.bits7;
            }
            else
            {
                _cstat.bitStatus = BitStatus.bits8;
            }
        }

        if (doSHA1)
        {
            _cstat._contentId = sha1.finish();
            filesByContentId[_cstat._contentId] ~= cast(NotNull!File)assumeNotNull(this); /+ TODO: Prettier way? +/
        }
    }

    /** Clear/Reset Contents Statistics of `this` `File`.
*/
    /* Removes `this` from the list of files registered under its content id
     * in `filesByContentId`. Nothing happens when the id has no entry.
     */
    void clearCStat(File[][SHA1Digest] filesByContentId) @safe nothrow
    {
        // SHA1-digest
        if (auto dups = _cstat._contentId in filesByContentId)
        {
            import std.algorithm: remove;
            immutable n = (*dups).length;
            // `remove` only returns the shortened slice; it has to be stored
            // back into the table, otherwise the entry keeps its old length
            *dups = (*dups).remove!(a => a is this);
            assert(n == (*dups).length + 1); // exactly one entry, `this`, must have been removed
        }
    }

    /** Returns: the class name followed by the path in parentheses. */
    override string toString() @property @trusted
    {
        // import std.traits: fullyQualifiedName;
        // return fullyQualifiedName!(typeof(this)) ~ `(` ~ buildPath(parent.name, name) ~ `)`; /+ TODO: typenameof +/
        return (typeof(this)).stringof ~ `(` ~ this.path ~ `)`; /+ TODO: typenameof +/
    }

    version (msgpack)
    {
        /** Construct from msgpack `unpacker`. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack.

            Written in this order: name, size, modification time, access time,
            kind id, content id, a flag followed by the bistogram if it is
            non-empty, a flag followed by the xgram and its deep denseness if
            the xgram is non-empty.
        */
        void toMsgpack(Packer)(ref Packer packer) const {
            /* writeln(`Entering RegFile.toMsgpack `, name); */

            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime);

            // CStat: TODO: Group
            packer.pack(_cstat.kindId); // FKind
            packer.pack(_cstat._contentId); // Digest

            // Bist
            immutable bistFlag = !_cstat.bist.empty;
            packer.pack(bistFlag);
            if (bistFlag) { packer.pack(_cstat.bist); }

            // XGram
            immutable xgramFlag = !_cstat.xgram.empty;
            packer.pack(xgramFlag);
            if (xgramFlag)
            {
                /* debug dln("packing xgram. empty:", _cstat.xgram.empty); */
                packer.pack(_cstat.xgram,
                            _cstat._xgramDeepDenseness);
            }

            /* auto this_ = (cast(RegFile)this); /+ TODO: Ugly! Is there another way?
*/
            /* const tags = this_.parent.gstats.ftags.getTags(this_); */
            /* immutable tagsFlag = !tags.empty; */
            /* packer.pack(tagsFlag); */
            /* debug dln(`Packing tags `, tags, ` of `, this_.path); */
            /* if (tagsFlag) { packer.pack(tags); } */
        }

        /** Unpack.

            Reads the fields in the order `toMsgpack` writes them. A defined
            kind id that is not registered in `parent.gstats.allFKinds.byId`
            is reported with `dln` and reset. A content id that evaluates to
            true registers `this` in `parent.gstats.filesByContentId`.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) @trusted
        {
            unpacker.unpack(name, size); // Name, Size

            // Time
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); /+ TODO: Functionize +/
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); /+ TODO: Functionize +/

            // CStat: TODO: Group
            unpacker.unpack(_cstat.kindId); // FKind
            if (_cstat.kindId.defined &&
                _cstat.kindId !in parent.gstats.allFKinds.byId)
            {
                dln(`warning: kindId `, _cstat.kindId, ` not found for `,
                    path, `, FKinds length `, parent.gstats.allFKinds.byIndex.length);
                _cstat.kindId.reset; // forget it
            }
            unpacker.unpack(_cstat._contentId); // Digest
            if (_cstat._contentId)
            {
                parent.gstats.filesByContentId[_cstat._contentId] ~= cast(NotNull!File)this;
            }

            // Bist: only present in the stream when the flag is set
            bool bistFlag; unpacker.unpack(bistFlag);
            if (bistFlag)
            {
                unpacker.unpack(_cstat.bist);
            }

            // XGram: only present in the stream when the flag is set
            bool xgramFlag; unpacker.unpack(xgramFlag);
            if (xgramFlag)
            {
                /* if (_cstat.xgram == null) { */
                /*     _cstat.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                /* } */
                /* unpacker.unpack(*_cstat.xgram); */
                unpacker.unpack(_cstat.xgram,
                                _cstat._xgramDeepDenseness);
                /* debug dln(`unpacked xgram.
empty:`, _cstat.xgram.empty); */
            }

            // tags
            /* bool tagsFlag; unpacker.unpack(tagsFlag); */
            /* if (tagsFlag) { */
            /*     string[] tags; */
            /*     unpacker.unpack(tags); */
            /* } */
        }

        /** Forget all cached content statistics of `this`. */
        override void makeObselete() @trusted { _cstat.reset(); /* debug dln(`Reset CStat for `, path); */ }
    }

    /** Returns: Read-Only Contents of `this` Regular File.

        The file is memory-mapped on first use and the mapping is kept until
        `freeContents` is called. A zero-sized file is never mapped; an empty
        slice is returned for it instead.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    immutable(ubyte[]) readOnlyContents(string file = __FILE__, int line = __LINE__)() @trusted
    {
        if (_mmfile is null)
        {
            if (size == 0) // munmap fails for empty files
            {
                static assert([] !is null);
                return []; // empty file
            }
            else
            {
                _mmfile = new MmFile(path, MmFile.Mode.read,
                                     mmfile_size, null, pageSize());
                if (parent.gstats.showMMaps)
                {
                    writeln(`Mapped `, path, ` of size `, size);
                }
            }
        }
        return cast(typeof(return))_mmfile[];
    }

    /** Returns: Read-Writable Contents of `this` Regular File.

        The file is memory-mapped in read-write mode on first use; an already
        existing mapping is reused as is.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    ubyte[] readWriteableContents() @trusted
    {
        if (!_mmfile)
        {
            _mmfile = new MmFile(path, MmFile.Mode.readWrite,
                                 mmfile_size, null, pageSize());
        }
        return cast(typeof(return))_mmfile[];
    }

    /** If needed Free Allocated Contents of `this` Regular File.

        Returns: `true` if a mapping existed and was released, `false`
        otherwise.
    */
    bool freeContents()
    {
        if (_mmfile)
        {
            // `delete` is no longer part of the language; `destroy` runs the
            // MmFile destructor right away and leaves the memory to the GC
            destroy(_mmfile);
            _mmfile = null;
            return true;
        }
        else
        {
            return false;
        }
    }

    import std.mmfile;
    private MmFile _mmfile = null;
    private CStat _cstat;     // Statistics about the contents of this RegFile.
}

/** Traits */
enum isFile(T) = (is(T == File) || is(T == NotNull!File));
enum isDir(T) = (is(T == Dir) || is(T == NotNull!Dir));
enum isSymlink(T) = (is(T == Symlink) || is(T == NotNull!Symlink));
enum isRegFile(T) = (is(T == RegFile) || is(T == NotNull!RegFile));
enum isSpecialFile(T) = (is(T == SpecFile) || is(T == NotNull!SpecFile));
enum isAnyFile(T) = (isFile!T ||
                     isDir!T ||
                     isSymlink!T ||
                     isRegFile!T ||
                     isSpecialFile!T);

/** Return true if T is a class representing File IO. */
enum isFileIO(T) = (isAnyFile!T ||
                    is(T == ioFile));

/** Contents Statistics of a Regular File. */
struct CStat
{
    /** Forget every cached statistic so that it is recalculated on next use. */
    void reset() @safe nothrow
    {
        kindId[] = 0;
        _contentId[] = 0;
        hitCount = 0;
        bist.reset();
        xgram.reset();
        _xgramDeepDenseness = 0;
        // the bit status is derived from the contents as well; leaving it set
        // would make RegFile.calculateCStatInChunks skip its recalculation
        bitStatus = BitStatus.unknown;
        deallocate();
    }

    /** Clear the kind id.

        `nullify` is currently unused; it belongs to the disabled manual
        xgram deallocation kept below.
    */
    void deallocate(bool nullify = true) @trusted nothrow
    {
        kindId[] = 0;
        /* if (xgram != null) { */
        /*     import core.stdc.stdlib; */
        /*     free(xgram); */
        /*     if (nullify) { */
        /*         xgram = null; */
        /*     } */
        /* } */
    }

    SHA1Digest kindId;      // FKind Identifier/Fingerprint of this regular file.
    SHA1Digest _contentId;  // Content Identifier/Fingerprint.

    /** Boolean Single Bistogram over file contents.  If
        binHist0[cast(ubyte)x] is set then this file contains byte x. Consumes
        32 bytes. */
    Bist bist; /+ TODO: Put in separate slice std.allocator. +/

    /** Boolean Pair Bistogram (Digram) over file contents (higher-order statistics).
        If this RegFile contains a sequence of [byte0, bytes1],
        then bit at index byte0 + byte1 * 256 is set in xgram.
*/
    XGram xgram; /+ TODO: Use slice std.allocator +/

    /// Cached numerator of the xgram deep denseness.
    private ulong _xgramDeepDenseness = 0;

    uint64_t hitCount = 0;
    BitStatus bitStatus = BitStatus.unknown;
}

import core.sys.posix.sys.types;

/** Selects which symlinks are followed.

    NOTE(review): the descriptions of `internal` and `external` look swapped
    relative to their names - confirm against the code that consumes this
    enum before relying on either.
*/
enum SymlinkFollowContext
{
    /// Follow no symlinks
    none,
    /// Follow only symlinks outside of scanned tree
    internal,
    /// Follow only symlinks inside of scanned tree
    external,
    /// Follow all symlinks
    all,
    /// Default choice, same as `external`
    standard = external
}

/** Global Scanner Statistics. */
class GStats
{
    /// Potential File Name Duplicates
    NotNull!File[][string] filesByName;
    /// Potential Link Duplicates
    NotNull!File[][ino_t] filesByInode;
    /// File(s) (Duplicates) Indexed on Contents SHA1.
    NotNull!File[][SHA1Digest] filesByContentId;
    /// File(s) (Duplicates) Indexed on raw unmangled symbol.
    NotNull!RegFile[][string] elfFilesBySymbol;
    FileTags ftags;

    /// Tree sizes.
    Bytes64[NotNull!File] treeSizesByFile;
    /// Line counts.
    size_t[NotNull!File] lineCountsByFile;
1643 1644 // VCS Directories 1645 DirKind[] vcDirKinds; 1646 DirKind[string] vcDirKindsMap; 1647 1648 // Skipped Directories 1649 DirKind[] skippedDirKinds; 1650 DirKind[string] skippedDirKindsMap; 1651 1652 FKinds txtFKinds = new FKinds; // Textual 1653 FKinds binFKinds = new FKinds; // Binary (Non-Textual) 1654 FKinds allFKinds = new FKinds; // All 1655 FKinds selFKinds = new FKinds; // User selected 1656 1657 void loadFileKinds() 1658 { 1659 txtFKinds ~= new FKind("SCons", ["SConstruct", "SConscript"], 1660 ["scons"], 1661 [], 0, [], [], 1662 defaultCommentDelims, 1663 pythonStringDelims, 1664 FileContent.buildSystemCode, FileKindDetection.equalsNameAndContents); // TOOD: Inherit Python 1665 1666 txtFKinds ~= new FKind("Makefile", ["GNUmakefile", "Makefile", "makefile"], 1667 ["mk", "mak", "makefile", "make", "gnumakefile"], [], 0, [], [], 1668 defaultCommentDelims, 1669 defaultStringDelims, 1670 FileContent.sourceCode, FileKindDetection.equalsName); 1671 txtFKinds ~= new FKind("Automakefile", ["Makefile.am", "makefile.am"], 1672 ["am"], [], 0, [], [], 1673 defaultCommentDelims, 1674 defaultStringDelims, 1675 FileContent.sourceCode); 1676 txtFKinds ~= new FKind("Autoconffile", ["configure.ac", "configure.in"], 1677 [], [], 0, [], [], 1678 defaultCommentDelims, 1679 defaultStringDelims, 1680 FileContent.sourceCode); 1681 txtFKinds ~= new FKind("Doxygen", ["Doxyfile"], 1682 ["doxygen"], [], 0, [], [], 1683 defaultCommentDelims, 1684 defaultStringDelims, 1685 FileContent.sourceCode); 1686 1687 txtFKinds ~= new FKind("Rake", ["Rakefile"],/+ TODO: inherit Ruby +/ 1688 ["mk", "makefile", "make", "gnumakefile"], [], 0, [], [], 1689 [Delim("#"), Delim("=begin", "=end")], 1690 defaultStringDelims, 1691 FileContent.sourceCode, FileKindDetection.equalsName); 1692 1693 txtFKinds ~= new FKind("HTML", [], ["htm", "html", "shtml", "xhtml"], [], 0, [], [], 1694 [Delim("<!--", "-->")], 1695 defaultStringDelims, 1696 FileContent.text, FileKindDetection.equalsContents); // markup 
text 1697 txtFKinds ~= new FKind("XML", [], ["xml", "dtd", "xsl", "xslt", "ent", ], [], 0, "<?xml", [], 1698 [Delim("<!--", "-->")], 1699 defaultStringDelims, 1700 FileContent.text, FileKindDetection.equalsContents); /+ TODO: markup text +/ 1701 txtFKinds ~= new FKind("YAML", [], ["yaml", "yml"], [], 0, [], [], 1702 defaultCommentDelims, 1703 defaultStringDelims, 1704 FileContent.text); /+ TODO: markup text +/ 1705 txtFKinds ~= new FKind("CSS", [], ["css"], [], 0, [], [], 1706 [Delim("/*", "*/")], 1707 defaultStringDelims, 1708 FileContent.text, FileKindDetection.equalsContents); 1709 1710 txtFKinds ~= new FKind("Audacity Project", [], ["aup"], [], 0, "<?xml", [], 1711 defaultCommentDelims, 1712 defaultStringDelims, 1713 FileContent.text, FileKindDetection.equalsNameAndContents); 1714 1715 txtFKinds ~= new FKind("Comma-separated values", [], ["csv"], [], 0, [], [], /+ TODO: decribe with symbolic +/ 1716 defaultCommentDelims, 1717 defaultStringDelims, 1718 FileContent.text, FileKindDetection.equalsNameAndContents); 1719 1720 txtFKinds ~= new FKind("Tab-separated values", [], ["tsv"], [], 0, [], [], /+ TODO: describe with symbolic +/ 1721 defaultCommentDelims, 1722 defaultStringDelims, 1723 FileContent.text, FileKindDetection.equalsNameAndContents); 1724 1725 static immutable keywordsC = [ 1726 "auto", "const", "double", "float", "int", "short", "struct", 1727 "unsigned", "break", "continue", "else", "for", "long", "signed", 1728 "switch", "void", "case", "default", "enum", "goto", "register", 1729 "sizeof", "typedef", "volatile", "char", "do", "extern", "if", 1730 "return", "static", "union", "while", 1731 ]; 1732 1733 /* See_Also: https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B */ 1734 auto opersCBasic = [ 1735 // Arithmetic 1736 Op("+", OpArity.binary, OpAssoc.LR, 6, "Add"), 1737 Op("-", OpArity.binary, OpAssoc.LR, 6, "Subtract"), 1738 Op("*", OpArity.binary, OpAssoc.LR, 5, "Multiply"), 1739 Op("/", OpArity.binary, OpAssoc.LR, 5, "Divide"), 1740 Op("%", 
OpArity.binary, OpAssoc.LR, 5, "Remainder/Moduls"), 1741 1742 Op("+", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary plus"), 1743 Op("-", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary minus"), 1744 1745 Op("++", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix increment"), 1746 Op("--", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix decrement"), 1747 1748 Op("++", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix increment"), 1749 Op("--", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix decrement"), 1750 1751 // Assignment Arithmetic (binary) 1752 Op("=", OpArity.binary, OpAssoc.RL, 16, "Assign"), 1753 Op("+=", OpArity.binary, OpAssoc.RL, 16, "Assignment by sum"), 1754 Op("-=", OpArity.binary, OpAssoc.RL, 16, "Assignment by difference"), 1755 Op("*=", OpArity.binary, OpAssoc.RL, 16, "Assignment by product"), 1756 Op("/=", OpArity.binary, OpAssoc.RL, 16, "Assignment by quotient"), 1757 Op("%=", OpArity.binary, OpAssoc.RL, 16, "Assignment by remainder"), 1758 1759 Op("&=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise AND"), 1760 Op("|=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise OR"), 1761 1762 Op("^=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise XOR"), 1763 Op("<<=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise left shift"), 1764 Op(">>=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise right shift"), 1765 1766 Op("==", OpArity.binary, OpAssoc.LR, 9, "Equal to"), 1767 Op("!=", OpArity.binary, OpAssoc.LR, 9, "Not equal to"), 1768 1769 Op("<", OpArity.binary, OpAssoc.LR, 8, "Less than"), 1770 Op(">", OpArity.binary, OpAssoc.LR, 8, "Greater than"), 1771 Op("<=", OpArity.binary, OpAssoc.LR, 8, "Less than or equal to"), 1772 Op(">=", OpArity.binary, OpAssoc.LR, 8, "Greater than or equal to"), 1773 1774 Op("&&", OpArity.binary, OpAssoc.LR, 13, "Logical AND"), /+ TODO: Convert to math in smallcaps AND +/ 1775 Op("||", OpArity.binary, OpAssoc.LR, 14, "Logical OR"), /+ TODO: Convert to math in smallcaps OR +/ 1776 1777 Op("!", OpArity.unaryPrefix, 
OpAssoc.LR, 3, "Logical NOT"), /+ TODO: Convert to math in smallcaps NOT +/ 1778 1779 Op("&", OpArity.binary, OpAssoc.LR, 10, "Bitwise AND"), 1780 Op("^", OpArity.binary, OpAssoc.LR, 11, "Bitwise XOR (exclusive or)"), 1781 Op("|", OpArity.binary, OpAssoc.LR, 12, "Bitwise OR"), 1782 1783 Op("<<", OpArity.binary, OpAssoc.LR, 7, "Bitwise left shift"), 1784 Op(">>", OpArity.binary, OpAssoc.LR, 7, "Bitwise right shift"), 1785 1786 Op("~", OpArity.unaryPrefix, OpAssoc.LR, 3, "Bitwise NOT (One's Complement)"), 1787 Op(",", OpArity.binary, OpAssoc.LR, 18, "Comma"), 1788 Op("sizeof", OpArity.unaryPrefix, OpAssoc.LR, 3, "Size-of"), 1789 1790 Op("->", OpArity.binary, OpAssoc.LR, 2, "Element selection through pointer"), 1791 Op(".", OpArity.binary, OpAssoc.LR, 2, "Element selection by reference"), 1792 1793 ]; 1794 1795 /* See_Also: https://en.wikipedia.org/wiki/Iso646.h */ 1796 auto opersC_ISO646 = [ 1797 OpAlias("and", "&&"), 1798 OpAlias("or", "||"), 1799 OpAlias("and_eq", "&="), 1800 1801 OpAlias("bitand", "&"), 1802 OpAlias("bitor", "|"), 1803 1804 OpAlias("compl", "~"), 1805 OpAlias("not", "!"), 1806 OpAlias("not_eq", "!="), 1807 OpAlias("or_eq", "|="), 1808 OpAlias("xor", "^"), 1809 OpAlias("xor_eq", "^="), 1810 ]; 1811 1812 auto opersC = opersCBasic /* ~ opersC_ISO646 */; 1813 1814 auto kindC = new FKind("C", [], ["c", "h"], [], 0, [], 1815 keywordsC, 1816 cCommentDelims, 1817 defaultStringDelims, 1818 FileContent.sourceCode, 1819 FileKindDetection.equalsWhatsGiven, 1820 Lang.c); 1821 txtFKinds ~= kindC; 1822 kindC.operations ~= tuple(FOp.checkSyntax, `gcc -x c -fsyntax-only -c`); 1823 kindC.operations ~= tuple(FOp.checkSyntax, `clang -x c -fsyntax-only -c`); 1824 kindC.operations ~= tuple(FOp.preprocess, `cpp`); 1825 kindC.opers = opersC; 1826 1827 static immutable keywordsCxx = (keywordsC ~ ["asm", "dynamic_cast", "namespace", "reinterpret_cast", "try", 1828 "bool", "explicit", "new", "static_cast", "typeid", 1829 "catch", "false", "operator", "template", "typename", 
1830 "class", "friend", "private", "this", "using", 1831 "const_cast", "inline", "public", "throw", "virtual", 1832 "delete", "mutable", "protected", "true", "wchar_t", 1833 // The following are not essential when 1834 // the standard ASCII character set is 1835 // being used, but they have been added 1836 // to provide more readable alternatives 1837 // for some of the C++ operators, and 1838 // also to facilitate programming with 1839 // character sets that lack characters 1840 // needed by C++. 1841 "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq", 1842 "and_eq", "bitor", "not", "or", "xor", ]).uniq.array; 1843 1844 auto opersCxx = opersC ~ [ 1845 Op("->*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1846 Op(".*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1847 Op("::", OpArity.binary, OpAssoc.none, 1, "Scope resolution"), 1848 Op("typeid", OpArity.unaryPrefix, OpAssoc.LR, 2, "Run-time type information (RTTI))"), 1849 //Op("alignof", OpArity.unaryPrefix, OpAssoc.LR, _, _), 1850 Op("new", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory allocation"), 1851 Op("delete", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1852 Op("delete[]", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1853 /* Op("noexcept", OpArity.unaryPrefix, OpAssoc.none, _, _), */ 1854 1855 Op("dynamic_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1856 Op("reinterpret_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1857 Op("static_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1858 Op("const_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1859 1860 Op("throw", OpArity.unaryPrefix, OpAssoc.LR, 17, "Throw operator"), 1861 /* Op("catch", OpArity.unaryPrefix, OpAssoc.LR, _, _) */ 1862 ]; 1863 1864 static immutable extsCxx = ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"]; 1865 auto kindCxx = new FKind("C++", [], extsCxx, [], 0, [], 1866 keywordsCxx, 1867 cCommentDelims, 1868 
defaultStringDelims, 1869 FileContent.sourceCode, 1870 FileKindDetection.equalsWhatsGiven, 1871 Lang.cxx); 1872 kindCxx.operations ~= tuple(FOp.checkSyntax, `gcc -x c++ -fsyntax-only -c`); 1873 kindCxx.operations ~= tuple(FOp.checkSyntax, `clang -x c++ -fsyntax-only -c`); 1874 kindCxx.operations ~= tuple(FOp.preprocess, `cpp`); 1875 kindCxx.opers = opersCxx; 1876 txtFKinds ~= kindCxx; 1877 static immutable keywordsCxx11 = keywordsCxx ~ ["alignas", "alignof", 1878 "char16_t", "char32_t", 1879 "constexpr", 1880 "decltype", 1881 "override", "final", 1882 "noexcept", "nullptr", 1883 "auto", 1884 "thread_local", 1885 "static_assert", ]; 1886 /+ TODO: Define as subkind +/ 1887 /* txtFKinds ~= new FKind("C++11", [], ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"], [], 0, [], */ 1888 /* keywordsCxx11, */ 1889 /* [Delim("/\*", "*\/"), */ 1890 /* Delim("//")], */ 1891 /* defaultStringDelims, */ 1892 /* FileContent.sourceCode, */ 1893 /* FileKindDetection.equalsWhatsGiven); */ 1894 1895 /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */ 1896 static immutable opersCxxMicrosoft = ["__alignof"]; 1897 1898 /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */ 1899 static immutable keywordsCxxMicrosoft = (keywordsCxx ~ [/* __abstract 2 */ 1900 "__asm", 1901 "__assume", 1902 "__based", 1903 /* __box 2 */ 1904 "__cdecl", 1905 "__declspec", 1906 /* __delegate 2 */ 1907 "__event", 1908 "__except", 1909 "__fastcall", 1910 "__finally", 1911 "__forceinline", 1912 /* __gc 2 */ 1913 /* __hook 3 */ 1914 "__identifier", 1915 "__if_exists", 1916 "__if_not_exists", 1917 "__inline", 1918 "__int16", 1919 "__int32", 1920 "__int64", 1921 "__int8", 1922 "__interface", 1923 "__leave", 1924 "__m128", 1925 "__m128d", 1926 "__m128i", 1927 "__m64", 1928 "__multiple_inheritance", 1929 /* __nogc 2 */ 1930 "__noop", 1931 /* __pin 2 */ 1932 /* __property 2 */ 1933 "__raise", 1934 /* __sealed 2 */ 1935 "__single_inheritance", 1936 "__stdcall", 1937 "__super", 1938 
"__thiscall", 1939 "__try", 1940 "__except", 1941 "__finally", 1942 /* __try_cast 2 */ 1943 "__unaligned", 1944 /* __unhook 3 */ 1945 "__uuidof", 1946 /* __value 2 */ 1947 "__virtual_inheritance", 1948 "__w64", 1949 "__wchar_t", 1950 "wchar_t", 1951 "abstract", 1952 "array", 1953 "auto", 1954 "bool", 1955 "break", 1956 "case", 1957 "catch", 1958 "char", 1959 "class", 1960 "const", 1961 "const_cast", 1962 "continue", 1963 "decltype", 1964 "default", 1965 "delegate", 1966 "delete", 1967 /* deprecated 1 */ 1968 /* dllexport 1 */ 1969 /* dllimport 1 */ 1970 "do", 1971 "double", 1972 "dynamic_cast", 1973 "else", 1974 "enum", 1975 "enum class", 1976 "enum struct", 1977 "event", 1978 "explicit", 1979 "extern", 1980 "false", 1981 "finally", 1982 "float", 1983 "for", 1984 "for each", 1985 "in", 1986 "friend", 1987 "friend_as", 1988 "gcnew", 1989 "generic", 1990 "goto", 1991 "if", 1992 "initonly", 1993 "inline", 1994 "int", 1995 "interface class", 1996 "interface struct", 1997 "interior_ptr", 1998 "literal", 1999 "long", 2000 "mutable", 2001 /* naked 1 */ 2002 "namespace", 2003 "new", 2004 "new", 2005 /* noinline 1 */ 2006 /* noreturn 1 */ 2007 /* nothrow 1 */ 2008 /* novtable 1 */ 2009 "nullptr", 2010 "operator", 2011 "private", 2012 "property", 2013 /* property 1 */ 2014 "protected", 2015 "public", 2016 "ref class", 2017 "ref struct", 2018 "register", 2019 "reinterpret_cast", 2020 "return", 2021 "safecast", 2022 "sealed", 2023 /* selectany 1 */ 2024 "short", 2025 "signed", 2026 "sizeof", 2027 "static", 2028 "static_assert", 2029 "static_cast", 2030 "struct", 2031 "switch", 2032 "template", 2033 "this", 2034 /* thread 1 */ 2035 "throw", 2036 "true", 2037 "try", 2038 "typedef", 2039 "typeid", 2040 "typeid", 2041 "typename", 2042 "union", 2043 "unsigned", 2044 "using" /* declaration */, 2045 "using" /* directive */, 2046 /* uuid 1 */ 2047 "value class", 2048 "value struct", 2049 "virtual", 2050 "void", 2051 "volatile", 2052 "while"]).uniq.array; 2053 2054 static immutable 
xattrCxxMicrosoft = []; 2055 2056 static immutable keywordsNewObjectiveC = ["id", 2057 "in", 2058 "out", // Returned by reference 2059 "inout", // Argument is used both to provide information and to get information back 2060 "bycopy", 2061 "byref", "oneway", "self", 2062 "super", "@interface", "@end", 2063 "@implementation", "@end", 2064 "@interface", "@end", 2065 "@implementation", "@end", 2066 "@protoco", "@end", "@class" ]; 2067 2068 static immutable keywordsObjectiveC = keywordsC ~ keywordsNewObjectiveC; 2069 txtFKinds ~= new FKind("Objective-C", [], ["m", "h"], [], 0, [], 2070 keywordsObjectiveC, 2071 cCommentDelims, 2072 defaultStringDelims, 2073 FileContent.sourceCode, FileKindDetection.equalsWhatsGiven, 2074 Lang.objectiveC); 2075 2076 static immutable keywordsObjectiveCxx = keywordsCxx ~ keywordsNewObjectiveC; 2077 txtFKinds ~= new FKind("Objective-C++", [], ["mm", "h"], [], 0, [], 2078 keywordsObjectiveCxx, 2079 defaultCommentDelims, 2080 defaultStringDelims, 2081 FileContent.sourceCode, 2082 FileKindDetection.equalsWhatsGiven, 2083 Lang.objectiveCxx); 2084 2085 static immutable keywordsSwift = ["break", "class", "continue", "default", "do", "else", "for", "func", "if", "import", 2086 "in", "let", "return", "self", "struct", "super", "switch", "unowned", "var", "weak", "while", 2087 "mutating", "extension"]; 2088 auto opersOverflowSwift = opersC ~ [Op("&+"), Op("&-"), Op("&*"), Op("&/"), Op("&%")]; 2089 auto builtinsSwift = ["print", "println"]; 2090 auto kindSwift = new FKind("Swift", [], ["swift"], [], 0, [], 2091 keywordsSwift, 2092 cCommentDelims, 2093 defaultStringDelims, 2094 FileContent.sourceCode, 2095 FileKindDetection.equalsWhatsGiven, 2096 Lang.swift); 2097 kindSwift.builtins = builtinsSwift; 2098 kindSwift.opers = opersOverflowSwift; 2099 txtFKinds ~= kindSwift; 2100 2101 static immutable keywordsCSharp = ["if"]; /+ TODO: Add keywords +/ 2102 txtFKinds ~= new FKind("C#", [], ["cs"], [], 0, [], keywordsCSharp, 2103 cCommentDelims, 2104 
defaultStringDelims, FileContent.sourceCode, FileKindDetection.equalsWhatsGiven, Lang.cSharp);

// OCaml
static immutable keywordsOCaml = ["and", "as", "assert", "begin", "class",
                                  "constraint", "do", "done", "downto", "else",
                                  "end", "exception", "external", "false", "for",
                                  "fun", "function", "functor", "if", "in",
                                  "include", "inherit", "inherit!", "initializer",
                                  "lazy", "let", "match", "method", "method!",
                                  "module", "mutable", "new", "object", "of",
                                  "open", "or",
                                  "private", "rec", "sig", "struct", "then", "to",
                                  "true", "try", "type",
                                  "val", "val!", "virtual",
                                  "when", "while", "with"];
// "ml" (implementation) and "mli" (interface) are the extensions OCaml sources
// actually carry; "ocaml" is kept for backward compatibility.
txtFKinds ~= new FKind("OCaml", [], ["ml", "mli", "ocaml"], [], 0, [], keywordsOCaml,
                       [Delim("(*", "*)")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Parrot: previously passed keywordsOCaml (copy-paste); no Parrot keyword list
// exists here, so pass an empty list like the other keyword-less kinds.
txtFKinds ~= new FKind("Parrot", [], ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg"], [], 0, [], [],
                       [Delim("#"),
                        Delim("^=", /+ TODO: Needs beginning of line instead of ^ +/
                              "=cut")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Prolog: no keywords, comment delimiters or string delimiters registered yet.
static immutable keywordsProlog = [];
txtFKinds ~= new FKind("Prolog", [], ["pl", "pro", "P"], [], 0, [], keywordsProlog,
                       [],
                       [],
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Operator table for D. Each entry: symbol, arity, associativity, a precedence
// value written as N*2 (so half-steps such as 14.5 stay integral), description.
auto opersD = [
    // Arithmetic
    Op("+", OpArity.binary, OpAssoc.LR, 10*2, "Add"),
    Op("-", OpArity.binary, OpAssoc.LR, 10*2, "Subtract"),
    Op("~", OpArity.binary, OpAssoc.LR, 10*2, "Concatenate"),

    Op("*", OpArity.binary, OpAssoc.LR, 11*2, "Multiply"),
    Op("/", OpArity.binary, OpAssoc.LR, 11*2, "Divide"),
    Op("%", OpArity.binary, OpAssoc.LR, 11*2, "Remainder/Modulus"),

    Op("++", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix increment"),
    Op("--", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix decrement"),

Op("^^", OpArity.binary, OpAssoc.RL, 13*2, "Power"),

    // Unary prefix operators: all right-to-left.
    Op("++", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix increment"),
    Op("--", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix decrement"),
    Op("&", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Address of"),
    Op("*", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Pointer Dereference"),
    Op("+", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Plus"),
    Op("-", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Minus"),
    Op("!", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Logical NOT"), /+ TODO: Convert to math in smallcaps NOT +/
    // Was OpAssoc.LR; made consistent with every other prefix operator above.
    Op("~", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Bitwise NOT (One's Complement)"),

    // Bit shift
    Op("<<", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise left shift"),
    Op(">>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise right shift"),
    Op(">>>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise unsigned right shift"), // D-specific

    // Comparison
    Op("==", OpArity.binary, OpAssoc.LR, 6*2, "Equal to"),
    Op("!=", OpArity.binary, OpAssoc.LR, 6*2, "Not equal to"),
    Op("<", OpArity.binary, OpAssoc.LR, 6*2, "Less than"),
    Op(">", OpArity.binary, OpAssoc.LR, 6*2, "Greater than"),
    Op("<=", OpArity.binary, OpAssoc.LR, 6*2, "Less than or equal to"),
    Op(">=", OpArity.binary, OpAssoc.LR, 6*2, "Greater than or equal to"),
    Op("in", OpArity.binary, OpAssoc.LR, 6*2, "In"),
    Op("!in", OpArity.binary, OpAssoc.LR, 6*2, "Not In"),
    Op("is", OpArity.binary, OpAssoc.LR, 6*2, "Is"),
    Op("!is", OpArity.binary, OpAssoc.LR, 6*2, "Not Is"),

    // Bitwise
    Op("&", OpArity.binary, OpAssoc.LR, 8*2, "Bitwise AND"),
    Op("^", OpArity.binary, OpAssoc.LR, 7*2, "Bitwise XOR (exclusive or)"),
    Op("|", OpArity.binary, OpAssoc.LR, 6*2, "Bitwise OR"),

    // Logical
    Op("&&", OpArity.binary, OpAssoc.LR, 5*2, "Logical AND"), /+ TODO: Convert to math in smallcaps AND +/
    Op("||", OpArity.binary, OpAssoc.LR, 4*2, "Logical OR"), /+ TODO: Convert to math in smallcaps OR +/

    // Assignment Arithmetic (binary)
    Op("=", OpArity.binary,
OpAssoc.RL, 2*2, "Assign"),
    Op("+=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by sum"),
    Op("-=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by difference"),
    Op("*=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by product"),
    Op("/=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by quotient"),
    Op("%=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by remainder"),
    Op("&=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise AND"),
    Op("|=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise OR"),
    Op("^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise XOR"),
    Op("<<=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise left shift"),
    Op(">>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise right shift"),
    // D-specific compound assignments that were missing from the table.
    Op(">>>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise unsigned right shift"),
    Op("~=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by concatenation"),
    Op("^^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by power"),

    Op(",", OpArity.binary, OpAssoc.LR, 1*2, "Comma"),
    Op("..", OpArity.binary, OpAssoc.LR, cast(int)(0*2), "Range separator"),
    ];

// Interpreters that may appear on the shebang line of a D script.
enum interpretersForD = ["rdmd",
                         "gdmd"];
// Shebang pattern accepting either interpreter; passed as magic to the D kinds.
auto magicForD = shebangLine(alt(lit("rdmd"),
                                 lit("gdmd")));

// D keywords and attributes, followed by common type aliases at the end of the list.
static immutable keywordsD = [`@property`, `@safe`, `@trusted`, `@system`, `@nogc`, `@disable`, `abstract`, `alias`, `align`, `asm`, `assert`, `auto`, `body`, `bool`, `break`, `byte`, `case`, `cast`, `catch`,
                              `cdouble`, `cent`, `cfloat`, `char`, `class`, `const`, `continue`, `creal`, `dchar`, `debug`, `default`, `delegate`, `delete`, `deprecated`,
                              `do`, `double`, `else`, `enum`, `export`, `extern`, `false`, `final`, `finally`, `float`, `for`, `foreach`, `foreach_reverse`,
                              `function`, `goto`, `idouble`, `if`, `ifloat`, `immutable`, `import`, `in`, `inout`, `int`, `interface`, `invariant`, `ireal`,
                              `is`, `lazy`, `long`, `macro`, `mixin`, `module`, `new`, `nothrow`, `null`, `out`, `override`, `package`, `pragma`, `private`,
                              `protected`, `public`, `pure`, `real`, `ref`, `return`, `scope`, `shared`, `short`, `static`, `struct`, `super`, `switch`,
                              `synchronized`, `template`, `this`,
`throw`, `true`, `try`, `typedef`, `typeid`, `typeof`, `ubyte`, `ucent`, `uint`, `ulong`,
                              `union`, `unittest`, `ushort`, `version`, `void`, `volatile`, `wchar`, `while`, `with`, `__gshared`,
                              `__thread`, `__traits`,
                              `string`, `wstring`, `dstring`, `size_t`, `hash_t`, `ptrdiff_t`, `equals_`]; // aliases

// Well-known member and runtime symbol names in D. Each name is listed once;
// the original repeated toString/toHash/opCmp/opEquals/factory/classinfo.
static immutable builtinsD = [`toString`, `toHash`, `opCmp`, `opEquals`,
                              `opUnary`, `opBinary`, `opApply`, `opCall`, `opAssign`, `opIndexAssign`, `opSliceAssign`, `opOpAssign`,
                              `opIndex`, `opSlice`, `opDispatch`,
                              `Monitor`, `factory`, `classinfo`, `vtbl`, `offset`, `getHash`, `equals`, `compare`, `tsize`, `swap`, `next`, `init`, `flags`, `offTi`, `destroy`, `postblit`,
                              `Throwable`, `Exception`, `Error`, `capacity`, `reserve`, `assumeSafeAppend`, `clear`,
                              `ModuleInfo`, `ClassInfo`, `MemberInfo`, `TypeInfo`];

// Built-in type properties. The original list had an empty string between
// `mant_dig` and `max_10_exp`; the floating-point property missing from the
// list is `dig`, so the empty entry is replaced by it.
static immutable propertiesD = [`sizeof`, `stringof`, `mangleof`, `nan`, `init`, `alignof`, `max`, `min`, `infinity`, `epsilon`, `mant_dig`, `dig`,
                                `max_10_exp`, `max_exp`, `min_10_exp`, `min_exp`, `min_normal`, `re`, `im`];

// Special tokens and special keywords, plus the `#line` directive.
static immutable specialsD = [`__FILE__`, `__LINE__`, `__MODULE__`, `__FUNCTION__`, `__PRETTY_FUNCTION__`,
                              `__DATE__`, `__EOF__`, `__TIME__`, `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `#line`];

// D interface files (.di); recognised by name or by an rdmd/gdmd shebang.
auto kindDInterface = new FKind("D Interface", [], ["di"],
                                magicForD, 0,
                                [],
                                keywordsD,
                                dCommentDelims,
                                defaultStringDelims,
                                FileContent.sourceCode,
                                FileKindDetection.equalsNameOrContents,
                                Lang.d);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); /+ TODO: Include paths +/
txtFKinds ~= kindDInterface;

// Ddoc source files (.dd); no Lang argument is passed for this kind.
auto kindDDoc = new FKind("D Documentation", [], ["dd"],
                          magicForD, 0,
                          [],
                          keywordsD,
                          dCommentDelims,
                          defaultStringDelims,
FileContent.sourceCode, FileKindDetection.equalsNameOrContents);
txtFKinds ~= kindDDoc;

// D source files, with two syntax-check commands attached.
auto kindD = new FKind("D", [], ["d", "di"],
                       magicForD, 0,
                       [],
                       keywordsD,
                       dCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsNameOrContents,
                       Lang.d);
kindD.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindD.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); /+ TODO: Include paths +/
txtFKinds ~= kindD;

// "D Interface" is already constructed, given the same two operations and
// registered in txtFKinds as kindDInterface. The original built and registered
// a second, argument-for-argument identical kind here; reuse the existing
// instance so the kind appears in txtFKinds only once.
auto kindDi = kindDInterface;

// Rust. Legacy entries ("box", "priv", "proc") are kept; current strict
// keywords that were missing are appended.
static immutable keywordsRust = ["as", "box", "break", "continue", "crate",
                                 "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in",
                                 "let", "loop", "match", "mod", "mut", "priv", "proc", "pub", "ref",
                                 "return", "self", "static", "struct", "super", "true", "trait",
                                 "type", "unsafe", "use", "while",
                                 "const", "dyn", "move", "Self", "where", "async", "await"];

auto kindRust = new FKind("Rust", [], ["rs"],
                          [], 0,
                          [],
                          keywordsRust,
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode,
                          FileKindDetection.equalsNameOrContents,
                          Lang.rust);
txtFKinds ~= kindRust;

// Fortran
static immutable keywordsFortran77 = ["if", "else"];
/+ TODO: Support .h files but require it to contain some Fortran-specific or be parseable. +/
auto kindFortan = new FKind("Fortran", [], ["f", "fortran", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"], [], 0, [], keywordsFortran77,
                            [Delim("^C")], /+ TODO: Need beginning of line instead ^.
seq(bol(), alt(lit('C'), lit('c'))); /+ TODO: Add chars chs("cC"); +/ +/
                            defaultStringDelims,
                            FileContent.sourceCode,
                            FileKindDetection.equalsNameOrContents,
                            Lang.fortran);
kindFortan.operations ~= tuple(FOp.checkSyntax, `gcc -x fortran -fsyntax-only`);
txtFKinds ~= kindFortan;

// Ada: each language revision extends the keyword set of the previous one.
import nxt.ada_defs;
static immutable keywordsAda83 = ada_defs.keywords83;
static immutable keywordsAda95 = keywordsAda83 ~ ada_defs.keywordsNew95;
static immutable keywordsAda2005 = keywordsAda95 ~ ada_defs.keywordsNew2005;
static immutable keywordsAda2012 = keywordsAda2005 ~ ada_defs.keywordsNew2012;
static immutable extsAda = ["ada", "adb", "ads"];
// Was named "Ada 82"; the keyword set used is keywordsAda83.
txtFKinds ~= new FKind("Ada 83", [], extsAda, [], 0, [], keywordsAda83,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 95", [], extsAda, [], 0, [], keywordsAda95,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2005", [], extsAda, [], 0, [], keywordsAda2005,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2012", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
// Unversioned "Ada" uses the newest keyword set.
txtFKinds ~= new FKind("Ada", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);

// GNAT library information files (.ali).
auto aliKind = new FKind("Ada Library File", [], ["ali"], [], 0, `V "GNAT Lib v`, [],
                         [], // N/A
                         defaultStringDelims,
                         FileContent.fingerprint); /+ TODO: Parse version following magic tag?
+/
aliKind.machineGenerated = true;
txtFKinds ~= aliKind;

// Pascal family
txtFKinds ~= new FKind("Pascal", [], ["pas", "pascal"], [], 0, [], [],
                       [Delim("(*", "*)"), // Old-Style
                        Delim("{", "}"),   // Turbo Pascal
                        Delim("//")],      // Delphi
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);
txtFKinds ~= new FKind("Delphi", [], ["pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj"],
                       [], 0, [], [],
                       [Delim("//")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);

// NOTE(review): a kind named "Objective-C" with keywords and extensions
// ["m", "h"] is registered earlier in this file; confirm whether this second,
// keyword-less registration is still wanted.
txtFKinds ~= new FKind("Objective-C", [], ["m"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

// Python keywords. The Python 2 statements "exec" and "print" are kept;
// keywords that were missing are appended on the last line.
static immutable keywordsPython = ["and", "del", "for", "is", "raise", "assert", "elif", "from", "lambda", "return",
                                   "break", "else", "global", "not", "try", "class", "except", "if", "or", "while",
                                   "continue", "exec", "import", "pass", "yield", "def", "finally", "in", "print",
                                   "as", "with", "nonlocal", "None", "True", "False", "async", "await"];

// Scripting

auto kindPython = new FKind("Python", [], ["py"],
                            shebangLine(lit("python")), 0, [],
                            keywordsPython,
                            defaultCommentDelims,
                            pythonStringDelims,
                            FileContent.scriptCode);
txtFKinds ~= kindPython;

txtFKinds ~= new FKind("Ruby", [], ["rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec"],
                       shebangLine(lit("ruby")), 0,
                       [], [],
                       [Delim("#"), Delim("=begin", "=end")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Scala", [], ["scala"],
                       shebangLine(lit("scala")), 0,
                       [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Scheme", [], ["scm", "ss"],
                       [], 0,
                       [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.scriptCode);

// Smalltalk comments are enclosed in double quotes.
txtFKinds ~= new FKind("Smalltalk", [], ["st"], [], 0, [], [],
                       [Delim("\"", "\"")],
defaultStringDelims, FileContent.sourceCode);

// --- Perl / PHP / Plone ------------------------------------------------------
txtFKinds ~= new FKind("Perl", [], ["pl", "pm", "pm6", "pod", "t", "psgi"],
                       shebangLine(lit("perl")), 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
// PHP accepts both the default and the C-style comment delimiters.
txtFKinds ~= new FKind("PHP", [], ["php", "phpt", "php3", "php4", "php5", "phtml"],
                       shebangLine(lit("php")), 0, [], [],
                       defaultCommentDelims ~ cCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Plone", [], ["pt", "cpt", "metadata", "cpy", "py"],
                       [], 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);

// --- Unix shells: each is matched on a shebang naming the shell --------------
txtFKinds ~= new FKind("Shell", [], ["sh"],
                       shebangLine(lit("sh")), 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Bash", [], ["bash"],
                       shebangLine(lit("bash")), 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Zsh", [], ["zsh"],
                       shebangLine(lit("zsh")), 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);

// --- Windows batch files ------------------------------------------------------
txtFKinds ~= new FKind("Batch", [], ["bat", "cmd"],
                       [], 0, [], [],
                       [Delim("REM")], defaultStringDelims,
                       FileContent.scriptCode);

// --- Miscellaneous scripting and markup --------------------------------------
txtFKinds ~= new FKind("TCL", [], ["tcl", "itcl", "itk"],
                       [], 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Tex", [], ["tex", "cls", "sty"],
                       [], 0, [], [],
                       [Delim("%")], defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("TT", [], ["tt", "tt2", "ttml"],
                       [], 0, [], [],
                       defaultCommentDelims, defaultStringDelims,
                       FileContent.scriptCode);
// Visual Basic: a single quote starts a comment.
txtFKinds ~= new FKind("Viz Basic", [], ["bas", "cls", "frm", "ctl", "vb", "resx"],
                       [], 0, [], [],
                       [Delim("'")],
defaultStringDelims, FileContent.scriptCode);

// Hardware description languages
txtFKinds ~= new FKind("Verilog", [], ["v", "vh", "sv"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("VHDL", [], ["vhd", "vhdl"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Clojure", [], ["clj"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Go", [], ["go"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

// Java, with `javac` attached as its byte-compile operation.
auto kindJava = new FKind("Java", [], ["java", "properties"], [], 0, [], [],
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode);
txtFKinds ~= kindJava;
kindJava.operations ~= tuple(FOp.byteCompile, `javac`);

txtFKinds ~= new FKind("Groovy", [], ["groovy", "gtmpl", "gpp", "grunit"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);
// Haskell: "--" starts a line comment and "{-" ... "-}" is a block comment.
// The line-comment delimiter was previously written as "--}".
txtFKinds ~= new FKind("Haskell", [], ["hs", "lhs"], [], 0, [], [],
                       [Delim("--"),
                        Delim("{-", "-}")],
                       defaultStringDelims,
                       FileContent.sourceCode);

static immutable keywordsJavascript = ["break", "case", "catch", "continue", "debugger", "default", "delete",
                                       "do", "else", "finally", "for", "function", "if", "in", "instanceof",
                                       "new", "return", "switch", "this", "throw", "try", "typeof", "var",
                                       "void", "while", "with" ];
txtFKinds ~= new FKind("JavaScript", [], ["js"],
                       [], 0, [],
                       keywordsJavascript,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("JavaScript Object Notation",
                       [], ["json"],
                       [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

// DUB package description: the only kind here that passes a base file name
// ("dub.json") in the second argument.
auto dubFKind = new FKind("DUB",
                          ["dub.json"], ["json"],
                          [], 0, [], [],
                          [], // N/A
defaultStringDelims, FileContent.scriptCode);
txtFKinds ~= dubFKind;
dubFKind.operations ~= tuple(FOp.build, `dub`);

/+ TODO: Inherit XML +/
// JSP: XML comments are "<!--" ... "-->" and JSP comments are "<%--" ... "--%>".
// The XML closer was previously written as "--%>".
txtFKinds ~= new FKind("JSP", [], ["jsp", "jspx", "jhtm", "jhtml"], [], 0, [], [],
                       [Delim("<!--", "-->"), // XML
                        Delim("<%--", "--%>")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("ActionScript", [], ["as", "mxml"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("LUA", [], ["lua"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Mason", [], ["mas", "mhtml", "mpl", "mtxt"], [], 0, [], [],
                       [], /+ TODO: Need symbolic +/
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("CFMX", [], ["cfc", "cfm", "cfml"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.scriptCode);

// Simulation
static immutable keywordsModelica = ["algorithm", "discrete", "false", "loop", "pure",
                                     "and", "each", "final", "model", "record",
                                     "annotation", "else", "flow", "not", "redeclare",
                                     "elseif", "for", "operator", "replaceable",
                                     "block", "elsewhen", "function", "or", "return",
                                     "break", "encapsulated", "if", "outer", "stream",
                                     "class", "end", "import", "output", "then",
                                     "connect", "enumeration", "impure", "package", "true",
                                     "connector", "equation", "in", "parameter", "type",
                                     "constant", "expandable", "initial", "partial", "when",
                                     "constrainedby", "extends", "inner", "protected", "while",
                                     "der", "external", "input", "public", "within"];
// NOTE(review): unlike the neighbouring kinds, kindModelica is not appended to
// txtFKinds anywhere in this part of the file; confirm it is registered later.
auto kindModelica = new FKind("Modelica", [], ["mo"], [], 0, [],
                              keywordsModelica,
                              cCommentDelims,
                              defaultStringDelims,
                              FileContent.sourceCode,
                              FileKindDetection.equalsWhatsGiven,
                              Lang.modelica);

// Numerical Computing

// MATLAB/Octave block comments open with "%{" and close with "%}".
// The closer was previously written as "}%".
txtFKinds ~= new FKind("Matlab", [], ["m"], [], 0, [], [],
                       [Delim("%{", "%}"), /+ TODO: Prio 1 +/
                        Delim("%")], /+ TODO: Prio 2 +/
                       defaultStringDelims,
                       FileContent.sourceCode);
auto kindOctave = new FKind("Octave", [], ["m"], [], 0, [], [],
                            [Delim("%{", "%}"), /+ TODO: Prio 1 +/
                             Delim("%"),
                             Delim("#")],
                            defaultStringDelims,
                            FileContent.sourceCode);
txtFKinds ~= kindOctave;
kindOctave.operations ~= tuple(FOp.byteCompile, `octave`);

txtFKinds ~= new FKind("Julia", [], ["jl"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode); // ((:execute "julia") (:evaluate "julia -e"))

txtFKinds ~= new FKind("Erlang", [], ["erl", "hrl"], [], 0, [], [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

// Emacs Lisp scripts: an emacs shebang, whitespace, then "--script".
auto magicForElisp = seq(shebangLine(lit("emacs")),
                         ws(),
                         lit("--script"));
auto kindElisp = new FKind("Emacs-Lisp", [],
                           ["el", "lisp"],
                           magicForElisp, 0, // Script Execution
                           [], [],
                           [Delim(";")],
                           defaultStringDelims,
                           FileContent.sourceCode);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs -batch -f batch-byte-compile`);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs --script`);
/* kindELisp.moduleName = "(provide 'MODULE_NAME)"; */
/* kindELisp.moduleImport = "(require 'MODULE_NAME)"; */
txtFKinds ~= kindElisp;

txtFKinds ~= new FKind("Lisp", [], ["lisp", "lsp"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("PostScript", [], ["ps", "postscript"], [], 0, "%!", [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

txtFKinds ~= new FKind("CMake", [], ["cmake"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

// http://stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
txtFKinds ~= new FKind("Pure ASCII", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.textASCII); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126
txtFKinds ~= new FKind("8-Bit Text", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.text8Bit); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126 or 128–255

txtFKinds ~= new FKind("Assembler", [], ["asm", "s"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

// https://en.wikipedia.org/wiki/Diff
auto diffKind = new FKind("Diff", [], ["diff", "patch"],
                          "diff", 0,
                          [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.text);
txtFKinds ~= diffKind;
diffKind.wikip = "https://en.wikipedia.org/wiki/Diff";

// PEM-armoured files: each kind is matched on its "-----BEGIN ..." header
// prefix at offset 0 (FileKindDetection.equalsContents).
auto pemCertKind = new FKind(`PEM certificate`, [], [`cert`],
                             `-----BEGIN CERTIFICATE-----`, 0,
                             [], [],
                             [], // N/A
                             [], // N/A
                             FileContent.text,
                             FileKindDetection.equalsContents);
txtFKinds ~= pemCertKind;

auto pemCertReqKind = new FKind(`PEM certificate request`, [], [`cert`],
                                `-----BEGIN CERTIFICATE REQ`, 0,
                                [], [],
                                [], // N/A
                                [], // N/A
                                FileContent.text,
                                FileKindDetection.equalsContents);
txtFKinds ~= pemCertReqKind;

auto pemRSAPrivateKeyKind = new FKind(`PEM RSA private key`, [], [`cert`],
                                      `-----BEGIN RSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
                                      FileContent.text,
                                      FileKindDetection.equalsContents);
txtFKinds ~= pemRSAPrivateKeyKind;

auto pemDSAPrivateKeyKind = new FKind(`PEM DSA private key`, [], [`cert`],
                                      `-----BEGIN DSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
FileContent.text, FileKindDetection.equalsContents);
txtFKinds ~= pemDSAPrivateKeyKind;

// PEM-armoured EC private key, matched on its header prefix at offset 0.
auto pemECPrivateKeyKind = new FKind(`PEM EC private key`, [], [`cert`],
                                     `-----BEGIN EC PRIVATE`, 0, [], [],
                                     [], [], // comment and string delimiters: N/A
                                     FileContent.text, FileKindDetection.equalsContents);
txtFKinds ~= pemECPrivateKeyKind;

// Binaries

// File extensions associated with ELF files.
static immutable extsELF = ["o", "so", "ko", "os", "out", "bin", "x", "elf", "axf", "prx", "puff", "none"];

// ELF: matched on the bytes 7F 45 4C 46 at offset 0.
auto elfKind = new FKind("ELF", [], extsELF,
                         x"7F 45 4C 46", 0, [], [],
                         [], [], // comment and string delimiters: N/A
                         FileContent.machineCode, FileKindDetection.equalsContents);
elfKind.wikip = "https://en.wikipedia.org/wiki/Executable_and_Linkable_Format";
binFKinds ~= elfKind;
// Disabled ELF sub-kind registrations, kept for reference:
/* auto extsExeELF = ["out", "bin", "x", "elf", ]; // ELF file extensions */
/* auto elfExeKind = new FKind("ELF executable", [], extsExeELF, [0x2, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfSOKind = new FKind("ELF shared object", [], ["so", "ko"], [0x3, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfCoreKind = new FKind("ELF core file", [], ["core"], [0x4, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* binFKinds ~= elfExeKind; */
/* elfKind.subKinds ~= elfSOKind; */
/* elfKind.subKinds ~= elfCoreKind; */
/* elfKind.subKinds ~= elfKind; */

/+ TODO: Specialize to not steal results from file's magics.
+/
// Linux firmware blobs: detected via parent directories and name rather than
// contents; parentPathDirs is set to ["lib", "firmware"].
auto linuxFirmwareKind = new FKind("Linux Firmware", [],
                                   ["bin", "ucode", "dat", "sbcf", "fw"],
                                   [], 0, [], [],
                                   [], [], // comment and string delimiters: N/A
                                   FileContent.binaryUnknown,
                                   FileKindDetection.equalsParentPathDirsAndName);
linuxFirmwareKind.parentPathDirs = ["lib", "firmware"];
binFKinds ~= linuxFirmwareKind;

/+ TODO: Specialize to not steal results from file's magics. +/
// udev hardware database index: both the name and the contents must match.
auto linuxHwDbKind = new FKind("Linux Hardware Database Index", "hwdb.bin", ["bin"],
                               "KSLPHHRH", 0, [], [],
                               [], [], // comment and string delimiters: N/A
                               FileContent.binaryUnknown,
                               FileKindDetection.equalsNameAndContents);
binFKinds ~= linuxHwDbKind;

// Executables
binFKinds ~= new FKind("Mach-O", [], ["o"],
                       x"CE FA ED FE", 0, [], [],
                       [], [], // comment and string delimiters: N/A
                       FileContent.machineCode, FileKindDetection.equalsContents);

binFKinds ~= new FKind("modules.symbols.bin", [], ["bin"],
                       cast(ubyte[])[0xB0, 0x07, 0xF4, 0x57, 0x00, 0x02, 0x00, 0x01, 0x20], 0, [], [],
                       [], [], // comment and string delimiters: N/A
                       FileContent.binaryUnknown, FileKindDetection.equalsContents);

auto kindCOFF = new FKind("COFF/i386/32", [], ["o"],
                          x"4C 01", 0, [], [],
                          [], [], // comment and string delimiters: N/A
                          FileContent.machineCode, FileKindDetection.equalsContents);
kindCOFF.description = "Common Object File Format";
binFKinds ~= kindCOFF;

// PE/COFF: the "PE\0\0" signature is looked for at the fixed offset 0x60.
auto kindPECOFF = new FKind("PE/COFF", [],
                            ["cpl", "exe", "dll", "ocx", "sys", "scr", "drv", "obj"],
                            "PE\0\0", 0x60, // And ("MZ") at offset 0x0
                            [], [],
                            [], [], // comment and string delimiters: N/A
                            FileContent.machineCode, FileKindDetection.equalsContents);
kindPECOFF.description = "COFF Portable Executable";
binFKinds ~= kindPECOFF;

// DOS MZ: no FileKindDetection argument is passed for this kind.
auto kindDOSMZ = new FKind("DOS-MZ", [], ["exe", "dll"],
                           "MZ", 0, [], [],
                           [], [], // comment and string delimiters: N/A
                           FileContent.machineCode);
kindDOSMZ.description = "MS-DOS, OS/2 or MS Windows executable";
binFKinds ~= kindDOSMZ;

// Caches
binFKinds ~= new FKind("ld.so.cache", [], ["cache"], "ld.so-", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.binaryCache);

// Profile Data
binFKinds ~= new FKind("perf benchmark data", [], ["data"], "PERFILE2h", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.performanceBenchmark);

// Images
binFKinds ~= new FKind("GIF87a", [], ["gif"], "GIF87a", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image);
binFKinds ~= new FKind("GIF89a", [], ["gif"], "GIF89a", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image);
// The three JPEG kinds share extensions and the same two leading bytes; the
// TODOs describe the extra checks needed to tell them apart.
auto extJPEG = ["jpeg", "jpg", "j2k", "jpeg2000"];
binFKinds ~= new FKind("JPEG", [], extJPEG, x"FF D8", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image); /+ TODO: Support ends with [0xFF, 0xD9] +/
binFKinds ~= new FKind("JPEG/JFIF", [], extJPEG, x"FF D8", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image); /+ TODO: Support ends with ['J','F','I','F', 0x00] +/
binFKinds ~= new FKind("JPEG/Exif", [], extJPEG, x"FF D8", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image); /+ TODO: Support contains ['E','x','i','f', 0x00] followed by metadata +/

// Java class files start with CA FE BA BE and use the "class" extension.
// This entry was previously named "Pack200-Compressed Java Bytes Code";
// Pack200 archives use a different magic (CA FE D0 0D).
binFKinds ~= new FKind("Java Bytecode Class", [], ["class"], x"CA FE BA BE", 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.machineCode);

binFKinds ~= new FKind("JRun Server Application", [], ["jsa"],
                       cast(ubyte[])[0xa2,0xab,0x0b,0xf0,
                                     0x01,0x00,0x00,0x00,
                                     0x00,0x00,0x20,0x00], 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.machineCode);

// PNG signature: 137 'P' 'N' 'G' CR LF 26 LF.
binFKinds ~= new FKind("PNG", [], ["png"],
                       cast(ubyte[])[137, 80, 78, 71, 13, 10, 26, 10], 0, [], [],
                       [], // N/A
                       [], // N/A
                       FileContent.image);

auto icnsKind = new FKind("Apple Icon Image", [], ["icns"],
                          "icns", 0, [], [],
                          [], // N/A
                          [], // N/A
                          FileContent.imageIcon);
icnsKind.wikip = "https://en.wikipedia.org/wiki/Apple_Icon_Image_format";
binFKinds ~= icnsKind;
/+ TODO: read with http://icns.sourceforge.net/ +/

auto kindPDF = new FKind("PDF", [], ["pdf"], "%PDF", 0, [], [],
                         [], // N/A
                         [], // N/A
                         FileContent.document);
kindPDF.description = "Portable Document Format";
binFKinds ~= kindPDF;

// Markdown and AsciiDoc are plain-text markup with no magic bytes. They were
// previously registered in binFKinds with FileContent.binaryCache, which
// looks like a copy of the LaTeX format-cache entry below; they are now
// registered as text kinds.
auto kindMarkdownFmt = new FKind("Markdown", [], ["md", "markdown"],
                                 [], 0,
                                 [], [],
                                 [], // N/A
                                 defaultStringDelims,
                                 FileContent.text);
kindMarkdownFmt.wikip = "https://en.wikipedia.org/wiki/Markdown";
txtFKinds ~= kindMarkdownFmt;

auto kindAsciiDocFmt = new FKind("AsciiDoc", [], ["ad", "adoc", "asciidoc"],
                                 [], 0,
                                 [], [],
                                 [], // N/A
                                 defaultStringDelims,
                                 FileContent.text);
txtFKinds ~= kindAsciiDocFmt;

// Precompiled LaTeX format file; the magic spells "W2TX", four bytes, "pdftex".
auto kindLatexPDFFmt = new FKind("LaTeX PDF Format", [], ["fmt"],
                                 cast(ubyte[])['W','2','T','X',
                                               0x00,0x00,0x00,0x08,
                                               0x70,0x64,0x66,0x74,
                                               0x65,0x78], 0, [], [],
                                 [], // N/A
                                 defaultStringDelims,
                                 FileContent.binaryCache);
binFKinds ~= kindLatexPDFFmt;

binFKinds ~= new FKind("Microsoft Office Document", [], ["doc", "docx", "xls", "ppt"], x"D0 CF 11 E0", 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.document);

// Fonts

auto kindTTF = new FKind("TrueType Font", [], ["ttf"], x"00 01 00 00 00", 0, [], [],
                         [], // N/A
                         defaultStringDelims,
                         FileContent.font);
binFKinds ~= kindTTF;

auto kindTTCF = new FKind("TrueType/OpenType Font Collection", [], ["ttc"], "ttcf", 0, [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.font);
binFKinds ~= kindTTCF;

auto kindWOFF = new FKind("Web Open Font", [], ["woff"], "wOFF", 0, [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.font); /+ TODO: container for kindSFNT +/
binFKinds ~= kindWOFF;

auto kindSFNT = new FKind("Spline Font", [], ["sfnt"],
                          "sfnt", 0, [], [],
                          [], defaultStringDelims,
                          FileContent.font); /+ TODO: container for Sfnt +/
binFKinds ~= kindSFNT;

// --- Audio ---------------------------------------------------------------------

// MIDI: both the name and the "MThd" header must match.
binFKinds ~= new FKind("MIDI", [], ["mid", "midi"],
                       "MThd", 0, [], [],
                       [], defaultStringDelims,
                       FileContent.audio, FileKindDetection.equalsNameAndContents);

// Au: both the name and the ".snd" header must match.
auto auKind = new FKind("Au", [], ["au", "snd"],
                        ".snd", 0, [], [],
                        [], defaultStringDelims,
                        FileContent.audio, FileKindDetection.equalsNameAndContents);
auKind.wikip = "https://en.wikipedia.org/wiki/Au_file_format";
binFKinds ~= auKind;

// Ogg: "OggS" followed by ten fixed header bytes.
binFKinds ~= new FKind("Ogg", [], ["ogg", "oga", "ogv"],
                       cast(ubyte[])[0x4F,0x67,0x67,0x53,
                                     0x00,0x02,0x00,0x00,
                                     0x00,0x00,0x00,0x00,
                                     0x00, 0x00], 0, [], [],
                       [], defaultStringDelims,
                       FileContent.media);

/+ TODO: Support RIFF....WAVEfmt using symbolic seq(lit("RIFF"), any(4), lit("WAVEfmt")) +/
binFKinds ~= new FKind("WAV", [], ["wav", "wave"],
                       "RIFF", 0, [], [],
                       [], defaultStringDelims,
                       FileContent.audio, FileKindDetection.equalsContents);

// --- Archives ------------------------------------------------------------------

auto kindBSDAr = new FKind("BSD Archive", [], ["a", "ar"],
                           "!<arch>\n", 0, [], [],
                           [], defaultStringDelims,
                           FileContent.archive, FileKindDetection.equalsContents);
kindBSDAr.description = "BSD 4.4 and Mac OSX Archive";
binFKinds ~= kindBSDAr;

// Both tar flavours are matched on a "ustar" magic at offset 257.
binFKinds ~= new FKind("GNU tar Archive", [], ["tar"],
                       "ustar\040\040\0", 257, [], [],
                       [], defaultStringDelims,
                       FileContent.archive, FileKindDetection.equalsContents); /+ TODO: Specialized Derivation of "POSIX tar Archive" +/
binFKinds ~= new FKind("POSIX tar Archive", [], ["tar"],
                       "ustar\0", 257, [], [],
                       [], defaultStringDelims,
                       FileContent.archive, FileKindDetection.equalsContents);

binFKinds ~= new FKind("pkZip Archive", [], ["zip", "jar", "pptx", "docx", "xlsx"], "PK\003\004", 0, [], [], 2943 [], // N/A 2944 defaultStringDelims, 2945 FileContent.archive, FileKindDetection.equalsContents); 2946 binFKinds ~= new FKind("pkZip Archive (empty)", [], ["zip", "jar"], "PK\005\006", 0, [], [], 2947 [], // N/A 2948 defaultStringDelims, 2949 FileContent.archive, FileKindDetection.equalsContents); 2950 2951 binFKinds ~= new FKind("PAK file", [], ["pak"], cast(ubyte[])[0x40, 0x00, 0x00, 0x00, 2952 0x4a, 0x12, 0x00, 0x00, 2953 0x01, 0x2d, 0x23, 0xcb, 2954 0x6d, 0x00, 0x00, 0x2f], 0, [], [], 2955 [], // N/A 2956 defaultStringDelims, 2957 FileContent.spellCheckWordList, 2958 FileKindDetection.equalsNameAndContents); 2959 2960 binFKinds ~= new FKind("LZW-Compressed", [], ["z", "tar.z"], x"1F 9D", 0, [], [], 2961 [], // N/A 2962 defaultStringDelims, 2963 FileContent.compressed); 2964 binFKinds ~= new FKind("LZH-Compressed", [], ["z", "tar.z"], x"1F A0", 0, [], [], 2965 [], // N/A 2966 defaultStringDelims, 2967 FileContent.compressed); 2968 2969 binFKinds ~= new FKind("CompressedZ", [], ["z"], "\037\235", 0, [], [], 2970 [], // N/A 2971 defaultStringDelims, 2972 FileContent.compressed); 2973 binFKinds ~= new FKind("GNU-Zip (gzip)", [], ["tgz", "gz", "gzip", "dz"], "\037\213", 0, [], [], 2974 [], // N/A 2975 defaultStringDelims, 2976 FileContent.compressed); 2977 binFKinds ~= new FKind("BZip", [], ["bz2", "bz", "tbz2", "bzip2"], "BZh", 0, [], [], 2978 [], // N/A 2979 defaultStringDelims, 2980 FileContent.compressed); 2981 binFKinds ~= new FKind("XZ/7-Zip", [], ["xz", "txz", "7z", "t7z", "lzma", "tlzma", "lz", "tlz"], 2982 cast(ubyte[])[0xFD, '7', 'z', 'X', 'Z', 0x00], 0, [], [], 2983 [], // N/A 2984 defaultStringDelims, 2985 FileContent.compressed); 2986 binFKinds ~= new FKind("LZX", [], ["lzx"], "LZX", 0, [], [], 2987 [], // N/A 2988 defaultStringDelims, 2989 FileContent.compressed); 2990 binFKinds ~= new FKind("SZip", [], ["szip"], "SZ\x0a\4", 0, [], [], 2991 
[], // N/A 2992 defaultStringDelims, 2993 FileContent.compressed); 2994 2995 binFKinds ~= new FKind("Git Bundle", [], ["bundle"], "# v2 git bundle", 0, [], [], 2996 [], // N/A 2997 defaultStringDelims, 2998 FileContent.versionControl); 2999 3000 binFKinds ~= new FKind("Emacs-Lisp Bytes Code", [], ["elc"], ";ELC\27\0\0\0", 0, [], [], 3001 [], // N/A 3002 defaultStringDelims, 3003 FileContent.byteCode, FileKindDetection.equalsContents); 3004 binFKinds ~= new FKind("Python Bytes Code", [], ["pyc"], x"0D 0A", 2, [], [], 3005 [], // N/A 3006 defaultStringDelims, 3007 FileContent.byteCode, FileKindDetection.equalsNameAndContents); /+ TODO: Handle versions at src[0..2] +/ 3008 3009 binFKinds ~= new FKind("Zshell Wordcode", [], ["zwc"], x"07 06 05 04", 0, [], [], 3010 [], // N/A 3011 defaultStringDelims, 3012 FileContent.byteCode); 3013 3014 binFKinds ~= new FKind("Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [], 3015 [], // N/A 3016 defaultStringDelims, 3017 FileContent.byteCode, FileKindDetection.equalsContents); 3018 binFKinds ~= new FKind("Java KeyStore", [], [], x"FE ED FE ED", 0, [], [], 3019 [], // N/A 3020 defaultStringDelims, 3021 FileContent.binaryUnknown, FileKindDetection.equalsContents); 3022 binFKinds ~= new FKind("Java JCE KeyStore", [], [], x"CE CE CE CE", 0, [], [], 3023 [], // N/A 3024 defaultStringDelims, 3025 FileContent.binaryUnknown, FileKindDetection.equalsContents); 3026 3027 binFKinds ~= new FKind("LLVM Bitcode", [], ["bc"], "BC", 0, [], [], 3028 [], // N/A 3029 defaultStringDelims, 3030 FileContent.byteCode, FileKindDetection.equalsNameAndContents); 3031 3032 binFKinds ~= new FKind("MATLAB MAT", [], ["mat"], "MATLAB 5.0 MAT-file", 0, [], [], 3033 [], // N/A 3034 defaultStringDelims, 3035 FileContent.numericalData, FileKindDetection.equalsContents); 3036 3037 auto hdf4Kind = new FKind("HDF4", [], ["hdf", "h4", "hdf4", "he4"], x"0E 03 13 01", 0, [], [], 3038 [], // N/A 3039 defaultStringDelims, 3040 FileContent.numericalData); 3041 
binFKinds ~= hdf4Kind; 3042 hdf4Kind.description = "Hierarchical Data Format version 4"; 3043 3044 auto hdf5Kind = new FKind("HDF5", "Hierarchical Data Format version 5", ["hdf", "h5", "hdf5", "he5"], x"89 48 44 46 0D 0A 1A 0A", 0, [], [], 3045 [], // N/A 3046 defaultStringDelims, 3047 FileContent.numericalData); 3048 binFKinds ~= hdf5Kind; 3049 hdf5Kind.description = "Hierarchical Data Format version 5"; 3050 3051 auto numpyKind = new FKind("NUMPY", "NUMPY", ["npy", "numpy"], x"93 4E 55 4D 50 59", 0, [], [], 3052 [], // N/A 3053 defaultStringDelims, 3054 FileContent.numericalData); 3055 binFKinds ~= numpyKind; 3056 3057 binFKinds ~= new FKind("GNU GLOBAL Database", ["GTAGS", "GRTAGS", "GPATH", "GSYMS"], [], "b1\5\0", 0, [], [], 3058 [], // N/A 3059 defaultStringDelims, 3060 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3061 3062 // SQLite 3063 static immutable extsSQLite = ["sql", "sqlite", "sqlite3"]; 3064 binFKinds ~= new FKind("MySQL table definition file", [], extsSQLite, x"FE 01", 0, [], [], 3065 [], // N/A 3066 defaultStringDelims, 3067 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3068 binFKinds ~= new FKind("MySQL MyISAM index file", [], extsSQLite, x"FE FE 07", 0, [], [], 3069 [], // N/A 3070 defaultStringDelims, 3071 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3072 binFKinds ~= new FKind("MySQL MyISAM compressed data file", [], extsSQLite, x"FE FE 08", 0, [], [], 3073 [], // N/A 3074 defaultStringDelims, 3075 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3076 binFKinds ~= new FKind("MySQL Maria index file", [], extsSQLite, x"FF FF FF", 0, [], [], 3077 [], // N/A 3078 defaultStringDelims, 3079 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3080 binFKinds ~= new FKind("MySQL Maria compressed data file", [], extsSQLite, x"FF FF FF", 0, [], [], 3081 [], // N/A 3082 defaultStringDelims, 3083 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3084 binFKinds ~= new 
FKind("SQLite format 3", [], extsSQLite , "SQLite format 3", 0, [], [], 3085 [], // N/A 3086 defaultStringDelims, 3087 FileContent.tagsDatabase, FileKindDetection.equalsContents); /+ TODO: Why is this detected at 49:th try? +/ 3088 3089 binFKinds ~= new FKind("Vim swap", [], ["swo"], [], 0, "b0VIM ", [], 3090 [], // N/A 3091 defaultStringDelims, 3092 FileContent.binaryCache); 3093 3094 binFKinds ~= new FKind("PCH", "(GCC) Precompiled header", ["pch", "gpch"], "gpch", 0, [], [], 3095 [], // N/A 3096 defaultStringDelims, 3097 FileContent.cache); 3098 3099 binFKinds ~= new FKind("Firmware", [], ["fw"], cast(ubyte[])[], 0, [], [], 3100 [], // N/A 3101 defaultStringDelims, 3102 FileContent.cache, FileKindDetection.equalsName); /+ TODO: Add check for binary contents and that some parenting directory is named "firmware" +/ 3103 3104 binFKinds ~= new FKind("LibreOffice or OpenOffice RDB", [], ["rdb"], 3105 cast(ubyte[])[0x43,0x53,0x4d,0x48, 3106 0x4a,0x2d,0xd0,0x26, 3107 0x00,0x02,0x00,0x00, 3108 0x00,0x02,0x00,0x02], 0, [], [], 3109 [], // N/A 3110 defaultStringDelims, 3111 FileContent.database, FileKindDetection.equalsName); /+ TODO: Add check for binary contents and that some parenting directory is named "firmware" +/ 3112 3113 binFKinds ~= new FKind("sconsign", [], ["sconsign", "sconsign.dblite", "dblite"], x"7d 71 01 28", 0, [], [], 3114 [], // N/A 3115 defaultStringDelims, 3116 FileContent.cache, FileKindDetection.equalsNameAndContents); 3117 3118 binFKinds ~= new FKind("GnuPG (GPG) key public ring", [], ["gpg"], x"99 01", 0, [], [], 3119 [], // N/A 3120 defaultStringDelims, 3121 FileContent.binary, FileKindDetection.equalsNameOrContents); 3122 binFKinds ~= new FKind("GnuPG (GPG) encrypted data", [], [], x"85 02", 0, [], [], 3123 [], // N/A 3124 defaultStringDelims, 3125 FileContent.binary, FileKindDetection.equalsContents); 3126 binFKinds ~= new FKind("GNUPG (GPG) key trust database", [], [], "\001gpg", 0, [], [], 3127 [], // N/A 3128 defaultStringDelims, 3129 
FileContent.binary, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("aspell word list (rowl)", [], ["rws"], "aspell default speller rowl ", 0, [], [],
                               [], // N/A
                               defaultStringDelims,
                               FileContent.spellCheckWordList, FileKindDetection.equalsNameAndContents);

        binFKinds ~= new FKind("DS_Store", ".DS_Store", [], "Mac OS X Desktop Services Store ", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.binary, FileKindDetection.equalsName);

        /* Fax image created in the CCITT Group 3 compressed format, which is
         * used for digital transmission of fax data and supports 1 bit per
         * pixel
         */
        binFKinds ~= new FKind("CCITT Group 3 compressed format", [], /+ TODO: Alternative name: Digifax-G3, G3 Fax +/
                               ["g3", "G3"],
                               "PC Research, Inc", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.imageModemFax1BPP, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Raw Modem Data version 1", [],
                               ["rmd1"],
                               "RMD1", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.modemData, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Portable voice format 1", [],
                               ["pvf1"],
                               "PVF1\n", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.voiceModem, FileKindDetection.equalsContents);

        binFKinds ~= new FKind("Portable voice format 2", [],
                               ["pvf2"],
                               "PVF2\n", 0, [], [],
                               [], // N/A
                               [],
                               FileContent.voiceModem, FileKindDetection.equalsContents);

        allFKinds ~= txtFKinds;
        allFKinds ~= binFKinds;

        assert(allFKinds.byIndex.length ==
               (txtFKinds.byIndex.length +
                binFKinds.byIndex.length));

        assert(allFKinds.byId.length ==
               (txtFKinds.byId.length +
                binFKinds.byId.length));

        txtFKinds.rehash;
        binFKinds.rehash;
        allFKinds.rehash;
    }

    // Code

    /** Register the known directory kinds.

        Appends the version-control directory kinds to `vcDirKinds`, copies
        them into `skippedDirKinds`, appends the additional skipped kinds
        (trash, undo, dependency, backup and autom4te cache directories) and
        fills `vcDirKindsMap` and `skippedDirKindsMap`, both keyed by
        `DirKind.fileName`.

        Each kind is registered exactly once ("RCS" used to be appended
        twice), and the maps filled are the members themselves, not a local.
    */
    void loadDirKinds()
    {
        vcDirKinds ~= new DirKind(".git", "Git");
        vcDirKinds ~= new DirKind(".svn", "Subversion (Svn)");
        vcDirKinds ~= new DirKind(".bzr", "Bazaar (Bzr)");
        vcDirKinds ~= new DirKind("RCS", "RCS");
        vcDirKinds ~= new DirKind("CVS", "CVS");
        vcDirKinds ~= new DirKind("MCVS", "MCVS");
        vcDirKinds ~= new DirKind(".hg", "Mercurial (Hg)");
        vcDirKinds ~= new DirKind("SCCS", "SCCS");
        vcDirKinds ~= new DirKind(".wact", "WACT");
        vcDirKinds ~= new DirKind("_MTN", "Monotone");
        vcDirKinds ~= new DirKind("_darcs", "Darcs");
        vcDirKinds ~= new DirKind("{arch}", "Arch");

        // Every version-control directory is also a skipped directory.
        skippedDirKinds ~= vcDirKinds;

        foreach (kind; vcDirKinds)
        {
            vcDirKindsMap[kind.fileName] = kind;
        }
        vcDirKindsMap.rehash;

        skippedDirKinds ~= new DirKind(".trash", "Trash");
        skippedDirKinds ~= new DirKind(".undo", "Undo");
        skippedDirKinds ~= new DirKind(".deps", "Dependencies");
        skippedDirKinds ~= new DirKind(".backups", "Backups");
        skippedDirKinds ~= new DirKind(".autom4te.cache", "Automake Cache");

        foreach (kind; skippedDirKinds)
        {
            skippedDirKindsMap[kind.fileName] = kind;
        }
        skippedDirKindsMap.rehash;
    }

    ScanContext scanContext = ScanContext.standard;
    KeyStrictness keyStrictness = KeyStrictness.standard;

    bool showNameDups = false;
    bool showTreeContentDups = false;
    bool showFileContentDups = false;
    bool showELFSymbolDups = false;
    bool linkContentDups = false;

    bool showLinkDups = false;
    SymlinkFollowContext followSymlinks = SymlinkFollowContext.external;
    bool showBrokenSymlinks = true;
    bool showSymlinkCycles = true;

    bool showAnyDups = false;
    bool showMMaps = false;
    bool showUsage = false;
    bool showSHA1 = false;
    bool showLineCounts = false;

    uint64_t noFiles = 0;
    uint64_t noRegFiles = 0;
    uint64_t noSymlinks = 0;
    uint64_t
noSpecialFiles = 0;
    uint64_t noDirs = 0;

    uint64_t noScannedFiles = 0;
    uint64_t noScannedRegFiles = 0;
    uint64_t noScannedSymlinks = 0;
    uint64_t noScannedSpecialFiles = 0;
    uint64_t noScannedDirs = 0;

    auto shallowDensenessSum = Rational!ulong(0, 1);
    auto deepDensenessSum = Rational!ulong(0, 1);
    uint64_t densenessCount = 0;

    FOp fOp = FOp.none;

    bool keyAsWord = false;
    bool keyAsSymbol = false;
    bool keyAsAcronym = false;
    bool keyAsExact = false;

    bool showTree = false;

    bool useHTML = false;
    bool browseOutput = false;
    bool collectTypeHits = false;
    bool colorFlag = false;

    int scanDepth = -1;

    bool demangleELF = true;

    bool recache = false;

    bool useNGrams = false;

    PathFormat pathFormat = PathFormat.relative;

    DirSorting subsSorting = DirSorting.onTimeLastModified;
    BuildType buildType = BuildType.none;
    DuplicatesContext duplicatesContext = DuplicatesContext.internal;

    Dir[] topDirs;
    Dir rootDir;
}

/** Aggregated hit and byte counters of a scan. */
struct Results
{
    size_t numTotalHits;          /// Total number of hits.
    size_t numFilesWithHits;      /// Number of files containing at least one hit.
    Bytes64 noBytesTotal;         /// Total number of bytes.
    Bytes64 noBytesTotalContents; /// Total number of contents bytes.
    Bytes64 noBytesScanned;       /// Number of bytes scanned.
    Bytes64 noBytesSkipped;       /// Number of bytes skipped.
    Bytes64 noBytesUnreadable;    /// Number of bytes that were unreadable.
}

version (cerealed)
{
    /** Serialize `systime` through `cereal` as its `stdTime` value.

        After `cereal.grain` has run, `systime` is rebuilt from `stdTime`
        unless that value is zero.
    */
    void grain(T)(ref Cereal cereal, ref SysTime systime)
    {
        auto ticks = systime.stdTime;
        cereal.grain(ticks);
        if (ticks == 0)
        {
            return;
        }
        systime = SysTime(ticks);
    }
}

/** Directory Sorting Order. */
enum DirSorting
{
    /* onTimeCreated, /\* Windows only. Currently stored in Linux on ext4 but no */
    /*                 * standard interface exists yet, it will probably be called */
    /*                 * xstat(). *\/ */
    onTimeLastModified,
    onTimeLastAccessed,
    onSize,
    onNothing,
}

/** Build configuration. */
enum BuildType
{
    none,               // Don't compile
    devel,              // Compile with debug symbols
    release,            // Compile without debugs symbols and optimizations
    standard = devel,
}

/** How paths are presented. */
enum PathFormat
{
    absolute,
    relative,
}

/** Dir.
 */
class Dir : File
{
    /** Construct File System Root Directory. */
    this(Dir parent = null, GStats gstats = null)
    {
        super(parent);
        this._gstats = gstats;
        if (gstats)
        {
            ++gstats.noDirs;
        }
    }

    /** Construct the root directory from `rootPath`, which must be "/". */
    this(string rootPath, GStats gstats)
    in { assert(rootPath == "/"); assert(gstats); }
    do
    {
        auto rootDent = DirEntry(rootPath);
        Dir noParent = null;
        this(rootDent, noParent, gstats);
    }

    /** Construct from directory entry `dent` located under `parent`. */
    this(ref DirEntry dent, Dir parent, GStats gstats)
    in { assert(gstats); }
    do
    {
        this(dent.name.baseName,
             parent,
             dent.size.Bytes64,
             dent.timeLastModified,
             dent.timeLastAccessed,
             gstats);
    }

    /** Construct from explicit attributes; bumps `gstats.noDirs` when
        `gstats` is given. */
    this(string name, Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed,
         GStats gstats = null)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        this._gstats = gstats;
        if (gstats)
        {
            ++gstats.noDirs;
        }
    }

    override string toTextual() const @property { return "Directory"; }

    /** Returns: size of `this` plus the tree sizes of all its subs.
        The sum is computed on first use and then served from `_treeSize`.
    */
    override Bytes64 treeSize() @property @trusted /* @safe nothrow */
    {
        if (_treeSize.isUntouched)
        {
            _treeSize = (this.size +
                         reduce!"a+b"(0.Bytes64,
                                      subs.byValue.map!"a.treeSize")); // recurses into sub-directories
        }
        return _treeSize.get.bytes;
    }

    /** Returns: Directory Tree Content Id of `this`.
*/
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe nothrow */
    {
        if (_treeContentId.isUntouched)
        {
            _treeContentId = subs.byValue.map!"a.treeContentId".sha1Of; /+ TODO: join loops for calculating treeSize +/
            assert(_treeContentId, "Zero tree content digest");
            if (treeSize() != 0)
            {
                // Register non-empty trees so that equal trees can be found by digest.
                gstats.filesByContentId[_treeContentId] ~= assumeNotNull(cast(File)this); /+ TODO: Avoid cast when DMD and NotNull is fixed +/
            }
        }
        return _treeContentId;
    }

    override Face!Color face() const @property @safe pure nothrow { return dirFace; }

    /** Return true if `this` is a file system root directory. */
    bool isRoot() @property @safe const pure nothrow { return !parent; }

    /** Set the statistics object of `this`. Returns: `gstats`. */
    GStats gstats(GStats gstats) @property @safe pure /* nothrow */
    {
        return this._gstats = gstats;
    }

    /** Returns: statistics object of `this`, inherited (and then stored)
        from the closest parent that has one when `this` has none. */
    GStats gstats() @property @safe nothrow
    {
        if (!_gstats && this.parent)
        {
            _gstats = this.parent.gstats();
        }
        return _gstats;
    }

    /** Returns: Depth from File System root to this directory (root is 0). */
    override int depth() @property @safe nothrow
    {
        if (_depth == -1)
        {
            _depth = parent ? parent.depth + 1 : 0; // memoized depth
        }
        return _depth;
    }

    /** Scan `this` recursively for a non-directory file with basename `name`.

        A direct sub named `name` is returned when it is not a directory.
        Otherwise every sub-directory is searched in turn. This now also
        happens when a direct sub named `name` exists but is itself a
        directory; previously that case returned `null` without searching
        further.

        Returns: first hit, or `null` when no such file exists.

        TODO: Reuse range based algorithm this.tree(depthFirst|breadFirst)
    */
    File find(string name) @property
    {
        auto subs_ = subs();
        if (auto direct = name in subs_)
        {
            File hit = *direct;
            if (!cast(Dir)hit) // direct hit that is not a directory
            {
                return hit;
            }
        }
        foreach (sub; subs_)
        {
            if (auto subDir = cast(Dir)sub)
            {
                if (auto hit = subDir.find(name)) // found further down
                {
                    return hit;
                }
            }
        }
        return null;
    }

    /** Append Tree Statistics.
*/
    void addTreeStatsFromSub(F)(NotNull!F subFile, ref DirEntry subDent)
    {
        // Only regular files widen the time intervals of this directory.
        if (!subDent.isFile)
        {
            return;
        }

        /* _treeSize += subDent.size.Bytes64; */
        // dbg("Updating ", _treeSize, " of ", path);

        /++ TODO: Move these overloads to std.datetime +/
        auto ref min(in SysTime a, in SysTime b) @trusted pure nothrow { return (a < b ? a : b); }
        auto ref max(in SysTime a, in SysTime b) @trusted pure nothrow { return (a > b ? a : b); }

        const modified = subDent.timeLastModified;
        _timeModifiedInterval = Interval!SysTime(min(modified, _timeModifiedInterval.begin),
                                                 max(modified, _timeModifiedInterval.end));

        const accessed = subDent.timeLastAccessed;
        _timeAccessedInterval = Interval!SysTime(min(accessed, _timeAccessedInterval.begin),
                                                 max(accessed, _timeAccessedInterval.end));
    }

    /** Update Statistics for Sub-File `subFile` with `subDent` of `this` Dir.

        Does nothing when no statistics object is reachable. Otherwise
        registers `subFile` by name when name duplicates are requested and,
        for regular files with at least two hard links, by inode when link
        duplicates are requested.
    */
    void updateStats(F)(NotNull!F subFile, ref DirEntry subDent, bool isRegFile)
    {
        auto stats = gstats();
        if (!stats)
        {
            return;
        }

        if (stats.showNameDups/* && */
            /* !subFile.underAnyDir!(a => a.name in stats.skippedDirKindsMap) */)
        {
            stats.filesByName[subFile.name] ~= cast(NotNull!File)subFile;
        }

        if (stats.showLinkDups && isRegFile)
        {
            import core.sys.posix.sys.stat;
            immutable stat_t st = subDent.statBuf();
            if (st.st_nlink >= 2)
            {
                stats.filesByInode[st.st_ino] ~= cast(NotNull!File)subFile;
            }
        }
    }

    /** Load Contents of `this` Directory from Disk using DirEntries.
        Returns: `true` iff Dir was updated (reread) from disk.
3512 */ 3513 bool load(int depth = 0, bool force = false) 3514 { 3515 import std.range: empty; 3516 if (!_obseleteDir && // already loaded 3517 !force) // and not forced reload 3518 { 3519 return false; // signal already scanned 3520 } 3521 3522 // dbg("Zeroing ", _treeSize, " of ", path); 3523 _treeSize.reset; // this.size; 3524 auto oldSubs = _subs; 3525 _subs.reset; 3526 assert(_subs.length == 0); /+ TODO: Remove when verified +/ 3527 3528 import std.file: dirEntries, SpanMode; 3529 auto entries = dirEntries(path, SpanMode.shallow, false); // false: skip symlinks 3530 foreach (dent; entries) 3531 { 3532 immutable basename = dent.name.baseName; 3533 File sub = null; 3534 if (basename in oldSubs) 3535 { 3536 sub = oldSubs[basename]; // reuse from previous cache 3537 } 3538 else 3539 { 3540 bool isRegFile = false; 3541 if (dent.isSymlink) 3542 { 3543 sub = new Symlink(dent, assumeNotNull(this)); 3544 } 3545 else if (dent.isDir) 3546 { 3547 sub = new Dir(dent, this, gstats); 3548 } 3549 else if (dent.isFile) 3550 { 3551 /+ TODO: Delay construction of and specific files such as +/ 3552 // CFile, ELFFile, after FKind-recognition has been made. 3553 sub = new RegFile(dent, assumeNotNull(this)); 3554 isRegFile = true; 3555 } 3556 else 3557 { 3558 sub = new SpecFile(dent, assumeNotNull(this)); 3559 } 3560 updateStats(enforceNotNull(sub), dent, isRegFile); 3561 } 3562 auto nnsub = enforceNotNull(sub); 3563 addTreeStatsFromSub(nnsub, dent); 3564 _subs[basename] = nnsub; 3565 } 3566 _subs.rehash; // optimize hash for faster lookups 3567 3568 _obseleteDir = false; 3569 return true; 3570 } 3571 3572 bool reload(int depth = 0) { return load(depth, true); } 3573 alias sync = reload; 3574 3575 /* TODO: Can we get make this const to the outside world perhaps using inout? 
*/
    /** Returns: contents of `this` keyed by base name, loaded on demand. */
    ref NotNull!File[string] subs() @property { load(); return _subs; }

    /** Returns: contents of `this` as an array ordered according to
        `sorted`; all orderings are descending, `onNothing` leaves the
        order unspecified. */
    NotNull!File[] subsSorted(DirSorting sorted = DirSorting.onTimeLastModified) @property
    {
        load();
        auto ssubs = _subs.values;
        /* TODO: Use radix sort to speed things up. */
        final switch (sorted)
        {
            /* case DirSorting.onTimeCreated: */
            /*     break; */
        case DirSorting.onTimeLastModified:
            ssubs.sort!((a, b) => (a.timeLastModified >
                                   b.timeLastModified));
            break;
        case DirSorting.onTimeLastAccessed:
            ssubs.sort!((a, b) => (a.timeLastAccessed >
                                   b.timeLastAccessed));
            break;
        case DirSorting.onSize:
            ssubs.sort!((a, b) => (a.size >
                                   b.size));
            break;
        case DirSorting.onNothing:
            break;
        }
        return ssubs;
    }

    /** Returns: sub named `sub_name`, or `null` when `this` has none. */
    File sub(Name)(Name sub_name)
    {
        load();
        if (auto hit = sub_name in _subs) // single lookup
        {
            return *hit;
        }
        return null;
    }

    /** Returns: `sub` when `this` contains an entry with the same name,
        `null` otherwise.

        The contents map is keyed by base name (see `load`), so the lookup
        uses `sub.name`. It previously used `sub.path`, which never matches
        such a key.
    */
    File sub(File sub)
    {
        load();
        return (sub.name in _subs) !is null ? sub : null;
    }

    version (cerealed)
    {
        void accept(Cereal cereal)
        {
            auto stdTime = timeLastModified.stdTime;
            cereal.grain(name, size, stdTime);
            timeLastModified = SysTime(stdTime);
        }
    }
    version (msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Serialize `this` and, recursively, all its subs to `packer`.

            Layout: name, size, both time stamps and kind of `this`; the
            number of subs; when there is at least one sub, the forward
            difference encodings of the subs' modification and access
            times; then for every sub its class name followed by its own
            serialization.
        */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln("Entering Dir.toMsgpack ", this.name); */
            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime,
                        kind);

            // Contents
            /* TODO: serialize map of polymorphic objects using
             * packer.packArray(_subs) and type trait lookup up all child-classes of
             * File */
            packer.pack(_subs.length);

            if (_subs.length >= 1)
            {
                auto diffsLastModified = _subs.byValue.map!"a.timeLastModified.stdTime".encodeForwardDifference;
                auto diffsLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime".encodeForwardDifference;
                packer.pack(diffsLastModified, diffsLastAccessed);
            }

            foreach (sub; _subs)
            {
                // The class name is written first so that fromMsgpack can
                // construct the matching type.
                if (const regFile = cast(RegFile)sub)
                {
                    packer.pack("RegFile");
                    regFile.toMsgpack(packer);
                }
                else if (const dir = cast(Dir)sub)
                {
                    packer.pack("Dir");
                    dir.toMsgpack(packer);
                }
                else if (const symlink = cast(Symlink)sub)
                {
                    packer.pack("Symlink");
                    symlink.toMsgpack(packer);
                }
                else if (const special = cast(SpecFile)sub)
                {
                    packer.pack("SpecFile");
                    special.toMsgpack(packer);
                }
                else
                {
                    immutable subClassName = sub.classinfo.name;
                    assert(0, "Unknown sub File class " ~ subClassName); /+ TODO: Exception +/
                }
            }
        }

        /** Restore `this` and, recursively, all its subs from `unpacker`,
            reading the layout written by `toMsgpack`.

            Unpacked directories and regular files are checked against a
            fresh `DirEntry` of their path; a `FileException` raised for a
            sub (for instance because it no longer exists) drops that sub.
            A sub is stored only when no sub of the same name is present.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); /+ TODO: Functionize +/
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); /+ TODO: Functionize +/

            /* dbg("before:", path, " ", size, " ", timeLastModified, " ", timeLastAccessed); */

            // FKind
            unpacker.unpack(kind); /* TODO: kind = new DirKind(unpacker); */
            /* dbg("after:", path); */

            _treeSize.reset; // this.size;

            // Contents
            /* TODO: unpacker.unpack(_subs); */
            immutable noPreviousSubs = _subs.length == 0;
            size_t subs_length; unpacker.unpack(subs_length); /+ TODO: Functionize to unpacker.unpack!size_t() +/

            // The encoded time stamps must be consumed to stay in sync with
            // the stream even though they are not used afterwards.
            ForwardDifferenceCode!(long[]) diffsLastModified,
                diffsLastAccessed;
            if (subs_length >= 1)
            {
                unpacker.unpack(diffsLastModified, diffsLastAccessed);
                /* auto x = diffsLastModified.decodeForwardDifference; */
            }

            foreach (ix; 0..subs_length) // repeat for subs_length times
            {
                string subClassName; unpacker.unpack(subClassName); /+ TODO: Functionize +/
                File sub = null;
                try
                {
                    switch (subClassName)
                    {
                    default:
                        assert(0, "Unknown File parent class " ~ subClassName); /+ TODO: Exception +/
                    case "Dir":
                        auto subDir = new Dir(this, gstats);
                        unpacker.unpack(subDir); sub = subDir;
                        auto subDent = DirEntry(sub.path);
                        subDir.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        addTreeStatsFromSub(assumeNotNull(subDir), subDent);
                        break;
                    case "RegFile":
                        auto subRegFile = new RegFile(assumeNotNull(this));
                        unpacker.unpack(subRegFile); sub = subRegFile;
                        auto subDent = DirEntry(sub.path);
                        subRegFile.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        updateStats(assumeNotNull(subRegFile), subDent, true);
                        addTreeStatsFromSub(assumeNotNull(subRegFile), subDent);
                        break;
                    case "Symlink":
                        auto subSymlink = new Symlink(assumeNotNull(this));
                        unpacker.unpack(subSymlink); sub = subSymlink;
                        break;
                    case "SpecFile":
                        // Named differently from the type so that the local
                        // does not shadow `SpecFile` in its own initializer.
                        auto subSpecFile = new SpecFile(assumeNotNull(this));
                        unpacker.unpack(subSpecFile); sub = subSpecFile;
                        break;
                    }
                    if (noPreviousSubs ||
                        !(sub.name in _subs))
                    {
                        _subs[sub.name] = enforceNotNull(sub);
                    }
                    /* dbg("Unpacked Dir sub ", sub.path, " of type ", subClassName); */
                } catch (FileException) { // this may be a too generic exception
                    /* dbg(sub.path, " is not accessible anymore"); */
                }
            }

        }
    }

    /** Mark `this` as out of date and drop the memoized tree size and
        time intervals. */
    override void makeObselete() @trusted
    {
        _obseleteDir = true;
        _treeSize.reset;
        _timeModifiedInterval.reset;
        _timeAccessedInterval.reset;
    }

    /** Mark `this` as up to date. */
    override void makeUnObselete() @safe
    {
        _obseleteDir = false;
    }

    private NotNull!File[string] _subs; // Directory contents
    DirKind kind;               // Kind of this directory
    uint64_t hitCount = 0;
    private int _depth = -1;            // Memoized Depth
    private bool _obseleteDir = true;  // Flags that this is obselete
    GStats _gstats = null;

    /* TODO: Reuse Span and span in Phobos. (Span!T).init should be (T.max, T.min) */
    Interval!SysTime _timeModifiedInterval;
    Interval!SysTime _timeAccessedInterval;

    Nullable!(size_t, size_t.max) _treeSize; // Size of tree with this directory as root.
    /* TODO: Make this work instead: */
    /* import std.typecons: Nullable; */
    /* Nullable!(Bytes64, Bytes64.max) _treeSize; // Size of tree with this directory as root. */

    SHA1Digest _treeContentId;
}

/** Externally Directory Memoized Calculation of Tree Size.
3803 Is it possible to make get any of @safe pure nothrow? 3804 */ 3805 Bytes64 treeSizeMemoized(NotNull!File file, Bytes64[File] cache) @trusted /* nothrow */ 3806 { 3807 typeof(return) sum = file.size; 3808 if (auto dir = cast(Dir)file) 3809 { 3810 if (file in cache) 3811 { 3812 sum = cache[file]; 3813 } 3814 else 3815 { 3816 foreach (sub; dir.subs.byValue) 3817 { 3818 sum += treeSizeMemoized(sub, cache); 3819 } 3820 cache[file] = sum; 3821 } 3822 } 3823 return sum; 3824 } 3825 3826 /** Save File System Tree Cache under Directory `rootDir`. 3827 Returns: Serialized Byte Array. 3828 */ 3829 const(ubyte[]) saveRootDirTree(Viz viz, 3830 Dir rootDir, string cacheFile) @trusted 3831 { 3832 immutable tic = Clock.currTime; 3833 version (msgpack) 3834 { 3835 const data = rootDir.pack(); 3836 import std.file: write; 3837 } 3838 else version (cerealed) 3839 { 3840 auto enc = new Cerealiser(); // encoder 3841 enc ~= rootDir; 3842 auto data = enc.bytes; 3843 } 3844 else 3845 { 3846 ubyte[] data; 3847 } 3848 cacheFile.write(data); 3849 immutable toc = Clock.currTime; 3850 3851 viz.ppln("Cache Write".asH!2, 3852 "Wrote tree cache of size ", 3853 data.length.Bytes64, " to ", 3854 cacheFile.asPath, 3855 " in ", 3856 shortDurationString(toc - tic)); 3857 3858 return data; 3859 } 3860 3861 /** Load File System Tree Cache from `cacheFile`. 3862 Returns: Root Directory of Loaded Tree. 
3863 */ 3864 Dir loadRootDirTree(Viz viz, 3865 string cacheFile, GStats gstats) @trusted 3866 { 3867 immutable tic = Clock.currTime; 3868 3869 import std.file: read; 3870 try 3871 { 3872 const data = read(cacheFile); 3873 3874 auto rootDir = new Dir(cast(Dir)null, gstats); 3875 version (msgpack) 3876 { 3877 unpack(cast(ubyte[])data, rootDir); /* Dir rootDir = new Dir(cast(const(ubyte)[])data); */ 3878 } 3879 immutable toc = Clock.currTime; 3880 3881 viz.pp("Cache Read".asH!2, 3882 "Read cache of size ", 3883 data.length.Bytes64, " from ", 3884 cacheFile.asPath, 3885 " in ", 3886 shortDurationString(toc - tic), " containing", 3887 asUList(asItem(gstats.noDirs, " Dirs,"), 3888 asItem(gstats.noRegFiles, " Regular Files,"), 3889 asItem(gstats.noSymlinks, " Symbolic Links,"), 3890 asItem(gstats.noSpecialFiles, " Special Files,"), 3891 asItem("totalling ", gstats.noFiles + 1, " Files"))); 3892 assert(gstats.noDirs + 3893 gstats.noRegFiles + 3894 gstats.noSymlinks + 3895 gstats.noSpecialFiles == gstats.noFiles + 1); 3896 return rootDir; 3897 } 3898 catch (FileException) 3899 { 3900 viz.ppln("Failed to read cache from ", cacheFile); 3901 return null; 3902 } 3903 } 3904 3905 Dir[] getDirs(NotNull!Dir rootDir, string[] topDirNames) 3906 { 3907 Dir[] topDirs; 3908 foreach (topName; topDirNames) 3909 { 3910 Dir topDir = getDir(rootDir, topName); 3911 3912 if (!topDir) 3913 { 3914 dbg("Directory " ~ topName ~ " is missing"); 3915 } 3916 else 3917 { 3918 topDirs ~= topDir; 3919 } 3920 } 3921 return topDirs; 3922 } 3923 3924 /** (Cached) Lookup of File `filePath`. 
3925 */ 3926 File getFile(NotNull!Dir rootDir, string filePath, 3927 bool isDir = false, 3928 bool tolerant = false) @trusted 3929 { 3930 if (isDir) 3931 { 3932 return getDir(rootDir, filePath); 3933 } 3934 else 3935 { 3936 auto parentDir = getDir(rootDir, filePath.dirName); 3937 if (parentDir) 3938 { 3939 auto hit = parentDir.sub(filePath.baseName); 3940 if (hit) 3941 return hit; 3942 else 3943 { 3944 dbg("File path " ~ filePath ~ " doesn't exist. TODO: Query user to instead find it under " 3945 ~ parentDir.path); 3946 parentDir.find(filePath.baseName); 3947 } 3948 } 3949 else 3950 { 3951 dbg("Directory " ~ parentDir.path ~ " doesn't exist"); 3952 } 3953 } 3954 return null; 3955 } 3956 3957 /** (Cached) Lookup of Directory `dirpath`. 3958 Returns: Dir if present under rootDir, null otherwise. 3959 TODO: Make use of dent 3960 */ 3961 import std.path: isRooted; 3962 Dir getDir(NotNull!Dir rootDir, string dirPath, ref DirEntry dent, 3963 ref Symlink[] followedSymlinks) @trusted 3964 in { assert(dirPath.isRooted); } 3965 do 3966 { 3967 Dir currDir = rootDir; 3968 3969 import std.range: drop; 3970 import std.path: pathSplitter; 3971 foreach (part; dirPath.pathSplitter().drop(1)) // all but first 3972 { 3973 auto sub = currDir.sub(part); 3974 if (auto subDir = cast(Dir)sub) 3975 { 3976 currDir = subDir; 3977 } 3978 else if (auto subSymlink = cast(Symlink)sub) 3979 { 3980 auto subDent = DirEntry(subSymlink.absoluteNormalizedTargetPath); 3981 if (subDent.isDir) 3982 { 3983 if (followedSymlinks.find(subSymlink)) 3984 { 3985 dbg("Infinite recursion in ", subSymlink); 3986 return null; 3987 } 3988 followedSymlinks ~= subSymlink; 3989 currDir = getDir(rootDir, subSymlink.absoluteNormalizedTargetPath, subDent, followedSymlinks); /+ TODO: Check for infinite recursion +/ 3990 } 3991 else 3992 { 3993 dbg("Loaded path " ~ dirPath ~ " is not a directory"); 3994 return null; 3995 } 3996 } 3997 else 3998 { 3999 return null; 4000 } 4001 } 4002 return currDir; 4003 } 4004 4005 /** 
(Cached) Lookup of Directory `dirPath`.

    Convenience overload that creates the `DirEntry` and the list of followed
    symbolic links itself.

    Returns: The directory, or `null` when it is not found or a `FileException`
    is thrown while looking it up.
*/
Dir getDir(NotNull!Dir rootDir, string dirPath) @trusted
{
    try
    {
        Symlink[] visitedLinks;
        auto entry = DirEntry(dirPath);
        return getDir(rootDir, dirPath, entry, visitedLinks);
    }
    catch (FileException)
    {
        dbg("Exception getting Dir");
        return null;
    }
}
unittest {
    /* auto tmp = tempfile("/tmp/fsfile"); */
}

enum ulong mmfile_size = 0; // 100*1024

/** Memory page size: queried from the system on Linux, otherwise a fixed
    fallback of 4096.
*/
auto pageSize() @trusted
{
    version (linux)
    {
        import core.sys.posix.sys.shm: __getpagesize;
        return __getpagesize();
    }
    else
    {
        return 4096;
    }
}

/** How strictly a search key has to match. */
enum KeyStrictness
{
    exact,
    acronym,
    eitherExactOrAcronym,
    standard = eitherExactOrAcronym, // default strictness
}

/** Language Operator Associativity. */
enum OpAssoc
{
    none,
    LR, // Left-to-Right
    RL, // Right-to-Left
}

/** Language Operator Arity. */
enum OpArity
{
    unknown,
    unaryPostfix, // 1-arguments
    unaryPrefix, // 1-arguments
    binary, // 2-arguments
    ternary, // 3-arguments
}

/** Language Operator. */
struct Op
{
    /** Construct an operator description.

        `overloadable` is not set here and keeps its default value.
        NOTE(review): `prec` is a signed `byte` defaulting to -1 while the
        member is a `ubyte` - confirm that the wrap-around is intended.
    */
    this(string op,
         OpArity arity = OpArity.unknown,
         OpAssoc assoc = OpAssoc.none,
         byte prec = -1,
         string desc = [])
    {
        this.op = op;
        this.desc = desc;
        this.assoc = assoc;
        this.prec = prec;
        this.arity = arity;
    }

    /** Make `this` an alias of `opOrig`. */
    Op aliasOf(string opOrig)
    {
        /+ TODO: set relation in map from op to opOrig +/
        return this;
    }

    string op;         // Operator. TODO: Optimize this storage using a value type?
    string desc;       // Description
    OpAssoc assoc;     // Associativity
    ubyte prec;        // Precedence
    OpArity arity;     // Arity
    bool overloadable; // Overloadable
}

/** Language Operator Alias.
*/
struct OpAlias
{
    this(string op, string opOrigin)
    {
        this.opOrigin = opOrigin;
        this.op = op;
    }
    string op;       // the alias
    string opOrigin; // the operator it stands for
}

/** Look up the kind of `regFile` by its cached kind id.

    Returns: The entry of `kindsById` stored under `regFile._cstat.kindId`, or
    `null` when there is no such entry.
*/
FKind tryLookupKindIn(RegFile regFile,
                      FKind[SHA1Digest] kindsById)
{
    immutable id = regFile._cstat.kindId;
    if (auto kindPtr = id in kindsById)
    {
        return *kindPtr;
    }
    return null;
}

/** Path of `theFile` as it is shown to the user.

    Returns: `"./"` followed by the file name when paths are shown relative and
    exactly one top directory is scanned, otherwise the path of the file.
*/
string displayedFilename(AnyFile)(GStats gstats,
                                  AnyFile theFile) @safe pure
{
    immutable showRelative = (gstats.pathFormat == PathFormat.relative &&
                              gstats.topDirs.length == 1);
    if (showRelative)
    {
        return "./" ~ theFile.name;
    }
    return theFile.path;
}

/** File System Scanner. */
class Scanner(Term)
{
    this(string[] args, ref Term term)
    {
        prepare(args, term);
    }

    SysTime _currTime;
    import std.getopt;
    import std.string: toLower, toUpper, startsWith, CaseSensitive;
    import std.mmfile;
    import std.stdio: writeln, stdout, stderr, stdin, popen;
    import std.algorithm: find, count, countUntil, min, splitter;
    import std.range: join;
    import std.conv: to;

    import core.sys.posix.sys.mman;
    import core.sys.posix.pwd: passwd, getpwuid_r;
    version (linux)
    {
        // import core.sys.linux.sys.inotify;
        import core.sys.linux.sys.xattr;
    }
    import core.sys.posix.unistd: getuid, getgid;
    import std.file: read, FileException, exists, getcwd;
    import std.range: retro;
    import std.exception: ErrnoException;
    import core.sys.posix.sys.stat: stat_t, S_IRUSR, S_IRGRP, S_IROTH;

    uint64_t _hitsCountTotal = 0;

    Symlink[] _brokenSymlinks;

    bool _beVerbose = false;
    bool _caseFold = false;
    bool _showSkipped = false;
    bool listTxtFKinds = false;
    bool listBinFKinds = false;
    string selFKindNames;
    string[] _topDirNames;
    string[] addTags;
    string[] removeTags;

    private
    {
        GStats
gstats = new GStats();

        string _cacheFile = "~/.cache/fs-root.msgpack";

        uid_t _uid;
        gid_t _gid;
    }

    ioFile outFile;

    string[] keys; // Keys to scan.
    typeof(keys.map!bistogramOverRepresentation) keysBists;
    typeof(keys.map!(sparseUIntNGramOverRepresentation!NGramOrder)) keysXGrams;
    Bist keysBistsUnion;
    XGram keysXGramsUnion;

    string selFKindsNote;

    /** Parse the command line in `args` and run a complete scan.

        In order: loads the registered file and directory kinds, parses the
        options, normalizes the top directories, selects the output file,
        collects the search keys (every remaining argument that does not start
        with `-`), prints the search header, loads or creates the root
        directory, scans the top directories, saves the directory tree cache
        and prints statistics.

        Returns early, right after option parsing, when `getoptEx` reports that
        the help text was printed.
    */
    void prepare(string[] args, ref Term term)
    {
        _scanChunkSize = 32*pageSize;
        gstats.loadFileKinds;
        gstats.loadDirKinds;

        // NOTE(review): several short options differ only in case (s/S, d/D,
        // f/F, w/W) while case-insensitive matching is requested - confirm
        // how getoptEx resolves them.
        bool helpPrinted = getoptEx("FS --- File System Scanning Utility in D.\n" ~
                                    "Usage: fs { --switches } [KEY]...\n" ~
                                    "Note that scanning for multiple KEYs is possible.\nIf so hits are highlighted in different colors!\n" ~
                                    "Sample calls: \n" ~
                                    " fdo.d --color -d /lib/modules/3.13.0-24-generic/kernel/drivers/staging --browse --duplicates --recache lirc\n" ~
                                    " fdo.d --color -d /etc -s --tree --usage -l --duplicates stallman\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym sttccc\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym dktp\n" ~
                                    " fdo.d --color -d /etc -d /var --acronym tms sttc prc dtp xsr\n" ~
                                    " fdo.d --color -d /etc min max delta\n" ~
                                    " fdo.d --color -d /etc if elif return len --duplicates --sort=onSize\n" ~
                                    " fdo.d --color -k -d /bin alpha\n" ~
                                    // each sample call goes on its own line
                                    " fdo.d --color -d /lib -k linus\n" ~
                                    " fdo.d --color -d /etc --symbol alpha beta gamma delta\n" ~
                                    " fdo.d --color -d /var/spool/postfix/dev\n" ~
                                    " fdo.d --color -d /etc alpha\n" ~
                                    " fdo.d --color -d ~/Work/dmd --browse xyz --duplicates --do=preprocess",

                                    args,
                                    std.getopt.config.caseInsensitive,

                                    "verbose|v", "\tVerbose", &_beVerbose,

                                    "color|C", "\tColorize Output" ~ defaultDoc(gstats.colorFlag), &gstats.colorFlag,
                                    "types|T", "\tComma separated list (CSV) of file types/kinds to scan" ~ defaultDoc(selFKindNames), &selFKindNames,
                                    "list-textual-kinds", "\tList registered textual types/kinds" ~ defaultDoc(listTxtFKinds), &listTxtFKinds,
                                    "list-binary-kinds", "\tList registered binary types/kinds" ~ defaultDoc(listBinFKinds), &listBinFKinds,
                                    "group-types|G", "\tCollect and group file types found" ~ defaultDoc(gstats.collectTypeHits), &gstats.collectTypeHits,

                                    "i", "\tCase-Fold, Case-Insensitive" ~ defaultDoc(_caseFold), &_caseFold,
                                    "k", "\tShow Skipped Directories and Files" ~ defaultDoc(_showSkipped), &_showSkipped,
                                    "d", "\tRoot Directory(s) of tree(s) to scan, defaulted to current directory" ~ defaultDoc(_topDirNames), &_topDirNames,
                                    "depth", "\tDepth of tree to scan, defaulted to unlimited (-1) depth" ~ defaultDoc(gstats.scanDepth), &gstats.scanDepth,

                                    // Contexts
                                    "context|x", "\tComma Separated List of Contexts. Among: " ~ enumDoc!ScanContext, &gstats.scanContext,

                                    "word|w", "\tSearch for key as a complete Word (A Letter followed by more Letters and Digits)." ~ defaultDoc(gstats.keyAsWord), &gstats.keyAsWord,
                                    "symbol|ident|id|s", "\tSearch for key as a complete Symbol (Identifier)" ~ defaultDoc(gstats.keyAsSymbol), &gstats.keyAsSymbol,
                                    "acronym|a", "\tSearch for key as an acronym (relaxed)" ~ defaultDoc(gstats.keyAsAcronym), &gstats.keyAsAcronym,
                                    "exact", "\tSearch for key only with exact match (strict)" ~ defaultDoc(gstats.keyAsExact), &gstats.keyAsExact,

                                    "name-duplicates|snd", "\tDetect & Show file name duplicates" ~ defaultDoc(gstats.showNameDups), &gstats.showNameDups,
                                    "hardlink-duplicates|inode-duplicates|shd", "\tDetect & Show multiple links to same inode" ~ defaultDoc(gstats.showLinkDups), &gstats.showLinkDups,
                                    "file-content-duplicates|scd", "\tDetect & Show file contents duplicates" ~ defaultDoc(gstats.showFileContentDups), &gstats.showFileContentDups,
                                    "tree-content-duplicates", "\tDetect & Show directory tree contents duplicates" ~ defaultDoc(gstats.showTreeContentDups), &gstats.showTreeContentDups,

                                    "elf-symbol-duplicates", "\tDetect & Show ELF Symbol Duplicates" ~ defaultDoc(gstats.showELFSymbolDups), &gstats.showELFSymbolDups,

                                    "duplicates|D", "\tDetect & Show file name and contents duplicates" ~ defaultDoc(gstats.showAnyDups), &gstats.showAnyDups,
                                    "duplicates-context", "\tDuplicates Detection Context. Among: " ~ enumDoc!DuplicatesContext, &gstats.duplicatesContext,
                                    "hardlink-content-duplicates", "\tConvert all content duplicates into hardlinks (common inode) if they reside on the same file system" ~ defaultDoc(gstats.linkContentDups), &gstats.linkContentDups,

                                    "usage", "\tShow disk usage (tree size) of scanned directories" ~ defaultDoc(gstats.showUsage), &gstats.showUsage,
                                    "count-lines", "\tShow line counts of scanned files" ~ defaultDoc(gstats.showLineCounts), &gstats.showLineCounts,

                                    "sha1", "\tShow SHA1 content digests" ~ defaultDoc(gstats.showSHA1), &gstats.showSHA1,

                                    "mmaps", "\tShow when files are memory mapped (mmaped)" ~ defaultDoc(gstats.showMMaps), &gstats.showMMaps,

                                    "follow-symlinks|f", "\tFollow symbolic links" ~ defaultDoc(gstats.followSymlinks), &gstats.followSymlinks,
                                    "broken-symlinks|l", "\tDetect & Show broken symbolic links (target is non-existing file) " ~ defaultDoc(gstats.showBrokenSymlinks), &gstats.showBrokenSymlinks,
                                    // No short alias here: `l` is already taken by broken-symlinks above.
                                    "show-symlink-cycles", "\tDetect & Show symbolic links cycles" ~ defaultDoc(gstats.showSymlinkCycles), &gstats.showSymlinkCycles,

                                    "add-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(addTags), &addTags,
                                    "remove-tag", "\tRemove tag string(s) from matching files" ~ defaultDoc(removeTags), &removeTags,

                                    "tree|W", "\tShow Scanned Tree and Followed Symbolic Links" ~ defaultDoc(gstats.showTree), &gstats.showTree,
                                    "sort|S", "\tDirectory contents sorting order. Among: " ~ enumDoc!DirSorting, &gstats.subsSorting,
                                    "build", "\tBuild Source Code. Among: " ~ enumDoc!BuildType, &gstats.buildType,

                                    "path-format", "\tFormat of paths. Among: " ~ enumDoc!PathFormat ~ "." ~ defaultDoc(gstats.pathFormat), &gstats.pathFormat,

                                    "cache-file|F", "\tFile System Tree Cache File" ~ defaultDoc(_cacheFile), &_cacheFile,
                                    "recache", "\tSkip initial load of cache from disk" ~ defaultDoc(gstats.recache), &gstats.recache,

                                    "do", "\tOperation to perform on matching files. Among: " ~ enumDoc!FOp, &gstats.fOp,

                                    "demangle-elf", "\tDemangle ELF files.", &gstats.demangleELF,

                                    "use-ngrams", "\tUse NGrams to cache statistics and thereby speed up search" ~ defaultDoc(gstats.useNGrams), &gstats.useNGrams,

                                    "html|H", "\tFormat output as HTML" ~ defaultDoc(gstats.useHTML), &gstats.useHTML,
                                    "browse|B", ("\tFormat output as HTML to a temporary file" ~
                                                 defaultDoc(_cacheFile) ~
                                                 " and open it with default Web browser" ~
                                                 defaultDoc(gstats.browseOutput)), &gstats.browseOutput,

                                    "author", "\tPrint name of\n"~"\tthe author",
                                    delegate() { writeln("Per Nordlöw"); }
            );

        // --duplicates switches on every individual kind of duplicate detection
        if (gstats.showAnyDups)
        {
            gstats.showNameDups = true;
            gstats.showLinkDups = true;
            gstats.showFileContentDups = true;
            gstats.showTreeContentDups = true;
            gstats.showELFSymbolDups = true;
        }
        if (helpPrinted)
            return;

        _cacheFile = std.path.expandTilde(_cacheFile);

        if (_topDirNames.empty)
        {
            _topDirNames = ["."];
        }
        // NOTE(review): this overwrites whatever --path-format set above - confirm intended.
        if (_topDirNames == ["."])
        {
            gstats.pathFormat = PathFormat.relative;
        }
        else
        {
            gstats.pathFormat = PathFormat.absolute;
        }
        foreach (ref topName; _topDirNames)
        {
            if (topName == ".")
            {
                topName = topName.absolutePath.buildNormalizedPath;
            }
            else
            {
                topName = topName.expandTilde.buildNormalizedPath;
            }
        }

        // Output Handling
        if (gstats.browseOutput)
        {
            gstats.useHTML = true;
            immutable ext = gstats.useHTML ? "html" : "results.txt";
            import std.uuid: randomUUID;
            outFile = ioFile("/tmp/fs-" ~ randomUUID.toString() ~
                             "." ~ ext,
                             "w");
            /* popen("gnome-open " ~ outFile.name); */
            popen("firefox -new-tab " ~ outFile.name);
        }
        else
        {
            outFile = stdout;
        }

        auto cwd = getcwd();

        foreach (arg; args[1..$])
        {
            if (!arg.startsWith("-")) // if argument not a flag
            {
                keys ~= arg;
            }
        }

        // Calc stats
        keysBists = keys.map!bistogramOverRepresentation;
        keysXGrams = keys.map!(sparseUIntNGramOverRepresentation!NGramOrder);
        keysBistsUnion = reduce!"a | b"(typeof(keysBists.front).init, keysBists);
        keysXGramsUnion = reduce!"a + b"(typeof(keysXGrams.front).init, keysXGrams);

        auto viz = new Viz(outFile,
                           &term,
                           gstats.showTree,
                           gstats.useHTML ? VizForm.HTML : VizForm.textAsciiDocUTF8,
                           gstats.colorFlag,
                           !gstats.useHTML, // only use if HTML
                           true, /+ TODO: Only set if in debug mode +/
            );

        if (gstats.useNGrams &&
            (!keys.empty) &&
            keysXGramsUnion.empty)
        {
            gstats.useNGrams = false;
            viz.ppln("Keys must be at least of length " ~
                     to!string(NGramOrder + 1) ~
                     " in order for " ~
                     keysXGrams[0].typeName ~
                     " to be calculated");
        }

        // viz.ppln("<meta http-equiv=\"refresh\" content=\"1\"/>"); // refresh every second

        if (selFKindNames)
        {
            foreach (lang; selFKindNames.splitterASCIIAmong!(","))
            {
                if (lang in gstats.allFKinds.byName) // try exact match
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang];
                }
                else if (lang.toLower in gstats.allFKinds.byName) // else try all in lower case
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang.toLower];
                }
                else if (lang.toUpper in gstats.allFKinds.byName) // else try all in upper case
                {
                    gstats.selFKinds ~= gstats.allFKinds.byName[lang.toUpper];
                }
                else
                {
                    writeln("warning: Language ", lang, " not registered");
                }
            }
            if (gstats.selFKinds.byIndex.empty)
            {
                writeln("warning: None of the languages ", to!string(selFKindNames), " are registered. Defaulting to all file types.");
                gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
            }
            else
            {
                gstats.selFKinds.rehash;
            }
        }
        else
        {
            gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds
        }

        // Keys
        auto commaedKeys = keys.joiner(",");
        const keysPluralExt = keys.length >= 2 ? "s" : "";
        string commaedKeysString = to!string(commaedKeys);
        if (keys)
        {
            selFKindsNote = " in " ~ (gstats.selFKinds == gstats.allFKinds ?
                                      "all " :
                                      gstats.selFKinds.byIndex.map!(a => a.kindName).join(",") ~ "-") ~ "files";
            immutable underNote = " under \"" ~ (_topDirNames.reduce!"a ~ ',' ~ b") ~ "\"";
            const exactNote = gstats.keyAsExact ? "exact " : "";
            string asNote;
            if (gstats.keyAsAcronym)
            {
                asNote = (" as " ~ exactNote ~
                          (gstats.keyAsWord ? "word" : "symbol") ~
                          " acronym" ~ keysPluralExt);
            }
            else if (gstats.keyAsSymbol)
            {
                asNote = " as " ~ exactNote ~ "symbol" ~ keysPluralExt;
            }
            else if (gstats.keyAsWord)
            {
                asNote = " as " ~ exactNote ~ "word" ~ keysPluralExt;
            }
            else
            {
                asNote = "";
            }

            const title = ("Searching for \"" ~ commaedKeysString ~ "\"" ~
                           " case-" ~ (_caseFold ? "in" : "") ~"sensitively"
                           ~asNote ~selFKindsNote ~underNote);
            if (viz.form == VizForm.HTML) // only needed for HTML output
            {
                viz.ppln(faze(title, titleFace));
            }

            viz.pp(asH!1("Searching for \"", commaedKeysString, "\"",
                         " case-", (_caseFold ? "in" : ""), "sensitively",
                         asNote, selFKindsNote,
                         " under ", _topDirNames.map!(a => a.asPath)));
        }

        if (listTxtFKinds)
        {
            viz.pp("Textual (Source) Kinds".asH!2,
                   gstats.txtFKinds.byIndex.asTable);
        }

        if (listBinFKinds)
        {
            viz.pp("Binary Kinds".asH!2,
                   gstats.binFKinds.byIndex.asTable);
        }

        /* binFKinds.asTable, */

        if (_showSkipped)
        {
            viz.pp("Skipping files of type".asH!2,
                   asUList(gstats.binFKinds.byIndex.map!(a => asItem(a.kindName.asBold,
                                                                     ": ",
                                                                     asCSL(a.exts.map!(b => b.asCode))))));
            viz.pp("Skipping directories of type".asH!2,
                   asUList(gstats.skippedDirKinds.map!(a => asItem(a.kindName.asBold,
                                                                   ": ",
                                                                   a.fileName.asCode))));
        }

        // if (key && key == key.toLower()) { // if search key is all lowercase
        //     _caseFold = true; // we do case-insensitive search like in Emacs
        // }

        _uid = getuid;
        _gid = getgid;

        // Setup root directory
        if (!gstats.recache)
        {
            GC.disable;
            gstats.rootDir = loadRootDirTree(viz, _cacheFile, gstats);
            GC.enable;
        }
        if (!gstats.rootDir) // if first time
        {
            gstats.rootDir = new Dir("/", gstats); // filesystem root directory. TODO: Make this uncopyable?
        }

        // Scan for exact key match
        gstats.topDirs = getDirs(enforceNotNull(gstats.rootDir), _topDirNames);

        _currTime = Clock.currTime;

        GC.disable;
        scanTopDirs(viz, commaedKeysString);
        GC.enable;

        GC.disable;
        saveRootDirTree(viz, gstats.rootDir, _cacheFile);
        GC.enable;

        // Print statistics
        showStats(viz);
    }

    /** Scan every directory in `gstats.topDirs` for `keys` and print a summary.

        When keys were given but nothing was found, and neither acronym nor
        exact matching was requested, the scan is repeated once with acronym
        matching switched on.

        Params:
            viz = output to print to
            commaedKeysString = the keys joined by commas, used in messages
    */
    void scanTopDirs(Viz viz,
                     string commaedKeysString)
    {
        viz.pp("Results".asH!2);
        if (gstats.topDirs)
        {
            foreach (topIndex, topDir; gstats.topDirs)
            {
                scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
                if (ctrlC)
                {
                    auto restDirs = gstats.topDirs[topIndex + 1..$];
                    if (!restDirs.empty)
                    {
                        debug dbg("Ctrl-C pressed: Skipping search of " ~ to!string(restDirs));
                        break;
                    }
                }
            }

            viz.pp("Summary".asH!2);

            if ((gstats.noScannedFiles - gstats.noScannedDirs) == 0)
            {
                viz.ppln("No files with any content found");
            }
            else
            {
                // Scan for acronym key match
                if (keys && _hitsCountTotal == 0) // if keys given but no hit found
                {
                    auto keysString = (keys.length >= 2 ? "s" : "") ~ " \"" ~ commaedKeysString;
                    if (gstats.keyAsAcronym)
                    {
                        viz.ppln(("No acronym matches for key" ~ keysString ~ `"` ~
                                  (gstats.keyAsSymbol ? " as symbol" : "") ~
                                  " found in files of type"));
                    }
                    else if (!gstats.keyAsExact)
                    {
                        viz.ppln(("No exact matches for key" ~ keysString ~ `"` ~
                                  (gstats.keyAsSymbol ? " as symbol" : "") ~
                                  " found" ~ selFKindsNote ~
                                  ". Relaxing scan to" ~ (gstats.keyAsSymbol ? " symbol" : "") ~ " acronym match."));
                        gstats.keyAsAcronym = true;

                        foreach (topDir; gstats.topDirs)
                        {
                            scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys);
                        }
                    }
                }
            }
        }

        assert(gstats.noScannedDirs +
               gstats.noScannedRegFiles +
               gstats.noScannedSymlinks +
               gstats.noScannedSpecialFiles == gstats.noScannedFiles);
    }

    version (linux)
    {
        /** Check the read permission bits of `stat` for user `uid` and group `gid`.

            Params:
                stat = status of the file to check
                uid = user id to check for
                gid = group id to check for
                msg = set, only when the file is not readable, to a text saying
                      who can read it instead

            Returns: `true` if the owner-read bit is set and `uid` owns the
            file, or the group-read bit is set and `gid` is the file's group,
            or the others-read bit is set.
        */
        @trusted bool readable(in stat_t stat, uid_t uid, gid_t gid, ref string msg)
        {
            immutable mode = stat.st_mode;
            immutable ok = ((stat.st_uid == uid) && (mode & S_IRUSR) ||
                            (stat.st_gid == gid) && (mode & S_IRGRP) ||
                            (mode & S_IROTH));
            if (!ok)
            {
                msg = " is not readable by you, but only by";
                bool can = false; // someone can access
                if (mode & S_IRUSR)
                {
                    can = true;
                    msg ~= " user id " ~ to!string(stat.st_uid);

                    // Lookup user name from user id
                    passwd pw;
                    passwd* pw_ret;
                    immutable size_t bufsize = 16384;
                    char* buf = cast(char*)core.stdc.stdlib.malloc(bufsize);
                    if (buf !is null) // skip the name lookup if allocation failed
                    {
                        getpwuid_r(stat.st_uid, &pw, buf, bufsize, &pw_ret);
                        if (pw_ret != null)
                        {
                            // to!string copies the zero-terminated C string
                            // before `buf` is freed below
                            msg ~= " (" ~ to!string(pw.pw_name) ~ ")";

                            // string realName;
                            // {
                            //     size_t n = 0;
                            //     while (pw.pw_gecos[n] != 0)
                            //     {
                            //         realName ~= pw.pw_gecos[n];
                            //         n++;
                            //     }
                            // }
                        }
                        core.stdc.stdlib.free(buf);
                    }
                }
                if (mode & S_IRGRP)
                {
                    // `msg` is never empty here, so the conjunction has to
                    // depend on whether a user part was already written.
                    if (can)
                    {
                        msg ~= " or";
                    }
                    can = true;
                    msg ~= " group id " ~ to!string(stat.st_gid);
                }
                if (!can)
                {
                    msg ~= " root";
                }
            }
            return ok;
        }
    }

    Results results;

    void handleError(F)(Viz viz,
                        NotNull!F file, bool isDir, size_t subIndex)
    {
        auto dent = DirEntry(file.path);
immutable stat_t stat = dent.statBuf;
        string msg;
        if (!readable(stat, _uid, _gid, msg))
        {
            results.noBytesUnreadable += dent.size;
            if (_showSkipped)
            {
                if (gstats.showTree)
                {
                    auto parentDir = file.parent;
                    immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
                    viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
                }
                viz.ppln(file,
                         ": ", isDir ? "Directory" : "File",
                         faze(msg, warnFace));
            }
        }
    }

    /** Print that `regFile` of kind `kind` was skipped because of `skipCause`.

        Prints only when showing of skipped files is enabled; a tree prefix is
        added first when the tree view is enabled.
    */
    void printSkipped(Viz viz,
                      NotNull!RegFile regFile,
                      size_t subIndex,
                      const NotNull!FKind kind, KindHit kindhit,
                      const string skipCause)
    {
        auto parentDir = regFile.parent;
        if (_showSkipped)
        {
            if (gstats.showTree)
            {
                immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├";
                viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ ");
            }
            viz.pp(horizontalRuler,
                   asH!3(regFile,
                         ": Skipped ", kind, " file",
                         skipCause));
        }
    }

    size_t _scanChunkSize;

    /** Determine whether `regFile` is of one of the selected file kinds.

        Tries, in order: the kind id cached in the file, the selected kinds
        registered for the file's extension, and finally every selected kind.

        Returns: How the kind was matched, `KindHit.none` if it was not.
    */
    KindHit isSelectedFKind(NotNull!RegFile regFile) @safe /* nothrow */
    {
        typeof(return) kindHit = KindHit.none;
        FKind hitKind;

        // Try cached kind first
        // First Try with kindId as try
        if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
        {
            if (regFile._cstat.kindId in gstats.selFKinds.byId)
            {
                hitKind = gstats.selFKinds.byId[regFile._cstat.kindId];
                kindHit = KindHit.cached;
                return kindHit;
            }
        }

        immutable ext = regFile.realExtension;

        // Try with hash table first
        if (!ext.empty && // if file has extension and
            ext in gstats.selFKinds.byExt) // and extensions may match specified included files
        {
            auto possibleKinds = gstats.selFKinds.byExt[ext];
            foreach (kind; possibleKinds)
            {
                auto nnKind = enforceNotNull(kind);
                immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
                if (hit)
                {
                    hitKind = nnKind;
                    kindHit = hit;
                    break;
                }
            }
        }

        if (!hitKind) // if no hit yet
        {
            // blindly try the rest
            foreach (kind; gstats.selFKinds.byIndex)
            {
                auto nnKind = enforceNotNull(kind);
                immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds);
                if (hit)
                {
                    hitKind = nnKind;
                    kindHit = hit;
                    break;
                }
            }
        }

        return kindHit;
    }

    /** Search for Keys `keys` in Source `src`.

        `src` is scanned line by line. Each hit is printed together with the
        text around it and counted in the returned value, in
        `parentDir.hitCount` and in `_hitsCountTotal`.

        Params:
            viz = output to print hits to
            topDir = top directory of the scan (not used in this function)
            theFile = file that `src` belongs to
            parentDir = directory containing `theFile`
            fromSymlinks = symbolic links printed in front of the file path
            src = text to search in
            keys = keys to search for
            bistHits = when non-empty, keys whose entry is `false` are skipped
            ctx = what `src` is: a file name or file content

        Returns: Number of hits found in `src`.
     */
    size_t scanForKeys(Source, Keys)(Viz viz,
                                     NotNull!Dir topDir,
                                     NotNull!File theFile,
                                     NotNull!Dir parentDir,
                                     ref Symlink[] fromSymlinks,
                                     in Source src,
                                     in Keys keys,
                                     in bool[] bistHits = [],
                                     ScanContext ctx = ScanContext.standard)
    {
        bool anyFileHit = false; // will become true if any hit in this file

        typeof(return) hitCount = 0;

        import std.ascii: newline;

        auto thisFace = stdFace;
        if (gstats.colorFlag)
        {
            // Compare the context actually passed in; testing the enum member
            // itself is a constant condition.
            if (ctx == ScanContext.fileName)
            {
                thisFace = fileFace;
            }
        }

        size_t nL = 0; // line counter
        foreach (line; src.splitterASCIIAmong!(newline))
        {
            auto rest = cast(string)line; // rest of line as a string

            bool anyLineHit = false; // will become true if any hit on current line
            // Hit search loop
            while (!rest.empty)
            {
                // Find any key

                /* TODO: Convert these to a range. */
                ptrdiff_t offKB = -1;
                ptrdiff_t offKE = -1;

                foreach (uint ix, key; keys) /+ TODO: Call variadic-find instead to speed things up. +/
                {
                    /* Bistogram Discardal */
                    if ((!bistHits.empty) &&
                        !bistHits[ix]) // if neither exact nor acronym match possible
                    {
                        continue; // try next key
                    }

                    /* dbg("key:", key, " line:", line); */
                    ptrdiff_t[] acronymOffsets;
                    if (gstats.keyAsAcronym) // acronym search
                    {
                        auto hit = (cast(immutable ubyte[])rest).findAcronymAt(key,
                                                                                gstats.keyAsSymbol ? FindContext.inSymbol : FindContext.inWord);
                        if (!hit[0].empty)
                        {
                            acronymOffsets = hit[1];
                            offKB = hit[1][0];
                            offKE = hit[1][$-1] + 1;
                        }
                        else
                        {
                            // Forget offsets left over from a previous key whose
                            // context check failed; `rest` has moved on since,
                            // so they no longer refer to this text.
                            offKB = -1;
                            offKE = -1;
                        }
                    }
                    else
                    { // normal search
                        import std.string: indexOf;
                        offKB = rest.indexOf(key,
                                             _caseFold ? CaseSensitive.no : CaseSensitive.yes); // hit begin offset
                        offKE = offKB + key.length; // hit end offset
                    }

                    if (offKB >= 0) // if hit
                    {
                        if (!gstats.showTree && ctx == ScanContext.fileName)
                        {
                            viz.pp(parentDir, dirSeparator);
                        }

                        // Check Context
                        if ((gstats.keyAsSymbol && !isSymbolASCII(rest, offKB, offKE)) ||
                            (gstats.keyAsWord && !isWordASCII (rest, offKB, offKE)))
                        {
                            rest = rest[offKE..$]; // move forward in line
                            continue;
                        }

                        if (ctx == ScanContext.fileContent &&
                            !anyLineHit) // if this is first hit
                        {
                            if (viz.form == VizForm.HTML)
                            {
                                if (!anyFileHit)
                                {
                                    viz.pp(horizontalRuler,
                                           displayedFilename(gstats, theFile).asPath.asH!3);
                                    viz.ppTagOpen(`table`, `border=1`);
                                    anyFileHit = true;
                                }
                            }
                            else
                            {
                                if (gstats.showTree)
                                {
                                    viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ "├" ~ "─ ");
                                }
                                else
                                {
                                    foreach (fromSymlink; fromSymlinks)
                                    {
                                        viz.pp(fromSymlink,
                                               " modified ",
                                               faze(shortDurationString(_currTime - fromSymlink.timeLastModified),
                                                    timeFace),
                                               " ago",
                                               " -> ");
                                    }
                                    // show file path/name
                                    viz.pp(displayedFilename(gstats, theFile).asPath); // show path
                                }
                            }

                            // show line:column
                            if (viz.form == VizForm.HTML)
                            {
                                viz.ppTagOpen("tr");
                                viz.pp(to!string(nL+1).asCell,
                                       to!string(offKB+1).asCell);
                                viz.ppTagOpen("td");
                                viz.ppTagOpen("code");
                            }
                            else
                            {
                                viz.pp(faze(":" ~ to!string(nL+1) ~ ":" ~ to!string(offKB+1) ~ ":",
                                            contextFace));
                            }
                            anyLineHit = true;
                        }

                        // show content prefix
                        viz.pp(faze(to!string(rest[0..offKB]), thisFace));

                        // show hit part
                        if (!acronymOffsets.empty)
                        {
                            foreach (aIndex, currOff; acronymOffsets) /+ TODO: Reuse std.algorithm: zip or lockstep? Or create a new kind say named conv. +/
                            {
                                // context before
                                if (aIndex >= 1)
                                {
                                    immutable prevOff = acronymOffsets[aIndex-1];
                                    if (prevOff + 1 < currOff) // at least one letter in between
                                    {
                                        viz.pp(asCtx(ix, to!string(rest[prevOff + 1 .. currOff])));
                                    }
                                }
                                // hit letter
                                viz.pp(asHit(ix, to!string(rest[currOff])));
                            }
                        }
                        else
                        {
                            viz.pp(asHit(ix, to!string(rest[offKB..offKE])));
                        }

                        rest = rest[offKE..$]; // move forward in line

                        hitCount++; // increase hit count
                        parentDir.hitCount++;
                        _hitsCountTotal++;

                        goto foundHit;
                    }
                }
            foundHit:
                if (offKB == -1) { break; }
            }

            // finalize line
            if (anyLineHit)
            {
                // show final context suffix
                viz.ppln(faze(rest, thisFace));
                if (viz.form == VizForm.HTML)
                {
                    viz.ppTagClose("code");
                    viz.ppTagClose("td");
                    viz.pplnTagClose("tr");
                }
            }
            nL++;
        }

        if (gstats.showLineCounts)
        {
            gstats.lineCountsByFile[theFile] = nL;
        }

        if (anyFileHit)
        {
            viz.pplnTagClose("table");
        }

        // Previous solution
        // version (none)
        // {
        //     ptrdiff_t offHit = 0;
        //     foreach (ix, key; keys)
        //     {
        //         scope immutable hit1 = src.find(key); // single key hit
        //         offHit = hit1.ptr - src.ptr;
        //         if (!hit1.empty)
        //         {
        //             scope immutable src0 = src[0..offHit]; // src before hit
        //             immutable rowHit = count(src0, newline);
        //             immutable colHit = src0.retro.countUntil(newline); // count backwards till beginning of rowHit
        //             immutable offBOL = offHit - colHit;
        //             immutable cntEOL = src[offHit..$].countUntil(newline); // count forwards to end of rowHit
        //             immutable offEOL = (cntEOL == -1 ? // if no hit
        //                                 src.length : // end of file
        //                                 offHit + cntEOL); // normal case
        //             viz.pp(faze(asPath(gstats.useHTML, dent.name), pathFace));
        //             viz.ppln(":", rowHit + 1,
        //                      ":", colHit + 1,
        //                      ":", cast(string)src[offBOL..offEOL]);
        //         }
        //     }
        // }

        // switch (keys.length)
        // {
        // default:
        //     break;
        // case 0:
        //     break;
        // case 1:
        //     immutable hit1 = src.find(keys[0]);
        //     if (!hit1.empty)
        //     {
        //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit1.length);
        //     }
        //     break;
        // // case 2:
        // //     immutable hit2 = src.find(keys[0], keys[1]); // find two keys
        // //     if (!hit2[0].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit2[0].length); }
        // //     if (!hit2[1].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit2[1].length); }
        // //     break;
        // // case 3:
        // //     immutable hit3 = src.find(keys[0], keys[1], keys[2]); // find two keys
        // //     if (!hit3.empty)
        // //     {
        // //         viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit1.length);
        // //     }
        // //     break;
        // }
        return hitCount;
    }

    /** Process Regular File `theRegFile`.
*/ 5017 void processRegFile(Viz viz, 5018 NotNull!Dir topDir, 5019 NotNull!RegFile theRegFile, 5020 NotNull!Dir parentDir, 5021 const string[] keys, 5022 ref Symlink[] fromSymlinks, 5023 size_t subIndex, 5024 GStats gstats) 5025 { 5026 scanRegFile(viz, 5027 topDir, 5028 theRegFile, 5029 parentDir, 5030 keys, 5031 fromSymlinks, 5032 subIndex); 5033 5034 // check for operations 5035 /+ TODO: Reuse isSelectedFKind instead of this +/ 5036 immutable ext = theRegFile.realExtension; 5037 if (ext in gstats.selFKinds.byExt) 5038 { 5039 auto matchingFKinds = gstats.selFKinds.byExt[ext]; 5040 foreach (kind; matchingFKinds) 5041 { 5042 const hit = kind.operations.find!(a => a[0] == gstats.fOp); 5043 if (!hit.empty) 5044 { 5045 const fOp = hit.front; 5046 const cmd = fOp[1]; // command string 5047 import std.process: spawnProcess; 5048 import std.algorithm: splitter; 5049 dbg("TODO: Performing operation ", to!string(cmd), 5050 " on ", theRegFile.path, 5051 " by calling it using ", cmd); 5052 auto pid = spawnProcess(cmd.splitterASCIIAmong!(" ").array ~ [theRegFile.path]); 5053 } 5054 } 5055 } 5056 } 5057 5058 /** Scan `elfFile` for ELF Symbols. */ 5059 void scanELFFile(Viz viz, 5060 NotNull!RegFile elfFile, 5061 const string[] keys, 5062 GStats gstats) 5063 { 5064 import nxt.elfdoc: sectionNameExplanations; 5065 /* TODO: Add mouse hovering help for sectionNameExplanations[section] */ 5066 dbg("before: ", elfFile); 5067 ELF decoder = ELF.fromFile(elfFile._mmfile); 5068 dbg("after: ", elfFile); 5069 5070 /* foreach (section; decoder.sections) */ 5071 /* { */ 5072 /* if (section.name.length) */ 5073 /* { */ 5074 /* /\* auto sst = section.StringTable; *\/ */ 5075 /* //writeln("ELF Section named ", section.name); */ 5076 /* } */ 5077 /* } */ 5078 5079 /* const sectionNames = [".symtab"/\* , ".strtab", ".dynsym" *\/]; /+ TODO: These two other sections causes range exceptions. 
*/ +/ 5080 /* foreach (sectionName; sectionNames) */ 5081 /* { */ 5082 /* auto sts = decoder.getSection(sectionName); */ 5083 /* if (!sts.isNull) */ 5084 /* { */ 5085 /* SymbolTable symtab = SymbolTable(sts); */ 5086 /* /+ TODO: Use range: auto symbolsDemangled = symtab.symbols.map!(sym => demangler(sym.name).decodeSymbol); */ +/ 5087 /* foreach (sym; symtab.symbols) // you can add filters here */ 5088 /* { */ 5089 /* if (gstats.demangleELF) */ 5090 /* { */ 5091 /* const hit = demangler(sym.name).decodeSymbol; */ 5092 /* } */ 5093 /* else */ 5094 /* { */ 5095 /* writeln("?: ", sym.name); */ 5096 /* } */ 5097 /* } */ 5098 /* } */ 5099 /* } */ 5100 5101 auto sst = decoder.getSymbolsStringTable; 5102 if (!sst.isNull) 5103 { 5104 import nxt.algorithm_ex: findFirstOfAnyInOrder; 5105 import std.range : tee; 5106 5107 auto scan = (sst.strings 5108 .filter!(raw => !raw.empty) // skip empty raw string 5109 .tee!(raw => gstats.elfFilesBySymbol[raw.idup] ~= elfFile) // WARNING: needs raw.idup here because we can't rever to raw 5110 .map!(raw => demangler(raw).decodeSymbol) 5111 .filter!(demangling => (!keys.empty && // don't show anything if no keys given 5112 demangling.unmangled.findFirstOfAnyInOrder(keys)[1]))); // I love D :) 5113 5114 if (!scan.empty && 5115 `ELF` in gstats.selFKinds.byName) // if user selected ELF file show them 5116 { 5117 viz.pp(horizontalRuler, 5118 displayedFilename(gstats, elfFile).asPath.asH!3, 5119 asH!4(`ELF Symbol Strings Table (`, `.strtab`.asCode, `)`), 5120 scan.asTable); 5121 } 5122 } 5123 } 5124 5125 /** Search for Keys `keys` in Regular File `theRegFile`. 
*/ 5126 void scanRegFile(Viz viz, 5127 NotNull!Dir topDir, 5128 NotNull!RegFile theRegFile, 5129 NotNull!Dir parentDir, 5130 const string[] keys, 5131 ref Symlink[] fromSymlinks, 5132 size_t subIndex) 5133 { 5134 results.noBytesTotal += theRegFile.size; 5135 results.noBytesTotalContents += theRegFile.size; 5136 5137 // Scan name 5138 if ((gstats.scanContext == ScanContext.all || 5139 gstats.scanContext == ScanContext.fileName || 5140 gstats.scanContext == ScanContext.regularFilename) && 5141 !keys.empty) 5142 { 5143 immutable hitCountInName = scanForKeys(viz, 5144 topDir, cast(NotNull!File)theRegFile, parentDir, 5145 fromSymlinks, 5146 theRegFile.name, keys, [], ScanContext.fileName); 5147 } 5148 5149 // Scan Contents 5150 if ((gstats.scanContext == ScanContext.all || 5151 gstats.scanContext == ScanContext.fileContent) && 5152 (gstats.showFileContentDups || 5153 gstats.showELFSymbolDups || 5154 !keys.empty) && 5155 theRegFile.size != 0) // non-empty file 5156 { 5157 // immutable upTo = size_t.max; 5158 5159 /+ TODO: Flag for readText +/ 5160 try 5161 { 5162 ++gstats.noScannedRegFiles; 5163 ++gstats.noScannedFiles; 5164 5165 // ELF Symbols 5166 if (gstats.showELFSymbolDups && 5167 theRegFile.ofKind(`ELF`, gstats.collectTypeHits, gstats.allFKinds)) 5168 { 5169 scanELFFile(viz, theRegFile, keys, gstats); 5170 } 5171 5172 // Check included kinds first because they are fast. 5173 KindHit incKindHit = isSelectedFKind(theRegFile); 5174 if (!gstats.selFKinds.byIndex.empty && /+ TODO: Do we really need this one? +/ 5175 !incKindHit) 5176 { 5177 return; 5178 } 5179 5180 // Super-Fast Key-File Bistogram Discardal. TODO: Trim scale factor to optimal value. 5181 enum minFileSize = 256; // minimum size of file for discardal. 
5182 immutable bool doBist = theRegFile.size > minFileSize; 5183 immutable bool doNGram = (gstats.useNGrams && 5184 (!gstats.keyAsSymbol) && 5185 theRegFile.size > minFileSize); 5186 immutable bool doBitStatus = true; 5187 5188 // Chunked Calculation of CStat in one pass. TODO: call async. 5189 theRegFile.calculateCStatInChunks(gstats.filesByContentId, 5190 _scanChunkSize, 5191 gstats.showFileContentDups, 5192 doBist, 5193 doBitStatus); 5194 5195 // Match Bist of Keys with BistX of File 5196 bool[] bistHits; 5197 bool noBistMatch = false; 5198 if (doBist) 5199 { 5200 const theHist = theRegFile.bistogram8; 5201 auto hitsHist = keysBists.map!(a => 5202 ((a.value & theHist.value) == 5203 a.value)); /+ TODO: Functionize to x.subsetOf(y) or reuse std.algorithm: setDifference or similar +/ 5204 bistHits = hitsHist.map!`a == true`.array; 5205 noBistMatch = hitsHist.all!`a == false`; 5206 } 5207 /* int kix = 0; */ 5208 /* foreach (hit; bistHits) { if (!hit) { debug dbg(`Assert key ` ~ keys[kix] ~ ` not in file ` ~ theRegFile.path); } ++kix; } */ 5209 5210 bool allXGramsMiss = false; 5211 if (doNGram) 5212 { 5213 ulong keysXGramUnionMatch = keysXGramsUnion.matchDenser(theRegFile.xgram); 5214 debug dbg(theRegFile.path, 5215 ` sized `, theRegFile.size, ` : `, 5216 keysXGramsUnion.length, `, `, 5217 theRegFile.xgram.length, 5218 ` gave match:`, keysXGramUnionMatch); 5219 allXGramsMiss = keysXGramUnionMatch == 0; 5220 } 5221 5222 auto binHit = theRegFile.ofAnyKindIn(gstats.binFKinds, 5223 gstats.collectTypeHits); 5224 const binKindHit = binHit[0]; 5225 if (binKindHit) 5226 { 5227 import nxt.numerals: toOrdinal; 5228 const nnKind = binHit[1].enforceNotNull; 5229 const kindIndex = binHit[2]; 5230 if (_showSkipped) 5231 { 5232 if (gstats.showTree) 5233 { 5234 immutable intro = subIndex == parentDir.subs.length - 1 ? 
`└` : `├`; 5235 viz.pp(`│ `.repeat(parentDir.depth + 1).join(``) ~ intro ~ `─ `); 5236 } 5237 viz.ppln(theRegFile, `: Skipped `, nnKind, ` file at `, 5238 toOrdinal(kindIndex + 1), ` blind try`); 5239 } 5240 final switch (binKindHit) 5241 { 5242 case KindHit.none: 5243 break; 5244 case KindHit.cached: 5245 printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit, 5246 ` using cached KindId`); 5247 break; 5248 case KindHit.uncached: 5249 printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit, 5250 ` at ` ~ toOrdinal(kindIndex + 1) ~ ` extension try`); 5251 break; 5252 } 5253 } 5254 5255 if (binKindHit != KindHit.none || 5256 noBistMatch || 5257 allXGramsMiss) // or no hits possible. TODO: Maybe more efficient to do histogram discardal first 5258 { 5259 results.noBytesSkipped += theRegFile.size; 5260 } 5261 else 5262 { 5263 // Search if not Binary 5264 5265 // If Source file is ok 5266 auto src = theRegFile.readOnlyContents[]; 5267 5268 results.noBytesScanned += theRegFile.size; 5269 5270 if (keys) 5271 { 5272 // Fast discardal of files with no match 5273 bool fastOk = true; 5274 if (!_caseFold) { // if no relaxation of search 5275 if (gstats.keyAsAcronym) // if no relaxation of search 5276 { 5277 /* TODO: Reuse findAcronym in algorith_ex. */ 5278 } 5279 else // if no relaxation of search 5280 { 5281 switch (keys.length) 5282 { 5283 default: break; 5284 case 1: immutable hit1 = src.find(keys[0]); fastOk = !hit1.empty; break; 5285 // case 2: immutable hit2 = src.find(keys[0], keys[1]); fastOk = !hit2[0].empty; break; 5286 // case 3: immutable hit3 = src.find(keys[0], keys[1], keys[2]); fastOk = !hit3[0].empty; break; 5287 // case 4: immutable hit4 = src.find(keys[0], keys[1], keys[2], keys[3]); fastOk = !hit4[0].empty; break; 5288 // case 5: immutable hit5 = src.find(keys[0], keys[1], keys[2], keys[3], keys[4]); fastOk = !hit5[0].empty; break; 5289 } 5290 } 5291 } 5292 5293 /+ TODO: Continue search from hit1, hit2 etc. 
+/ 5294 5295 if (fastOk) 5296 { 5297 foreach (tag; addTags) gstats.ftags.addTag(theRegFile, tag); 5298 foreach (tag; removeTags) gstats.ftags.removeTag(theRegFile, tag); 5299 5300 if (theRegFile.size >= 8192) 5301 { 5302 /* if (theRegFile.xgram == null) { */ 5303 /* theRegFile.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */ 5304 /* } */ 5305 /* (*theRegFile.xgram).put(src); */ 5306 /* theRegFile.xgram.put(src); */ 5307 /* foreach (lix, ub0; line) { // for each ubyte in line */ 5308 /* if (lix + 1 < line.length) { */ 5309 /* immutable ub1 = line[lix + 1]; */ 5310 /* immutable dix = (cast(ushort)ub0 | */ 5311 /* cast(ushort)ub1*256); */ 5312 /* (*theRegFile.xgram)[dix] = true; */ 5313 /* } */ 5314 /* } */ 5315 auto shallowDenseness = theRegFile.bistogram8.denseness; 5316 auto deepDenseness = theRegFile.xgramDeepDenseness; 5317 // assert(deepDenseness >= 1); 5318 gstats.shallowDensenessSum += shallowDenseness; 5319 gstats.deepDensenessSum += deepDenseness; 5320 ++gstats.densenessCount; 5321 /* dbg(theRegFile.path, `:`, theRegFile.size, */ 5322 /* `, length:`, theRegFile.xgram.length, */ 5323 /* `, deepDenseness:`, deepDenseness); */ 5324 } 5325 5326 theRegFile._cstat.hitCount = scanForKeys(viz, 5327 topDir, cast(NotNull!File)theRegFile, parentDir, 5328 fromSymlinks, 5329 src, keys, bistHits, 5330 ScanContext.fileContent); 5331 } 5332 } 5333 } 5334 5335 } 5336 catch (FileException) 5337 { 5338 handleError(viz, theRegFile, false, subIndex); 5339 } 5340 catch (ErrnoException) 5341 { 5342 handleError(viz, theRegFile, false, subIndex); 5343 } 5344 theRegFile.freeContents; /+ TODO: Call lazily only when open count is too large +/ 5345 } 5346 } 5347 5348 /** Scan Symlink `symlink` at `parentDir` for `keys` 5349 Put results in `results`. 
*/ 5350 void scanSymlink(Viz viz, 5351 NotNull!Dir topDir, 5352 NotNull!Symlink theSymlink, 5353 NotNull!Dir parentDir, 5354 const string[] keys, 5355 ref Symlink[] fromSymlinks) 5356 { 5357 // check for symlink cycles 5358 if (!fromSymlinks.find(theSymlink).empty) 5359 { 5360 if (gstats.showSymlinkCycles) 5361 { 5362 import std.range: back; 5363 viz.ppln(`Cycle of symbolic links: `, 5364 fromSymlinks.asPath, 5365 ` -> `, 5366 fromSymlinks.back.target); 5367 } 5368 return; 5369 } 5370 5371 // Scan name 5372 if ((gstats.scanContext == ScanContext.all || 5373 gstats.scanContext == ScanContext.fileName || 5374 gstats.scanContext == ScanContext.symlinkName) && 5375 !keys.empty) 5376 { 5377 scanForKeys(viz, 5378 topDir, cast(NotNull!File)theSymlink, enforceNotNull(theSymlink.parent), 5379 fromSymlinks, 5380 theSymlink.name, keys, [], ScanContext.fileName); 5381 } 5382 5383 // try { 5384 // results.noBytesTotal += dent.size; 5385 // } catch (Exception) 5386 // { 5387 // dbg(`Couldn't get size of `, dir.name); 5388 // } 5389 if (gstats.followSymlinks == SymlinkFollowContext.none) { return; } 5390 5391 import std.range: popBackN; 5392 fromSymlinks ~= theSymlink; 5393 immutable targetPath = theSymlink.absoluteNormalizedTargetPath; 5394 if (targetPath.exists) 5395 { 5396 theSymlink._targetStatus = SymlinkTargetStatus.present; 5397 if (_topDirNames.all!(a => !targetPath.startsWith(a))) { // if target path lies outside of all rootdirs 5398 auto targetDent = DirEntry(targetPath); 5399 auto targetFile = getFile(enforceNotNull(gstats.rootDir), targetPath, targetDent.isDir); 5400 5401 if (gstats.showTree) 5402 { 5403 viz.ppln(`│ `.repeat(parentDir.depth + 1).join(``) ~ `├` ~ `─ `, 5404 theSymlink, 5405 ` modified `, 5406 faze(shortDurationString(_currTime - theSymlink.timeLastModified), 5407 timeFace), 5408 ` ago`, ` -> `, 5409 targetFile.asPath, 5410 faze(` outside of ` ~ (_topDirNames.length == 1 ? 
`tree ` : `all trees `), 5411 infoFace), 5412 gstats.topDirs.asPath, 5413 faze(` is followed`, infoFace)); 5414 } 5415 5416 ++gstats.noScannedSymlinks; 5417 ++gstats.noScannedFiles; 5418 5419 if (auto targetRegFile = cast(RegFile)targetFile) 5420 { 5421 processRegFile(viz, topDir, assumeNotNull(targetRegFile), parentDir, keys, fromSymlinks, 0, gstats); 5422 } 5423 else if (auto targetDir = cast(Dir)targetFile) 5424 { 5425 scanDir(viz, topDir, assumeNotNull(targetDir), keys, fromSymlinks); 5426 } 5427 else if (auto targetSymlink = cast(Symlink)targetFile) // target is a Symlink 5428 { 5429 scanSymlink(viz, topDir, 5430 assumeNotNull(targetSymlink), 5431 enforceNotNull(targetSymlink.parent), 5432 keys, fromSymlinks); 5433 } 5434 } 5435 } 5436 else 5437 { 5438 theSymlink._targetStatus = SymlinkTargetStatus.broken; 5439 5440 if (gstats.showBrokenSymlinks) 5441 { 5442 _brokenSymlinks ~= theSymlink; 5443 5444 foreach (ix, fromSymlink; fromSymlinks) 5445 { 5446 if (gstats.showTree && ix == 0) 5447 { 5448 immutable intro = `├`; 5449 viz.pp(`│ `.repeat(theSymlink.parent.depth + 1).join(``) ~ intro ~ `─ `, 5450 theSymlink); 5451 } 5452 else 5453 { 5454 viz.pp(fromSymlink); 5455 } 5456 viz.pp(` -> `); 5457 } 5458 5459 viz.ppln(faze(theSymlink.target, missingSymlinkTargetFace), 5460 faze(` is missing`, warnFace)); 5461 } 5462 } 5463 fromSymlinks.popBackN(1); 5464 } 5465 5466 /** Scan Directory `parentDir` for `keys`. 
*/ 5467 void scanDir(Viz viz, 5468 NotNull!Dir topDir, 5469 NotNull!Dir theDir, 5470 const string[] keys, 5471 Symlink[] fromSymlinks = [], 5472 int maxDepth = -1) 5473 { 5474 if (theDir.isRoot) { results.reset; } 5475 5476 // scan in directory name 5477 if ((gstats.scanContext == ScanContext.all || 5478 gstats.scanContext == ScanContext.fileName || 5479 gstats.scanContext == ScanContext.dirName) && 5480 !keys.empty) 5481 { 5482 scanForKeys(viz, 5483 topDir, 5484 cast(NotNull!File)theDir, 5485 enforceNotNull(theDir.parent), 5486 fromSymlinks, 5487 theDir.name, keys, [], ScanContext.fileName); 5488 } 5489 5490 try 5491 { 5492 size_t subIndex = 0; 5493 if (gstats.showTree) 5494 { 5495 immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`; 5496 5497 viz.pp(`│ `.repeat(theDir.depth).join(``) ~ intro ~ 5498 `─ `, theDir, ` modified `, 5499 faze(shortDurationString(_currTime - 5500 theDir.timeLastModified), 5501 timeFace), 5502 ` ago`); 5503 5504 if (gstats.showUsage) 5505 { 5506 viz.pp(` of Tree-Size `, theDir.treeSize); 5507 } 5508 5509 if (gstats.showSHA1) 5510 { 5511 viz.pp(` with Tree-Content-Id `, theDir.treeContentId); 5512 } 5513 viz.ppendl; 5514 } 5515 5516 ++gstats.noScannedDirs; 5517 ++gstats.noScannedFiles; 5518 5519 auto subsSorted = theDir.subsSorted(gstats.subsSorting); 5520 foreach (key, sub; subsSorted) 5521 { 5522 /* TODO: Functionize to scanFile */ 5523 if (auto regFile = cast(RegFile)sub) 5524 { 5525 processRegFile(viz, topDir, assumeNotNull(regFile), theDir, keys, fromSymlinks, subIndex, gstats); 5526 } 5527 else if (auto subDir = cast(Dir)sub) 5528 { 5529 if (maxDepth == -1 || // if either all levels or 5530 maxDepth >= 1) { // levels left 5531 if (sub.name in gstats.skippedDirKindsMap) // if sub should be skipped 5532 { 5533 if (_showSkipped) 5534 { 5535 if (gstats.showTree) 5536 { 5537 immutable intro = subIndex == theDir.subs.length - 1 ? 
`└` : `├`; 5538 viz.pp(`│ `.repeat(theDir.depth + 1).join(``) ~ intro ~ `─ `); 5539 } 5540 5541 viz.pp(subDir, 5542 ` modified `, 5543 faze(shortDurationString(_currTime - 5544 subDir.timeLastModified), 5545 timeFace), 5546 ` ago`, 5547 faze(`: Skipped Directory of type `, infoFace), 5548 gstats.skippedDirKindsMap[sub.name].kindName); 5549 } 5550 } 5551 else 5552 { 5553 scanDir(viz, topDir, 5554 assumeNotNull(subDir), 5555 keys, 5556 fromSymlinks, 5557 maxDepth >= 0 ? --maxDepth : maxDepth); 5558 } 5559 } 5560 } 5561 else if (auto subSymlink = cast(Symlink)sub) 5562 { 5563 scanSymlink(viz, topDir, assumeNotNull(subSymlink), theDir, keys, fromSymlinks); 5564 } 5565 else 5566 { 5567 if (gstats.showTree) { viz.ppendl; } 5568 } 5569 ++subIndex; 5570 5571 if (ctrlC) 5572 { 5573 viz.ppln(`Ctrl-C pressed: Aborting scan of `, theDir); 5574 break; 5575 } 5576 } 5577 5578 if (gstats.showTreeContentDups) 5579 { 5580 theDir.treeContentId; // better to put this after file scan for now 5581 } 5582 } 5583 catch (FileException) 5584 { 5585 handleError(viz, theDir, true, 0); 5586 } 5587 } 5588 5589 /** Filter out `files` that lie under any of the directories `dirPaths`. */ 5590 F[] filterUnderAnyOfPaths(F)(F[] files, 5591 string[] dirPaths) 5592 { 5593 import std.algorithm: any; 5594 import std.array: array; 5595 auto dupFilesUnderAnyTopDirName = (files 5596 .filter!(dupFile => 5597 dirPaths.any!(dirPath => 5598 dupFile.path.startsWith(dirPath))) 5599 .array // evaluate to array to get .length below 5600 ); 5601 F[] hits; 5602 final switch (gstats.duplicatesContext) 5603 { 5604 case DuplicatesContext.internal: 5605 if (dupFilesUnderAnyTopDirName.length >= 2) 5606 hits = dupFilesUnderAnyTopDirName; 5607 break; 5608 case DuplicatesContext.external: 5609 if (dupFilesUnderAnyTopDirName.length >= 1) 5610 hits = files; 5611 break; 5612 } 5613 return hits; 5614 } 5615 5616 /** Show Statistics. 
*/ 5617 void showContentDups(Viz viz) 5618 { 5619 import std.meta : AliasSeq; 5620 foreach (ix, kind; AliasSeq!(RegFile, Dir)) 5621 { 5622 immutable typeName = ix == 0 ? `Regular File` : `Directory Tree`; 5623 viz.pp((typeName ~ ` Content Duplicates`).asH!2); 5624 foreach (digest, dupFiles; gstats.filesByContentId) 5625 { 5626 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames); 5627 if (dupFilesOk.length >= 2) // non-empty file/directory 5628 { 5629 auto firstDup = cast(kind)dupFilesOk[0]; 5630 if (firstDup) 5631 { 5632 static if (is(kind == RegFile)) 5633 { 5634 if (firstDup._cstat.kindId) 5635 { 5636 if (firstDup._cstat.kindId in gstats.allFKinds.byId) 5637 { 5638 viz.pp(asH!3(gstats.allFKinds.byId[firstDup._cstat.kindId], 5639 ` files sharing digest `, digest, ` of size `, firstDup.treeSize)); 5640 } 5641 else 5642 { 5643 dbg(firstDup.path ~ ` kind Id ` ~ to!string(firstDup._cstat.kindId) ~ 5644 ` could not be found in allFKinds.byId`); 5645 } 5646 } 5647 viz.pp(asH!3((firstDup._cstat.bitStatus == BitStatus.bits7) ? `ASCII File` : typeName, 5648 `s sharing digest `, digest, ` of size `, firstDup.treeSize)); 5649 } 5650 else 5651 { 5652 viz.pp(asH!3(typeName, `s sharing digest `, digest, ` of size `, firstDup.size)); 5653 } 5654 5655 viz.pp(asUList(dupFilesOk.map!(x => x.asPath.asItem))); 5656 } 5657 } 5658 } 5659 } 5660 } 5661 5662 /** Show Statistics. 
*/ 5663 void showStats(Viz viz) 5664 { 5665 /* Duplicates */ 5666 5667 if (gstats.showNameDups) 5668 { 5669 viz.pp(`Name Duplicates`.asH!2); 5670 foreach (digest, dupFiles; gstats.filesByName) 5671 { 5672 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames); 5673 if (!dupFilesOk.empty) 5674 { 5675 viz.pp(asH!3(`Files with same name `, 5676 faze(dupFilesOk[0].name, fileFace)), 5677 asUList(dupFilesOk.map!(x => x.asPath.asItem))); 5678 } 5679 } 5680 } 5681 5682 if (gstats.showLinkDups) 5683 { 5684 viz.pp(`Inode Duplicates (Hardlinks)`.asH!2); 5685 foreach (inode, dupFiles; gstats.filesByInode) 5686 { 5687 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames); 5688 if (dupFilesOk.length >= 2) 5689 { 5690 viz.pp(asH!3(`Files with same inode ` ~ to!string(inode) ~ 5691 ` (hardlinks): `), 5692 asUList(dupFilesOk.map!(x => x.asPath.asItem))); 5693 } 5694 } 5695 } 5696 5697 if (gstats.showFileContentDups) 5698 { 5699 showContentDups(viz); 5700 } 5701 5702 if (gstats.showELFSymbolDups && 5703 !keys.empty) // don't show anything if no keys where given 5704 { 5705 viz.pp(`ELF Symbol Duplicates`.asH!2); 5706 foreach (raw, dupFiles; gstats.elfFilesBySymbol) 5707 { 5708 auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames); 5709 if (dupFilesOk.length >= 2) 5710 { 5711 const demangling = demangler(raw).decodeSymbol; 5712 if (demangling.unmangled.findFirstOfAnyInOrder(keys)[1]) 5713 { 5714 viz.pp(asH!3(`ELF Files with same symbol ` ~ to!string(raw)), 5715 asUList(dupFilesOk.map!(x => x.asPath.asItem))); 5716 } 5717 } 5718 } 5719 } 5720 5721 /* Broken Symlinks */ 5722 if (gstats.showBrokenSymlinks && 5723 !_brokenSymlinks.empty) 5724 { 5725 viz.pp(`Broken Symlinks `.asH!2, 5726 asUList(_brokenSymlinks.map!(x => x.asPath.asItem))); 5727 } 5728 5729 /* Counts */ 5730 viz.pp(`Scanned Types`.asH!2, 5731 /* asUList(asItem(gstats.noScannedDirs, ` Dirs, `), */ 5732 /* asItem(gstats.noScannedRegFiles, ` Regular Files, `), */ 5733 /* 
asItem(gstats.noScannedSymlinks, ` Symbolic Links, `), */ 5734 /* asItem(gstats.noScannedSpecialFiles, ` Special Files, `), */ 5735 /* asItem(`totalling `, gstats.noScannedFiles, ` Files`) // on extra because of lack of root */ 5736 /* ) */ 5737 asTable(asRow(asCell(asBold(`Scan Count`)), 5738 asCell(asBold(`File Type`))), 5739 asRow(asCell(gstats.noScannedDirs), 5740 asCell(asItalic(`Dirs`))), 5741 asRow(asCell(gstats.noScannedRegFiles), 5742 asCell(asItalic(`Regular Files`))), 5743 asRow(asCell(gstats.noScannedSymlinks), 5744 asCell(asItalic(`Symbolic Links`))), 5745 asRow(asCell(gstats.noScannedSpecialFiles), 5746 asCell(asItalic(`Special Files`))), 5747 asRow(asCell(gstats.noScannedFiles), 5748 asCell(asItalic(`Files`))) 5749 ) 5750 ); 5751 5752 if (gstats.densenessCount) 5753 { 5754 viz.pp(`Histograms`.asH!2, 5755 asUList(asItem(`Average Byte Bistogram (Binary Histogram) Denseness `, 5756 cast(real)(100*gstats.shallowDensenessSum / gstats.densenessCount), ` Percent`), 5757 asItem(`Average Byte `, NGramOrder, `-Gram Denseness `, 5758 cast(real)(100*gstats.deepDensenessSum / gstats.densenessCount), ` Percent`))); 5759 } 5760 5761 viz.pp(`Scanned Bytes`.asH!2, 5762 asUList(asItem(`Scanned `, results.noBytesScanned), 5763 asItem(`Skipped `, results.noBytesSkipped), 5764 asItem(`Unreadable `, results.noBytesUnreadable), 5765 asItem(`Total Contents `, results.noBytesTotalContents), 5766 asItem(`Total `, results.noBytesTotal), 5767 asItem(`Total number of hits `, results.numTotalHits), 5768 asItem(`Number of Files with hits `, results.numFilesWithHits))); 5769 5770 viz.pp(`Some Math`.asH!2); 5771 5772 { 5773 struct Stat 5774 { 5775 particle2f particle; 5776 point2r point; 5777 vec2r velocity; 5778 vec2r acceleration; 5779 mat2 rotation; 5780 Rational!uint ratInt; 5781 Vector!(Rational!int, 4) ratIntVec; 5782 Vector!(float, 2, true) normFloatVec2; 5783 Vector!(float, 3, true) normFloatVec3; 5784 Point!(Rational!int, 4) ratIntPoint; 5785 } 5786 5787 /* 
Vector!(Complex!float, 4) complexVec; */ 5788 5789 viz.ppln(`A number: `, 1.2e10); 5790 viz.ppln(`Randomize particle2f as TableNr0: `, randomInstanceOf!particle2f.asTableNr0); 5791 5792 alias Stats3 = Stat[3]; 5793 auto stats = new Stat[3]; 5794 randomize(stats); 5795 viz.ppln(`A ` ~ typeof(stats).stringof, `: `, stats.randomize.asTable); 5796 5797 { 5798 auto x = randomInstanceOf!Stats3; 5799 foreach (ref e; x) 5800 { 5801 e.velocity *= 1e9; 5802 } 5803 viz.ppln(`Some Stats: `, 5804 x.asTable); 5805 } 5806 } 5807 5808 5809 } 5810 } 5811 5812 void scanner(string[] args) 5813 { 5814 // Register the SIGINT signal with the signalHandler function call: 5815 version (linux) 5816 { 5817 signal(SIGABRT, &signalHandler); 5818 signal(SIGTERM, &signalHandler); 5819 signal(SIGQUIT, &signalHandler); 5820 signal(SIGINT, &signalHandler); 5821 } 5822 5823 5824 auto term = Terminal(ConsoleOutputType.linear); 5825 auto scanner = new Scanner!Terminal(args, term); 5826 }