/**
   File Scanning Engine.

   Make rich use of Sparse Distributed Representations (SDR) using Hash Digests
   for relating Data and its Relations/Properties/Meta-Data.

   See_Also: http://stackoverflow.com/questions/12629749/how-does-grep-run-so-fast
   See_Also: http://www.regular-expressions.info/powergrep.html
   See_Also: http://ridiculousfish.com/blog/posts/old-age-and-treachery.html
   See_Also: http://www.olark.com/spw/2011/08/you-can-list-a-directory-with-8-million-files-but-not-with-ls/

   TODO: Make use of parallelism_ex: pmap

   TODO: Call filterUnderAnyOfPaths using std.algorithm.filter directly on AAs. Use byPair or use AA.get(key, defaultValue)
   See_Also: http://forum.dlang.org/thread/mailman.75.1392335793.6445.digitalmars-d-learn@puremagic.com
   See_Also: https://github.com/D-Programming-Language/druntime/pull/574

   TODO: Count logical lines.
   TODO: Lexers should be loosely coupled to FKinds instead of Files
   TODO: Generic Token[] and specific CToken[], CxxToken[]

   TODO: Don't scan for duplicates inside vc-dirs by default

   TODO: Assert that files along duplicates path don't include symlinks

   TODO: Implement FOp.deduplicate
   TODO: Prevent rescans of duplicates

   TODO: Define generalized_specialized_two_way_relationship(kindD, kindDi)

   TODO: Visualize hits using existingFileHitContext.asH!1 followed by a table:
   ROW_NR | hit string in <code lang=LANG></code>

   TODO: Parse and Sort GCC/Clang Compiler Messages on WARN_TYPE FILE:LINE:COL:MSG[WARN_TYPE] and use Collapsible HTML Widgets:
   http://api.jquerymobile.com/collapsible/
   when presenting them

   TODO: Maybe make use of https://github.com/Abscissa/scriptlike

   TODO: Calculate Tree grams and bist

   TODO: Get stats of the link itself not the target in SymLink constructors

   TODO: RegFile with FileContent.text should be decodable to Unicode using
   either iso-latin1, utf-8, etc. Check std.uni for how to try and decode stuff.
46 47 TODO: Search for subwords. 48 For example gtk_widget should also match widget_gtk and GtkWidget etc. 49 50 TODO: Support multi-line keys 51 52 TODO: Use hash-lookup in txtFKinds.byExt for faster guessing of source file 53 kind. Merge it with binary kind lookup. And check FileContent member of 54 kind to instead determine if it should be scanned or not. 55 Sub-Task: Case-Insensitive Matching of extensions if 56 nothing else passes. 57 58 TODO: Detect symlinks with duplicate targets and only follow one of them and 59 group them together in visualization 60 61 TODO: Add addTag, removeTag, etc and interface to fs.d for setting tags: 62 --add-tag=comedy, remove-tag=comedy 63 64 TODO: If files ends with ~ or .backup assume its a backup file, strip it from 65 end match it again and set backupFlag in FileKind 66 67 TODO: Acronym match can make use of normal histogram counts. Check denseness 68 of binary histogram (bist) to determine if we should use a sparse or dense 69 histogram. 70 71 TODO: Activate and test support for ELF and Cxx11 subkinds 72 73 TODO: Call either File.checkObseleted upon inotify. checkObseleted should remove stuff from hash tables 74 TODO: Integrate logic in clearCStat to RegFile.makeObselete 75 TODO: Upon Dir inotify call invalidate _depth, etc. 76 77 TODO: Following command: fs.d --color -d ~/ware/emacs -s lispy -k 78 shows "Skipped PNG file (png) at first extension try". 79 Assure that this logic reuses cache and instead prints something like "Skipped PNG file using cached FKind". 80 81 TODO: Cache each Dir separately to a file named after SHA1 of its path 82 83 TODO: Add ASCII kind: Requires optional stream analyzer member of FKind in 84 replacement for magicData. ASCIIFile 85 86 TODO: Defined NotAnyKind(binaryKinds) and cache it 87 88 TODO: Create PkZipFile() in Dir.load() when FKind "pkZip Archive" is found. 
89 Use std.zip.ZipArchive(void[] from mmfile) 90 91 TODO: Scan Subversion Dirs with http://pastebin.com/6ZzPvpBj 92 93 TODO: Change order (binHit || allBHist8Miss) and benchmark 94 95 TODO: Display modification/access times as: 96 See: http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com 97 98 TODO: Use User Defined Attributes (UDA): http://forum.dlang.org/thread/k7afq6$2832$1@digitalmars.com 99 TODO: Use msgPack @nonPacked when needed 100 101 TODO: Limit lines to terminal width 102 103 TODO: Create array of (OFFSET, LENGTH) and this in FKind Pattern factory 104 function. Then for source file extra slice at (OFFSET, LENGTH) and use as 105 input into hash-table from magic (if its a Lit-pattern to) 106 107 TODO: Verify that "f.tar.z" gets tuple extensions tuple("tar", "z") 108 TODO: Verify that "libc.so.1.2.3" gets tuple extensions tuple("so", "1", "2", "3") and "so" extensions should the be tried 109 TODO: Cache Symbols larger than three characters in a global hash from symbol to path 110 111 TODO: Benchmark horspool.d and perhaps use instead of std.find 112 113 TODO: Splitting into keys should not split arguments such as "a b" 114 115 TODO: Perhaps use http://www.chartjs.org/ to visualize stuff 116 117 TODO: Make use of @nonPacked in version(msgpack). 
118 */ 119 module nxt.fs; 120 121 version = msgpack; // Use msgpack serialization 122 /* version = cerealed; // Use cerealed serialization */ 123 124 import std.stdio: ioFile = File, stdout; 125 import std.typecons: Tuple, tuple; 126 import std.algorithm: find, map, filter, reduce, max, min, uniq, all, joiner; 127 import std.string: representation, chompPrefix; 128 import std.stdio: write, writeln, writefln; 129 import std.path: baseName, dirName, isAbsolute, dirSeparator, extension, buildNormalizedPath, expandTilde, absolutePath; 130 import std.datetime; 131 import std.file: FileException; 132 import std.digest.sha: sha1Of, toHexString; 133 import std.range: repeat, array, empty, cycle, chain; 134 import std.stdint: uint64_t; 135 import std.traits: Unqual, isInstanceOf, isIterable; 136 import std.experimental.allocator; 137 import std.functional: memoize; 138 import std.complex: Complex; 139 140 import nxt.predicates: isUntouched; 141 142 import core.memory: GC; 143 import core.exception; 144 145 import nxt.traits_ex; 146 import nxt.getopt_ex; 147 import nxt.digest_ex; 148 import nxt.algorithm_ex; 149 import nxt.codec; 150 import nxt.csunits; 151 alias Bytes64 = Bytes!ulong; 152 import arsd.terminal : Color; 153 import nxt.symbolic; 154 import nxt.static_bitarray; 155 import nxt.dbgio; 156 import nxt.tempfs; 157 import nxt.rational: Rational; 158 import nxt.ngram; 159 import nxt.notnull; 160 import nxt.pretty; 161 162 import nxt.geometry; 163 import nxt.random_ex; 164 import nxt.mathml; 165 import nxt.mangling; 166 import nxt.lingua; 167 import nxt.attributes; 168 import nxt.find_ex; 169 170 import nxt.elf; 171 import nxt.typedoc; 172 import lock_free.rwqueue; 173 174 /* NGram Aliases */ 175 /** Not very likely that we are interested in histograms 64-bit precision 176 * Bucket/Bin Counts so pick 32-bit for now. 
*/
alias RequestedBinType = uint;

/// Order `N` of the sparse n-gram histogram `XGram`.
enum NGramOrder = 3;

/// Order-1 byte n-gram instantiated with binary kind and dense static storage.
alias Bist  = NGram!(ubyte, 1, ngram.Kind.binary, ngram.Storage.denseStatic, ngram.Symmetry.ordered, void, immutable(ubyte)[]);

/// Order-`NGramOrder` byte n-gram with saturated `RequestedBinType` bins and sparse storage.
alias XGram = NGram!(ubyte, NGramOrder, ngram.Kind.saturated, ngram.Storage.sparse, ngram.Symmetry.ordered, RequestedBinType, immutable(ubyte)[]);

/* Need for signal handling */
import core.stdc.stdlib;
version(linux) import core.sys.posix.sys.stat;
version(linux) import core.sys.posix.signal;
//version(linux) import std.c.linux.linux;

/* TODO: Set global state.
   http://forum.dlang.org/thread/cu9fgg$28mr$1@digitaldaemon.com
*/

/// Number of times Ctrl-C (SIGINT) has been pressed; incremented by `signalHandler`.
shared uint ctrlC = 0;

/** Exception Describing Process Signal.

    The message has the form ``Signal number N at FILE:LINE``.
 */
class SignalCaughtException : Exception
{
    /// Number of the signal that was caught (`int.max` until set by the constructor).
    int signo = int.max;

    /** Construct for signal number `signo`.

        Params:
            signo = signal number
            file = source file of the throw site (defaults to the caller's file)
            line = source line of the throw site (defaults to the caller's line)
     */
    this(int signo, string file = __FILE__, size_t line = __LINE__ ) @safe {
        this.signo = signo;
        import std.conv: to;
        // Forward file/line so that Throwable.file/.line point at the throw
        // site instead of at this constructor.
        super(`Signal number ` ~ to!string(signo) ~ ` at ` ~ file ~ `:` ~ to!string(line),
              file, line);
    }
}

/** Process signal handler.

    Atomically increments `ctrlC` when `signo` is `SIGINT`; every other signal
    is ignored.

    NOTE(review): this function and `signalHandler_t` have D linkage while C's
    `signal()` expects an `extern(C)` callback - confirm this is intended.
 */
void signalHandler(int signo)
{
    import core.atomic: atomicOp;
    import core.stdc.signal: SIGINT;
    if (signo == SIGINT)
    {
        // Use the selectively imported symbol directly: a selective import
        // does not make the fully qualified name `core.atomic` available.
        atomicOp!`+=`(ctrlC, 1);
    }
    // throw new SignalCaughtException(signo);
}

/// Type of the handlers accepted by `signal`.
alias signalHandler_t = void function(int);

/// Binding to C's `signal`.
extern (C) signalHandler_t signal(int signal, signalHandler_t handler);

version(msgpack)
{
    import msgpack;
}
version(cerealed)
{
    /* import cerealed.cerealiser; */
    /* import cerealed.decerealiser; */
    /* import cerealed.cereal; */
}

/** File Content Type Code.
*/
enum FileContent
{
    unknown,
    binaryUnknown,
    binary,
    text,
    textASCII,
    text8Bit,
    document,
    spreadsheet,
    database,
    tagsDatabase,
    image,
    imageIcon,
    audio,
    sound = audio,              /// Alias of `audio`.
    music = audio,              /// Alias of `audio`.

    modemData,
    imageModemFax1BPP,          /// One bit per pixel.
    voiceModem,

    video,
    movie,
    media,
    sourceCode,
    scriptCode,
    buildSystemCode,
    byteCode,
    machineCode,
    versionControl,
    numericalData,
    archive,
    compressed,
    cache,
    binaryCache,
    firmware,
    spellCheckWordList,
    font,
    performanceBenchmark,
    fingerprint,
}

/** How File Kinds are detected. */
enum FileKindDetection
{
    /// Parenting path and file name must match.
    equalsParentPathDirsAndName,
    /// Only name must match.
    equalsName,
    /// Both name and contents must match.
    equalsNameAndContents,
    /// Either name or contents must match.
    equalsNameOrContents,
    /// Only contents must match.
    equalsContents,
    /// All information defined must match.
    equalsWhatsGiven,
}

/** Key Scan (Search) Context. */
enum ScanContext
{
    /* code, */
    /* comment, */
    /* string, */

    /* word, */
    /* symbol, */

    /// Name of directory being scanned.
    dirName,
    dir = dirName,              /// Alias of `dirName`.

    /// Name of file being scanned.
    fileName,
    name = fileName,            /// Alias of `fileName`.

    /// Name of (regular) file being scanned.
    regularFilename,
    /// Name of symbolic link being scanned.
    symlinkName,

    /// Contents of file being scanned.
    fileContent,
    content = fileContent,      /// Alias of `fileContent`.

    /* modTime, */
    /* accessTime, */
    /* xattr, */
    /* size, */

    all,
    standard = all,             /// Alias of `all`.
}

/** Where duplicates are allowed to reside. */
enum DuplicatesContext
{
    /// All duplicates must lie inside topDirs.
    internal,
    /// At least one duplicate lies inside topDirs; others may lie outside.
    external,
}

/** File Operation Type Code.
*/
enum FOp
{
    none,

    /// Check syntax.
    checkSyntax,
    lint = checkSyntax,         /// Alias of `checkSyntax`.

    /// Project-wide build.
    build,
    /// Compile.
    compile,
    /// Byte compile.
    byteCompile,
    /// Run (execute).
    run,
    execute = run,              /// Alias of `run`.

    /// Preprocess C/C++/Objective-C (using cpp).
    preprocess,
    cpp = preprocess,           /// Alias of `preprocess`.

    /* VCS Operations */
    vcStatus,
    vcs = vcStatus,             /// Alias of `vcStatus`.

    /// Deduplicate files using hardlinks and dirs using symlinks.
    deduplicate,
}

/** Directory Operation Type Code. */
enum DirOp
{
    /* VCS Operations */
    vcStatus,
}

/** Shell Command.

    Currently nothing more than a plain string.
 */
alias ShCmd = string;

/** Pair of Delimiters.

    Used to describe for example comment and string delimiter syntax.
 */
struct Delim
{
    /// Construct from the opening delimiter only; `finish` is left at its `.init` (null).
    this(string intro)
    {
        this(intro, null);
    }

    /// Construct from both the opening and the closing delimiter.
    this(string intro, string finish)
    {
        this.intro = intro;
        this.finish = finish;
    }

    string intro;               /// Opening delimiter.
    string finish;              /// Closing delimiter. Defaults to end of line if not defined.
}

/* Comment Delimiters */
enum defaultCommentDelims = [Delim(`#`)];
enum cCommentDelims = [Delim(`/*`, `*/`), Delim(`//`)];
enum dCommentDelims = [Delim(`/+`, `+/`)] ~ cCommentDelims;

/* String Delimiters */
enum defaultStringDelims = [Delim(`"`), Delim(`'`), Delim("`")];
enum pythonStringDelims = [Delim(`"""`)] ~ defaultStringDelims;

/** File Kind.
*/
class FKind
{
    /** Construct a file kind.

        Params:
            kindName_ = nick name of the kind
            baseNaming_ = base name pattern: a `string` (literal), an array of
                          strings (alternatives) or a ready-made `Patt`
            exts_ = typical file extensions
            magicData = magic contents: `ubyte[]`, `string`, `void[]` or
                        anything assignable to `Patt`
            magicOffset = offset at which `magicData` is matched
            refPattern_ = reference pattern: `ubyte[]`, `string` or `void[]`
            keywords_ = language keywords
            strings_ = string delimiter syntax
            comments_ = comment delimiter syntax
            content_ = content classification
            detection_ = how the kind is detected. For source and script code
                         `equalsWhatsGiven` is relaxed to `equalsName`.
            lang_ = language, if any
            superKind = kind this kind specializes
            subKinds = kinds specializing this kind
            description = documenting description
            wikip = Wikipedia URL
     */
    this(T, MagicData, RefPattern)(string kindName_,
                                   T baseNaming_,
                                   const string[] exts_,
                                   MagicData magicData, size_t magicOffset = 0,
                                   RefPattern refPattern_ = RefPattern.init,
                                   const string[] keywords_ = [],

                                   Delim[] strings_ = [],

                                   Delim[] comments_ = [],

                                   FileContent content_ = FileContent.unknown,
                                   FileKindDetection detection_ = FileKindDetection.equalsWhatsGiven,
                                   Lang lang_ = Lang.unknown,

                                   FKind superKind = null,
                                   FKind[] subKinds = [],
                                   string description = null,
                                   string wikip = null) @trusted pure
    {
        this.kindName = kindName_;

        // Basename
        static if (is(T == string))
        {
            this.baseNaming = lit(baseNaming_);
        }
        else static if (isArrayOf!(T, string))
        {
            // TODO: Move to a factory function strs(x)
            auto alt_ = alt();
            foreach (ext; baseNaming_) // add each string as an alternative
            {
                alt_ ~= lit(ext);
            }
            this.baseNaming = alt_;
        }
        else static if (is(T == Patt))
        {
            this.baseNaming = baseNaming_;
        }

        this.exts = exts_;

        import std.traits: isAssignable;
        static if (is(MagicData == ubyte[])) { this.magicData = lit(magicData) ; }
        else static if (is(MagicData == string)) { this.magicData = lit(magicData.representation.dup); }
        else static if (is(MagicData == void[])) { this.magicData = lit(cast(ubyte[])magicData); }
        else static if (isAssignable!(Patt, MagicData)) { this.magicData = magicData; }
        else static assert(0, `Cannot handle MagicData being type ` ~ MagicData.stringof);

        this.magicOffset = magicOffset;

        static if (is(RefPattern == ubyte[])) { this.refPattern = refPattern_; }
        else static if (is(RefPattern == string)) { this.refPattern = refPattern_.representation.dup; }
        else static if (is(RefPattern == void[])) { this.refPattern = (cast(ubyte[])refPattern_).dup; }
        else static assert(0, `Cannot handle RefPattern being type ` ~ RefPattern.stringof);

        this.keywords = keywords_;

        this.strings = strings_;
        this.comments = comments_;

        this.content = content_;

        if ((content_ == FileContent.sourceCode ||
             content_ == FileContent.scriptCode) &&
            detection_ == FileKindDetection.equalsWhatsGiven)
        {
            // relax matching of sourcecode to only need name until we have complete parsers
            this.detection = FileKindDetection.equalsName;
        }
        else
        {
            this.detection = detection_;
        }
        this.lang = lang_;

        this.superKind = superKind;
        this.subKinds = subKinds;
        this.description = description;
        this.wikip = wikip.asURL;
    }

    /// Returns: the kind's nick name.
    override string toString() const @property @trusted pure nothrow { return kindName; }

    /** Returns: Id Unique to matching behaviour of `this` FKind. If match
        behaviour of `this` FKind changes returned id will change.
        value is memoized.

        When the magic data is a literal (`Lit`) the id is the SHA-1 of the
        packed extensions, magic bytes, magic offset, reference pattern,
        keywords, content and detection. Otherwise the magic bytes cannot be
        packed and the kind name is packed in their place.
    */
    auto ref const(SHA1Digest) behaviorId() @property @safe /* pure nothrow */
        out(result) { assert(!result.empty); }
    do
    {
        if (_behaviourDigest.empty) // if not yet defined
        {
            ubyte[] bytes;
            const magicLit = cast(Lit)magicData;
            if (magicLit)
            {
                bytes = msgpack.pack(exts, magicLit.bytes, magicOffset, refPattern, keywords, content, detection);
            }
            else
            {
                // Without this every kind lacking literal magic data would
                // hash an empty buffer, get the very same digest and
                // overwrite each other in FKinds.byId.
                bytes = msgpack.pack(kindName, exts, magicOffset, refPattern, keywords, content, detection);
            }
            _behaviourDigest = bytes.sha1Of;
        }
        return _behaviourDigest;
    }

    string kindName;    // Kind Nick Name.
    string description; // Kind Documenting Description.
    AsURL!string wikip; // Wikipedia URL

    FKind superKind;    // Kind this one specializes. For example ELF => ELF core file
    FKind[] subKinds;   // Kinds specializing this one. For example ELF => ELF core file
    Patt baseNaming;    // Pattern that matches typical file basenames of this Kind. May be null.

    string[] parentPathDirs; // example [`lib`, `firmware`] for `/lib/firmware` or `../lib/firmware`

    const string[] exts;      // Typical Extensions.
    Patt magicData;     // Magic Data.
    size_t magicOffset; // Magic Offset.
    ubyte[] refPattern; // Reference pattern.
    const FileContent content;
    const FileKindDetection detection;
    Lang lang; // Language if any

    // Volatile Statistics:
    private SHA1Digest _behaviourDigest; // Memoized result of behaviorId.
    RegFile[] hitFiles;     // Files of this kind.

    const string[] keywords; // Keywords
    string[] builtins; // Builtin Functions
    Op[] opers; // Language Opers

    /* TODO: Move this to CompLang class */
    Delim[] strings; // String syntax.
    Delim[] comments; // Comment syntax.

    bool machineGenerated; // True if this is a machine generated file.

    Tuple!(FOp, ShCmd)[] operations; // Operation and Corresponding Shell Command
}

/** Set of File Kinds with Internal Hashing. */
class FKinds
{
    /// Append `kind` and register it in all lookup tables.
    void opOpAssign(string op)(FKind kind) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kind;
        this.register(kind);
    }

    /// Append all kinds of `kinds` and register each of them in all lookup tables.
    void opOpAssign(string op)(FKinds kinds) @safe /* pure */ if (op == `~`)
    {
        this.byIndex ~= kinds.byIndex;
        foreach (kind; kinds.byIndex)
            this.register(kind);
    }

    /** Register `kind` in the lookup tables (name, extensions, behaviour id
        and magic bytes).

        Only kinds whose magic offset is zero and whose magic data is a
        literal (`Lit`) are indexed by magic bytes.

        Returns: `this`, to allow chaining.
     */
    FKinds register(FKind kind) @safe /* pure */
    {
        this.byName[kind.kindName] = kind;
        foreach (const ext; kind.exts)
        {
            this.byExt[ext] ~= kind;
        }
        this.byId[kind.behaviorId] = kind;
        if (kind.magicOffset == 0 && // only if zero-offset for now
            kind.magicData)
        {
            if (const magicLit = cast(Lit)kind.magicData)
            {
                this.byMagic[magicLit.bytes][magicLit.bytes.length] ~= kind;
                _magicLengths ~= magicLit.bytes.length; // add it
            }
        }
        return this;
    }

    /** Rehash Internal AAs.

        Also sorts the list of magic lengths and strips duplicates from it.

        TODO: Change to @safe when https://github.com/D-Programming-Language/druntime/pull/942 has been merged
        TODO: Change to nothrow when uniq becomes nothrow.

        Returns: `this`, to allow chaining.
    */
    FKinds rehash() @trusted pure /* nothrow */
    {
        import std.algorithm: sort;
        // `uniq` only drops *adjacent* duplicates, so sorting has to come first.
        _magicLengths = _magicLengths.sort().uniq.array;
        this.byName.rehash;
        this.byExt.rehash;
        this.byMagic.rehash;
        this.byId.rehash;
        return this;
    }

    FKind[] byIndex; // All kinds in order of insertion.
private:
    /* TODO: These are "slaves" under byIndex and should not be modifiable outside
     of this class but their FKind's can mutable.
     */
    FKind[string] byName;       // Index by unique name string
    FKind[][string] byExt;      // Index by possibly non-unique extension string

    FKind[][size_t][immutable ubyte[]] byMagic; // zero-offset magic byte array => its length => Binary FKind[]
    size_t[] _magicLengths; // List of magic lengths to try as index in byMagic

    FKind[SHA1Digest] byId;    // Index Kinds by their behaviour
}

/** Match `kind` with full filename `full`.

    Returns: `true` iff `kind` has a base name pattern that matches `full`
    at (start) index `six`.
 */
bool matchFullName(in FKind kind,
                   const scope string full, size_t six = 0) @safe pure nothrow
{
    return (kind.baseNaming &&
            !kind.baseNaming.match(full, six).empty);
}

/** Match `kind` with file extension `ext`.

    Returns: `true` iff `ext` is one of the extensions of `kind`.
 */
bool matchExtension(in FKind kind,
                    const scope string ext) @safe pure nothrow
{
    return !kind.exts.find(ext).empty;
}

/** Match `kind` with either full filename `full` or file extension `ext`.

    Params:
        kind = kind to match against
        full = full file name
        six = index forwarded to `matchFullName`
        ext = file extension without leading dot

    Returns: `true` iff either the name or the extension matches.
 */
bool matchName(in FKind kind,
               const scope string full, size_t six = 0,
               const scope string ext = null) @safe pure nothrow
{
    // `six` used to be silently dropped here.
    return (kind.matchFullName(full, six) ||
            kind.matchExtension(ext));
}

import std.range: hasSlicing;

/** Match (Magic) Contents of `kind` with `range`.
    Returns: `true` iff match.
*/
bool matchContents(Range)(in FKind kind,
                          in Range range,
                          in RegFile regFile) pure nothrow if (hasSlicing!Range)
{
    // A kind without magic data cannot match any contents. Guarding here
    // protects the detection modes that call this without a prior null check.
    if (kind.magicData is null)
    {
        return false;
    }
    const hit = kind.magicData.matchU(range, kind.magicOffset);
    return (!hit.empty);
}

/** Result of matching a file against a kind. */
enum KindHit
{
    none = 0,     // No hit.
    cached = 1,   // Cached hit.
    uncached = 2, // Uncached (fresh) hit.
}

/** Find the first kind in `kinds` that `regFile` is of.

    Lookup order:
    $(OL
      $(LI the kind id cached in `regFile`, if defined and known to `kinds`)
      $(LI the kinds registered for the extension of `regFile`)
      $(LI all kinds in `kinds.byIndex`)
    )

    Params:
        regFile = file to classify
        kinds = kinds to try
        collectTypeHits = forwarded to `ofKind`

    Returns: tuple of hit type, matching kind (`FKind.init` if none) and the
    index of the kind in the array it was found in. That array is the
    per-extension array for an extension hit and `kinds.byIndex` otherwise;
    the index is 0 for cached hits and misses.
 */
Tuple!(KindHit, FKind, size_t) ofAnyKindIn(NotNull!RegFile regFile,
                                           FKinds kinds,
                                           bool collectTypeHits)
{
    // using kindId
    if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate
    {
        if (auto cachedKind = regFile._cstat.kindId in kinds.byId)
        {
            // size_t(0) instead of 0UL keeps the tuple type right on 32-bit targets
            return tuple(KindHit.cached, *cachedKind, size_t(0));
        }
    }

    // using extension
    immutable ext = regFile.realExtension; // extension sans dot
    if (!ext.empty)
    {
        if (auto extKinds = ext in kinds.byExt)
        {
            foreach (kindIndex, kind; *extKinds)
            {
                auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
                if (hit)
                {
                    return tuple(hit, kind, kindIndex);
                }
            }
        }
    }

    // try all
    foreach (kindIndex, kind; kinds.byIndex) // Iterate each kind
    {
        auto hit = regFile.ofKind(kind.enforceNotNull, collectTypeHits, kinds);
        if (hit)
        {
            return tuple(hit, kind, kindIndex);
        }
    }

    // no hit
    return tuple(KindHit.none, FKind.init, size_t(0));
}

/** Returns: true if file with extension `ext` is of type `kind`.
*/
KindHit ofKind(NotNull!RegFile regFile,
               NotNull!FKind kind,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    return regFile.ofKind1(kind, collectTypeHits, allFKinds);
}

/** Match `regFile` against the kind registered under `kindName`.

    Returns: `KindHit.none` if `allFKinds` has no kind named `kindName`,
    otherwise the result of matching against that kind.
 */
KindHit ofKind(NotNull!RegFile regFile,
               string kindName,
               bool collectTypeHits,
               FKinds allFKinds) /* nothrow */ @trusted
{
    if (auto namedKind = kindName in allFKinds.byName)
    {
        return regFile.ofKind(assumeNotNull(*namedKind),
                              collectTypeHits,
                              allFKinds);
    }
    return KindHit.none;
}

/** Helper for ofKind.

    Checks the kind id cached in `regFile` first. Otherwise any super kind of
    `kind` must match before `kind` itself is tried according to
    `kind.detection`.

    On a fresh hit the behaviour id of `kind` is stored in `regFile` and, if
    `collectTypeHits` is set, `regFile` is appended to `kind.hitFiles`.

    Returns: `KindHit.cached`, `KindHit.uncached` or `KindHit.none`.
 */
KindHit ofKind1(NotNull!RegFile regFile,
                NotNull!FKind kind,
                bool collectTypeHits,
                FKinds allFKinds) /* nothrow */ @trusted
{
    // Try cached first
    if (regFile._cstat.kindId.defined &&
        (regFile._cstat.kindId in allFKinds.byId) && // if kind is known
        allFKinds.byId[regFile._cstat.kindId] is kind)  // if cached kind equals
    {
        return KindHit.cached;
    }

    immutable ext = regFile.realExtension;

    if (kind.superKind)
    {
        immutable baseHit = regFile.ofKind(enforceNotNull(kind.superKind),
                                           collectTypeHits,
                                           allFKinds);
        if (!baseHit)
        {
            return baseHit;
        }
    }

    bool hit = false;
    final switch (kind.detection)
    {
    case FileKindDetection.equalsParentPathDirsAndName:
        hit = (!regFile.parents.map!(a => a.name).find(kind.parentPathDirs).empty && // I love D :)
               kind.matchName(regFile.name, 0, ext));
        break;
    case FileKindDetection.equalsName:
        hit = kind.matchName(regFile.name, 0, ext);
        break;
    case FileKindDetection.equalsNameAndContents:
        hit = (kind.matchName(regFile.name, 0, ext) &&
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsNameOrContents:
        hit = (kind.matchName(regFile.name, 0, ext) ||
               kind.matchContents(regFile.readOnlyContents, regFile));
        break;
    case FileKindDetection.equalsContents:
        hit = kind.matchContents(regFile.readOnlyContents, regFile);
        break;
    case FileKindDetection.equalsWhatsGiven:
        // something must be defined
        // (`is(kind.baseNaming)` was an IsExpression on a value, which is
        // always false, so kinds defined by a name pattern alone tripped this)
        assert(kind.baseNaming !is null ||
               !kind.exts.empty ||
               kind.magicData !is null);
        hit = ((kind.matchName(regFile.name, 0, ext) &&
                (kind.magicData is null ||
                 kind.matchContents(regFile.readOnlyContents, regFile))));
        break;
    }
    if (hit)
    {
        if (collectTypeHits)
        {
            kind.hitFiles ~= regFile;
        }
        regFile._cstat.kindId = kind.behaviorId;       // store reference in File
    }

    return hit ? KindHit.uncached : KindHit.none;
}

/** Directory Kind.
 */
class DirKind
{
    /** Construct from file name `fn` and kind name `kn`. */
    this(string fn,
         string kn)
    {
        this.fileName = fn;
        this.kindName = kn;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`. */
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }
        /// Pack all fields as one array.
        void toMsgpack(Packer)(ref Packer packer) const
        {
            packer.beginArray(this.tupleof.length);
            packer.pack(this.tupleof);
        }
        /// Unpack all fields from one array.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.beginArray;
            unpacker.unpack(this.tupleof);
        }
    }

    string fileName;
    string kindName;
}
version(msgpack) unittest
{
    auto k = tuple(``, ``);
    auto data = pack(k);
    Tuple!(string, string) k_; data.unpack(k_);
    assert(k == k_);
}

import std.file: DirEntry, getLinkAttributes;
import std.datetime: SysTime, Interval;

/** File.
*/
class File
{
    /** Construct an unnamed file under `parent` (may be null).

        Increments the file counter of `parent.gstats` if `parent` is set.
     */
    this(Dir parent)
    {
        this.parent = parent;
        if (parent) { ++parent.gstats.noFiles; }
    }

    /** Construct a file under `parent` (may be null) from its name, size
        and time stamps.

        Increments the file counter of `parent.gstats` if `parent` is set.
     */
    this(string name, Dir parent, Bytes64 size,
         SysTime timeLastModified,
         SysTime timeLastAccessed)
    {
        this.name = name;
        this.parent = parent;
        this.size = size;
        this.timeLastModified = timeLastModified;
        this.timeLastAccessed = timeLastAccessed;
        if (parent) { ++parent.gstats.noFiles; }
    }

    // The Real Extension without leading dot.
    string realExtension() @safe pure nothrow const { return name.extension.chompPrefix(`.`); }
    alias ext = realExtension; // shorthand

    /// Returns: human readable description of the file type.
    string toTextual() const @property { return `Any File`; }

    /// Returns: `size` of this file.
    Bytes64 treeSize() @property @trusted /* @safe pure nothrow */ { return size; }

    /** Content Digest of Tree under this Directory. */
    const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        return typeof(return).init; // default to undefined
    }

    /// Returns: face used when presenting this file.
    Face!Color face() const @property @safe pure nothrow { return fileFace; }

    /** Check if `this` File has been invalidated by `dent`.

        A changed size or modification time obsoletes the file, in which case
        both are refreshed from `dent`. The access time is always refreshed.

        Returns: `true` iff `this` was obseleted.
    */
    bool checkObseleted(ref DirEntry dent) @trusted
    {
        // Git-Style Check for Changes (called Decider in SCons Build Tool)
        bool flag = false;
        if (dent.size != this.size || // size has changes
            (dent.timeLastModified != this.timeLastModified) // if current modtime has changed or
            )
        {
            makeObselete;
            this.timeLastModified = dent.timeLastModified; // use new time
            this.size = dent.size; // use new size
            flag = true;
        }
        this.timeLastAccessed = dent.timeLastAccessed; // use new time
        return flag;
    }

    /// Hook called when the file has been obsoleted. Does nothing here.
    void makeObselete() @trusted {}
    /// Counterpart of `makeObselete`. Does nothing here.
    void makeUnObselete() @safe {}

    /** Returns: Depth of Depth from File System root to this File. */
    int depth() @property @safe pure nothrow
    {
        return parent ? parent.depth + 1 : 0; // NOTE: this is fast because parent is memoized
    }

    /** Returns: number of non-root directories above this file.

        NOTE: Currently not used.
        NOTE(review): because the root itself is not counted the result is not
        necessarily equal to `depth` - confirm before using interchangeably.
     */
    int depthIterative() @property @safe pure
        out (result) { debug assert(result >= 0); }
    do
    {
        typeof(return) count = 0;
        for (auto curr = dir; curr !is null && !curr.isRoot; curr = curr.parent)
        {
            ++count;
        }
        return count;
    }

    /** Get Parenting Dirs starting from parent of `this` upto root.
        Make this even more lazily evaluted.

        The root directory itself is not included.
    */
    Dir[] parentsUpwards()
    {
        typeof(return) parents; // collected parents
        for (auto curr = dir; (curr !is null &&
                               !curr.isRoot); curr = curr.parent)
        {
            parents ~= curr;
        }
        return parents;
    }
    alias dirsDownward = parentsUpwards;

    /** Get Parenting Dirs starting from file system root downto containing
        directory of `this`.
    */
    auto parents()
    {
        import std.range: retro; // not part of the module-level std.range import list
        return parentsUpwards.retro;
    }
    alias dirs = parents;     // SCons style alias
    alias parentsDownward = parents;

    /// Returns: `true` iff `pred` holds for any of the parenting directories.
    bool underAnyDir(alias pred = `a`)()
    {
        import std.algorithm: any;
        import std.functional: unaryFun;
        return parents.any!(unaryFun!pred);
    }

    /** Returns: Path to `this` File.

        The path is the names of all non-root parents followed by `name`,
        each preceded by a directory separator. A file without parent yields
        the separator alone.

        TODO: Reuse parents.
     */
    string path() @property @trusted pure out (result) {
        /* assert(result == pathRecursive); */
    }
    do
    {
        if (!parent) { return dirSeparator; }

        size_t pathLength = 1 + name.length; // returned path length
        Dir[] ancestors; // collected parents, nearest first

        for (auto curr = parent; (curr !is null &&
                                  !curr.isRoot); curr = curr.parent)
        {
            pathLength += 1 + curr.name.length;
            ancestors ~= curr;
        }

        // build path
        auto thePath = new char[pathLength];
        size_t i = 0; // index to thePath
        import std.range: retro;
        foreach (currParent_; ancestors.retro)
        {
            immutable parentName = currParent_.name;
            thePath[i++] = dirSeparator[0];
            thePath[i .. i + parentName.length] = parentName[];
            i += parentName.length;
        }
        thePath[i++] = dirSeparator[0];
        thePath[i .. i + name.length] = name[];

        return thePath;
    }

    /** Returns: Path to `this` File.
        Recursive Heap-active implementation, slower than $(D path()).
    */
    string pathRecursive() @property @trusted pure
    {
        if (!parent)
        {
            return `/`;  // assume root folder with beginning slash
        }
        // NOTE: This is more efficient than buildPath(parent.path,
        // name) because we can guarantee things about parent.path and
        // name
        immutable parentPath = parent.isRoot ? `` : parent.pathRecursive;
        return parentPath ~ dirSeparator ~ name;
    }

    version(msgpack)
    {
        /// Pack name, size and both time stamps (as std time).
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }
        /// Unpack the fields in the order `toMsgpack` wrote them.
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            // name and size precede the time stamps in the packed stream;
            // skipping them would make the time stamps read the wrong values.
            unpacker.unpack(name, size);
            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
        }
    }

    Dir parent;               // Reference to parenting directory (or null if this is a root directory)
    alias dir = parent;       // SCons style alias

    string name;              // Empty if root directory
    Bytes64 size;             // Size of file in bytes
    SysTime timeLastModified; // Last modification time
    SysTime timeLastAccessed; // Last access time
}

/** Maps Files to their tags.
*/
class FileTags
{
    /** Tag `file` with `tag` unless it already carries that tag.

        Returns: `this`, to allow chaining.
     */
    FileTags addTag(File file, const scope string tag) @safe pure /* nothrow */
    {
        if (auto existing = file in _tags)
        {
            if ((*existing).find(tag).empty)
            {
                *existing ~= tag; // add it
            }
        }
        else
        {
            _tags[file] = [tag];
        }
        return this;
    }

    /** Remove every occurrence of `tag` from `file`.

        Files that are not registered are left alone.

        Returns: `this`, to allow chaining.
     */
    FileTags removeTag(File file, string tag) @safe pure
    {
        if (auto existing = file in _tags)
        {
            import std.algorithm: remove;
            *existing = (*existing).remove!(a => a == tag);
        }
        return this;
    }

    /// Returns: tags of `file`, or `null` if `file` is not registered.
    auto ref getTags(File file) const @safe pure nothrow
    {
        return file in _tags ? _tags[file] : null;
    }

    private string[][File] _tags; // Tags for each registered file.
}

version(linux) unittest
{
    auto ftags = new FileTags();

    GStats gstats = new GStats();

    auto root = assumeNotNull(new Dir(cast(Dir)null, gstats));
    auto etc = getDir(root, `/etc`);
    assert(etc.path == `/etc`);

    auto dent = DirEntry(`/etc/passwd`);
    auto passwd = getFile(root, `/etc/passwd`, dent.isDir);
    assert(passwd.path == `/etc/passwd`);
    assert(passwd.parent == etc);
    assert(etc.sub(`passwd`) == passwd);

    // adding the same tag twice must not duplicate it
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Password`);
    ftags.addTag(passwd, `Secret`);
    assert(ftags.getTags(passwd) == [`Password`, `Secret`]);
    ftags.removeTag(passwd, `Password`);
    assert(ftags._tags[passwd] == [`Secret`]);
}

/** Symlink Target Status.
 */
enum SymlinkTargetStatus
{
    unknown,
    present,
    broken,
}

/** Symlink.
*/
class Symlink : File
{
    /** Create an otherwise empty symlink below `parent` and count it. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSymlinks;
    }

    /** Create a symlink from directory entry `dent` below `parent`.

        If querying size or time stamps of `dent` throws, the defaults are
        kept and the link target is not read eagerly.
    */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        Bytes64 linkSize;
        SysTime mtime, atime;
        bool statSucceeded = true;
        try
        {
            linkSize = dent.size.Bytes64;
            mtime = dent.timeLastModified;
            atime = dent.timeLastAccessed;
        }
        catch (Exception)
        {
            statSucceeded = false;
        }
        // const attrs = getLinkAttributes(dent.name); // attributes of target file
        // super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        super(dent.name.baseName, parent, linkSize, mtime, atime);
        if (statSucceeded)
        {
            this.retarget(dent); // read and cache the target right away
        }
        ++parent.gstats.noSymlinks;
    }

    /** Returns: The broken-link face when the target status is `broken`,
        the regular symlink face otherwise. */
    override Face!Color face() const @property @safe pure nothrow
    {
        return (_targetStatus == SymlinkTargetStatus.broken
                ? symlinkBrokenFace
                : symlinkFace);
    }

    override string toTextual() const @property { return `Symbolic Link`; }

    /** Read the link target of `dent`, cache it in `_target` and return it. */
    string retarget(ref DirEntry dent) @trusted
    {
        import std.file: readLink;
        _target = readLink(dent);
        return _target;
    }

    /** Cached/Memoized/Lazy Lookup for target. */
    string target() @property @trusted
    {
        if (_target)            // already read
        {
            return _target;
        }
        auto targetDent = DirEntry(path);
        return retarget(targetDent); // read it now
    }

    /** Cached/Memoized/Lazy Lookup for target as absolute normalized path. */
    string absoluteNormalizedTargetPath() @property @trusted
    {
        import std.path: absolutePath, buildNormalizedPath;
        // A relative target is resolved against the directory holding the link.
        immutable linkDir = path.dirName;
        return target.absolutePath(linkDir).buildNormalizedPath;
    }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            // All field loading is delegated to `fromMsgpack`.
            fromMsgpack(msgpack.Unpacker(unpacker));
        }
        /** Pack `name`, `size` and the two time stamps (as `stdTime`), in
            that order. `_target` and `_targetStatus` are not packed. */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }
        /** Unpack the fields written by `toMsgpack`, in the same order. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime; // scratch value reused for both time stamps
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
        }
    }

    string _target;             // Cached link target; unset until first read
    SymlinkTargetStatus _targetStatus = SymlinkTargetStatus.unknown; // Selects the face returned by `face()`
}

/** Special File (Character or Block Device).
 */
class SpecFile : File
{
    /** Create an otherwise empty special file below `parent` and count it. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noSpecialFiles;
    }
    /** Create a special file named after `dent` below `parent`.
        Size and both time stamps are passed as zero rather than read from `dent`. */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        super(dent.name.baseName, parent, 0.Bytes64, cast(SysTime)0, cast(SysTime)0);
        ++parent.gstats.noSpecialFiles;
    }

    override Face!Color face() const @property @safe pure nothrow { return specialFileFace; }

    override string toTextual() const @property { return `Special File`; }

    version(msgpack)
    {
        /** Construct from msgpack `unpacker`.
*/
        this(Unpacker)(ref Unpacker unpacker)
        {
            // All field loading is delegated to `fromMsgpack`.
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Pack `name`, `size` and the two time stamps (as `stdTime`), in
            that order. */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln(`Entering File.toMsgpack `, name); */
            packer.pack(name, size, timeLastModified.stdTime, timeLastAccessed.stdTime);
        }

        /** Unpack the fields written by `toMsgpack`, in the same order. */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);
            long stdTime; // scratch value reused for both time stamps
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize
        }
    }
}

/** Bit (Content) Status. */
enum BitStatus
{
    unknown,                    // not yet determined
    bits7,
    bits8,
}

/** Regular File.
 */
class RegFile : File
{
    /** Create an otherwise empty regular file below `parent` and count it. */
    this(NotNull!Dir parent)
    {
        super(parent);
        ++parent.gstats.noRegFiles;
    }

    /** Create a regular file from the name, size and time stamps of `dent`. */
    this(ref DirEntry dent, NotNull!Dir parent)
    {
        this(dent.name.baseName, parent, dent.size.Bytes64,
             dent.timeLastModified, dent.timeLastAccessed);
    }

    /** Create a regular file from explicit attributes and count it. */
    this(string name, NotNull!Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed)
    {
        super(name, parent, size, timeLastModified, timeLastAccessed);
        ++parent.gstats.noRegFiles;
    }

    ~this() nothrow @nogc
    {
        _cstat.deallocate(false);
    }

    override string toTextual() const @property { return `Regular File`; }

    /** Returns: Content Id of `this`, computing and caching it on first use. */
    const(SHA1Digest) contentId() @property @trusted /* @safe pure nothrow */
    {
        if (!_cstat._contentId.isUntouched)
        {
            return _cstat._contentId; // already computed
        }

        // Hash the contents in chunks of 32 pages, requesting only the SHA-1.
        enum doSHA1 = true;
        calculateCStatInChunks(parent.gstats.filesByContentId,
                               32*pageSize(),
                               doSHA1);
        freeContents(); // TODO: Call lazily only when open count is too large
        return _cstat._contentId;
    }

    /** Returns: Tree Content Id of `this`.
*/
    override const(SHA1Digest) treeContentId() @property @trusted /* @safe pure nothrow */
    {
        // For a regular file this is simply its content id.
        return contentId;
    }

    override Face!Color face() const @property @safe pure nothrow { return regFileFace; }

    /** Returns: SHA-1 of `this` `File` Contents at `src`.

        If no digest is stored yet, it is computed from `src`, cached, and
        `this` is appended to the entry of `filesByContentId` for that digest.
    */
    const(SHA1Digest) contId(inout (ubyte[]) src,
                             File[][SHA1Digest] filesByContentId)
        @property pure out(result) { assert(!result.empty); } // must have been defined
    do
    {
        if (_cstat._contentId.empty) // if not yet defined
        {
            _cstat._contentId = src.sha1Of;
            filesByContentId[_cstat._contentId] ~= this;
        }
        return _cstat._contentId;
    }

    /** Returns: Cached/Memoized Binary Histogram of `this` `File`.

        Filled from the file contents on first use.
    */
    auto ref bistogram8() @property @safe // ref needed here!
    {
        if (_cstat.bist.empty)
        {
            _cstat.bist.put(readOnlyContents);     // memoized calculated
        }
        return _cstat.bist;
    }

    /** Returns: Cached/Memoized XGram of `this` `File`.

        Filled from the file contents on first use.
    */
    auto ref xgram() @property @safe // ref needed here!
    {
        if (_cstat.xgram.empty)
        {
            _cstat.xgram.put(readOnlyContents);     // memoized calculated
        }
        return _cstat.xgram;
    }

    /** Returns: Cached/Memoized XGram Deep Denseness of `this` `File`.

        The cached numerator is combined with the xgram's bin count into a
        `Rational!ulong`. A cached value of zero is treated as "not yet
        computed", so it is recomputed on every call in that case.
    */
    auto ref xgramDeepDenseness() @property @safe
    {
        if (!_cstat._xgramDeepDenseness)
        {
            _cstat._xgramDeepDenseness = xgram.denseness(-1).numerator;
        }
        return Rational!ulong(_cstat._xgramDeepDenseness,
                              _cstat.xgram.noBins);
    }

    /** Returns: true if empty file (zero length). */
    bool empty() @property const @safe { return size == 0; }

    /** Process File in Cache Friendly Chunks.
*/ 1341 void calculateCStatInChunks(NotNull!File[][SHA1Digest] filesByContentId, 1342 size_t chunkSize = 32*pageSize(), 1343 bool doSHA1 = false, 1344 bool doBist = false, 1345 bool doBitStatus = false) @safe 1346 { 1347 if (_cstat._contentId.defined || empty) { doSHA1 = false; } 1348 if (!_cstat.bist.empty) { doBist = false; } 1349 if (_cstat.bitStatus != BitStatus.unknown) { doBitStatus = false; } 1350 1351 import std.digest.sha; 1352 SHA1 sha1; 1353 if (doSHA1) { sha1.start(); } 1354 1355 bool isASCII = true; 1356 1357 if (doSHA1 || doBist || doBitStatus) 1358 { 1359 import std.range: chunks; 1360 foreach (chunk; readOnlyContents.chunks(chunkSize)) 1361 { 1362 if (doSHA1) { sha1.put(chunk); } 1363 if (doBist) { _cstat.bist.put(chunk); } 1364 if (doBitStatus) 1365 { 1366 /* TODO: This can be parallelized using 64-bit wording! 1367 * Write automatic parallelizing library for this? */ 1368 foreach (elt; chunk) 1369 { 1370 import nxt.bitop_ex: bt; 1371 isASCII = isASCII && !elt.bt(7); // ASCII has no topmost bit set 1372 } 1373 } 1374 } 1375 } 1376 1377 if (doBitStatus) 1378 { 1379 _cstat.bitStatus = isASCII ? BitStatus.bits7 : BitStatus.bits8; 1380 } 1381 1382 if (doSHA1) 1383 { 1384 _cstat._contentId = sha1.finish(); 1385 filesByContentId[_cstat._contentId] ~= cast(NotNull!File)assumeNotNull(this); // TODO: Prettier way? 1386 } 1387 } 1388 1389 /** Clear/Reset Contents Statistics of `this` `File`. 
*/ 1390 void clearCStat(File[][SHA1Digest] filesByContentId) @safe nothrow 1391 { 1392 // SHA1-digest 1393 if (_cstat._contentId in filesByContentId) 1394 { 1395 auto dups = filesByContentId[_cstat._contentId]; 1396 import std.algorithm: remove; 1397 immutable n = dups.length; 1398 dups = dups.remove!(a => a is this); 1399 assert(n == dups.length + 1); // assert that dups were not decreased by one); 1400 } 1401 } 1402 1403 override string toString() @property @trusted 1404 { 1405 // import std.traits: fullyQualifiedName; 1406 // return fullyQualifiedName!(typeof(this)) ~ `(` ~ buildPath(parent.name, name) ~ `)`; // TODO: typenameof 1407 return (typeof(this)).stringof ~ `(` ~ this.path ~ `)`; // TODO: typenameof 1408 } 1409 1410 version(msgpack) 1411 { 1412 /** Construct from msgpack `unpacker`. */ 1413 this(Unpacker)(ref Unpacker unpacker) 1414 { 1415 fromMsgpack(msgpack.Unpacker(unpacker)); 1416 } 1417 1418 /** Pack. */ 1419 void toMsgpack(Packer)(ref Packer packer) const { 1420 /* writeln(`Entering RegFile.toMsgpack `, name); */ 1421 1422 packer.pack(name, size, 1423 timeLastModified.stdTime, 1424 timeLastAccessed.stdTime); 1425 1426 // CStat: TODO: Group 1427 packer.pack(_cstat.kindId); // FKind 1428 packer.pack(_cstat._contentId); // Digest 1429 1430 // Bist 1431 immutable bistFlag = !_cstat.bist.empty; 1432 packer.pack(bistFlag); 1433 if (bistFlag) { packer.pack(_cstat.bist); } 1434 1435 // XGram 1436 immutable xgramFlag = !_cstat.xgram.empty; 1437 packer.pack(xgramFlag); 1438 if (xgramFlag) 1439 { 1440 /* debug dln("packing xgram. empty:", _cstat.xgram.empty); */ 1441 packer.pack(_cstat.xgram, 1442 _cstat._xgramDeepDenseness); 1443 } 1444 1445 /* auto this_ = (cast(RegFile)this); // TODO: Ugly! Is there another way? 
*/
            /* const tags = this_.parent.gstats.ftags.getTags(this_); */
            /* immutable tagsFlag = !tags.empty; */
            /* packer.pack(tagsFlag); */
            /* debug dln(`Packing tags `, tags, ` of `, this_.path); */
            /* if (tagsFlag) { packer.pack(tags); } */
        }

        /** Unpack.

            Reads name, size, both time stamps, kind id and content id, then
            the histogram and the xgram, each only if its flag is set.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker) @trusted
        {
            unpacker.unpack(name, size); // Name, Size

            // Time
            long stdTime; // scratch value reused for both time stamps
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize

            // CStat: TODO: Group
            unpacker.unpack(_cstat.kindId); // FKind
            // A defined kind id that is not among the known kinds is warned
            // about and then reset.
            if (_cstat.kindId.defined &&
                _cstat.kindId !in parent.gstats.allFKinds.byId)
            {
                dln(`warning: kindId `, _cstat.kindId, ` not found for `,
                    path, `, FKinds length `, parent.gstats.allFKinds.byIndex.length);
                _cstat.kindId.reset; // forget it
            }
            unpacker.unpack(_cstat._contentId); // Digest
            // Register this file under its unpacked digest.
            if (_cstat._contentId)
            {
                parent.gstats.filesByContentId[_cstat._contentId] ~= cast(NotNull!File)this;
            }

            // Bist
            bool bistFlag; unpacker.unpack(bistFlag);
            if (bistFlag)
            {
                unpacker.unpack(_cstat.bist);
            }

            // XGram
            bool xgramFlag; unpacker.unpack(xgramFlag);
            if (xgramFlag)
            {
                /* if (_cstat.xgram == null) { */
                /*     _cstat.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                /* } */
                /* unpacker.unpack(*_cstat.xgram); */
                unpacker.unpack(_cstat.xgram,
                                _cstat._xgramDeepDenseness);
                /* debug dln(`unpacked xgram.
empty:`, _cstat.xgram.empty); */
            }

            // tags
            /* bool tagsFlag; unpacker.unpack(tagsFlag); */
            /* if (tagsFlag) { */
            /*     string[] tags; */
            /*     unpacker.unpack(tags); */
            /* } */
        }

        /** Reset all cached content statistics of `this`. */
        override void makeObselete() @trusted { _cstat.reset(); /* debug dln(`Reset CStat for `, path); */ }
    }

    /** Returns: Read-Only Contents of `this` Regular File.

        The file is memory-mapped on first use and the mapping is kept until
        `freeContents` is called. A zero-sized file yields an empty slice
        without creating a mapping.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    immutable(ubyte[]) readOnlyContents(string file = __FILE__, int line = __LINE__)() @trusted
    {
        if (_mmfile is null)
        {
            if (size == 0) // munmap fails for empty files
            {
                static assert([] !is null);
                return []; // empty file
            }
            else
            {
                _mmfile = new MmFile(path, MmFile.Mode.read,
                                     mmfile_size, null, pageSize());
                if (parent.gstats.showMMaps)
                {
                    writeln(`Mapped `, path, ` of size `, size);
                }
            }
        }
        return cast(typeof(return))_mmfile[];
    }

    /** Returns: Read-Writable Contents of `this` Regular File.

        Creates a read-write mapping only if none exists yet.
        NOTE(review): an existing mapping is returned as-is, even if
        `readOnlyContents` created it in read mode; zero-sized files are not
        special-cased here - confirm both are intended.
    */
    // } catch (InvalidMemoryOperationError) { viz.ppln(outFile, useHTML, `Failed to mmap `, dent.name); }
    // scope immutable src = cast(immutable ubyte[]) read(dent.name, upTo);
    ubyte[] readWriteableContents() @trusted
    {
        if (!_mmfile)
        {
            _mmfile = new MmFile(path, MmFile.Mode.readWrite,
                                 mmfile_size, null, pageSize());
        }
        return cast(typeof(return))_mmfile[];
    }

    /** If needed Free Allocated Contents of `this` Regular File.

        Returns: `true` if a mapping existed and was released, `false`
        otherwise.
    */
    bool freeContents()
    {
        if (_mmfile is null)
        {
            return false;
        }
        // The `delete` expression is obsolete; `destroy` runs the object's
        // destructor and leaves the memory to the garbage collector.
        destroy(_mmfile);
        _mmfile = null;
        return true;
    }

    import std.mmfile;
    private MmFile _mmfile = null;
    private CStat _cstat;     // Statistics about the contents of this RegFile.
}

/** Traits: each is true for the named class or its `NotNull` wrapper. */
enum isFile(T) = (is(T == File) || is(T == NotNull!File));
enum isDir(T) = (is(T == Dir) || is(T == NotNull!Dir));
enum isSymlink(T) = (is(T == Symlink) || is(T == NotNull!Symlink));
enum isRegFile(T) = (is(T == RegFile) || is(T == NotNull!RegFile));
enum isSpecialFile(T) = (is(T == SpecFile) || is(T == NotNull!SpecFile));
enum isAnyFile(T) = (isFile!T ||
                     isDir!T ||
                     isSymlink!T ||
                     isRegFile!T ||
                     isSpecialFile!T);

/** Return true if T is a class representing File IO. */
enum isFileIO(T) = (isAnyFile!T ||
                    is(T == ioFile));

/** Contents Statistics of a Regular File. */
struct CStat
{
    /** Return every statistic to its "not yet computed" state. */
    void reset() @safe nothrow
    {
        kindId[] = 0;
        _contentId[] = 0;
        hitCount = 0;
        bist.reset();
        xgram.reset();
        _xgramDeepDenseness = 0;
        // Without this the previous classification would survive the reset
        // and be treated as already known.
        bitStatus = BitStatus.unknown;
        deallocate();
    }

    /** Clear `kindId`. `nullify` is currently unused; the manual xgram
        deallocation it belonged to is commented out. */
    void deallocate(bool nullify = true) @trusted nothrow
    {
        kindId[] = 0;
        /* if (xgram != null) { */
        /*     import core.stdc.stdlib; */
        /*     free(xgram); */
        /*     if (nullify) { */
        /*         xgram = null; */
        /*     } */
        /* } */
    }

    SHA1Digest kindId;      // FKind Identifier/Fingerprint of this regular file.
    SHA1Digest _contentId; // Content Identifier/Fingerprint.

    /** Boolean Single Bistogram over file contents. If
        binHist0[cast(ubyte)x] is set then this file contains byte x. Consumes
        32 bytes. */
    Bist bist; // TODO: Put in separate slice std.allocator.

    /** Boolean Pair Bistogram (Digram) over file contents (higher-order statistics).
        If this RegFile contains a sequence of [byte0, byte1],
        then bit at index byte0 + byte1 * 256 is set in xgram.
    */
    XGram xgram; // TODO: Use slice std.allocator
    private ulong _xgramDeepDenseness = 0; // Cached numerator of the xgram denseness; 0 means not yet computed

    uint64_t hitCount = 0;
    BitStatus bitStatus = BitStatus.unknown; // 7-bit/8-bit classification of the contents
}

import core.sys.posix.sys.types;

/** Which symlinks to follow.

    NOTE(review): the descriptions of `internal` and `external` below were
    previously the other way round, contradicting the member names; confirm
    against the code that consumes this enum.
*/
enum SymlinkFollowContext
{
    none,                       // Follow no symlinks
    internal,                   // Follow only symlinks inside of scanned tree
    external,                   // Follow only symlinks outside of scanned tree
    all,                        // Follow all symlinks
    standard = external         // Default is the same as `external`
}

/** Global Scanner Statistics. */
class GStats
{
    NotNull!File[][string] filesByName;    // Potential File Name Duplicates
    NotNull!File[][ino_t] filesByInode;    // Potential Link Duplicates
    NotNull!File[][SHA1Digest] filesByContentId; // File(s) (Duplicates) Indexed on Contents SHA1.
    NotNull!RegFile[][string] elfFilesBySymbol; // File(s) (Duplicates) Indexed on raw unmangled symbol.
    FileTags ftags;

    Bytes64[NotNull!File] treeSizesByFile; // Tree sizes.
    size_t[NotNull!File] lineCountsByFile; // Line counts.
1644 1645 // VCS Directories 1646 DirKind[] vcDirKinds; 1647 DirKind[string] vcDirKindsMap; 1648 1649 // Skipped Directories 1650 DirKind[] skippedDirKinds; 1651 DirKind[string] skippedDirKindsMap; 1652 1653 FKinds txtFKinds = new FKinds; // Textual 1654 FKinds binFKinds = new FKinds; // Binary (Non-Textual) 1655 FKinds allFKinds = new FKinds; // All 1656 FKinds selFKinds = new FKinds; // User selected 1657 1658 void loadFileKinds() 1659 { 1660 txtFKinds ~= new FKind("SCons", ["SConstruct", "SConscript"], 1661 ["scons"], 1662 [], 0, [], [], 1663 defaultCommentDelims, 1664 pythonStringDelims, 1665 FileContent.buildSystemCode, FileKindDetection.equalsNameAndContents); // TOOD: Inherit Python 1666 1667 txtFKinds ~= new FKind("Makefile", ["GNUmakefile", "Makefile", "makefile"], 1668 ["mk", "mak", "makefile", "make", "gnumakefile"], [], 0, [], [], 1669 defaultCommentDelims, 1670 defaultStringDelims, 1671 FileContent.sourceCode, FileKindDetection.equalsName); 1672 txtFKinds ~= new FKind("Automakefile", ["Makefile.am", "makefile.am"], 1673 ["am"], [], 0, [], [], 1674 defaultCommentDelims, 1675 defaultStringDelims, 1676 FileContent.sourceCode); 1677 txtFKinds ~= new FKind("Autoconffile", ["configure.ac", "configure.in"], 1678 [], [], 0, [], [], 1679 defaultCommentDelims, 1680 defaultStringDelims, 1681 FileContent.sourceCode); 1682 txtFKinds ~= new FKind("Doxygen", ["Doxyfile"], 1683 ["doxygen"], [], 0, [], [], 1684 defaultCommentDelims, 1685 defaultStringDelims, 1686 FileContent.sourceCode); 1687 1688 txtFKinds ~= new FKind("Rake", ["Rakefile"],// TODO: inherit Ruby 1689 ["mk", "makefile", "make", "gnumakefile"], [], 0, [], [], 1690 [Delim("#"), Delim("=begin", "=end")], 1691 defaultStringDelims, 1692 FileContent.sourceCode, FileKindDetection.equalsName); 1693 1694 txtFKinds ~= new FKind("HTML", [], ["htm", "html", "shtml", "xhtml"], [], 0, [], [], 1695 [Delim("<!--", "-->")], 1696 defaultStringDelims, 1697 FileContent.text, FileKindDetection.equalsContents); // markup 
text 1698 txtFKinds ~= new FKind("XML", [], ["xml", "dtd", "xsl", "xslt", "ent", ], [], 0, "<?xml", [], 1699 [Delim("<!--", "-->")], 1700 defaultStringDelims, 1701 FileContent.text, FileKindDetection.equalsContents); // TODO: markup text 1702 txtFKinds ~= new FKind("YAML", [], ["yaml", "yml"], [], 0, [], [], 1703 defaultCommentDelims, 1704 defaultStringDelims, 1705 FileContent.text); // TODO: markup text 1706 txtFKinds ~= new FKind("CSS", [], ["css"], [], 0, [], [], 1707 [Delim("/*", "*/")], 1708 defaultStringDelims, 1709 FileContent.text, FileKindDetection.equalsContents); 1710 1711 txtFKinds ~= new FKind("Audacity Project", [], ["aup"], [], 0, "<?xml", [], 1712 defaultCommentDelims, 1713 defaultStringDelims, 1714 FileContent.text, FileKindDetection.equalsNameAndContents); 1715 1716 txtFKinds ~= new FKind("Comma-separated values", [], ["csv"], [], 0, [], [], // TODO: decribe with symbolic 1717 defaultCommentDelims, 1718 defaultStringDelims, 1719 FileContent.text, FileKindDetection.equalsNameAndContents); 1720 1721 txtFKinds ~= new FKind("Tab-separated values", [], ["tsv"], [], 0, [], [], // TODO: describe with symbolic 1722 defaultCommentDelims, 1723 defaultStringDelims, 1724 FileContent.text, FileKindDetection.equalsNameAndContents); 1725 1726 static immutable keywordsC = [ 1727 "auto", "const", "double", "float", "int", "short", "struct", 1728 "unsigned", "break", "continue", "else", "for", "long", "signed", 1729 "switch", "void", "case", "default", "enum", "goto", "register", 1730 "sizeof", "typedef", "volatile", "char", "do", "extern", "if", 1731 "return", "static", "union", "while", 1732 ]; 1733 1734 /* See_Also: https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B */ 1735 auto opersCBasic = [ 1736 // Arithmetic 1737 Op("+", OpArity.binary, OpAssoc.LR, 6, "Add"), 1738 Op("-", OpArity.binary, OpAssoc.LR, 6, "Subtract"), 1739 Op("*", OpArity.binary, OpAssoc.LR, 5, "Multiply"), 1740 Op("/", OpArity.binary, OpAssoc.LR, 5, "Divide"), 1741 Op("%", 
OpArity.binary, OpAssoc.LR, 5, "Remainder/Moduls"), 1742 1743 Op("+", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary plus"), 1744 Op("-", OpArity.unaryPrefix, OpAssoc.RL, 3, "Unary minus"), 1745 1746 Op("++", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix increment"), 1747 Op("--", OpArity.unaryPostfix, OpAssoc.LR, 2, "Suffix decrement"), 1748 1749 Op("++", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix increment"), 1750 Op("--", OpArity.unaryPrefix, OpAssoc.RL, 3, "Prefix decrement"), 1751 1752 // Assignment Arithmetic (binary) 1753 Op("=", OpArity.binary, OpAssoc.RL, 16, "Assign"), 1754 Op("+=", OpArity.binary, OpAssoc.RL, 16, "Assignment by sum"), 1755 Op("-=", OpArity.binary, OpAssoc.RL, 16, "Assignment by difference"), 1756 Op("*=", OpArity.binary, OpAssoc.RL, 16, "Assignment by product"), 1757 Op("/=", OpArity.binary, OpAssoc.RL, 16, "Assignment by quotient"), 1758 Op("%=", OpArity.binary, OpAssoc.RL, 16, "Assignment by remainder"), 1759 1760 Op("&=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise AND"), 1761 Op("|=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise OR"), 1762 1763 Op("^=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise XOR"), 1764 Op("<<=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise left shift"), 1765 Op(">>=", OpArity.binary, OpAssoc.RL, 16, "Assignment by bitwise right shift"), 1766 1767 Op("==", OpArity.binary, OpAssoc.LR, 9, "Equal to"), 1768 Op("!=", OpArity.binary, OpAssoc.LR, 9, "Not equal to"), 1769 1770 Op("<", OpArity.binary, OpAssoc.LR, 8, "Less than"), 1771 Op(">", OpArity.binary, OpAssoc.LR, 8, "Greater than"), 1772 Op("<=", OpArity.binary, OpAssoc.LR, 8, "Less than or equal to"), 1773 Op(">=", OpArity.binary, OpAssoc.LR, 8, "Greater than or equal to"), 1774 1775 Op("&&", OpArity.binary, OpAssoc.LR, 13, "Logical AND"), // TODO: Convert to math in smallcaps AND 1776 Op("||", OpArity.binary, OpAssoc.LR, 14, "Logical OR"), // TODO: Convert to math in smallcaps OR 1777 1778 Op("!", OpArity.unaryPrefix, 
OpAssoc.LR, 3, "Logical NOT"), // TODO: Convert to math in smallcaps NOT 1779 1780 Op("&", OpArity.binary, OpAssoc.LR, 10, "Bitwise AND"), 1781 Op("^", OpArity.binary, OpAssoc.LR, 11, "Bitwise XOR (exclusive or)"), 1782 Op("|", OpArity.binary, OpAssoc.LR, 12, "Bitwise OR"), 1783 1784 Op("<<", OpArity.binary, OpAssoc.LR, 7, "Bitwise left shift"), 1785 Op(">>", OpArity.binary, OpAssoc.LR, 7, "Bitwise right shift"), 1786 1787 Op("~", OpArity.unaryPrefix, OpAssoc.LR, 3, "Bitwise NOT (One's Complement)"), 1788 Op(",", OpArity.binary, OpAssoc.LR, 18, "Comma"), 1789 Op("sizeof", OpArity.unaryPrefix, OpAssoc.LR, 3, "Size-of"), 1790 1791 Op("->", OpArity.binary, OpAssoc.LR, 2, "Element selection through pointer"), 1792 Op(".", OpArity.binary, OpAssoc.LR, 2, "Element selection by reference"), 1793 1794 ]; 1795 1796 /* See_Also: https://en.wikipedia.org/wiki/Iso646.h */ 1797 auto opersC_ISO646 = [ 1798 OpAlias("and", "&&"), 1799 OpAlias("or", "||"), 1800 OpAlias("and_eq", "&="), 1801 1802 OpAlias("bitand", "&"), 1803 OpAlias("bitor", "|"), 1804 1805 OpAlias("compl", "~"), 1806 OpAlias("not", "!"), 1807 OpAlias("not_eq", "!="), 1808 OpAlias("or_eq", "|="), 1809 OpAlias("xor", "^"), 1810 OpAlias("xor_eq", "^="), 1811 ]; 1812 1813 auto opersC = opersCBasic /* ~ opersC_ISO646 */; 1814 1815 auto kindC = new FKind("C", [], ["c", "h"], [], 0, [], 1816 keywordsC, 1817 cCommentDelims, 1818 defaultStringDelims, 1819 FileContent.sourceCode, 1820 FileKindDetection.equalsWhatsGiven, 1821 Lang.c); 1822 txtFKinds ~= kindC; 1823 kindC.operations ~= tuple(FOp.checkSyntax, `gcc -x c -fsyntax-only -c`); 1824 kindC.operations ~= tuple(FOp.checkSyntax, `clang -x c -fsyntax-only -c`); 1825 kindC.operations ~= tuple(FOp.preprocess, `cpp`); 1826 kindC.opers = opersC; 1827 1828 static immutable keywordsCxx = (keywordsC ~ ["asm", "dynamic_cast", "namespace", "reinterpret_cast", "try", 1829 "bool", "explicit", "new", "static_cast", "typeid", 1830 "catch", "false", "operator", "template", "typename", 
1831 "class", "friend", "private", "this", "using", 1832 "const_cast", "inline", "public", "throw", "virtual", 1833 "delete", "mutable", "protected", "true", "wchar_t", 1834 // The following are not essential when 1835 // the standard ASCII character set is 1836 // being used, but they have been added 1837 // to provide more readable alternatives 1838 // for some of the C++ operators, and 1839 // also to facilitate programming with 1840 // character sets that lack characters 1841 // needed by C++. 1842 "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq", 1843 "and_eq", "bitor", "not", "or", "xor", ]).uniq.array; 1844 1845 auto opersCxx = opersC ~ [ 1846 Op("->*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1847 Op(".*", OpArity.binary, OpAssoc.LR, 4, "Pointer to member"), 1848 Op("::", OpArity.binary, OpAssoc.none, 1, "Scope resolution"), 1849 Op("typeid", OpArity.unaryPrefix, OpAssoc.LR, 2, "Run-time type information (RTTI))"), 1850 //Op("alignof", OpArity.unaryPrefix, OpAssoc.LR, _, _), 1851 Op("new", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory allocation"), 1852 Op("delete", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1853 Op("delete[]", OpArity.unaryPrefix, OpAssoc.RL, 3, "Dynamic memory deallocation"), 1854 /* Op("noexcept", OpArity.unaryPrefix, OpAssoc.none, _, _), */ 1855 1856 Op("dynamic_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1857 Op("reinterpret_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1858 Op("static_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1859 Op("const_cast", OpArity.unaryPrefix, OpAssoc.LR, 2, "Type cast"), 1860 1861 Op("throw", OpArity.unaryPrefix, OpAssoc.LR, 17, "Throw operator"), 1862 /* Op("catch", OpArity.unaryPrefix, OpAssoc.LR, _, _) */ 1863 ]; 1864 1865 static immutable extsCxx = ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"]; 1866 auto kindCxx = new FKind("C++", [], extsCxx, [], 0, [], 1867 keywordsCxx, 1868 cCommentDelims, 1869 
defaultStringDelims, 1870 FileContent.sourceCode, 1871 FileKindDetection.equalsWhatsGiven, 1872 Lang.cxx); 1873 kindCxx.operations ~= tuple(FOp.checkSyntax, `gcc -x c++ -fsyntax-only -c`); 1874 kindCxx.operations ~= tuple(FOp.checkSyntax, `clang -x c++ -fsyntax-only -c`); 1875 kindCxx.operations ~= tuple(FOp.preprocess, `cpp`); 1876 kindCxx.opers = opersCxx; 1877 txtFKinds ~= kindCxx; 1878 static immutable keywordsCxx11 = keywordsCxx ~ ["alignas", "alignof", 1879 "char16_t", "char32_t", 1880 "constexpr", 1881 "decltype", 1882 "override", "final", 1883 "noexcept", "nullptr", 1884 "auto", 1885 "thread_local", 1886 "static_assert", ]; 1887 // TODO: Define as subkind 1888 /* txtFKinds ~= new FKind("C++11", [], ["cpp", "hpp", "cxx", "hxx", "c++", "h++", "C", "H"], [], 0, [], */ 1889 /* keywordsCxx11, */ 1890 /* [Delim("/\*", "*\/"), */ 1891 /* Delim("//")], */ 1892 /* defaultStringDelims, */ 1893 /* FileContent.sourceCode, */ 1894 /* FileKindDetection.equalsWhatsGiven); */ 1895 1896 /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */ 1897 static immutable opersCxxMicrosoft = ["__alignof"]; 1898 1899 /* See_Also: http://msdn.microsoft.com/en-us/library/2e6a4at9.aspx */ 1900 static immutable keywordsCxxMicrosoft = (keywordsCxx ~ [/* __abstract 2 */ 1901 "__asm", 1902 "__assume", 1903 "__based", 1904 /* __box 2 */ 1905 "__cdecl", 1906 "__declspec", 1907 /* __delegate 2 */ 1908 "__event", 1909 "__except", 1910 "__fastcall", 1911 "__finally", 1912 "__forceinline", 1913 /* __gc 2 */ 1914 /* __hook 3 */ 1915 "__identifier", 1916 "__if_exists", 1917 "__if_not_exists", 1918 "__inline", 1919 "__int16", 1920 "__int32", 1921 "__int64", 1922 "__int8", 1923 "__interface", 1924 "__leave", 1925 "__m128", 1926 "__m128d", 1927 "__m128i", 1928 "__m64", 1929 "__multiple_inheritance", 1930 /* __nogc 2 */ 1931 "__noop", 1932 /* __pin 2 */ 1933 /* __property 2 */ 1934 "__raise", 1935 /* __sealed 2 */ 1936 "__single_inheritance", 1937 "__stdcall", 1938 "__super", 1939 
"__thiscall", 1940 "__try", 1941 "__except", 1942 "__finally", 1943 /* __try_cast 2 */ 1944 "__unaligned", 1945 /* __unhook 3 */ 1946 "__uuidof", 1947 /* __value 2 */ 1948 "__virtual_inheritance", 1949 "__w64", 1950 "__wchar_t", 1951 "wchar_t", 1952 "abstract", 1953 "array", 1954 "auto", 1955 "bool", 1956 "break", 1957 "case", 1958 "catch", 1959 "char", 1960 "class", 1961 "const", 1962 "const_cast", 1963 "continue", 1964 "decltype", 1965 "default", 1966 "delegate", 1967 "delete", 1968 /* deprecated 1 */ 1969 /* dllexport 1 */ 1970 /* dllimport 1 */ 1971 "do", 1972 "double", 1973 "dynamic_cast", 1974 "else", 1975 "enum", 1976 "enum class", 1977 "enum struct", 1978 "event", 1979 "explicit", 1980 "extern", 1981 "false", 1982 "finally", 1983 "float", 1984 "for", 1985 "for each", 1986 "in", 1987 "friend", 1988 "friend_as", 1989 "gcnew", 1990 "generic", 1991 "goto", 1992 "if", 1993 "initonly", 1994 "inline", 1995 "int", 1996 "interface class", 1997 "interface struct", 1998 "interior_ptr", 1999 "literal", 2000 "long", 2001 "mutable", 2002 /* naked 1 */ 2003 "namespace", 2004 "new", 2005 "new", 2006 /* noinline 1 */ 2007 /* noreturn 1 */ 2008 /* nothrow 1 */ 2009 /* novtable 1 */ 2010 "nullptr", 2011 "operator", 2012 "private", 2013 "property", 2014 /* property 1 */ 2015 "protected", 2016 "public", 2017 "ref class", 2018 "ref struct", 2019 "register", 2020 "reinterpret_cast", 2021 "return", 2022 "safecast", 2023 "sealed", 2024 /* selectany 1 */ 2025 "short", 2026 "signed", 2027 "sizeof", 2028 "static", 2029 "static_assert", 2030 "static_cast", 2031 "struct", 2032 "switch", 2033 "template", 2034 "this", 2035 /* thread 1 */ 2036 "throw", 2037 "true", 2038 "try", 2039 "typedef", 2040 "typeid", 2041 "typeid", 2042 "typename", 2043 "union", 2044 "unsigned", 2045 "using" /* declaration */, 2046 "using" /* directive */, 2047 /* uuid 1 */ 2048 "value class", 2049 "value struct", 2050 "virtual", 2051 "void", 2052 "volatile", 2053 "while"]).uniq.array; 2054 2055 static immutable 
xattrCxxMicrosoft = [];

// ---------------------------------------------------------------------------
// Source-code file kinds: Objective-C ... Ada.
//
// Each `new FKind(...)` below is positional. Judging from the call sites the
// order is: kind name, base names, extensions, magic, magic offset, an extra
// pattern, keywords, comment delimiters, string delimiters, content class,
// detection mode and language.
// NOTE(review): inferred from usage only - confirm against FKind's constructor.
// ---------------------------------------------------------------------------

// Keywords that Objective-C adds on top of the C (or C++) keyword set.
// Every entry is listed once; "@protocol" was previously misspelled "@protoco".
static immutable keywordsNewObjectiveC = ["id",
                                          "in",
                                          "out", // Returned by reference
                                          "inout", // Argument is used both to provide information and to get information back
                                          "bycopy",
                                          "byref", "oneway", "self",
                                          "super",
                                          "@interface", "@end",
                                          "@implementation",
                                          "@protocol",
                                          "@class"];

static immutable keywordsObjectiveC = keywordsC ~ keywordsNewObjectiveC;
txtFKinds ~= new FKind("Objective-C", [], ["m", "h"], [], 0, [],
                       keywordsObjectiveC,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven,
                       Lang.objectiveC);

static immutable keywordsObjectiveCxx = keywordsCxx ~ keywordsNewObjectiveC;
txtFKinds ~= new FKind("Objective-C++", [], ["mm", "h"], [], 0, [],
                       keywordsObjectiveCxx,
                       cCommentDelims, // Objective-C++ uses C/C++ comments, same as Objective-C above
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsWhatsGiven,
                       Lang.objectiveCxx);

// Swift: C operators plus the overflow operators, and a few builtins.
static immutable keywordsSwift = ["break", "class", "continue", "default", "do", "else", "for", "func", "if", "import",
                                  "in", "let", "return", "self", "struct", "super", "switch", "unowned", "var", "weak", "while",
                                  "mutating", "extension"];
auto opersOverflowSwift = opersC ~ [Op("&+"), Op("&-"), Op("&*"), Op("&/"), Op("&%")];
auto builtinsSwift = ["print", "println"];
auto kindSwift = new FKind("Swift", [], ["swift"], [], 0, [],
                           keywordsSwift,
                           cCommentDelims,
                           defaultStringDelims,
                           FileContent.sourceCode,
                           FileKindDetection.equalsWhatsGiven,
                           Lang.swift);
kindSwift.builtins = builtinsSwift;
kindSwift.opers = opersOverflowSwift;
txtFKinds ~= kindSwift;

static immutable keywordsCSharp = ["if"]; // TODO: Add keywords
txtFKinds ~= new FKind("C#", [], ["cs"], [], 0, [], keywordsCSharp,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsWhatsGiven,
                       Lang.cSharp);

static immutable keywordsOCaml = ["and", "as", "assert", "begin", "class",
                                  "constraint", "do", "done", "downto", "else",
                                  "end", "exception", "external", "false", "for",
                                  "fun", "function", "functor", "if", "in",
                                  "include", "inherit", "inherit!", "initializer",
                                  "lazy", "let", "match", "method", "method!",
                                  "module", "mutable", "new", "object", "of",
                                  "open", "or",
                                  "private", "rec", "sig", "struct", "then", "to",
                                  "true", "try", "type",
                                  "val", "val!", "virtual",
                                  "when", "while", "with"];
// "ml" (implementation) and "mli" (interface) are the extensions OCaml sources
// actually carry; "ocaml" is kept for backward compatibility.
txtFKinds ~= new FKind("OCaml", [], ["ocaml", "ml", "mli"], [], 0, [], keywordsOCaml,
                       [Delim("(*", "*)")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Parrot has no keyword list of its own yet; it previously borrowed OCaml's,
// which made OCaml keywords count as Parrot keywords.
txtFKinds ~= new FKind("Parrot", [], ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg", ], [], 0, [], [],
                       [Delim("#"),
                        Delim("^=", // TODO: Needs beginning of line instead of ^
                              "=cut")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

static immutable keywordsProlog = [];
txtFKinds ~= new FKind("Prolog", [], ["pl", "pro", "P"], [], 0, [], keywordsProlog,
                       [],
                       [],
                       FileContent.sourceCode, FileKindDetection.equalsWhatsGiven);

// Operator table for D. The fourth argument is written as `level*2`, with
// `cast(int)(14.5*2)` for the postfix operators, so half-levels stay integral.
auto opersD = [
    // Arithmetic
    Op("+", OpArity.binary, OpAssoc.LR, 10*2, "Add"),
    Op("-", OpArity.binary, OpAssoc.LR, 10*2, "Subtract"),
    Op("~", OpArity.binary, OpAssoc.LR, 10*2, "Concatenate"),

    Op("*", OpArity.binary, OpAssoc.LR, 11*2, "Multiply"),
    Op("/", OpArity.binary, OpAssoc.LR, 11*2, "Divide"),
    Op("%", OpArity.binary, OpAssoc.LR, 11*2, "Remainder/Modulus"),

    Op("++", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix increment"),
    Op("--", OpArity.unaryPostfix, OpAssoc.LR, cast(int)(14.5*2), "Suffix decrement"),

    Op("^^", OpArity.binary, OpAssoc.RL, 13*2, "Power"),

    Op("++", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix increment"),
    Op("--", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Prefix decrement"),
    Op("&", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Address of"),
    Op("*", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Pointer Dereference"),
    Op("+", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Plus"),
    Op("-", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Unary Minus"),
    Op("!", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Logical NOT"), // TODO: Convert to math in smallcaps NOT
    Op("~", OpArity.unaryPrefix, OpAssoc.RL, 12*2, "Bitwise NOT (One's Complement)"), // right-to-left like every other prefix operator

    // Bit shift
    Op("<<", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise left shift"),
    Op(">>", OpArity.binary, OpAssoc.LR, 9*2, "Bitwise right shift"),

    // Comparison
    Op("==", OpArity.binary, OpAssoc.LR, 6*2, "Equal to"),
    Op("!=", OpArity.binary, OpAssoc.LR, 6*2, "Not equal to"),
    Op("<", OpArity.binary, OpAssoc.LR, 6*2, "Less than"),
    Op(">", OpArity.binary, OpAssoc.LR, 6*2, "Greater than"),
    Op("<=", OpArity.binary, OpAssoc.LR, 6*2, "Less than or equal to"),
    Op(">=", OpArity.binary, OpAssoc.LR, 6*2, "Greater than or equal to"),
    Op("in", OpArity.binary, OpAssoc.LR, 6*2, "In"),
    Op("!in", OpArity.binary, OpAssoc.LR, 6*2, "Not In"),
    Op("is", OpArity.binary, OpAssoc.LR, 6*2, "Is"),
    Op("!is", OpArity.binary, OpAssoc.LR, 6*2, "Not Is"),

    Op("&", OpArity.binary, OpAssoc.LR, 8*2, "Bitwise AND"),
    Op("^", OpArity.binary, OpAssoc.LR, 7*2, "Bitwise XOR (exclusive or)"),
    Op("|", OpArity.binary, OpAssoc.LR, 6*2, "Bitwise OR"),

    Op("&&", OpArity.binary, OpAssoc.LR, 5*2, "Logical AND"), // TODO: Convert to math in smallcaps AND
    Op("||", OpArity.binary, OpAssoc.LR, 4*2, "Logical OR"), // TODO: Convert to math in smallcaps OR

    // Assignment Arithmetic (binary)
    Op("=", OpArity.binary, OpAssoc.RL, 2*2, "Assign"),
    Op("+=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by sum"),
    Op("-=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by difference"),
    Op("*=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by product"),
    Op("/=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by quotient"),
    Op("%=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by remainder"),
    Op("&=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise AND"),
    Op("|=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise OR"),
    Op("^=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise XOR"),
    Op("<<=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise left shift"),
    Op(">>=", OpArity.binary, OpAssoc.RL, 2*2, "Assignment by bitwise right shift"),

    Op(",", OpArity.binary, OpAssoc.LR, 1*2, "Comma"),
    Op("..", OpArity.binary, OpAssoc.LR, cast(int)(0*2), "Range separator"),
];

// Script interpreters for D and the shebang pattern that matches them.
enum interpretersForD = ["rdmd",
                         "gdmd"];
auto magicForD = shebangLine(alt(lit("rdmd"),
                                 lit("gdmd")));

static immutable keywordsD = [`@property`, `@safe`, `@trusted`, `@system`, `@disable`, `abstract`, `alias`, `align`, `asm`, `assert`, `auto`, `body`, `bool`, `break`, `byte`, `case`, `cast`, `catch`,
                              `cdouble`, `cent`, `cfloat`, `char`, `class`, `const`, `continue`, `creal`, `dchar`, `debug`, `default`, `delegate`, `delete`, `deprecated`,
                              `do`, `double`, `else`, `enum`, `export`, `extern`, `false`, `final`, `finally`, `float`, `for`, `foreach`, `foreach_reverse`,
                              `function`, `goto`, `idouble`, `if`, `ifloat`, `immutable`, `import`, `in`, `inout`, `int`, `interface`, `invariant`, `ireal`,
                              `is`, `lazy`, `long`, `macro`, `mixin`, `module`, `new`, `nothrow`, `null`, `out`, `override`, `package`, `pragma`, `private`,
                              `protected`, `public`, `pure`, `real`, `ref`, `return`, `scope`, `shared`, `short`, `static`, `struct`, `super`, `switch`,
                              `synchronized`, `template`, `this`, `throw`, `true`, `try`, `typedef`, `typeid`, `typeof`, `ubyte`, `ucent`, `uint`, `ulong`,
                              `union`, `unittest`, `ushort`, `version`, `void`, `volatile`, `wchar`, `while`, `with`, `__gshared`,
                              `__thread`, `__traits`,
                              `string`, `wstring`, `dstring`, `size_t`, `hash_t`, `ptrdiff_t`, `equals_`]; // aliases

static immutable builtinsD = [`toString`, `toHash`, `opCmp`, `opEquals`,
                              `opUnary`, `opBinary`, `opApply`, `opCall`, `opAssign`, `opIndexAssign`, `opSliceAssign`, `opOpAssign`,
                              `opIndex`, `opSlice`, `opDispatch`,
                              `toString`, `toHash`, `opCmp`, `opEquals`, `Monitor`, `factory`, `classinfo`, `vtbl`, `offset`, `getHash`, `equals`, `compare`, `tsize`, `swap`, `next`, `init`, `flags`, `offTi`, `destroy`, `postblit`, `toString`, `toHash`,
                              `factory`, `classinfo`, `Throwable`, `Exception`, `Error`, `capacity`, `reserve`, `assumeSafeAppend`, `clear`,
                              `ModuleInfo`, `ClassInfo`, `MemberInfo`, `TypeInfo`];

// Type properties. `dig` replaces an empty-string entry that sat between
// `mant_dig` and `max_10_exp`.
static immutable propertiesD = [`sizeof`, `stringof`, `mangleof`, `nan`, `init`, `alignof`, `max`, `min`, `infinity`, `epsilon`, `mant_dig`, `dig`,
                                `max_10_exp`, `max_exp`, `min_10_exp`, `min_exp`, `min_normal`, `re`, `im`];

static immutable specialsD = [`__FILE__`, `__LINE__`, `__DATE__`, `__EOF__`, `__TIME__`, `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `#line`];

// D interface files (.di); registered ahead of "D", which also lists "di".
auto kindDInterface = new FKind("D Interface", [], ["di"],
                                magicForD, 0,
                                [],
                                keywordsD,
                                dCommentDelims,
                                defaultStringDelims,
                                FileContent.sourceCode,
                                FileKindDetection.equalsNameOrContents,
                                Lang.d);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindDInterface.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO: Include paths
txtFKinds ~= kindDInterface;

auto kindDDoc = new FKind("D Documentation", [], ["dd"],
                          magicForD, 0,
                          [],
                          keywordsD,
                          dCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode,
                          FileKindDetection.equalsNameOrContents);
txtFKinds ~= kindDDoc;

auto kindD = new FKind("D", [], ["d", "di"],
                       magicForD, 0,
                       [],
                       keywordsD,
                       dCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode,
                       FileKindDetection.equalsNameOrContents,
                       Lang.d);
kindD.operations ~= tuple(FOp.checkSyntax, `gdc -fsyntax-only`);
kindD.operations ~= tuple(FOp.checkSyntax, `dmd -debug -wi -c -o-`); // TODO: Include paths
txtFKinds ~= kindD;

// `kindDi` used to build and register a second, identical "D Interface" kind.
// It is kept as an alias so existing references still resolve, while the kind
// itself is registered only once.
auto kindDi = kindDInterface;

static immutable keywordsRust = ["as", "box", "break", "continue", "crate",
                                 "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in",
                                 "let", "loop", "match", "mod", "mut", "priv", "proc", "pub", "ref",
                                 "return", "self", "static", "struct", "super", "true", "trait",
                                 "type", "unsafe", "use", "while"];

auto kindRust = new FKind("Rust", [], ["rs"],
                          [], 0,
                          [],
                          keywordsRust,
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode,
                          FileKindDetection.equalsNameOrContents,
                          Lang.rust);
txtFKinds ~= kindRust;

static immutable keywordsFortran77 = ["if", "else"];
// TODO: Support .h files but require it to contain some Fortran-specific or be parseable.
auto kindFortan = new FKind("Fortran", [], ["f", "fortran", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"], [], 0, [], keywordsFortran77,
                            [Delim("^C")], // TODO: Need beginning of line instead ^. seq(bol(), alt(lit('C'), lit('c'))); // TODO: Add chars chs("cC");
                            defaultStringDelims,
                            FileContent.sourceCode,
                            FileKindDetection.equalsNameOrContents,
                            Lang.fortran);
kindFortan.operations ~= tuple(FOp.checkSyntax, `gcc -x fortran -fsyntax-only`);
txtFKinds ~= kindFortan;

// Ada: one kind per language revision, each with a cumulative keyword set,
// plus a generic "Ada" kind that uses the newest set.
import nxt.ada_defs;
static immutable keywordsAda83 = ada_defs.keywords83;
static immutable keywordsAda95 = keywordsAda83 ~ ada_defs.keywordsNew95;
static immutable keywordsAda2005 = keywordsAda95 ~ ada_defs.keywordsNew2005;
static immutable keywordsAda2012 = keywordsAda2005 ~ ada_defs.keywordsNew2012;
static immutable extsAda = ["ada", "adb", "ads"];
txtFKinds ~= new FKind("Ada 83", [], extsAda, [], 0, [], keywordsAda83, // was labelled "Ada 82"; the keyword set is Ada 83
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 95", [], extsAda, [], 0, [], keywordsAda95,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2005", [], extsAda, [], 0, [], keywordsAda2005,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada 2012", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Ada", [], extsAda, [], 0, [], keywordsAda2012,
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.sourceCode);

auto aliKind = new FKind("Ada Library File", [], ["ali"], [], 0, `V "GNAT Lib v`, [],
                         [], // N/A
                         defaultStringDelims,
                         FileContent.fingerprint); // TODO: Parse version following magic tag?
aliKind.machineGenerated = true;
txtFKinds ~= aliKind;

// ---------------------------------------------------------------------------
// Text file kinds: Pascal ... PEM keys, followed by the ELF binary kind.
//
// The FKind arguments are positional. Judging from the call sites the order is:
// kind name, base names, extensions, magic, magic offset, an extra pattern,
// keywords, comment delimiters, string delimiters, content class and
// (optionally) detection mode and language.
// NOTE(review): inferred from usage only - confirm against FKind's constructor.
// ---------------------------------------------------------------------------

txtFKinds ~= new FKind("Pascal", [], ["pas", "pascal"], [], 0, [], [],
                       [Delim("(*", "*)"),// Old-Style
                        Delim("{", "}"),// Turbo Pascal
                        Delim("//")],// Delphi
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);
txtFKinds ~= new FKind("Delphi", [], ["pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj"],
                       [], 0, [], [],
                       [Delim("//")],
                       defaultStringDelims,
                       FileContent.sourceCode, FileKindDetection.equalsContents);

// NOTE(review): an "Objective-C" kind with keywords is registered earlier in
// this function; this keyword-less one looks like a leftover duplicate.
txtFKinds ~= new FKind("Objective-C", [], ["m"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

static immutable keywordsPython = ["and", "del", "for", "is", "raise", "assert", "elif", "from", "lambda", "return",
                                   "break", "else", "global", "not", "try", "class", "except", "if", "or", "while",
                                   "continue", "exec", "import", "pass", "yield", "def", "finally", "in", "print"];

// Scripting

auto kindPython = new FKind("Python", [], ["py"],
                            shebangLine(lit("python")), 0, [],
                            keywordsPython,
                            defaultCommentDelims,
                            pythonStringDelims,
                            FileContent.scriptCode);
txtFKinds ~= kindPython;

txtFKinds ~= new FKind("Ruby", [], ["rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec", ],
                       shebangLine(lit("ruby")), 0,
                       [], [],
                       [Delim("#"), Delim("=begin", "=end")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Scala", [], ["scala", ],
                       shebangLine(lit("scala")), 0,
                       [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Scheme", [], ["scm", "ss"],
                       [], 0,
                       [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Smalltalk", [], ["st"], [], 0, [], [],
                       [Delim("\"", "\"")],
                       defaultStringDelims,
                       FileContent.sourceCode);

txtFKinds ~= new FKind("Perl", [], ["pl", "pm", "pm6", "pod", "t", "psgi", ],
                       shebangLine(lit("perl")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("PHP", [], ["php", "phpt", "php3", "php4", "php5", "phtml", ],
                       shebangLine(lit("php")), 0,
                       [], [],
                       defaultCommentDelims ~ cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Plone", [], ["pt", "cpt", "metadata", "cpy", "py", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Shell", [], ["sh"],
                       shebangLine(lit("sh")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Bash", [], ["bash"],
                       shebangLine(lit("bash")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Zsh", [], ["zsh"],
                       shebangLine(lit("zsh")), 0,
                       [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Batch", [], ["bat", "cmd"], [], 0, [], [],
                       [Delim("REM")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("TCL", [], ["tcl", "itcl", "itk", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Tex", [], ["tex", "cls", "sty", ], [], 0, [], [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("TT", [], ["tt", "tt2", "ttml", ], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Viz Basic", [], ["bas", "cls", "frm", "ctl", "vb", "resx", ], [], 0, [], [],
                       [Delim("'")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Verilog", [], ["v", "vh", "sv"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("VHDL", [], ["vhd", "vhdl"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("Clojure", [], ["clj"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Go", [], ["go"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

auto kindJava = new FKind("Java", [], ["java", "properties"], [], 0, [], [],
                          cCommentDelims,
                          defaultStringDelims,
                          FileContent.sourceCode);
txtFKinds ~= kindJava;
kindJava.operations ~= tuple(FOp.byteCompile, `javac`);

txtFKinds ~= new FKind("Groovy", [], ["groovy", "gtmpl", "gpp", "grunit"], [], 0, [], [],
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("Haskell", [], ["hs", "lhs"], [], 0, [], [],
                       [Delim("--"),      // line comment (was the typo "--}")
                        Delim("{-", "-}")], // block comment
                       defaultStringDelims,
                       FileContent.sourceCode);

static immutable keywordsJavascript = ["break", "case", "catch", "continue", "debugger", "default", "delete",
                                       "do", "else", "finally", "for", "function", "if", "in", "instanceof",
                                       "new", "return", "switch", "this", "throw", "try", "typeof", "var",
                                       "void", "while", "with" ];
txtFKinds ~= new FKind("JavaScript", [], ["js"],
                       [], 0, [],
                       keywordsJavascript,
                       cCommentDelims,
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("JavaScript Object Notation",
                       [], ["json"],
                       [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

auto dubFKind = new FKind("DUB",
                          ["dub.json"], ["json"],
                          [], 0, [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.scriptCode);
txtFKinds ~= dubFKind;
dubFKind.operations ~= tuple(FOp.build, `dub`);

// TODO: Inherit XML
txtFKinds ~= new FKind("JSP", [], ["jsp", "jspx", "jhtm", "jhtml"], [], 0, [], [],
                       [Delim("<!--", "-->"),   // XML comment (closing delimiter was "--%>")
                        Delim("<%--", "--%>")], // JSP comment
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("ActionScript", [], ["as", "mxml"], [], 0, [], [],
                       cCommentDelims, // C-style comments
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("LUA", [], ["lua"], [], 0, [], [],
                       [Delim("--")],
                       defaultStringDelims,
                       FileContent.scriptCode);
txtFKinds ~= new FKind("Mason", [], ["mas", "mhtml", "mpl", "mtxt"], [], 0, [], [],
                       [], // TODO: Need symbolic
                       defaultStringDelims,
                       FileContent.scriptCode);

txtFKinds ~= new FKind("CFMX", [], ["cfc", "cfm", "cfml"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.scriptCode);

// Simulation
static immutable keywordsModelica = ["algorithm", "discrete", "false", "loop", "pure",
                                     "and", "each", "final", "model", "record",
                                     "annotation", "else", "flow", "not", "redeclare",
                                     "elseif", "for", "operator", "replaceable",
                                     "block", "elsewhen", "function", "or", "return",
                                     "break", "encapsulated", "if", "outer", "stream",
                                     "class", "end", "import", "output", "then",
                                     "connect", "enumeration", "impure", "package", "true",
                                     "connector", "equation", "in", "parameter", "type",
                                     "constant", "expandable", "initial", "partial", "when",
                                     "constrainedby", "extends", "inner", "protected", "while",
                                     "der", "external", "input", "public", "within"];
auto kindModelica = new FKind("Modelica", [], ["mo"], [], 0, [],
                              keywordsModelica,
                              cCommentDelims,
                              defaultStringDelims,
                              FileContent.sourceCode,
                              FileKindDetection.equalsWhatsGiven,
                              Lang.modelica);
// The kind was constructed but never registered, so "*.mo" files could not
// be recognised as Modelica.
txtFKinds ~= kindModelica;

// Numerical Computing

// MATLAB/Octave block comments are opened by "%{" and closed by "%}".
txtFKinds ~= new FKind("Matlab", [], ["m"], [], 0, [], [],
                       [Delim("%{", "%}"), // TODO: Prio 1
                        Delim("%")], // TODO: Prio 2
                       defaultStringDelims,
                       FileContent.sourceCode);
auto kindOctave = new FKind("Octave", [], ["m"], [], 0, [], [],
                            [Delim("%{", "%}"), // TODO: Prio 1
                             Delim("%"),
                             Delim("#")],
                            defaultStringDelims,
                            FileContent.sourceCode);
txtFKinds ~= kindOctave;
kindOctave.operations ~= tuple(FOp.byteCompile, `octave`);

txtFKinds ~= new FKind("Julia", [], ["jl"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode); // ((:execute "julia") (:evaluate "julia -e"))

txtFKinds ~= new FKind("Erlang", [], ["erl", "hrl"], [], 0, [], [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

// Emacs-Lisp scripts: an emacs shebang line, whitespace, then "--script".
auto magicForElisp = seq(shebangLine(lit("emacs")),
                         ws(),
                         lit("--script"));
auto kindElisp = new FKind("Emacs-Lisp", [],
                           ["el", "lisp"],
                           magicForElisp, 0, // Script Execution
                           [], [],
                           [Delim(";")],
                           defaultStringDelims,
                           FileContent.sourceCode);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs -batch -f batch-byte-compile`);
kindElisp.operations ~= tuple(FOp.byteCompile, `emacs --script`);
/* kindELisp.moduleName = "(provide 'MODULE_NAME)"; */
/* kindELisp.moduleImport = "(require 'MODULE_NAME)"; */
txtFKinds ~= kindElisp;

txtFKinds ~= new FKind("Lisp", [], ["lisp", "lsp"], [], 0, [], [],
                       [Delim(";")],
                       defaultStringDelims,
                       FileContent.sourceCode);
txtFKinds ~= new FKind("PostScript", [], ["ps", "postscript"], [], 0, "%!", [],
                       [Delim("%")],
                       defaultStringDelims,
                       FileContent.sourceCode);

txtFKinds ~= new FKind("CMake", [], ["cmake"], [], 0, [], [],
                       defaultCommentDelims,
                       defaultStringDelims,
                       FileContent.sourceCode);

// http://stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
txtFKinds ~= new FKind("Pure ASCII", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.textASCII); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126
txtFKinds ~= new FKind("8-Bit Text", [], ["ascii", "txt", "text", "README", "INSTALL"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.text8Bit); // NOTE: Extend with matcher where all bytes are in either: 9–13 or 32–126 or 128–255

txtFKinds ~= new FKind("Assembler", [], ["asm", "s"], [], 0, [], [],
                       [], // N/A
                       defaultStringDelims,
                       FileContent.sourceCode);

// https://en.wikipedia.org/wiki/Diff
auto diffKind = new FKind("Diff", [], ["diff", "patch"],
                          "diff", 0,
                          [], [],
                          [], // N/A
                          defaultStringDelims,
                          FileContent.text);
txtFKinds ~= diffKind;
diffKind.wikip = "https://en.wikipedia.org/wiki/Diff";

// PEM-armoured certificates and keys, told apart by their "-----BEGIN ..." line.
auto pemCertKind = new FKind(`PEM certificate`, [], [`cert`],
                             `-----BEGIN CERTIFICATE-----`, 0,
                             [], [],
                             [], // N/A
                             [], // N/A
                             FileContent.text,
                             FileKindDetection.equalsContents);
txtFKinds ~= pemCertKind;

auto pemCertReqKind = new FKind(`PEM certificate request`, [], [`cert`],
                                `-----BEGIN CERTIFICATE REQ`, 0,
                                [], [],
                                [], // N/A
                                [], // N/A
                                FileContent.text,
                                FileKindDetection.equalsContents);
txtFKinds ~= pemCertReqKind;

auto pemRSAPrivateKeyKind = new FKind(`PEM RSA private key`, [], [`cert`],
                                      `-----BEGIN RSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
                                      FileContent.text,
                                      FileKindDetection.equalsContents);
txtFKinds ~= pemRSAPrivateKeyKind;

auto pemDSAPrivateKeyKind = new FKind(`PEM DSA private key`, [], [`cert`],
                                      `-----BEGIN DSA PRIVATE`, 0,
                                      [], [],
                                      [], // N/A
                                      [], // N/A
                                      FileContent.text,
                                      FileKindDetection.equalsContents);
txtFKinds ~= pemDSAPrivateKeyKind;

auto pemECPrivateKeyKind = new FKind(`PEM EC private key`, [], [`cert`],
                                     `-----BEGIN EC PRIVATE`, 0,
                                     [], [],
                                     [], // N/A
                                     [], // N/A
                                     FileContent.text,
                                     FileKindDetection.equalsContents);
txtFKinds ~= pemECPrivateKeyKind;

// Binaries

static immutable extsELF = ["o", "so", "ko", "os", "out", "bin", "x", "elf", "axf", "prx", "puff", "none"]; // ELF file extensions

// ELF is matched on its four magic bytes (0x7F 'E' 'L' 'F') at offset 0.
auto elfKind = new FKind("ELF",
                         [], extsELF, x"7F 45 4C 46", 0, [], [],
                         [], // N/A
                         [], // N/A
                         FileContent.machineCode,
                         FileKindDetection.equalsContents);
elfKind.wikip = "https://en.wikipedia.org/wiki/Executable_and_Linkable_Format";
binFKinds ~= elfKind;
/* auto extsExeELF = ["out", "bin", "x", "elf", ]; // ELF file extensions */
/* auto elfExeKind = new FKind("ELF executable", [], extsExeELF, [0x2, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfSOKind = new FKind("ELF shared object", [], ["so", "ko"], [0x3, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* auto elfCoreKind = new FKind("ELF core file", [], ["core"], [0x4, 0x0], 16, [], [], FileContent.machineCode, FileKindDetection.equalsContents, elfKind); */
/* binFKinds ~= elfExeKind; */
/* elfKind.subKinds ~= elfSOKind; */
/* elfKind.subKinds ~= elfCoreKind; */
/* elfKind.subKinds ~= elfKind; */

// TODO: Specialize to not steal results from file's magics.
auto extsLinuxFirmware = ["bin", "ucode", "dat", "sbcf", "fw"];
auto linuxFirmwareKind = new FKind("Linux Firmware",
                                   [],
                                   extsLinuxFirmware,
                                   [], 0,
                                   [], [],
                                   [], // comment delimiters: N/A
                                   [], // string delimiters: N/A
                                   FileContent.binaryUnknown,
                                   FileKindDetection.equalsParentPathDirsAndName);
// Detection mode is equalsParentPathDirsAndName; these are the directories it is given.
linuxFirmwareKind.parentPathDirs = ["lib", "firmware"];
binFKinds ~= linuxFirmwareKind;

// TODO: Specialize to not steal results from file's magics.
auto linuxHwDbKind = new FKind("Linux Hardware Database Index",
                               "hwdb.bin",
                               ["bin"],
                               "KSLPHHRH", 0,
                               [], [],
                               [], // comment delimiters: N/A
                               [], // string delimiters: N/A
                               FileContent.binaryUnknown,
                               FileKindDetection.equalsNameAndContents);
binFKinds ~= linuxHwDbKind;

// Executables
binFKinds ~= new FKind("Mach-O",
                       [], ["o"],
                       x"CE FA ED FE", 0,
                       [], [],
                       [], // comment delimiters: N/A
                       [], // string delimiters: N/A
                       FileContent.machineCode,
                       FileKindDetection.equalsContents);

binFKinds ~= new FKind("modules.symbols.bin",
                       [], ["bin"],
                       cast(ubyte[])[0xB0, 0x07, 0xF4, 0x57, 0x00, 0x02, 0x00, 0x01, 0x20], 0,
                       [], [],
                       [], // comment delimiters: N/A
                       [], // string delimiters: N/A
                       FileContent.binaryUnknown,
                       FileKindDetection.equalsContents);

auto kindCOFF = new FKind("COFF/i386/32",
                          [], ["o"],
                          x"4C 01", 0,
                          [], [],
                          [], // comment delimiters: N/A
                          [], // string delimiters: N/A
                          FileContent.machineCode,
                          FileKindDetection.equalsContents);
kindCOFF.description = "Common Object File Format";
binFKinds ~= kindCOFF;

auto extsPECOFF = ["cpl", "exe", "dll", "ocx", "sys", "scr", "drv", "obj"];
auto kindPECOFF = new FKind("PE/COFF",
                            [],
                            extsPECOFF,
                            "PE\0\0", 0x60, // And ("MZ") at offset 0x0
                            [], [],
                            [], // comment delimiters: N/A
                            [], // string delimiters: N/A
                            FileContent.machineCode,
                            FileKindDetection.equalsContents);
kindPECOFF.description = "COFF Portable Executable";
binFKinds ~= kindPECOFF;

// Registered without an explicit detection mode, unlike the kinds above.
auto kindDOSMZ = new FKind("DOS-MZ",
                           [], ["exe", "dll"],
                           "MZ", 0,
                           [], [],
                           [], // comment delimiters: N/A
                           [], // string delimiters: N/A
                           FileContent.machineCode);
kindDOSMZ.description = "MS-DOS, OS/2 or MS Windows executable";
binFKinds ~= kindDOSMZ;

// Caches 2768 binFKinds ~= new FKind("ld.so.cache", [], ["cache"], "ld.so-", 0, [], [], 2769 [], // N/A 2770 [], // N/A 2771 FileContent.binaryCache); 2772 2773 // Profile Data 2774 binFKinds ~= new FKind("perf benchmark data", [], ["data"], "PERFILE2h", 0, [], [], 2775 [], // N/A 2776 [], // N/A 2777 FileContent.performanceBenchmark); 2778 2779 // Images 2780 binFKinds ~= new FKind("GIF87a", [], ["gif"], "GIF87a", 0, [], [], 2781 [], // N/A 2782 [], // N/A 2783 FileContent.image); 2784 binFKinds ~= new FKind("GIF89a", [], ["gif"], "GIF89a", 0, [], [], 2785 [], // N/A 2786 [], // N/A 2787 FileContent.image); 2788 auto extJPEG = ["jpeg", "jpg", "j2k", "jpeg2000"]; 2789 binFKinds ~= new FKind("JPEG", [], extJPEG, x"FF D8", 0, [], [], 2790 [], // N/A 2791 [], // N/A 2792 FileContent.image); // TODO: Support ends with [0xFF, 0xD9] 2793 binFKinds ~= new FKind("JPEG/JFIF", [], extJPEG, x"FF D8", 0, [], [], 2794 [], // N/A 2795 [], // N/A 2796 FileContent.image); // TODO: Support ends with ['J','F','I','F', 0x00] 2797 binFKinds ~= new FKind("JPEG/Exif", [], extJPEG, x"FF D8", 0, [], [], 2798 [], // N/A 2799 [], // N/A 2800 FileContent.image); // TODO: Support contains ['E','x','i','f', 0x00] followed by metadata 2801 2802 binFKinds ~= new FKind("Pack200-Compressed Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [], 2803 [], // N/A 2804 [], // N/A 2805 FileContent.machineCode); 2806 2807 binFKinds ~= new FKind("JRun Server Application", [], ["jsa"], 2808 cast(ubyte[])[0xa2,0xab,0x0b,0xf0, 2809 0x01,0x00,0x00,0x00, 2810 0x00,0x00,0x20,0x00], 0, [], [], 2811 [], // N/A 2812 [], // N/A 2813 FileContent.machineCode); 2814 2815 binFKinds ~= new FKind("PNG", [], ["png"], 2816 cast(ubyte[])[137, 80, 78, 71, 13, 10, 26, 10], 0, [], [], 2817 [], // N/A 2818 [], // N/A 2819 FileContent.image); 2820 2821 auto icnsKind = new FKind("Apple Icon Image", [], ["icns"], 2822 "icns", 0, [], [], 2823 [], // N/A 2824 [], // N/A 2825 FileContent.imageIcon); 2826 icnsKind.wikip = 
"https://en.wikipedia.org/wiki/Apple_Icon_Image_format"; 2827 binFKinds ~= icnsKind; 2828 // TODO: read with http://icns.sourceforge.net/ 2829 2830 auto kindPDF = new FKind("PDF", [], ["pdf"], "%PDF", 0, [], [], 2831 [], // N/A 2832 [], // N/A 2833 FileContent.document); 2834 kindPDF.description = "Portable Document Format"; 2835 binFKinds ~= kindPDF; 2836 2837 auto kindMarkdownFmt = new FKind("Markdown", [], ["md", "markdown"], 2838 [], 0, 2839 [], [], 2840 [], // N/A 2841 defaultStringDelims, 2842 FileContent.binaryCache); 2843 kindMarkdownFmt.wikip = "https://en.wikipedia.org/wiki/Markdown"; 2844 binFKinds ~= kindMarkdownFmt; 2845 2846 auto kindAsciiDocFmt = new FKind("AsciiDoc", [], ["ad", "adoc", "asciidoc"], 2847 [], 0, 2848 [], [], 2849 [], // N/A 2850 defaultStringDelims, 2851 FileContent.binaryCache); 2852 binFKinds ~= kindAsciiDocFmt; 2853 2854 auto kindLatexPDFFmt = new FKind("LaTeX PDF Format", [], ["fmt"], 2855 cast(ubyte[])['W','2','T','X', 2856 0x00,0x00,0x00,0x08, 2857 0x70,0x64,0x66,0x74, 2858 0x65,0x78], 0, [], [], 2859 [], // N/A 2860 defaultStringDelims, 2861 FileContent.binaryCache); 2862 binFKinds ~= kindLatexPDFFmt; 2863 2864 binFKinds ~= new FKind("Microsoft Office Document", [], ["doc", "docx", "xls", "ppt"], x"D0 CF 11 E0", 0, [], [], 2865 [], // N/A 2866 defaultStringDelims, 2867 FileContent.document); 2868 2869 // Fonts 2870 2871 auto kindTTF = new FKind("TrueType Font", [], ["ttf"], x"00 01 00 00 00", 0, [], [], 2872 [], // N/A 2873 defaultStringDelims, 2874 FileContent.font); 2875 binFKinds ~= kindTTF; 2876 2877 auto kindTTCF = new FKind("TrueType/OpenType Font Collection", [], ["ttc"], "ttcf", 0, [], [], 2878 [], // N/A 2879 defaultStringDelims, 2880 FileContent.font); 2881 binFKinds ~= kindTTCF; 2882 2883 auto kindWOFF = new FKind("Web Open Font", [], ["woff"], "wOFF", 0, [], [], 2884 [], // N/A 2885 defaultStringDelims, 2886 FileContent.font); // TODO: container for kindSFNT 2887 binFKinds ~= kindWOFF; 2888 2889 auto kindSFNT = new 
FKind("Spline Font", [], ["sfnt"], "sfnt", 0, [], [], 2890 [], // N/A 2891 defaultStringDelims, 2892 FileContent.font); // TODO: container for Sfnt 2893 binFKinds ~= kindSFNT; 2894 2895 // Audio 2896 2897 binFKinds ~= new FKind("MIDI", [], ["mid", "midi"], "MThd", 0, [], [], 2898 [], // N/A 2899 defaultStringDelims, 2900 FileContent.audio, FileKindDetection.equalsNameAndContents); 2901 2902 // Au 2903 auto auKind = new FKind("Au", [], ["au", "snd"], ".snd", 0, [], [], 2904 [], // N/A 2905 defaultStringDelims, 2906 FileContent.audio, FileKindDetection.equalsNameAndContents); 2907 auKind.wikip = "https://en.wikipedia.org/wiki/Au_file_format"; 2908 binFKinds ~= auKind; 2909 2910 binFKinds ~= new FKind("Ogg", [], ["ogg", "oga", "ogv"], 2911 cast(ubyte[])[0x4F,0x67,0x67,0x53, 2912 0x00,0x02,0x00,0x00, 2913 0x00,0x00,0x00,0x00, 2914 0x00, 0x00], 0, [], [], 2915 [], // N/A 2916 defaultStringDelims, 2917 FileContent.media); 2918 2919 // TODO: Support RIFF....WAVEfmt using symbolic seq(lit("RIFF"), any(4), lit("WAVEfmt")) 2920 binFKinds ~= new FKind("WAV", [], ["wav", "wave"], "RIFF", 0, [], [], 2921 [], // N/A 2922 defaultStringDelims, 2923 FileContent.audio, FileKindDetection.equalsContents); 2924 2925 // Archives 2926 2927 auto kindBSDAr = new FKind("BSD Archive", [], ["a", "ar"], "!<arch>\n", 0, [], [], 2928 [], // N/A 2929 defaultStringDelims, 2930 FileContent.archive, FileKindDetection.equalsContents); 2931 kindBSDAr.description = "BSD 4.4 and Mac OSX Archive"; 2932 binFKinds ~= kindBSDAr; 2933 2934 binFKinds ~= new FKind("GNU tar Archive", [], ["tar"], "ustar\040\040\0", 257, [], [], 2935 [], // N/A 2936 defaultStringDelims, 2937 FileContent.archive, FileKindDetection.equalsContents); // TODO: Specialized Derivation of "POSIX tar Archive" 2938 binFKinds ~= new FKind("POSIX tar Archive", [], ["tar"], "ustar\0", 257, [], [], 2939 [], // N/A 2940 defaultStringDelims, 2941 FileContent.archive, FileKindDetection.equalsContents); 2942 2943 binFKinds ~= new FKind("pkZip 
Archive", [], ["zip", "jar", "pptx", "docx", "xlsx"], "PK\003\004", 0, [], [], 2944 [], // N/A 2945 defaultStringDelims, 2946 FileContent.archive, FileKindDetection.equalsContents); 2947 binFKinds ~= new FKind("pkZip Archive (empty)", [], ["zip", "jar"], "PK\005\006", 0, [], [], 2948 [], // N/A 2949 defaultStringDelims, 2950 FileContent.archive, FileKindDetection.equalsContents); 2951 2952 binFKinds ~= new FKind("PAK file", [], ["pak"], cast(ubyte[])[0x40, 0x00, 0x00, 0x00, 2953 0x4a, 0x12, 0x00, 0x00, 2954 0x01, 0x2d, 0x23, 0xcb, 2955 0x6d, 0x00, 0x00, 0x2f], 0, [], [], 2956 [], // N/A 2957 defaultStringDelims, 2958 FileContent.spellCheckWordList, 2959 FileKindDetection.equalsNameAndContents); 2960 2961 binFKinds ~= new FKind("LZW-Compressed", [], ["z", "tar.z"], x"1F 9D", 0, [], [], 2962 [], // N/A 2963 defaultStringDelims, 2964 FileContent.compressed); 2965 binFKinds ~= new FKind("LZH-Compressed", [], ["z", "tar.z"], x"1F A0", 0, [], [], 2966 [], // N/A 2967 defaultStringDelims, 2968 FileContent.compressed); 2969 2970 binFKinds ~= new FKind("CompressedZ", [], ["z"], "\037\235", 0, [], [], 2971 [], // N/A 2972 defaultStringDelims, 2973 FileContent.compressed); 2974 binFKinds ~= new FKind("GNU-Zip (gzip)", [], ["tgz", "gz", "gzip", "dz"], "\037\213", 0, [], [], 2975 [], // N/A 2976 defaultStringDelims, 2977 FileContent.compressed); 2978 binFKinds ~= new FKind("BZip", [], ["bz2", "bz", "tbz2", "bzip2"], "BZh", 0, [], [], 2979 [], // N/A 2980 defaultStringDelims, 2981 FileContent.compressed); 2982 binFKinds ~= new FKind("XZ/7-Zip", [], ["xz", "txz", "7z", "t7z", "lzma", "tlzma", "lz", "tlz"], 2983 cast(ubyte[])[0xFD, '7', 'z', 'X', 'Z', 0x00], 0, [], [], 2984 [], // N/A 2985 defaultStringDelims, 2986 FileContent.compressed); 2987 binFKinds ~= new FKind("LZX", [], ["lzx"], "LZX", 0, [], [], 2988 [], // N/A 2989 defaultStringDelims, 2990 FileContent.compressed); 2991 binFKinds ~= new FKind("SZip", [], ["szip"], "SZ\x0a\4", 0, [], [], 2992 [], // N/A 2993 
defaultStringDelims, 2994 FileContent.compressed); 2995 2996 binFKinds ~= new FKind("Git Bundle", [], ["bundle"], "# v2 git bundle", 0, [], [], 2997 [], // N/A 2998 defaultStringDelims, 2999 FileContent.versionControl); 3000 3001 binFKinds ~= new FKind("Emacs-Lisp Bytes Code", [], ["elc"], ";ELC\27\0\0\0", 0, [], [], 3002 [], // N/A 3003 defaultStringDelims, 3004 FileContent.byteCode, FileKindDetection.equalsContents); 3005 binFKinds ~= new FKind("Python Bytes Code", [], ["pyc"], x"0D 0A", 2, [], [], 3006 [], // N/A 3007 defaultStringDelims, 3008 FileContent.byteCode, FileKindDetection.equalsNameAndContents); // TODO: Handle versions at src[0..2] 3009 3010 binFKinds ~= new FKind("Zshell Wordcode", [], ["zwc"], x"07 06 05 04", 0, [], [], 3011 [], // N/A 3012 defaultStringDelims, 3013 FileContent.byteCode); 3014 3015 binFKinds ~= new FKind("Java Bytes Code", [], ["class"], x"CA FE BA BE", 0, [], [], 3016 [], // N/A 3017 defaultStringDelims, 3018 FileContent.byteCode, FileKindDetection.equalsContents); 3019 binFKinds ~= new FKind("Java KeyStore", [], [], x"FE ED FE ED", 0, [], [], 3020 [], // N/A 3021 defaultStringDelims, 3022 FileContent.binaryUnknown, FileKindDetection.equalsContents); 3023 binFKinds ~= new FKind("Java JCE KeyStore", [], [], x"CE CE CE CE", 0, [], [], 3024 [], // N/A 3025 defaultStringDelims, 3026 FileContent.binaryUnknown, FileKindDetection.equalsContents); 3027 3028 binFKinds ~= new FKind("LLVM Bitcode", [], ["bc"], "BC", 0, [], [], 3029 [], // N/A 3030 defaultStringDelims, 3031 FileContent.byteCode, FileKindDetection.equalsNameAndContents); 3032 3033 binFKinds ~= new FKind("MATLAB MAT", [], ["mat"], "MATLAB 5.0 MAT-file", 0, [], [], 3034 [], // N/A 3035 defaultStringDelims, 3036 FileContent.numericalData, FileKindDetection.equalsContents); 3037 3038 auto hdf4Kind = new FKind("HDF4", [], ["hdf", "h4", "hdf4", "he4"], x"0E 03 13 01", 0, [], [], 3039 [], // N/A 3040 defaultStringDelims, 3041 FileContent.numericalData); 3042 binFKinds ~= hdf4Kind; 
3043 hdf4Kind.description = "Hierarchical Data Format version 4"; 3044 3045 auto hdf5Kind = new FKind("HDF5", "Hierarchical Data Format version 5", ["hdf", "h5", "hdf5", "he5"], x"89 48 44 46 0D 0A 1A 0A", 0, [], [], 3046 [], // N/A 3047 defaultStringDelims, 3048 FileContent.numericalData); 3049 binFKinds ~= hdf5Kind; 3050 hdf5Kind.description = "Hierarchical Data Format version 5"; 3051 3052 auto numpyKind = new FKind("NUMPY", "NUMPY", ["npy", "numpy"], x"93 4E 55 4D 50 59", 0, [], [], 3053 [], // N/A 3054 defaultStringDelims, 3055 FileContent.numericalData); 3056 binFKinds ~= numpyKind; 3057 3058 binFKinds ~= new FKind("GNU GLOBAL Database", ["GTAGS", "GRTAGS", "GPATH", "GSYMS"], [], "b1\5\0", 0, [], [], 3059 [], // N/A 3060 defaultStringDelims, 3061 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3062 3063 // SQLite 3064 static immutable extsSQLite = ["sql", "sqlite", "sqlite3"]; 3065 binFKinds ~= new FKind("MySQL table definition file", [], extsSQLite, x"FE 01", 0, [], [], 3066 [], // N/A 3067 defaultStringDelims, 3068 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3069 binFKinds ~= new FKind("MySQL MyISAM index file", [], extsSQLite, x"FE FE 07", 0, [], [], 3070 [], // N/A 3071 defaultStringDelims, 3072 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3073 binFKinds ~= new FKind("MySQL MyISAM compressed data file", [], extsSQLite, x"FE FE 08", 0, [], [], 3074 [], // N/A 3075 defaultStringDelims, 3076 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3077 binFKinds ~= new FKind("MySQL Maria index file", [], extsSQLite, x"FF FF FF", 0, [], [], 3078 [], // N/A 3079 defaultStringDelims, 3080 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3081 binFKinds ~= new FKind("MySQL Maria compressed data file", [], extsSQLite, x"FF FF FF", 0, [], [], 3082 [], // N/A 3083 defaultStringDelims, 3084 FileContent.tagsDatabase, FileKindDetection.equalsContents); 3085 binFKinds ~= new FKind("SQLite format 3", [], 
extsSQLite , "SQLite format 3", 0, [], [], 3086 [], // N/A 3087 defaultStringDelims, 3088 FileContent.tagsDatabase, FileKindDetection.equalsContents); // TODO: Why is this detected at 49:th try? 3089 3090 binFKinds ~= new FKind("Vim swap", [], ["swo"], [], 0, "b0VIM ", [], 3091 [], // N/A 3092 defaultStringDelims, 3093 FileContent.binaryCache); 3094 3095 binFKinds ~= new FKind("PCH", "(GCC) Precompiled header", ["pch", "gpch"], "gpch", 0, [], [], 3096 [], // N/A 3097 defaultStringDelims, 3098 FileContent.cache); 3099 3100 binFKinds ~= new FKind("Firmware", [], ["fw"], cast(ubyte[])[], 0, [], [], 3101 [], // N/A 3102 defaultStringDelims, 3103 FileContent.cache, FileKindDetection.equalsName); // TODO: Add check for binary contents and that some parenting directory is named "firmware" 3104 3105 binFKinds ~= new FKind("LibreOffice or OpenOffice RDB", [], ["rdb"], 3106 cast(ubyte[])[0x43,0x53,0x4d,0x48, 3107 0x4a,0x2d,0xd0,0x26, 3108 0x00,0x02,0x00,0x00, 3109 0x00,0x02,0x00,0x02], 0, [], [], 3110 [], // N/A 3111 defaultStringDelims, 3112 FileContent.database, FileKindDetection.equalsName); // TODO: Add check for binary contents and that some parenting directory is named "firmware" 3113 3114 binFKinds ~= new FKind("sconsign", [], ["sconsign", "sconsign.dblite", "dblite"], x"7d 71 01 28", 0, [], [], 3115 [], // N/A 3116 defaultStringDelims, 3117 FileContent.cache, FileKindDetection.equalsNameAndContents); 3118 3119 binFKinds ~= new FKind("GnuPG (GPG) key public ring", [], ["gpg"], x"99 01", 0, [], [], 3120 [], // N/A 3121 defaultStringDelims, 3122 FileContent.binary, FileKindDetection.equalsNameOrContents); 3123 binFKinds ~= new FKind("GnuPG (GPG) encrypted data", [], [], x"85 02", 0, [], [], 3124 [], // N/A 3125 defaultStringDelims, 3126 FileContent.binary, FileKindDetection.equalsContents); 3127 binFKinds ~= new FKind("GNUPG (GPG) key trust database", [], [], "\001gpg", 0, [], [], 3128 [], // N/A 3129 defaultStringDelims, 3130 FileContent.binary, 
FileKindDetection.equalsContents); 3131 3132 binFKinds ~= new FKind("aspell word list (rowl)", [], ["rws"], "aspell default speller rowl ", 0, [], [], 3133 [], // N/A 3134 defaultStringDelims, 3135 FileContent.spellCheckWordList, FileKindDetection.equalsNameAndContents); 3136 3137 binFKinds ~= new FKind("DS_Store", ".DS_Store", [], "Mac OS X Desktop Services Store ", 0, [], [], 3138 [], // N/A 3139 [], 3140 FileContent.binary, FileKindDetection.equalsName); 3141 3142 /* Fax image created in the CCITT Group 3 compressed format, which is 3143 * used for digital transmission of fax data and supports 1 bit per 3144 * pixel 3145 */ 3146 binFKinds ~= new FKind("CCITT Group 3 compressed format", [], // TODO: Altenative name: Digifax-G3, G3 Fax 3147 ["g3", "G3"], 3148 "PC Research, Inc", 0, [], [], 3149 [], // N/A 3150 [], 3151 FileContent.imageModemFax1BPP, FileKindDetection.equalsContents); 3152 3153 binFKinds ~= new FKind("Raw Modem Data version 1", [], 3154 ["rmd1"], 3155 "RMD1", 0, [], [], 3156 [], // N/A 3157 [], 3158 FileContent.modemData, FileKindDetection.equalsContents); 3159 3160 binFKinds ~= new FKind("Portable voice format 1", [], 3161 ["pvf1"], 3162 "PVF1\n", 0, [], [], 3163 [], // N/A 3164 [], 3165 FileContent.voiceModem, FileKindDetection.equalsContents); 3166 3167 binFKinds ~= new FKind("Portable voice format 2", [], 3168 ["pvf2"], 3169 "PVF2\n", 0, [], [], 3170 [], // N/A 3171 [], 3172 FileContent.voiceModem, FileKindDetection.equalsContents); 3173 3174 allFKinds ~= txtFKinds; 3175 allFKinds ~= binFKinds; 3176 3177 assert(allFKinds.byIndex.length == 3178 (txtFKinds.byIndex.length + 3179 binFKinds.byIndex.length)); 3180 3181 assert(allFKinds.byId.length == 3182 (txtFKinds.byId.length + 3183 binFKinds.byId.length)); 3184 3185 txtFKinds.rehash; 3186 binFKinds.rehash; 3187 allFKinds.rehash; 3188 } 3189 3190 // Code 3191 3192 // Interpret Command Line 3193 void loadDirKinds() 3194 { 3195 vcDirKinds ~= new DirKind(".git", "Git"); 3196 vcDirKinds ~= new 
DirKind(".svn", "Subversion (Svn)"); 3197 vcDirKinds ~= new DirKind(".bzr", "Bazaar (Bzr)"); 3198 vcDirKinds ~= new DirKind("RCS", "RCS"); 3199 vcDirKinds ~= new DirKind("CVS", "CVS"); 3200 vcDirKinds ~= new DirKind("MCVS", "MCVS"); 3201 vcDirKinds ~= new DirKind("RCS", "RCS"); 3202 vcDirKinds ~= new DirKind(".hg", "Mercurial (Hg)"); 3203 vcDirKinds ~= new DirKind("SCCS", "SCCS"); 3204 vcDirKinds ~= new DirKind(".wact", "WACT"); 3205 vcDirKinds ~= new DirKind("_MTN", "Monotone"); 3206 vcDirKinds ~= new DirKind("_darcs", "Darcs"); 3207 vcDirKinds ~= new DirKind("{arch}", "Arch"); 3208 3209 skippedDirKinds ~= vcDirKinds; 3210 3211 DirKind[string] vcDirKindsMap_; 3212 foreach (kind; vcDirKinds) 3213 { 3214 vcDirKindsMap[kind.fileName] = kind; 3215 } 3216 vcDirKindsMap.rehash; 3217 3218 skippedDirKinds ~= new DirKind(".trash", "Trash"); 3219 skippedDirKinds ~= new DirKind(".undo", "Undo"); 3220 skippedDirKinds ~= new DirKind(".deps", "Dependencies"); 3221 skippedDirKinds ~= new DirKind(".backups", "Backups"); 3222 skippedDirKinds ~= new DirKind(".autom4te.cache", "Automake Cache"); 3223 3224 foreach (kind; skippedDirKinds) { skippedDirKindsMap[kind.fileName] = kind; } 3225 skippedDirKindsMap.rehash; 3226 } 3227 3228 ScanContext scanContext = ScanContext.standard; 3229 KeyStrictness keyStrictness = KeyStrictness.standard; 3230 3231 bool showNameDups = false; 3232 bool showTreeContentDups = false; 3233 bool showFileContentDups = false; 3234 bool showELFSymbolDups = false; 3235 bool linkContentDups = false; 3236 3237 bool showLinkDups = false; 3238 SymlinkFollowContext followSymlinks = SymlinkFollowContext.external; 3239 bool showBrokenSymlinks = true; 3240 bool showSymlinkCycles = true; 3241 3242 bool showAnyDups = false; 3243 bool showMMaps = false; 3244 bool showUsage = false; 3245 bool showSHA1 = false; 3246 bool showLineCounts = false; 3247 3248 uint64_t noFiles = 0; 3249 uint64_t noRegFiles = 0; 3250 uint64_t noSymlinks = 0; 3251 uint64_t noSpecialFiles = 0; 3252 
uint64_t noDirs = 0; 3253 3254 uint64_t noScannedFiles = 0; 3255 uint64_t noScannedRegFiles = 0; 3256 uint64_t noScannedSymlinks = 0; 3257 uint64_t noScannedSpecialFiles = 0; 3258 uint64_t noScannedDirs = 0; 3259 3260 auto shallowDensenessSum = Rational!ulong(0, 1); 3261 auto deepDensenessSum = Rational!ulong(0, 1); 3262 uint64_t densenessCount = 0; 3263 3264 FOp fOp = FOp.none; 3265 3266 bool keyAsWord = false; 3267 bool keyAsSymbol = false; 3268 bool keyAsAcronym = false; 3269 bool keyAsExact = false; 3270 3271 bool showTree = false; 3272 3273 bool useHTML = false; 3274 bool browseOutput = false; 3275 bool collectTypeHits = false; 3276 bool colorFlag = false; 3277 3278 int scanDepth = -1; 3279 3280 bool demangleELF = true; 3281 3282 bool recache = false; 3283 3284 bool useNGrams = false; 3285 3286 PathFormat pathFormat = PathFormat.relative; 3287 3288 DirSorting subsSorting = DirSorting.onTimeLastModified; 3289 BuildType buildType = BuildType.none; 3290 DuplicatesContext duplicatesContext = DuplicatesContext.internal; 3291 3292 Dir[] topDirs; 3293 Dir rootDir; 3294 } 3295 3296 struct Results 3297 { 3298 size_t numTotalHits; // Number of total hits. 3299 size_t numFilesWithHits; // Number of files with hits 3300 Bytes64 noBytesTotal; // Number of bytes total. 3301 Bytes64 noBytesTotalContents; // Number of contents bytes total. 3302 Bytes64 noBytesScanned; // Number of bytes scanned. 3303 Bytes64 noBytesSkipped; // Number of bytes skipped. 3304 Bytes64 noBytesUnreadable; // Number of bytes unreadable. 3305 } 3306 3307 version(cerealed) 3308 { 3309 void grain(T)(ref Cereal cereal, ref SysTime systime) 3310 { 3311 auto stdTime = systime.stdTime; 3312 cereal.grain(stdTime); 3313 if (stdTime != 0) 3314 { 3315 systime = SysTime(stdTime); 3316 } 3317 } 3318 } 3319 3320 /** Directory Sorting Order. */ 3321 enum DirSorting 3322 { 3323 /* onTimeCreated, /\* Windows only. 
Currently stored in Linux on ext4 but no */ 3324 /* * standard interface exists yet, it will probably be called */ 3325 /* * xstat(). *\/ */ 3326 onTimeLastModified, 3327 onTimeLastAccessed, 3328 onSize, 3329 onNothing, 3330 } 3331 3332 enum BuildType 3333 { 3334 none, // Don't compile 3335 devel, // Compile with debug symbols 3336 release, // Compile without debugs symbols and optimizations 3337 standard = devel, 3338 } 3339 3340 enum PathFormat 3341 { 3342 absolute, 3343 relative, 3344 } 3345 3346 /** Dir. 3347 */ 3348 class Dir : File 3349 { 3350 /** Construct File System Root Directory. */ 3351 this(Dir parent = null, GStats gstats = null) 3352 { 3353 super(parent); 3354 this._gstats = gstats; 3355 if (gstats) { ++gstats.noDirs; } 3356 } 3357 3358 this(string root_path, GStats gstats) 3359 in { assert(root_path == "/"); assert(gstats); } 3360 do 3361 { 3362 auto rootDent = DirEntry(root_path); 3363 Dir rootParent = null; 3364 this(rootDent, rootParent, gstats); 3365 } 3366 3367 this(ref DirEntry dent, Dir parent, GStats gstats) 3368 in { assert(gstats); } 3369 do 3370 { 3371 this(dent.name.baseName, parent, dent.size.Bytes64, dent.timeLastModified, dent.timeLastAccessed, gstats); 3372 } 3373 3374 this(string name, Dir parent, Bytes64 size, SysTime timeLastModified, SysTime timeLastAccessed, 3375 GStats gstats = null) 3376 { 3377 super(name, parent, size, timeLastModified, timeLastAccessed); 3378 this._gstats = gstats; 3379 if (gstats) { ++gstats.noDirs; } 3380 } 3381 3382 override string toTextual() const @property { return "Directory"; } 3383 3384 override Bytes64 treeSize() @property @trusted /* @safe nothrow */ 3385 { 3386 if (_treeSize.isUntouched) 3387 { 3388 _treeSize = (this.size + 3389 reduce!"a+b"(0.Bytes64, 3390 subs.byValue.map!"a.treeSize")); // recurse! 3391 } 3392 return _treeSize.get.bytes; 3393 } 3394 3395 /** Returns: Directory Tree Content Id of `this`. 
*/ 3396 override const(SHA1Digest) treeContentId() @property @trusted /* @safe nothrow */ 3397 { 3398 if (_treeContentId.isUntouched) 3399 { 3400 _treeContentId = subs.byValue.map!"a.treeContentId".sha1Of; // TODO: join loops for calculating treeSize 3401 assert(_treeContentId, "Zero tree content digest"); 3402 if (treeSize() != 0) 3403 { 3404 gstats.filesByContentId[_treeContentId] ~= assumeNotNull(cast(File)this); // TODO: Avoid cast when DMD and NotNull is fixed 3405 } 3406 } 3407 return _treeContentId; 3408 } 3409 3410 override Face!Color face() const @property @safe pure nothrow { return dirFace; } 3411 3412 /** Return true if `this` is a file system root directory. */ 3413 bool isRoot() @property @safe const pure nothrow { return !parent; } 3414 3415 GStats gstats(GStats gstats) @property @safe pure /* nothrow */ { 3416 return this._gstats = gstats; 3417 } 3418 GStats gstats() @property @safe nothrow 3419 { 3420 if (!_gstats && this.parent) 3421 { 3422 _gstats = this.parent.gstats(); 3423 } 3424 return _gstats; 3425 } 3426 3427 /** Returns: Depth of Depth from File System root to this File. */ 3428 override int depth() @property @safe nothrow 3429 { 3430 if (_depth ==- 1) 3431 { 3432 _depth = parent ? parent.depth + 1 : 0; // memoized depth 3433 } 3434 return _depth; 3435 } 3436 3437 /** Scan `this` recursively for a non-diretory file with basename `name`. 3438 TODO: Reuse range based algorithm this.tree(depthFirst|breadFirst) 3439 */ 3440 File find(string name) @property 3441 { 3442 auto subs_ = subs(); 3443 if (name in subs_) 3444 { 3445 auto hit = subs_[name]; 3446 Dir hitDir = cast(Dir)hit; 3447 if (!hitDir) // if not a directory 3448 return hit; 3449 } 3450 else 3451 { 3452 foreach (sub; subs_) 3453 { 3454 Dir subDir = cast(Dir)sub; 3455 if (subDir) 3456 { 3457 auto hit = subDir.find(name); 3458 if (hit) // if not a directory 3459 return hit; 3460 } 3461 } 3462 } 3463 return null; 3464 } 3465 3466 /** Append Tree Statistics. 
*/ 3467 void addTreeStatsFromSub(F)(NotNull!F subFile, ref DirEntry subDent) 3468 { 3469 if (subDent.isFile) 3470 { 3471 /* _treeSize += subDent.size.Bytes64; */ 3472 // dbg("Updating ", _treeSize, " of ", path); 3473 3474 /** TODO: Move these overloads to std.datetime */ 3475 auto ref min(in SysTime a, in SysTime b) @trusted pure nothrow { return (a < b ? a : b); } 3476 auto ref max(in SysTime a, in SysTime b) @trusted pure nothrow { return (a > b ? a : b); } 3477 3478 const lastMod = subDent.timeLastModified; 3479 _timeModifiedInterval = Interval!SysTime(min(lastMod, _timeModifiedInterval.begin), 3480 max(lastMod, _timeModifiedInterval.end)); 3481 const lastAcc = subDent.timeLastAccessed; 3482 _timeAccessedInterval = Interval!SysTime(min(lastAcc, _timeAccessedInterval.begin), 3483 max(lastAcc, _timeAccessedInterval.end)); 3484 } 3485 } 3486 3487 /** Update Statistics for Sub-File `sub` with `subDent` of `this` Dir. */ 3488 void updateStats(F)(NotNull!F subFile, ref DirEntry subDent, bool isRegFile) 3489 { 3490 auto lGS = gstats(); 3491 if (lGS) 3492 { 3493 if (lGS.showNameDups/* && */ 3494 /* !subFile.underAnyDir!(a => a.name in lGS.skippedDirKindsMap) */) 3495 { 3496 lGS.filesByName[subFile.name] ~= cast(NotNull!File)subFile; 3497 } 3498 if (lGS.showLinkDups && 3499 isRegFile) 3500 { 3501 import core.sys.posix.sys.stat; 3502 immutable stat_t stat = subDent.statBuf(); 3503 if (stat.st_nlink >= 2) 3504 { 3505 lGS.filesByInode[stat.st_ino] ~= cast(NotNull!File)subFile; 3506 } 3507 } 3508 } 3509 } 3510 3511 /** Load Contents of `this` Directory from Disk using DirEntries. 3512 Returns: `true` iff Dir was updated (reread) from disk. 
3513 */ 3514 bool load(int depth = 0, bool force = false) 3515 { 3516 import std.range: empty; 3517 if (!_obseleteDir && // already loaded 3518 !force) // and not forced reload 3519 { 3520 return false; // signal already scanned 3521 } 3522 3523 // dbg("Zeroing ", _treeSize, " of ", path); 3524 _treeSize.reset; // this.size; 3525 auto oldSubs = _subs; 3526 _subs.reset; 3527 assert(_subs.length == 0); // TODO: Remove when verified 3528 3529 import std.file: dirEntries, SpanMode; 3530 auto entries = dirEntries(path, SpanMode.shallow, false); // false: skip symlinks 3531 foreach (dent; entries) 3532 { 3533 immutable basename = dent.name.baseName; 3534 File sub = null; 3535 if (basename in oldSubs) 3536 { 3537 sub = oldSubs[basename]; // reuse from previous cache 3538 } 3539 else 3540 { 3541 bool isRegFile = false; 3542 if (dent.isSymlink) 3543 { 3544 sub = new Symlink(dent, assumeNotNull(this)); 3545 } 3546 else if (dent.isDir) 3547 { 3548 sub = new Dir(dent, this, gstats); 3549 } 3550 else if (dent.isFile) 3551 { 3552 // TODO: Delay construction of and specific files such as 3553 // CFile, ELFFile, after FKind-recognition has been made. 3554 sub = new RegFile(dent, assumeNotNull(this)); 3555 isRegFile = true; 3556 } 3557 else 3558 { 3559 sub = new SpecFile(dent, assumeNotNull(this)); 3560 } 3561 updateStats(enforceNotNull(sub), dent, isRegFile); 3562 } 3563 auto nnsub = enforceNotNull(sub); 3564 addTreeStatsFromSub(nnsub, dent); 3565 _subs[basename] = nnsub; 3566 } 3567 _subs.rehash; // optimize hash for faster lookups 3568 3569 _obseleteDir = false; 3570 return true; 3571 } 3572 3573 bool reload(int depth = 0) { return load(depth, true); } 3574 alias sync = reload; 3575 3576 /* TODO: Can we get make this const to the outside world perhaps using inout? 
*/ 3577 ref NotNull!File[string] subs() @property { load(); return _subs; } 3578 3579 NotNull!File[] subsSorted(DirSorting sorted = DirSorting.onTimeLastModified) @property 3580 { 3581 load(); 3582 auto ssubs = _subs.values; 3583 /* TODO: Use radix sort to speed things up. */ 3584 final switch (sorted) 3585 { 3586 /* case DirSorting.onTimeCreated: */ 3587 /* break; */ 3588 case DirSorting.onTimeLastModified: 3589 ssubs.sort!((a, b) => (a.timeLastModified > 3590 b.timeLastModified)); 3591 break; 3592 case DirSorting.onTimeLastAccessed: 3593 ssubs.sort!((a, b) => (a.timeLastAccessed > 3594 b.timeLastAccessed)); 3595 break; 3596 case DirSorting.onSize: 3597 ssubs.sort!((a, b) => (a.size > 3598 b.size)); 3599 break; 3600 case DirSorting.onNothing: 3601 break; 3602 } 3603 return ssubs; 3604 } 3605 3606 File sub(Name)(Name sub_name) 3607 { 3608 load(); 3609 return (sub_name in _subs) ? _subs[sub_name] : null; 3610 } 3611 File sub(File sub) 3612 { 3613 load(); 3614 return (sub.path in _subs) != null ? sub : null; 3615 } 3616 3617 version(cerealed) 3618 { 3619 void accept(Cereal cereal) 3620 { 3621 auto stdTime = timeLastModified.stdTime; 3622 cereal.grain(name, size, stdTime); 3623 timeLastModified = SysTime(stdTime); 3624 } 3625 } 3626 version(msgpack) 3627 { 3628 /** Construct from msgpack `unpacker`. 
*/
        /// Construct this directory by deserializing it from `unpacker`.
        this(Unpacker)(ref Unpacker unpacker)
        {
            fromMsgpack(msgpack.Unpacker(unpacker));
        }

        /** Serialize this directory, followed by all of its subs, to `packer`.

            Written in order: `name`, `size`, the two timestamps as `stdTime`,
            `kind`, the number of subs, the forward-difference encoded
            timestamps of the subs (only when there is at least one sub) and
            finally every sub, each preceded by a string tag naming its class.
        */
        void toMsgpack(Packer)(ref Packer packer) const
        {
            /* writeln("Entering Dir.toMsgpack ", this.name); */
            packer.pack(name, size,
                        timeLastModified.stdTime,
                        timeLastAccessed.stdTime,
                        kind);

            // Contents
            /* TODO: serialize map of polymorphic objects using
             * packer.packArray(_subs) and type trait lookup up all child-classes of
             * File */
            packer.pack(_subs.length);

            if (_subs.length >= 1)
            {
                auto diffsLastModified = _subs.byValue.map!"a.timeLastModified.stdTime".encodeForwardDifference;
                auto diffsLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime".encodeForwardDifference;
                /* auto timesLastModified = _subs.byValue.map!"a.timeLastModified.stdTime"; */
                /* auto timesLastAccessed = _subs.byValue.map!"a.timeLastAccessed.stdTime"; */

                packer.pack(diffsLastModified, diffsLastAccessed);

                /* debug dbg(this.name, " sub.length: ", _subs.length); */
                /* debug dbg(name, " modified diffs: ", diffsLastModified.pack.length); */
                /* debug dbg(name, " accessed diffs: ", diffsLastAccessed.pack.length); */
                /* debug dbg(name, " modified: ", timesLastModified.array.pack.length); */
                /* debug dbg(name, " accessed: ", timesLastAccessed.array.pack.length); */
            }

            // Each sub is tagged with its class name so that fromMsgpack can
            // pick the matching class when reading it back.
            foreach (sub; _subs)
            {
                if (const regFile = cast(RegFile)sub)
                {
                    packer.pack("RegFile");
                    regFile.toMsgpack(packer);
                }
                else if (const dir = cast(Dir)sub)
                {
                    packer.pack("Dir");
                    dir.toMsgpack(packer);
                }
                else if (const symlink = cast(Symlink)sub)
                {
                    packer.pack("Symlink");
                    symlink.toMsgpack(packer);
                }
                else if (const special = cast(SpecFile)sub)
                {
                    packer.pack("SpecFile");
                    special.toMsgpack(packer);
                }
                else
                {
                    immutable subClassName = sub.classinfo.name;
                    assert(0, "Unknown sub File class " ~ subClassName); // TODO: Exception
                }
            }
        }

        /** Deserialize this directory and its subs from `unpacker`.

            Reads the fields in the order `toMsgpack` writes them. Unpacked
            `Dir` and `RegFile` subs are checked against a fresh `DirEntry`
            and added to the tree statistics. A sub whose name is already
            present in `_subs` is not overwritten. A sub that raises
            `FileException` while being restored is silently skipped.
        */
        void fromMsgpack(Unpacker)(auto ref Unpacker unpacker)
        {
            unpacker.unpack(name, size);

            long stdTime;
            unpacker.unpack(stdTime); timeLastModified = SysTime(stdTime); // TODO: Functionize
            unpacker.unpack(stdTime); timeLastAccessed = SysTime(stdTime); // TODO: Functionize

            /* dbg("before:", path, " ", size, " ", timeLastModified, " ", timeLastAccessed); */

            // FKind
            // (the former `if (!kind) { kind = null; }` was a no-op and is gone)
            unpacker.unpack(kind); /* TODO: kind = new DirKind(unpacker); */
            /* dbg("after:", path); */

            _treeSize.reset; // this.size;

            // Contents
            /* TODO: unpacker.unpack(_subs); */
            immutable noPreviousSubs = _subs.length == 0;
            size_t subs_length; unpacker.unpack(subs_length); // TODO: Functionize to unpacker.unpack!size_t()

            // The encoded timestamps must be consumed to keep the stream in
            // sync, but they are not used afterwards in this function.
            ForwardDifferenceCode!(long[]) diffsLastModified,
                diffsLastAccessed;
            if (subs_length >= 1)
            {
                unpacker.unpack(diffsLastModified, diffsLastAccessed);
                /* auto x = diffsLastModified.decodeForwardDifference; */
            }

            foreach (ix; 0..subs_length) // repeat for subs_length times
            {
                string subClassName; unpacker.unpack(subClassName); // TODO: Functionize
                File sub = null;
                try
                {
                    switch (subClassName)
                    {
                    default:
                        assert(0, "Unknown File parent class " ~ subClassName); // TODO: Exception
                    case "Dir":
                        auto subDir = new Dir(this, gstats);
                        unpacker.unpack(subDir); sub = subDir;
                        auto subDent = DirEntry(sub.path);
                        subDir.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        addTreeStatsFromSub(assumeNotNull(subDir), subDent);
                        break;
                    case "RegFile":
                        auto subRegFile = new RegFile(assumeNotNull(this));
                        unpacker.unpack(subRegFile); sub = subRegFile;
                        auto subDent = DirEntry(sub.path);
                        subRegFile.checkObseleted(subDent); // Invalidate Statistics using fresh CStat if needed
                        updateStats(assumeNotNull(subRegFile), subDent, true);
                        addTreeStatsFromSub(assumeNotNull(subRegFile), subDent);
                        break;
                    case "Symlink":
                        auto subSymlink = new Symlink(assumeNotNull(this));
                        unpacker.unpack(subSymlink); sub = subSymlink;
                        break;
                    case "SpecFile":
                        // Named like its siblings; the previous local was
                        // called `SpecFile` and shadowed the class it
                        // instantiates.
                        auto subSpecFile = new SpecFile(assumeNotNull(this));
                        unpacker.unpack(subSpecFile); sub = subSpecFile;
                        break;
                    }
                    if (noPreviousSubs ||
                        !(sub.name in _subs))
                    {
                        _subs[sub.name] = enforceNotNull(sub);
                    }
                    /* dbg("Unpacked Dir sub ", sub.path, " of type ", subClassName); */
                } catch (FileException) { // this may be a too generic exception
                    /* dbg(sub.path, " is not accessible anymore"); */
                }
            }

        }
    } // closes a block that was opened before this part of the file

    /// Flag this directory as obsolete and drop its memoized size and time intervals.
    override void makeObselete() @trusted
    {
        _obseleteDir = true;
        _treeSize.reset;
        _timeModifiedInterval.reset;
        _timeAccessedInterval.reset;
    }

    /// Clear the obsolete flag; the memoized values are left untouched.
    override void makeUnObselete() @safe
    {
        _obseleteDir = false;
    }

    private NotNull!File[string] _subs; // Directory contents
    DirKind kind;               // Kind of this directory
    uint64_t hitCount = 0;
    private int _depth = -1;            // Memoized Depth
    private bool _obseleteDir = true;  // Flags that this is obselete
    GStats _gstats = null;

    /* TODO: Reuse Span and span in Phobos. (Span!T).init should be (T.max, T.min) */
    Interval!SysTime _timeModifiedInterval;
    Interval!SysTime _timeAccessedInterval;

    Nullable!(size_t, size_t.max) _treeSize; // Size of tree with this directory as root.
    /* TODO: Make this work instead: */
    /* import std.typecons: Nullable; */
    /* Nullable!(Bytes64, Bytes64.max) _treeSize; // Size of tree with this directory as root. */

    SHA1Digest _treeContentId;
}

/** Externally Directory Memoized Calculation of Tree Size.
    Is it possible to make get any of @safe pure nothrow?
3805 */ 3806 Bytes64 treeSizeMemoized(NotNull!File file, Bytes64[File] cache) @trusted /* nothrow */ 3807 { 3808 typeof(return) sum = file.size; 3809 if (auto dir = cast(Dir)file) 3810 { 3811 if (file in cache) 3812 { 3813 sum = cache[file]; 3814 } 3815 else 3816 { 3817 foreach (sub; dir.subs.byValue) 3818 { 3819 sum += treeSizeMemoized(sub, cache); 3820 } 3821 cache[file] = sum; 3822 } 3823 } 3824 return sum; 3825 } 3826 3827 /** Save File System Tree Cache under Directory `rootDir`. 3828 Returns: Serialized Byte Array. 3829 */ 3830 const(ubyte[]) saveRootDirTree(Viz viz, 3831 Dir rootDir, string cacheFile) @trusted 3832 { 3833 immutable tic = Clock.currTime; 3834 version(msgpack) 3835 { 3836 const data = rootDir.pack(); 3837 import std.file: write; 3838 } 3839 else version(cerealed) 3840 { 3841 auto enc = new Cerealiser(); // encoder 3842 enc ~= rootDir; 3843 auto data = enc.bytes; 3844 } 3845 else 3846 { 3847 ubyte[] data; 3848 } 3849 cacheFile.write(data); 3850 immutable toc = Clock.currTime; 3851 3852 viz.ppln("Cache Write".asH!2, 3853 "Wrote tree cache of size ", 3854 data.length.Bytes64, " to ", 3855 cacheFile.asPath, 3856 " in ", 3857 shortDurationString(toc - tic)); 3858 3859 return data; 3860 } 3861 3862 /** Load File System Tree Cache from `cacheFile`. 3863 Returns: Root Directory of Loaded Tree. 
3864 */ 3865 Dir loadRootDirTree(Viz viz, 3866 string cacheFile, GStats gstats) @trusted 3867 { 3868 immutable tic = Clock.currTime; 3869 3870 import std.file: read; 3871 try 3872 { 3873 const data = read(cacheFile); 3874 3875 auto rootDir = new Dir(cast(Dir)null, gstats); 3876 version(msgpack) 3877 { 3878 unpack(cast(ubyte[])data, rootDir); /* Dir rootDir = new Dir(cast(const(ubyte)[])data); */ 3879 } 3880 immutable toc = Clock.currTime; 3881 3882 viz.pp("Cache Read".asH!2, 3883 "Read cache of size ", 3884 data.length.Bytes64, " from ", 3885 cacheFile.asPath, 3886 " in ", 3887 shortDurationString(toc - tic), " containing", 3888 asUList(asItem(gstats.noDirs, " Dirs,"), 3889 asItem(gstats.noRegFiles, " Regular Files,"), 3890 asItem(gstats.noSymlinks, " Symbolic Links,"), 3891 asItem(gstats.noSpecialFiles, " Special Files,"), 3892 asItem("totalling ", gstats.noFiles + 1, " Files"))); 3893 assert(gstats.noDirs + 3894 gstats.noRegFiles + 3895 gstats.noSymlinks + 3896 gstats.noSpecialFiles == gstats.noFiles + 1); 3897 return rootDir; 3898 } 3899 catch (FileException) 3900 { 3901 viz.ppln("Failed to read cache from ", cacheFile); 3902 return null; 3903 } 3904 } 3905 3906 Dir[] getDirs(NotNull!Dir rootDir, string[] topDirNames) 3907 { 3908 Dir[] topDirs; 3909 foreach (topName; topDirNames) 3910 { 3911 Dir topDir = getDir(rootDir, topName); 3912 3913 if (!topDir) 3914 { 3915 dbg("Directory " ~ topName ~ " is missing"); 3916 } 3917 else 3918 { 3919 topDirs ~= topDir; 3920 } 3921 } 3922 return topDirs; 3923 } 3924 3925 /** (Cached) Lookup of File `filePath`. 
3926 */ 3927 File getFile(NotNull!Dir rootDir, string filePath, 3928 bool isDir = false, 3929 bool tolerant = false) @trusted 3930 { 3931 if (isDir) 3932 { 3933 return getDir(rootDir, filePath); 3934 } 3935 else 3936 { 3937 auto parentDir = getDir(rootDir, filePath.dirName); 3938 if (parentDir) 3939 { 3940 auto hit = parentDir.sub(filePath.baseName); 3941 if (hit) 3942 return hit; 3943 else 3944 { 3945 dbg("File path " ~ filePath ~ " doesn't exist. TODO: Query user to instead find it under " 3946 ~ parentDir.path); 3947 parentDir.find(filePath.baseName); 3948 } 3949 } 3950 else 3951 { 3952 dbg("Directory " ~ parentDir.path ~ " doesn't exist"); 3953 } 3954 } 3955 return null; 3956 } 3957 3958 /** (Cached) Lookup of Directory `dirpath`. 3959 Returns: Dir if present under rootDir, null otherwise. 3960 TODO: Make use of dent 3961 */ 3962 import std.path: isRooted; 3963 Dir getDir(NotNull!Dir rootDir, string dirPath, ref DirEntry dent, 3964 ref Symlink[] followedSymlinks) @trusted 3965 in { assert(dirPath.isRooted); } 3966 do 3967 { 3968 Dir currDir = rootDir; 3969 3970 import std.range: drop; 3971 import std.path: pathSplitter; 3972 foreach (part; dirPath.pathSplitter().drop(1)) // all but first 3973 { 3974 auto sub = currDir.sub(part); 3975 if (auto subDir = cast(Dir)sub) 3976 { 3977 currDir = subDir; 3978 } 3979 else if (auto subSymlink = cast(Symlink)sub) 3980 { 3981 auto subDent = DirEntry(subSymlink.absoluteNormalizedTargetPath); 3982 if (subDent.isDir) 3983 { 3984 if (followedSymlinks.find(subSymlink)) 3985 { 3986 dbg("Infinite recursion in ", subSymlink); 3987 return null; 3988 } 3989 followedSymlinks ~= subSymlink; 3990 currDir = getDir(rootDir, subSymlink.absoluteNormalizedTargetPath, subDent, followedSymlinks); // TODO: Check for infinite recursion 3991 } 3992 else 3993 { 3994 dbg("Loaded path " ~ dirPath ~ " is not a directory"); 3995 return null; 3996 } 3997 } 3998 else 3999 { 4000 return null; 4001 } 4002 } 4003 return currDir; 4004 } 4005 4006 /** 
(Cached) Lookup of Directory `dirPath`.

    Convenience overload that creates the `DirEntry` and the list of
    followed symbolic links itself.

    Returns: Dir if present under rootDir, null otherwise, also when a
    `FileException` is raised during the lookup.
*/
Dir getDir(NotNull!Dir rootDir, string dirPath) @trusted
{
    Symlink[] followedSymlinks;
    try
    {
        auto dirDent = DirEntry(dirPath);
        return getDir(rootDir, dirPath, dirDent, followedSymlinks);
    }
    catch (FileException)
    {
        dbg("Exception getting Dir");
        return null;
    }
}
unittest {
    /* auto tmp = tempfile("/tmp/fsfile"); */
}

enum ulong mmfile_size = 0; // 100*1024

/** Size in bytes of a memory page.

    Queried through POSIX `sysconf(_SC_PAGESIZE)` where available. 4096 is
    returned on other systems and when the query fails.
*/
auto pageSize() @trusted
{
    version(Posix)
    {
        import core.sys.posix.unistd: sysconf, _SC_PAGESIZE;
        immutable size = sysconf(_SC_PAGESIZE);
        // sysconf returns -1 on failure; the cast keeps the `int` result the
        // other branch has.
        return size > 0 ? cast(int)size : 4096;
    }
    else
    {
        return 4096;
    }
}

/** How strictly a search key has to match. */
enum KeyStrictness
{
    exact,
    acronym,
    eitherExactOrAcronym,
    standard = eitherExactOrAcronym,
}

/** Language Operator Associativity. */
enum OpAssoc { none,
               LR, // Left-to-Right
               RL, // Right-to-Left
}

/** Language Operator Arity. */
enum OpArity
{
    unknown,
    unaryPostfix, // 1-arguments
    unaryPrefix, // 1-arguments
    binary, // 2-arguments
    ternary, // 3-arguments
}

/** Language Operator. */
struct Op
{
    /** Create an operator `op` with the given properties.

        NOTE(review): `prec` is taken as `byte` (default -1) but stored in a
        `ubyte` field, so the default presumably ends up as 255 - confirm
        that this is intended.
    */
    this(string op,
         OpArity arity = OpArity.unknown,
         OpAssoc assoc = OpAssoc.none,
         byte prec = -1,
         string desc = [])
    {
        this.op = op;
        this.arity = arity;
        this.assoc = assoc;
        this.prec = prec;
        this.desc = desc;
    }
    /** Make `this` an alias of `opOrig`.
        Currently only returns a copy of `this`; `opOrig` is not recorded yet.
    */
    Op aliasOf(string opOrig)
    {
        // TODO: set relation in map from op to opOrig
        return this;
    }
    string op; // Operator. TODO: Optimize this storage using a value type?
    string desc; // Description
    OpAssoc assoc; // Associativity
    ubyte prec; // Precedence
    OpArity arity; // Arity
    bool overloadable; // Overloadable
}

/** Language Operator Alias.
*/ 4094 struct OpAlias 4095 { 4096 this(string op, string opOrigin) 4097 { 4098 this.op = op; 4099 this.opOrigin = opOrigin; 4100 } 4101 string op; 4102 string opOrigin; 4103 } 4104 4105 FKind tryLookupKindIn(RegFile regFile, 4106 FKind[SHA1Digest] kindsById) 4107 { 4108 immutable id = regFile._cstat.kindId; 4109 if (id in kindsById) 4110 { 4111 return kindsById[id]; 4112 } 4113 else 4114 { 4115 return null; 4116 } 4117 } 4118 4119 string displayedFilename(AnyFile)(GStats gstats, 4120 AnyFile theFile) @safe pure 4121 { 4122 return ((gstats.pathFormat == PathFormat.relative && 4123 gstats.topDirs.length == 1) ? 4124 "./" ~ theFile.name : 4125 theFile.path); 4126 } 4127 4128 /** File System Scanner. */ 4129 class Scanner(Term) 4130 { 4131 this(string[] args, ref Term term) 4132 { 4133 prepare(args, term); 4134 } 4135 4136 SysTime _currTime; 4137 import std.getopt; 4138 import std.string: toLower, toUpper, startsWith, CaseSensitive; 4139 import std.mmfile; 4140 import std.stdio: writeln, stdout, stderr, stdin, popen; 4141 import std.algorithm: find, count, countUntil, min, splitter; 4142 import std.range: join; 4143 import std.conv: to; 4144 4145 import core.sys.posix.sys.mman; 4146 import core.sys.posix.pwd: passwd, getpwuid_r; 4147 version(linux) 4148 { 4149 // import core.sys.linux.sys.inotify; 4150 import core.sys.linux.sys.xattr; 4151 } 4152 import core.sys.posix.unistd: getuid, getgid; 4153 import std.file: read, FileException, exists, getcwd; 4154 import std.range: retro; 4155 import std.exception: ErrnoException; 4156 import core.sys.posix.sys.stat: stat_t, S_IRUSR, S_IRGRP, S_IROTH; 4157 4158 uint64_t _hitsCountTotal = 0; 4159 4160 Symlink[] _brokenSymlinks; 4161 4162 bool _beVerbose = false; 4163 bool _caseFold = false; 4164 bool _showSkipped = false; 4165 bool listTxtFKinds = false; 4166 bool listBinFKinds = false; 4167 string selFKindNames; 4168 string[] _topDirNames; 4169 string[] addTags; 4170 string[] removeTags; 4171 4172 private 4173 { 4174 GStats 
gstats = new GStats(); 4175 4176 string _cacheFile = "~/.cache/fs-root.msgpack"; 4177 4178 uid_t _uid; 4179 gid_t _gid; 4180 } 4181 4182 ioFile outFile; 4183 4184 string[] keys; // Keys to scan. 4185 typeof(keys.map!bistogramOverRepresentation) keysBists; 4186 typeof(keys.map!(sparseUIntNGramOverRepresentation!NGramOrder)) keysXGrams; 4187 Bist keysBistsUnion; 4188 XGram keysXGramsUnion; 4189 4190 string selFKindsNote; 4191 4192 void prepare(string[] args, ref Term term) 4193 { 4194 _scanChunkSize = 32*pageSize; 4195 gstats.loadFileKinds; 4196 gstats.loadDirKinds; 4197 4198 bool helpPrinted = getoptEx("FS --- File System Scanning Utility in D.\n" ~ 4199 "Usage: fs { --switches } [KEY]...\n" ~ 4200 "Note that scanning for multiple KEYs is possible.\nIf so hits are highlighted in different colors!\n" ~ 4201 "Sample calls: \n" ~ 4202 " fdo.d --color -d /lib/modules/3.13.0-24-generic/kernel/drivers/staging --browse --duplicates --recache lirc\n" ~ 4203 " fdo.d --color -d /etc -s --tree --usage -l --duplicates stallman\n" 4204 " fdo.d --color -d /etc -d /var --acronym sttccc\n" 4205 " fdo.d --color -d /etc -d /var --acronym dktp\n" 4206 " fdo.d --color -d /etc -d /var --acronym tms sttc prc dtp xsr\n" ~ 4207 " fdo.d --color -d /etc min max delta\n" ~ 4208 " fdo.d --color -d /etc if elif return len --duplicates --sort=onSize\n" ~ 4209 " fdo.d --color -k -d /bin alpha\n" ~ 4210 " fdo.d --color -d /lib -k linus" ~ 4211 " fdo.d --color -d /etc --symbol alpha beta gamma delta" ~ 4212 " fdo.d --color -d /var/spool/postfix/dev " ~ 4213 " fdo.d --color -d /etc alpha" ~ 4214 " fdo.d --color -d ~/Work/dmd --browse xyz --duplicates --do=preprocess", 4215 4216 args, 4217 std.getopt.config.caseInsensitive, 4218 4219 "verbose|v", "\tVerbose", &_beVerbose, 4220 4221 "color|C", "\tColorize Output" ~ defaultDoc(gstats.colorFlag), &gstats.colorFlag, 4222 "types|T", "\tComma separated list (CSV) of file types/kinds to scan" ~ defaultDoc(selFKindNames), &selFKindNames, 4223 
"list-textual-kinds", "\tList registered textual types/kinds" ~ defaultDoc(listTxtFKinds), &listTxtFKinds, 4224 "list-binary-kinds", "\tList registered binary types/kinds" ~ defaultDoc(listBinFKinds), &listBinFKinds, 4225 "group-types|G", "\tCollect and group file types found" ~ defaultDoc(gstats.collectTypeHits), &gstats.collectTypeHits, 4226 4227 "i", "\tCase-Fold, Case-Insensitive" ~ defaultDoc(_caseFold), &_caseFold, 4228 "k", "\tShow Skipped Directories and Files" ~ defaultDoc(_showSkipped), &_showSkipped, 4229 "d", "\tRoot Directory(s) of tree(s) to scan, defaulted to current directory" ~ defaultDoc(_topDirNames), &_topDirNames, 4230 "depth", "\tDepth of tree to scan, defaulted to unlimited (-1) depth" ~ defaultDoc(gstats.scanDepth), &gstats.scanDepth, 4231 4232 // Contexts 4233 "context|x", "\tComma Separated List of Contexts. Either: " ~ enumDoc!ScanContext, &gstats.scanContext, 4234 4235 "word|w", "\tSearch for key as a complete Word (A Letter followed by more Letters and Digits)." 
~ defaultDoc(gstats.keyAsWord), &gstats.keyAsWord, 4236 "symbol|ident|id|s", "\tSearch for key as a complete Symbol (Identifier)" ~ defaultDoc(gstats.keyAsSymbol), &gstats.keyAsSymbol, 4237 "acronym|a", "\tSearch for key as an acronym (relaxed)" ~ defaultDoc(gstats.keyAsAcronym), &gstats.keyAsAcronym, 4238 "exact", "\tSearch for key only with exact match (strict)" ~ defaultDoc(gstats.keyAsExact), &gstats.keyAsExact, 4239 4240 "name-duplicates|snd", "\tDetect & Show file name duplicates" ~ defaultDoc(gstats.showNameDups), &gstats.showNameDups, 4241 "hardlink-duplicates|inode-duplicates|shd", "\tDetect & Show multiple links to same inode" ~ defaultDoc(gstats.showLinkDups), &gstats.showLinkDups, 4242 "file-content-duplicates|scd", "\tDetect & Show file contents duplicates" ~ defaultDoc(gstats.showFileContentDups), &gstats.showFileContentDups, 4243 "tree-content-duplicates", "\tDetect & Show directory tree contents duplicates" ~ defaultDoc(gstats.showTreeContentDups), &gstats.showTreeContentDups, 4244 4245 "elf-symbol-duplicates", "\tDetect & Show ELF Symbol Duplicates" ~ defaultDoc(gstats.showELFSymbolDups), &gstats.showELFSymbolDups, 4246 4247 "duplicates|D", "\tDetect & Show file name and contents duplicates" ~ defaultDoc(gstats.showAnyDups), &gstats.showAnyDups, 4248 "duplicates-context", "\tDuplicates Detection Context. 
Either: " ~ enumDoc!DuplicatesContext, &gstats.duplicatesContext, 4249 "hardlink-content-duplicates", "\tConvert all content duplicates into hardlinks (common inode) if they reside on the same file system" ~ defaultDoc(gstats.linkContentDups), &gstats.linkContentDups, 4250 4251 "usage", "\tShow disk usage (tree size) of scanned directories" ~ defaultDoc(gstats.showUsage), &gstats.showUsage, 4252 "count-lines", "\tShow line counts of scanned files" ~ defaultDoc(gstats.showLineCounts), &gstats.showLineCounts, 4253 4254 "sha1", "\tShow SHA1 content digests" ~ defaultDoc(gstats.showSHA1), &gstats.showSHA1, 4255 4256 "mmaps", "\tShow when files are memory mapped (mmaped)" ~ defaultDoc(gstats.showMMaps), &gstats.showMMaps, 4257 4258 "follow-symlinks|f", "\tFollow symbolic links" ~ defaultDoc(gstats.followSymlinks), &gstats.followSymlinks, 4259 "broken-symlinks|l", "\tDetect & Show broken symbolic links (target is non-existing file) " ~ defaultDoc(gstats.showBrokenSymlinks), &gstats.showBrokenSymlinks, 4260 "show-symlink-cycles|l", "\tDetect & Show symbolic links cycles" ~ defaultDoc(gstats.showSymlinkCycles), &gstats.showSymlinkCycles, 4261 4262 "add-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(addTags), &addTags, 4263 "remove-tag", "\tAdd tag string(s) to matching files" ~ defaultDoc(removeTags), &removeTags, 4264 4265 "tree|W", "\tShow Scanned Tree and Followed Symbolic Links" ~ defaultDoc(gstats.showTree), &gstats.showTree, 4266 "sort|S", "\tDirectory contents sorting order. Either: " ~ enumDoc!DirSorting, &gstats.subsSorting, 4267 "build", "\tBuild Source Code. Either: " ~ enumDoc!BuildType, &gstats.buildType, 4268 4269 "path-format", "\tFormat of paths. Either: " ~ enumDoc!PathFormat ~ "." 
~ defaultDoc(gstats.pathFormat), &gstats.pathFormat, 4270 4271 "cache-file|F", "\tFile System Tree Cache File" ~ defaultDoc(_cacheFile), &_cacheFile, 4272 "recache", "\tSkip initial load of cache from disk" ~ defaultDoc(gstats.recache), &gstats.recache, 4273 4274 "do", "\tOperation to perform on matching files. Either: " ~ enumDoc!FOp, &gstats.fOp, 4275 4276 "demangle-elf", "\tDemangle ELF files.", &gstats.demangleELF, 4277 4278 "use-ngrams", "\tUse NGrams to cache statistics and thereby speed up search" ~ defaultDoc(gstats.useNGrams), &gstats.useNGrams, 4279 4280 "html|H", "\tFormat output as HTML" ~ defaultDoc(gstats.useHTML), &gstats.useHTML, 4281 "browse|B", ("\tFormat output as HTML to a temporary file" ~ 4282 defaultDoc(_cacheFile) ~ 4283 " and open it with default Web browser" ~ 4284 defaultDoc(gstats.browseOutput)), &gstats.browseOutput, 4285 4286 "author", "\tPrint name of\n"~"\tthe author", 4287 delegate() { writeln("Per Nordlöw"); } 4288 ); 4289 4290 if (gstats.showAnyDups) 4291 { 4292 gstats.showNameDups = true; 4293 gstats.showLinkDups = true; 4294 gstats.showFileContentDups = true; 4295 gstats.showTreeContentDups = true; 4296 gstats.showELFSymbolDups = true; 4297 } 4298 if (helpPrinted) 4299 return; 4300 4301 _cacheFile = std.path.expandTilde(_cacheFile); 4302 4303 if (_topDirNames.empty) 4304 { 4305 _topDirNames = ["."]; 4306 } 4307 if (_topDirNames == ["."]) 4308 { 4309 gstats.pathFormat = PathFormat.relative; 4310 } 4311 else 4312 { 4313 gstats.pathFormat = PathFormat.absolute; 4314 } 4315 foreach (ref topName; _topDirNames) 4316 { 4317 if (topName == ".") 4318 { 4319 topName = topName.absolutePath.buildNormalizedPath; 4320 } 4321 else 4322 { 4323 topName = topName.expandTilde.buildNormalizedPath; 4324 } 4325 } 4326 4327 // Output Handling 4328 if (gstats.browseOutput) 4329 { 4330 gstats.useHTML = true; 4331 immutable ext = gstats.useHTML ? 
"html" : "results.txt"; 4332 import std.uuid: randomUUID; 4333 outFile = ioFile("/tmp/fs-" ~ randomUUID.toString() ~ 4334 "." ~ ext, 4335 "w"); 4336 /* popen("gnome-open " ~ outFile.name); */ 4337 popen("firefox -new-tab " ~ outFile.name); 4338 } 4339 else 4340 { 4341 outFile = stdout; 4342 } 4343 4344 auto cwd = getcwd(); 4345 4346 foreach (arg; args[1..$]) 4347 { 4348 if (!arg.startsWith("-")) // if argument not a flag 4349 { 4350 keys ~= arg; 4351 } 4352 } 4353 4354 // Calc stats 4355 keysBists = keys.map!bistogramOverRepresentation; 4356 keysXGrams = keys.map!(sparseUIntNGramOverRepresentation!NGramOrder); 4357 keysBistsUnion = reduce!"a | b"(typeof(keysBists.front).init, keysBists); 4358 keysXGramsUnion = reduce!"a + b"(typeof(keysXGrams.front).init, keysXGrams); 4359 4360 auto viz = new Viz(outFile, 4361 &term, 4362 gstats.showTree, 4363 gstats.useHTML ? VizForm.HTML : VizForm.textAsciiDocUTF8, 4364 gstats.colorFlag, 4365 !gstats.useHTML, // only use if HTML 4366 true, // TODO: Only set if in debug mode 4367 ); 4368 4369 if (gstats.useNGrams && 4370 (!keys.empty) && 4371 keysXGramsUnion.empty) 4372 { 4373 gstats.useNGrams = false; 4374 viz.ppln("Keys must be at least of length " ~ 4375 to!string(NGramOrder + 1) ~ 4376 " in order for " ~ 4377 keysXGrams[0].typeName ~ 4378 " to be calculated"); 4379 } 4380 4381 // viz.ppln("<meta http-equiv=\"refresh\" content=\"1\"/>"); // refresh every second 4382 4383 if (selFKindNames) 4384 { 4385 foreach (lang; selFKindNames.splitterASCIIAmong!(",")) 4386 { 4387 if (lang in gstats.allFKinds.byName) // try exact match 4388 { 4389 gstats.selFKinds ~= gstats.allFKinds.byName[lang]; 4390 } 4391 else if (lang.toLower in gstats.allFKinds.byName) // else try all in lower case 4392 { 4393 gstats.selFKinds ~= gstats.allFKinds.byName[lang.toLower]; 4394 } 4395 else if (lang.toUpper in gstats.allFKinds.byName) // else try all in upper case 4396 { 4397 gstats.selFKinds ~= gstats.allFKinds.byName[lang.toUpper]; 4398 } 4399 else 4400 { 
4401 writeln("warning: Language ", lang, " not registered"); 4402 } 4403 } 4404 if (gstats.selFKinds.byIndex.empty) 4405 { 4406 writeln("warning: None of the languages ", to!string(selFKindNames), " are registered. Defaulting to all file types."); 4407 gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds 4408 } 4409 else 4410 { 4411 gstats.selFKinds.rehash; 4412 } 4413 } 4414 else 4415 { 4416 gstats.selFKinds = gstats.allFKinds; // just reuse allFKinds 4417 } 4418 4419 // Keys 4420 auto commaedKeys = keys.joiner(","); 4421 const keysPluralExt = keys.length >= 2 ? "s" : ""; 4422 string commaedKeysString = to!string(commaedKeys); 4423 if (keys) 4424 { 4425 selFKindsNote = " in " ~ (gstats.selFKinds == gstats.allFKinds ? 4426 "all " : 4427 gstats.selFKinds.byIndex.map!(a => a.kindName).join(",") ~ "-") ~ "files"; 4428 immutable underNote = " under \"" ~ (_topDirNames.reduce!"a ~ ',' ~ b") ~ "\""; 4429 const exactNote = gstats.keyAsExact ? "exact " : ""; 4430 string asNote; 4431 if (gstats.keyAsAcronym) 4432 { 4433 asNote = (" as " ~ exactNote ~ 4434 (gstats.keyAsWord ? "word" : "symbol") ~ 4435 " acronym" ~ keysPluralExt); 4436 } 4437 else if (gstats.keyAsSymbol) 4438 { 4439 asNote = " as " ~ exactNote ~ "symbol" ~ keysPluralExt; 4440 } 4441 else if (gstats.keyAsWord) 4442 { 4443 asNote = " as " ~ exactNote ~ "word" ~ keysPluralExt; 4444 } 4445 else 4446 { 4447 asNote = ""; 4448 } 4449 4450 const title = ("Searching for \"" ~ commaedKeysString ~ "\"" ~ 4451 " case-" ~ (_caseFold ? "in" : "") ~"sensitively" 4452 ~asNote ~selFKindsNote ~underNote); 4453 if (viz.form == VizForm.HTML) // only needed for HTML output 4454 { 4455 viz.ppln(faze(title, titleFace)); 4456 } 4457 4458 viz.pp(asH!1("Searching for \"", commaedKeysString, "\"", 4459 " case-", (_caseFold ? 
"in" : ""), "sensitively", 4460 asNote, selFKindsNote, 4461 " under ", _topDirNames.map!(a => a.asPath))); 4462 } 4463 4464 if (listTxtFKinds) 4465 { 4466 viz.pp("Textual (Source) Kinds".asH!2, 4467 gstats.txtFKinds.byIndex.asTable); 4468 } 4469 4470 if (listBinFKinds) 4471 { 4472 viz.pp("Binary Kinds".asH!2, 4473 gstats.binFKinds.byIndex.asTable); 4474 } 4475 4476 /* binFKinds.asTable, */ 4477 4478 if (_showSkipped) 4479 { 4480 viz.pp("Skipping files of type".asH!2, 4481 asUList(gstats.binFKinds.byIndex.map!(a => asItem(a.kindName.asBold, 4482 ": ", 4483 asCSL(a.exts.map!(b => b.asCode)))))); 4484 viz.pp("Skipping directories of type".asH!2, 4485 asUList(gstats.skippedDirKinds.map!(a => asItem(a.kindName.asBold, 4486 ": ", 4487 a.fileName.asCode)))); 4488 } 4489 4490 // if (key && key == key.toLower()) { // if search key is all lowercase 4491 // _caseFold = true; // we do case-insensitive search like in Emacs 4492 // } 4493 4494 _uid = getuid; 4495 _gid = getgid; 4496 4497 // Setup root directory 4498 if (!gstats.recache) 4499 { 4500 GC.disable; 4501 gstats.rootDir = loadRootDirTree(viz, _cacheFile, gstats); 4502 GC.enable; 4503 } 4504 if (!gstats.rootDir) // if first time 4505 { 4506 gstats.rootDir = new Dir("/", gstats); // filesystem root directory. TODO: Make this uncopyable? 
4507 } 4508 4509 // Scan for exact key match 4510 gstats.topDirs = getDirs(enforceNotNull(gstats.rootDir), _topDirNames); 4511 4512 _currTime = Clock.currTime; 4513 4514 GC.disable; 4515 scanTopDirs(viz, commaedKeysString); 4516 GC.enable; 4517 4518 GC.disable; 4519 saveRootDirTree(viz, gstats.rootDir, _cacheFile); 4520 GC.enable; 4521 4522 // Print statistics 4523 showStats(viz); 4524 } 4525 4526 void scanTopDirs(Viz viz, 4527 string commaedKeysString) 4528 { 4529 viz.pp("Results".asH!2); 4530 if (gstats.topDirs) 4531 { 4532 foreach (topIndex, topDir; gstats.topDirs) 4533 { 4534 scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys); 4535 if (ctrlC) 4536 { 4537 auto restDirs = gstats.topDirs[topIndex + 1..$]; 4538 if (!restDirs.empty) 4539 { 4540 debug dbg("Ctrl-C pressed: Skipping search of " ~ to!string(restDirs)); 4541 break; 4542 } 4543 } 4544 } 4545 4546 viz.pp("Summary".asH!2); 4547 4548 if ((gstats.noScannedFiles - gstats.noScannedDirs) == 0) 4549 { 4550 viz.ppln("No files with any content found"); 4551 } 4552 else 4553 { 4554 // Scan for acronym key match 4555 if (keys && _hitsCountTotal == 0) // if keys given but no hit found 4556 { 4557 auto keysString = (keys.length >= 2 ? "s" : "") ~ " \"" ~ commaedKeysString; 4558 if (gstats.keyAsAcronym) 4559 { 4560 viz.ppln(("No acronym matches for key" ~ keysString ~ `"` ~ 4561 (gstats.keyAsSymbol ? " as symbol" : "") ~ 4562 " found in files of type")); 4563 } 4564 else if (!gstats.keyAsExact) 4565 { 4566 viz.ppln(("No exact matches for key" ~ keysString ~ `"` ~ 4567 (gstats.keyAsSymbol ? " as symbol" : "") ~ 4568 " found" ~ selFKindsNote ~ 4569 ". Relaxing scan to" ~ (gstats.keyAsSymbol ? 
" symbol" : "") ~ " acronym match.")); 4570 gstats.keyAsAcronym = true; 4571 4572 foreach (topDir; gstats.topDirs) 4573 { 4574 scanDir(viz, assumeNotNull(topDir), assumeNotNull(topDir), keys); 4575 } 4576 } 4577 } 4578 } 4579 } 4580 4581 assert(gstats.noScannedDirs + 4582 gstats.noScannedRegFiles + 4583 gstats.noScannedSymlinks + 4584 gstats.noScannedSpecialFiles == gstats.noScannedFiles); 4585 } 4586 4587 version(linux) 4588 { 4589 @trusted bool readable(in stat_t stat, uid_t uid, gid_t gid, ref string msg) 4590 { 4591 immutable mode = stat.st_mode; 4592 immutable ok = ((stat.st_uid == uid) && (mode & S_IRUSR) || 4593 (stat.st_gid == gid) && (mode & S_IRGRP) || 4594 (mode & S_IROTH)); 4595 if (!ok) 4596 { 4597 msg = " is not readable by you, but only by"; 4598 bool can = false; // someone can access 4599 if (mode & S_IRUSR) 4600 { 4601 can = true; 4602 msg ~= " user id " ~ to!string(stat.st_uid); 4603 4604 // Lookup user name from user id 4605 passwd pw; 4606 passwd* pw_ret; 4607 immutable size_t bufsize = 16384; 4608 char* buf = cast(char*)core.stdc.stdlib.malloc(bufsize); 4609 getpwuid_r(stat.st_uid, &pw, buf, bufsize, &pw_ret); 4610 if (pw_ret != null) 4611 { 4612 string userName; 4613 { 4614 size_t n = 0; 4615 while (pw.pw_name[n] != 0) 4616 { 4617 userName ~= pw.pw_name[n]; 4618 n++; 4619 } 4620 } 4621 msg ~= " (" ~ userName ~ ")"; 4622 4623 // string realName; 4624 // { 4625 // size_t n = 0; 4626 // while (pw.pw_gecos[n] != 0) 4627 // { 4628 // realName ~= pw.pw_gecos[n]; 4629 // n++; 4630 // } 4631 // } 4632 } 4633 core.stdc.stdlib.free(buf); 4634 4635 } 4636 if (mode & S_IRGRP) 4637 { 4638 can = true; 4639 if (msg != "") 4640 { 4641 msg ~= " or"; 4642 } 4643 msg ~= " group id " ~ to!string(stat.st_gid); 4644 } 4645 if (!can) 4646 { 4647 msg ~= " root"; 4648 } 4649 } 4650 return ok; 4651 } 4652 } 4653 4654 Results results; 4655 4656 void handleError(F)(Viz viz, 4657 NotNull!F file, bool isDir, size_t subIndex) 4658 { 4659 auto dent = DirEntry(file.path); 
4660 immutable stat_t stat = dent.statBuf; 4661 string msg; 4662 if (!readable(stat, _uid, _gid, msg)) 4663 { 4664 results.noBytesUnreadable += dent.size; 4665 if (_showSkipped) 4666 { 4667 if (gstats.showTree) 4668 { 4669 auto parentDir = file.parent; 4670 immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├"; 4671 viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ "); 4672 } 4673 viz.ppln(file, 4674 ": ", isDir ? "Directory" : "File", 4675 faze(msg, warnFace)); 4676 } 4677 } 4678 } 4679 4680 void printSkipped(Viz viz, 4681 NotNull!RegFile regFile, 4682 size_t subIndex, 4683 const NotNull!FKind kind, KindHit kindhit, 4684 const string skipCause) 4685 { 4686 auto parentDir = regFile.parent; 4687 if (_showSkipped) 4688 { 4689 if (gstats.showTree) 4690 { 4691 immutable intro = subIndex == parentDir.subs.length - 1 ? "└" : "├"; 4692 viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ intro ~ "─ "); 4693 } 4694 viz.pp(horizontalRuler, 4695 asH!3(regFile, 4696 ": Skipped ", kind, " file", 4697 skipCause)); 4698 } 4699 } 4700 4701 size_t _scanChunkSize; 4702 4703 KindHit isSelectedFKind(NotNull!RegFile regFile) @safe /* nothrow */ 4704 { 4705 typeof(return) kindHit = KindHit.none; 4706 FKind hitKind; 4707 4708 // Try cached kind first 4709 // First Try with kindId as try 4710 if (regFile._cstat.kindId.defined) // kindId is already defined and uptodate 4711 { 4712 if (regFile._cstat.kindId in gstats.selFKinds.byId) 4713 { 4714 hitKind = gstats.selFKinds.byId[regFile._cstat.kindId]; 4715 kindHit = KindHit.cached; 4716 return kindHit; 4717 } 4718 } 4719 4720 immutable ext = regFile.realExtension; 4721 4722 // Try with hash table first 4723 if (!ext.empty && // if file has extension and 4724 ext in gstats.selFKinds.byExt) // and extensions may match specified included files 4725 { 4726 auto possibleKinds = gstats.selFKinds.byExt[ext]; 4727 foreach (kind; possibleKinds) 4728 { 4729 auto nnKind = enforceNotNull(kind); 4730 immutable hit = 
regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds); 4731 if (hit) 4732 { 4733 hitKind = nnKind; 4734 kindHit = hit; 4735 break; 4736 } 4737 } 4738 } 4739 4740 if (!hitKind) // if no hit yet 4741 { 4742 // blindly try the rest 4743 foreach (kind; gstats.selFKinds.byIndex) 4744 { 4745 auto nnKind = enforceNotNull(kind); 4746 immutable hit = regFile.ofKind(nnKind, gstats.collectTypeHits, gstats.allFKinds); 4747 if (hit) 4748 { 4749 hitKind = nnKind; 4750 kindHit = hit; 4751 break; 4752 } 4753 } 4754 } 4755 4756 return kindHit; 4757 } 4758 4759 /** Search for Keys `keys` in Source `src`. 4760 */ 4761 size_t scanForKeys(Source, Keys)(Viz viz, 4762 NotNull!Dir topDir, 4763 NotNull!File theFile, 4764 NotNull!Dir parentDir, 4765 ref Symlink[] fromSymlinks, 4766 in Source src, 4767 in Keys keys, 4768 in bool[] bistHits = [], 4769 ScanContext ctx = ScanContext.standard) 4770 { 4771 bool anyFileHit = false; // will become true if any hit in this file 4772 4773 typeof(return) hitCount = 0; 4774 4775 import std.ascii: newline; 4776 4777 auto thisFace = stdFace; 4778 if (gstats.colorFlag) 4779 { 4780 if (ScanContext.fileName) 4781 { 4782 thisFace = fileFace; 4783 } 4784 } 4785 4786 size_t nL = 0; // line counter 4787 foreach (line; src.splitterASCIIAmong!(newline)) 4788 { 4789 auto rest = cast(string)line; // rest of line as a string 4790 4791 bool anyLineHit = false; // will become true if any hit on current line 4792 // Hit search loop 4793 while (!rest.empty) 4794 { 4795 // Find any key 4796 4797 /* TODO: Convert these to a range. */ 4798 ptrdiff_t offKB = -1; 4799 ptrdiff_t offKE = -1; 4800 4801 foreach (uint ix, key; keys) // TODO: Call variadic-find instead to speed things up. 
4802 { 4803 /* Bistogram Discardal */ 4804 if ((!bistHits.empty) && 4805 !bistHits[ix]) // if neither exact nor acronym match possible 4806 { 4807 continue; // try next key 4808 } 4809 4810 /* dbg("key:", key, " line:", line); */ 4811 ptrdiff_t[] acronymOffsets; 4812 if (gstats.keyAsAcronym) // acronym search 4813 { 4814 auto hit = (cast(immutable ubyte[])rest).findAcronymAt(key, 4815 gstats.keyAsSymbol ? FindContext.inSymbol : FindContext.inWord); 4816 if (!hit[0].empty) 4817 { 4818 acronymOffsets = hit[1]; 4819 offKB = hit[1][0]; 4820 offKE = hit[1][$-1] + 1; 4821 } 4822 } 4823 else 4824 { // normal search 4825 import std.string: indexOf; 4826 offKB = rest.indexOf(key, 4827 _caseFold ? CaseSensitive.no : CaseSensitive.yes); // hit begin offset 4828 offKE = offKB + key.length; // hit end offset 4829 } 4830 4831 if (offKB >= 0) // if hit 4832 { 4833 if (!gstats.showTree && ctx == ScanContext.fileName) 4834 { 4835 viz.pp(parentDir, dirSeparator); 4836 } 4837 4838 // Check Context 4839 if ((gstats.keyAsSymbol && !isSymbolASCII(rest, offKB, offKE)) || 4840 (gstats.keyAsWord && !isWordASCII (rest, offKB, offKE))) 4841 { 4842 rest = rest[offKE..$]; // move forward in line 4843 continue; 4844 } 4845 4846 if (ctx == ScanContext.fileContent && 4847 !anyLineHit) // if this is first hit 4848 { 4849 if (viz.form == VizForm.HTML) 4850 { 4851 if (!anyFileHit) 4852 { 4853 viz.pp(horizontalRuler, 4854 displayedFilename(gstats, theFile).asPath.asH!3); 4855 viz.ppTagOpen(`table`, `border=1`); 4856 anyFileHit = true; 4857 } 4858 } 4859 else 4860 { 4861 if (gstats.showTree) 4862 { 4863 viz.pp("│ ".repeat(parentDir.depth + 1).join("") ~ "├" ~ "─ "); 4864 } 4865 else 4866 { 4867 foreach (fromSymlink; fromSymlinks) 4868 { 4869 viz.pp(fromSymlink, 4870 " modified ", 4871 faze(shortDurationString(_currTime - fromSymlink.timeLastModified), 4872 timeFace), 4873 " ago", 4874 " -> "); 4875 } 4876 // show file path/name 4877 viz.pp(displayedFilename(gstats, theFile).asPath); // show path 4878 
} 4879 } 4880 4881 // show line:column 4882 if (viz.form == VizForm.HTML) 4883 { 4884 viz.ppTagOpen("tr"); 4885 viz.pp(to!string(nL+1).asCell, 4886 to!string(offKB+1).asCell); 4887 viz.ppTagOpen("td"); 4888 viz.ppTagOpen("code"); 4889 } 4890 else 4891 { 4892 viz.pp(faze(":" ~ to!string(nL+1) ~ ":" ~ to!string(offKB+1) ~ ":", 4893 contextFace)); 4894 } 4895 anyLineHit = true; 4896 } 4897 4898 // show content prefix 4899 viz.pp(faze(to!string(rest[0..offKB]), thisFace)); 4900 4901 // show hit part 4902 if (!acronymOffsets.empty) 4903 { 4904 foreach (aIndex, currOff; acronymOffsets) // TODO: Reuse std.algorithm: zip or lockstep? Or create a new kind say named conv. 4905 { 4906 // context before 4907 if (aIndex >= 1) 4908 { 4909 immutable prevOff = acronymOffsets[aIndex-1]; 4910 if (prevOff + 1 < currOff) // at least one letter in between 4911 { 4912 viz.pp(asCtx(ix, to!string(rest[prevOff + 1 .. currOff]))); 4913 } 4914 } 4915 // hit letter 4916 viz.pp(asHit(ix, to!string(rest[currOff]))); 4917 } 4918 } 4919 else 4920 { 4921 viz.pp(asHit(ix, to!string(rest[offKB..offKE]))); 4922 } 4923 4924 rest = rest[offKE..$]; // move forward in line 4925 4926 hitCount++; // increase hit count 4927 parentDir.hitCount++; 4928 _hitsCountTotal++; 4929 4930 goto foundHit; 4931 } 4932 } 4933 foundHit: 4934 if (offKB == -1) { break; } 4935 } 4936 4937 // finalize line 4938 if (anyLineHit) 4939 { 4940 // show final context suffix 4941 viz.ppln(faze(rest, thisFace)); 4942 if (viz.form == VizForm.HTML) 4943 { 4944 viz.ppTagClose("code"); 4945 viz.ppTagClose("td"); 4946 viz.pplnTagClose("tr"); 4947 } 4948 } 4949 nL++; 4950 } 4951 4952 if (gstats.showLineCounts) 4953 { 4954 gstats.lineCountsByFile[theFile] = nL; 4955 } 4956 4957 if (anyFileHit) 4958 { 4959 viz.pplnTagClose("table"); 4960 } 4961 4962 // Previous solution 4963 // version(none) 4964 // { 4965 // ptrdiff_t offHit = 0; 4966 // foreach (ix, key; keys) 4967 // { 4968 // scope immutable hit1 = src.find(key); // single key hit 4969 // 
offHit = hit1.ptr - src.ptr; 4970 // if (!hit1.empty) 4971 // { 4972 // scope immutable src0 = src[0..offHit]; // src beforce hi 4973 // immutable rowHit = count(src0, newline); 4974 // immutable colHit = src0.retro.countUntil(newline); // count backwards till beginning of rowHit 4975 // immutable offBOL = offHit - colHit; 4976 // immutable cntEOL = src[offHit..$].countUntil(newline); // count forwards to end of rowHit 4977 // immutable offEOL = (cntEOL == -1 ? // if no hit 4978 // src.length : // end of file 4979 // offHit + cntEOL); // normal case 4980 // viz.pp(faze(asPath(gstats.useHTML, dent.name), pathFace)); 4981 // viz.ppln(":", rowHit + 1, 4982 // ":", colHit + 1, 4983 // ":", cast(string)src[offBOL..offEOL]); 4984 // } 4985 // } 4986 // } 4987 4988 // switch (keys.length) 4989 // { 4990 // default: 4991 // break; 4992 // case 0: 4993 // break; 4994 // case 1: 4995 // immutable hit1 = src.find(keys[0]); 4996 // if (!hit1.empty) 4997 // { 4998 // viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit1.length); 4999 // } 5000 // break; 5001 // // case 2: 5002 // // immutable hit2 = src.find(keys[0], keys[1]); // find two keys 5003 // // if (!hit2[0].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]), ":1: HIT offset: ", hit2[0].length); } 5004 // // if (!hit2[1].empty) { viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit2[1].length); } 5005 // // break; 5006 // // case 3: 5007 // // immutable hit3 = src.find(keys[0], keys[1], keys[2]); // find two keys 5008 // // if (!hit3.empty) 5009 // { 5010 // // viz.ppln(asPath(gstats.useHTML, dent.name[2..$]) , ":1: HIT offset: ", hit1.length); 5011 // // } 5012 // // break; 5013 // } 5014 return hitCount; 5015 } 5016 5017 /** Process Regular File `theRegFile`. 
*/
    /**
     * Scans `theRegFile` with `scanRegFile` and then runs the file operation
     * `gstats.fOp` for every selected file kind that is registered under the
     * file's real extension and defines that operation.
     *
     * The operation's command string is split at spaces, the path of
     * `theRegFile` is appended as last argument and the resulting command is
     * run to completion before the next one is started.
     *
     * Params:
     *   viz          = output sink
     *   topDir       = top directory of the current scan
     *   theRegFile   = file to scan and operate on
     *   parentDir    = directory the file is presented under
     *   keys         = search keys
     *   fromSymlinks = chain of symlinks followed to reach the file
     *   subIndex     = index of the file among the entries of `parentDir`
     *   gstats       = global settings and statistics
     */
    void processRegFile(Viz viz,
                        NotNull!Dir topDir,
                        NotNull!RegFile theRegFile,
                        NotNull!Dir parentDir,
                        const string[] keys,
                        ref Symlink[] fromSymlinks,
                        size_t subIndex,
                        GStats gstats)
    {
        scanRegFile(viz,
                    topDir,
                    theRegFile,
                    parentDir,
                    keys,
                    fromSymlinks,
                    subIndex);

        // check for operations
        // TODO: Reuse isSelectedFKind instead of this
        immutable ext = theRegFile.realExtension;
        if (ext in gstats.selFKinds.byExt)
        {
            auto matchingFKinds = gstats.selFKinds.byExt[ext];
            foreach (kind; matchingFKinds)
            {
                const hit = kind.operations.find!(a => a[0] == gstats.fOp);
                if (hit.empty)
                {
                    continue;   // this kind does not define the requested operation
                }

                const fOp = hit.front;
                const cmd = fOp[1]; // command string
                import std.process: spawnProcess, wait;
                dbg("TODO: Performing operation ", to!string(cmd),
                    " on ", theRegFile.path,
                    " by calling it using ", cmd);
                auto pid = spawnProcess(cmd.splitterASCIIAmong!(" ").array ~ [theRegFile.path]);

                // Reap the child. Without this every operation leaves a zombie
                // behind and a large scan starts an unbounded number of
                // concurrently running processes.
                immutable status = wait(pid);
                if (status != 0)
                {
                    dbg("Operation ", to!string(cmd),
                        " on ", theRegFile.path,
                        " exited with status ", status);
                }
            }
        }
    }

    /**
     * Scan `elfFile` for ELF Symbols.
     *
     * Every non-empty entry of the symbols string table is registered in
     * `gstats.elfFilesBySymbol`. If `keys` is non-empty, at least one
     * demangled symbol matches a key and the `ELF` kind is selected, the
     * matching symbols are printed as a table.
     *
     * Params:
     *   viz     = output sink
     *   elfFile = ELF file to decode
     *   keys    = search keys matched against the demangled symbols
     *   gstats  = global settings and statistics
     */
    void scanELFFile(Viz viz,
                     NotNull!RegFile elfFile,
                     const string[] keys,
                     GStats gstats)
    {
        import nxt.elfdoc: sectionNameExplanations;
        /* TODO: Add mouse hovering help for sectionNameExplanations[section] */
        dbg("before: ", elfFile);
        ELF decoder = ELF.fromFile(elfFile._mmfile);
        dbg("after: ", elfFile);

        // foreach (section; decoder.sections)
        // {
        //     if (section.name.length)
        //     {
        //         /* auto sst = section.StringTable; */
        //         //writeln("ELF Section named ", section.name);
        //     }
        // }

        // const sectionNames = [".symtab"/* , ".strtab", ".dynsym" */];    // TODO: These two other sections causes range exceptions.
        // foreach (sectionName; sectionNames)
        // {
        //     auto sts = decoder.getSection(sectionName);
        //     if (!sts.isNull)
        //     {
        //         SymbolTable symtab = SymbolTable(sts);
        //         // TODO: Use range: auto symbolsDemangled = symtab.symbols.map!(sym => demangler(sym.name).decodeSymbol);
        //         foreach (sym; symtab.symbols) // you can add filters here
        //         {
        //             if (gstats.demangleELF)
        //             {
        //                 const hit = demangler(sym.name).decodeSymbol;
        //             }
        //             else
        //             {
        //                 writeln("?: ", sym.name);
        //             }
        //         }
        //     }
        // }

        auto sst = decoder.getSymbolsStringTable;
        if (!sst.isNull)
        {
            import nxt.algorithm_ex: findFirstOfAnyInOrder;
            import std.range : tee;

            // Lazy pipeline: symbols are registered by `tee` only while the
            // range is being consumed.
            auto scan = (sst.strings
                         .filter!(raw => !raw.empty) // skip empty raw string
                         .tee!(raw => gstats.elfFilesBySymbol[raw.idup] ~= elfFile) // WARNING: needs raw.idup here because we can't refer to raw later
                         .map!(raw => demangler(raw).decodeSymbol)
                         .filter!(demangling => (!keys.empty && // don't show anything if no keys given
                                                 demangling.unmangled.findFirstOfAnyInOrder(keys)[1])));

            if (!scan.empty &&
                `ELF` in gstats.selFKinds.byName) // if user selected ELF file show them
            {
                // NOTE(review): assumes rendering the table walks the whole
                // range, which is what registers the remaining symbols.
                viz.pp(horizontalRuler,
                       displayedFilename(gstats, elfFile).asPath.asH!3,
                       asH!4(`ELF Symbol Strings Table (`, `.strtab`.asCode, `)`),
                       scan.asTable);
            }
            else
            {
                // Nothing is displayed, so drain the rest of the range here.
                // Otherwise the symbols after the first hit would never be
                // registered in `gstats.elfFilesBySymbol`.
                foreach (unused; scan) {}
            }
        }
    }

    /** Search for Keys `keys` in Regular File `theRegFile`.
*/
    /**
     * Adds the file size to the byte totals in `results`, optionally scans
     * the file name for `keys` and, for non-empty files, scans the contents.
     *
     * Content scanning is skipped (and the size is accounted as skipped) when
     * the file is of a binary kind, when no key can match according to the
     * byte bistogram or when the n-gram match count is zero. Loaded contents
     * are released again on every way out of the content scan.
     *
     * Params:
     *   viz          = output sink
     *   topDir       = top directory of the current scan
     *   theRegFile   = file to scan
     *   parentDir    = directory the file is presented under
     *   keys         = search keys
     *   fromSymlinks = chain of symlinks followed to reach the file
     *   subIndex     = index of the file among the entries of `parentDir`
     */
    void scanRegFile(Viz viz,
                     NotNull!Dir topDir,
                     NotNull!RegFile theRegFile,
                     NotNull!Dir parentDir,
                     const string[] keys,
                     ref Symlink[] fromSymlinks,
                     size_t subIndex)
    {
        results.noBytesTotal += theRegFile.size;
        results.noBytesTotalContents += theRegFile.size;

        // Scan name
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.regularFilename) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir, cast(NotNull!File)theRegFile, parentDir,
                        fromSymlinks,
                        theRegFile.name, keys, [], ScanContext.fileName);
        }

        // Scan Contents
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileContent) &&
            (gstats.showFileContentDups ||
             gstats.showELFSymbolDups ||
             !keys.empty) &&
            theRegFile.size != 0)        // non-empty file
        {
            // Release the contents on every exit, including the early return
            // for non-selected kinds below, which used to skip the release.
            scope(exit) theRegFile.freeContents; // TODO: Call lazily only when open count is too large

            // immutable upTo = size_t.max;

            // TODO: Flag for readText
            try
            {
                ++gstats.noScannedRegFiles;
                ++gstats.noScannedFiles;

                // ELF Symbols
                if (gstats.showELFSymbolDups &&
                    theRegFile.ofKind(`ELF`, gstats.collectTypeHits, gstats.allFKinds))
                {
                    scanELFFile(viz, theRegFile, keys, gstats);
                }

                // Check included kinds first because they are fast.
                KindHit incKindHit = isSelectedFKind(theRegFile);
                if (!gstats.selFKinds.byIndex.empty && // TODO: Do we really need this one?
                    !incKindHit)
                {
                    return;
                }

                // Super-Fast Key-File Bistogram Discardal. TODO: Trim scale factor to optimal value.
                enum minFileSize = 256; // minimum size of file for discardal.
                immutable bool doBist = theRegFile.size > minFileSize;
                immutable bool doNGram = (gstats.useNGrams &&
                                          (!gstats.keyAsSymbol) &&
                                          theRegFile.size > minFileSize);
                immutable bool doBitStatus = true;

                // Chunked Calculation of CStat in one pass. TODO: call async.
                theRegFile.calculateCStatInChunks(gstats.filesByContentId,
                                                  _scanChunkSize,
                                                  gstats.showFileContentDups,
                                                  doBist,
                                                  doBitStatus);

                // Match Bist of Keys with BistX of File
                bool[] bistHits;
                bool noBistMatch = false;
                if (doBist)
                {
                    const theHist = theRegFile.bistogram8;
                    auto hitsHist = keysBists.map!(a =>
                                                   ((a.value & theHist.value) ==
                                                    a.value)); // TODO: Functionize to x.subsetOf(y) or reuse std.algorithm: setDifference or similar
                    bistHits = hitsHist.map!`a == true`.array;
                    noBistMatch = hitsHist.all!`a == false`;
                }
                /* int kix = 0; */
                /* foreach (hit; bistHits) { if (!hit) { debug dbg(`Assert key ` ~ keys[kix] ~ ` not in file ` ~ theRegFile.path); } ++kix; } */

                bool allXGramsMiss = false;
                if (doNGram)
                {
                    ulong keysXGramUnionMatch = keysXGramsUnion.matchDenser(theRegFile.xgram);
                    debug dbg(theRegFile.path,
                              ` sized `, theRegFile.size, ` : `,
                              keysXGramsUnion.length, `, `,
                              theRegFile.xgram.length,
                              ` gave match:`, keysXGramUnionMatch);
                    allXGramsMiss = keysXGramUnionMatch == 0;
                }

                auto binHit = theRegFile.ofAnyKindIn(gstats.binFKinds,
                                                     gstats.collectTypeHits);
                const binKindHit = binHit[0];
                if (binKindHit)
                {
                    import nxt.numerals: toOrdinal;
                    const nnKind = binHit[1].enforceNotNull;
                    const kindIndex = binHit[2];
                    if (_showSkipped)
                    {
                        if (gstats.showTree)
                        {
                            immutable intro = subIndex == parentDir.subs.length - 1 ? `└` : `├`;
                            viz.pp(`│ `.repeat(parentDir.depth + 1).join(``) ~ intro ~ `─ `);
                        }
                        viz.ppln(theRegFile, `: Skipped `, nnKind, ` file at `,
                                 toOrdinal(kindIndex + 1), ` blind try`);
                    }
                    final switch (binKindHit)
                    {
                    case KindHit.none:
                        break;
                    case KindHit.cached:
                        printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
                                     ` using cached KindId`);
                        break;
                    case KindHit.uncached:
                        printSkipped(viz, theRegFile, subIndex, nnKind, binKindHit,
                                     ` at ` ~ toOrdinal(kindIndex + 1) ~ ` extension try`);
                        break;
                    }
                }

                if (binKindHit != KindHit.none ||
                    noBistMatch ||
                    allXGramsMiss) // or no hits possible. TODO: Maybe more efficient to do histogram discardal first
                {
                    results.noBytesSkipped += theRegFile.size;
                }
                else
                {
                    // Search if not Binary

                    // If Source file is ok
                    auto src = theRegFile.readOnlyContents[];

                    results.noBytesScanned += theRegFile.size;

                    // NOTE(review): `if (keys)` tests the array pointer, not
                    // its length; confirm whether `!keys.empty` is intended.
                    if (keys)
                    {
                        // Fast discardal of files with no match
                        bool fastOk = true;
                        if (!_caseFold) // if no relaxation of search
                        {
                            if (gstats.keyAsAcronym)
                            {
                                /* TODO: Reuse findAcronym in algorith_ex. */
                            }
                            else
                            {
                                switch (keys.length)
                                {
                                default: break;
                                case 1: immutable hit1 = src.find(keys[0]); fastOk = !hit1.empty; break;
                                    // case 2: immutable hit2 = src.find(keys[0], keys[1]); fastOk = !hit2[0].empty; break;
                                    // case 3: immutable hit3 = src.find(keys[0], keys[1], keys[2]); fastOk = !hit3[0].empty; break;
                                    // case 4: immutable hit4 = src.find(keys[0], keys[1], keys[2], keys[3]); fastOk = !hit4[0].empty; break;
                                    // case 5: immutable hit5 = src.find(keys[0], keys[1], keys[2], keys[3], keys[4]); fastOk = !hit5[0].empty; break;
                                }
                            }
                        }

                        // TODO: Continue search from hit1, hit2 etc.

                        if (fastOk)
                        {
                            foreach (tag; addTags) gstats.ftags.addTag(theRegFile, tag);
                            foreach (tag; removeTags) gstats.ftags.removeTag(theRegFile, tag);

                            if (theRegFile.size >= 8192)
                            {
                                /* if (theRegFile.xgram == null) { */
                                /*     theRegFile.xgram = cast(XGram*)core.stdc.stdlib.malloc(XGram.sizeof); */
                                /* } */
                                /* (*theRegFile.xgram).put(src); */
                                /* theRegFile.xgram.put(src); */
                                /* foreach (lix, ub0; line) { // for each ubyte in line */
                                /*     if (lix + 1 < line.length) { */
                                /*         immutable ub1 = line[lix + 1]; */
                                /*         immutable dix = (cast(ushort)ub0 | */
                                /*                          cast(ushort)ub1*256); */
                                /*         (*theRegFile.xgram)[dix] = true; */
                                /*     } */
                                /* } */
                                auto shallowDenseness = theRegFile.bistogram8.denseness;
                                auto deepDenseness = theRegFile.xgramDeepDenseness;
                                // assert(deepDenseness >= 1);
                                gstats.shallowDensenessSum += shallowDenseness;
                                gstats.deepDensenessSum += deepDenseness;
                                ++gstats.densenessCount;
                                /* dbg(theRegFile.path, `:`, theRegFile.size, */
                                /*     `, length:`, theRegFile.xgram.length, */
                                /*     `, deepDenseness:`, deepDenseness); */
                            }

                            theRegFile._cstat.hitCount = scanForKeys(viz,
                                                                     topDir, cast(NotNull!File)theRegFile, parentDir,
                                                                     fromSymlinks,
                                                                     src, keys, bistHits,
                                                                     ScanContext.fileContent);
                        }
                    }
                }

            }
            catch (FileException)
            {
                handleError(viz, theRegFile, false, subIndex);
            }
            catch (ErrnoException)
            {
                handleError(viz, theRegFile, false, subIndex);
            }
        }
    }

    /** Scan Symlink `theSymlink` at `parentDir` for `keys`
        Put results in `results`.
*/
    /**
     * Scans the name of `theSymlink` for `keys` and, unless following of
     * symlinks is disabled, follows it when its target exists and lies
     * outside of all directories in `_topDirNames`. Symlinks with a missing
     * target are marked as broken and optionally reported.
     *
     * A symlink already present in `fromSymlinks` is reported as a cycle (if
     * enabled) and not followed.
     *
     * Params:
     *   viz          = output sink
     *   topDir       = top directory of the current scan
     *   theSymlink   = symlink to scan
     *   parentDir    = directory the symlink is presented under
     *   keys         = search keys
     *   fromSymlinks = chain of symlinks followed so far; `theSymlink` is
     *                  appended while its target is scanned and removed again
     *                  before returning
     */
    void scanSymlink(Viz viz,
                     NotNull!Dir topDir,
                     NotNull!Symlink theSymlink,
                     NotNull!Dir parentDir,
                     const string[] keys,
                     ref Symlink[] fromSymlinks)
    {
        // check for symlink cycles
        if (!fromSymlinks.find(theSymlink).empty)
        {
            if (gstats.showSymlinkCycles)
            {
                import std.range: back;
                viz.ppln(`Cycle of symbolic links: `,
                         fromSymlinks.asPath,
                         ` -> `,
                         fromSymlinks.back.target);
            }
            return;
        }

        // Scan name
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.symlinkName) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir, cast(NotNull!File)theSymlink, enforceNotNull(theSymlink.parent),
                        fromSymlinks,
                        theSymlink.name, keys, [], ScanContext.fileName);
        }

        // try {
        //     results.noBytesTotal += dent.size;
        // } catch (Exception)
        // {
        //     dbg(`Couldn't get size of `,  dir.name);
        // }
        if (gstats.followSymlinks == SymlinkFollowContext.none) { return; }

        import std.range: popBackN;
        fromSymlinks ~= theSymlink;
        // `fromSymlinks` belongs to the caller. Undo the push on every exit,
        // including exceptions thrown while the target is being scanned, so
        // that no stale entry is left in the chain.
        scope(exit) fromSymlinks.popBackN(1);

        immutable targetPath = theSymlink.absoluteNormalizedTargetPath;
        if (targetPath.exists)
        {
            theSymlink._targetStatus = SymlinkTargetStatus.present;
            if (_topDirNames.all!(a => !targetPath.startsWith(a))) // if target path lies outside of all rootdirs
            {
                auto targetDent = DirEntry(targetPath);
                auto targetFile = getFile(enforceNotNull(gstats.rootDir), targetPath, targetDent.isDir);

                if (gstats.showTree)
                {
                    viz.ppln(`│ `.repeat(parentDir.depth + 1).join(``) ~ `├` ~ `─ `,
                             theSymlink,
                             ` modified `,
                             faze(shortDurationString(_currTime - theSymlink.timeLastModified),
                                  timeFace),
                             ` ago`, ` -> `,
                             targetFile.asPath,
                             faze(` outside of ` ~ (_topDirNames.length == 1 ? `tree ` : `all trees `),
                                  infoFace),
                             gstats.topDirs.asPath,
                             faze(` is followed`, infoFace));
                }

                ++gstats.noScannedSymlinks;
                ++gstats.noScannedFiles;

                if (auto targetRegFile = cast(RegFile)targetFile)
                {
                    processRegFile(viz, topDir, assumeNotNull(targetRegFile), parentDir, keys, fromSymlinks, 0, gstats);
                }
                else if (auto targetDir = cast(Dir)targetFile)
                {
                    scanDir(viz, topDir, assumeNotNull(targetDir), keys, fromSymlinks);
                }
                else if (auto targetSymlink = cast(Symlink)targetFile) // target is a Symlink
                {
                    scanSymlink(viz, topDir,
                                assumeNotNull(targetSymlink),
                                enforceNotNull(targetSymlink.parent),
                                keys, fromSymlinks);
                }
            }
        }
        else
        {
            theSymlink._targetStatus = SymlinkTargetStatus.broken;

            if (gstats.showBrokenSymlinks)
            {
                _brokenSymlinks ~= theSymlink;

                foreach (ix, fromSymlink; fromSymlinks)
                {
                    if (gstats.showTree && ix == 0)
                    {
                        immutable intro = `├`;
                        viz.pp(`│ `.repeat(theSymlink.parent.depth + 1).join(``) ~ intro ~ `─ `,
                               theSymlink);
                    }
                    else
                    {
                        viz.pp(fromSymlink);
                    }
                    viz.pp(` -> `);
                }

                viz.ppln(faze(theSymlink.target, missingSymlinkTargetFace),
                         faze(` is missing`, warnFace));
            }
        }
    }

    /** Scan Directory `theDir` for `keys`.
*/
    /**
     * Scans the name of `theDir` for `keys` and then processes all of its
     * entries in the order given by `gstats.subsSorting`: regular files via
     * `processRegFile`, sub-directories recursively and symlinks via
     * `scanSymlink`. Sub-directories whose name is in
     * `gstats.skippedDirKindsMap` are not entered.
     *
     * Params:
     *   viz          = output sink
     *   topDir       = top directory of the current scan
     *   theDir       = directory to scan
     *   keys         = search keys
     *   fromSymlinks = chain of symlinks followed to reach `theDir`
     *   maxDepth     = number of directory levels below `theDir` to descend
     *                  into, or -1 for no limit
     */
    void scanDir(Viz viz,
                 NotNull!Dir topDir,
                 NotNull!Dir theDir,
                 const string[] keys,
                 Symlink[] fromSymlinks = [],
                 int maxDepth = -1)
    {
        if (theDir.isRoot) { results.reset; }

        // scan in directory name
        // NOTE(review): `enforceNotNull(theDir.parent)` presumably fails for a
        // directory without parent - confirm that such a directory never gets
        // here with name scanning enabled.
        if ((gstats.scanContext == ScanContext.all ||
             gstats.scanContext == ScanContext.fileName ||
             gstats.scanContext == ScanContext.dirName) &&
            !keys.empty)
        {
            scanForKeys(viz,
                        topDir,
                        cast(NotNull!File)theDir,
                        enforceNotNull(theDir.parent),
                        fromSymlinks,
                        theDir.name, keys, [], ScanContext.fileName);
        }

        try
        {
            size_t subIndex = 0;
            if (gstats.showTree)
            {
                immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;

                viz.pp(`│ `.repeat(theDir.depth).join(``) ~ intro ~
                       `─ `, theDir, ` modified `,
                       faze(shortDurationString(_currTime -
                                                theDir.timeLastModified),
                            timeFace),
                       ` ago`);

                if (gstats.showUsage)
                {
                    viz.pp(` of Tree-Size `, theDir.treeSize);
                }

                if (gstats.showSHA1)
                {
                    viz.pp(` with Tree-Content-Id `, theDir.treeContentId);
                }
                viz.ppendl;
            }

            ++gstats.noScannedDirs;
            ++gstats.noScannedFiles;

            // Depth budget handed to every sub-directory. It is computed once
            // so that all siblings get the same budget; decrementing
            // `maxDepth` inside the loop shrank it for each later sibling.
            immutable subMaxDepth = maxDepth >= 0 ? maxDepth - 1 : maxDepth;

            auto subsSorted = theDir.subsSorted(gstats.subsSorting);
            foreach (key, sub; subsSorted)
            {
                /* TODO: Functionize to scanFile */
                if (auto regFile = cast(RegFile)sub)
                {
                    processRegFile(viz, topDir, assumeNotNull(regFile), theDir, keys, fromSymlinks, subIndex, gstats);
                }
                else if (auto subDir = cast(Dir)sub)
                {
                    if (maxDepth == -1 || // if either all levels or
                        maxDepth >= 1)    // levels left
                    {
                        if (sub.name in gstats.skippedDirKindsMap) // if sub should be skipped
                        {
                            if (_showSkipped)
                            {
                                if (gstats.showTree)
                                {
                                    immutable intro = subIndex == theDir.subs.length - 1 ? `└` : `├`;
                                    viz.pp(`│ `.repeat(theDir.depth + 1).join(``) ~ intro ~ `─ `);
                                }

                                viz.pp(subDir,
                                       ` modified `,
                                       faze(shortDurationString(_currTime -
                                                                subDir.timeLastModified),
                                            timeFace),
                                       ` ago`,
                                       faze(`: Skipped Directory of type `, infoFace),
                                       gstats.skippedDirKindsMap[sub.name].kindName);
                            }
                        }
                        else
                        {
                            scanDir(viz, topDir,
                                    assumeNotNull(subDir),
                                    keys,
                                    fromSymlinks,
                                    subMaxDepth);
                        }
                    }
                }
                else if (auto subSymlink = cast(Symlink)sub)
                {
                    scanSymlink(viz, topDir, assumeNotNull(subSymlink), theDir, keys, fromSymlinks);
                }
                else
                {
                    if (gstats.showTree) { viz.ppendl; }
                }
                ++subIndex;

                if (ctrlC)
                {
                    viz.ppln(`Ctrl-C pressed: Aborting scan of `, theDir);
                    break;
                }
            }

            if (gstats.showTreeContentDups)
            {
                theDir.treeContentId; // better to put this after file scan for now
            }
        }
        catch (FileException)
        {
            handleError(viz, theDir, true, 0);
        }
    }

    /**
     * Select the duplicates in `files` to report, based on which of them have
     * a path starting with any of `dirPaths`.
     *
     * With `DuplicatesContext.internal` the files under `dirPaths` are
     * returned if there are at least two of them. With
     * `DuplicatesContext.external` all of `files` are returned if at least
     * one of them lies under `dirPaths`. Otherwise the result is empty.
     *
     * NOTE(review): the check is a plain string prefix test, so `/a/b` also
     * matches `/a/bc` - confirm that `dirPaths` end with a separator.
     */
    F[] filterUnderAnyOfPaths(F)(F[] files,
                                 string[] dirPaths)
    {
        import std.algorithm: any;
        import std.array: array;
        auto dupFilesUnderAnyTopDirName = (files
                                           .filter!(dupFile =>
                                                    dirPaths.any!(dirPath =>
                                                                  dupFile.path.startsWith(dirPath)))
                                           .array // evaluate to array to get .length below
            );
        F[] hits;
        final switch (gstats.duplicatesContext)
        {
        case DuplicatesContext.internal:
            if (dupFilesUnderAnyTopDirName.length >= 2)
                hits = dupFilesUnderAnyTopDirName;
            break;
        case DuplicatesContext.external:
            if (dupFilesUnderAnyTopDirName.length >= 1)
                hits = files;
            break;
        }
        return hits;
    }

    /** Show Content Duplicates of Regular Files and Directory Trees.
*/
    /**
     * Prints, first for regular files and then for directory trees, every
     * group of at least two entries of `gstats.filesByContentId` that passes
     * `filterUnderAnyOfPaths` and whose first member is of the respective
     * type: a heading naming the shared digest and size followed by the list
     * of paths.
     */
    void showContentDups(Viz viz)
    {
        import std.meta : AliasSeq;
        foreach (ix, kind; AliasSeq!(RegFile, Dir))
        {
            immutable typeName = ix == 0 ? `Regular File` : `Directory Tree`;
            viz.pp((typeName ~ ` Content Duplicates`).asH!2);

            foreach (digest, dupFiles; gstats.filesByContentId)
            {
                auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
                if (dupFilesOk.length < 2) // need at least two files/directories
                {
                    continue;
                }

                auto firstDup = cast(kind)dupFilesOk[0];
                if (!firstDup)  // group belongs to the other type
                {
                    continue;
                }

                static if (is(kind == RegFile))
                {
                    if (firstDup._cstat.kindId)
                    {
                        if (firstDup._cstat.kindId in gstats.allFKinds.byId)
                        {
                            viz.pp(asH!3(gstats.allFKinds.byId[firstDup._cstat.kindId],
                                         ` files sharing digest `, digest, ` of size `, firstDup.treeSize));
                        }
                        else
                        {
                            dbg(firstDup.path ~ ` kind Id ` ~ to!string(firstDup._cstat.kindId) ~
                                ` could not be found in allFKinds.byId`);
                        }
                    }
                    // NOTE(review): this heading is printed in addition to the
                    // kind specific one above - confirm that this is intended.
                    immutable heading = (firstDup._cstat.bitStatus == BitStatus.bits7) ? `ASCII File` : typeName;
                    viz.pp(asH!3(heading,
                                 `s sharing digest `, digest, ` of size `, firstDup.treeSize));
                }
                else
                {
                    // NOTE(review): uses `size` where the file branch uses
                    // `treeSize` - confirm that this is not swapped.
                    viz.pp(asH!3(typeName, `s sharing digest `, digest, ` of size `, firstDup.size));
                }

                viz.pp(asUList(dupFilesOk.map!(x => x.asPath.asItem)));
            }
        }
    }

    /** Show Statistics.
*/
    /**
     * Prints the end-of-scan report: the enabled duplicate listings (names,
     * inodes, contents, ELF symbols), broken symlinks, counts of scanned file
     * types, histogram denseness averages, byte counters and finally a demo
     * section with randomized math types.
     */
    void showStats(Viz viz)
    {
        /* Duplicates */

        if (gstats.showNameDups)
        {
            viz.pp(`Name Duplicates`.asH!2);
            foreach (digest, dupFiles; gstats.filesByName)
            {
                auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
                if (dupFilesOk.empty)
                {
                    continue;
                }
                viz.pp(asH!3(`Files with same name `,
                             faze(dupFilesOk[0].name, fileFace)),
                       asUList(dupFilesOk.map!(x => x.asPath.asItem)));
            }
        }

        if (gstats.showLinkDups)
        {
            viz.pp(`Inode Duplicates (Hardlinks)`.asH!2);
            foreach (inode, dupFiles; gstats.filesByInode)
            {
                auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
                if (dupFilesOk.length < 2)
                {
                    continue;
                }
                viz.pp(asH!3(`Files with same inode ` ~ to!string(inode) ~
                             ` (hardlinks): `),
                       asUList(dupFilesOk.map!(x => x.asPath.asItem)));
            }
        }

        if (gstats.showFileContentDups)
        {
            showContentDups(viz);
        }

        if (gstats.showELFSymbolDups &&
            !keys.empty) // don't show anything if no keys where given
        {
            viz.pp(`ELF Symbol Duplicates`.asH!2);
            foreach (raw, dupFiles; gstats.elfFilesBySymbol)
            {
                auto dupFilesOk = filterUnderAnyOfPaths(dupFiles, _topDirNames);
                if (dupFilesOk.length < 2)
                {
                    continue;
                }
                const demangling = demangler(raw).decodeSymbol;
                if (demangling.unmangled.findFirstOfAnyInOrder(keys)[1])
                {
                    viz.pp(asH!3(`ELF Files with same symbol ` ~ to!string(raw)),
                           asUList(dupFilesOk.map!(x => x.asPath.asItem)));
                }
            }
        }

        /* Broken Symlinks */
        if (gstats.showBrokenSymlinks &&
            !_brokenSymlinks.empty)
        {
            viz.pp(`Broken Symlinks `.asH!2,
                   asUList(_brokenSymlinks.map!(x => x.asPath.asItem)));
        }

        /* Counts */
        viz.pp(`Scanned Types`.asH!2,
               /* asUList(asItem(gstats.noScannedDirs, ` Dirs, `), */
               /*         asItem(gstats.noScannedRegFiles, ` Regular Files, `), */
               /*         asItem(gstats.noScannedSymlinks, ` Symbolic Links, `), */
               /*         asItem(gstats.noScannedSpecialFiles, ` Special Files, `), */
               /*         asItem(`totalling `, gstats.noScannedFiles, ` Files`) // on extra because of lack of root */
               /*     ) */
               asTable(asRow(asCell(asBold(`Scan Count`)),
                             asCell(asBold(`File Type`))),
                       asRow(asCell(gstats.noScannedDirs),
                             asCell(asItalic(`Dirs`))),
                       asRow(asCell(gstats.noScannedRegFiles),
                             asCell(asItalic(`Regular Files`))),
                       asRow(asCell(gstats.noScannedSymlinks),
                             asCell(asItalic(`Symbolic Links`))),
                       asRow(asCell(gstats.noScannedSpecialFiles),
                             asCell(asItalic(`Special Files`))),
                       asRow(asCell(gstats.noScannedFiles),
                             asCell(asItalic(`Files`)))
                   )
            );

        if (gstats.densenessCount)
        {
            viz.pp(`Histograms`.asH!2,
                   asUList(asItem(`Average Byte Bistogram (Binary Histogram) Denseness `,
                                  cast(real)(100*gstats.shallowDensenessSum / gstats.densenessCount), ` Percent`),
                           asItem(`Average Byte `, NGramOrder, `-Gram Denseness `,
                                  cast(real)(100*gstats.deepDensenessSum / gstats.densenessCount), ` Percent`)));
        }

        viz.pp(`Scanned Bytes`.asH!2,
               asUList(asItem(`Scanned `, results.noBytesScanned),
                       asItem(`Skipped `, results.noBytesSkipped),
                       asItem(`Unreadable `, results.noBytesUnreadable),
                       asItem(`Total Contents `, results.noBytesTotalContents),
                       asItem(`Total `, results.noBytesTotal),
                       asItem(`Total number of hits `, results.numTotalHits),
                       asItem(`Number of Files with hits `, results.numFilesWithHits)));

        viz.pp(`Some Math`.asH!2);

        {
            // Aggregate of math types used to demonstrate table output.
            struct Stat
            {
                particle2f particle;
                point2r point;
                vec2r velocity;
                vec2r acceleration;
                mat2 rotation;
                Rational!uint ratInt;
                Vector!(Rational!int, 4) ratIntVec;
                Vector!(float, 2, true) normFloatVec2;
                Vector!(float, 3, true) normFloatVec3;
                Point!(Rational!int, 4) ratIntPoint;
            }

            /* Vector!(Complex!float, 4) complexVec; */

            viz.ppln(`A number: `, 1.2e10);
            viz.ppln(`Randomize particle2f as TableNr0: `, randomInstanceOf!particle2f.asTableNr0);

            alias Stats3 = Stat[3];
            auto stats = new Stat[3];
            randomize(stats);
            viz.ppln(`A ` ~ typeof(stats).stringof, `: `, stats.randomize.asTable);

            {
                auto x = randomInstanceOf!Stats3;
                foreach (ref e; x)
                {
                    e.velocity *= 1e9;
                }
                viz.ppln(`Some Stats: `,
                         x.asTable);
            }
        }
    }
}

/**
 * Program entry helper: on Linux installs `signalHandler` for SIGABRT,
 * SIGTERM, SIGQUIT and SIGINT, then constructs a `Scanner` over a linear
 * console `Terminal` from `args`.
 */
void scanner(string[] args)
{
    // Register the termination signals with the signalHandler function call:
    version(linux)
    {
        foreach (sig; [SIGABRT, SIGTERM, SIGQUIT, SIGINT])
        {
            signal(sig, &signalHandler);
        }
    }

    auto term = Terminal(ConsoleOutputType.linear);
    auto scanner = new Scanner!Terminal(args, term);
}