1 /** Extensions to std.file.
2 	Copyright: Per Nordlöw 2024-.
3 	License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
4 	Authors: $(WEB Per Nordlöw)
5 */
6 module nxt.file;
7 
8 import std.stdio : File;
9 import nxt.path : FileName, FilePath, DirPath;
10 import nxt.pattern : PNode = Node;
11 
12 private enum PAGESIZE = 4096;
13 
14 @safe:
15 
/++ Type of file.

	Pairs a data format with a pattern for the file names that files of this
	type often carry.
 +/
struct FileType {
	DataFormat format; ///< Format associated with file type.
	alias format this; // members of `format` are reachable directly through a `FileType`
	PNode fileNamePattern; ///< Pattern matching file name(s) often associated.
}
23 
/++ Match the pattern `node` against the string held by the file name `input`.

	Forwards to `nxt.pattern.matchFirst` applied to `input.str`.

	Returns: The result of that call, typed as `FileName.str`.
 +/
typeof(FileName.str) matchFirst(scope return /+ref+/ FileName input, const PNode node) pure nothrow /+@nogc+/ {
	// the local import makes the pattern-level `matchFirst` the callee below
	import nxt.pattern : matchFirst;
	return input.str.matchFirst(node);
}
28 
/++ Get pattern that matches a file name ending with '.'~`s`.

	Params:
		s = Extension text without the leading dot, for instance "d".

	Returns: A sequence pattern built from a literal '.', the literal `s` and `eob()`.
 +/
static PNode fileExtension(string s) pure nothrow {
	import nxt.pattern : seq, lit, eob;
	// NOTE(review): `eob` presumably anchors the match at the end of the input - confirm in `nxt.pattern`
	return seq(lit('.'), lit(s), eob());
}
34 
/// ditto
@safe pure unittest {
	// a name with extension "d" matches only the "d" pattern
	auto app_d = FileName("app.d");
	assert( app_d.matchFirst(fileExtension("d")));
	assert(!app_d.matchFirst(fileExtension("c")));
	assert(!app_d.matchFirst(fileExtension("cpp")));
	// a name without any extension matches none of the patterns
	auto app = FileName("app");
	assert(!app.matchFirst(fileExtension("d")));
	assert(!app.matchFirst(fileExtension("c")));
	assert(!app.matchFirst(fileExtension("cpp")));
	// a name with extension "c" matches only the "c" pattern
	auto app_c = FileName("app.c");
	assert(!app_c.matchFirst(fileExtension("d")));
	assert( app_c.matchFirst(fileExtension("c")));
	assert(!app_c.matchFirst(fileExtension("cpp")));
}
50 
/++ (Data) Format of file contents.

	The `name` can be either a programming language such as "C",
	"C++", "D", etc. or a data format such as "JSON", "XML" etc.

	See: https://en.wikipedia.org/wiki/File_format
 +/
struct DataFormat {
	/++ Name of the format.
		TODO: Make this a `PNode namePattern` to support both, for instance,
		"JavaScript Object Notation" and "JSON". +/
	string name;
	alias name this; // a `DataFormat` can be used where its `name` string is expected
	/// Pattern matching file contents often associated.
	PNode contentPattern;
}
66 
/++ Extension of filename.

	Thin wrapper around the extension text.

	See: https://en.wikipedia.org/wiki/Filename_extension
 +/
struct FileExtension {
	string value; ///< Extension text.
	alias value this; // a `FileExtension` can be used where its `value` string is expected
}
74 
/** Read the file at $(D path) into a raw array that carries one extra
 * terminating zero byte.
 *
 * The trailing zero (`null`) byte is typically used as a sentinel value to
 * speed up textual parsers, or when the characters are handed to a C function
 * that takes a zero-terminated string.
 *
 * The file is opened in binary read mode and the work is delegated to the
 * `File` overload.
 *
 * TODO: Add or merge to Phobos?
 *
 * See_Also: https://en.wikipedia.org/wiki/Sentinel_value
 * See_Also: http://forum.dlang.org/post/pdzxpkusvifelumkrtdb@forum.dlang.org
 */
immutable(void)[] rawReadZ(FilePath path) @safe {
	auto source = File(path.str, `rb`);
	return source.rawReadZ();
}
/++ Read the remaining contents of `file` into a raw array with one extra
	terminating zero byte.

	The terminator is placed directly after the bytes that were actually read,
	so the result never contains uninitialized memory, even when `file` has
	already been partly consumed or shrinks while being read.

	Params:
		file = File opened for reading, whose size can be queried.

	Returns: The bytes read followed by a single zero byte.

	Throws: `Exception` if the size of `file` cannot be determined or if
	        reading fails.
 +/
immutable(void)[] rawReadZ(scope File file) @trusted
{
	import std.array : uninitializedArray;
	import std.exception : assumeUnique, enforce;

	// Query the size once; `File.size` yields `ulong.max` when it is unknown,
	// which would otherwise wrap around to a zero-length allocation below.
	const size = file.size;
	enforce(size < size_t.max, "Cannot determine the size of the file to read");

	ubyte[] data = uninitializedArray!(ubyte[])(cast(size_t) size + 1); // one extra for terminator

	// `rawRead` rejects empty buffers, so skip the call for an empty file.
	const readCount = size != 0 ? file.rawRead(data[0 .. $ - 1]).length : 0;

	data[readCount] = '\0'; // zero terminator for sentinel, right after the payload
	data = data[0 .. readCount + 1]; // drop any part that was never written

	return assumeUnique(data);
}
103 
///
version (Posix)
@safe unittest {
	import nxt.algorithm.searching : endsWith;
	// relies on `/etc/passwd` being present and readable on the test host
	const d = cast(const(char)[]) FilePath(`/etc/passwd`).rawReadZ();
	assert(d.endsWith('\0')); // has 0-terminator
}
111 
/++ Find path for the executable `a` (or `FilePath.init` if not found) in the
	directories listed in the environment variable `pathVariableName`.

	Params:
		a = File name of the executable to look for.
		pathVariableName = Name of the environment variable holding the
		                   directory list; defaults to "PATH".

	Returns: Path of the first match, or `FilePath.init` if there is none.

	TODO: Add caching of result and detect changes via inotify.
 +/
FilePath findExecutable(FileName a, scope const(char)[] pathVariableName = "PATH") {
	// forward the caller's variable name instead of a hard-coded "PATH"
	return findFileInPath(a, pathVariableName, onlyExecutable: true);
}
118 
///
@safe unittest {
	version (Posix) {
		// NOTE(review): assumes `ls` is found at `/usr/bin/ls` via `PATH` on the test host - confirm
		assert(findExecutable(FileName("ls")) == FilePath("/usr/bin/ls"));
		// assumes no file named "xyz" is reachable via `PATH`
		assert(!findExecutable(FileName("xyz")));
	}
}
126 
/++ Find path for `a` (or `FilePath.init` if not found) in the directories
	listed in the environment variable `pathVariableName`.

	The directories are tried in the order they are listed and the first match
	is returned.

	Params:
		a = File name to look for.
		pathVariableName = Name of the environment variable holding the
		                   directory list.
		onlyExecutable = If `true` a candidate matches only when it is
		                 executable, otherwise it matches when it exists.

	Returns: Path of the first match, or `FilePath.init` if there is none.

	TODO: Add caching of result and detect changes via inotify.
 +/
FilePath findFileInPath(FileName a, scope const(char)[] pathVariableName, bool onlyExecutable) /+nothrow+/ {
	import std.algorithm : splitter;
	import std.path : pathSeparator;
	import std.process : environment;
	import nxt.path : buildPath, exists;
	const envPATH = environment.get(pathVariableName, ""); // TODO: nothrow
	// `pathSeparator` is the platform's separator for path lists (":" on POSIX)
	foreach (const p; envPATH.splitter(pathSeparator)) {
		const path = DirPath(p).buildPath(a);
		// pick first match; when `onlyExecutable` is set, mere existence is
		// not enough
		if (onlyExecutable) {
			if (path.toString.isExecutable)
				return path;
		} else if (path.exists)
			return path;
	}
	return typeof(return).init;
}
145 
/++ Check whether the calling process is permitted to execute the file at `path`.

	Returns: `true` iff `access(path, X_OK)` reports success.
 +/
version (Posix)
private bool isExecutable(in char[] path) @trusted nothrow @nogc {
	import core.sys.posix.unistd : X_OK, access;
	import std.internal.cstring : tempCString;
	// keep the buffer object (not just a pointer into it) alive across the call
	auto pathz = path.tempCString();
	const status = access(pathz, X_OK);
	return status == 0;
}
152 
/++ Get path to the default temporary directory.

	Wraps the result of `std.file.tempDir` in a `DirPath`.

	See_Also: `std.file.tempDir`
	See: https://forum.dlang.org/post/gg9kds$1at0$1@digitalmars.com
 +/
DirPath tempDir() {
	import std.file : phobosTempDir = tempDir;
	string location = phobosTempDir();
	return DirPath(location);
}
161 
///
@safe unittest {
	version (Posix) {
		// Compare against Phobos instead of hard-coding "/tmp/": the actual
		// location depends on the environment of the test host.
		static import std.file;
		assert(tempDir().str == std.file.tempDir());
	}
}
168 
/** Build the path of a new (unique) temporary file under `std.file.tempDir`.

    The file name is `prefix`, followed by a random UUID, followed by
    `extension`. Only the path is computed; no file is created.

    Params:
        prefix = Text placed before the UUID in the file name.
        extension = Text placed after the UUID in the file name.

    See_Also: https://forum.dlang.org/post/ytmwfzmeqjumzfzxithe@forum.dlang.org
    See_Also: https://dlang.org/library/std/stdio/file.tmpfile.html
 */
string tempSubFilePath(string prefix = null, string extension = null) @safe {
	import std.file : tempDir;
	import std.path : buildPath;
	import std.uuid : randomUUID;
	/+ TODO: use allocation via lazy range or nxt.appending.append() +/
	string uniqueName = prefix ~ randomUUID().toString() ~ extension;
	return buildPath(tempDir(), uniqueName);
}
180 
///
@safe unittest {
	import nxt.algorithm.searching : canFind, endsWith;
	const prefix = "_xyz_";
	const ext = "_ext_";
	const path = tempSubFilePath(prefix, ext);
	// both the prefix and the trailing extension must show up in the path
	assert(path.canFind(prefix));
	assert(path.endsWith(ext));
}
190 
/++ Get path to home directory.

	On Windows `USERPROFILE` is consulted first, then the combination of
	`HOMEDRIVE` and `HOMEPATH`. On other systems `HOME` is used.

	Returns: The home directory taken from the environment.

	Throws: `Exception` if none of the consulted environment variables is set.

	See_Also: `tempDir`
	See: https://forum.dlang.org/post/gg9kds$1at0$1@digitalmars.com
 +/
DirPath homeDir() {
	import std.process : environment;
	version (Windows) {
		// string-based joiner needed for the HOMEDRIVE + HOMEPATH fallback
		import std.path : buildPath;
		// On Windows, USERPROFILE is typically used, but HOMEPATH is an alternative
		if (const home = environment.get("USERPROFILE"))
			return typeof(return)(home);
		// Fallback to HOMEDRIVE + HOMEPATH
		const homeDrive = environment.get("HOMEDRIVE");
		const homePath = environment.get("HOMEPATH");
		if (homeDrive && homePath)
			return typeof(return)(buildPath(homeDrive, homePath));
	} else {
		if (const home = environment.get("HOME"))
			return typeof(return)(home);
	}
	throw new Exception("No home directory environment variable is set.");
}
212 
///
@safe unittest {
	version (Posix) {
		import std.path : expandTilde;
		// the result is expected to agree with what Phobos expands "~" to
		assert(homeDir().str == "~".expandTilde);
	}
}
220 
/++ Get path to the default cache (home) directory.

	Uses the environment variable `XDG_CACHE_HOME` when it is set and
	non-empty, otherwise falls back to the `.cache` directory under `homeDir`,
	which is the default given by the XDG Base Directory Specification.

	Returns: The cache directory.

	Throws: Whatever `homeDir` throws when the fallback is needed and no home
	        directory can be determined.

	See: `XDG_CACHE_HOME`
	See: https://specifications.freedesktop.org/basedir-spec/latest/
	See_Also: `tempDir`.
 +/
DirPath cacheHomeDir() {
	import std.process : environment;
	import nxt.path : buildPath;
	// same lookup on every platform; an empty value counts as unset
	const cacheHome = environment.get("XDG_CACHE_HOME");
	if (cacheHome.length != 0)
		return typeof(return)(cacheHome);
	return homeDir.buildPath(DirPath(`.cache`));
}
239 
///
@safe unittest {
	version (Posix) {
		import nxt.path : buildPath;
		// NOTE(review): only holds when `XDG_CACHE_HOME` is unset or equals `~/.cache` on the test host
		assert(cacheHomeDir() == homeDir.buildPath(DirPath(`.cache`)));
	}
}
247 
/++ Variant of `std.file.remove()` that returns status instead of throwing.

	Modified copy of `std.file.remove()`.

	Params:
		name = Path of the file to remove.

	Returns: `true` iff file of path `name` was successfully removed,
	         `false` otherwise.

	Typically used in contexts where a `nothrow` variant of
	`std.file.remove()` is required, such as in class destructors/finalizers,
	in which an illegal memory operation exception would otherwise be thrown.
 +/
bool removeIfExists(scope const(char)[] name) @trusted nothrow @nogc {
	import std.internal.cstring : tempCString;
	// Keep the buffer object itself alive: a bare pointer obtained from the
	// temporary would dangle as soon as the declaration statement ends.
	auto namez = name.tempCString!FSChar();
	version (Windows) {
		import core.sys.windows.winbase : DeleteFileW;
		// `DeleteFileW` returns nonzero on success
		return DeleteFileW(namez) != 0;
	} else version (Posix) {
		static import core.stdc.stdio;
		// C `remove` returns zero on success
		return core.stdc.stdio.remove(namez) == 0;
	}
}

/++ Character type used for operating system filesystem APIs.
	Copied from `std.file`.
 +/
version (Windows)
	private alias FSChar = wchar;       // UTF-16 code unit, the type behind `WCHAR` in the D bindings
else version (Posix)
	private alias FSChar = char;
else
	static assert(0);
280 
///
@safe nothrow unittest {
	// TODO: test `removeIfExists`, both for an existing and for a missing file
}
285 
286 import std.file : DirEntry;
287 
/++ Remove the directory `path` together with everything below it.

	Identical to `std.file.rmdirRecurse` on POSIX.
	On Windows it removes read-only bits before deleting.

	Params:
		path = Directory to remove.
		followSymlink = Forwarded to the directory traversal.

	TODO: Integrate into Phobos as `rmdirRecurse(bool forced)`.
	TODO: Make a non-throwing version bool tryRmdirRecurse(bool forced).
 +/
void rmdirRecurseForced(in DirPath path, bool followSymlink = false) {
	const rootStr = path.str;
	rmdirRecurseForced(rootStr, followSymlink);
}
/// ditto
void rmdirRecurseForced(in char[] path, bool followSymlink = false) @trusted {
	// the entry must be an l-value: passing it as an r-value segfaults
	auto rootEntry = DirEntry(cast(string) path);
	rmdirRecurseForced(rootEntry, followSymlink);
}
/// ditto
void rmdirRecurseForced(ref DirEntry de, bool followSymlink = false) {
	import std.file : FileException, SpanMode, attrIsDir, dirEntries, remove, rmdir, setAttributes;
	if (!de.isDir)
		throw new FileException(de.name, "Trying to remove non-directory " ~ de.name);
	if (!de.isSymlink) {
		// depth-first so that the contents of a directory go before the directory
		foreach (ref child; dirEntries(de.name, SpanMode.depth, followSymlink)) {
			version (Windows) {
				import core.sys.windows.windows : FILE_ATTRIBUTE_READONLY;
				// clear the read-only bit first
				if ((child.attributes & FILE_ATTRIBUTE_READONLY) != 0)
					child.name.setAttributes(child.attributes & ~FILE_ATTRIBUTE_READONLY);
			}
			if (attrIsDir(child.linkAttributes))
				rmdir(child.name);
			else
				remove(child.name);
		}
		rmdir(de.name); // dir itself
		return;
	}
	// a symbolic link to a directory: remove the link only
	version (Windows)
		rmdir(de.name);
	else
		remove(de.name);
}
324 
///
@safe nothrow unittest {
	// TODO: test `rmdirRecurseForced` on a temporary directory tree
}
329 
330 import std.file : PreserveAttributes, preserveAttributesDefault;
331 
/++ Copy directory `from` to `to` recursively.

	If `from` is a directory, `to` is created (including parents) and every
	entry below `from` is reproduced at the same relative location below
	`to`. Otherwise `from` is copied to `to` as a single file.

	Params:
		from = Source directory or file. May end with a directory separator.
		to = Destination directory or file.
		preserve = Whether file attributes are preserved, forwarded to
		           `std.file.copy`.
 +/
void copyRecurse(scope const(char)[] from, scope const(char)[] to, in PreserveAttributes preserve = preserveAttributesDefault) {
	import std.file : copy, dirEntries, isDir, isFile, mkdirRecurse, SpanMode;
	import std.path : buildPath, isDirSeparator;
	if (!from.isDir()) {
		from.copy(to, preserve);
		return;
	}
	to.mkdirRecurse();
	const from_ = () @trusted {
		return cast(string) from;
	}();
	foreach (entry; dirEntries(from_, SpanMode.breadth)) {
		// Path of the entry relative to `from`. Separators are stripped
		// explicitly instead of skipping a fixed single character, because
		// `from` may or may not already end with a separator.
		auto relative = entry.name[from.length .. $];
		while (relative.length != 0 && isDirSeparator(relative[0]))
			relative = relative[1 .. $];
		const dst = () @trusted { return to.buildPath(relative); }();
		if (entry.name.isFile())
			entry.name.copy(dst, preserve);
		else
			dst.mkdirRecurse();
	}
}
/// ditto
void copyRecurse(DirPath from, DirPath to, in PreserveAttributes preserve = preserveAttributesDefault) {
	// unwrap the typed paths and delegate to the string-based overload
	copyRecurse(from.str, to.str, preserve);
}
355 
/++ Directory Scanning Flags|Options.

	Consumed by this module's `dirEntries`.
 +/
struct ScanFlags {
	alias Depth = ushort; // as path length <= 4096 on all architectures
	Depth depthMin = 0; ///< Smallest depth at which entries are processed.
	Depth depthLength = Depth.max; ///< Added to `depthMin` to form the exclusive depth limit for descending.
	bool followSymlink = true; ///< Passed on to `std.file.dirEntries` as its follow-symlink argument.
}
363 
/++ Recursively walk the directory tree rooted at `root`.

	Sub-directories are descended into while the depth of their contents stays
	below `scanFlags.depthMin + scanFlags.depthLength`. Handling of all other
	entries at depth `scanFlags.depthMin` or deeper is not implemented yet and
	ends in `assert(0)`.

	Params:
		root = Directory to scan.
		scanFlags = Depth window and symlink behaviour.
		depth = Depth of `root` itself; used by the recursion.

	TODO: Turn into a range.
 +/
void dirEntries(in char[] root,
				in ScanFlags scanFlags = ScanFlags.init,
				in ScanFlags.Depth depth = ScanFlags.Depth.init) {
	import std.file : SpanMode, std_dirEntries = dirEntries;
	const rootStr = () @trusted { return cast(string)(root); }();
	// the same for every entry of this directory, so computed once up front
	const childDepth = cast(ScanFlags.Depth)(depth + 1);
	const mayDescend = childDepth < scanFlags.depthMin + scanFlags.depthLength;
	const inWindow = depth >= scanFlags.depthMin;
	foreach (ref entry; std_dirEntries(rootStr, SpanMode.shallow, scanFlags.followSymlink)) {
		if (entry.isDir && mayDescend) {
			dirEntries(entry.name, scanFlags, childDepth);
			continue;
		}
		if (inWindow)
			assert(0, "TODO: Turn into a range");
	}
}
/// ditto
void dirEntries(DirPath root,
				in ScanFlags scanFlags = ScanFlags.init,
				in ScanFlags.Depth depth = ScanFlags.Depth.init) @trusted {
	const rootStr = root.str;
	dirEntries(rootStr, scanFlags, depth);
}
384 
/** Create a new temporary file whose name starts with $(D namePrefix) and
	ends with 6 randomly chosen characters.

	The name template handed to `mkstemp` is built in a buffer sized to fit
	`namePrefix`, so a prefix of any length is handled without overrunning a
	fixed-size buffer.

	Params:
		namePrefix = Leading part of the file path.

    Returns: File Descriptor to opened file, or `-1` if `mkstemp` fails.
 */
version (linux)
int tempfile(in char[] namePrefix = null) @trusted {
	import core.sys.posix.stdlib : mkstemp;
	static immutable placeholder = "XXXXXX"; // replaced by `mkstemp` with random characters
	// prefix + placeholder + zero terminator
	auto buf = new char[namePrefix.length + placeholder.length + 1];
	buf[0 .. namePrefix.length] = namePrefix[]; // copy the name into the mutable buffer
	buf[namePrefix.length .. $ - 1] = placeholder[];
	buf[$ - 1] = '\0'; // `mkstemp` expects a zero-terminated template
	return mkstemp(buf.ptr);
}
400 
401 /** TODO: Scoped variant of tempfile.
402     Search http://forum.dlang.org/thread/mailman.262.1386205638.3242.digitalmars-d-learn@puremagic.com
403  */
404 
/** Create a New Temporary Directory Tree.

    NOTE: Not implemented yet. The body ignores both parameters and always
    returns `null`.

    Returns: Path to root of tree (currently always `null`).
 */
char* temptree(char* name_x, char* template_ = null) @safe {
	return null; // placeholder until the function is implemented
}