1 modulenxt.bylinefast;
2 3 importstd.stdio : File;
4 importstd.typecons : Flag;
5 6 aliasKeepTerminator = Flag!"keepTerminator";
7 8 /**
9 Reads by line in an efficient way (10 times faster than File.byLine from
10 std.stdio). This is accomplished by reading entire buffers (fgetc() is not
11 used), and allocating as little as possible.
12 13 The char \n is considered as default separator, removing the previous \r if
14 it exists.
15 16 The \n is never returned. The \r is not returned if it was
17 part of a \r\n (but it is returned if it was by itself).
18 19 The returned string is always a substring of a temporary buffer, that must
20 not be stored. If necessary, you must use str[] or .dup or .idup to copy to
21 another string. DIP-25 return qualifier is used in front() to add extra
22 checks in @safe callers of front().
23 24 Example:
25 26 File f = File("file.txt");
27 foreach (string line; ByLineFast(f)) {
28 ...process line...
29 //Make a copy:
30 string copy = line[];
31 }
32 33 The file isn't closed when done iterating, unless it was the only reference to
34 the file (same as std.stdio.byLine). (example: ByLineFast(File("file.txt"))).
35 */36 structByLineFast(Char, Terminator)
37 {
38 importcore.stdc.string : memmove;
39 importstd.stdio : fgetc, ungetc;
40 41 Filefile;
42 char[] line;
43 boolfirst_call = true;
44 char[] buffer;
45 char[] strBuffer;
46 conststringseparator;
47 KeepTerminatorkeepTerminator;
48 49 this(Filef,
50 KeepTerminatorkt = KeepTerminator.no,
51 stringseparator = "\n",
52 uintbufferSize = 4096) @safe53 {
54 assert(bufferSize > 0);
55 file = f;
56 this.separator = separator;
57 this.keepTerminator = kt;
58 buffer.length = bufferSize;
59 }
60 61 @propertyboolempty() const @trustedscope62 {
63 // Its important to check "line !is null" instead of64 // "line.length != 0", otherwise, no empty lines can65 // be returned, the iteration would be closed.66 if (line !isnull)
67 {
68 returnfalse;
69 }
70 if (!file.isOpen)
71 {
72 // Clean the buffer to avoid pointer false positives:73 (cast(char[])buffer)[] = 0;
74 returntrue;
75 }
76 77 // First read. Determine if it's empty and put the char back.78 automutableFP = (cast(File*) &file).getFP();
79 constc = fgetc(mutableFP);
80 if (c == -1)
81 {
82 // Clean the buffer to avoid pointer false positives:83 (cast(char[])buffer)[] = 0;
84 returntrue;
85 }
86 if (ungetc(c, mutableFP) != c)
87 {
88 assert(0, "Bug in cstdlib implementation");
89 }
90 returnfalse;
91 }
92 93 @propertychar[] front() @safereturnscope94 {
95 if (first_call)
96 {
97 popFront();
98 first_call = false;
99 }
100 returnline;
101 }
102 103 voidpopFront() @trustedscope104 {
105 if (strBuffer.length == 0)
106 {
107 strBuffer = file.rawRead(buffer);
108 if (strBuffer.length == 0)
109 {
110 file.detach();
111 line = null;
112 return;
113 }
114 }
115 116 // import std.string : indexOf; // TODO array_algorithm indexOf117 importnxt.array_algorithm : indexOf;
118 constpos = strBuffer.indexOf(this.separator);
119 if (pos != -1)
120 {
121 if (pos != 0 && strBuffer[pos-1] == '\r')
122 {
123 line = strBuffer[0 .. (pos-1)];
124 }
125 else126 {
127 line = strBuffer[0 .. pos];
128 }
129 // Pop the line, skipping the terminator:130 strBuffer = strBuffer[(pos+1) .. $];
131 }
132 else133 {
134 // More needs to be read here. Copy the tail of the buffer135 // to the beginning, and try to read with the empty part of136 // the buffer.137 // If no buffer was left, extend the size of the buffer before138 // reading. If the file has ended, then the line is the entire139 // buffer.140 141 if (strBuffer.ptr != buffer.ptr)
142 {
143 // Must use memmove because there might be overlap144 memmove(buffer.ptr, strBuffer.ptr,
145 strBuffer.length * char.sizeof);
146 }
147 constspaceBegin = strBuffer.length;
148 if (strBuffer.length == buffer.length)
149 {
150 // Must extend the buffer to keep reading.151 assumeSafeAppend(buffer);
152 buffer.length = buffer.length * 2;
153 }
154 constreadPart = file.rawRead(buffer[spaceBegin .. $]);
155 if (readPart.length == 0)
156 {
157 // End of the file. Return whats in the buffer.158 // The next popFront() will try to read again, and then159 // mark empty condition.160 if (spaceBegin != 0 && buffer[spaceBegin-1] == '\r')
161 {
162 line = buffer[0 .. spaceBegin-1];
163 }
164 else165 {
166 line = buffer[0 .. spaceBegin];
167 }
168 strBuffer = null;
169 return;
170 }
171 strBuffer = buffer[0 .. spaceBegin + readPart.length];
172 // Now that we have new data in strBuffer, we can go on.173 // If a line isn't found, the buffer will be extended again to read more.174 popFront();
175 }
176 }
177 }
178 179 autobyLineFast(Terminator = char,
180 Char = char)(Filef,
181 KeepTerminatorkeepTerminator = KeepTerminator.no,
182 stringseparator = "\n",
183 uintbufferSize = 4096) @safe// TODO lookup preferred block type184 {
185 returnByLineFast!(Char, Terminator)(f, keepTerminator, separator, bufferSize);
186 }
187 188 version(none):
189 190 version(linux)
191 unittest192 {
193 importstd.stdio: File, writeln;
194 importstd.algorithm.searching: count;
195 importtempfs : tempFilePath;
196 importstd.file : write;
197 198 constpath = tempFilePath("x");
199 200 writeln(path);
201 File(path, "wb").write("a\n");
202 203 assert(File(path, "rb").byLineFast.count ==
204 File(path, "rb").byLine.count);
205 }
206 207 unittest208 {
209 importstd.stdio: File, writeln;
210 importstd.algorithm.searching: count;
211 212 constpath = "/media/per/NORDLOW_2019-06-/Knowledge/DBpedia/latest/instance_types_en.ttl";
213 214 importstd.datetime: StopWatch;
215 216 doubled1, d2;
217 218 {
219 StopWatchsw;
220 sw.start;
221 constc1 = File(path).byLine.count;
222 sw.stop;
223 d1 = sw.peek.msecs;
224 writeln("byLine: ", d1, "msecs");
225 }
226 227 {
228 StopWatchsw;
229 sw.start;
230 constc2 = File(path).byLineFast.count;
231 sw.stop;
232 d2 = sw.peek.msecs;
233 writeln("byLineFast: ", d2, "msecs");
234 }
235 236 writeln("Speed-Up: ", d1 / d2);
237 }