1 /** First and Higher Order Statistics: $(LUCKY Histograms) and $(LUCKY N-grams).
2 
3 	Copyright: Per Nordlöw 2022-.
4 	License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
5 	Authors: $(WEB Per Nordlöw)
6 
7 	TODO: Replace static ifs with static final switch when Issue 6921 is fixed
8 
9 	TODO: Remove overloads using std.string:representation and use sparse maps
10 	over dchar for strings.
11 */
12 module nxt.ngram;
13 
14 // version = print;
15 
16 import nxt.container.static_bitarray;
17 import std.range: InputRange, ElementType, isInputRange;
18 import std.traits: isSomeChar, isUnsigned, isIntegral, isFloatingPoint, Unqual, isIterable, isAssociativeArray, CommonType;
19 import std.stdint: uint64_t;
20 import std.typecons: Tuple, tuple;
21 import nxt.predicates: allZero, allEqualTo;
22 import nxt.nesses: denseness;
23 import nxt.rational: Rational;
24 // import msgpack;
25 import std.numeric: dotProduct;
26 import std.string: representation;
27 import std.conv: to;
28 
29 /** N-Gram Model Kind. */
30 enum Kind
31 {
32 	saturated, // Standard Saturated Integers. Saturation is more robust than default integer wrap-around.
33 	binary // Binary Gram.
34 }
35 
36 /** N-Gram Model Storage. */
37 enum Storage
38 {
39 	denseDynamic,// Dynamically allocated dense storage.
40 	denseStatic, // Statically allocated dense storage.
41 	sparse, // N-Dimensional/Level D Maps/Dictionaries.
42 }
43 
44 /** N-Gram Model Symmetry. */
45 enum Symmetry
46 {
47 	ordered, // Order between elements matters.
48 	unordered, // Order of elements is ignored (relaxed). Tuples are sorted before inserted.
49 }
50 
51 /**
52    Computes $(LUCKY N-Gram) Model of Input Range $(D range).
53 
54    If N == 1 this N-gram becomes a standard $(LUCKY Histogram), also called a
55    $(LUCKY Unigram).
56 
57    If Bin/Bucket Type $(D RequestedBinType), requested by user, is $(D void) it
58    will be chosen automatically based on element type of $(D Range).
59 
60    TODO: Add optimized put() be precalculating maximum number of elements that
61    can be inserted without any risk overflow.
62 */
63 struct NGram(ValueType,
64 			 int N = 1,
65 			 Kind kind = Kind.saturated,
66 			 Storage storage = Storage.denseDynamic,
67 			 Symmetry symmetry = Symmetry.ordered,
68 			 RequestedBinType = void,
69 			 Range) if (is(RequestedBinType == void) ||
70 						isUnsigned!RequestedBinType ||
71 						isFloatingPoint!RequestedBinType)
72 {
73 	this(in Range range) @safe pure nothrow { put(range); }
74 
75 	auto ref value() @property @safe pure inout nothrow { return _bins; }
76 
77 	alias Q = Rational!ulong;
78 
79 	@property static int order() @safe pure nothrow { return N; }
80 
81 	enum isBinary = (kind == Kind.binary);
82 	enum isDense = (storage == Storage.denseStatic ||
83 					storage == Storage.denseDynamic);
84 	enum isSparse = !isDense;
85 	enum cacheDeepDenseness = false;
86 
87 	enum getStorage = storage;
88 
89 	/** Returns: Documentation of the type of $(D this). */
90 	string typeName() @property const
91 	{
92 		string prefix;
93 		static	  if (N == 1) { prefix = "Uni"; }
94 		else static if (N == 2) { prefix = "Bi"; }
95 		else static if (N == 3) { prefix = "Tri"; }
96 		else static if (N == 4) { prefix = "Qua"; }
97 		else static if (N == 5) { prefix = "Pen"; }
98 			else					{ prefix = to!string(N) ~ "-"; }
99 		string rval;
100 		static if (isBinary)
101 		{
102 			rval ~= "Bit";
103 		}
104 		else
105 		{
106 			rval ~= BinType.stringof;
107 		}
108 		rval ~= "-" ~ prefix ~ "Gram over " ~ ValueType.stringof ~ "s";
109 		return rval;
110 	}
111 
112 	string toString() @property const
113 	{
114 		string rval = typeName ~ ": ";
115 
116 		// print contents
117 		static if (isAA)
118 		{
119 			rval ~= to!string(_bins);
120 		}
121 		else if (N >= 2 ||
122 				 this.denseness < Q(1, 2)) // if less than half of the bins are occupied
123 		{
124 			// show as sparse
125 			rval ~= "[";
126 
127 			/+ TODO: Replace these with recursion? +/
128 			static if (N == 1)
129 			{
130 				bool begun = false;
131 				foreach (ix, elt; _bins) /+ TODO: Make static_bitarray support ref foreach and make elt const ref +/
132 				{
133 					if (elt)
134 					{
135 						if (begun) { rval ~= ", "; }
136 						rval ~= "{" ~ to!string(ix) ~ "}#" ~ to!string(elt);
137 						begun = true;
138 					}
139 				}
140 			}
141 			else static if (N == 2)
142 			{
143 				bool begun = false;
144 				foreach (ix0, const ref elt0; _bins)
145 				{
146 					foreach (ix1, elt1; elt0) /+ TODO: Make static_bitarray support ref foreach and make elt1 const ref +/
147 					{
148 						if (elt1)
149 						{
150 							if (begun) { rval ~= ", "; }
151 							rval ~= ("{" ~
152 									 to!string(ix0) ~ "," ~
153 									 to!string(ix1) ~ "}#" ~
154 									 to!string(elt1));
155 							begun = true;
156 						}
157 					}
158 				}
159 			}
160 			else static if (N == 3)
161 			{
162 				bool begun = false;
163 				foreach (ix0, const ref elt0; _bins)
164 				{
165 					foreach (ix1, const ref elt1; elt0)
166 					{
167 						foreach (ix2, elt2; elt1) { /+ TODO: Make static_bitarray support ref foreach and make elt2 const ref +/
168 							if (elt2)
169 							{
170 								if (begun) { rval ~= ", "; }
171 								rval ~= ("{" ~
172 										 to!string(ix0) ~ "," ~
173 										 to!string(ix1) ~ "," ~
174 										 to!string(ix2) ~ "}#" ~
175 										 to!string(elt2));
176 								begun = true;
177 							}
178 						}
179 					}
180 				}
181 			}
182 			else static if (N == 4)
183 			{
184 				bool begun = false;
185 				foreach (ix0, const ref elt0; _bins)
186 				{
187 					foreach (ix1, const ref elt1; elt0)
188 					{
189 						foreach (ix2, elt2; elt1)
190 						{
191 							foreach (ix3, elt3; elt2) /+ TODO: Make static_bitarray support ref foreach and make elt2 const ref +/
192 							{
193 								if (elt3)
194 								{
195 									if (begun) { rval ~= ", "; }
196 									rval ~= ("{" ~
197 											 to!string(ix0) ~ "," ~
198 											 to!string(ix1) ~ "," ~
199 											 to!string(ix2) ~ "," ~
200 											 to!string(ix3) ~ "}#" ~
201 											 to!string(elt3));
202 									begun = true;
203 								}
204 							}
205 						}
206 					}
207 				}
208 			}
209 			else static if (N == 5)
210 			{
211 				bool begun = false;
212 				foreach (ix0, const ref elt0; _bins)
213 				{
214 					foreach (ix1, const ref elt1; elt0)
215 					{
216 						foreach (ix2, const ref elt2; elt1)
217 						{
218 							foreach (ix3, const ref elt3; elt2) /+ TODO: Make static_bitarray support ref foreach and make elt2 const ref +/
219 							{
220 								foreach (ix4, elt4; elt3) /+ TODO: Make static_bitarray support ref foreach and make elt2 const ref +/
221 								{
222 									if (elt4)
223 									{
224 										if (begun) { rval ~= ", "; }
225 										rval ~= ("{" ~
226 												 to!string(ix0) ~ "," ~
227 												 to!string(ix1) ~ "," ~
228 												 to!string(ix2) ~ "," ~
229 												 to!string(ix3) ~ "," ~
230 												 to!string(ix4) ~ "}#" ~
231 												 to!string(elt4));
232 										begun = true;
233 									}
234 								}
235 							}
236 						}
237 					}
238 				}
239 			}
240 			else
241 			{
242 				static assert(0, "N >= 5 not supported");
243 			}
244 			rval ~= "]";
245 		}
246 		else
247 		{
248 			rval ~= to!string(_bins); // default
249 		}
250 
251 		static if (N == 1)
252 		{
253 			rval ~= " denseness:" ~ to!string(denseness);
254 		}
255 		else
256 		{
257 			rval ~= (" shallowDenseness:" ~ to!string(denseness(0)) ~
258 					 " deepDenseness:" ~ to!string(denseness(-1)));
259 		}
260 
261 		return rval;
262 	}
263 
264 	void reset() @safe nothrow
265 	{
266 		import nxt.algorithm_ex: reset;
267 		_bins.reset();
268 	}
269 	// alias reset = clear;
270 
271 	bool empty() const @property pure @trusted nothrow
272 	{
273 		static if (isAA)
274 		{
275 			return _bins.length == 0;
276 		}
277 		else
278 		{
279 			return _bins.allZero;
280 		}
281 	}
282 
283 	static if (N >= 2 &&
284 			   storage != Storage.sparse)
285 	{
286 		Q shallowDenseness() const pure @property @trusted nothrow
287 		{
288 			static if (N == 2)
289 			{
290 				ulong x = 0;
291 				foreach (const ref elt0; 0..combE) // every possible first ngram element
292 				{
293 					bool used = false;
294 					foreach (elt1; 0..combE) // every possible second ngram element
295 					{
296 						if (_bins[elt0][elt1]) // if any bit is used
297 						{
298 							used = true; // we flag it and exit
299 							break;
300 						}
301 					}
302 					if (used) ++x;
303 				}
304 				return Q(x, combE);
305 			}
306 			else
307 			{
308 				return Q(1, 1);
309 			}
310 		}
311 	}
312 
313 	typeof(this) opAssign(RhsRange)(in NGram!(ValueType, N, kind, storage, symmetry, RequestedBinType, RhsRange) rhs)
314 	if (isInputRange!RhsRange) // if (is(Unqual!Range == Unqual!RhsRange))
315 	{
316 		static if	  (storage == Storage.denseDynamic)
317 			_bins = rhs._bins.dup;
318 		else static if (storage == Storage.sparse)
319 			_bins = cast(BinType[ValueType[N]])(rhs._bins.dup); /+ TODO: How can we prevent this cast? +/
320 		else static if (storage == Storage.denseStatic)
321 			_bins = rhs._bins;
322 		else
323 			static assert(0, "Cannot handle storage of type " ~ to!string(storage));
324 		return this;
325 	}
326 
327 	static if (N == 1)
328 	{
329 		void normalize() @safe pure /* nothrow */
330 		{
331 			static if (kind != Kind.binary)
332 			{
333 				static if (isFloatingPoint!ValueType)
334 				{
335 					immutable scaleFactor = (cast(real)1) / _bins.length; // precalc scaling
336 				}
337 				foreach (ref elt; _bins) /+ TODO: Why can this throw for associative arrays? +/
338 				{
339 					static if (isIntegral!ValueType ||
340 							   isSomeChar!ValueType)
341 					{
342 						elt /= 2; // drop least significant bit
343 					}
344 					else static if (isFloatingPoint!ValueType)
345 					{
346 						elt =* scaleFactor;
347 					}
348 				}
349 			}
350 		}
351 	}
352 
353 	/** Invalidate Local Statistics. */
354 	void invalidateStats() @safe nothrow
355 	{
356 		static if (isAA && cacheDeepDenseness) { _deepDenseness = 0; }
357 	}
358 
359 	/** Saturated Increment $(D x) by one. */
360 	ref T incs(T)(ref T x) @safe nothrow if (isIntegral!T) { if (x != T.max) { ++x; invalidateStats(); } return x; }
361 	/** Saturated Increment $(D x) by one. */
362 	ref T incs(T)(ref T x) @safe nothrow if (isFloatingPoint!T) { invalidateStats(); return ++x; }
363 
364 	static if (kind == Kind.saturated)
365 	{
366 		/** Increase bin of $(D ng).
367 			Bin of ng must already be allocated if its stored in a map.
368 		*/
369 		ref NGram inc(ValueType[N] ng) @safe pure nothrow
370 		{
371 			static if (isAA)
372 			{
373 				incs(_bins[ng]); // just one case!
374 			}
375 			else
376 			{
377 				static		if (N == 1)
378 				{
379 					static if (storage == Storage.denseDynamic)
380 					{
381 						if (!_bins) { _bins = new BinType[noABins]; }
382 					}
383 					incs(_bins[ng[0]]);
384 				}
385 				else static if (N == 2)
386 				{
387 					static if (storage == Storage.denseDynamic)
388 					{
389 						if (!_bins) { _bins = new BinType[][noABins]; }
390 						if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[noABins]; }
391 					}
392 					incs(_bins[ng[0]][ng[1]]);
393 				}
394 				else static if (N == 3)
395 				{
396 					static if (storage == Storage.denseDynamic)
397 					{
398 						if (!_bins) { _bins = new BinType[][][noABins]; }
399 						if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][noABins]; }
400 						if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[noABins]; }
401 					}
402 					incs(_bins[ng[0]][ng[1]][ng[2]]);
403 				}
404 				else static if (N == 4)
405 				{
406 					static if (storage == Storage.denseDynamic)
407 					{
408 						if (!_bins) { _bins = new BinType[][][][noABins]; }
409 						if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][][noABins]; }
410 						if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[][noABins]; }
411 						if (!_bins[ng[0]][ng[1]][ng[2]]) { _bins[ng[0]][ng[1]][ng[2]] = new BinType[noABins]; }
412 					}
413 					incs(_bins[ng[0]][ng[1]][ng[2]][ng[3]]);
414 				}
415 				else static if (N == 5)
416 				{
417 					static if (storage == Storage.denseDynamic)
418 					{
419 						if (!_bins) { _bins = new BinType[][][][][noABins]; }
420 						if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][][][noABins]; }
421 						if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[][][noABins]; }
422 						if (!_bins[ng[0]][ng[1]][ng[2]]) { _bins[ng[0]][ng[1]][ng[2]] = new BinType[][noABins]; }
423 						if (!_bins[ng[0]][ng[1]][ng[2]][ng[3]]) { _bins[ng[0]][ng[1]][ng[2]][ng[3]] = new BinType[noABins]; }
424 					}
425 					incs(_bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]);
426 				}
427 				else
428 				{
429 					static assert(0, "N >= 6 not supported");
430 				}
431 			}
432 			return this;
433 		}
434 	}
435 
436 	/** Returns: Bin Count of NGram $(D ng). */
437 	BinType opIndex(T)(in T ng) const @safe pure nothrow if (isIntegral!T ||
438 															 __traits(isStaticArray, T))
439 	{
440 		static if (isAA)
441 		{
442 			if (ng in _bins)
443 			{
444 				return _bins[ng];
445 			}
446 			else
447 			{
448 				return 0; // empty means zero bin
449 			}
450 		}
451 		else
452 		{
453 			static	  if (N == 1) { return _bins[ng]; }
454 			else static if (N == 2) { return _bins[ng[0]][ng[1]]; }
455 			else static if (N == 3) { return _bins[ng[0]][ng[1]][ng[2]]; }
456 			else static if (N == 4) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]]; }
457 			else static if (N == 5) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]; }
458 			else { static assert(0, "N >= 6 not supported"); }
459 		}
460 	}
461 
462 	/* ref BinType opIndexAssign(Index)(BinType b, Index i) @trusted pure nothrow if (isIntegral!Index) in { */
463 
464 	/* } */
465 
466 	/** Returns: Bin Count of NGram $(D ng). */
467 	auto opIndexAssign(T)(in T ng) const @safe pure nothrow if (isIntegral!T ||
468 																__traits(isStaticArray, T))
469 	{
470 		static if (isAA)
471 		{
472 			if (ng in _bins)
473 			{
474 				return _bins[ng];
475 			}
476 			else
477 			{
478 				return 0; // empty means zero bin
479 			}
480 		}
481 		else
482 		{
483 			static	  if (N == 1) { return _bins[ng]; }
484 			else static if (N == 2) { return _bins[ng[0]][ng[1]]; }
485 			else static if (N == 3) { return _bins[ng[0]][ng[1]][ng[2]]; }
486 			else static if (N == 4) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]]; }
487 			else static if (N == 5) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]; }
488 			else { static assert(0, "N >= 6 not supported"); }
489 		}
490 	}
491 
492 	/** Put NGram Element $(D ng) in $(D this).
493 	 */
494 	ref NGram put(ValueType[N] ng) @safe pure nothrow
495 	{
496 		static if (isBinary)
497 		{
498 			static	  if (N == 1) { _bins[ng[0]] = true; }
499 			else static if (N == 2) { _bins[ng[0]][ng[1]] = true; }
500 			else static if (N == 3) { _bins[ng[0]][ng[1]][ng[2]] = true; }
501 			else static if (N == 4) { _bins[ng[0]][ng[1]][ng[2]][ng[3]] = true; }
502 			else static if (N == 5) { _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]] = true; }
503 			else { static assert(0, "N >= 6 not supported"); }
504 		}
505 		else
506 		{
507 			static if (isAA)
508 			{
509 				if (ng in _bins)
510 				{
511 					inc(ng); // increase bin
512 				}
513 				else
514 				{
515 					_bins[ng] = 1; // initial bin count
516 				}
517 			}
518 			else
519 			{
520 				inc(ng);
521 			}
522 		}
523 		invalidateStats();
524 		return this;
525 	}
526 
527 	/** Put NGram Elements $(D range) in $(D this).
528 	 */
529 	ref NGram put(T)(in T range) @safe pure nothrow if (isIterable!T)
530 	{
531 		static if (N == 1)
532 		{
533 			foreach (ix, const ref elt; range)
534 			{
535 				put([elt]);
536 			}
537 		}
538 		else static if (N == 2)
539 		{
540 			ValueType prev; // previous element
541 			foreach (ix, const ref elt; range)
542 			{
543 				if (ix >= N-1) { put([prev, elt]); }
544 				prev = elt;
545 			}
546 		}
547 		else static if (N == 3)
548 		{
549 			ValueType pprev, prev; // previous elements
550 			foreach (ix, const ref elt; range)
551 			{
552 				if (ix >= N-1) { put([pprev, prev, elt]); }
553 				pprev = prev;
554 				prev = elt;
555 			}
556 		}
557 		else static if (N == 4)
558 		{
559 			ValueType ppprev, pprev, prev; // previous elements
560 			foreach (ix, const ref elt; range)
561 			{
562 				if (ix >= N-1) { put([ppprev, pprev, prev, elt]); }
563 				ppprev = pprev;
564 				pprev = prev;
565 				prev = elt;
566 			}
567 		}
568 		else static if (N == 5)
569 		{
570 			ValueType pppprev, ppprev, pprev, prev; // previous elements
571 			foreach (ix, const ref elt; range)
572 			{
573 				if (ix >= N-1) { put([pppprev, ppprev, pprev, prev, elt]); }
574 				pppprev = ppprev;
575 				ppprev = pprev;
576 				pprev = prev;
577 				prev = elt;
578 			}
579 		}
580 		return this;
581 	}
582 
583 	/** Scalar (Dot) Product Match $(D this) with $(D rhs). */
584 	CommonType!(ulong, BinType) matchDenser(rhsValueType,
585 											Kind rhsKind = Kind.saturated,
586 											Storage rhsStorage = Storage.denseDynamic,
587 											RhsRange)(in NGram!(rhsValueType,
588 																N,
589 																rhsKind,
590 																rhsStorage,
591 																symmetry,
592 																RequestedBinType,
593 																RhsRange) rhs) const @trusted pure /* nothrow */
594 	{
595 		static if (this.isDense && rhs.isDense)
596 		{
597 			static if (N == 1) return dotProduct(this._bins, rhs._bins);
598 			else static assert(0, "N >= " ~ to!string(N) ~ " not supported");
599 		}
600 		else static if (this.isSparse)
601 		{
602 			typeof(return) sum = 0;
603 			foreach (ix, const ref bin; _bins) // assume lhs is sparsest
604 			{
605 				sum += bin*rhs[ix];
606 			}
607 			return sum;
608 		}
609 		else static if (rhs.isSparse)
610 		{
611 			typeof(return) sum = 0;
612 			foreach (ix, const ref bin; rhs._bins) // assume lhs is sparsest
613 			{
614 				sum += this[ix]*bin;
615 			}
616 			return sum;
617 		}
618 		else
619 		{
620 			static assert(0, "Combination of " ~ typeof(storage).stringof ~ " and " ~
621 						  typeof(rhsStorage).stringof ~ " not supported");
622 			return 0;
623 		}
624 	}
625 
626 	auto opBinary(string op,
627 				  string file = __FILE__, int line = __LINE__)(in NGram rhs) const @trusted pure /* nothrow */
628 	{
629 		NGram tmp;
630 		static if (this.isSparse ||
631 				   rhs.isSparse)
632 		{
633 			void doIt(in NGram a, in NGram b) // more functional than orderInPlace
634 			{
635 				foreach (ix, const ref bin; a._bins)
636 				{
637 					if (ix in b._bins)
638 					{
639 						mixin("tmp._bins[ix] = bin " ~ op ~ " b._bins[ix];"); // propagate pointwise operation
640 					}
641 				}
642 			}
643 			if (this.length < rhs.length) { doIt(this, rhs); }
644 			else						  { doIt(rhs, this); }
645 		}
646 		else static if (this.isBinary &&
647 						rhs.isBinary)
648 		{
649 			mixin("tmp._bins = _bins " ~ op ~ "rhs._bins;"); // bitsets cannot be sliced (yet)
650 		}
651 		else static if (this.isDense &&
652 						rhs.isDense &&
653 						N == 1)
654 		{
655 			assert(this.length == rhs.length);
656 			import std.range: zip;
657 			import std.algorithm: map;
658 			import std.array: array;
659 			tmp._bins = zip(this._bins[], rhs._bins[]).map!(a => mixin("a[0] " ~ op ~ "a[1]"))().array; /+ TODO: Use zipWith when Issue 8715 is fixed +/
660 			/* foreach (ix, let; this._bins) { /+ TODO: Reuse Phobos algorithm or range */ +/
661 			/*	 mixin("tmp[ix] = _bins[ix] " ~ op ~ "rhs._bins[ix];"); */
662 			/* } */
663 		}
664 		else
665 		{
666 			static assert(0, "Combination of " ~ to!string(this.getStorage) ~ " and " ~
667 						  to!string(rhs.getStorage) ~ " and N == " ~ to!string(N) ~ " not supported");
668 		}
669 		return tmp;
670 	}
671 
672 	/** Determine Bin (Counter) Type. */
673 	static if (isBinary)
674 	{
675 		alias BinType = bool;
676 	}
677 	else static if (is(RequestedBinType == void))
678 	{
679 		alias BinType = uint64_t;   // Bin (counter) type. Count long by default.
680 	}
681 	else
682 	{
683 		alias BinType = RequestedBinType;
684 	}
685 
686 	enum bitsE = 8 * ValueType.sizeof; /** Number of Bits in ValueType (element). TODO: This may have to be fixed for ref types. */
687 	enum combE = 2^^bitsE;  /** Number of combinations in element. */
688 	enum noABins = combE; // Number of bins per array dimension
689 	enum noBins = combE^^N; /** Maximum number of bins (possible). */
690 
691 	// Determine storage structure base on element type.
692 	static if (storage == Storage.sparse)
693 	{
694 		BinType[ValueType[N]] _bins;
695 	}
696 	else static if (isBinary &&
697 					N*bitsE <= 32) // safely fits on stack
698 	{
699 		static if		(storage == Storage.denseStatic)
700 		{
701 			static	  if (N == 1) StaticBitArray!noABins _bins;
702 			else static if (N == 2) StaticBitArray!noABins[noABins] _bins;
703 			else static if (N == 3) StaticBitArray!noABins[noABins][noABins] _bins;
704 			else static assert(0, "Dense static N >= 4 does no safely fit on stack");
705 		}
706 		else static if (storage == Storage.denseDynamic)
707 		{
708 			static	  if (N == 1) StaticBitArray!noABins _bins;
709 			else static if (N == 2) StaticBitArray!noABins[] _bins;
710 			else static if (N == 3) StaticBitArray!noABins[][] _bins;
711 			else static if (N == 4) StaticBitArray!noABins[][][] _bins;
712 			else static if (N == 5) StaticBitArray!noABins[][][][] _bins;
713 			else static assert(0, "N >= 6 not supported");
714 		}
715 		else static if (storage == Storage.sparse) // shouldn't happen
716 		{
717 		}
718 	}
719 	else static if (storage == Storage.denseStatic &&
720 					(isUnsigned!ValueType ||
721 					 isSomeChar!ValueType) &&
722 					N*bitsE <= 16) // safely fits on stack
723 	{
724 		static	  if (N == 1) BinType[noABins] _bins;
725 		else static if (N == 2) BinType[noABins][noABins] _bins;
726 		else static assert(0, "Dense static N >= 3 does not safely fit on stack");
727 	}
728 	else static if (storage == Storage.denseDynamic &&
729 					(isUnsigned!ValueType ||
730 					 isSomeChar!ValueType)) // safely fits on stack
731 	{
732 		static	  if (N == 1) BinType[] _bins;
733 		else static if (N == 2) BinType[][] _bins;
734 		else static if (N == 3) BinType[][][] _bins;
735 		else static if (N == 4) BinType[][][][] _bins;
736 		else static if (N == 5) BinType[][][][][] _bins;
737 		else static assert(0, "N >= 6 not supported");
738 	}
739 	else
740 	{
741 		BinType[ValueType[N]] _bins;
742 	}
743 
744 	private alias _bins this;
745 
746 	enum isAA = isAssociativeArray!(typeof(_bins));
747 
748 	static if (isAA && cacheDeepDenseness)
749 	{
750 		ulong _deepDenseness;	// Cache deep denseness in associative arrays
751 	}
752 
753 	Q densenessUncached(int depth = -1) const pure @property @trusted nothrow
754 	{
755 		static if (isBinary)
756 		{
757 			static if (N >= 2)
758 			{
759 				if (N == 2 && depth == 0)  // if shallow wanted
760 				{
761 					return shallowDenseness();
762 				}
763 			}
764 			return _bins.denseness;
765 		}
766 		else
767 		{
768 			static if (isAA)
769 			{
770 				return Q(_bins.length, noBins);
771 			}
772 			else
773 			{
774 				return _bins.denseness(depth);
775 			}
776 		}
777 	}
778 
779 	/** Returns: Number of Non-Zero Elements in $(D range). */
780 	Q denseness(int depth = -1) const pure @property @safe nothrow
781 	{
782 		static if (isAA && cacheDeepDenseness)
783 		{
784 			if (depth == -1)  // -1 means deepDenseness
785 			{
786 				if (!_deepDenseness)  // if not yet defined.
787 				{
788 					unqual(this).densenessUncached(depth).numerator; // calculate it
789 				}
790 				return Q(_deepDenseness, noBins);
791 			}
792 		}
793 		return densenessUncached(depth);
794 	}
795 }
796 
797 /**
798    Computes $(LUCKY Bi-Gram) of $(D range).
799 */
800 auto ngram(int N = 2,
801 		   Kind kind = Kind.saturated,
802 		   Storage storage = Storage.denseDynamic,
803 		   Symmetry symmetry = Symmetry.ordered,
804 		   RequestedBinType = void,
805 		   Range)(in Range range) @safe pure nothrow if (isIterable!Range)
806 {
807 	return NGram!(Unqual!(ElementType!Range), N,
808 				  kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
809 }
810 auto sparseUIntNGramOverRepresentation(int N = 2,
811 									   Kind kind = Kind.saturated,
812 									   Symmetry symmetry = Symmetry.ordered)(in string range) @safe pure nothrow
813 {
814 	auto y = range.representation;
815 	return ngram!(N, kind, Storage.sparse, symmetry, uint)(y);
816 }
817 
818 auto histogram(Kind kind = Kind.saturated,
819 			   Storage storage = Storage.denseDynamic,
820 			   Symmetry symmetry = Symmetry.ordered,
821 			   RequestedBinType = void,
822 			   Range)(in Range range) @safe pure nothrow if (isIterable!Range)
823 {
824 	return NGram!(Unqual!(ElementType!Range), 1, kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
825 }
826 alias Hist = NGram;
827 alias hist = histogram;
828 alias unigram = histogram;
829 
830 unittest {
831 	const ubyte[] x = [1, 2, 3, 4, 5];
832 	const h = x.histogram!(Kind.saturated, Storage.denseStatic);
833 	const hh = h + h;
834 
835 	auto h2 = x.histogram!(Kind.saturated, Storage.denseStatic);
836 	h2.put(x);
837 	assert(hh == h2);
838 }
839 
840 unittest {
841 	const ubyte[] x;
842 	const hDS = x.histogram!(Kind.saturated, Storage.denseStatic);  assert(hDS.empty);
843 
844 	const hDD = x.histogram!(Kind.saturated, Storage.denseDynamic); assert(hDD.empty);
845 
846 	auto h = x.histogram!(Kind.saturated, Storage.sparse); assert(h.empty);
847 	const ubyte[5] x1 = [1, 2, 3, 4, 5];
848 	h.put(x1); assert(!h.empty);
849 	assert(h.matchDenser(h) == x1.length);
850 	auto hh = h + h;
851 	assert(hh.matchDenser(hh) == 4*x1.length);
852 
853 	h.destroy(); assert(h.empty);
854 
855 	auto hU32 = x.histogram!(Kind.saturated, Storage.sparse, Symmetry.ordered, uint);
856 }
857 
858 unittest {
859 	alias ValueType = ubyte;
860 	const ValueType[3] x = [11, 12, 13];
861 
862 	auto h = x.histogram!(Kind.saturated, Storage.denseStatic);
863 	assert(h[0] == 0);
864 	assert(h[11] == 1 && h[12] == 1 && h[13] == 1);
865 	assert(3 == h.matchDenser(h));
866 
867 	h.put(x);
868 	assert(h[11] == 2 && h[12] == 2 && h[13] == 2);
869 	assert(12 == h.matchDenser(h));
870 
871 	version (print) dbg(h);
872 
873 	auto hD = x.histogram!(Kind.saturated, Storage.denseDynamic);
874 	version (print) dbg(hD);
875 }
876 
877 unittest {
878 	alias ValueType = ulong;
879 	const ValueType[3] x = [1, 2, 3];
880 	auto h = x.histogram!(Kind.saturated, Storage.denseStatic);
881 	h.put([4, 5, 6]);
882 	assert(h.length == 6);
883 }
884 
885 /**
886    Computes $(LUCKY Binary Histogram) of $(D range).
887 */
888 auto bistogram(Range)(in Range range) @safe pure nothrow if (isIterable!Range)
889 
890 {
891 	return NGram!(Unqual!(ElementType!Range), 1,
892 				  Kind.binary,
893 				  Storage.denseStatic,
894 				  Symmetry.ordered,
895 				  void,
896 				  Unqual!Range)(range);
897 }
898 alias bunigram = bistogram;
899 
900 unittest {
901 	const ubyte[] x;
902 	const b = x.bistogram;
903 	assert(b.empty);
904 
905 	// test const unqualification
906 	NGram!(ubyte, 1, Kind.binary, Storage.denseStatic, Symmetry.ordered, void, const(ubyte)[]) cb;
907 	cb = b;
908 	assert(cb == b);
909 
910 	// test immutable unqualification
911 	NGram!(ubyte, 1, Kind.binary, Storage.denseStatic, Symmetry.ordered, void, immutable(ubyte)[]) ib;
912 	ib = b;
913 	assert(ib == b);
914 }
915 
916 unittest {
917 	const ubyte[3] x = [1, 2, 3];
918 	const h_ = x.bistogram;
919 	assert(8*h_.sizeof == 2^^8);
920 	const h = h_;
921 	assert(h[1] && h[2] && h[3]);
922 	assert(!h[0]);
923 	assert(!h[4]);
924 
925 	const h2_ = h_;
926 	assert((h2_ & h_) == h2_);
927 	assert((h2_ | h_) == h2_);
928 	assert((h2_.value & h_.value) == h2_.value);
929 	assert((h2_.value | h_.value) == h2_.value);
930 }
931 unittest {
932 	const ushort[3] x = [1, 2, 3];
933 	const h_ = x.bistogram;
934 	assert(8*h_.sizeof == 2^^16);
935 	const h = h_;
936 	assert(h[1] && h[2] && h[3]);
937 	assert(!h[0]);
938 	assert(!h[4]);
939 }
940 
941 /**
942    Computes $(LUCKY Binary (Occurrence) NGram) of Bytes in Input String $(D x).
943 */
944 auto bistogramOverRepresentation(in string x) pure nothrow
945 
946 {
947 	return bistogram(x.representation);
948 }
949 unittest {
950 	const x = "abcdef";
951 	auto h = x.bistogramOverRepresentation;
952 	size_t ix = 0;
953 	foreach (const bin; h)
954 	{
955 		if (ix >= cast(ubyte)'a' &&
956 			ix <= cast(ubyte)'f')
957 		{
958 			assert(bin);
959 		}
960 		else
961 		{
962 			assert(!bin);
963 		}
964 		++ix;
965 	}
966 	version (print) dbg(h);
967 }
968 
969 /**
970    Computes $(LUCKY Bi-Gram) of $(D range).
971 */
972 auto bigram(Kind kind = Kind.saturated,
973 			Storage storage = Storage.denseDynamic,
974 			Symmetry symmetry = Symmetry.ordered,
975 			RequestedBinType = void,
976 			Range)(in Range range) @safe pure nothrow if (isIterable!Range)
977 {
978 	return NGram!(Unqual!(ElementType!Range), 2,
979 				  kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
980 }
981 auto sparseUIntBigram(Kind kind = Kind.saturated,
982 					  Symmetry symmetry = Symmetry.ordered,
983 					  Range)(in Range range) @safe pure nothrow if (isIterable!Range)
984 {
985 	return NGram!(Unqual!(ElementType!Range), 2,
986 				  kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
987 }
988 
989 unittest {
990 	const ubyte[] x = [1, 2, 3, 3, 3, 4, 4, 5];
991 
992 	auto bSp = x.bigram!(Kind.saturated, Storage.sparse);
993 	alias SparseNGram = Unqual!(typeof(bSp));
994 
995 	// check msgpacking
996 	// auto bSPBytes = bSp.pack();
997 	// SparseNGram bSp_;
998 	// bSPBytes.unpack(bSp_);
999 	// assert(bSp == bSp_);
1000 
1001 	auto bS = x.bigram!(Kind.saturated, Storage.denseStatic);
1002 	const bSCopy = bS;
1003 	bS.put(x);
1004 	assert(bS != bSCopy);
1005 
1006 	const bD = x.bigram!(Kind.saturated, Storage.denseDynamic);
1007 	version (print) dbg(bD);
1008 
1009 	const bb = x.bigram!(Kind.binary, Storage.denseStatic);
1010 	version (print) dbg(typeof(bb._bins).stringof);
1011 
1012 	assert(bb.denseness(0).numerator == 4);
1013 	// assert(bb.denseness(-1).numerator == 6);
1014 }
1015 unittest {
1016 	const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1017 
1018 	const bS = x.bigram!(Kind.saturated, Storage.denseStatic);
1019 	version (print) dbg(bS);
1020 	assert(bS.denseness.numerator == x.length - bS.order + 1);
1021 
1022 	const bB = x.bigram!(Kind.binary, Storage.denseStatic);
1023 	version (print) dbg(bB);
1024 }
1025 
1026 unittest {
1027 	const x = [1, 2, 3, 4, 5];
1028 	auto h = x.bigram!(Kind.saturated, Storage.sparse);
1029 	assert(h.matchDenser(h) == x.length - 1);
1030 }
1031 
1032 /**
1033    Computes $(LUCKY Tri-Gram) of $(D range).
1034 */
1035 auto trigram(Kind kind = Kind.saturated,
1036 			 Storage storage = Storage.denseDynamic,
1037 			 Symmetry symmetry = Symmetry.ordered,
1038 			 RequestedBinType = void,
1039 			 Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1040 {
1041 	return NGram!(Unqual!(ElementType!Range), 3,
1042 				  kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1043 }
1044 auto sparseUIntTrigram(Kind kind = Kind.saturated,
1045 					  Symmetry symmetry = Symmetry.ordered,
1046 					  Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1047 {
1048 	return NGram!(Unqual!(ElementType!Range), 3,
1049 				  kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
1050 }
1051 unittest {
1052 	const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1053 
1054 	const bS = x.trigram!(Kind.saturated, Storage.denseStatic);
1055 	version (print) dbg(bS);
1056 	assert(bS.denseness.numerator == x.length - bS.order + 1);
1057 
1058 	const bD = x.trigram!(Kind.saturated, Storage.denseDynamic);
1059 	version (print) dbg(bD);
1060 
1061 	const bB = x.trigram!(Kind.binary, Storage.denseStatic);
1062 	/* version (print) dbg(bB); */
1063 }
1064 
1065 /**
1066    Computes $(LUCKY Qua-Gram) of $(D range).
1067 */
1068 auto quagram(Kind kind = Kind.saturated,
1069 			 Storage storage = Storage.denseDynamic,
1070 			 Symmetry symmetry = Symmetry.ordered,
1071 			 RequestedBinType = void,
1072 			 Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1073 {
1074 	return NGram!(Unqual!(ElementType!Range), 4,
1075 				  kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1076 }
1077 auto sparseUIntQuagram(Kind kind = Kind.saturated,
1078 					   Symmetry symmetry = Symmetry.ordered,
1079 					   Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1080 {
1081 	return NGram!(Unqual!(ElementType!Range), 4,
1082 				  kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
1083 }
1084 unittest {
1085 	const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1086 
1087 	const bD = x.quagram!(Kind.saturated, Storage.denseDynamic);
1088 	version (print) dbg(bD);
1089 	assert(bD.denseness.numerator == x.length - bD.order + 1);
1090 }
1091 auto sparseUIntQuagramOverRepresentation(in string x) pure nothrow
1092 {
1093 	return sparseUIntQuagram(x.representation);
1094 }
1095 
1096 /**
1097    Computes $(LUCKY Pen-Gram) of $(D range).
1098 */
1099 auto pengram(Kind kind = Kind.saturated,
1100 			 Storage storage = Storage.denseDynamic,
1101 			 Symmetry symmetry = Symmetry.ordered,
1102 			 RequestedBinType = void,
1103 			 Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1104 {
1105 	return NGram!(Unqual!(ElementType!Range), 5,
1106 				  kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1107 }
1108 unittest {
1109 	const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1110 
1111 	const bS = x.pengram!(Kind.saturated, Storage.sparse);
1112 	const bS_ = bS;
1113 	assert(bS_ == bS);
1114 
1115 	// skipping denseStatic because it doesn't fit stack anyway
1116 
1117 	const bD = x.pengram!(Kind.saturated, Storage.denseDynamic);
1118 	version (print) dbg(bD);
1119 	assert(bD.denseness.numerator == x.length - bD.order + 1);
1120 }