1 /** First and Higher Order Statistics: $(LUCKY Histograms) and $(LUCKY N-grams).
2 
3     Copyright: Per Nordlöw 2018-.
4     License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
5     Authors: $(WEB Per Nordlöw)
6 
7     TODO Replace static ifs with static final switch when Issue 6921 is fixed
8 
9     TODO Remove overloads using std.string:representation and use sparse maps
10     over dchar for strings.
11 */
12 module nxt.ngram;
13 
14 import nxt.static_bitarray;
15 import std.range: InputRange, ElementType, isInputRange;
16 import std.traits: isSomeChar, isUnsigned, isIntegral, isFloatingPoint, Unqual, isStaticArray, isIterable, isAssociativeArray, CommonType;
17 import std.stdint: uint64_t;
18 import std.typecons: Tuple, tuple;
19 import nxt.predicates: allZero, allEqualTo;
20 import nxt.nesses: denseness;
21 import nxt.rational: Rational;
22 // import msgpack;
23 import std.numeric: dotProduct;
24 import std..string: representation;
25 import std.conv: to;
26 
27 // version = print;
28 
29 /** N-Gram Model Kind. */
30 enum Kind
31 {
32     saturated, // Standard Saturated Integers. Saturation is more robust than default integer wrap-around.
33     binary // Binary Gram.
34 }
35 
36 /** N-Gram Model Storage. */
37 enum Storage
38 {
39     denseDynamic,// Dynamically allocated dense storage.
40     denseStatic, // Statically allocated dense storage.
41     sparse, // N-Dimensional/Level D Maps/Dictionaries.
42 }
43 
44 /** N-Gram Model Symmetry. */
45 enum Symmetry
46 {
47     ordered, // Order between elements matters.
48     unordered, // Order of elements is ignored (relaxed). Tuples are sorted before inserted.
49 }
50 
51 /**
52    Computes $(LUCKY N-Gram) Model of Input Range $(D range).
53 
54    If N == 1 this N-gram becomes a standard $(LUCKY Histogram), also called a
55    $(LUCKY Unigram).
56 
57    If Bin/Bucket Type $(D RequestedBinType), requested by user, is $(D void) it
58    will be chosen automatically based on element type of $(D Range).
59 
60    TODO Add optimized put() be precalculating maximum number of elements that
61    can be inserted without any risk overflow.
62 */
63 struct NGram(ValueType,
64              int N = 1,
65              Kind kind = Kind.saturated,
66              Storage storage = Storage.denseDynamic,
67              Symmetry symmetry = Symmetry.ordered,
68              RequestedBinType = void,
69              Range) if (is(RequestedBinType == void) ||
70                         isUnsigned!RequestedBinType ||
71                         isFloatingPoint!RequestedBinType)
72 {
73     this(in Range range) @safe pure nothrow { put(range); }
74 
75     auto ref value() @property @safe pure inout nothrow { return _bins; }
76 
77     alias Q = Rational!ulong;
78 
79     @property static int order() @safe pure nothrow { return N; }
80 
81     enum isBinary = (kind == Kind.binary);
82     enum isDense = (storage == Storage.denseStatic ||
83                     storage == Storage.denseDynamic);
84     enum isSparse = !isDense;
85     enum cacheDeepDenseness = false;
86 
87     enum getStorage = storage;
88 
89     /** Returns: Documentation of the type of $(D this). */
90     string typeName() @property const
91     {
92         string prefix;
93         static      if (N == 1) { prefix = "Uni"; }
94         else static if (N == 2) { prefix = "Bi"; }
95         else static if (N == 3) { prefix = "Tri"; }
96         else static if (N == 4) { prefix = "Qua"; }
97         else static if (N == 5) { prefix = "Pen"; }
98             else                    { prefix = to!string(N) ~ "-"; }
99         string rval;
100         static if (isBinary)
101         {
102             rval ~= "Bit";
103         }
104         else
105         {
106             rval ~= BinType.stringof;
107         }
108         rval ~= "-" ~ prefix ~ "Gram over " ~ ValueType.stringof ~ "s";
109         return rval;
110     }
111 
112     string toString() @property const
113     {
114         string rval = typeName ~ ": ";
115 
116         // print contents
117         static if (isAA)
118         {
119             rval ~= to!string(_bins);
120         }
121         else if (N >= 2 ||
122                  this.denseness < Q(1, 2)) // if less than half of the bins are occupied
123         {
124             // show as sparse
125             rval ~= "[";
126 
127             // TODO Replace these with recursion?
128             static if (N == 1)
129             {
130                 bool begun = false;
131                 foreach (ix, elt; _bins) // TODO Make static_bitarray support ref foreach and make elt const ref
132                 {
133                     if (elt)
134                     {
135                         if (begun) { rval ~= ", "; }
136                         rval ~= "{" ~ to!string(ix) ~ "}#" ~ to!string(elt);
137                         begun = true;
138                     }
139                 }
140             }
141             else static if (N == 2)
142             {
143                 bool begun = false;
144                 foreach (ix0, const ref elt0; _bins)
145                 {
146                     foreach (ix1, elt1; elt0) // TODO Make static_bitarray support ref foreach and make elt1 const ref
147                     {
148                         if (elt1)
149                         {
150                             if (begun) { rval ~= ", "; }
151                             rval ~= ("{" ~
152                                      to!string(ix0) ~ "," ~
153                                      to!string(ix1) ~ "}#" ~
154                                      to!string(elt1));
155                             begun = true;
156                         }
157                     }
158                 }
159             }
160             else static if (N == 3)
161             {
162                 bool begun = false;
163                 foreach (ix0, const ref elt0; _bins)
164                 {
165                     foreach (ix1, const ref elt1; elt0)
166                     {
167                         foreach (ix2, elt2; elt1) { // TODO Make static_bitarray support ref foreach and make elt2 const ref
168                             if (elt2)
169                             {
170                                 if (begun) { rval ~= ", "; }
171                                 rval ~= ("{" ~
172                                          to!string(ix0) ~ "," ~
173                                          to!string(ix1) ~ "," ~
174                                          to!string(ix2) ~ "}#" ~
175                                          to!string(elt2));
176                                 begun = true;
177                             }
178                         }
179                     }
180                 }
181             }
182             else static if (N == 4)
183             {
184                 bool begun = false;
185                 foreach (ix0, const ref elt0; _bins)
186                 {
187                     foreach (ix1, const ref elt1; elt0)
188                     {
189                         foreach (ix2, elt2; elt1)
190                         {
191                             foreach (ix3, elt3; elt2) // TODO Make static_bitarray support ref foreach and make elt2 const ref
192                             {
193                                 if (elt3)
194                                 {
195                                     if (begun) { rval ~= ", "; }
196                                     rval ~= ("{" ~
197                                              to!string(ix0) ~ "," ~
198                                              to!string(ix1) ~ "," ~
199                                              to!string(ix2) ~ "," ~
200                                              to!string(ix3) ~ "}#" ~
201                                              to!string(elt3));
202                                     begun = true;
203                                 }
204                             }
205                         }
206                     }
207                 }
208             }
209             else static if (N == 5)
210             {
211                 bool begun = false;
212                 foreach (ix0, const ref elt0; _bins)
213                 {
214                     foreach (ix1, const ref elt1; elt0)
215                     {
216                         foreach (ix2, const ref elt2; elt1)
217                         {
218                             foreach (ix3, const ref elt3; elt2) // TODO Make static_bitarray support ref foreach and make elt2 const ref
219                             {
220                                 foreach (ix4, elt4; elt3) // TODO Make static_bitarray support ref foreach and make elt2 const ref
221                                 {
222                                     if (elt4)
223                                     {
224                                         if (begun) { rval ~= ", "; }
225                                         rval ~= ("{" ~
226                                                  to!string(ix0) ~ "," ~
227                                                  to!string(ix1) ~ "," ~
228                                                  to!string(ix2) ~ "," ~
229                                                  to!string(ix3) ~ "," ~
230                                                  to!string(ix4) ~ "}#" ~
231                                                  to!string(elt4));
232                                         begun = true;
233                                     }
234                                 }
235                             }
236                         }
237                     }
238                 }
239             }
240             else
241             {
242                 static assert(0, "N >= 5 not supported");
243             }
244             rval ~= "]";
245         }
246         else
247         {
248             rval ~= to!string(_bins); // default
249         }
250 
251         static if (N == 1)
252         {
253             rval ~= " denseness:" ~ to!string(denseness);
254         }
255         else
256         {
257             rval ~= (" shallowDenseness:" ~ to!string(denseness(0)) ~
258                      " deepDenseness:" ~ to!string(denseness(-1)));
259         }
260 
261         return rval;
262     }
263 
264     void reset() @safe nothrow
265     {
266         import nxt.algorithm_ex: reset;
267         _bins.reset();
268     }
269     // alias reset = clear;
270 
271     bool empty() const pure @property @trusted nothrow
272     {
273         static if (isAA)
274         {
275             return _bins.length == 0;
276         }
277         else
278         {
279             return _bins.allZero;
280         }
281     }
282 
283     static if (N >= 2 &&
284                storage != Storage.sparse)
285     {
286         Q shallowDenseness() const pure @property @trusted nothrow
287         {
288             static if (N == 2)
289             {
290                 ulong x = 0;
291                 foreach (const ref elt0; 0..combE) // every possible first ngram element
292                 {
293                     bool used = false;
294                     foreach (elt1; 0..combE) // every possible second ngram element
295                     {
296                         if (_bins[elt0][elt1]) // if any bit is used
297                         {
298                             used = true; // we flag it and exit
299                             break;
300                         }
301                     }
302                     if (used) ++x;
303                 }
304                 return Q(x, combE);
305             }
306             else
307             {
308                 return Q(1, 1);
309             }
310         }
311     }
312 
313     typeof(this) opAssign(RhsRange)(in NGram!(ValueType, N, kind, storage, symmetry, RequestedBinType, RhsRange) rhs)
314     if (isInputRange!RhsRange) // if (is(Unqual!Range == Unqual!RhsRange))
315     {
316         static if      (storage == Storage.denseDynamic)
317             _bins = rhs._bins.dup;
318         else static if (storage == Storage.sparse)
319             _bins = cast(BinType[ValueType[N]])(rhs._bins.dup); // TODO How can we prevent this cast?
320         else static if (storage == Storage.denseStatic)
321             _bins = rhs._bins;
322         else
323             static assert(0, "Cannot handle storage of type " ~ to!string(storage));
324         return this;
325     }
326 
327     static if (N == 1)
328     {
329         void normalize() @safe pure /* nothrow */
330         {
331             static if (kind != Kind.binary)
332             {
333                 static if (isFloatingPoint!ValueType)
334                 {
335                     immutable scaleFactor = (cast(real)1) / _bins.length; // precalc scaling
336                 }
337                 foreach (ref elt; _bins) // TODO Why can this throw for associative arrays?
338                 {
339                     static if (isIntegral!ValueType ||
340                                isSomeChar!ValueType)
341                     {
342                         elt /= 2; // drop least significant bit
343                     }
344                     else static if (isFloatingPoint!ValueType)
345                     {
346                         elt =* scaleFactor;
347                     }
348                 }
349             }
350         }
351     }
352 
353     /** Invalidate Local Statistics. */
354     void invalidateStats() @safe nothrow
355     {
356         static if (isAA && cacheDeepDenseness) { _deepDenseness = 0; }
357     }
358 
359     /** Saturated Increment $(D x) by one. */
360     ref T incs(T)(ref T x) @safe nothrow if (isIntegral!T) { if (x != T.max) { ++x; invalidateStats(); } return x; }
361     /** Saturated Increment $(D x) by one. */
362     ref T incs(T)(ref T x) @safe nothrow if (isFloatingPoint!T) { invalidateStats(); return ++x; }
363 
364     static if (kind == Kind.saturated)
365     {
366         /** Increase bin of $(D ng).
367             Bin of ng must already be allocated if its stored in a map.
368         */
369         ref NGram inc(ValueType[N] ng) @safe pure nothrow
370         {
371             static if (isAA)
372             {
373                 incs(_bins[ng]); // just one case!
374             }
375             else
376             {
377                 static        if (N == 1)
378                 {
379                     static if (storage == Storage.denseDynamic)
380                     {
381                         if (!_bins) { _bins = new BinType[noABins]; }
382                     }
383                     incs(_bins[ng[0]]);
384                 }
385                 else static if (N == 2)
386                 {
387                     static if (storage == Storage.denseDynamic)
388                     {
389                         if (!_bins) { _bins = new BinType[][noABins]; }
390                         if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[noABins]; }
391                     }
392                     incs(_bins[ng[0]][ng[1]]);
393                 }
394                 else static if (N == 3)
395                 {
396                     static if (storage == Storage.denseDynamic)
397                     {
398                         if (!_bins) { _bins = new BinType[][][noABins]; }
399                         if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][noABins]; }
400                         if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[noABins]; }
401                     }
402                     incs(_bins[ng[0]][ng[1]][ng[2]]);
403                 }
404                 else static if (N == 4)
405                 {
406                     static if (storage == Storage.denseDynamic)
407                     {
408                         if (!_bins) { _bins = new BinType[][][][noABins]; }
409                         if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][][noABins]; }
410                         if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[][noABins]; }
411                         if (!_bins[ng[0]][ng[1]][ng[2]]) { _bins[ng[0]][ng[1]][ng[2]] = new BinType[noABins]; }
412                     }
413                     incs(_bins[ng[0]][ng[1]][ng[2]][ng[3]]);
414                 }
415                 else static if (N == 5)
416                 {
417                     static if (storage == Storage.denseDynamic)
418                     {
419                         if (!_bins) { _bins = new BinType[][][][][noABins]; }
420                         if (!_bins[ng[0]]) { _bins[ng[0]] = new BinType[][][][noABins]; }
421                         if (!_bins[ng[0]][ng[1]]) { _bins[ng[0]][ng[1]] = new BinType[][][noABins]; }
422                         if (!_bins[ng[0]][ng[1]][ng[2]]) { _bins[ng[0]][ng[1]][ng[2]] = new BinType[][noABins]; }
423                         if (!_bins[ng[0]][ng[1]][ng[2]][ng[3]]) { _bins[ng[0]][ng[1]][ng[2]][ng[3]] = new BinType[noABins]; }
424                     }
425                     incs(_bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]);
426                 }
427                 else
428                 {
429                     static assert(0, "N >= 6 not supported");
430                 }
431             }
432             return this;
433         }
434     }
435 
436     /** Returns: Bin Count of NGram $(D ng). */
437     BinType opIndex(T)(in T ng) const @safe pure nothrow if (isIntegral!T ||
438                                                              isStaticArray!T)
439     {
440         static if (isAA)
441         {
442             if (ng in _bins)
443             {
444                 return _bins[ng];
445             }
446             else
447             {
448                 return 0; // empty means zero bin
449             }
450         }
451         else
452         {
453             static      if (N == 1) { return _bins[ng]; }
454             else static if (N == 2) { return _bins[ng[0]][ng[1]]; }
455             else static if (N == 3) { return _bins[ng[0]][ng[1]][ng[2]]; }
456             else static if (N == 4) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]]; }
457             else static if (N == 5) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]; }
458             else { static assert(0, "N >= 6 not supported"); }
459         }
460     }
461 
462     /* ref BinType opIndexAssign(Index)(BinType b, Index i) @trusted pure nothrow if (isIntegral!Index) in { */
463 
464     /* } */
465 
466     /** Returns: Bin Count of NGram $(D ng). */
467     auto opIndexAssign(T)(in T ng) const @safe pure nothrow if (isIntegral!T ||
468                                                                 isStaticArray!T)
469     {
470         static if (isAA)
471         {
472             if (ng in _bins)
473             {
474                 return _bins[ng];
475             }
476             else
477             {
478                 return 0; // empty means zero bin
479             }
480         }
481         else
482         {
483             static      if (N == 1) { return _bins[ng]; }
484             else static if (N == 2) { return _bins[ng[0]][ng[1]]; }
485             else static if (N == 3) { return _bins[ng[0]][ng[1]][ng[2]]; }
486             else static if (N == 4) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]]; }
487             else static if (N == 5) { return _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]]; }
488             else { static assert(0, "N >= 6 not supported"); }
489         }
490     }
491 
492     /** Put NGram Element $(D ng) in $(D this).
493      */
494     ref NGram put(ValueType[N] ng) @safe pure nothrow
495     {
496         static if (isBinary)
497         {
498             static      if (N == 1) { _bins[ng[0]] = true; }
499             else static if (N == 2) { _bins[ng[0]][ng[1]] = true; }
500             else static if (N == 3) { _bins[ng[0]][ng[1]][ng[2]] = true; }
501             else static if (N == 4) { _bins[ng[0]][ng[1]][ng[2]][ng[3]] = true; }
502             else static if (N == 5) { _bins[ng[0]][ng[1]][ng[2]][ng[3]][ng[4]] = true; }
503             else { static assert(0, "N >= 6 not supported"); }
504         }
505         else
506         {
507             static if (isAA)
508             {
509                 if (ng in _bins)
510                 {
511                     inc(ng); // increase bin
512                 }
513                 else
514                 {
515                     _bins[ng] = 1; // initial bin count
516                 }
517             }
518             else
519             {
520                 inc(ng);
521             }
522         }
523         invalidateStats();
524         return this;
525     }
526 
527     /** Put NGram Elements $(D range) in $(D this).
528      */
529     ref NGram put(T)(in T range) @safe pure nothrow if (isIterable!T)
530     {
531         static if (N == 1)
532         {
533             foreach (ix, const ref elt; range)
534             {
535                 put([elt]);
536             }
537         }
538         else static if (N == 2)
539         {
540             ValueType prev; // previous element
541             foreach (ix, const ref elt; range)
542             {
543                 if (ix >= N-1) { put([prev, elt]); }
544                 prev = elt;
545             }
546         }
547         else static if (N == 3)
548         {
549             ValueType pprev, prev; // previous elements
550             foreach (ix, const ref elt; range)
551             {
552                 if (ix >= N-1) { put([pprev, prev, elt]); }
553                 pprev = prev;
554                 prev = elt;
555             }
556         }
557         else static if (N == 4)
558         {
559             ValueType ppprev, pprev, prev; // previous elements
560             foreach (ix, const ref elt; range)
561             {
562                 if (ix >= N-1) { put([ppprev, pprev, prev, elt]); }
563                 ppprev = pprev;
564                 pprev = prev;
565                 prev = elt;
566             }
567         }
568         else static if (N == 5)
569         {
570             ValueType pppprev, ppprev, pprev, prev; // previous elements
571             foreach (ix, const ref elt; range)
572             {
573                 if (ix >= N-1) { put([pppprev, ppprev, pprev, prev, elt]); }
574                 pppprev = ppprev;
575                 ppprev = pprev;
576                 pprev = prev;
577                 prev = elt;
578             }
579         }
580         return this;
581     }
582 
583     /** Scalar (Dot) Product Match $(D this) with $(D rhs). */
584     CommonType!(ulong, BinType) matchDenser(rhsValueType,
585                                             Kind rhsKind = Kind.saturated,
586                                             Storage rhsStorage = Storage.denseDynamic,
587                                             RhsRange)(in NGram!(rhsValueType,
588                                                                 N,
589                                                                 rhsKind,
590                                                                 rhsStorage,
591                                                                 symmetry,
592                                                                 RequestedBinType,
593                                                                 RhsRange) rhs) const @trusted pure /* nothrow */
594     {
595         static if (this.isDense && rhs.isDense)
596         {
597             static if (N == 1) return dotProduct(this._bins, rhs._bins);
598             else static assert(0, "N >= " ~ to!string(N) ~ " not supported");
599         }
600         else static if (this.isSparse)
601         {
602             typeof(return) sum = 0;
603             foreach (ix, const ref bin; _bins) // assume lhs is sparsest
604             {
605                 sum += bin*rhs[ix];
606             }
607             return sum;
608         }
609         else static if (rhs.isSparse)
610         {
611             typeof(return) sum = 0;
612             foreach (ix, const ref bin; rhs._bins) // assume lhs is sparsest
613             {
614                 sum += this[ix]*bin;
615             }
616             return sum;
617         }
618         else
619         {
620             static assert(0, "Combination of " ~ typeof(storage).stringof ~ " and " ~
621                           typeof(rhsStorage).stringof ~ " not supported");
622             return 0;
623         }
624     }
625 
626     auto opBinary(string op,
627                   string file = __FILE__, int line = __LINE__)(in NGram rhs) const @trusted pure /* nothrow */
628     {
629         NGram tmp;
630         static if (this.isSparse ||
631                    rhs.isSparse)
632         {
633             void doIt(in NGram a, in NGram b) // more functional than orderInPlace
634             {
635                 foreach (ix, const ref bin; a._bins)
636                 {
637                     if (ix in b._bins)
638                     {
639                         mixin("tmp._bins[ix] = bin " ~ op ~ " b._bins[ix];"); // propagate pointwise operation
640                     }
641                 }
642             }
643             if (this.length < rhs.length) { doIt(this, rhs); }
644             else                          { doIt(rhs, this); }
645         }
646         else static if (this.isBinary &&
647                         rhs.isBinary)
648         {
649             mixin("tmp._bins = _bins " ~ op ~ "rhs._bins;"); // bitsets cannot be sliced (yet)
650         }
651         else static if (this.isDense &&
652                         rhs.isDense &&
653                         N == 1)
654         {
655             assert(this.length == rhs.length);
656             import std.range: zip;
657             import std.algorithm: map;
658             import std.array: array;
659             tmp._bins = zip(this._bins[], rhs._bins[]).map!(a => mixin("a[0] " ~ op ~ "a[1]"))().array; // TODO Use zipWith when Issue 8715 is fixed
660             /* foreach (ix, let; this._bins) { // TODO Reuse Phobos algorithm or range */
661             /*     mixin("tmp[ix] = _bins[ix] " ~ op ~ "rhs._bins[ix];"); */
662             /* } */
663         }
664         else
665         {
666             static assert(0, "Combination of " ~ to!string(this.getStorage) ~ " and " ~
667                           to!string(rhs.getStorage) ~ " and N == " ~ to!string(N) ~ " not supported");
668         }
669         return tmp;
670     }
671 
672     /** Determine Bin (Counter) Type. */
673     static if (isBinary)
674     {
675         alias BinType = bool;
676     }
677     else static if (is(RequestedBinType == void))
678     {
679         alias BinType = uint64_t;   // Bin (counter) type. Count long by default.
680     }
681     else
682     {
683         alias BinType = RequestedBinType;
684     }
685 
686     enum bitsE = 8 * ValueType.sizeof; /** Number of Bits in ValueType (element). TODO This may have to be fixed for ref types. */
687     enum combE = 2^^bitsE;  /** Number of combinations in element. */
688     enum noABins = combE; // Number of bins per array dimension
689     enum noBins = combE^^N; /** Maximum number of bins (possible). */
690 
691     // Determine storage structure base on element type.
692     static if (storage == Storage.sparse)
693     {
694         BinType[ValueType[N]] _bins;
695     }
696     else static if (isBinary &&
697                     N*bitsE <= 32) // safely fits on stack
698     {
699         static if        (storage == Storage.denseStatic)
700         {
701             static      if (N == 1) StaticBitArray!noABins _bins;
702             else static if (N == 2) StaticBitArray!noABins[noABins] _bins;
703             else static if (N == 3) StaticBitArray!noABins[noABins][noABins] _bins;
704             else static assert(0, "Dense static N >= 4 does no safely fit on stack");
705         }
706         else static if (storage == Storage.denseDynamic)
707         {
708             static      if (N == 1) StaticBitArray!noABins _bins;
709             else static if (N == 2) StaticBitArray!noABins[] _bins;
710             else static if (N == 3) StaticBitArray!noABins[][] _bins;
711             else static if (N == 4) StaticBitArray!noABins[][][] _bins;
712             else static if (N == 5) StaticBitArray!noABins[][][][] _bins;
713             else static assert(0, "N >= 6 not supported");
714         }
715         else static if (storage == Storage.sparse) // shouldn't happen
716         {
717         }
718     }
719     else static if (storage == Storage.denseStatic &&
720                     (isUnsigned!ValueType ||
721                      isSomeChar!ValueType) &&
722                     N*bitsE <= 16) // safely fits on stack
723     {
724         static      if (N == 1) BinType[noABins] _bins;
725         else static if (N == 2) BinType[noABins][noABins] _bins;
726         else static assert(0, "Dense static N >= 3 does not safely fit on stack");
727     }
728     else static if (storage == Storage.denseDynamic &&
729                     (isUnsigned!ValueType ||
730                      isSomeChar!ValueType)) // safely fits on stack
731     {
732         static      if (N == 1) BinType[] _bins;
733         else static if (N == 2) BinType[][] _bins;
734         else static if (N == 3) BinType[][][] _bins;
735         else static if (N == 4) BinType[][][][] _bins;
736         else static if (N == 5) BinType[][][][][] _bins;
737         else static assert(0, "N >= 6 not supported");
738     }
739     else
740     {
741         BinType[ValueType[N]] _bins;
742     }
743 
744     private alias _bins this;
745 
746     enum isAA = isAssociativeArray!(typeof(_bins));
747 
748     static if (isAA && cacheDeepDenseness)
749     {
750         ulong _deepDenseness;    // Cache deep denseness in associative arrays
751     }
752 
753     Q densenessUncached(int depth = -1) const pure @property @trusted nothrow
754     {
755         static if (isBinary)
756         {
757             static if (N >= 2)
758             {
759                 if (N == 2 && depth == 0)  // if shallow wanted
760                 {
761                     return shallowDenseness();
762                 }
763             }
764             return _bins.denseness;
765         }
766         else
767         {
768             static if (isAA)
769             {
770                 return Q(_bins.length, noBins);
771             }
772             else
773             {
774                 return _bins.denseness(depth);
775             }
776         }
777     }
778 
779     /** Returns: Number of Non-Zero Elements in $(D range). */
780     Q denseness(int depth = -1) const pure @property @safe nothrow
781     {
782         static if (isAA && cacheDeepDenseness)
783         {
784             if (depth == -1)  // -1 means deepDenseness
785             {
786                 if (!_deepDenseness)  // if not yet defined.
787                 {
788                     unqual(this).densenessUncached(depth).numerator; // calculate it
789                 }
790                 return Q(_deepDenseness, noBins);
791             }
792         }
793         return densenessUncached(depth);
794     }
795 }
796 
797 /**
798    Computes $(LUCKY Bi-Gram) of $(D range).
799 */
800 auto ngram(int N = 2,
801            Kind kind = Kind.saturated,
802            Storage storage = Storage.denseDynamic,
803            Symmetry symmetry = Symmetry.ordered,
804            RequestedBinType = void,
805            Range)(in Range range) @safe pure nothrow if (isIterable!Range)
806 {
807     return NGram!(Unqual!(ElementType!Range), N,
808                   kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
809 }
810 auto sparseUIntNGramOverRepresentation(int N = 2,
811                                        Kind kind = Kind.saturated,
812                                        Symmetry symmetry = Symmetry.ordered)(in string range) @safe pure nothrow
813 {
814     auto y = range.representation;
815     return ngram!(N, kind, Storage.sparse, symmetry, uint)(y);
816 }
817 
818 auto histogram(Kind kind = Kind.saturated,
819                Storage storage = Storage.denseDynamic,
820                Symmetry symmetry = Symmetry.ordered,
821                RequestedBinType = void,
822                Range)(in Range range) @safe pure nothrow if (isIterable!Range)
823 {
824     return NGram!(Unqual!(ElementType!Range), 1, kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
825 }
826 alias Hist = NGram;
827 alias hist = histogram;
828 alias unigram = histogram;
829 
830 unittest
831 {
832     const ubyte[] x = [1, 2, 3, 4, 5];
833     const h = x.histogram!(Kind.saturated, Storage.denseStatic);
834     const hh = h + h;
835 
836     auto h2 = x.histogram!(Kind.saturated, Storage.denseStatic);
837     h2.put(x);
838     assert(hh == h2);
839 }
840 
841 unittest
842 {
843     const ubyte[] x;
844     const hDS = x.histogram!(Kind.saturated, Storage.denseStatic);  assert(hDS.empty);
845 
846     const hDD = x.histogram!(Kind.saturated, Storage.denseDynamic); assert(hDD.empty);
847 
848     auto h = x.histogram!(Kind.saturated, Storage.sparse); assert(h.empty);
849     const ubyte[5] x1 = [1, 2, 3, 4, 5];
850     h.put(x1); assert(!h.empty);
851     assert(h.matchDenser(h) == x1.length);
852     auto hh = h + h;
853     assert(hh.matchDenser(hh) == 4*x1.length);
854 
855     h.destroy(); assert(h.empty);
856 
857     auto hU32 = x.histogram!(Kind.saturated, Storage.sparse, Symmetry.ordered, uint);
858 }
859 
860 unittest
861 {
862     alias ValueType = ubyte;
863     const ValueType[3] x = [11, 12, 13];
864 
865     auto h = x.histogram!(Kind.saturated, Storage.denseStatic);
866     assert(h[0] == 0);
867     assert(h[11] == 1 && h[12] == 1 && h[13] == 1);
868     assert(3 == h.matchDenser(h));
869 
870     h.put(x);
871     assert(h[11] == 2 && h[12] == 2 && h[13] == 2);
872     assert(12 == h.matchDenser(h));
873 
874     version(print) dbg(h);
875 
876     auto hD = x.histogram!(Kind.saturated, Storage.denseDynamic);
877     version(print) dbg(hD);
878 }
879 
880 unittest
881 {
882     alias ValueType = ulong;
883     const ValueType[3] x = [1, 2, 3];
884     auto h = x.histogram!(Kind.saturated, Storage.denseStatic);
885     h.put([4, 5, 6]);
886     assert(h.length == 6);
887 }
888 
889 /**
890    Computes $(LUCKY Binary Histogram) of $(D range).
891 */
892 auto bistogram(Range)(in Range range) @safe pure nothrow if (isIterable!Range)
893 
894 {
895     return NGram!(Unqual!(ElementType!Range), 1,
896                   Kind.binary,
897                   Storage.denseStatic,
898                   Symmetry.ordered,
899                   void,
900                   Unqual!Range)(range);
901 }
902 alias bunigram = bistogram;
903 
904 unittest
905 {
906     const ubyte[] x;
907     const b = x.bistogram;
908     assert(b.empty);
909 
910     // test const unqualification
911     NGram!(ubyte, 1, Kind.binary, Storage.denseStatic, Symmetry.ordered, void, const(ubyte)[]) cb;
912     cb = b;
913     assert(cb == b);
914 
915     // test immutable unqualification
916     NGram!(ubyte, 1, Kind.binary, Storage.denseStatic, Symmetry.ordered, void, immutable(ubyte)[]) ib;
917     ib = b;
918     assert(ib == b);
919 }
920 
921 unittest
922 {
923     const ubyte[3] x = [1, 2, 3];
924     const h_ = x.bistogram;
925     assert(8*h_.sizeof == 2^^8);
926     const h = h_;
927     assert(h[1] && h[2] && h[3]);
928     assert(!h[0]);
929     assert(!h[4]);
930 
931     const h2_ = h_;
932     assert((h2_ & h_) == h2_);
933     assert((h2_ | h_) == h2_);
934     assert((h2_.value & h_.value) == h2_.value);
935     assert((h2_.value | h_.value) == h2_.value);
936 }
937 unittest
938 {
939     const ushort[3] x = [1, 2, 3];
940     const h_ = x.bistogram;
941     assert(8*h_.sizeof == 2^^16);
942     const h = h_;
943     assert(h[1] && h[2] && h[3]);
944     assert(!h[0]);
945     assert(!h[4]);
946 }
947 
948 /**
949    Computes $(LUCKY Binary (Occurrence) NGram) of Bytes in Input String $(D x).
950 */
951 auto bistogramOverRepresentation(in string x) pure nothrow
952 
953 {
954     return bistogram(x.representation);
955 }
956 unittest
957 {
958     const x = "abcdef";
959     auto h = x.bistogramOverRepresentation;
960     size_t ix = 0;
961     foreach (const bin; h)
962     {
963         if (ix >= cast(ubyte)'a' &&
964             ix <= cast(ubyte)'f')
965         {
966             assert(bin);
967         }
968         else
969         {
970             assert(!bin);
971         }
972         ++ix;
973     }
974     version(print) dbg(h);
975 }
976 
977 /**
978    Computes $(LUCKY Bi-Gram) of $(D range).
979 */
980 auto bigram(Kind kind = Kind.saturated,
981             Storage storage = Storage.denseDynamic,
982             Symmetry symmetry = Symmetry.ordered,
983             RequestedBinType = void,
984             Range)(in Range range) @safe pure nothrow if (isIterable!Range)
985 {
986     return NGram!(Unqual!(ElementType!Range), 2,
987                   kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
988 }
989 auto sparseUIntBigram(Kind kind = Kind.saturated,
990                       Symmetry symmetry = Symmetry.ordered,
991                       Range)(in Range range) @safe pure nothrow if (isIterable!Range)
992 {
993     return NGram!(Unqual!(ElementType!Range), 2,
994                   kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
995 }
996 
997 unittest
998 {
999     const ubyte[] x = [1, 2, 3, 3, 3, 4, 4, 5];
1000 
1001     auto bSp = x.bigram!(Kind.saturated, Storage.sparse);
1002     alias SparseNGram = Unqual!(typeof(bSp));
1003 
1004     // check msgpacking
1005     // auto bSPBytes = bSp.pack();
1006     // SparseNGram bSp_;
1007     // bSPBytes.unpack(bSp_);
1008     // assert(bSp == bSp_);
1009 
1010     auto bS = x.bigram!(Kind.saturated, Storage.denseStatic);
1011     const bSCopy = bS;
1012     bS.put(x);
1013     assert(bS != bSCopy);
1014 
1015     const bD = x.bigram!(Kind.saturated, Storage.denseDynamic);
1016     version(print) dbg(bD);
1017 
1018     const bb = x.bigram!(Kind.binary, Storage.denseStatic);
1019     version(print) dbg(typeof(bb._bins).stringof);
1020 
1021     assert(bb.denseness(0).numerator == 4);
1022     // assert(bb.denseness(-1).numerator == 6);
1023 }
1024 unittest
1025 {
1026     const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1027 
1028     const bS = x.bigram!(Kind.saturated, Storage.denseStatic);
1029     version(print) dbg(bS);
1030     assert(bS.denseness.numerator == x.length - bS.order + 1);
1031 
1032     const bB = x.bigram!(Kind.binary, Storage.denseStatic);
1033     version(print) dbg(bB);
1034 }
1035 
1036 unittest
1037 {
1038     const x = [1, 2, 3, 4, 5];
1039     auto h = x.bigram!(Kind.saturated, Storage.sparse);
1040     assert(h.matchDenser(h) == x.length - 1);
1041 }
1042 
1043 /**
1044    Computes $(LUCKY Tri-Gram) of $(D range).
1045 */
1046 auto trigram(Kind kind = Kind.saturated,
1047              Storage storage = Storage.denseDynamic,
1048              Symmetry symmetry = Symmetry.ordered,
1049              RequestedBinType = void,
1050              Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1051 {
1052     return NGram!(Unqual!(ElementType!Range), 3,
1053                   kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1054 }
1055 auto sparseUIntTrigram(Kind kind = Kind.saturated,
1056                       Symmetry symmetry = Symmetry.ordered,
1057                       Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1058 {
1059     return NGram!(Unqual!(ElementType!Range), 3,
1060                   kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
1061 }
1062 unittest
1063 {
1064     const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1065 
1066     const bS = x.trigram!(Kind.saturated, Storage.denseStatic);
1067     version(print) dbg(bS);
1068     assert(bS.denseness.numerator == x.length - bS.order + 1);
1069 
1070     const bD = x.trigram!(Kind.saturated, Storage.denseDynamic);
1071     version(print) dbg(bD);
1072 
1073     const bB = x.trigram!(Kind.binary, Storage.denseStatic);
1074     /* version(print) dbg(bB); */
1075 }
1076 
1077 /**
1078    Computes $(LUCKY Qua-Gram) of $(D range).
1079 */
1080 auto quagram(Kind kind = Kind.saturated,
1081              Storage storage = Storage.denseDynamic,
1082              Symmetry symmetry = Symmetry.ordered,
1083              RequestedBinType = void,
1084              Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1085 {
1086     return NGram!(Unqual!(ElementType!Range), 4,
1087                   kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1088 }
1089 auto sparseUIntQuagram(Kind kind = Kind.saturated,
1090                        Symmetry symmetry = Symmetry.ordered,
1091                        Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1092 {
1093     return NGram!(Unqual!(ElementType!Range), 4,
1094                   kind, Storage.sparse, symmetry, uint, Unqual!(Range))(range);
1095 }
1096 unittest
1097 {
1098     const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1099 
1100     const bD = x.quagram!(Kind.saturated, Storage.denseDynamic);
1101     version(print) dbg(bD);
1102     assert(bD.denseness.numerator == x.length - bD.order + 1);
1103 }
1104 auto sparseUIntQuagramOverRepresentation(in string x) pure nothrow
1105 {
1106     return sparseUIntQuagram(x.representation);
1107 }
1108 
1109 /**
1110    Computes $(LUCKY Pen-Gram) of $(D range).
1111 */
1112 auto pengram(Kind kind = Kind.saturated,
1113              Storage storage = Storage.denseDynamic,
1114              Symmetry symmetry = Symmetry.ordered,
1115              RequestedBinType = void,
1116              Range)(in Range range) @safe pure nothrow if (isIterable!Range)
1117 {
1118     return NGram!(Unqual!(ElementType!Range), 5,
1119                   kind, storage, symmetry, RequestedBinType, Unqual!(Range))(range);
1120 }
1121 unittest
1122 {
1123     const ubyte[] x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
1124 
1125     const bS = x.pengram!(Kind.saturated, Storage.sparse);
1126     const bS_ = bS;
1127     assert(bS_ == bS);
1128 
1129     // skipping denseStatic because it doesn't fit stack anyway
1130 
1131     const bD = x.pengram!(Kind.saturated, Storage.denseDynamic);
1132     version(print) dbg(bD);
1133     assert(bD.denseness.numerator == x.length - bD.order + 1);
1134 }