/**
 * FASTQ is a format for storing DNA sequences together with the associated
 * quality information, often encoded in ASCII characters. A record is
 * typically made of 4 lines; for example, 2 FASTQ entries would look like this:
 *
 * @seq1
 * TTATTTTAAT
 * +
 * ?+BBB/DHH@
 * @seq2
 * GACCCTTTGCA
 * +
 * ?+BHB/DIH@
 *
 * See_Also: https://en.wikipedia.org/wiki/FASTQ_format
 * See_Also: http://forum.dlang.org/post/nd01qd$2k8c$1@digitalmars.com
 */
module fastq;

pure nothrow @safe @nogc:

/**
 * One FASTQ entry. All fields are slices into the text handed to `parse`;
 * nothing is copied.
 */
struct FastQRecord
{
    /+ TODO: `inout` support like in `rdf.d` +/
    const(char)[] sequenceId;      /// Header line without the leading '@'.
    const(char)[] sequenceLetters; /// Second line of the record.
    const(char)[] quality;         /// Fourth line of the record.

    /**
     * Lazily parses `from` as a sequence of 4-line FASTQ records.
     *
     * Params:
     *   from = FASTQ text. Lines may end in "\n" or "\r\n". Blanks (every
     *          char <= ' ') before a record are skipped. The last quality
     *          line does not need a trailing newline.
     *
     * Returns: an input range of `FastQRecord`. It is empty when `from` is
     * empty or contains only blanks.
     *
     * Malformed input (missing '@', empty line inside a record, record cut
     * short) is reported through `assert`, because the code is `nothrow`.
     */
    static auto parse(const(char)[] from)
    {
        static struct Result
        {
        @safe pure nothrow:
            private
            {
                const(char)[] source; // text not consumed yet
                FastQRecord value;    // record currently exposed by `front`
                bool isEmpty;         // set once no further record was found
            }

            /// Starts parsing `source` and loads the first record, if any.
            this(const(char)[] source)
            {
                this.source = source;
                popFront;
            }

            @property
            {
                /// The most recently parsed record.
                FastQRecord front()
                {
                    return value;
                }

                /// True once the input holds no further record.
                bool empty()
                {
                    return isEmpty;
                }
            }

            /// Drops leading chars <= ' ' from `source`; leaves it null when
            /// nothing else remains.
            private void skipBlanks()
            {
                foreach (i, c; source)
                {
                    if (c > ' ')
                    {
                        source = source[i .. $];
                        return;
                    }
                }

                source = null;
            }

            /**
             * Removes the next line from `source` and returns it without its
             * "\n" / "\r\n" terminator.
             *
             * Params:
             *   isFinal = true for the last line of a record, which may be
             *             terminated by the end of input instead of "\n".
             */
            private const(char)[] takeLine(bool isFinal)
            {
                import std.string : indexOf;

                const ptrdiff_t found = source.indexOf("\n");
                const bool atEndOfInput = found < 0 && isFinal;
                assert(found >= 0 || isFinal, "truncated FASTQ record");

                // With asserts disabled a missing newline on a non-final line
                // turns into size_t.max here, so the slice below still fails
                // its bounds check instead of silently accepting the input.
                const size_t end = atEndOfInput ? source.length : cast(size_t) found;
                assert(end != 0, "empty line inside FASTQ record");

                const(char)[] line = source[0 .. end];
                source = atEndOfInput ? null : source[end + 1 .. $];

                // Length guard: a line consisting of "\n" only must not be
                // indexed at [$ - 1].
                if (line.length != 0 && line[$ - 1] == '\r')
                    line = line[0 .. $ - 1];

                return line;
            }

            /// Parses the next record into `front`, or marks the range empty
            /// when only blanks (or nothing) are left.
            void popFront()
            {
                skipBlanks();

                if (source.length == 0)
                {
                    // Covers "", whitespace-only input and the state after
                    // the last record alike.
                    source = null;
                    isEmpty = true;
                    return;
                }

                // sequenceId
                assert(source[0] == '@');
                // skipBlanks guarantees source[0] > ' ', so the header line
                // has at least one char left after stripping "\r".
                value.sequenceId = takeLine(false)[1 .. $];

                // sequenceLetters
                value.sequenceLetters = takeLine(false);

                // +sequenceId (content is ignored)
                takeLine(false);

                // quality
                value.quality = takeLine(true);
            }
        }

        return Result(from);
    }
}

unittest {
    string input = `
@seq1
TTATTTTAAT
+
?+BBB/DHH@
@seq2
GACCCTTTGCA
+
?+BHB/DIH@
@SEQ_ID
GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
+
!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
`[1 .. $];
    assert(equal(FastQRecord.parse(input),
                 [FastQRecord("seq1", "TTATTTTAAT", "?+BBB/DHH@"),
                  FastQRecord("seq2", "GACCCTTTGCA", "?+BHB/DIH@"),
                  FastQRecord("SEQ_ID", "GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT", "!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65")].s[]));
}

/// Empty input, CRLF line ends and a missing final newline.
unittest {
    assert(FastQRecord.parse("").empty);
    assert(FastQRecord.parse(" \r\n\t\n").empty);

    auto records = FastQRecord.parse("@id\r\nACGT\r\n+\r\nIIII");
    assert(!records.empty);
    assert(records.front == FastQRecord("id", "ACGT", "IIII"));
    records.popFront();
    assert(records.empty);
}

version (unittest)
{
    import std.algorithm.comparison : equal;
    import nxt.array_help : s;
}