/** Generic Language Constructs.

    See_Also: https://en.wikipedia.org/wiki/Predicate_(grammar)

    Note that ! and ? are more definite sentence enders than .

    TODO: `isSomeString` => `isStringLike`

    TODO: Use static foreach to add declarations for all isX, for each X

    See_Also: http://forum.dlang.org/thread/mgdtuxkuswfxxoithwxh@forum.dlang.org
*/
module nxt.lingua;

import std.traits : isSomeChar, isSomeString;
import std.algorithm.comparison : among;
import std.algorithm.iteration : uniq;
import std.algorithm.sorting : sort;
import std.array : array;
import std.conv;

/+ TODO: add overload to std.algorithm.among that takes an immutable array as
   argument to prevent calls to aliasSeqOf +/
import std.meta : aliasSeqOf;

import nxt.iso_639_1: Language;

@safe pure:

/** Computer Token Usage. */
enum Usage
{
	unknown,
	definition,
	reference,
	call
}

// ================ English Articles

/** English indefinite articles. */
static immutable englishIndefiniteArticles = [`a`, `an`];

/** English definite articles. */
static immutable englishDefiniteArticles = [`the`];

/** English articles (indefinite followed by definite). */
static immutable englishArticles = englishIndefiniteArticles ~ englishDefiniteArticles;

/** Check if $(D s) is an English indefinite article (exact, case-sensitive match).
 *
 * NOTE(review): unlike its siblings this predicate carries no
 * `isSomeString!S` constraint; it is left unconstrained so that existing
 * callers keep compiling.
 */
bool isEnglishIndefiniteArticle(S)(in S s) => cast(bool)s.among!(aliasSeqOf!englishIndefiniteArticles);

/** Check if $(D s) is an English definite article (exact, case-sensitive match). */
bool isEnglishDefiniteArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!englishDefiniteArticles);

/** Check if $(D s) is an English article (exact, case-sensitive match). */
bool isEnglishArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!englishArticles);

// ================ German Articles

/** German indefinite articles. */
static immutable germanIndefiniteArticles = [`ein`, `eine`, `einer`, `einen`, `einem`, `eines`];

/** German definite articles. */
static immutable germanDefiniteArticles = [`der`, `die`, `das`, `den`, `dem`, `des`];

/** German articles (indefinite followed by definite). */
static immutable germanArticles = germanIndefiniteArticles ~ germanDefiniteArticles;

/** Check if $(D s) is a German indefinite article (exact, case-sensitive match). */
bool isGermanIndefiniteArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanIndefiniteArticles);

/** Check if $(D s) is a German definite article (exact, case-sensitive match). */
bool isGermanDefiniteArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanDefiniteArticles);

/** Check if $(D s) is a German article (exact, case-sensitive match). */
// The constraint previously named an undefined `C`, which made it impossible to satisfy.
bool isGermanArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanArticles);

// ================ Vowels

/** English vowel type.
 * See_Also: https://simple.wikipedia.org/wiki/Vowel
 */
enum EnglishVowel { a, o, u, e, i, y,
					A, O, U, E, I, Y }

/** English Vowels (`y`/`Y` are counted as vowels here). */
static immutable dchar[] englishVowels = ['a', 'o', 'u', 'e', 'i', 'y',
										  'A', 'O', 'U', 'E', 'I', 'Y'];

/** Check if `c` is an English Vowel. */
bool isEnglishVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishVowels);

/** English Accented Vowels. */
static immutable dchar[] englishAccentedVowels = ['é'];

/** Check if `c` is an English Accented Vowel. */
bool isEnglishAccentedVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishAccentedVowels);

nothrow @nogc unittest {
	assert('é'.isEnglishAccentedVowel);
}

/** Swedish Hard Vowels. */
static immutable swedishHardVowels = ['a', 'o', 'u', 'å',
									  'A', 'O', 'U', 'Å'];

/** Swedish Soft Vowels. */
static immutable swedishSoftVowels = ['e', 'i', 'y', 'ä', 'ö',
									  'E', 'I', 'Y', 'Ä', 'Ö'];

/** Swedish Vowels (hard followed by soft). */
static immutable swedishVowels = swedishHardVowels ~ swedishSoftVowels;

/** Check if `c` is a Swedish Vowel. */
bool isSwedishVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishVowels);

/** Check if `c` is a Swedish hard vowel. */
bool isSwedishHardVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishHardVowels);

/** Check if `c` is a Swedish soft vowel. */
bool isSwedishSoftVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishSoftVowels);

/** Spanish Accented Vowels. */
enum spanishAccentedVowels = ['á', 'é', 'í', 'ó', 'ú',
							  'Á', 'É', 'Í', 'Ó', 'Ú'];

/** Check if `c` is a Spanish Accented Vowel. */
bool isSpanishAccentedVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!spanishAccentedVowels);

/** Check if `c` is a Spanish Vowel, that is an English vowel or a Spanish accented vowel. */
bool isSpanishVowel(const dchar c) nothrow @nogc => (c.isEnglishVowel ||
													 c.isSpanishAccentedVowel);

nothrow @nogc unittest {
	assert('é'.isSpanishVowel);
}

/** Check if `c` is a Vowel in language $(D lang).
 *
 * Only English and Swedish are handled specifically; every other language
 * falls back to the English rules.
 */
bool isVowel(const dchar c, Language lang) nothrow @nogc
{
	switch (lang)
	{
	case Language.en: return c.isEnglishVowel;
	case Language.sv: return c.isSwedishVowel;
	default: return c.isEnglishVowel;
	}
}

nothrow @nogc unittest {
	assert(!'k'.isSwedishVowel);
	assert('å'.isSwedishVowel);
}

/** English consonant type.
 * See_Also: https://simple.wikipedia.org/wiki/Consonant
 */
enum EnglishConsonant { b, c, d, f, g, h, j, k, l, m, n, p, q, r, s, t, v, w, x, z }

/** English lowercase consonant characters (`y` is counted as a vowel in this module). */
static immutable dchar[] englishLowerConsonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z'];

/** English uppercase consonant characters (`Y` is counted as a vowel in this module). */
static immutable dchar[] englishUpperConsonants = ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z'];

/** English consonant characters (lowercase followed by uppercase). */
static immutable dchar[] englishConsonants = englishLowerConsonants ~ englishUpperConsonants;

/** Check if `c` is an English Consonant. */
bool isEnglishConsonant(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishConsonants);
alias isSwedishConsonant = isEnglishConsonant;

nothrow @nogc unittest {
	assert('k'.isEnglishConsonant);
	assert('z'.isEnglishConsonant);
	assert(!'å'.isEnglishConsonant);
}

/** English letters (vowels followed by consonants). */
static immutable dchar[] englishLetters = englishVowels ~ englishConsonants;

/** Check if `c` is an English letter. */
bool isEnglishLetter(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishLetters);
alias isEnglish = isEnglishLetter;

nothrow @nogc unittest {
	assert('k'.isEnglishLetter);
	assert('Z'.isEnglishLetter);
	assert(!'å'.isEnglishLetter);
}

/** English Double Consonants. */
static immutable englishDoubleConsonants = [`bb`, `dd`, `ff`, `gg`, `mm`, `nn`, `pp`, `rr`, `tt`, `ck`, `ft`];

/** Check if `s` is an English Double consonant. */
// Uses the table above instead of a duplicated literal list so the two cannot drift apart.
bool isEnglishDoubleConsonant(scope const(char)[] s) nothrow @nogc => cast(bool)s.among!(aliasSeqOf!englishDoubleConsonants);

/** Computer token. */
enum TokenId
{
	unknown,

	keyword,
	type,
	constant,
	comment,
	variableName,
	functionName,
	builtinName,
	templateName,
	macroName,
	aliasName,
	enumeration,
	enumerator,
	constructor,
	destructors,
	operator,
}

/** Swedish Verb Inflection (conjugation of a verb).
 *
 * See_Also: http://www.101languages.net/swedish/swedish-verb-conjugator/
 * See_Also: http://www.verbix.com/webverbix/Swedish/springa.html
 */
enum SwedishVerbInflection
{
	unknown,
}

/** Verb Form.
 *
 * See_Also: http://verb.woxikon.se/sv/springa
 */
enum VerbForm
{
	unknown,

	imperative,					// Swedish example: spring

	infinitive,					// sv:infinitiv,grundform. Swedish example: springa
	base = infinitive,

	presentIndicative,			// sv:presens. Swedish example: springer

	presentParticiple,			// sv:presens particip. Swedish example: springande
	gerund = presentParticiple, // Form that functions as a noun. Source: https://en.wikipedia.org/wiki/Gerund

	pastIndicative,				// sv:imperfekt. Swedish example: sprang
	preteritum = pastIndicative,

	supinum,					// Swedish example: sprungit
	pastParticiple = supinum,

	perfekt,					// sv:perfekt. Swedish example: har sprungit

	perfektParticiple,			// sv:perfekt particip. Swedish example: sprungen

	pluskvamperfekt,			// sv:pluskvamperfekt. Swedish example: hade sprungit

	futurum,					// Swedish example: ska springa

	futurumExaktum,				// Swedish example: ska ha sprungit
	futurumPerfect = futurumExaktum,

	konditionalisI,				// Swedish example: skulle springa

	conditionalPerfect,			// Swedish example: skulle ha sprungit
	konditionalisII = conditionalPerfect,
}

/** Verb Instance: an expression together with its verb form. */
struct Verb(S)
if (isSomeString!S)
{
	S expr;
	VerbForm form;
	alias expr this;
}

/** Subject Count. */
enum Count
{
	unknown,
	singular,
	plural,
	uncountable
}

/** Noun Instance: an expression together with its count. */
struct Noun(S)
if (isSomeString!S)
{
	S expr;
	Count count;
	alias expr this;
}

/** Comparation.
 * See_Also: https://en.wikipedia.org/wiki/Comparison_(grammar)
 */
enum Comparation
{
	unknown,
	positive,
	comparative,
	superlative,
	elative,
	exzessive
}

/** Adjective Instance: an expression together with its degree of comparison. */
struct Adjective(S)
if (isSomeString!S)
{
	S expr;
	Comparation comparation;
	alias expr this;
}

/** English Tense.
 *
 * Same as "tempus" in Swedish.
 *
 * See_Also: http://www.ego4u.com/en/cram-up/grammar/tenses-graphic
 * See_Also: http://www.ego4u.com/en/cram-up/grammar/tenses-examples
 */
enum Tense
{
	unknown,

	present, presens = present, // sv:nutid
	past, preteritum = past, imperfekt = past, // sv:dåtid, https://en.wikipedia.org/wiki/Past_tense
	future, futurum = future, // framtid, https://en.wikipedia.org/wiki/Future_tense

	pastMoment,
	presentMoment,				// sv:plays
	futureMoment,				// [will|is going to|intends to] play

	pastPeriod,
	presentPeriod,
	futurePeriod,

	pastResult,
	presentResult,
	futureResult,

	pastDuration,
	presentDuration,
	futureDuration,
}
alias Tempus = Tense;

nothrow @nogc
{
	/** Check if $(D tense) is one of the past tenses. */
	bool isPast(Tense tense) => cast(bool)tense.among!(Tense.past, Tense.pastMoment, Tense.pastPeriod, Tense.pastResult, Tense.pastDuration);
	/** Check if $(D tense) is one of the present tenses. */
	bool isPresent(Tense tense) => cast(bool)tense.among!(Tense.present, Tense.presentMoment, Tense.presentPeriod, Tense.presentResult, Tense.presentDuration);
	/** Check if $(D tense) is one of the future tenses. */
	bool isFuture(Tense tense) => cast(bool)tense.among!(Tense.future, Tense.futureMoment, Tense.futurePeriod, Tense.futureResult, Tense.futureDuration);
}

/** Part of a Sentence. */
enum SentencePart
{
	unknown,
	subject,
	predicate,
	adverbial,
	object,
}

/** Kind of Adverbial. */
enum Adverbial
{
	unknown,

	manner,						// they were playing `happily` (sätts-adverbial in Swedish)

	place,						// we met in `London`, `at the beach`
	space = place,

	time,						// they start work `at six thirty`

	probability,				// `perhaps` the weather will be fine

	direction,					// superman flew `in`, the car drove `out` (förändring av tillstånd in Swedish)
	location,					// are you `in`?, the ball is `out` (oföränderligt tillstånd in Swedish)

	quantifier,					// he weighs `63 kilograms` (måtts-adverbial in Swedish)

	comparation,				// (grads-adverbial in Swedish)

	cause,						// (orsaks-adverbial in Swedish)

	circumstance,				// (omständighets-adverbial in Swedish)
}

/** Base class of sentence parts. */
class Part
{
}

// class Predicate : Part
// {
// }

/** Article (of noun).
 *
 * See_Also: https://en.wikipedia.org/wiki/Article_(grammar)
 */
enum Article
{
	unknown,					/// Unknown.
	definite,					/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Definite_article
	indefinite,					/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Indefinite_article
	proper,						/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Proper_article
	partitive,					/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Partitive_article
	negative,					/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Negative_article
	zero,						/// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Zero_article
}

/** Subject part of a sentence, carrying its article. */
class Subject : Part
{
	Article article;
}

/** English phrases expressing implication. */
static immutable implies = [`in order to`];

/** Subject Person. */
enum Person
{
	unknown,
	first,
	second,
	third
}

/** Grammatical Gender.
 *
 * Called genus in Swedish.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_gender
 * See_Also: https://sv.wikipedia.org/wiki/Genus_(k%C3%B6nsbegrepp)
 */
enum Gender
{
	unknown,

	male, masculine = male,		// maskulinum

	female, feminine = female,	// femininum

	neutral, neuter = neutral, neutrum = neuter, // non-alive. for example: "något"

	common, utrum = common, reale = utrum, // Present in Swedish. real/alive. for example: "någon"
}

/** (Grammatical) Mood.
 *
 * Sometimes also called mode.
 *
 * Named modus in Swedish.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood
 * See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#mood
 */
enum Mood
{
	unknown,

	indicative,					// indikativ in Swedish. Example: I eat pizza.

	/// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#subjunctive
	subjunctive,				// Example: if I were to eat more pizza, I would be sick.
	conjunctive = subjunctive,	// konjunktiv in Swedish

	conditional,
	optative,

	/// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#imperative
	imperative,					// imperativ in Swedish. Example: eat the pizza!

	jussive,
	potential,
	inferential,
	interrogative,

	/// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#wh-question
	whQuestion,					// Example: who is eating pizza?

	/// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#yn-question
	ynQuestion,					// Example: did you eat pizza?
}

/** Realis Moods.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Realis_moods
 */
enum realisMoods = [Mood.indicative];

/** Check if $(D mood) is a Realis Mood.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Realis_moods
 */
bool isRealis(Mood mood) @nogc nothrow => cast(bool)mood.among!(aliasSeqOf!realisMoods);

/** Irrealis Moods.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Irrealis_moods
 */
enum irrealisMoods = [Mood.subjunctive,
					  Mood.conditional,
					  Mood.optative,
					  Mood.imperative,
					  Mood.jussive,
					  Mood.potential,
					  Mood.inferential];

/** Check if $(D mood) is an Irrealis Mood.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Irrealis_moods
 */
bool isIrrealis(Mood mood) @nogc nothrow => cast(bool)mood.among!(aliasSeqOf!irrealisMoods);

/** English Negation Prefixes.
 *
 * See_Also: http://www.english-for-students.com/Negative-Prefixes.html
 */
static immutable englishNegationPrefixes = [ `un`, `non`, `dis`, `im`, `in`, `il`, `ir`, ];

/** Swedish Negation Prefixes. */
static immutable swedishNegationPrefixes = [ `icke`, `o`, ];

/** English Noun Suffixes.
 *
 * See_Also: http://www.english-for-students.com/Noun-Suffixes.html
 */
static immutable adjectiveNounSuffixes = [ `ness`, `ity`, `ment`, `ance` ];
/// ditto
static immutable verbNounSuffixes = [ `tion`, `sion`, `ment`, `ence` ];
/// ditto
static immutable nounNounSuffixes = [ `ship`, `hood` ];
/// ditto
static immutable allNounSuffixes = (adjectiveNounSuffixes ~
									verbNounSuffixes ~
									nounNounSuffixes ~
									[ `s`, `ses`, `xes`, `zes`, `ches`, `shes`, `men`, `ies`, ]);

/** English Verb Suffixes. */
static immutable verbSuffixes = [ `s`, `ies`, `es`, `es`, `ed`, `ed`, `ing`, `ing`, ];

/** English Adjective Suffixes. */
static immutable adjectiveSuffixes = [ `er`, `est`, `er`, `est` ];

/** English Job/Profession Title Suffixes.
 *
 * Typically built from noun or verb bases.
 *
 * See_Also: http://www.english-for-students.com/Job-Title-Suffixes.html
 */
static immutable jobTitleSuffixes = [ `or`, // traitor
									  `er`, // builder
									  `ist`, // typist
									  `an`, // technician
									  `man`, // dustman, barman
									  `woman`, // policewoman
									  `ian`, // optician
									  `person`, // chairperson
									  `sperson`, // spokesperson
									  `ess`, // waitress
									  `ive` // representative
	];

/** English Linking Verbs in Nominative Form.
 */
static immutable englishLinkingVerbs = [`is`, `seem`, `look`, `appear to be`, `could be`];

/** Swedish Linking Verbs in Nominative Form. */
static immutable swedishLinkingVerbs = [`är`, `verkar`, `ser`, `kan vara`];

/** English Word Suffixes: all noun, verb and adjective suffixes, sorted and
 * without duplicates.
 *
 * The lists are concatenated into one flat array and sorted before `uniq`,
 * because `uniq` only collapses adjacent equal elements.
 */
static immutable wordSuffixes = (allNounSuffixes ~ verbSuffixes ~ adjectiveSuffixes).dup.sort().uniq.array;

/** Return string $(D word) in plural optionally in $(D count).
 *
 * Params:
 *   word = singular English word.
 *   count = number of items; when 1, `word` is returned unchanged.
 *   pluralWord = optional explicit (irregular) plural that takes precedence
 *                over the built-in rules when non-null.
 *
 * Returns: `word` itself when `count` is 1 or `word` is empty, `pluralWord`
 * when given, otherwise `word` inflected by these ending rules:
 * `s`, `x`, `z`, `o`, `ch`, `sh` => append `es`; `f` => replace with `ves`;
 * `y` not preceded by a vowel => replace with `ies`; anything else => append `s`.
 */
inout(string) inPlural(scope return inout(string) word,
					   in int count = 2,
					   scope return inout(string) pluralWord = null)
{
	if (count == 1 || word.length == 0)
		return word;			// it isn't actually inPlural
	if (pluralWord !is null)
		return pluralWord;		// caller-supplied plural wins over the rules below

	// Second-to-last character, or NUL when `word` has a single character.
	const char beforeLast = word.length >= 2 ? word[$ - 2] : '\0';

	switch (word[$ - 1])
	{
	case 's', 'x', 'z', 'o':
		return word ~ `es`;		// bus => buses, box => boxes, hero => heroes
	case 'h':
		if (beforeLast == 'c' || beforeLast == 's')
			return word ~ `es`; // church => churches, dish => dishes
		return word ~ `s`;		// path => paths
	case 'f':
		return word[0 .. $-1] ~ `ves`; // wolf => wolves
	case 'y':
		if (beforeLast.isEnglishVowel)
			return word ~ `s`;	// day => days
		return word[0 .. $-1] ~ `ies`; // city => cities
	default:
		return word ~ `s`;		// house => houses, car => cars
	}
}

/** Return $(D s) lemmatized (normalized).
 *
 * Maps `is`, `am`, `are` (and `be`) to `be`, and `does` (and `do`) to `do`.
 * Every other input is returned unchanged.
 *
 * See_Also: https://en.wikipedia.org/wiki/Lemmatisation
 */
inout(S) lemmatized(S)(scope return inout(S) s) nothrow if (isSomeString!S)
{
	if (s.among!(`be`, `is`, `am`, `are`)) return `be`;
	else if (s.among!(`do`, `does`)) return `do`;
	else return s;
}

/** Return the negation word of language $(D lang).
 *
 * English, Swedish and German are handled; every other language falls back
 * to the English `not`.
 *
 * TODO: Reuse knet translation query instead.
 */
string negationIn(in Language lang) nothrow @nogc
{
	switch (lang)
	{
	case Language.en: return `not`;
	case Language.sv: return `inte`;
	case Language.de: return `nicht`;
	default: return `not`;
	}
}

/** Manner (register) of speech. */
enum Manner
{
	/+ TODO: add unknown +/
	formal,
	informal,
	slang,
	rude,
}

/** Grammatical Case.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_case
 */
enum Case
{
	unknown,
	nominative,
	genitive,
	dative,
	accusative,
	ablative
}

/** English Subject Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Subject_pronoun
 */
static immutable englishSubjectPronouns = [`I`, // 1st-person singular
										   `you`, // 2nd-person singular
										   `he`, `she`, `it`, // 3rd-person singular
										   `we`, // 1st-person plural
										   `they`, // 3rd-person plural
										   `what`, // interrogative singular (Object)
										   `who`]; // interrogative singular

/** Swedish Subject Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Subject_pronoun
 */
static immutable swedishSubjectPronouns = [`jag`, // 1st-person singular
										   `du`, // 2nd-person singular
										   `han`, `hon`, `den`, `det`, // 3rd-person singular
										   `vi`, // 1st-person plural
										   `de`, // 3rd-person plural
										   `vad`, // interrogative singular (Object)
										   `vem`, // interrogative singular
										   `vilka`]; // interrogative plural

/** English Object Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Object_pronoun
 */
static immutable englishObjectPronouns = [`me`, // 1st-person singular
										  `you`, // 2nd-person singular
										  `him`, `her`, // 3rd-person singular
										  `us`, // 1st-person plural
										  `them`, // 3rd-person plural
										  `whom`]; // interrogative singular

/** Swedish Object Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Object_pronoun
 */
static immutable swedishObjectPronouns = [`mig`, `dig`, // 1st- and 2nd-person singular
										  `honom`, `henne`, // 3rd-person singular
										  `oss`, // 1st-person plural
										  `dem`]; // 3rd-person plural

/** Letter Casing of a word. */
enum Casing
{
	unknown,
	lower,
	upper,
	capitalized,
	camel
}