/** Generic Language Constructs.
    See_Also: https://en.wikipedia.org/wiki/Predicate_(grammar)

    Note that ! and ? are more definite sentence enders than .

    TODO: `isSomeString` => `isStringLike`

    TODO: Use static foreach to add declarations for all isX, for each X

    See_Also: http://forum.dlang.org/thread/mgdtuxkuswfxxoithwxh@forum.dlang.org
*/
module nxt.lingua;

import std.traits : isSomeChar, isSomeString;
import std.algorithm.comparison : among;
import std.algorithm.iteration : uniq;
import std.array : array;
import std.conv;

// TODO: add overload to std.algorithm.among that takes an immutable array as
// argument to prevent calls to aliasSeqOf
import std.meta : aliasSeqOf;

import nxt.languages: Lang;

@safe pure:

/** Computer Token Usage. */
enum Usage
{
    unknown,
    definition,
    reference,
    call
}

/// ================ English Articles

/** English indefinite articles. */
static immutable englishIndefiniteArticles = [`a`, `an`];

/** English definite articles. */
static immutable englishDefiniteArticles = [`the`];

/** All English articles: the indefinite ones followed by the definite ones. */
static immutable englishArticles = englishIndefiniteArticles ~ englishDefiniteArticles;

/** Check if `s` is one of `englishIndefiniteArticles`.
 *
 * NOTE(review): unlike its two siblings below this template carries no
 * `isSomeString!S` constraint.
 */
bool isEnglishIndefiniteArticle(S)(in S s)
{
    // `among` yields the 1-based index of the match, or 0 when there is none
    return s.among!(aliasSeqOf!englishIndefiniteArticles) != 0;
}

/** Check if `s` is one of `englishDefiniteArticles`. */
bool isEnglishDefiniteArticle(S)(in S s)
if (isSomeString!S)
{
    return s.among!(aliasSeqOf!englishDefiniteArticles) != 0;
}

/** Check if `s` is one of `englishArticles`. */
bool isEnglishArticle(S)(in S s)
if (isSomeString!S)
{
    return s.among!(aliasSeqOf!englishArticles) != 0;
}

/// ================ German Articles

/** German indefinite articles. */
static immutable germanIndefiniteArticles = [`ein`, `eine`, `einer`, `einen`, `einem`, `eines`];

/** German definite articles. */
static immutable germanDefiniteArticles = [`der`, `die`, `das`, `den`, `dem`, `des`];

/** German articles (the indefinite ones followed by the definite ones).
*/
static immutable germanArticles = germanIndefiniteArticles ~ germanDefiniteArticles;

/** Check if $(D s) is a German indefinite article. */
bool isGermanIndefiniteArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanIndefiniteArticles);

/** Check if $(D s) is a German definite article. */
bool isGermanDefiniteArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanDefiniteArticles);

/** Check if $(D s) is a German article (indefinite or definite).
 *
 * The constraint must name the template parameter `S`; it previously named an
 * undeclared symbol, which made this template impossible to instantiate.
 */
bool isGermanArticle(S)(in S s) if (isSomeString!S) => cast(bool)s.among!(aliasSeqOf!germanArticles);

unittest
{
    assert(`ein`.isGermanIndefiniteArticle);
    assert(`der`.isGermanDefiniteArticle);
    assert(`eine`.isGermanArticle);
    assert(`des`.isGermanArticle);
    assert(!`the`.isGermanArticle);
}

/// ================ Vowels

/** English vowel type.
 *
 * Note that `y` is classified as a vowel here.
 *
 * See_Also: https://simple.wikipedia.org/wiki/Vowel
 */
enum EnglishVowel { a, o, u, e, i, y,
                    A, O, U, E, I, Y }

/** English Vowels. */
static immutable dchar[] englishVowels = ['a', 'o', 'u', 'e', 'i', 'y',
                                          'A', 'O', 'U', 'E', 'I', 'Y'];

/** Check if `c` is an English Vowel, that is a member of `englishVowels`. */
bool isEnglishVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishVowels);

/** English Accented Vowels. */
static immutable dchar[] englishAccentedVowels = ['é'];

/** Check if `c` is an English Accented Vowel. */
bool isEnglishAccentedVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishAccentedVowels);

nothrow @nogc unittest
{
    assert('é'.isEnglishAccentedVowel);
}

/** Swedish Hard Vowels. */
static immutable swedishHardVowels = ['a', 'o', 'u', 'å',
                                      'A', 'O', 'U', 'Å'];

/** Swedish Soft Vowels. */
static immutable swedishSoftVowels = ['e', 'i', 'y', 'ä', 'ö',
                                      'E', 'I', 'Y', 'Ä', 'Ö'];

/** Swedish Vowels (hard followed by soft). */
static immutable swedishVowels = swedishHardVowels ~ swedishSoftVowels;

/** Check if `c` is a Swedish Vowel. */
bool isSwedishVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishVowels);

/** Check if `c` is a Swedish hard vowel.
*/
bool isSwedishHardVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishHardVowels);

/** Check if `c` is a Swedish soft vowel. */
bool isSwedishSoftVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!swedishSoftVowels);

/** Spanish Accented Vowels. */
enum spanishAccentedVowels = ['á', 'é', 'í', 'ó', 'ú',
                              'Á', 'É', 'Í', 'Ó', 'Ú'];

/** Check if `c` is a Spanish Accented Vowel. */
bool isSpanishAccentedVowel(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!spanishAccentedVowels);

/** Check if `c` is a Spanish Vowel.
 *
 * A Spanish vowel is either an English vowel or a Spanish accented vowel.
 */
bool isSpanishVowel(const dchar c) nothrow @nogc => (c.isEnglishVowel ||
                                                     c.isSpanishAccentedVowel);

nothrow @nogc unittest
{
    assert('é'.isSpanishVowel);
}

/** Check if `c` is a Vowel in language $(D lang).
 *
 * Only `Lang.en` and `Lang.sv` have dedicated rules; every other language
 * falls back to the English rules.
 */
bool isVowel(const dchar c, Lang lang) nothrow @nogc
{
    switch (lang)
    {
    case Lang.en: return c.isEnglishVowel;
    case Lang.sv: return c.isSwedishVowel;
    default: return c.isEnglishVowel;
    }
}

nothrow @nogc unittest
{
    assert(!'k'.isSwedishVowel);
    assert('å'.isSwedishVowel);
}

/** English consonant type.
 *
 * `y` is absent because this module lists it among the vowels.
 *
 * See_Also: https://simple.wikipedia.org/wiki/Consonant
 */
enum EnglishConsonant { b, c, d, f, g, h, j, k, l, m, n, p, q, r, s, t, v, w, x, z }

/** English lowercase consonant characters. */
static immutable dchar[] englishLowerConsonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z'];

/** English uppercase consonant characters. */
static immutable dchar[] englishUpperConsonants = ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z'];

/** English consonant characters (lowercase followed by uppercase). */
static immutable dchar[] englishConsonants = englishLowerConsonants ~ englishUpperConsonants;

nothrow @nogc unittest
{
    // `z` used to be missing from the consonant tables
    assert('z'.isEnglishConsonant);
    assert('Z'.isEnglishConsonant);
}

/** Check if `c` is a Consonant.
*/
bool isEnglishConsonant(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishConsonants);
alias isSwedishConsonant = isEnglishConsonant;

nothrow @nogc unittest
{
    assert('k'.isEnglishConsonant);
    assert(!'å'.isEnglishConsonant);
}

/** English letters (vowels followed by consonants). */
static immutable dchar[] englishLetters = englishVowels ~ englishConsonants;

/** Check if `c` is an English letter, that is a member of `englishLetters`. */
bool isEnglishLetter(const dchar c) nothrow @nogc => cast(bool)c.among!(aliasSeqOf!englishLetters);
alias isEnglish = isEnglishLetter;

nothrow @nogc unittest
{
    assert('k'.isEnglishLetter);
    assert(!'å'.isEnglishLetter);
}

/** English Double consonants. */
static immutable englishDoubleConsonants = [`bb`, `dd`, `ff`, `gg`, `mm`, `nn`, `pp`, `rr`, `tt`, `ck`, `ft`];

/** Check if `s` is an English Double consonant.
 *
 * Matches against `englishDoubleConsonants` so that the table and the
 * predicate cannot drift apart.
 */
bool isEnglishDoubleConsonant(scope const(char)[] s) nothrow @nogc => cast(bool)s.among!(aliasSeqOf!englishDoubleConsonants);

nothrow @nogc unittest
{
    assert(`tt`.isEnglishDoubleConsonant);
    assert(`ck`.isEnglishDoubleConsonant);
    assert(!`ab`.isEnglishDoubleConsonant);
}

/** Computer token. */
enum TokenId
{
    unknown,

    keyword,
    type,
    constant,
    comment,
    variableName,
    functionName,
    builtinName,
    templateName,
    macroName,
    aliasName,
    enumeration,
    enumerator,
    constructor,
    destructors,
    operator,
}

/** Swedish Verb Inflection (conjugation of a verb).
 *
 * See_Also: http://www.101languages.net/swedish/swedish-verb-conjugator/
 * See_Also: http://www.verbix.com/webverbix/Swedish/springa.html
 */
enum SwedishVerbInflection
{
    unknown,
}

/** Verb Form.
 *
 * See_Also: http://verb.woxikon.se/sv/springa
 */
enum VerbForm
{
    unknown,

    imperative,                 // Swedish example: spring

    infinitive,                 // sv:infinitiv,grundform. Swedish example: springa
    base = infinitive,

    presentIndicative,          // sv:presens. Swedish example: springer

    presentParticiple,          // sv:presens particip. Swedish example: springande
    gerund = presentParticiple, // Form that functions as a noun. Source: https://en.wikipedia.org/wiki/Gerund

    pastIndicative,             // sv:imperfekt. Swedish example: sprang
    preteritum = pastIndicative,

    supinum,                    // Swedish example: sprungit
    pastParticiple = supinum,

    perfekt,                    // sv:perfekt. Swedish example: har sprungit

    perfektParticiple,          // sv:perfekt particip. Swedish example: sprungen

    pluskvamperfekt,            // sv:pluskvamperfekt. Swedish example: hade sprungit

    futurum,                    // Swedish example:ska springa

    futurumExaktum,             // Swedish example:ska ha sprungit
    futurumPerfect = futurumExaktum,

    konditionalisI,             // Swedish example:skulle springa

    conditionalPerfect,         // Swedish example:skulle ha sprungit
    konditionalisII = conditionalPerfect,
}

/** Verb Instance.
 *
 * Pairs the expression `expr` with its `form`; converts implicitly to `expr`.
 */
struct Verb(S)
if (isSomeString!S)
{
    S expr;
    VerbForm form;
    alias expr this;
}

/** Subject Count. */
enum Count
{
    unknown,
    singular,
    plural,
    uncountable
}

/** Noun Instance.
 *
 * Pairs the expression `expr` with its `count`; converts implicitly to `expr`.
 */
struct Noun(S)
if (isSomeString!S)
{
    S expr;
    Count count;
    alias expr this;
}

/** Comparation.
 * See_Also: https://en.wikipedia.org/wiki/Comparison_(grammar)
 */
enum Comparation
{
    unknown,
    positive,
    comparative,
    superlative,
    elative,
    exzessive
}

/** Adjective Instance.
 *
 * Pairs the expression `expr` with its `comparation`; converts implicitly to
 * `expr`.
 */
struct Adjective(S)
if (isSomeString!S)
{
    S expr;
    Comparation comparation;
    alias expr this;
}

/** English Tense.
 *
 * Same as "tempus" in Swedish.
See_Also: http://www.ego4u.com/en/cram-up/grammar/tenses-graphic
See_Also: http://www.ego4u.com/en/cram-up/grammar/tenses-examples
*/
enum Tense
{
    unknown,

    // sv:nutid
    present,
    presens = present,

    // sv:dåtid, https://en.wikipedia.org/wiki/Past_tense
    past,
    preteritum = past,
    imperfekt = past,

    // framtid, https://en.wikipedia.org/wiki/Future_tense
    future,
    futurum = future,

    pastMoment,
    presentMoment,              // sv:plays
    futureMoment,               // [will|is going to|intends to] play

    pastPeriod,
    presentPeriod,
    futurePeriod,

    pastResult,
    presentResult,
    futureResult,

    pastDuration,
    presentDuration,
    futureDuration,
}

/// Swedish-flavoured name for `Tense`.
alias Tempus = Tense;

nothrow @nogc
{
    /// Check if `tense` is `Tense.past` or one of the `past*` variants.
    bool isPast(Tense tense)
    {
        switch (tense)
        {
        case Tense.past,
             Tense.pastMoment,
             Tense.pastPeriod,
             Tense.pastResult,
             Tense.pastDuration:
            return true;
        default:
            return false;
        }
    }

    /// Check if `tense` is `Tense.present` or one of the `present*` variants.
    bool isPresent(Tense tense)
    {
        switch (tense)
        {
        case Tense.present,
             Tense.presentMoment,
             Tense.presentPeriod,
             Tense.presentResult,
             Tense.presentDuration:
            return true;
        default:
            return false;
        }
    }

    /// Check if `tense` is `Tense.future` or one of the `future*` variants.
    bool isFuture(Tense tense)
    {
        switch (tense)
        {
        case Tense.future,
             Tense.futureMoment,
             Tense.futurePeriod,
             Tense.futureResult,
             Tense.futureDuration:
            return true;
        default:
            return false;
        }
    }
}

/** Part of a Sentence.
*/
enum SentencePart
{
    unknown,
    subject,
    predicate,
    adverbial,
    object,
}

/** Kind of adverbial. */
enum Adverbial
{
    unknown,

    /// they were playing `happily` (sätts-adverbial in Swedish)
    manner,

    /// we met in `London`, `at the beach`
    place,
    space = place,

    /// they start work `at six thirty`
    time,

    /// `perhaps` the weather will be fine
    probability,

    /// superman flew `in`, the car drove `out` (förändring av tillstånd in Swedish)
    direction,
    /// are you `in`?, the ball is `out` (oföränderligt tillstånd in Swedish)
    location,

    /// he weighs `63 kilograms` (måtts-adverbial in Swedish)
    quantifier,

    /// (grads-adverbial in Swedish)
    comparation,

    /// (orsaks-adverbial in Swedish)
    cause,

    /// (omständighets-adverbial in Swedish)
    circumstance,
}

/** Base class of `Subject`; has no members of its own. */
class Part
{
}

// class Predicate : Part
// {
// }

/** Article (of noun).
 *
 * See_Also: https://en.wikipedia.org/wiki/Article_(grammar)
 */
enum Article
{
    unknown,                    /// Unknown.
    definite,                   /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Definite_article
    indefinite,                 /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Indefinite_article
    proper,                     /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Proper_article
    partitive,                  /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Partitive_article.
    negative,                   /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Negative_article
    zero,                       /// See_Also: https://en.wikipedia.org/wiki/Article_(grammar)#Zero_article
}

/** A `Part` that carries an `Article`. */
class Subject : Part
{
    Article article;
}

/** Phrases stored under the name `implies` (currently only `in order to`). */
static immutable implies = [`in order to`];

/** Subject Person. */
enum Person
{
    unknown,
    first,
    second,
    third
}

/** Grammatical Gender.
 *
 * Called genus in Swedish.
See_Also: https://en.wikipedia.org/wiki/Grammatical_gender
See_Also: https://sv.wikipedia.org/wiki/Genus_(k%C3%B6nsbegrepp)
*/
enum Gender
{
    unknown,

    // maskulinum
    male,
    masculine = male,

    // femininum
    female,
    feminine = female,

    // non-alive. for example: "något"
    neutral,
    neuter = neutral,
    neutrum = neuter,

    // Present in Swedish. real/alive. for example: "någon"
    common,
    utrum = common,
    reale = utrum,
}

/** (Grammatical) Mood.
 *
 * Sometimes also called mode.
 *
 * Named modus in Swedish.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood
 * See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#mood
 */
enum Mood
{
    unknown,

    indicative,                 // indikativ in Swedish. Example: I eat pizza.

    /// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#subjunctive
    subjunctive,                // Example: if I were to eat more pizza, I would be sick.
    conjunctive = subjunctive,  // konjunktiv in Swedish

    conditional,
    optative,

    /// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#imperative
    imperative,                 // imperativ in Swedish. Example: eat the pizza!

    jussive,
    potential,
    inferential,
    interrogative,

    /// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#wh-question
    whQuestion,                 // Example: who is eating pizza?

    /// See_Also: https://www.cse.unsw.edu.au/~billw/nlpdict.html#yn-question
    ynQuestion,                 // Example: did you eat pizza?
}

/** Check if $(D mood) is a Realis Mood.
 *
 * `Mood.indicative` is the only mood treated as realis.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Realis_moods
 */
bool isRealis(Mood mood) @nogc nothrow
{
    return mood == Mood.indicative;
}

/// The realis moods as an array.
enum realisMoods = [Mood.indicative];

/** Check if $(D mood) is an Irrealis Mood.
See_Also: https://en.wikipedia.org/wiki/Grammatical_mood#Irrealis_moods
*/
bool isIrrealis(Mood mood) @nogc nothrow
{
    // `among` yields the 1-based index of the match, or 0 when there is none
    return mood.among!(aliasSeqOf!irrealisMoods) != 0;
}

/// The moods for which `isIrrealis` holds.
enum irrealisMoods = [
    Mood.subjunctive,
    Mood.conditional,
    Mood.optative,
    Mood.imperative,
    Mood.jussive,
    Mood.potential,
    Mood.inferential,
];

/** English Negation Prefixes.
 *
 * See_Also: http://www.english-for-students.com/Negative-Prefixes.html
 */
static immutable englishNegationPrefixes = [`un`, `non`, `dis`, `im`, `in`, `il`, `ir`];

/** Swedish Negation Prefixes. */
static immutable swedishNegationPrefixes = [`icke`, `o`];

/** English Noun Suffixes.
 *
 * See_Also: http://www.english-for-students.com/Noun-Suffixes.html
 */
static immutable adjectiveNounSuffixes = [`ness`, `ity`, `ment`, `ance`];
static immutable verbNounSuffixes = [`tion`, `sion`, `ment`, `ence`];
static immutable nounNounSuffixes = [`ship`, `hood`];
static immutable allNounSuffixes = adjectiveNounSuffixes
    ~ verbNounSuffixes
    ~ nounNounSuffixes
    ~ [`s`, `ses`, `xes`, `zes`, `ches`, `shes`, `men`, `ies`];

/** English Verb Suffixes.
 *
 * NOTE(review): `es`, `ed` and `ing` each occur twice in this list.
 */
static immutable verbSuffixes = [`s`, `ies`, `es`, `es`, `ed`, `ed`, `ing`, `ing`];

/** English Adjective Suffixes.
 *
 * NOTE(review): `er` and `est` each occur twice in this list.
 */
static immutable adjectiveSuffixes = [`er`, `est`, `er`, `est`];

/** English Job/Profession Title Suffixes.
 *
 * Typically built from noun or verb bases.
See_Also: http://www.english-for-students.com/Job-Title-Suffixes.html
*/
static immutable jobTitleSuffixes = [ `or`, // traitor
                                      `er`, // builder
                                      `ist`, // typist
                                      `an`, // technician
                                      `man`, // dustman, barman
                                      `woman`, // policewoman
                                      `ian`, // optician
                                      `person`, // chairperson
                                      `sperson`, // spokesperson
                                      `ess`, // waitress
                                      `ive` // representative
    ];

/** English Linking Verbs in Nominative Form.
 */
static immutable englishLinkingVerbs = [`is`, `seem`, `look`, `appear to be`, `could be`];

/** Swedish Linking Verbs. */
static immutable swedishLinkingVerbs = [`är`, `verkar`, `ser`, `kan vara`];

/** English Word Suffixes.
 *
 * Flat, sorted and duplicate-free union of `allNounSuffixes`, `verbSuffixes`
 * and `adjectiveSuffixes`.
 */
static immutable string[] wordSuffixes = () {
    import std.algorithm.sorting : sort;

    // Concatenate into one flat, mutable array. Wrapping the concatenation in
    // `[ ... ]` would instead yield a single-element array of arrays.
    auto all = (allNounSuffixes ~ verbSuffixes ~ adjectiveSuffixes).dup;

    // `uniq` only drops adjacent duplicates, so the input has to be sorted.
    sort(all);
    return all.uniq.array;
}();

/** Return string $(D word) in plural optionally in $(D count).
 *
 * Params:
 *   word = singular form
 *   count = number of items; `word` is returned unchanged when this is 1
 *   pluralWord = explicit (irregular) plural form; returned as is when non-null
 *
 * Returns: `word` when `count` is 1 or `word` is empty, otherwise `pluralWord`
 * when given, otherwise a plural formed by these spelling heuristics:
 * `-s`, `-x`, `-z`, `-o`, `-ch`, `-sh` => append `es`;
 * `-ff` => append `s`; other `-f` => replace with `ves`;
 * vowel + `y` => append `s`; other `-y` => replace with `ies`;
 * anything else => append `s`.
 *
 * Irregular nouns are not recognized; pass them via `pluralWord`.
 */
inout(string) inPlural(scope return inout(string) word,
                       in int count = 2,
                       scope return inout(string) pluralWord = null)
{
    if (count == 1 || word.length == 0)
        return word;            // it isn't actually inPlural
    if (pluralWord !is null)
        return pluralWord;

    // second-to-last code unit, or NUL for one-letter words
    const char penultimate = word.length >= 2 ? word[$ - 2] : '\0';

    switch (word[$ - 1])
    {
    case 's', 'x', 'z', 'o':
        return word ~ `es`;     // bus => buses, box => boxes, hero => heroes
    case 'h':
        if (penultimate == 'c' || penultimate == 's')
            return word ~ `es`; // church => churches, dish => dishes
        return word ~ `s`;      // path => paths
    case 'f':
        if (penultimate == 'f')
            return word ~ `s`;  // cliff => cliffs
        return word[0 .. $-1] ~ `ves`; // leaf => leaves
    case 'y':
        if (penultimate.among!('a', 'e', 'i', 'o', 'u'))
            return word ~ `s`;  // day => days
        return word[0 .. $-1] ~ `ies`; // city => cities
    default:
        return word ~ `s`;      // tree => trees, car => cars
    }
}

unittest
{
    assert(`car`.inPlural == `cars`);
    assert(`tree`.inPlural == `trees`);
    assert(`banana`.inPlural == `bananas`);
    assert(`bus`.inPlural == `buses`);
    assert(`box`.inPlural == `boxes`);
    assert(`church`.inPlural == `churches`);
    assert(`dish`.inPlural == `dishes`);
    assert(`path`.inPlural == `paths`);
    assert(`hero`.inPlural == `heroes`);
    assert(`leaf`.inPlural == `leaves`);
    assert(`cliff`.inPlural == `cliffs`);
    assert(`city`.inPlural == `cities`);
    assert(`day`.inPlural == `days`);
    assert(`car`.inPlural(1) == `car`);
    assert(``.inPlural == ``);
    assert(`mouse`.inPlural(2, `mice`) == `mice`);
}

/** Return $(D s) lemmatized (normalized).
 *
 * `be`, `is`, `am` and `are` map to `be`; `do` and `does` map to `do`; any
 * other input is returned unchanged.
 *
 * See_Also: https://en.wikipedia.org/wiki/Lemmatisation
 */
inout(S) lemmatized(S)(scope return inout(S) s) nothrow if (isSomeString!S)
{
    if (s.among!(`be`, `is`, `am`, `are`)) return `be`;
    else if (s.among!(`do`, `does`)) return `do`;
    else return s;
}

/**
    TODO: Reuse knet translation query instead.
*/
string negationIn(in Lang lang) nothrow @nogc
{
    switch (lang)
    {
    case Lang.en: return `not`;
    case Lang.sv: return `inte`;
    case Lang.de: return `nicht`;
    default: return `not`;      // every other language falls back to English
    }
}

enum Manner
{
    // TODO: add unknown
    formal,
    informal,
    slang,
    rude,
}

/** Grammatical Case.
 *
 * See_Also: https://en.wikipedia.org/wiki/Grammatical_case
 */
enum Case
{
    unknown,
    nominative,
    genitive,
    dative,
    accusative,
    ablative
}

/** English Subject Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Subject_pronoun
 */
static immutable englishSubjectPronouns = [`I`, // 1st-person singular
                                           `you`, // 2nd-person singular and plural
                                           `he`, `she`, `it`, // 3rd-person singular
                                           `we`, // 1st-person plural
                                           `they`, // 3rd-person plural
                                           `what`, // interrogative singular (Object)
                                           `who`]; // interrogative singular

/** Swedish Subject Pronouns.
 *
 * See_Also: https://en.wikipedia.org/wiki/Subject_pronoun
 */
static immutable swedishSubjectPronouns = [`jag`, // 1st-person singular
                                           `du`, // 2nd-person singular
                                           `han`, `hon`, `den`, `det`, // 3rd-person singular
                                           `vi`, // 1st-person plural
                                           `de`, // 3rd-person plural
                                           `vad`, // interrogative singular (Object)
                                           `vem`, // interrogative singular
                                           `vilka`]; // interrogative plural

/** English Object Pronouns.
 *
 * TODO: `it` is also an object pronoun but is not listed here.
 *
 * See_Also: https://en.wikipedia.org/wiki/Object_pronoun
 */
static immutable englishObjectPronouns = [`me`, // 1st-person singular
                                          `you`, // 2nd-person singular and plural
                                          `him`, `her`, // 3rd-person singular
                                          `us`, // 1st-person plural
                                          `them`, // 3rd-person plural
                                          `whom`]; // interrogative singular

/** Swedish Object Pronouns.
See_Also: https://en.wikipedia.org/wiki/Object_pronoun
*/
static immutable swedishObjectPronouns = [`mig`, // 1st-person singular
                                          `dig`, // 2nd-person singular
                                          `honom`, `henne`, // 3rd-person singular
                                          `oss`, // 1st-person plural
                                          `dem`]; // 3rd-person plural

/** Casing kinds. */
enum Casing
{
    unknown,
    lower,
    upper,
    capitalized,
    camel
}