1package Lingua::EN::Inflect; 2 3use strict; 4use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA); 5use Env; 6 7require Exporter; 8@ISA = qw(Exporter); 9 10$VERSION = '1.89'; 11 12%EXPORT_TAGS = 13( 14 ALL => [ qw( classical inflect 15 PL PL_N PL_V PL_ADJ NO NUM A AN 16 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq 17 PART_PRES 18 ORD 19 NUMWORDS 20 def_noun def_verb def_adj def_a def_an )], 21 22 INFLECTIONS => [ qw( classical inflect 23 PL PL_N PL_V PL_ADJ PL_eq 24 NO NUM A AN PART_PRES )], 25 26 PLURALS => [ qw( classical inflect 27 PL PL_N PL_V PL_ADJ NO NUM 28 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )], 29 30 COMPARISONS => [ qw( classical 31 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )], 32 33 ARTICLES => [ qw( classical inflect NUM A AN )], 34 35 NUMERICAL => [ qw( ORD NUMWORDS )], 36 37 USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )], 38); 39 40Exporter::export_ok_tags(qw( ALL )); 41 42# SUPPORT CLASSICAL PLURALIZATIONS 43 44my %def_classical = ( 45 all => 0, 46 zero => 0, 47 herd => 0, 48 names => 1, 49 persons => 0, 50 ancient => 0, 51); 52 53my %all_classical = ( 54 all => 1, 55 zero => 1, 56 herd => 1, 57 names => 1, 58 persons => 1, 59 ancient => 1, 60); 61 62my %classical = %def_classical; 63 64my $classical_mode = join '|', keys %all_classical; 65 $classical_mode = qr/^(?:$classical_mode)$/; 66 67sub classical 68{ 69 if (!@_) { 70 %classical = %all_classical; 71 return; 72 } 73 if (@_==1 && $_[0] !~ $classical_mode) { 74 %classical = $_[0] ? %all_classical : (); 75 return; 76 } 77 while (@_) { 78 my $arg = shift; 79 if ($arg !~ $classical_mode) { 80 die "Unknown classical mode ($arg)\n"; 81 } 82 if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; } 83 else { $classical{$arg} = 1; } 84 85 if ($arg eq 'all') { 86 %classical = $classical{all} ? %all_classical : (); 87 } 88 } 89} 90 91my $persistent_count; 92 93sub NUM # (;$count,$show) 94{ 95 if (defined $_[0]) 96 { 97 $persistent_count = $_[0]; 98 return $_[0] if !defined($_[1]) || $_[1]; 99 } 100 else 101 { 102 $persistent_count = undef; 103 } 104 return ''; 105} 106 107 108# 0. PERFORM GENERAL INFLECTIONS IN A STRING 109 110sub enclose { "(?:$_[0])" } 111 112sub inflect 113{ 114 my $save_persistent_count = $persistent_count; 115 my @sections = split /(NUM\([^)]*\))/, $_[0]; 116 my $inflection = ""; 117 118 foreach ( @sections ) 119 { 120 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe) 121 { 122 1 while 123 s/\bPL \( ([^),]*) (, ([^)]*) )? \) / PL($1,$3) /xeg 124 || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \) / PL_N($1,$3) /xeg 125 || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \) / PL_V($1,$3) /xeg 126 || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \) / PL_ADJ($1,$3) /xeg 127 || s/\bAN? \( ([^),]*) (, ([^)]*) )? \) / A($1,$3) /xeg 128 || s/\bNO \( ([^),]*) (, ([^)]*) )? \) / NO($1,$3) /xeg 129 || s/\bORD \( ([^)]*) \) / ORD($1) /xeg 130 || s/\bNUMWORDS \( ([^)]*) \) / NUMWORDS($1) /xeg 131 || s/\bPART_PRES \( ([^)]*) \) / PART_PRES($1) /xeg 132 } 133 134 $inflection .= $_; 135 } 136 137 $persistent_count = $save_persistent_count; 138 return $inflection; 139} 140 141 142# 1. PLURALS 143 144my %PL_sb_irregular_s = 145( 146 "corpus" => "corpuses|corpora", 147 "opus" => "opuses|opera", 148 "genus" => "genera", 149 "mythos" => "mythoi", 150 "penis" => "penises|penes", 151 "testis" => "testes", 152 "atlas" => "atlases|atlantes", 153); 154 155my %PL_sb_irregular = 156( 157 "child" => "children", 158 "brother" => "brothers|brethren", 159 "loaf" => "loaves", 160 "hoof" => "hoofs|hooves", 161 "beef" => "beefs|beeves", 162 "money" => "monies", 163 "mongoose" => "mongooses", 164 "ox" => "oxen", 165 "cow" => "cows|kine", 166 "soliloquy" => "soliloquies", 167 "graffito" => "graffiti", 168 "prima donna" => "prima donnas|prime donne", 169 "octopus" => "octopuses|octopodes", 170 "genie" => "genies|genii", 171 "ganglion" => "ganglions|ganglia", 172 "trilby" => "trilbys", 173 "turf" => "turfs|turves", 174 "numen" => "numina", 175 "atman" => "atmas", 176 "occiput" => "occiputs|occipita", 177 178 %PL_sb_irregular_s, 179); 180 181my $PL_sb_irregular = enclose join '|', keys %PL_sb_irregular; 182 183# CLASSICAL "..is" -> "..ides" 184 185my @PL_sb_C_is_ides = 186( 187# GENERAL WORDS... 188 189 "ephemeris", "iris", "clitoris", 190 "chrysalis", "epididymis", 191 192# INFLAMATIONS... 193 194 ".*itis", 195 196); 197 198my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides; 199 200# CLASSICAL "..a" -> "..ata" 201 202my @PL_sb_C_a_ata = 203( 204 "anathema", "bema", "carcinoma", "charisma", "diploma", 205 "dogma", "drama", "edema", "enema", "enigma", "lemma", 206 "lymphoma", "magma", "melisma", "miasma", "oedema", 207 "sarcoma", "schema", "soma", "stigma", "stoma", "trauma", 208 "gumma", "pragma", 209); 210 211my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata; 212 213# UNCONDITIONAL "..a" -> "..ae" 214 215my $PL_sb_U_a_ae = enclose join "|", 216( 217 "alumna", "alga", "vertebra", "persona" 218); 219 220# CLASSICAL "..a" -> "..ae" 221 222my $PL_sb_C_a_ae = enclose join "|", 223( 224 "amoeba", "antenna", "formula", "hyperbola", 225 "medusa", "nebula", "parabola", "abscissa", 226 "hydra", "nova", "lacuna", "aurora", ".*umbra", 227 "flora", "fauna", 228); 229 230# CLASSICAL "..en" -> "..ina" 231 232my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) } 233( 234 "stamen", "foramen", "lumen" 235); 236 237# UNCONDITIONAL "..um" -> "..a" 238 239my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) } 240( 241 "bacterium", "agendum", "desideratum", "erratum", 242 "stratum", "datum", "ovum", "extremum", 243 "candelabrum", 244); 245 246# CLASSICAL "..um" -> "..a" 247 248my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) } 249( 250 "maximum", "minimum", "momentum", "optimum", 251 "quantum", "cranium", "curriculum", "dictum", 252 "phylum", "aquarium", "compendium", "emporium", 253 "enconium", "gymnasium", "honorarium", "interregnum", 254 "lustrum", "memorandum", "millennium", "rostrum", 255 "spectrum", "speculum", "stadium", "trapezium", 256 "ultimatum", "medium", "vacuum", "velum", 257 "consortium", 258); 259 260# UNCONDITIONAL "..us" -> "i" 261 262my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) } 263( 264 "alumnus", "alveolus", "bacillus", "bronchus", 265 "locus", "nucleus", "stimulus", "meniscus", 266); 267 268# CLASSICAL "..us" -> "..i" 269 270my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) } 271( 272 "focus", "radius", "genius", 273 "incubus", "succubus", "nimbus", 274 "fungus", "nucleolus", "stylus", 275 "torus", "umbilicus", "uterus", 276 "hippopotamus", 277); 278 279# CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS) 280 281my $PL_sb_C_us_us = enclose join "|", 282( 283 "status", "apparatus", "prospectus", "sinus", 284 "hiatus", "impetus", "plexus", 285); 286 287# UNCONDITIONAL "..on" -> "a" 288 289my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) } 290( 291 "criterion", "perihelion", "aphelion", 292 "phenomenon", "prolegomenon", "noumenon", 293 "organon", "asyndeton", "hyperbaton", 294); 295 296# CLASSICAL "..on" -> "..a" 297 298my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) } 299( 300 "oxymoron", 301); 302 303# CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os") 304 305my @PL_sb_C_o_i = 306( 307 "solo", "soprano", "basso", "alto", 308 "contralto", "tempo", "piano", "virtuoso", 309); 310my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i; 311 312# ALWAYS "..o" -> "..os" 313 314my $PL_sb_U_o_os = enclose join "|", 315( 316 "albino", "archipelago", "armadillo", 317 "commando", "crescendo", "fiasco", 318 "ditto", "dynamo", "embryo", 319 "ghetto", "guano", "inferno", 320 "jumbo", "lumbago", "magneto", 321 "manifesto", "medico", "octavo", 322 "photo", "pro", "quarto", 323 "canto", "lingo", "generalissimo", 324 "stylo", "rhino", "casino", 325 "auto", "macro", 'zero', 326 327 @PL_sb_C_o_i, 328); 329 330 331# UNCONDITIONAL "..[ei]x" -> "..ices" 332 333my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) } 334( 335 "codex", "murex", "silex", 336); 337 338my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) } 339( 340 "radix", "helix", 341); 342 343# CLASSICAL "..[ei]x" -> "..ices" 344 345my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) } 346( 347 "vortex", "vertex", "cortex", "latex", 348 "pontifex", "apex", "index", "simplex", 349); 350 351my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) } 352( 353 "appendix", 354); 355 356# ARABIC: ".." -> "..i" 357 358my $PL_sb_C_i = enclose join "|", 359( 360 "afrit", "afreet", "efreet", 361); 362 363# HEBREW: ".." -> "..im" 364 365my $PL_sb_C_im = enclose join "|", 366( 367 "goy", "seraph", "cherub", 368); 369 370# UNCONDITIONAL "..man" -> "..mans" 371 372my $PL_sb_U_man_mans = enclose join "|", 373qw( 374 human 375 Alabaman Bahaman Burman German 376 Hiroshiman Liman Nakayaman Oklahoman 377 Panaman Selman Sonaman Tacoman Yakiman 378 Yokohaman Yuman 379); 380 381my @PL_sb_uninflected_s = 382( 383# PAIRS OR GROUPS SUBSUMED TO A SINGULAR... 384 "breeches", "britches", "clippers", "gallows", "hijinks", 385 "headquarters", "pliers", "scissors", "testes", "herpes", 386 "pincers", "shears", "proceedings", "trousers", 387 388# UNASSIMILATED LATIN 4th DECLENSION 389 390 "cantus", "coitus", "nexus", 391 392# RECENT IMPORTS... 393 "contretemps", "corps", "debris", 394 ".*ois", "siemens", 395 396# DISEASES 397 ".*measles", "mumps", 398 399# MISCELLANEOUS OTHERS... 400 "diabetes", "jackanapes", "series", "species", "rabies", 401 "chassis", "innings", "news", "mews", 402); 403 404my $PL_sb_uninflected_herd = enclose join "|", 405# DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION 406( 407 "wildebeest", "swine", "eland", "bison", "buffalo", 408 "elk", "moose", "rhinoceros", 409); 410 411my $PL_sb_uninflected = enclose join "|", 412( 413# SOME FISH AND HERD ANIMALS 414 ".*fish", "tuna", "salmon", "mackerel", "trout", 415 "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting", 416 417 ".*deer", ".*sheep", 418 419# ALL NATIONALS ENDING IN -ese 420 "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese", 421 "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese", 422 "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese", 423 "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese", 424 "Shavese", "Vermontese", "Wenchowese", "Yengeese", 425 ".*[nrlm]ese", 426 427# SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE) 428 429 @PL_sb_uninflected_s, 430 431# DISEASES 432 ".*pox", 433 434 435# OTHER ODDITIES 436 "graffiti", "djinn" 437); 438 439# SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es) 440 441my $PL_sb_singular_s = enclose join '|', 442( 443 ".*ss", 444 "acropolis", "aegis", "alias", "asbestos", "bathos", "bias", 445 "bronchitis", "bursitis", "caddis", "cannabis", 446 "canvas", "chaos", "cosmos", "dais", "digitalis", 447 "epidermis", "ethos", "eyas", "gas", "glottis", 448 "hubris", "ibis", "lens", "mantis", "marquis", "metropolis", 449 "pathos", "pelvis", "polis", "rhinoceros", 450 "sassafras", "trellis", ".*us", "[A-Z].*es", 451 452 @PL_sb_C_is_ides, 453); 454 455my $PL_v_special_s = enclose join '|', 456( 457 $PL_sb_singular_s, 458 @PL_sb_uninflected_s, 459 keys %PL_sb_irregular_s, 460 '(.*[csx])is', 461 '(.*)ceps', 462 '[A-Z].*s', 463); 464 465my %PL_sb_postfix_adj = ( 466 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'], 467 'martial' => [qw(court)], 468); 469 470foreach (keys %PL_sb_postfix_adj) { 471 $PL_sb_postfix_adj{$_} = enclose 472 enclose(join('|', @{$PL_sb_postfix_adj{$_}})) 473 . "(?=(?:-|\\s+)$_)"; 474} 475 476my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)'; 477 478my $PL_sb_military = 'major|lieutenant|brigadier|adjutant|quartermaster'; 479my $PL_sb_general = '((?!'.$PL_sb_military.').*?)((-|\s+)general)'; 480 481my $PL_prep = enclose join '|', qw ( 482 about above across after among around at athwart before behind 483 below beneath beside besides between betwixt beyond but by 484 during except for from in into near of off on onto out over 485 since till to under until unto upon with 486); 487 488my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'; 489 490my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eu])((-|\s+)(.*))?)'; 491 492 493my %PL_pron_nom = 494( 495# NOMINATIVE REFLEXIVE 496 497"i" => "we", "myself" => "ourselves", 498"you" => "you", "yourself" => "yourselves", 499"she" => "they", "herself" => "themselves", 500"he" => "they", "himself" => "themselves", 501"it" => "they", "itself" => "themselves", 502"they" => "they", "themself" => "themselves", 503 504# POSSESSIVE 505 506"mine" => "ours", 507"yours" => "yours", 508"hers" => "theirs", 509"his" => "theirs", 510"its" => "theirs", 511"theirs" => "theirs", 512); 513 514my %PL_pron_acc = 515( 516# ACCUSATIVE REFLEXIVE 517 518"me" => "us", "myself" => "ourselves", 519"you" => "you", "yourself" => "yourselves", 520"her" => "them", "herself" => "themselves", 521"him" => "them", "himself" => "themselves", 522"it" => "them", "itself" => "themselves", 523"them" => "them", "themself" => "themselves", 524); 525 526my $PL_pron_acc = enclose join '|', keys %PL_pron_acc; 527 528my %PL_v_irregular_pres = 529( 530# 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR 531# 3RD PERS. (INDET.) 532 533"am" => "are", "are" => "are", "is" => "are", 534"was" => "were", "were" => "were", "was" => "were", 535"have" => "have", "have" => "have", "has" => "have", 536"do" => "do", "do" => "do", "does" => "do", 537); 538 539my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres; 540 541my %PL_v_ambiguous_pres = 542( 543# 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR 544# 3RD PERS. (INDET.) 545 546"act" => "act", "act" => "act", "acts" => "act", 547"blame" => "blame", "blame" => "blame", "blames" => "blame", 548"can" => "can", "can" => "can", "can" => "can", 549"must" => "must", "must" => "must", "must" => "must", 550"fly" => "fly", "fly" => "fly", "flies" => "fly", 551"copy" => "copy", "copy" => "copy", "copies" => "copy", 552"drink" => "drink", "drink" => "drink", "drinks" => "drink", 553"fight" => "fight", "fight" => "fight", "fights" => "fight", 554"fire" => "fire", "fire" => "fire", "fires" => "fire", 555"like" => "like", "like" => "like", "likes" => "like", 556"look" => "look", "look" => "look", "looks" => "look", 557"make" => "make", "make" => "make", "makes" => "make", 558"reach" => "reach", "reach" => "reach", "reaches" => "reach", 559"run" => "run", "run" => "run", "runs" => "run", 560"sink" => "sink", "sink" => "sink", "sinks" => "sink", 561"sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep", 562"view" => "view", "view" => "view", "views" => "view", 563); 564 565my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres; 566 567 568my $PL_v_irregular_non_pres = enclose join '|', 569( 570"did", "had", "ate", "made", "put", 571"spent", "fought", "sank", "gave", "sought", 572"shall", "could", "ought", "should", 573); 574 575my $PL_v_ambiguous_non_pres = enclose join '|', 576( 577"thought", "saw", "bent", "will", "might", "cut", 578); 579 580# "..oes" -> "..oe" (the rest are "..oes" -> "o") 581 582my $PL_v_oes_oe = enclose join "|", 583qw( 584 .*shoes .*hoes .*toes 585 canoes floes oboes roes throes woes 586); 587 588my $PL_count_zero = enclose join '|', 589( 5900, "no", "zero", "nil" 591); 592 593my $PL_count_one = enclose join '|', 594( 5951, "a", "an", "one", "each", "every", "this", "that", 596); 597 598my %PL_adj_special = 599( 600"a" => "some", "an" => "some", 601"this" => "these", "that" => "those", 602); 603my $PL_adj_special = enclose join '|', keys %PL_adj_special; 604 605my %PL_adj_poss = 606( 607"my" => "our", 608"your" => "your", 609"its" => "their", 610"her" => "their", 611"his" => "their", 612"their" => "their", 613); 614my $PL_adj_poss = enclose join '|', keys %PL_adj_poss; 615 616 617sub checkpat 618{ 619local $SIG{__WARN__} = sub {0}; 620do {$@ =~ s/at.*?$//; 621 die "\nBad user-defined singular pattern:\n\t$@\n"} 622 if (!eval "'' =~ m/$_[0]/; 1;" or $@); 623return @_; 624} 625 626sub checkpatsubs 627{ 628checkpat($_[0]); 629if (defined $_[1]) 630{ 631 local $SIG{__WARN__} = sub {0}; 632 do {$@ =~ s/at.*?$//; 633 die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"} 634 if (!eval "qq{$_[1]}; 1;" or $@); 635} 636return @_; 637} 638 639my @PL_sb_user_defined = (); 640my @PL_v_user_defined = (); 641my @PL_adj_user_defined = (); 642my @A_a_user_defined = (); 643 644sub def_noun 645{ 646 unshift @PL_sb_user_defined, checkpatsubs(@_); 647 return 1; 648} 649 650sub def_verb 651{ 652 unshift @PL_v_user_defined, checkpatsubs(@_[4,5]); 653 unshift @PL_v_user_defined, checkpatsubs(@_[2,3]); 654 unshift @PL_v_user_defined, checkpatsubs(@_[0,1]); 655 return 1; 656} 657 658sub def_adj 659{ 660 unshift @PL_adj_user_defined, checkpatsubs(@_); 661 return 1; 662} 663 664sub def_a 665{ 666unshift @A_a_user_defined, checkpat(@_,'a'); 667return 1; 668} 669 670sub def_an 671{ 672unshift @A_a_user_defined, checkpat(@_,'an'); 673return 1; 674} 675 676sub ud_match 677{ 678my $word = shift; 679for (my $i=0; $i < @_; $i+=2) 680{ 681 if ($word =~ /^(?:$_[$i])$/i) 682 { 683 last unless defined $_[$i+1]; 684 return eval '"'.$_[$i+1].'"'; 685 } 686} 687return undef; 688} 689 690do 691{ 692local $SIG{__WARN__} = sub {0}; 693my $rcfile; 694 695$rcfile = $INC{'Lingua//EN/Inflect.pm'} || ''; 696$rcfile =~ s/Inflect.pm$/.inflectrc/; 697do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n" 698if $rcfile && -r $rcfile && -s $rcfile; 699 700$rcfile = "$ENV{HOME}/.inflectrc" || ''; 701do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n" 702if $rcfile && -r $rcfile && -s $rcfile; 703}; 704 705sub postprocess # FIX PEDANTRY AND CAPITALIZATION :-) 706{ 707my ($orig, $inflected) = @_; 708$inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e; 709return $orig =~ /^I$/ ? $inflected 710 : $orig =~ /^[A-Z]+$/ ? uc $inflected 711 : $orig =~ /^[A-Z]/ ? ucfirst $inflected 712 : $inflected; 713} 714 715sub PL 716# PL($word,$number) 717{ 718my ($str, $count) = @_; 719my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/); 720return $str unless $word; 721my $plural = postprocess $word, _PL_special_adjective($word,$count) 722 || _PL_special_verb($word,$count) 723 || _PL_noun($word,$count); 724return $pre.$plural.$post; 725} 726 727sub PL_N 728# PL_N($word,$number) 729{ 730my ($str, $count) = @_; 731my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/); 732return $str unless $word; 733my $plural = postprocess $word, _PL_noun($word,$count); 734return $pre.$plural.$post; 735} 736 737sub PL_V 738# PL_V($word,$number) 739{ 740my ($str, $count) = @_; 741my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/); 742return $str unless $word; 743my $plural = postprocess $word, _PL_special_verb($word,$count) 744 || _PL_general_verb($word,$count); 745return $pre.$plural.$post; 746} 747 748sub PL_ADJ 749# PL_ADJ($word,$number) 750{ 751my ($str, $count) = @_; 752my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/); 753return $str unless $word; 754my $plural = postprocess $word, _PL_special_adjective($word,$count) 755 || $word; 756return $pre.$plural.$post; 757} 758 759sub PL_eq { _PL_eq(@_, \&PL); } 760sub PL_N_eq { _PL_eq(@_, \&PL_N); } 761sub PL_V_eq { _PL_eq(@_, \&PL_V); } 762sub PL_ADJ_eq { _PL_eq(@_, \&PL_ADJ); } 763 764sub _PL_eq 765{ 766my ( $word1, $word2, $PL ) = @_; 767my %classval = %classical; 768%classical = %all_classical; 769my $result = ""; 770$result = "eq" if !$result && $word1 eq $word2; 771$result = "p:s" if !$result && $word1 eq &$PL($word2); 772$result = "s:p" if !$result && &$PL($word1) eq $word2; 773%classical = (); 774$result = "p:s" if !$result && $word1 eq &$PL($word2); 775$result = "s:p" if !$result && &$PL($word1) eq $word2; 776%classical = %classval; 777 778if ($PL == \&PL || $PL == \&PL_N) 779{ 780 $result = "p:p" 781 if !$result && _PL_check_plurals_N($word1,$word2); 782 $result = "p:p" 783 if !$result && _PL_check_plurals_N($word2,$word1); 784} 785if ($PL == \&PL || $PL == \&PL_ADJ) 786{ 787 $result = "p:p" 788 if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL); 789} 790 791return $result; 792} 793 794sub _PL_reg_plurals 795{ 796 $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/ 797} 798 799sub _PL_check_plurals_N 800{ 801my $pair = "$_[0]|$_[1]"; 802foreach ( values %PL_sb_irregular_s ) { return 1 if $_ eq $pair; } 803foreach ( values %PL_sb_irregular ) { return 1 if $_ eq $pair; } 804 805return 1 if _PL_reg_plurals($pair, $PL_sb_C_a_ata, "as","ata") 806 || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides") 807 || _PL_reg_plurals($pair, $PL_sb_C_a_ae, "s","e") 808 || _PL_reg_plurals($pair, $PL_sb_C_en_ina, "ens","ina") 809 || _PL_reg_plurals($pair, $PL_sb_C_um_a, "ums","a") 810 || _PL_reg_plurals($pair, $PL_sb_C_us_i, "uses","i") 811 || _PL_reg_plurals($pair, $PL_sb_C_on_a, "ons","a") 812 || _PL_reg_plurals($pair, $PL_sb_C_o_i, "os","i") 813 || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices") 814 || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices") 815 || _PL_reg_plurals($pair, $PL_sb_C_i, "s","i") 816 || _PL_reg_plurals($pair, $PL_sb_C_im, "s","im") 817 818 || _PL_reg_plurals($pair, '.*eau', "s","x") 819 || _PL_reg_plurals($pair, '.*ieu', "s","x") 820 || _PL_reg_plurals($pair, '.*tri', "xes","ces") 821 || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges"); 822 823 824return 0; 825} 826 827sub _PL_check_plurals_ADJ 828{ 829my ( $word1a, $word2a ) = @_; 830my ( $word1b, $word2b ) = @_; 831 832$word1a = '' unless $word1a =~ s/'s?$//; 833$word2a = '' unless $word2a =~ s/'s?$//; 834$word1b = '' unless $word1b =~ s/s'$//; 835$word2b = '' unless $word2b =~ s/s'$//; 836 837if ($word1a) 838{ 839 return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a) 840 || _PL_check_plurals_N($word2a, $word1a) ); 841 return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b) 842 || _PL_check_plurals_N($word2b, $word1a) ); 843} 844if ($word1b) 845{ 846 return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a) 847 || _PL_check_plurals_N($word2a, $word1b) ); 848 return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b) 849 || _PL_check_plurals_N($word2b, $word1b) ); 850} 851 852 853return ""; 854} 855 856sub _PL_noun 857{ 858my ( $word, $count ) = @_; 859my $value; # UTILITY VARIABLE 860 861# DEFAULT TO PLURAL 862 863$count = $persistent_count 864 if !defined($count) && defined($persistent_count); 865 866$count = (defined $count and $count=~/^($PL_count_one)$/io 867 or defined $count and $classical{zero} 868 and $count=~/^($PL_count_zero)$/io) 869 ? 1 870 : 2; 871 872return $word if $count==1; 873 874# HANDLE USER-DEFINED NOUNS 875 876return $value if defined($value = ud_match($word, @PL_sb_user_defined)); 877 878 879# HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS 880 881$word eq '' and return $word; 882 883$word =~ /^($PL_sb_uninflected)$/i 884 and return $word; 885 886$classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i 887 and return $word; 888 889 890# HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.) 891 892$word =~ /^(?:$PL_sb_postfix_adj)$/i 893 and $value = $2 894 and return _PL_noun($1,2) 895 . $value; 896 897$word =~ /^(?:$PL_sb_prep_dual_compound)$/i 898 and $value = [$2,$3] 899 and return _PL_noun($1,2) 900 . $value->[0] 901 . _PL_noun($value->[1]); 902 903$word =~ /^(?:$PL_sb_prep_compound)$/i 904 and $value = $2 905 and return _PL_noun($1,2) 906 . $value; 907 908# HANDLE PRONOUNS 909 910$word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i 911 and return $1.$PL_pron_acc{lc($2)}; 912 913$value = $PL_pron_nom{lc($word)} 914 and return $value; 915 916$word =~ /^($PL_pron_acc)$/i 917 and return $PL_pron_acc{lc($1)}; 918 919# HANDLE ISOLATED IRREGULAR PLURALS 920 921$word =~ /(.*)\b($PL_sb_irregular)$/i 922 and return $1 . $PL_sb_irregular{lc $2}; 923$word =~ /($PL_sb_U_man_mans)$/i 924 and return "$1s"; 925$word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people"; 926 927# HANDLE FAMILIES OF IRREGULAR PLURALS 928 929$word =~ /(.*)man$/i and return "$1men"; 930$word =~ /(.*[ml])ouse$/i and return "$1ice"; 931$word =~ /(.*)goose$/i and return "$1geese"; 932$word =~ /(.*)tooth$/i and return "$1teeth"; 933$word =~ /(.*)foot$/i and return "$1feet"; 934 935# HANDLE UNASSIMILATED IMPORTS 936 937$word =~ /(.*)ceps$/i and return $word; 938$word =~ /(.*)zoon$/i and return "$1zoa"; 939$word =~ /(.*[csx])is$/i and return "$1es"; 940$word =~ /($PL_sb_U_ex_ices)ex$/i and return "$1ices"; 941$word =~ /($PL_sb_U_ix_ices)ix$/i and return "$1ices"; 942$word =~ /($PL_sb_U_um_a)um$/i and return "$1a"; 943$word =~ /($PL_sb_U_us_i)us$/i and return "$1i"; 944$word =~ /($PL_sb_U_on_a)on$/i and return "$1a"; 945$word =~ /($PL_sb_U_a_ae)$/i and return "$1e"; 946 947# HANDLE INCOMPLETELY ASSIMILATED IMPORTS 948 949if ($classical{ancient}) 950{ 951 $word =~ /(.*)trix$/i and return "$1trices"; 952 $word =~ /(.*)eau$/i and return "$1eaux"; 953 $word =~ /(.*)ieu$/i and return "$1ieux"; 954 $word =~ /(.{2,}[yia])nx$/i and return "$1nges"; 955 $word =~ /($PL_sb_C_en_ina)en$/i and return "$1ina"; 956 $word =~ /($PL_sb_C_ex_ices)ex$/i and return "$1ices"; 957 $word =~ /($PL_sb_C_ix_ices)ix$/i and return "$1ices"; 958 $word =~ /($PL_sb_C_um_a)um$/i and return "$1a"; 959 $word =~ /($PL_sb_C_us_i)us$/i and return "$1i"; 960 $word =~ /($PL_sb_C_us_us)$/i and return "$1"; 961 $word =~ /($PL_sb_C_a_ae)$/i and return "$1e"; 962 $word =~ /($PL_sb_C_a_ata)a$/i and return "$1ata"; 963 $word =~ /($PL_sb_C_is_ides)is$/i and return "$1ides"; 964 $word =~ /($PL_sb_C_o_i)o$/i and return "$1i"; 965 $word =~ /($PL_sb_C_on_a)on$/i and return "$1a"; 966 $word =~ /$PL_sb_C_im$/i and return "${word}im"; 967 $word =~ /$PL_sb_C_i$/i and return "${word}i"; 968} 969 970 971# HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS 972 973$word =~ /^($PL_sb_singular_s)$/i and return "$1es"; 974$word =~ /^([A-Z].*s)$/ and $classical{names} and return "$1es"; 975$word =~ /(.*)([cs]h|[zx])$/i and return "$1$2es"; 976# $word =~ /(.*)(us)$/i and return "$1$2es"; 977 978# HANDLE ...f -> ...ves 979 980$word =~ /(.*[eao])lf$/i and return "$1lves"; 981$word =~ /(.*[^d])eaf$/i and return "$1eaves"; 982$word =~ /(.*[nlw])ife$/i and return "$1ives"; 983$word =~ /(.*)arf$/i and return "$1arves"; 984 985# HANDLE ...y 986 987$word =~ /(.*[aeiou])y$/i and return "$1ys"; 988$word =~ /([A-Z].*y)$/ and $classical{names} and return "$1s"; 989$word =~ /(.*)y$/i and return "$1ies"; 990 991# HANDLE ...o 992 993$word =~ /$PL_sb_U_o_os$/i and return "${word}s"; 994$word =~ /[aeiou]o$/i and return "${word}s"; 995$word =~ /o$/i and return "${word}es"; 996 997 998# OTHERWISE JUST ADD ...s 999 1000return "${word}s"; 1001} 1002 1003 1004sub _PL_special_verb 1005{ 1006my ( $word, $count ) = @_; 1007$count = $persistent_count 1008 if !defined($count) && defined($persistent_count); 1009$count = (defined $count and $count=~/^($PL_count_one)$/io or 1010 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1 1011 : 2; 1012 1013return undef if $count=~/^($PL_count_one)$/io; 1014 1015my $value; # UTILITY VARIABLE 1016 1017# HANDLE USER-DEFINED VERBS 1018 1019return $value if defined($value = ud_match($word, @PL_v_user_defined)); 1020 1021# HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND) 1022 1023$word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i 1024 and return $PL_v_irregular_pres{lc $1}.$2; 1025 1026# HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES 1027 1028$word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i 1029 and return $word; 1030 1031# HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND) 1032 1033$word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i 1034 and return $PL_v_irregular_pres{lc $1}.$2; 1035 1036$word =~ /^\S+n't\b/i 1037 and return $word; 1038 1039# HANDLE SPECIAL CASES 1040 1041$word =~ /^($PL_v_special_s)$/ and return undef; 1042$word =~ /\s/ and return undef; 1043 1044# HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS) 1045 1046$word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i and return "$1$2"; 1047 1048$word =~ /^(..+)ies$/i and return "$1y"; 1049 1050$word =~ /($PL_v_oes_oe)$/ and return substr($1,0,-1); 1051$word =~ /^(.+)oes$/i and return "$1o"; 1052 1053$word =~ /^(.*[^s])s$/i and return $1; 1054 1055# OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE) 1056 1057return undef; 1058} 1059 1060sub _PL_general_verb 1061{ 1062my ( $word, $count ) = @_; 1063$count = $persistent_count 1064 if !defined($count) && defined($persistent_count); 1065$count = (defined $count and $count=~/^($PL_count_one)$/io or 1066 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1 1067 : 2; 1068 1069return $word if $count=~/^($PL_count_one)$/io; 1070 1071# HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND) 1072 1073$word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i 1074 and return $PL_v_ambiguous_pres{lc $1}.$2; 1075 1076# HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES 1077 1078$word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i 1079 and return $word; 1080 1081# OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED 1082 1083return $word; 1084 1085} 1086 1087sub _PL_special_adjective 1088{ 1089my ( $word, $count ) = @_; 1090$count = $persistent_count 1091 if !defined($count) && defined($persistent_count); 1092$count = (defined $count and $count=~/^($PL_count_one)$/io or 1093 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1 1094 : 2; 1095 1096return $word if $count=~/^($PL_count_one)$/io; 1097 1098 1099# HANDLE USER-DEFINED ADJECTIVES 1100 1101my $value; 1102return $value if defined($value = ud_match($word, @PL_adj_user_defined)); 1103 1104# HANDLE KNOWN CASES 1105 1106$word =~ /^($PL_adj_special)$/i 1107 and return $PL_adj_special{lc $1}; 1108 1109# HANDLE POSSESSIVES 1110 1111$word =~ /^($PL_adj_poss)$/i 1112 and return $PL_adj_poss{lc $1}; 1113 1114$word =~ /^(.*)'s?$/ and do { my $pl = PL_N($1); 1115 return "$pl'" . ($pl =~ m/s$/ ? "" : "s"); 1116 }; 1117 1118# OTHERWISE, NO IDEA 1119 1120return undef; 1121 1122} 1123 1124 1125# 2. INDEFINITE ARTICLES 1126 1127# THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND" 1128# CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY 1129# TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!) 1130 1131my $A_abbrev = q{ 1132(?! FJO | [HLMNS]Y. | RY[EO] | SQU 1133 | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]) 1134[FHLMNRSX][A-Z] 1135}; 1136 1137# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A 1138# 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE 1139# IMPLIES AN ABBREVIATION. 1140 1141my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'; 1142 1143# EXCEPTIONS TO EXCEPTIONS 1144 1145my $A_explicit_an = enclose join '|', 1146( 1147"euler", 1148"hour(?!i)", "heir", "honest", "hono", 1149); 1150 1151sub A 1152{ 1153my ($str, $count) = @_; 1154my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i ); 1155return $str unless $word; 1156my $result = _indef_article($word,$count); 1157return $pre.$result.$post; 1158} 1159 1160sub AN { goto &A } 1161 1162sub _indef_article 1163{ 1164my ( $word, $count ) = @_; 1165 1166$count = $persistent_count 1167 if !defined($count) && defined($persistent_count); 1168 1169return "$count $word" 1170 if defined $count && $count!~/^($PL_count_one)$/io; 1171 1172# HANDLE USER-DEFINED VARIANTS 1173 1174my $value; 1175return $value if defined($value = ud_match($word, @A_a_user_defined)); 1176 1177# HANDLE SPECIAL CASES 1178 1179$word =~ /^($A_explicit_an)/i and return "an $word"; 1180 1181# HANDLE ABBREVIATIONS 1182 1183$word =~ /^($A_abbrev)/ox and return "an $word"; 1184$word =~ /^[aefhilmnorsx][.-]/i and return "an $word"; 1185$word =~ /^[a-z][.-]/i and return "a $word"; 1186 1187# HANDLE CONSONANTS 1188 1189$word =~ /^[^aeiouy]/i and return "a $word"; 1190 1191# HANDLE SPECIAL VOWEL-FORMS 1192 1193$word =~ /^e[uw]/i and return "a $word"; 1194$word =~ /^onc?e\b/i and return "a $word"; 1195$word =~ /^uni([^nmd]|mo)/i and return "a $word"; 1196$word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word"; 1197 1198# HANDLE SPECIAL CAPITALS 1199 1200$word =~ /^U[NK][AIEO]?/ and return "a $word"; 1201 1202# HANDLE VOWELS 1203 1204$word =~ /^[aeiou]/i and return "an $word"; 1205 1206# HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND) 1207 1208$word =~ /^($A_y_cons)/io and return "an $word"; 1209 1210# OTHERWISE, GUESS "a" 1211 return "a $word"; 1212} 1213 1214# 2. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)" 1215 1216sub NO 1217{ 1218my ($str, $count) = @_; 1219my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/); 1220 1221$count = $persistent_count 1222 if !defined($count) && defined($persistent_count); 1223$count = 0 unless $count; 1224 1225return "$pre$count " . PL($word,$count) . $post 1226 unless $count =~ /^$PL_count_zero$/; 1227return "${pre}no ". PL($word,0) . $post ; 1228} 1229 1230 1231# PARTICIPLES 1232 1233sub PART_PRES 1234{ 1235 local $_ = PL_V(shift,2); 1236 s/ie$/y/ 1237 or s/ue$/u/ 1238 or s/([auy])e$/$1/ 1239 or s/ski$/ski/ 1240 or s/i$// 1241 or s/([^e])e$/$1/ 1242 or m/er$/ 1243 or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/; 1244 return "${_}ing"; 1245} 1246 1247 1248 1249# NUMERICAL INFLECTIONS 1250 1251my %nth = 1252( 1253 0 => 'th', 1254 1 => 'st', 1255 2 => 'nd', 1256 3 => 'rd', 1257 4 => 'th', 1258 5 => 'th', 1259 6 => 'th', 1260 7 => 'th', 1261 8 => 'th', 1262 9 => 'th', 1263 11 => 'th', 1264 12 => 'th', 1265 13 => 'th', 1266); 1267 1268 1269my %ordinal; 1270@ordinal{qw(ty one two three five eight nine twelve )}= 1271 qw(tieth first second third fifth eighth ninth twelfth); 1272 1273my $ordinal_suff = join '|', keys %ordinal, ""; 1274 1275$ordinal{""} = 'th'; 1276 1277sub ORD($) 1278{ 1279 my $num = shift; 1280 if ($num =~ /\d/) { 1281 return $num . ($nth{$num%100} || $nth{$num%10}); 1282 } 1283 else { 1284 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/; 1285 return $num; 1286 } 1287} 1288 1289 1290my %default_args = 1291( 1292 'group' => 0, 1293 'comma' => ',', 1294 'and' => 'and', 1295 'zero' => 'zero', 1296 'one' => 'one', 1297 'decimal' => 'point', 1298); 1299 1300my @unit = ('',qw(one two three four five six seven eight nine)); 1301my @teen = qw(ten eleven twelve thirteen fourteen 1302 fifteen sixteen seventeen eighteen nineteen); 1303my @ten = ('','',qw(twenty thirty forty fifty sixty seventy eighty ninety)); 1304my @mill = map { (my $val=$_) =~ s/_/illion/; " $val" } 1305 ('',qw(thousand m_ b_ tr_ quadr_ quint_ sext_ sept_ oct_ non_ dec_)); 1306 1307 1308sub mill { my $ind = $_[0]||0; 1309 die "Number out of range\n" if $ind > $#mill; 1310 return $ind<@mill ? $mill[$ind] : ' ???illion'; } 1311 1312sub unit { return $unit[$_[0]]. mill($_[1]); } 1313 1314sub ten 1315{ 1316 return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2]) 1317 if $_[0] ne '1'; 1318 return $teen[$_[1]]. $mill[$_[2]||0]; 1319} 1320 1321sub hund 1322{ 1323 return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '') 1324 . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0]; 1325 return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2]; 1326 return ''; 1327} 1328 1329 1330sub enword 1331{ 1332 my ($num,$group,$zero,$one,$comma,$and) = @_; 1333 1334 if ($group==1) 1335 { 1336 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg; 1337 } 1338 elsif ($group==2) 1339 { 1340 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg; 1341 $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e; 1342 } 1343 elsif ($group==3) 1344 { 1345 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg; 1346 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e; 1347 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e; 1348 } 1349 elsif ($num+0==0) { 1350 $num = $zero; 1351 } 1352 elsif ($num+0==1) { 1353 $num = $one; 1354 } 1355 else { 1356 $num =~ s/\A\s*0+//; 1357 my $mill = 0; 1358 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e; 1359 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e; 1360 $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e; 1361 } 1362 return $num; 1363} 1364 1365sub NUMWORDS 1366{ 1367 my $num = shift; 1368 my %arg = ( %default_args, @_ ); 1369 my $group = $arg{group}; 1370 1371 die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/; 1372 my $sign = ($num =~ /\A\s*\+/) ? "plus" 1373 : ($num =~ /\A\s*\-/) ? "minus" 1374 : ''; 1375 1376 my ($zero, $one) = @arg{'zero','one'}; 1377 my $comma = $arg{comma}; 1378 my $and = $arg{'and'}; 1379 1380 my $ord = $num =~ s/(st|nd|rd|th)\Z//; 1381 my @chunks = ($arg{decimal}) 1382 ? $group ? split(/\./, $num) : split(/\./, $num, 2) 1383 : ($num); 1384 1385 my $first = 1; 1386 1387 if ($chunks[0] eq '') { $first=0; shift @chunks; } 1388 1389 foreach ( @chunks ) 1390 { 1391 s/\D//g; 1392 $_ = '0' unless $_; 1393 1394 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) } 1395 else { $_ = enword($_,$group,$zero,$one,$comma,$and) } 1396 1397 s/, \Z//; 1398 s/\s+,/,/g; 1399 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first; 1400 s/\s+/ /g; 1401 s/(\A\s|\s\Z)//g; 1402 $first = '' if $first; 1403 } 1404 1405 my @numchunks = (); 1406 if ($first =~ /0/) 1407 { 1408 unshift @chunks, ''; 1409 } 1410 else 1411 { 1412 @numchunks = split /\Q$comma /, $chunks[0]; 1413 } 1414 1415 $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/ 1416 if $ord and @numchunks; 1417 1418 foreach (@chunks[1..$#chunks]) 1419 { 1420 push @numchunks, $arg{decimal}; 1421 push @numchunks, split /\Q$comma /; 1422 } 1423 1424 if (wantarray) 1425 { 1426 unshift @numchunks, $sign if $sign; 1427 return @numchunks 1428 } 1429 elsif ($group) 1430 { 1431 return ($sign?"$sign ":'') . join ", ", @numchunks; 1432 } 1433 else 1434 { 1435 $num = ($sign?"$sign ":'') . shift @numchunks; 1436 $first = ($num !~ /$arg{decimal}\Z/); 1437 foreach ( @numchunks ) 1438 { 1439 if (/\A$arg{decimal}\Z/) 1440 { 1441 $num .= " $_"; 1442 $first = 0; 1443 } 1444 elsif ($first) 1445 { 1446 $num .= "$comma $_"; 1447 } 1448 else 1449 { 1450 $num .= " $_"; 1451 } 1452 } 1453 return $num; 1454 } 1455} 1456 14571; 1458 1459__END__ 1460 1461=head1 NAME 1462 1463Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an". 1464 1465=head1 VERSION 1466 1467This document describes version 1.86 of Lingua::EN::Inflect, 1468released October 20, 2000. 1469 1470=head1 SYNOPSIS 1471 1472 use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM 1473 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq 1474 A AN 1475 PART_PRES 1476 ORD NUMWORDS 1477 inflect classical 1478 def_noun def_verb def_adj def_a def_an ); 1479 1480 1481 # UNCONDITIONALLY FORM THE PLURAL 1482 1483 print "The plural of ", $word, " is ", PL($word), "\n"; 1484 1485 1486 # CONDITIONALLY FORM THE PLURAL 1487 1488 print "I saw $cat_count ", PL("cat",$cat_count), "\n"; 1489 1490 1491 # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH 1492 1493 print PL_N("I",$N1), PL_V("saw",$N1), 1494 PL_ADJ("my",$N2), PL_N("saw",$N2), "\n"; 1495 1496 1497 # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION: 1498 1499 print "There ", PL_V("was",$errors), NO(" error",$errors), "\n"; 1500 1501 1502 # USE DEFAULT COUNTS: 1503 1504 print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw"); 1505 print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n"; 1506 1507 1508 # COMPARE TWO WORDS "NUMBER-INSENSITIVELY": 1509 1510 print "same\n" if PL_eq($word1, $word2); 1511 print "same noun\n" if PL_eq_N($word1, $word2); 1512 print "same verb\n" if PL_eq_V($word1, $word2); 1513 print "same adj.\n" if PL_eq_ADJ($word1, $word2); 1514 1515 1516 # ADD CORRECT "a" OR "an" FOR A GIVEN WORD: 1517 1518 print "Did you want ", A($thing), " or ", AN($idea), "\n"; 1519 1520 1521 # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.) 1522 1523 print "It was", ORD($position), " from the left\n"; 1524 1525 # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.) 1526 # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING... 1527 1528 $words = NUMWORDS(1234); # "one thousand, two hundred and thirty-four" 1529 $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth" 1530 1531 1532 # IN A LIST CONTEXT: GET BACK A LIST OF STRINGSi, ONE FOR EACH "CHUNK"... 1533 1534 @words = NUMWORDS(1234); # ("one thousand","two hundred and thirty-four") 1535 1536 1537 # OPTIONAL PARAMETERS CHANGE TRANSLATION: 1538 1539 $words = NUMWORDS(12345, group=>1); 1540 # "one, two, three, four, five" 1541 1542 $words = NUMWORDS(12345, group=>2); 1543 # "twelve, thirty-four, five" 1544 1545 $words = NUMWORDS(12345, group=>3); 1546 # "one twenty-three, forty-five" 1547 1548 $words = NUMWORDS(1234, 'and'=>''); 1549 # "one thousand, two hundred thirty-four" 1550 1551 $words = NUMWORDS(1234, 'and'=>', plus'); 1552 # "one thousand, two hundred, plus thirty-four" 1553 1554 $words = NUMWORDS(555_1202, group=>1, zero=>'oh'); 1555 # "five, five, five, one, two, oh, two" 1556 1557 $words = NUMWORDS(555_1202, group=>1, one=>'unity'); 1558 # "five, five, five, unity, two, oh, two" 1559 1560 $words = NUMWORDS(123.456, group=>1, decimal=>'mark'); 1561 # "one two three mark four five six" 1562 1563 1564 # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim") 1565 1566 classical; # USE ALL CLASSICAL PLURALS 1567 1568 classical 1; # USE ALL CLASSICAL PLURALS 1569 classical 0; # USE ALL MODERN PLURALS (DEFAULT) 1570 1571 classical 'zero'; # "no error" INSTEAD OF "no errors" 1572 classical zero=>1; # "no error" INSTEAD OF "no errors" 1573 classical zero=>0; # "no errors" INSTEAD OF "no error" 1574 1575 classical 'herd'; # "2 buffalo" INSTEAD OF "2 buffalos" 1576 classical herd=>1; # "2 buffalo" INSTEAD OF "2 buffalos" 1577 classical herd=>0; # "2 buffalos" INSTEAD OF "2 buffalo" 1578 1579 classical 'persons'; # "2 chairpersons" INSTEAD OF "2 chairpeople" 1580 classical persons=>1; # "2 chairpersons" INSTEAD OF "2 chairpeople" 1581 classical persons=>0; # "2 chairpeople" INSTEAD OF "2 chairpersons" 1582 1583 classical 'ancient'; # "2 formulae" INSTEAD OF "2 formulas" 1584 classical ancient=>1; # "2 formulae" INSTEAD OF "2 formulas" 1585 classical ancient=>0; # "2 formulas" INSTEAD OF "2 formulae" 1586 1587 1588 1589 # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", A()", "AN()" 1590 # "NUM()" AND "ORD()" WITHIN STRINGS: 1591 1592 print inflect("The plural of $word is PL($word)\n"); 1593 print inflect("I saw $cat_count PL("cat",$cat_count)\n"); 1594 print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)"); 1595 print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)"); 1596 print inflect("I saw NUM($cat_count) PL("cat")\nNUM()"); 1597 print inflect("There PL_V(was,$errors) NO(error,$errors)\n"); 1598 print inflect("There NUM($errors,) PL_V(was) NO(error)\n"; 1599 print inflect("Did you want A($thing) or AN($idea)\n"); 1600 print inflect("It was ORD($position) from the left\n"); 1601 1602 1603 # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES): 1604 1605 def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL 1606 1607 def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL 1608 "will" => "will", # 2ND PERSON SINGULAR => PLURAL 1609 "will" => "will", # 3RD PERSON SINGULAR => PLURAL 1610 1611 def_adj "hir" => "their", # SINGULAR => PLURAL 1612 1613 def_a "h" # "AY HALWAYS SEZ 'HAITCH'!" 1614 1615 def_an "horrendous.*" # "AN HORRENDOUS AFFECTATION" 1616 1617 1618=head1 DESCRIPTION 1619 1620The exportable subroutines of Lingua::EN::Inflect provide plural 1621inflections, "a"/"an" selection for English words, and manipulation 1622of numbers as words 1623 1624Plural forms of all nouns, most verbs, and some adjectives are 1625provided. Where appropriate, "classical" variants (for example: "brother" -> 1626"brethren", "dogma" -> "dogmata", etc.) are also provided. 1627 1628Pronunciation-based "a"/"an" selection is provided for all English 1629words, and most initialisms. 1630 1631It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd, 3rd) 1632and to english words ("one", "two", "three). 1633 1634In generating these inflections, Lingua::EN::Inflect follows the Oxford 1635English Dictionary and the guidelines in Fowler's Modern English 1636Usage, preferring the former where the two disagree. 1637 1638The module is built around standard British spelling, but is designed 1639to cope with common American variants as well. Slang, jargon, and 1640other English dialects are I<not> explicitly catered for. 1641 1642Where two or more inflected forms exist for a single word (typically a 1643"classical" form and a "modern" form), Lingua::EN::Inflect prefers the 1644more common form (typically the "modern" one), unless "classical" 1645processing has been specified 1646(see L<"MODERN VS CLASSICAL INFLECTIONS">). 1647 1648=head1 FORMING PLURALS 1649 1650=head2 Inflecting Plurals 1651 1652All of the C<PL_...> plural inflection subroutines take the word to be 1653inflected as their first argument and return the corresponding inflection. 1654Note that all such subroutines expect the I<singular> form of the word. The 1655results of passing a plural form are undefined (and unlikely to be correct). 1656 1657The C<PL_...> subroutines also take an optional second argument, 1658which indicates the grammatical "number" of the word (or of another word 1659with which the word being inflected must agree). If the "number" argument is 1660supplied and is not C<1> (or C<"one"> or C<"a">, or some other adjective that 1661implies the singular), the plural form of the word is returned. If the 1662"number" argument I<does> indicate singularity, the (uninflected) word 1663itself is returned. If the number argument is omitted, the plural form 1664is returned unconditionally. 1665 1666The various subroutines are: 1667 1668=over 8 1669 1670=item C<PL_N($;$)> 1671 1672The exportable subroutine C<PL_N()> takes a I<singular> English noun or 1673pronoun and returns its plural. Pronouns in the nominative ("I" -> 1674"we") and accusative ("me" -> "us") cases are handled, as are 1675possessive pronouns ("mine" -> "ours"). 1676 1677 1678=item C<PL_V($;$)> 1679 1680The exportable subroutine C<PL_V()> takes the I<singular> form of a 1681conjugated verb (that is, one which is already in the correct "person" 1682and "mood") and returns the corresponding plural conjugation. 1683 1684 1685=item C<PL_ADJ($;$)> 1686 1687The exportable subroutine C<PL_ADJ()> takes the I<singular> form of 1688certain types of adjectives and returns the corresponding plural form. 1689Adjectives that are correctly handled include: "numerical" adjectives 1690("a" -> "some"), demonstrative adjectives ("this" -> "these", "that" -> 1691"those"), and possessives ("my" -> "our", "cat's" -> "cats'", "child's" 1692-> "childrens'", etc.) 1693 1694 1695=item C<PL($;$)> 1696 1697The exportable subroutine C<PL()> takes a I<singular> English noun, 1698pronoun, verb, or adjective and returns its plural form. Where a word 1699has more than one inflection depending on its part of speech (for 1700example, the noun "thought" inflects to "thoughts", the verb "thought" 1701to "thought"), the (singular) noun sense is preferred to the (singular) 1702verb sense. 1703 1704Hence C<PL("knife")> will return "knives" ("knife" having been treated 1705as a singular noun), whereas C<PL("knifes")> will return "knife" 1706("knifes" having been treated as a 3rd person singular verb). 1707 1708The inherent ambiguity of such cases suggests that, 1709where the part of speech is known, C<PL_N>, C<PL_V>, and 1710C<PL_ADJ> should be used in preference to C<PL>. 1711 1712=back 1713 1714Note that all these subroutines ignore any whitespace surrounding the 1715word being inflected, but preserve that whitespace when the result is 1716returned. For example, C<S<PL(" cat ")>> returns S<" cats ">. 1717 1718 1719=head2 Numbered plurals 1720 1721The C<PL_...> subroutines return only the inflected word, not the count that 1722was used to inflect it. Thus, in order to produce "I saw 3 ducks", it 1723is necessary to use: 1724 1725 print "I saw $N ", PL_N($animal,$N), "\n"; 1726 1727Since the usual purpose of producing a plural is to make it agree with 1728a preceding count, Lingua::EN::Inflect provides an exportable subroutine 1729(C<NO($;$)>) which, given a word and a(n optional) count, returns the 1730count followed by the correctly inflected word. Hence the previous 1731example can be rewritten: 1732 1733 print "I saw ", NO($animal,$N), "\n"; 1734 1735In addition, if the count is zero (or some other term which implies 1736zero, such as C<"zero">, C<"nil">, etc.) the count is replaced by the 1737word "no". Hence, if C<$N> had the value zero, the previous example 1738would print the somewhat more elegant: 1739 1740 I saw no animals 1741 1742rather than: 1743 1744 I saw 0 animals 1745 1746Note that the name of the subroutine is a pun: the subroutine 1747returns either a number (a I<No.>) or a C<"no">, in front of the 1748inflected word. 1749 1750 1751=head2 Reducing the number of counts required 1752 1753In some contexts, the need to supply an explicit count to the various 1754C<PL_...> subroutines makes for tiresome repetition. For example: 1755 1756 print PL_ADJ("This",$errors), PL_N(" error",$errors), 1757 PL_V(" was",$errors), " fatal.\n"; 1758 1759Lingua::EN::Inflect therefore provides an exportable subroutine 1760(C<NUM($;$)>) which may be used to set a persistent "default number" 1761value. If such a value is set, it is subsequently used whenever an 1762optional second "number" argument is omitted. The default value thus set 1763can subsequently be removed by calling C<NUM()> with no arguments. 1764Hence we could rewrite the previous example: 1765 1766 NUM($errors); 1767 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"; 1768 NUM(); 1769 1770Normally, C<NUM()> returns its first argument, so that it may also 1771be "inlined" in contexts like: 1772 1773 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n" 1774 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n" 1775 if $severity > 1; 1776 1777However, in certain contexts (see L<"INTERPOLATING INFLECTIONS IN STRINGS">) 1778it is preferable that C<NUM()> return an empty string. Hence C<NUM()> 1779provides an optional second argument. If that argument is supplied (that is, if 1780it is defined) and evaluates to false, C<NUM> returns an empty string 1781instead of its first argument. For example: 1782 1783 print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n"; 1784 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n" 1785 if $severity > 1; 1786 1787 1788 1789=head2 Number-insensitive equality 1790 1791Lingua::EN::Inflect also provides a solution to the problem 1792of comparing words of differing plurality through the exportable subroutines 1793C<PL_eq($$)>, C<PL_N_eq($$)>, C<PL_V_eq($$)>, and C<PL_ADJ_eq($$)>. 1794Each of these subroutines takes two strings, and compares them 1795using the corresponding plural-inflection subroutine (C<PL()>, C<PL_N()>, 1796C<PL_V()>, and C<PL_ADJ()> respectively). 1797 1798The comparison returns true if: 1799 1800=over 8 1801 1802=item * 1803 1804the strings are C<eq>-equal, or 1805 1806=item * 1807 1808one string is C<eq>-equal to a plural form of the other, or 1809 1810=item * 1811 1812the strings are two different plural forms of the one word. 1813 1814=back 1815 1816Hence all of the following return true: 1817 1818 PL_eq("index","index") # RETURNS "eq" 1819 PL_eq("index","indexes") # RETURNS "s:p" 1820 PL_eq("index","indices") # RETURNS "s:p" 1821 PL_eq("indexes","index") # RETURNS "p:s" 1822 PL_eq("indices","index") # RETURNS "p:s" 1823 PL_eq("indices","indexes") # RETURNS "p:p" 1824 PL_eq("indexes","indices") # RETURNS "p:p" 1825 PL_eq("indices","indices") # RETURNS "eq" 1826 1827As indicated by the comments in the previous example, the actual value 1828returned by the various C<PL_eq_...> subroutines encodes which of the 1829three equality rules succeeded: "eq" is returned if the strings were 1830identical, "s:p" if the strings were singular and plural respectively, 1831"p:s" for plural and singular, and "p:p" for two distinct plurals. 1832Inequality is indicated by returning an empty string. 1833 1834It should be noted that two distinct singular words which happen to take 1835the same plural form are I<not> considered equal, nor are cases where 1836one (singular) word's plural is the other (plural) word's singular. 1837Hence all of the following return false: 1838 1839 PL_eq("base","basis") # ALTHOUGH BOTH -> "bases" 1840 PL_eq("syrinx","syringe") # ALTHOUGH BOTH -> "syringes" 1841 PL_eq("she","he") # ALTHOUGH BOTH -> "they" 1842 1843 PL_eq("opus","operas") # ALTHOUGH "opus" -> "opera" -> "operas" 1844 PL_eq("taxi","taxes") # ALTHOUGH "taxi" -> "taxis" -> "taxes" 1845 1846Note too that, although the comparison is "number-insensitive" it is I<not> 1847case-insensitive (that is, C<PL("time","Times")> returns false. To obtain 1848both number and case insensitivity, prefix both arguments with C<lc> 1849(that is, C<PL(lc "time", lc "Times")> returns true). 1850 1851 1852=head1 OTHER VERB FORMS 1853 1854=head2 Present participles 1855 1856C<Lingua::EN::Inflect> also provides the C<PART_PRES> subroutine, 1857which can take a 3rd person singular verb and 1858correctly inflect it to its present participle: 1859 1860 PART_PRES("runs") # "running" 1861 PART_PRES("loves") # "loving" 1862 PART_PRES("eats") # "eating" 1863 PART_PRES("bats") # "batting" 1864 PART_PRES("spies") # "spying" 1865 1866 1867=head1 PROVIDING INDEFINITE ARTICLES 1868 1869=head2 Selecting indefinite articles 1870 1871Lingua::EN::Inflect provides two exportable subroutines (C<A($;$)> and 1872C<AN($;$)>) which will correctly prepend the appropriate indefinite 1873article to a word, depending on its pronunciation. For example: 1874 1875 A("cat") # -> "a cat" 1876 AN("cat") # -> "a cat" 1877 A("euphemism") # -> "a euphemism" 1878 A("Euler number") # -> "an Euler number" 1879 A("hour") # -> "an hour" 1880 A("houri") # -> "a houri" 1881 1882The two subroutines are I<identical> in function and may be used 1883interchangeably. The only reason that two versions are provided is to 1884enhance the readability of code such as: 1885 1886 print "That is ", AN($errortype), " error\n; 1887 print "That is ", A($fataltype), " fatal error\n; 1888 1889Note that in both cases the actual article provided depends I<only> on 1890the pronunciation of the first argument, I<not> on the name of the 1891subroutine. 1892 1893C<A()> and C<AN()> will ignore any indefinite article that already 1894exists at the start of the string. Thus: 1895 1896 @half_arked = ( 1897 "a elephant", 1898 "a giraffe", 1899 "an ewe", 1900 "a orangutan", 1901 ); 1902 1903 print A($_), "\n" for @half_arked; 1904 1905 # prints: 1906 # an elephant 1907 # a giraffe 1908 # a ewe 1909 # an orangutan 1910 1911 1912C<A()> and C<AN()> both take an optional second argument. As with the 1913C<PL_...> subroutines, this second argument is a "number" specifier. If 1914its value is C<1> (or some other value implying singularity), C<A()> and 1915C<AN()> insert "a" or "an" as appropriate. If the number specifier 1916implies plurality, (C<A()> and C<AN()> insert the actual second argument instead. 1917For example: 1918 1919 A("cat",1) # -> "a cat" 1920 A("cat",2) # -> "2 cat" 1921 A("cat","one") # -> "one cat" 1922 A("cat","no") # -> "no cat" 1923 1924Note that, as implied by the previous examples, C<A()> and 1925C<AN()> both assume that their job is merely to provide the correct 1926qualifier for a word (that is: "a", "an", or the specified count). 1927In other words, they assume that the word they are given has 1928already been correctly inflected for plurality. Hence, if C<$N> 1929has the value 2, then: 1930 1931 print A("cat",$N); 1932 1933prints "2 cat", instead of "2 cats". The correct approach is to use: 1934 1935 print A(PL("cat",$N),$N); 1936 1937or, better still: 1938 1939 print NO("cat",$N); 1940 1941Note too that, like the various C<PL_...> subroutines, whenever C<A()> 1942and C<AN()> are called with only one argument they are subject to the 1943effects of any preceding call to C<NUM()>. Hence, another possible 1944solution is: 1945 1946 NUM($N); 1947 print A(PL("cat")); 1948 1949 1950=head2 Indefinite articles and initialisms 1951 1952"Initialisms" (sometimes inaccurately called "acronyms") are terms which 1953have been formed from the initial letters of words in a phrase (for 1954example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.) 1955 1956Such terms present a particular challenge when selecting between "a" 1957and "an", since they are sometimes pronounced as if they were a single 1958word ("nay-tow", "sku-ba") and sometimes as a series of letter names 1959("en-eff-ell", "ess-oh-ess"). 1960 1961C<A()> and C<AN()> cope with this dichotomy using a series of inbuilt 1962rules, which may be summarized as: 1963 1964=over 8 1965 1966=item 1. 1967 1968If the word starts with a single letter, followed by a period or dash 1969(for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray", "T-square"), then 1970choose the appropriate article for the I<sound> of the first letter 1971("an R.I.P.", "a C.O.D.", "an e-mail", "an X-ray", "a T-square"). 1972 1973=item 2. 1974 1975If the first two letters of the word are capitals, 1976consonants, and do not appear at the start of any known English word, 1977(for example, "LCD", "XML", "YWCA"), then once again choose "a" or 1978"an" depending on the I<sound> of the first letter ("an LCD", "an 1979XML", "a YWCA"). 1980 1981=item 3. 1982 1983Otherwise, assume the string is a capitalized word or a 1984pronounceable initialism (for example, "LED", "OPEC", "FAQ", "UNESCO"), and 1985therefore takes "a" or "an" according to the (apparent) pronunciation of 1986the entire word ("a LED", "an OPEC", "a FAQ", "a UNESCO"). 1987 1988=back 1989 1990Note that rules 1 and 3 together imply that the presence or absence of 1991punctuation may change the selection of indefinite article for a 1992particular initialism (for example, "a FAQ" but "an F.A.Q."). 1993 1994 1995=head2 Indefinite articles and "soft H's" 1996 1997Words beginning in the letter 'H' present another type of difficulty 1998when selecting a suitable indefinite article. In a few such words 1999(for example, "hour", "honour", "heir") the 'H' is not voiced at 2000all, and so such words inflect with "an". The remaining cases 2001("voiced H's") may be divided into two categories: 2002"hard H's" (such as "hangman", "holograph", "hat", etc.) and 2003"soft H's" (such as "hysterical", "horrendous", "holy", etc.) 2004 2005Hard H's always take "a" as their indefinite article, and soft 2006H's normally do so as well. But I<some> English speakers prefer 2007"an" for soft H's (although the practice is now generally considered an 2008affectation, rather than a legitimate grammatical alternative). 2009 2010At present, the C<A()> and C<AN()> subroutines ignore soft H's and use 2011"a" for any voiced 'H'. The author would, however, welcome feedback on 2012this decision (envisaging a possible future "soft H" mode). 2013 2014 2015=head1 INFLECTING ORDINALS 2016 2017Occasionally it is useful to present an integer value as an ordinal 2018rather than as a numeral. For example: 2019 2020 Enter password (1st attempt): ******** 2021 Enter password (2nd attempt): ********* 2022 Enter password (3rd attempt): ********* 2023 No 4th attempt. Access denied. 2024 2025To this end, Lingua::EN::Inflect provides the C<ORD()> subroutine. 2026<ORD()> takes a single argument and forms its ordinal equivalent. 2027If the argument isn't a numerical integer, it just adds "-th". 2028 2029 2030=head1 CONVERTING NUMBERS TO WORDS 2031 2032The exportable subroutine C<NUMWORDS> takes a number (cardinal or ordinal) 2033and returns an English representation of that number. In a scalar context 2034a string is returned. Hence: 2035 2036 use Lingua::EN::Inflect qw( NUMWORDS ); 2037 2038 $words = NUMWORDS(1234567); 2039 2040puts the string: 2041 2042 "one million, two hundred and thirty-four thousand, five hundred and sixty-seven" 2043 2044into $words. 2045 2046In a list context each comma-separated chunk is returned as a separate element. 2047Hence: 2048 2049 @words = NUMWORDS(1234567); 2050 2051puts the list: 2052 2053 ("one million", 2054 "two hundred and thirty-four thousand", 2055 "five hundred and sixty-seven") 2056 2057into @words. 2058 2059Non-digits (apart from an optional leading plus or minus sign, 2060any decimal points, and ordinal suffixes -- see below) are silently 2061ignored, so the following all produce identical results: 2062 2063 NUMWORDS(5551202); 2064 NUMWORDS(5_551_202); 2065 NUMWORDS("5,551,202"); 2066 NUMWORDS("555-1202"); 2067 2068That last case is a little awkward since it's almost certainly a phone number, 2069and "five million, five hundred and fifty-one thousand, two hundred and two" 2070probably isn't what's wanted. 2071 2072To overcome this, C<NUMWORDS()> takes an optional named argument, 'group', 2073which changes how numbers are translated. The argument must be a 2074positive integer less than four, which indicated how the digits of the 2075number are to be grouped. If the argument is C<1>, then each digit is 2076translated separately. If the argument is C<2>, pairs of digits 2077(starting from the I<left>) are grouped together. If the argument is 2078C<3>, triples of numbers (again, from the I<left>) are grouped. Hence: 2079 2080 NUMWORDS("555-1202", group=>1) 2081 2082returns C<"five, five, five, one, two, zero, two">, whilst: 2083 2084 NUMWORDS("555-1202", group=>2) 2085 2086returns C<"fifty-five, fifty-one, twenty, two">, and: 2087 2088 NUMWORDS("555-1202", group=>3) 2089 2090returns C<"five fifty-five, one twenty, two">. 2091 2092Phone numbers are often written in words as 2093C<"five..five..five..one..two..zero..two">, which is also easy to 2094achieve: 2095 2096 join '..', NUMWORDS("555-1202", group=>1) 2097 2098C<NUMWORDS> also handles decimal fractions. Hence: 2099 2100 NUMWORDS("1.2345") 2101 2102returns C<"one point two three four five"> in a scalar context 2103and C<("one","point","two","three","four","five")>) in an array context. 2104Exponent form (C<"1.234e56">) is not yet handled. 2105 2106Multiple decimal points are only translated in one of the "grouping" modes. 2107Hence: 2108 2109 NUMWORDS(101.202.303) 2110 2111returns C<"one hundred and one point two zero two three zero three">, 2112whereas: 2113 2114 NUMWORDS(101.202.303, group=>1) 2115 2116returns C<"one zero one point two zero two point three zero three">. 2117 2118The digit C<'0'> is unusual in that in may be translated to English as "zero", 2119"oh", or "nought". To cater for this diversity, C<NUMWORDS> may be passed 2120a named argument, 'zero', which may be set to 2121the desired translation of C<'0'>. For example: 2122 2123 print join "..", NUMWORDS("555-1202", group=>3, zero=>'oh') 2124 2125prints C<"five..five..five..one..two..oh..two">. 2126By default, zero is rendered as "zero". 2127 2128Likewise, the digit C<'1'> may be rendered as "one" or "a/an" (or very 2129occasionally other variants), depending on the context. So there is a 2130C<'one'> argument as well: 2131 2132 print NUMWORDS($_, one=>'a solitary', zero=>'no more'), 2133 PL(" bottle of beer on the wall\n", $_) 2134 for (3,2,1,0); 2135 2136 # prints: 2137 # three bottles of beer on the wall 2138 # two bottles of beer on the wall 2139 # a solitary bottle of beer on the wall 2140 # no more bottles of beer on the wall 2141 2142Care is needed if the word "a/an" is to be used as a C<'one'> value. 2143Unless the next word is known in advance, it's almost always necessary 2144to use the C<A> function as well: 2145 2146 print A( NUMWORDS(1, one=>'a') . " $_\n") 2147 for qw(cat aardvark ewe hour); 2148 2149 # prints: 2150 # a cat 2151 # an aardvark 2152 # a ewe 2153 # an hour 2154 2155Another major regional variation in number translation is the use of 2156"and" in certain contexts. The named argument 'and' 2157allows the programmer to specify how "and" should be handled. Hence: 2158 2159 print scalar NUMWORDS("765", 'and'=>'') 2160 2161prints "seven hundred sixty-five", instead of "seven hundred and sixty-five". 2162By default, the "and" is included. 2163 2164The translation of the decimal point is also subject to variation 2165(with "point", "dot", and "decimal" being the favorites). 2166The named argument 'decimal' allows the 2167programmer to how the decimal point should be rendered. Hence: 2168 2169 print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot') 2170 2171prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one zero one" 2172By default, the decimal point is rendered as "point". 2173 2174C<NUMWORDS> also handles the ordinal forms of numbers. So: 2175 2176 print scalar NUMWORDS('1st'); 2177 print scalar NUMWORDS('3rd'); 2178 print scalar NUMWORDS('202nd'); 2179 print scalar NUMWORDS('1000000th'); 2180 2181print: 2182 2183 first 2184 third 2185 two hundred and twenty-second 2186 one millionth 2187 2188Two common idioms in this regard are: 2189 2190 print scalar NUMWORDS(ORD($number)); 2191 2192and: 2193 2194 print scalar ORD(NUMWORDS($number)); 2195 2196These are identical in effect, except when $number contains a decimal: 2197 2198 $number = 99.09; 2199 print scalar NUMWORDS(ORD($number)); # ninety-ninth point zero nine 2200 print scalar ORD(NUMWORDS($number)); # ninety-nine point zero ninth 2201 2202Use whichever you feel is most appropriate. 2203 2204 2205=head1 INTERPOLATING INFLECTIONS IN STRINGS 2206 2207By far the commonest use of the inflection subroutines is to 2208produce message strings for various purposes. For example: 2209 2210 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n"; 2211 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n" 2212 if $severity > 1; 2213 2214Unfortunately the need to separate each subroutine call detracts 2215significantly from the readability of the resulting code. To ameliorate 2216this problem, Lingua::EN::Inflect provides an exportable string-interpolating 2217subroutine (C<inflect($)>), which recognizes calls to the various inflection 2218subroutines within a string and interpolates them appropriately. 2219 2220Using C<inflect> the previous example could be rewritten: 2221 2222 print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n"; 2223 print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n" 2224 if $severity > 1; 2225 2226Note that C<inflect> also correctly handles calls to the C<NUM()> subroutine 2227(whether interpolated or antecedent). The C<inflect()> subroutine has 2228a related extra feature, in that it I<automatically> cancels any "default 2229number" value before it returns its interpolated string. This means that 2230calls to C<NUM()> which are embedded in an C<inflect()>-interpolated 2231string do not "escape" and interfere with subsequent inflections. 2232 2233 2234=head1 MODERN VS CLASSICAL INFLECTIONS 2235 2236Certain words, mainly of Latin or Ancient Greek origin, can form 2237plurals either using the standard English "-s" suffix, or with 2238their original Latin or Greek inflections. For example: 2239 2240 PL("stigma") # -> "stigmas" or "stigmata" 2241 PL("torus") # -> "toruses" or "tori" 2242 PL("index") # -> "indexes" or "indices" 2243 PL("millennium") # -> "millenniums" or "millennia" 2244 PL("ganglion") # -> "ganglions" or "ganglia" 2245 PL("octopus") # -> "octopuses" or "octopodes" 2246 2247 2248Lingua::EN::Inflect caters to such words by providing an 2249"alternate state" of inflection known as "classical mode". 2250By default, words are inflected using their contemporary English 2251plurals, but if classical mode is invoked, the more traditional 2252plural forms are returned instead. 2253 2254The exportable subroutine C<classical()> controls this feature. 2255If C<classical()> is called with no arguments, it unconditionally 2256invokes classical mode. If it is called with a single argument, it 2257turns all classical inflects on or off (depending on whether the argument is 2258true or false). If called with two or more arguments, those arguments 2259specify which aspects of classical behaviour are to be used. 2260 2261Thus: 2262 2263 classical; # SWITCH ON CLASSICAL MODE 2264 print PL("formula"); # -> "formulae" 2265 2266 classical 0; # SWITCH OFF CLASSICAL MODE 2267 print PL("formula"); # -> "formulas" 2268 2269 classical $cmode; # CLASSICAL MODE IFF $cmode 2270 print PL("formula"); # -> "formulae" (IF $cmode) 2271 # -> "formulas" (OTHERWISE) 2272 2273 classical herd=>1; # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS 2274 print PL("wilderbeest"); # -> "wilderbeest" 2275 2276 classical names=>1; # SWITCH ON CLASSICAL MODE FOR NAMES 2277 print PL("sally"); # -> "sallies" 2278 print PL("Sally"); # -> "Sallys" 2279 2280Note however that C<classical()> has no effect on the inflection of words which 2281are now fully assimilated. Hence: 2282 2283 PL("forum") # ALWAYS -> "forums" 2284 PL("criterion") # ALWAYS -> "criteria" 2285 2286LEI assumes that a capitalized word is a person's name. So it forms the 2287plural according to the rules for names (which is that you don't 2288inflect, you just add -s or -es). You can choose to turn that behaviour 2289off (it's on by the default, even when the module isn't in classical 2290mode) by calling C< classical(names=>0) >; 2291 2292=head1 USER-DEFINED INFLECTIONS 2293 2294=head2 Adding plurals at run-time 2295 2296Lingua::EN::Inflect provides five exportable subroutines which allow 2297the programmer to override the module's behaviour for specific cases: 2298 2299=over 8 2300 2301=item C<def_noun($$)> 2302 2303The C<def_noun> subroutine takes a pair of string arguments: the singular and 2304plural forms of the noun being specified. The singular form 2305specifies a pattern to be interpolated (as C<m/^(?:$first_arg)$/i>). 2306Any noun matching this pattern is then replaced by the string in the 2307second argument. The second argument specifies a string which is 2308interpolated after the match succeeds, and is then used as the plural 2309form. For example: 2310 2311 def_noun 'cow' => 'kine'; 2312 def_noun '(.+i)o' => '$1i'; 2313 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!'; 2314 2315Note that both arguments should usually be specified in single quotes, 2316so that they are not interpolated when they are specified, but later (when 2317words are compared to them). As indicated by the last example, care 2318also needs to be taken with certain characters in the second argument, 2319to ensure that they are not unintentionally interpolated during comparison. 2320 2321The second argument string may also specify a second variant of the plural 2322form, to be used when "classical" plurals have been requested. The beginning 2323of the second variant is marked by a '|' character: 2324 2325 def_noun 'cow' => 'cows|kine'; 2326 def_noun '(.+i)o' => '$1os|$1i'; 2327 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints'; 2328 2329If no classical variant is given, the specified plural form is used in 2330both normal and "classical" modes. 2331 2332If the second argument is C<undef> instead of a string, then the 2333current user definition for the first argument is removed, and the 2334standard plural inflection(s) restored. 2335 2336Note that in all cases, later plural definitions for a particular 2337singular form replace earlier definitions of the same form. For example: 2338 2339 # FIRST, HIDE THE MODERN FORM.... 2340 def_noun 'aviatrix' => 'aviatrices'; 2341 2342 # LATER, HIDE THE CLASSICAL FORM... 2343 def_noun 'aviatrix' => 'aviatrixes'; 2344 2345 # FINALLY, RESTORE THE DEFAULT BEHAVIOUR... 2346 def_noun 'aviatrix' => undef; 2347 2348 2349Special care is also required when defining general patterns and 2350associated specific exceptions: put the more specific cases I<after> 2351the general pattern. For example: 2352 2353 def_noun '(.+)us' => '$1i'; # EVERY "-us" TO "-i" 2354 def_noun 'bus' => 'buses'; # EXCEPT FOR "bus" 2355 2356This "try-most-recently-defined-first" approach to matching 2357user-defined words is also used by C<def_verb>, C<def_a> and C<def_an>. 2358 2359 2360=item C<def_verb($$$$$$)> 2361 2362The C<def_verb> subroutine takes three pairs of string arguments (that is, six 2363arguments in total), specifying the singular and plural forms of the three 2364"persons" of verb. As with C<def_noun>, the singular forms are specifications of 2365run-time-interpolated patterns, whilst the plural forms are specifications of 2366(up to two) run-time-interpolated strings: 2367 2368 def_verb 'am' => 'are', 2369 'are' => 'are|art", 2370 'is' => 'are'; 2371 2372 def_verb 'have' => 'have', 2373 'have' => 'have", 2374 'ha(s|th)' => 'have'; 2375 2376Note that as with C<def_noun>, modern/classical variants of plurals 2377may be separately specified, subsequent definitions replace previous 2378ones, and C<undef>'ed plural forms revert to the standard behaviour. 2379 2380 2381=item C<def_adj($$)> 2382 2383The C<def_adj> subroutine takes a pair of string arguments, which specify 2384the singular and plural forms of the adjective being defined. 2385As with C<def_noun> and C<def_adj>, the singular forms are specifications of 2386run-time-interpolated patterns, whilst the plural forms are specifications of 2387(up to two) run-time-interpolated strings: 2388 2389 def_adj 'this' => 'these', 2390 def_adj 'red' => 'red|gules', 2391 2392As previously, modern/classical variants of plurals 2393may be separately specified, subsequent definitions replace previous 2394ones, and C<undef>'ed plural forms revert to the standard behaviour. 2395 2396 2397=item C<def_a($)> and C<def_an($)> 2398 2399The C<def_a> and C<def_an> subroutines each take a single argument, which 2400specifies a pattern. If a word passed to C<A()> or C<AN()> matches this 2401pattern, it will be prefixed (unconditionally) with the corresponding indefinite 2402article. For example: 2403 2404 def_a 'error'; 2405 def_a 'in.+'; 2406 2407 def_an 'mistake'; 2408 def_an 'error'; 2409 2410As with the other C<def_...> subroutines, such redefinitions are sequential 2411in effect so that, after the above example, "error" will be inflected with "an". 2412 2413=back 2414 2415=head2 The F<$HOME/.inflectrc> file 2416 2417When it is imported, Lingua::EN::Inflect executes (as Perl code) 2418the contents of any file named F<.inflectrc> which it finds in the 2419in the directory where F<Lingua/EN/Inflect.pm> is installed, 2420or in the current home directory (C<$ENV{HOME}>), or in both. 2421Note that the code is executed within the Lingua::EN::Inflect 2422namespace. 2423 2424Hence the user or the local Perl guru can make appropriate calls to 2425C<def_noun>, C<def_verb>, etc. in one of these F<.inflectrc> files, to 2426permanently and universally modify the behaviour of the module. For example 2427 2428 > cat /usr/local/lib/perl5/Text/Inflect/.inflectrc 2429 2430 def_noun "UNIX" => "UN*X|UNICES"; 2431 2432 def_verb "teco" => "teco", # LITERALLY: "to edit with TECO" 2433 "teco" => "teco", 2434 "tecos" => "teco"; 2435 2436 def_a "Euler.*"; # "Yewler" TURNS IN HIS GRAVE 2437 2438 2439Note that calls to the C<def_...> subroutines from within a program 2440will take precedence over the contents of the home directory 2441F<.inflectrc> file, which in turn takes precedence over the system-wide 2442F<.inflectrc> file. 2443 2444 2445=head1 DIAGNOSTICS 2446 2447On loading, if the Perl code in a F<.inflectrc> file is invalid 2448(syntactically or otherwise), an appropriate fatal error is issued. 2449A common problem is not ending the file with something that 2450evaluates to true (as the five C<def_...> subroutines do). 2451 2452Using the five C<def_...> subroutines directly in a program may also 2453result in fatal diagnostics, if a (singular) pattern or an interpolated 2454(plural) string is somehow invalid. 2455 2456Specific diagnostics related to user-defined inflections are: 2457 2458=over 8 2459 2460=item C<"Bad user-defined singular pattern:\n\t %s"> 2461 2462The singular form of a user-defined noun or verb 2463(as defined by a call to C<def_noun>, C<def_verb>, C<def_adj>, 2464C<def_a> or C<def_an>) is not a valid Perl regular expression. The 2465actual Perl error message is also given. 2466 2467=item C<"Bad user-defined plural string: '%s'"> 2468 2469The plural form(s) of a user-defined noun or verb 2470(as defined by a call to C<def_noun>, C<def_verb> or C<def_adj>) 2471is not a valid Perl interpolated string (usually because it 2472interpolates some undefined variable). 2473 2474=item C<"Bad .inflectrc file (%s):\n %s"> 2475 2476Some other problem occurred in loading the named local 2477or global F<.inflectrc> file. The Perl error message (including 2478the line number) is also given. 2479 2480=back 2481 2482There are I<no> diagnosable run-time error conditions for the actual 2483inflection subroutines, except C<NUMWORDS> and hence no run-time 2484diagnostics. If the inflection subroutines are unable to form a plural 2485via a user-definition or an inbuilt rule, they just "guess" the 2486commonest English inflection: adding "-s" for nouns, removing "-s" for 2487verbs, and no inflection for adjectives. 2488 2489C<Lingua::EN::Inflect::NUMWORDS()> can C<die> with the following messages: 2490 2491=over 8 2492 2493=item C<"Bad grouping option: %s"> 2494 2495The optional argument to C<NUMWORDS()> wasn't 1, 2 or 3. 2496 2497=item C<"Number out of range"> 2498 2499C<NUMWORDS()> was passed a number larger than 2500999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine hundred 2501and ninety-nine decillion, nine hundred and ninety-nine nonillion, nine 2502hundred and ninety-nine octillion, nine hundred and ninety-nine 2503septillion, nine hundred and ninety-nine sextillion, nine hundred and 2504ninety-nine quintillion, nine hundred and ninety-nine quadrillion, nine 2505hundred and ninety-nine trillion, nine hundred and ninety-nine billion, 2506nine hundred and ninety-nine million, nine hundred and ninety-nine 2507thousand, nine hundred and ninety-nine :-) 2508 2509The problem is that C<NUMWORDS> doesn't know any 2510words for number components bigger than "decillion". 2511 2512 2513=head1 OTHER ISSUES 2514 2515=head2 2nd Person precedence 2516 2517If a verb has identical 1st and 2nd person singular forms, but 2518different 1st and 2nd person plural forms, then when its plural is 2519constructed, the 2nd person plural form is always preferred. 2520 2521The author is not currently aware of any such verbs in English, but is 2522not quite arrogant enough to assume I<ipso facto> that none exist. 2523 2524 2525=head2 Nominative precedence 2526 2527The singular pronoun "it" presents a special problem because its plural form 2528can vary, depending on its "case". For example: 2529 2530 It ate my homework -> They ate my homework 2531 It ate it -> They ate them 2532 I fed my homework to it -> I fed my homework to them 2533 2534As a consequence of this ambiguity, C<PL()> or C<PL_N> have been implemented 2535so that they always return the I<nominative> plural (that is, "they"). 2536 2537However, when asked for the plural of an unambiguously I<accusative> 2538"it" (namely, C<PL("to it")>, C<PL_N("from it")>, C<PL("with it")>, 2539etc.), both subroutines will correctly return the accusative plural 2540("to them", "from them", "with them", etc.) 2541 2542 2543=head2 The plurality of zero 2544 2545The rules governing the choice between: 2546 2547 There were no errors. 2548 2549and 2550 2551 There was no error. 2552 2553are complex and often depend more on I<intent> rather than I<content>. 2554Hence it is infeasible to specify such rules algorithmically. 2555 2556Therefore, Lingua::EN::Text contents itself with the following compromise: If 2557the governing number is zero, inflections always return the plural form 2558unless the appropriate "classical" inflection is in effect, in which case the 2559singular form is always returned. 2560 2561Thus, the sequence: 2562 2563 NUM(0); 2564 print inflect "There PL(was) NO(choice)"; 2565 2566produces "There were no choices", whereas: 2567 2568 classical 'zero'; # or: classical(zero=>1); 2569 NUM(0); 2570 print inflect "There PL(was) NO(choice)"; 2571 2572it will print "There was no choice". 2573 2574 2575=head2 Homographs with heterogeneous plurals 2576 2577Another context in which intent (and not content) sometimes determines 2578plurality is where two distinct meanings of a word require different 2579plurals. For example: 2580 2581 Three basses were stolen from the band's equipment trailer. 2582 Three bass were stolen from the band's aquarium. 2583 2584 I put the mice next to the cheese. 2585 I put the mouses next to the computers. 2586 2587 Several thoughts about leaving crossed my mind. 2588 Several thought about leaving across my lawn. 2589 2590Lingua::EN::Inflect handles such words in two ways: 2591 2592=over 8 2593 2594=item * 2595 2596If both meanings of the word are the I<same> part of speech (for 2597example, "bass" is a noun in both sentences above), then one meaning 2598is chosen as the "usual" meaning, and only that meaning's plural is 2599ever returned by any of the inflection subroutines. 2600 2601=item * 2602 2603If each meaning of the word is a different part of speech (for 2604example, "thought" is both a noun and a verb), then the noun's 2605plural is returned by C<PL()> and C<PL_N()> and the verb's plural is 2606returned only by C<PL_V()>. 2607 2608=back 2609 2610Such contexts are, fortunately, uncommon (particularly 2611"same-part-of-speech" examples). An informal study of nearly 600 2612"difficult plurals" indicates that C<PL()> can be relied upon to "get 2613it right" about 98% of the time (although, of course, ichthyophilic 2614guitarists or cyber-behaviouralists may experience higher rates of 2615confusion). 2616 2617If the choice of a particular "usual inflection" is considered 2618inappropriate, it can always be reversed with a preliminary call 2619to the corresponding C<def_...> subroutine. 2620 2621=head1 NOTE 2622 2623I'm not taking any further correspondence on: 2624 2625=over 2626 2627=item "octopi". 2628 2629Despite the populist pandering of certain New World dictionaries, the 2630plural is "octopuses" or (for the pendantic classicist) "octopodes". The 2631suffix "-pus" is Greek, not Latin, so the plural is "-podes", not "pi". 2632 2633 2634=item "virus". 2635 2636Had no plural in Latin (possibly because it was a mass noun). 2637The only plural is the Anglicized "viruses". 2638 2639=back 2640 2641=head1 AUTHORS 2642 2643Damian Conway (damian@conway.org) 2644Matthew Persico (ORD inflection) 2645 2646 2647=head1 BUGS AND IRRITATIONS 2648 2649The endless inconsistencies of English. 2650 2651(I<Please> report words for which the correct plural or 2652indefinite article is not formed, so that the reliability 2653of Lingua::EN::Inflect can be improved.) 2654 2655 2656 2657=head1 COPYRIGHT 2658 2659 Copyright (c) 1997-2000, Damian Conway. All Rights Reserved. 2660 This module is free software. It may be used, redistributed 2661 and/or modified under the same terms as Perl itself. 2662