diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 1b76a87d..186f9c00 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -168,7 +168,6 @@ def referenced(word, article=INDEFINITE): ), # 4) Words that do not inflect. (( r"$", "" , "uninflected", False), ( r"$", "" , "uncountable", False), - ( r"s$", "s" , "s-singular" , False), ( r"fish$", "fish" , None, False), (r"([- ])bass$", "\\1bass" , None, False), ( r"ois$", "ois" , None, False), @@ -240,11 +239,12 @@ def referenced(word, article=INDEFINITE): ( r"o$", "i" , "o-i*", True), ( r"$", "i" , "-i*", True), ( r"$", "im" , "-im*", True) - ), # 9) -ch, -sh and -ss take -es in the plural + ), # 9) -ch, -sh and -ss and the s-singular group take -es in the plural # (e.g., churches, classes). - (( r"([cs])h$", "\\1hes" , None, False), - ( r"ss$", "sses" , None, False), - ( r"x$", "xes" , None, False) + (( r"([cs])h$", "\\1hes" , None, False), + ( r"ss$", "sses" , None, False), + ( r"x$", "xes" , None, False), + ( r"s$", "ses" , "s-singular", False) ), # 10) -f or -fe sometimes take -ves in the plural # (e.g, lives, wolves). 
(( r"([aeo]l)f$", "\\1ves" , None, False), @@ -291,17 +291,19 @@ def referenced(word, article=INDEFINITE): "advice" , "fruit" , "ketchup" , "meat" , "sand" , "bread" , "furniture" , "knowledge" , "mustard" , "software" , "butter" , "garbage" , "love" , "news" , "understanding", - "cheese" , "gravel" , "luggage" , "progress" , "water" - "electricity", "happiness" , "mathematics" , "research" , - "equipment" , "information", "mayonnaise" , "rice" + "cannabis" , "gravel" , "luggage" , "progress" , "water" , + "cheese" , "happiness" , "mathematics" , "research" , + "electricity", "information", "mayonnaise" , "rice", + "equipment" ], "s-singular": [ "acropolis" , "caddis" , "dais" , "glottis" , "pathos" , - "aegis" , "cannabis" , "digitalis" , "ibis" , "pelvis" , - "alias" , "canvas" , "epidermis" , "lens" , "polis" , - "asbestos" , "chaos" , "ethos" , "mantis" , "rhinoceros" , - "bathos" , "cosmos" , "gas" , "marquis" , "sassafras" , - "bias" , "glottis" , "metropolis" , "trellis" + "aegis" , "canvas" , "digitalis" , "ibis" , "pelvis" , + "alias" , "chaos" , "epidermis" , "lens" , "polis" , + "asbestos" , "cosmos" , "ethos" , "mantis" , "rhinoceros" , + "bathos" , "gas" , "marquis" , "sassafras" , + "bias" , "metropolis" , "trellis", + "bus" ], "ex-ices": [ "codex" , "murex" , "silex" @@ -542,6 +544,7 @@ def pluralize(word, pos=NOUN, custom={}, classical=True): "collie" , "hankie" , "lingerie" , "reverie" , "toughie" , "cookie" , "hippie" , "meanie" , "rookie" , "valkyrie" , )) +singular_s = set(plural_categories['s-singular']) singular_irregular = { "atlantes": "atlas", "atlases": "atlas", @@ -609,7 +612,10 @@ def singularize(word, pos=NOUN, custom={}): return word for x in singular_ie: if w.endswith(x+"s"): - return w + return word[:-1] + for x in singular_s: + if w.endswith(x+"es"): + return word[:-2] for x in singular_irregular: if w.endswith(x): return re.sub('(?i)'+x+'$', singular_irregular[x], word) diff --git a/test/test_en.py b/test/test_en.py index 
6430c8bc..66d16efa 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -200,6 +200,39 @@ def test_superlative(self): #--------------------------------------------------------------------------------------------------- +class WhiteboxInflectionTestCase(unittest.TestCase): + + def test_s_singular_pluralize(self): + self.assertEqual(en.inflect.pluralize('lens'), 'lenses') + + def test_diagnosis_pluralize(self): + self.assertEqual(en.inflect.pluralize('diagnosis'), 'diagnoses') + + def test_s_singular_singularize(self): + self.assertEqual(en.inflect.singularize('lenses'), 'lens') + + def test_ie_singular_singularize(self): + self.assertEqual(en.inflect.singularize('cuties'), 'cutie') + + def test_all_s_singular(self): + words = en.inflect.plural_categories['s-singular'] + singularize = en.inflect.singularize + pluralize = en.inflect.pluralize + for word in words: + self.assertEqual(pluralize(word), word+'es') + self.assertEqual(singularize(pluralize(word)), word) + + def test_all_ie_singular(self): + words = en.inflect.singular_ie + singularize = en.inflect.singularize + pluralize = en.inflect.pluralize + for word in words: + self.assertEqual(pluralize(word), word+'s') + self.assertEqual(singularize(pluralize(word)), word) + + +#--------------------------------------------------------------------------------------------------- + class TestQuantification(unittest.TestCase): def setUp(self):