From 9d5f56299f3132da599542323457f713d59b0bde Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:07:26 -0400 Subject: [PATCH 1/9] simple test to demonstrate bug. fails --- test/test_en.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_en.py b/test/test_en.py index 6430c8bc..20abb671 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -200,6 +200,14 @@ def test_superlative(self): #--------------------------------------------------------------------------------------------------- +class WhiteboxInflectionTestCase(unittest.TestCase): + + def test_s_singular(self): + self.assertEquals(en.inflect.pluralize('lens'), 'lenses') + + +#--------------------------------------------------------------------------------------------------- + class TestQuantification(unittest.TestCase): def setUp(self): From be2c358640b33eb3ea0d522b9e2cd44026f75df2 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:16:11 -0400 Subject: [PATCH 2/9] change pluralization rule. test in prev commit passes (as does new one) --- pattern/text/en/inflect.py | 10 +++++----- test/test_en.py | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 1b76a87d..8503bf46 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -168,7 +168,6 @@ def referenced(word, article=INDEFINITE): ), # 4) Words that do not inflect. (( r"$", "" , "uninflected", False), ( r"$", "" , "uncountable", False), - ( r"s$", "s" , "s-singular" , False), ( r"fish$", "fish" , None, False), (r"([- ])bass$", "\\1bass" , None, False), ( r"ois$", "ois" , None, False), @@ -240,11 +239,12 @@ def referenced(word, article=INDEFINITE): ( r"o$", "i" , "o-i*", True), ( r"$", "i" , "-i*", True), ( r"$", "im" , "-im*", True) - ), # 9) -ch, -sh and -ss take -es in the plural + ), # 9) -ch, -sh and -ss and the s-singular group take -es in the plural # (e.g., churches, classes). 
- (( r"([cs])h$", "\\1hes" , None, False), - ( r"ss$", "sses" , None, False), - ( r"x$", "xes" , None, False) + (( r"([cs])h$", "\\1hes" , None, False), + ( r"ss$", "sses" , None, False), + ( r"x$", "xes" , None, False), + ( r"s$", "ses" , "s-singular", False) ), # 10) -f or -fe sometimes take -ves in the plural # (e.g, lives, wolves). (( r"([aeo]l)f$", "\\1ves" , None, False), diff --git a/test/test_en.py b/test/test_en.py index 20abb671..32df4986 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -205,6 +205,9 @@ class WhiteboxInflectionTestCase(unittest.TestCase): def test_s_singular(self): self.assertEquals(en.inflect.pluralize('lens'), 'lenses') + def test_diagnosis(self): + self.assertEquals(en.inflect.pluralize('diagnosis'), 'diagnoses') + #--------------------------------------------------------------------------------------------------- From 208f91134a743be14ef4f964ffc417f5169792f2 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:20:07 -0400 Subject: [PATCH 3/9] test for singularization. fails. 
still have to do the other direction --- test/test_en.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_en.py b/test/test_en.py index 32df4986..b3cd390f 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -202,12 +202,14 @@ def test_superlative(self): class WhiteboxInflectionTestCase(unittest.TestCase): - def test_s_singular(self): + def test_s_singular_pluralize(self): self.assertEquals(en.inflect.pluralize('lens'), 'lenses') - def test_diagnosis(self): + def test_diagnosis_pluralize(self): self.assertEquals(en.inflect.pluralize('diagnosis'), 'diagnoses') + def test_s_singular_singularize(self): + self.assertEquals(en.inflect.singularize('lenses'), 'lens') #--------------------------------------------------------------------------------------------------- From e36c78b4538272a9806c7adb581eaf75d7a61eb3 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:42:01 -0400 Subject: [PATCH 4/9] found another bug with ie-singular. wrote test to demonstrate --- test/test_en.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_en.py b/test/test_en.py index b3cd390f..4c4f0ce1 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -211,6 +211,9 @@ def test_diagnosis_pluralize(self): def test_s_singular_singularize(self): self.assertEquals(en.inflect.singularize('lenses'), 'lens') + def test_ie_singular_singularize(self): + self.assertEquals(en.inflect.singularize('cuties'), 'cutie') + #--------------------------------------------------------------------------------------------------- class TestQuantification(unittest.TestCase): From 4cd9bd2731f21271554a1a7d88deb9f14630cf74 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:42:35 -0400 Subject: [PATCH 5/9] fix to singularize logic. 
previous two tests pass --- pattern/text/en/inflect.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 8503bf46..fbf84950 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -542,6 +542,7 @@ def pluralize(word, pos=NOUN, custom={}, classical=True): "collie" , "hankie" , "lingerie" , "reverie" , "toughie" , "cookie" , "hippie" , "meanie" , "rookie" , "valkyrie" , )) +singular_s = set(plural_categories['s-singular']) singular_irregular = { "atlantes": "atlas", "atlases": "atlas", @@ -609,7 +610,10 @@ def singularize(word, pos=NOUN, custom={}): return word for x in singular_ie: if w.endswith(x+"s"): - return w + return x + for x in singular_s: + if w.endswith(x+"es"): + return x for x in singular_irregular: if w.endswith(x): return re.sub('(?i)'+x+'$', singular_irregular[x], word) From e33b7da651c35b5ba082e8a8a6772ece05e425f0 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 13:50:14 -0400 Subject: [PATCH 6/9] plural_categories: moved cannabis, added bus. 
tried my best to follow spacing approach --- pattern/text/en/inflect.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index fbf84950..837c13b3 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -291,17 +291,19 @@ def referenced(word, article=INDEFINITE): "advice" , "fruit" , "ketchup" , "meat" , "sand" , "bread" , "furniture" , "knowledge" , "mustard" , "software" , "butter" , "garbage" , "love" , "news" , "understanding", - "cheese" , "gravel" , "luggage" , "progress" , "water" - "electricity", "happiness" , "mathematics" , "research" , - "equipment" , "information", "mayonnaise" , "rice" + "cannabis" , "gravel" , "luggage" , "progress" , "water" , + "cheese" , "happiness" , "mathematics" , "research" , + "electricity", "information", "mayonnaise" , "rice", + "equipment" ], "s-singular": [ "acropolis" , "caddis" , "dais" , "glottis" , "pathos" , - "aegis" , "cannabis" , "digitalis" , "ibis" , "pelvis" , - "alias" , "canvas" , "epidermis" , "lens" , "polis" , - "asbestos" , "chaos" , "ethos" , "mantis" , "rhinoceros" , - "bathos" , "cosmos" , "gas" , "marquis" , "sassafras" , - "bias" , "glottis" , "metropolis" , "trellis" + "aegis" , "canvas" , "digitalis" , "ibis" , "pelvis" , + "alias" , "chaos" , "epidermis" , "lens" , "polis" , + "asbestos" , "cosmos" , "ethos" , "mantis" , "rhinoceros" , + "bathos" , "gas" , "marquis" , "sassafras" , + "bias" , "glottis" , "metropolis" , "trellis", + "bus" ], "ex-ices": [ "codex" , "murex" , "silex" From b016f1c32b19ff47bd1ff8bb575798c905980df8 Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 14:04:00 -0400 Subject: [PATCH 7/9] fix to maintain prefix --- pattern/text/en/inflect.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 837c13b3..08726255 100644 --- a/pattern/text/en/inflect.py +++ 
b/pattern/text/en/inflect.py @@ -612,10 +612,12 @@ def singularize(word, pos=NOUN, custom={}): return word for x in singular_ie: if w.endswith(x+"s"): - return x + prefix = w[:w.find(x)] + return prefix+x for x in singular_s: if w.endswith(x+"es"): - return x + prefix = w[:w.find(x)] + return prefix+x for x in singular_irregular: if w.endswith(x): return re.sub('(?i)'+x+'$', singular_irregular[x], word) From 18d883d42932f82d1190da697c2168b59c907a8e Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Sun, 18 Oct 2015 14:04:08 -0400 Subject: [PATCH 8/9] more tests --- test/test_en.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_en.py b/test/test_en.py index 4c4f0ce1..66d16efa 100644 --- a/test/test_en.py +++ b/test/test_en.py @@ -214,6 +214,23 @@ def test_s_singular_singularize(self): def test_ie_singular_singularize(self): self.assertEquals(en.inflect.singularize('cuties'), 'cutie') + def test_all_s_singular(self): + l = en.inflect.plural_categories['s-singular'] + singularize = en.inflect.singularize + pluralize = en.inflect.pluralize + for word in l: + self.assertEquals(pluralize(word), word+'es') + self.assertEquals(singularize(pluralize(word)), word) + + def test_all_ie_singular(self): + l = en.inflect.singular_ie + singularize = en.inflect.singularize + pluralize = en.inflect.pluralize + for word in l: + self.assertEquals(pluralize(word), word+'s') + self.assertEquals(singularize(pluralize(word)), word) + + #--------------------------------------------------------------------------------------------------- class TestQuantification(unittest.TestCase): From 1b05a0df9baa548ad05a0ad97b0ee84fe345be3d Mon Sep 17 00:00:00 2001 From: Jon Coe Date: Fri, 21 Jul 2017 07:32:02 -0400 Subject: [PATCH 9/9] simplify return by slicing. 
maintain capitalization --- pattern/text/en/inflect.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 08726255..186f9c00 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -612,12 +612,10 @@ def singularize(word, pos=NOUN, custom={}): return word for x in singular_ie: if w.endswith(x+"s"): - prefix = w[:w.find(x)] - return prefix+x + return word[:-1] for x in singular_s: if w.endswith(x+"es"): - prefix = w[:w.find(x)] - return prefix+x + return word[:-2] for x in singular_irregular: if w.endswith(x): return re.sub('(?i)'+x+'$', singular_irregular[x], word)