diff --git a/CHANGELOG.md b/CHANGELOG.md index 27f7efe..d0699c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed +- **Shape validator agrees with server on `naics(...)` / `psc(...)` expansions.** The client-side `ShapeParser.validate()` previously rejected the canonical `shape=naics(code,description)` form (which the server has always accepted) and also rejected the alias `shape=naics_code(code,description)`. The parser now mirrors the server's `_EXPAND_ALIASES` (introduced in Tango PR makegov/tango#2259) and rewrites `naics_code(...)` / `psc_code(...)` to their canonical `naics(...)` / `psc(...)` form at parse time. Bare scalar leaves (`shape=naics_code` / `shape=psc_code`) are left untouched and still return the raw column value, matching the server. Schemas for `Contract`, `Forecast`, `Opportunity`, `Notice`, and `Vehicle` gained explicit `naics` / `psc` expand entries backed by the existing `CodeDescription` nested model. Fixes makegov/tango#2266. + ## [0.6.0] - 2026-05-07 ### Added diff --git a/tango/models.py b/tango/models.py index 95ca73e..0a24965 100644 --- a/tango/models.py +++ b/tango/models.py @@ -738,8 +738,7 @@ class ShapeConfig: # Default for list_vehicle_orders() VEHICLE_ORDERS_MINIMAL: Final = ( - "key,piid,award_date,obligated,total_contract_value,description," - "recipient(display_name,uei)" + "key,piid,award_date,obligated,total_contract_value,description,recipient(display_name,uei)" ) # Default for list_organizations() diff --git a/tango/shapes/explicit_schemas.py b/tango/shapes/explicit_schemas.py index 77067e9..e7b59ea 100644 --- a/tango/shapes/explicit_schemas.py +++ b/tango/shapes/explicit_schemas.py @@ -327,6 +327,11 @@ ), "major_program": FieldSchema(name="major_program", type=str, is_optional=True, is_list=False), "naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False), + # Expand form: shape=naics(code,description). Server PR #2259 also accepts + # naics_code(...) as an alias which the SDK parser normalizes to naics. + "naics": FieldSchema( + name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "number_of_actions": FieldSchema( name="number_of_actions", type=int, is_optional=True, is_list=False ), @@ -355,6 +360,11 @@ name="price_evaluation_percent_difference", type=str, is_optional=True, is_list=False ), "psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False), + # Expand form: shape=psc(code,description). Server PR #2259 also accepts + # psc_code(...) as an alias which the SDK parser normalizes to psc. + "psc": FieldSchema( + name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "purchase_card_as_payment_method": FieldSchema( name="purchase_card_as_payment_method", type=str, is_optional=True, is_list=False ), @@ -572,6 +582,11 @@ "id": FieldSchema(name="id", type=int, is_optional=False, is_list=False), "is_active": FieldSchema(name="is_active", type=bool, is_optional=True, is_list=False), "naics_code": FieldSchema(name="naics_code", type=str, is_optional=True, is_list=False), + # Expand form: shape=naics(code,description). Server PR #2259 also accepts + # naics_code(...) as an alias which the SDK parser normalizes to naics. + "naics": FieldSchema( + name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "place_of_performance": FieldSchema( name="place_of_performance", type=str, is_optional=False, is_list=False ), @@ -611,6 +626,11 @@ ), "meta": FieldSchema(name="meta", type=dict, is_optional=False, is_list=False), "naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False), + # Expand form: shape=naics(code,description). Server PR #2259 also accepts + # naics_code(...) as an alias which the SDK parser normalizes to naics. + "naics": FieldSchema( + name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "notice_history": FieldSchema( name="notice_history", type=dict, @@ -631,6 +651,11 @@ name="primary_contact", type=dict, is_optional=True, is_list=False, nested_model="Contact" ), "psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False), + # Expand form: shape=psc(code,description). Server PR #2259 also accepts + # psc_code(...) as an alias which the SDK parser normalizes to psc. + "psc": FieldSchema( + name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "response_deadline": FieldSchema( name="response_deadline", type=datetime, is_optional=False, is_list=False ), @@ -654,10 +679,20 @@ name="last_updated", type=datetime, is_optional=False, is_list=False ), "naics_code": FieldSchema(name="naics_code", type=str, is_optional=True, is_list=False), + # Expand form: shape=naics(code,description). Server PR #2259 also accepts + # naics_code(...) as an alias which the SDK parser normalizes to naics. + "naics": FieldSchema( + name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "notice_id": FieldSchema(name="notice_id", type=str, is_optional=False, is_list=False), "opportunity": FieldSchema(name="opportunity", type=dict, is_optional=False, is_list=False), "posted_date": FieldSchema(name="posted_date", type=datetime, is_optional=False, is_list=False), "psc_code": FieldSchema(name="psc_code", type=str, is_optional=False, is_list=False), + # Expand form: shape=psc(code,description). Server PR #2259 also accepts + # psc_code(...) as an alias which the SDK parser normalizes to psc. + "psc": FieldSchema( + name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "response_deadline": FieldSchema( name="response_deadline", type=datetime, is_optional=False, is_list=False ), @@ -1120,7 +1155,17 @@ ), "opportunity_id": FieldSchema(name="opportunity_id", type=str, is_optional=True, is_list=False), "naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False), + # Expand form: shape=naics(code,description). Server PR #2259 also accepts + # naics_code(...) as an alias which the SDK parser normalizes to naics. + "naics": FieldSchema( + name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False), + # Expand form: shape=psc(code,description). Server PR #2259 also accepts + # psc_code(...) as an alias which the SDK parser normalizes to psc. + "psc": FieldSchema( + name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription" + ), "set_aside": FieldSchema(name="set_aside", type=str, is_optional=True, is_list=False), # Shaping expansions "awardees": FieldSchema( diff --git a/tango/shapes/parser.py b/tango/shapes/parser.py index bb73917..5fd7187 100644 --- a/tango/shapes/parser.py +++ b/tango/shapes/parser.py @@ -26,6 +26,62 @@ from tango.shapes.models import FieldSpec, ShapeSpec from tango.shapes.schema import SchemaRegistry +# Global expand-name aliases. Mirrors the server's `_EXPAND_ALIASES` in +# `api/shaping/grammar.py` (Tango PR #2259, issue #2266). Keys are user-typed +# alias names; values are the canonical names returned by the API and used by +# the schema registry. The rewrite only fires when the alias is used as an +# *expansion* (has nested fields or a wildcard) — bare scalars like +# `shape=naics_code` are left alone and continue to return the raw column. +# +# The canonical name (``naics`` / ``psc``) becomes the output key on the +# response regardless of which spelling the caller used. Keep this list short: +# aliases are for well-known historical spellings, not naming inconsistencies. +_EXPAND_ALIASES: dict[str, str] = { + "naics_code": "naics", + "psc_code": "psc", +} + + +def _normalize_expand_aliases(fields: list[FieldSpec]) -> None: + """Rewrite expand-form alias names to their canonical form, in place. + + Walks ``fields`` recursively. A field is treated as an "expansion" (and + therefore eligible for alias rewriting) when it has ``nested_fields`` or + ``is_wildcard`` set. Bare scalar leaves are left untouched so callers can + still request the raw column value via ``shape=naics_code``. + + If both the alias and its canonical name appear as expansions at the same + level (e.g. ``shape=naics(code),naics_code(description)``), the canonical + wins and the alias entry is dropped silently — this matches the server's + behavior and avoids emitting two output keys for the same data. + + Args: + fields: List of FieldSpec objects to normalize (mutated in place). + """ + # First pass: collect names of expand-form fields at this level so we can + # detect canonical/alias collisions before rewriting. + expand_names = {f.name for f in fields if f.nested_fields or f.is_wildcard} + + # Second pass: rewrite or drop aliases, then recurse into nested fields. + rewritten: list[FieldSpec] = [] + for field in fields: + is_expand = bool(field.nested_fields) or field.is_wildcard + canonical = _EXPAND_ALIASES.get(field.name) if is_expand else None + + if canonical is not None: + if canonical in expand_names and canonical != field.name: + # Canonical already requested at this level — drop the alias. + continue + field.name = canonical + + if field.nested_fields: + _normalize_expand_aliases(field.nested_fields) + + rewritten.append(field) + + # Replace the contents of the input list (caller holds the reference). + fields[:] = rewritten + def _suggest_field_correction(invalid_field: str, valid_fields: list[str]) -> str | None: """Suggest a correction for an invalid field name @@ -148,6 +204,10 @@ def parse(self, shape: str) -> ShapeSpec: # Parse the shape try: fields = self._parse_field_list(shape, 0)[0] + # Rewrite expand-form aliases (e.g. naics_code(...) -> naics(...)) + # to their canonical names. Mirrors server's `_EXPAND_ALIASES` so + # both spellings are accepted client-side. See issue #2266. + _normalize_expand_aliases(fields) shape_spec = ShapeSpec(fields=fields) # Cache the result diff --git a/tests/test_shapes.py b/tests/test_shapes.py index 61cd869..8faa42f 100644 --- a/tests/test_shapes.py +++ b/tests/test_shapes.py @@ -465,6 +465,142 @@ def test_validate_wildcard_always_valid(self): parser.validate(spec, MockModel) # Should not raise +class TestShapeParserExpandAliases: + """Test naics_code/psc_code -> naics/psc expand-alias normalization. + + Mirrors the server's `_EXPAND_ALIASES` map (Tango PR #2259, issue #2266): + when used as an expansion (with parens / wildcard), `naics_code(...)` is + rewritten to `naics(...)` and `psc_code(...)` to `psc(...)`. Bare scalar + leaves are left alone so `shape=naics_code` still returns the raw column. + """ + + def test_canonical_naics_expand_accepted_on_contract(self): + """Canonical `naics(code,description)` validates against Contract.""" + parser = ShapeParser() + spec = parser.parse("naics(code,description)") + + assert spec.fields[0].name == "naics" + assert spec.fields[0].nested_fields is not None + assert [f.name for f in spec.fields[0].nested_fields] == ["code", "description"] + + parser.validate(spec, Contract) # Should not raise + + def test_alias_naics_code_expand_rewritten_to_naics(self): + """Alias form `naics_code(code,description)` is rewritten to `naics`.""" + parser = ShapeParser() + spec = parser.parse("naics_code(code,description)") + + # Name is rewritten at parse time so downstream type generation and + # factory parsing see the canonical key the server returns. + assert spec.fields[0].name == "naics" + assert [f.name for f in spec.fields[0].nested_fields] == ["code", "description"] + + parser.validate(spec, Contract) # Should not raise + + def test_canonical_psc_expand_accepted_on_contract(self): + """Canonical `psc(code,description)` validates against Contract.""" + parser = ShapeParser() + spec = parser.parse("psc(code,description)") + + assert spec.fields[0].name == "psc" + parser.validate(spec, Contract) # Should not raise + + def test_alias_psc_code_expand_rewritten_to_psc(self): + """Alias form `psc_code(code,description)` is rewritten to `psc`.""" + parser = ShapeParser() + spec = parser.parse("psc_code(code,description)") + + assert spec.fields[0].name == "psc" + parser.validate(spec, Contract) # Should not raise + + def test_bare_naics_code_scalar_is_not_rewritten(self): + """Bare scalar `naics_code` keeps its name (returns raw column).""" + parser = ShapeParser() + spec = parser.parse("naics_code") + + # Scalar form is NOT touched — the alias only fires for expansions. + assert spec.fields[0].name == "naics_code" + assert spec.fields[0].nested_fields is None + assert spec.fields[0].is_wildcard is False + + parser.validate(spec, Contract) # naics_code is a real scalar field + + def test_bare_psc_code_scalar_is_not_rewritten(self): + """Bare scalar `psc_code` keeps its name (returns raw column).""" + parser = ShapeParser() + spec = parser.parse("psc_code") + + assert spec.fields[0].name == "psc_code" + assert spec.fields[0].nested_fields is None + + parser.validate(spec, Contract) + + def test_alias_naics_code_wildcard_expand_rewritten(self): + """Wildcard expansion `naics_code(*)` is rewritten to `naics(*)`.""" + parser = ShapeParser() + spec = parser.parse("naics_code(*)") + + assert spec.fields[0].name == "naics" + assert spec.fields[0].is_wildcard is True + + parser.validate(spec, Contract) # Should not raise + + def test_alias_collision_drops_alias_keeps_canonical(self): + """When both `naics(...)` and `naics_code(...)` appear, canonical wins. + + Matches server behavior — emitting two output keys for the same data + would surprise callers, so the alias entry is dropped silently. + """ + parser = ShapeParser() + spec = parser.parse("naics(code),naics_code(description)") + + # Only one entry should remain — the canonical `naics` one. + names = [f.name for f in spec.fields] + assert names == ["naics"] + assert [f.name for f in spec.fields[0].nested_fields] == ["code"] + + def test_scalar_and_expand_alias_coexist(self): + """Scalar `naics_code` and expand `naics_code(...)` both survive. + + The expand gets rewritten to `naics`; the scalar stays as + `naics_code`. They're now distinct keys with distinct meanings — + the scalar returns the raw int/str, the expand returns the dict. + """ + parser = ShapeParser() + spec = parser.parse("key,naics_code,naics_code(code,description)") + + names = [f.name for f in spec.fields] + assert names == ["key", "naics_code", "naics"] + assert spec.fields[1].nested_fields is None # scalar + assert spec.fields[2].nested_fields is not None # expand + + def test_alias_rewrite_applies_in_nested_expansions(self): + """Aliases nested inside another expansion are also rewritten. + + The parent expansion field is unrelated; we just want to confirm the + normalization walks recursively through ``nested_fields``. + """ + parser = ShapeParser() + # `recipient` is a valid expansion on Contract; nest a naics_code + # alias inside to confirm the walk recurses. + spec = parser.parse("recipient(uei,display_name),naics_code(code)") + + assert [f.name for f in spec.fields] == ["recipient", "naics"] + assert spec.fields[1].nested_fields[0].name == "code" + + def test_alias_accepted_on_opportunity(self): + """Server accepts the alias on opportunities too — schema covers it.""" + # Use a model class that exists; Contract is already covered above. + # Smoke-test that the validator finds `naics` on a couple of schemas + # that previously only had `naics_code`. + from tango.models import Opportunity + + parser = ShapeParser() + spec = parser.parse("naics_code(code,description)") + assert spec.fields[0].name == "naics" + parser.validate(spec, Opportunity) # Should not raise + + class TestShapeParserCaching: """Test shape parser caching"""