Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Fixed
- **Shape validator agrees with server on `naics(...)` / `psc(...)` expansions.** The client-side `ShapeParser.validate()` previously rejected the canonical `shape=naics(code,description)` form (which the server has always accepted) and also rejected the alias `shape=naics_code(code,description)`. The parser now mirrors the server's `_EXPAND_ALIASES` (introduced in Tango PR makegov/tango#2259) and rewrites `naics_code(...)` / `psc_code(...)` to their canonical `naics(...)` / `psc(...)` form at parse time. Bare scalar leaves (`shape=naics_code` / `shape=psc_code`) are left untouched and still return the raw column value, matching the server. Schemas for `Contract`, `Forecast`, `Opportunity`, `Notice`, and `Vehicle` gained explicit `naics` / `psc` expand entries backed by the existing `CodeDescription` nested model. Fixes makegov/tango#2266.

## [0.6.0] - 2026-05-07

### Added
Expand Down
3 changes: 1 addition & 2 deletions tango/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,8 +738,7 @@ class ShapeConfig:

# Default for list_vehicle_orders()
VEHICLE_ORDERS_MINIMAL: Final = (
"key,piid,award_date,obligated,total_contract_value,description,"
"recipient(display_name,uei)"
"key,piid,award_date,obligated,total_contract_value,description,recipient(display_name,uei)"
)

# Default for list_organizations()
Expand Down
45 changes: 45 additions & 0 deletions tango/shapes/explicit_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,11 @@
),
"major_program": FieldSchema(name="major_program", type=str, is_optional=True, is_list=False),
"naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False),
# Expand form: shape=naics(code,description). Server PR #2259 also accepts
# naics_code(...) as an alias which the SDK parser normalizes to naics.
"naics": FieldSchema(
name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"number_of_actions": FieldSchema(
name="number_of_actions", type=int, is_optional=True, is_list=False
),
Expand Down Expand Up @@ -355,6 +360,11 @@
name="price_evaluation_percent_difference", type=str, is_optional=True, is_list=False
),
"psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False),
# Expand form: shape=psc(code,description). Server PR #2259 also accepts
# psc_code(...) as an alias which the SDK parser normalizes to psc.
"psc": FieldSchema(
name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"purchase_card_as_payment_method": FieldSchema(
name="purchase_card_as_payment_method", type=str, is_optional=True, is_list=False
),
Expand Down Expand Up @@ -572,6 +582,11 @@
"id": FieldSchema(name="id", type=int, is_optional=False, is_list=False),
"is_active": FieldSchema(name="is_active", type=bool, is_optional=True, is_list=False),
"naics_code": FieldSchema(name="naics_code", type=str, is_optional=True, is_list=False),
# Expand form: shape=naics(code,description). Server PR #2259 also accepts
# naics_code(...) as an alias which the SDK parser normalizes to naics.
"naics": FieldSchema(
name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"place_of_performance": FieldSchema(
name="place_of_performance", type=str, is_optional=False, is_list=False
),
Expand Down Expand Up @@ -611,6 +626,11 @@
),
"meta": FieldSchema(name="meta", type=dict, is_optional=False, is_list=False),
"naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False),
# Expand form: shape=naics(code,description). Server PR #2259 also accepts
# naics_code(...) as an alias which the SDK parser normalizes to naics.
"naics": FieldSchema(
name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"notice_history": FieldSchema(
name="notice_history",
type=dict,
Expand All @@ -631,6 +651,11 @@
name="primary_contact", type=dict, is_optional=True, is_list=False, nested_model="Contact"
),
"psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False),
# Expand form: shape=psc(code,description). Server PR #2259 also accepts
# psc_code(...) as an alias which the SDK parser normalizes to psc.
"psc": FieldSchema(
name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"response_deadline": FieldSchema(
name="response_deadline", type=datetime, is_optional=False, is_list=False
),
Expand All @@ -654,10 +679,20 @@
name="last_updated", type=datetime, is_optional=False, is_list=False
),
"naics_code": FieldSchema(name="naics_code", type=str, is_optional=True, is_list=False),
# Expand form: shape=naics(code,description). Server PR #2259 also accepts
# naics_code(...) as an alias which the SDK parser normalizes to naics.
"naics": FieldSchema(
name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"notice_id": FieldSchema(name="notice_id", type=str, is_optional=False, is_list=False),
"opportunity": FieldSchema(name="opportunity", type=dict, is_optional=False, is_list=False),
"posted_date": FieldSchema(name="posted_date", type=datetime, is_optional=False, is_list=False),
"psc_code": FieldSchema(name="psc_code", type=str, is_optional=False, is_list=False),
# Expand form: shape=psc(code,description). Server PR #2259 also accepts
# psc_code(...) as an alias which the SDK parser normalizes to psc.
"psc": FieldSchema(
name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"response_deadline": FieldSchema(
name="response_deadline", type=datetime, is_optional=False, is_list=False
),
Expand Down Expand Up @@ -1120,7 +1155,17 @@
),
"opportunity_id": FieldSchema(name="opportunity_id", type=str, is_optional=True, is_list=False),
"naics_code": FieldSchema(name="naics_code", type=int, is_optional=True, is_list=False),
# Expand form: shape=naics(code,description). Server PR #2259 also accepts
# naics_code(...) as an alias which the SDK parser normalizes to naics.
"naics": FieldSchema(
name="naics", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"psc_code": FieldSchema(name="psc_code", type=str, is_optional=True, is_list=False),
# Expand form: shape=psc(code,description). Server PR #2259 also accepts
# psc_code(...) as an alias which the SDK parser normalizes to psc.
"psc": FieldSchema(
name="psc", type=dict, is_optional=True, is_list=False, nested_model="CodeDescription"
),
"set_aside": FieldSchema(name="set_aside", type=str, is_optional=True, is_list=False),
# Shaping expansions
"awardees": FieldSchema(
Expand Down
60 changes: 60 additions & 0 deletions tango/shapes/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,62 @@
from tango.shapes.models import FieldSpec, ShapeSpec
from tango.shapes.schema import SchemaRegistry

# Global expand-name aliases. Mirrors the server's `_EXPAND_ALIASES` in
# `api/shaping/grammar.py` (Tango PR #2259, issue #2266). Keys are user-typed
# alias names; values are the canonical names returned by the API and used by
# the schema registry. The rewrite only fires when the alias is used as an
# *expansion* (has nested fields or a wildcard) — bare scalars like
# `shape=naics_code` are left alone and continue to return the raw column.
#
# The canonical name (``naics`` / ``psc``) becomes the output key on the
# response regardless of which spelling the caller used. Keep this list short:
# aliases are for well-known historical spellings, not naming inconsistencies.
_EXPAND_ALIASES: dict[str, str] = {
"naics_code": "naics",
"psc_code": "psc",
}


def _normalize_expand_aliases(fields: list[FieldSpec]) -> None:
"""Rewrite expand-form alias names to their canonical form, in place.

Walks ``fields`` recursively. A field is treated as an "expansion" (and
therefore eligible for alias rewriting) when it has ``nested_fields`` or
``is_wildcard`` set. Bare scalar leaves are left untouched so callers can
still request the raw column value via ``shape=naics_code``.

If both the alias and its canonical name appear as expansions at the same
level (e.g. ``shape=naics(code),naics_code(description)``), the canonical
wins and the alias entry is dropped silently — this matches the server's
behavior and avoids emitting two output keys for the same data.

Args:
fields: List of FieldSpec objects to normalize (mutated in place).
"""
# First pass: collect names of expand-form fields at this level so we can
# detect canonical/alias collisions before rewriting.
expand_names = {f.name for f in fields if f.nested_fields or f.is_wildcard}

# Second pass: rewrite or drop aliases, then recurse into nested fields.
rewritten: list[FieldSpec] = []
for field in fields:
is_expand = bool(field.nested_fields) or field.is_wildcard
canonical = _EXPAND_ALIASES.get(field.name) if is_expand else None

if canonical is not None:
if canonical in expand_names and canonical != field.name:
# Canonical already requested at this level — drop the alias.
continue
field.name = canonical

if field.nested_fields:
_normalize_expand_aliases(field.nested_fields)

rewritten.append(field)

# Replace the contents of the input list (caller holds the reference).
fields[:] = rewritten


def _suggest_field_correction(invalid_field: str, valid_fields: list[str]) -> str | None:
"""Suggest a correction for an invalid field name
Expand Down Expand Up @@ -148,6 +204,10 @@ def parse(self, shape: str) -> ShapeSpec:
# Parse the shape
try:
fields = self._parse_field_list(shape, 0)[0]
# Rewrite expand-form aliases (e.g. naics_code(...) -> naics(...))
# to their canonical names. Mirrors server's `_EXPAND_ALIASES` so
# both spellings are accepted client-side. See issue #2266.
_normalize_expand_aliases(fields)
shape_spec = ShapeSpec(fields=fields)

# Cache the result
Expand Down
136 changes: 136 additions & 0 deletions tests/test_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,142 @@ def test_validate_wildcard_always_valid(self):
parser.validate(spec, MockModel) # Should not raise


class TestShapeParserExpandAliases:
"""Test naics_code/psc_code -> naics/psc expand-alias normalization.

Mirrors the server's `_EXPAND_ALIASES` map (Tango PR #2259, issue #2266):
when used as an expansion (with parens / wildcard), `naics_code(...)` is
rewritten to `naics(...)` and `psc_code(...)` to `psc(...)`. Bare scalar
leaves are left alone so `shape=naics_code` still returns the raw column.
"""

def test_canonical_naics_expand_accepted_on_contract(self):
"""Canonical `naics(code,description)` validates against Contract."""
parser = ShapeParser()
spec = parser.parse("naics(code,description)")

assert spec.fields[0].name == "naics"
assert spec.fields[0].nested_fields is not None
assert [f.name for f in spec.fields[0].nested_fields] == ["code", "description"]

parser.validate(spec, Contract) # Should not raise

def test_alias_naics_code_expand_rewritten_to_naics(self):
"""Alias form `naics_code(code,description)` is rewritten to `naics`."""
parser = ShapeParser()
spec = parser.parse("naics_code(code,description)")

# Name is rewritten at parse time so downstream type generation and
# factory parsing see the canonical key the server returns.
assert spec.fields[0].name == "naics"
assert [f.name for f in spec.fields[0].nested_fields] == ["code", "description"]

parser.validate(spec, Contract) # Should not raise

def test_canonical_psc_expand_accepted_on_contract(self):
"""Canonical `psc(code,description)` validates against Contract."""
parser = ShapeParser()
spec = parser.parse("psc(code,description)")

assert spec.fields[0].name == "psc"
parser.validate(spec, Contract) # Should not raise

def test_alias_psc_code_expand_rewritten_to_psc(self):
"""Alias form `psc_code(code,description)` is rewritten to `psc`."""
parser = ShapeParser()
spec = parser.parse("psc_code(code,description)")

assert spec.fields[0].name == "psc"
parser.validate(spec, Contract) # Should not raise

def test_bare_naics_code_scalar_is_not_rewritten(self):
"""Bare scalar `naics_code` keeps its name (returns raw column)."""
parser = ShapeParser()
spec = parser.parse("naics_code")

# Scalar form is NOT touched — the alias only fires for expansions.
assert spec.fields[0].name == "naics_code"
assert spec.fields[0].nested_fields is None
assert spec.fields[0].is_wildcard is False

parser.validate(spec, Contract) # naics_code is a real scalar field

def test_bare_psc_code_scalar_is_not_rewritten(self):
"""Bare scalar `psc_code` keeps its name (returns raw column)."""
parser = ShapeParser()
spec = parser.parse("psc_code")

assert spec.fields[0].name == "psc_code"
assert spec.fields[0].nested_fields is None

parser.validate(spec, Contract)

def test_alias_naics_code_wildcard_expand_rewritten(self):
"""Wildcard expansion `naics_code(*)` is rewritten to `naics(*)`."""
parser = ShapeParser()
spec = parser.parse("naics_code(*)")

assert spec.fields[0].name == "naics"
assert spec.fields[0].is_wildcard is True

parser.validate(spec, Contract) # Should not raise

def test_alias_collision_drops_alias_keeps_canonical(self):
"""When both `naics(...)` and `naics_code(...)` appear, canonical wins.

Matches server behavior — emitting two output keys for the same data
would surprise callers, so the alias entry is dropped silently.
"""
parser = ShapeParser()
spec = parser.parse("naics(code),naics_code(description)")

# Only one entry should remain — the canonical `naics` one.
names = [f.name for f in spec.fields]
assert names == ["naics"]
assert [f.name for f in spec.fields[0].nested_fields] == ["code"]

def test_scalar_and_expand_alias_coexist(self):
"""Scalar `naics_code` and expand `naics_code(...)` both survive.

The expand gets rewritten to `naics`; the scalar stays as
`naics_code`. They're now distinct keys with distinct meanings —
the scalar returns the raw int/str, the expand returns the dict.
"""
parser = ShapeParser()
spec = parser.parse("key,naics_code,naics_code(code,description)")

names = [f.name for f in spec.fields]
assert names == ["key", "naics_code", "naics"]
assert spec.fields[1].nested_fields is None # scalar
assert spec.fields[2].nested_fields is not None # expand

def test_alias_rewrite_applies_in_nested_expansions(self):
"""Aliases nested inside another expansion are also rewritten.

The parent expansion field is unrelated; we just want to confirm the
normalization walks recursively through ``nested_fields``.
"""
parser = ShapeParser()
# `recipient` is a valid expansion on Contract; nest a naics_code
# alias inside to confirm the walk recurses.
spec = parser.parse("recipient(uei,display_name),naics_code(code)")

assert [f.name for f in spec.fields] == ["recipient", "naics"]
assert spec.fields[1].nested_fields[0].name == "code"

def test_alias_accepted_on_opportunity(self):
"""Server accepts the alias on opportunities too — schema covers it."""
# Use a model class that exists; Contract is already covered above.
# Smoke-test that the validator finds `naics` on a couple of schemas
# that previously only had `naics_code`.
from tango.models import Opportunity

parser = ShapeParser()
spec = parser.parse("naics_code(code,description)")
assert spec.fields[0].name == "naics"
parser.validate(spec, Opportunity) # Should not raise


class TestShapeParserCaching:
"""Test shape parser caching"""

Expand Down
Loading