From b48efdfa95934285b5b47e3c10dc6c1af201c18b Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 12:04:11 -0400 Subject: [PATCH 1/9] Add Conley (1999) spatial HAC SE via vcov_type="conley" (Phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires Conley spatial-HAC standard errors into DifferenceInDifferences, TwoWayFixedEffects, and MultiPeriodDiD via four new keyword-only kwargs: conley_coords (lat/lon column tuple), conley_cutoff_km (required, no default per no-silent-failures), conley_metric ("haversine" default, "euclidean", or callable), conley_kernel ("bartlett" default, "uniform"). Variance estimator (Conley 1999 Eq 4.2): Var̂(β) = (X'X)^{-1} · (Σ_{i,j} K(d_ij/h) X_i ε_i ε_j X_j') · (X'X)^{-1}. FWL composes cleanly because the meat depends only on scores X·ε which within-transformation preserves -- TwoWayFixedEffects with conley is supported, unlike hc2/hc2_bm which need the full hat matrix. TWFE auto-cluster-at-unit is disabled when vcov_type="conley"; explicit cluster= raises NotImplementedError (combined product kernel deferred to Phase 2). Helpers live in new module diff_diff/conley.py (separated from linalg.py to keep the linear-algebra backend focused; revisited from the original plan when Phase 2's k-d-tree fast path was scoped). Cross-language parity vs R conleyreg (Düsterhöft 2021, CRAN v0.1.9) at ≤1e-6 on three benchmark fixtures; observed max abs diff 5.7e-16. Earth radius 6371.01 km matches conleyreg::haversine_dist exactly. Generation: cd benchmarks/R && Rscript generate_conley_golden.R. SyntheticDiD(vcov_type="conley") raises TypeError (uses bootstrap variance, not analytical sandwich; tracked in TODO.md). conley + cluster_ids / weights / survey_design / absorb each raise NotImplementedError with references to the deferral phase. 
Other changes: .gitignore /papers/ rule anchored to repo root so docs/methodology/papers/ is no longer silently swallowed; 5 paper-review markdowns committed (Conley 1999, Colella 2019, Clarke 2017, Butts 2023, Butts 2021) as the methodology basis for REGISTRY ConleySpatialHAC and the Phase 3 ring-indicator rescope flagged in BRIEFING.md. 70 new Conley tests pass (67 internal + 3 R parity); 442 tests pass on the targeted regression surface (test_linalg, test_linalg_hc2_bm, test_estimators_vcov_type, test_estimators, test_methodology_twfe). Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 6 +- CHANGELOG.md | 1 + README.md | 1 + TODO.md | 4 + benchmarks/R/README.md | 115 ++ benchmarks/R/generate_conley_golden.R | 108 ++ .../data/r_conleyreg_conley_golden.json | 49 + diff_diff/conley.py | 271 +++++ diff_diff/estimators.py | 144 +++ diff_diff/guides/llms-full.txt | 49 + diff_diff/guides/llms.txt | 1 + diff_diff/linalg.py | 171 ++- diff_diff/results.py | 19 +- diff_diff/synthetic_did.py | 162 ++- diff_diff/twfe.py | 75 ++ docs/doc-deps.yaml | 9 + docs/methodology/REGISTRY.md | 74 ++ docs/methodology/papers/butts-2021-review.md | 301 +++++ docs/methodology/papers/butts-2023-review.md | 226 ++++ docs/methodology/papers/clarke-2017-review.md | 282 +++++ .../papers/colella-et-al-2019-review.md | 286 +++++ docs/methodology/papers/conley-1999-review.md | 285 +++++ tests/test_conley_vcov.py | 1019 +++++++++++++++++ 23 files changed, 3556 insertions(+), 102 deletions(-) create mode 100644 benchmarks/R/README.md create mode 100644 benchmarks/R/generate_conley_golden.R create mode 100644 benchmarks/data/r_conleyreg_conley_golden.json create mode 100644 diff_diff/conley.py create mode 100644 docs/methodology/papers/butts-2021-review.md create mode 100644 docs/methodology/papers/butts-2023-review.md create mode 100644 docs/methodology/papers/clarke-2017-review.md create mode 100644 docs/methodology/papers/colella-et-al-2019-review.md create mode 100644 
docs/methodology/papers/conley-1999-review.md create mode 100644 tests/test_conley_vcov.py diff --git a/.gitignore b/.gitignore index c2242b17..20fa5667 100644 --- a/.gitignore +++ b/.gitignore @@ -89,8 +89,10 @@ launch-video/ trop_avg_ref/ # Academic papers (local only, not for distribution) -papers/ -paper/ +# Anchored to repo root so that `docs/methodology/papers/` (tracked review markdown) +# is not silently ignored. +/papers/ +/paper/ # Local analysis notebooks (not committed) analysis/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c413ba8..a2dfe729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **Conley (1999) spatial-HAC standard errors via `vcov_type="conley"`** on `DifferenceInDifferences`, `TwoWayFixedEffects`, and `MultiPeriodDiD` (Phase 1 of the spillover-conley initiative). New keyword-only kwargs on `__init__`: `conley_coords=(, )` (column-name tuple from `data`), `conley_cutoff_km=` (positive finite bandwidth in km for haversine, or coord units for euclidean — REQUIRED, no default per the no-silent-failures contract), `conley_metric="haversine"|"euclidean"|callable` (default `"haversine"`; great-circle uses Earth's mean radius 6371.01 km matching R `conleyreg`), `conley_kernel="bartlett"|"uniform"` (default `"bartlett"` is PSD-guaranteed; `"uniform"` emits `UserWarning` if the meat has a materially negative eigenvalue per Conley 1999 footnote 11). Variance estimator `Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1}` (Conley 1999 Eq 4.2). FWL composes cleanly because the meat depends only on scores `X·ε`, both of which within-transformation preserves — `TwoWayFixedEffects(vcov_type="conley", ...)` is supported, UNLIKE `hc2`/`hc2_bm` which need the full hat matrix. 
TWFE auto-cluster-at-unit is disabled when `vcov_type="conley"`; explicit `cluster=` raises `NotImplementedError` (combined product kernel deferred to Phase 2). `n > 20_000` emits a `UserWarning` about the dense O(n²) distance-matrix memory; sparse k-d-tree fast path is queued for Phase 2. `SyntheticDiD(vcov_type="conley")` raises `TypeError` (uses bootstrap variance, not analytical sandwich); `set_params` mirrors the constructor rejection. `vcov_type="conley"` + `weights=` / `survey_design=` / `absorb=` raises `NotImplementedError` (Bertanha-Imbens 2014 weighted-Conley + arbitrary FE projection are deferred to follow-up phases). `TwoWayFixedEffects(vcov_type="conley", inference="wild_bootstrap")` raises `NotImplementedError` (Conley analytical spatial-HAC and wild cluster bootstrap are different inference paths). Helpers live in new module `diff_diff/conley.py` (`_haversine_km`, `_pairwise_distance_matrix`, `_bartlett_kernel`, `_uniform_kernel`, `_validate_conley_kwargs`, `_compute_conley_vcov`); `compute_robust_vcov` in `diff_diff/linalg.py` imports the dispatch helpers. R `conleyreg` parity (Düsterhöft 2021, CRAN v0.1.9) on three benchmark fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`, regenerable via `benchmarks/R/generate_conley_golden.R`); observed max abs diff 5.7e-16. Earth radius 6371.01 km matches `conleyreg::haversine_dist`. Test file `tests/test_conley_vcov.py` skips parity cleanly when the JSON is absent. `result.summary()` prints `"Conley spatial HAC (bartlett, cutoff=200.0km)"` via the extended `_format_vcov_label` helper. New REGISTRY section `## ConleySpatialHAC`. Tracked on `BRIEFING.md` as Phase 1 of the 6-phase initiative (Phase 2: two-way space×time + sparse fast path; Phase 3: ring-indicator spillover-aware DiD per Butts 2021; Phase 4a/4b: mechanical extension to IF-aggregation and sandwich-derived estimators; Phase 5: survey design support). 
- **Tutorial 21: HAD Pre-test Workflow** (`docs/tutorials/21_had_pretest_workflow.ipynb`) — composite pre-test walkthrough for `HeterogeneousAdoptionDiD` building on Tutorial 20's brand-campaign framing. Uses a 60-DMA × 8-week panel close in shape to T20's but with the dose distribution drawn from `Uniform[$0.01K, $50K]` (vs T20's `[$5K, $50K]`); the true support is strictly positive but very near zero, chosen so the QUG step in `did_had_pretest_workflow` fails-to-reject `H0: d_lower = 0` in this finite sample and the verdict text fires the load-bearing "Assumption 7 deferred" pivot for the upgrade-arc narrative. (HAD's `design="auto"` selector — a separate min/median heuristic at `had.py::_detect_design`, NOT the QUG p-value — independently lands on the `continuous_at_zero` identification path with target `WAS` on this panel because `d.min() < 0.01 * median(|d|)`. The QUG test and the design selector are independent rules that point to the same identification path here.) Walks through three surfaces: (a) `did_had_pretest_workflow(aggregate="overall")` on a two-period collapse, where the verdict explicitly flags Step 2 (Assumption 7 pre-trends) as not run because a single pre-period structurally cannot support a pre-trends test, and the structural fields `pretrends_joint` / `homogeneity_joint` are both `None`; (b) `did_had_pretest_workflow(aggregate="event_study")` on the full multi-period panel, where the verdict reads "TWFE admissible under Section 4 assumptions" because all three testable diagnostics (QUG + joint pre-trends Stute over 3 horizons + joint homogeneity Stute over 4 horizons) fail-to-reject — non-rejection evidence under finite-sample power and test specification, not proof that the identifying assumptions hold; and (c) a side panel exercising both `yatchew_hr_test` null modes — `null="linearity"` (default, paper Theorem 7) vs `null="mean_independence"` (Phase 4 R-parity with R `YatchewTest::yatchew_test(order=0)`) — on the within-pre-period 
first-difference paired with post-period dose, illustrating the stricter null's larger residual variance (`sigma2_lin` 7.01 vs 6.53) and smaller p-value (0.29 vs 0.49). Companion drift-test file `tests/test_t21_had_pretest_workflow_drift.py` (16 tests pinning panel composition, both verdict pivots, structural anchors on both paths, deterministic QUG / Yatchew statistics, bootstrap p-value tolerance bands per `feedback_bootstrap_drift_tests_need_backend_tolerance`, and `HAD(design="auto")` resolution to `continuous_at_zero` on this panel). T20's "Composite pretest workflow" Extensions bullet updated with a forward-pointer to T21. T22 weighted/survey HAD tutorial remains queued as a separate notebook PR. - **`ChaisemartinDHaultfoeuille.by_path` and `paths_of_interest` now compose with `survey_design`** for analytical Binder TSL SE and replicate-weight bootstrap variance. The `NotImplementedError` gate at `chaisemartin_dhaultfoeuille.py:1233-1239` is replaced by a per-path multiplier-bootstrap-only gate (`survey_design + n_bootstrap > 0` under by_path / paths_of_interest still raises, since the survey-aware perturbation pivot for path-restricted IFs is methodologically underived). Per-path SE routes through the existing `_survey_se_from_group_if` cell-period allocator: the per-period IF (`U_pp_l_path`) is built with non-path switcher-side contributions skipped (control contributions are unchanged, matching the joiners/leavers IF convention; preserves the row-sum identity `U_pp.sum(axis=1) == U`), cohort-recentered via `_cohort_recenter_per_period`, then expanded to observations as `psi_i = U_pp[g_i, t_i] · (w_i / W_{g_i, t_i})`. Replicate-weight designs unconditionally use the cell allocator (Class A contract from PR #323). 
New `_refresh_path_inference` helper post-call refreshes `safe_inference` on every populated entry across `multi_horizon_inference`, `placebo_horizon_inference`, `path_effects`, and `path_placebos` so all four surfaces use the same final `df_survey` after per-path replicate fits append `n_valid` to the shared accumulator. Path-enumeration ranking under `survey_design` remains unweighted (group-cardinality, not population-weight mass). Lonely-PSU policy stays sample-wide, not per-path. Telescope invariant: on a single-path panel, per-path SE matches the global non-by_path survey SE bit-exactly. **No R parity** — R `did_multiplegt_dyn` does not support survey weighting; this is a Python-only methodology extension. The global non-by_path TSL multiplier-bootstrap path is unaffected (anti-regression test `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathSurveyDesignAnalytical::test_global_survey_plus_n_bootstrap_still_works` locks the per-path-only scope of the new gate). Cross-surface invariants regression-tested at `TestByPathSurveyDesignAnalytical` (~17 tests across gate / dispatch / analytical SE / replicate-weight SE / per-path placebos / `trends_linear` composition / unobserved-path warnings / final-df refresh regressions) and `TestByPathSurveyDesignTelescope`. See `docs/methodology/REGISTRY.md` §`ChaisemartinDHaultfoeuille` `Note (Phase 3 by_path ...)` → "Per-path survey-design SE" for the full contract. - **Inference-field aliases on staggered result classes** for adapter / external-consumer compatibility. 
Read-only `@property` aliases expose the flat `att` / `se` / `conf_int` / `p_value` / `t_stat` names (matching `DiDResults` / `TROPResults` / `SyntheticDiDResults` / `HeterogeneousAdoptionDiDResults`) on every result class that previously only carried prefixed canonical fields: `CallawaySantAnnaResults`, `StackedDiDResults`, `EfficientDiDResults`, `ChaisemartinDHaultfoeuilleResults`, `StaggeredTripleDiffResults`, `WooldridgeDiDResults`, `SunAbrahamResults`, `ImputationDiDResults`, `TwoStageDiDResults` (mapping to `overall_*`); `ContinuousDiDResults` (mapping to `overall_att_*`, ATT-side as the headline, ACRT-side accessible unchanged via `overall_acrt_*`); `MultiPeriodDiDResults` (mapping to `avg_*`). `ContinuousDiDResults` additionally exposes `overall_se` / `overall_conf_int` / `overall_p_value` / `overall_t_stat` aliases for naming consistency with the rest of the staggered family. Aliases are pure read-throughs over the canonical fields — no recomputation, no behavior change — so the `safe_inference()` joint-NaN contract (per CLAUDE.md "Inference computation") is inherited automatically (NaN canonical → NaN alias, locked at `tests/test_result_aliases.py::test_pattern_b_aliases_propagate_nan`). The native `overall_*` / `overall_att_*` / `avg_*` fields remain canonical for documentation and computation. Motivated by the `balance.interop.diff_diff.as_balance_diagnostic()` adapter (`facebookresearch/balance` PR #465) which calls `getattr(res, "se", None)` / `getattr(res, "conf_int", None)` without a fallback chain — pre-alias, every staggered result class returned `None` on those keys, silently dropping `se` and `conf_int` from the adapter's diagnostic dict. 23 alias-mechanic + balance-adapter regression tests at `tests/test_result_aliases.py`. Patch-level (additive on stable surfaces). diff --git a/README.md b/README.md index 7502025d..2f32e91a 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`. 
- [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design +- Conley spatial HAC SE (`vcov_type="conley"`) on DifferenceInDifferences/TwoWayFixedEffects/MultiPeriodDiD - Conley (1999) spatial-correlation-aware SEs with parity vs R `conleyreg` ## Survey Support diff --git a/TODO.md b/TODO.md index 17702eb3..59f7dacb 100644 --- a/TODO.md +++ b/TODO.md @@ -113,6 +113,10 @@ Deferred items from PR reviews that were not addressed before merge. | `HeterogeneousAdoptionDiD` time-varying dose on event study: Phase 2b REJECTS panels where `D_{g,t}` varies within a unit for `t >= F` (the aggregation uses `D_{g, F}` as the single regressor for all horizons, paper Appendix B.2 constant-dose convention). A follow-up PR could add a time-varying-dose estimator for these panels; current behavior is front-door rejection with a redirect to `ChaisemartinDHaultfoeuille`. | `diff_diff/had.py::_validate_had_panel_event_study` | Phase 2b | Low | | `HeterogeneousAdoptionDiD` repeated-cross-section support: paper Section 2 defines HAD on panel OR repeated cross-section, but Phase 2a is panel-only. RCS inputs (disjoint unit IDs between periods) are rejected by the balanced-panel validator with the generic "unit(s) do not appear in both periods" error. A follow-up PR will add an RCS identification path based on pre/post cell means (rather than unit-level first differences), with its own validator and a distinct `data_mode` / API surface. 
| `diff_diff/had.py::_validate_had_panel`, `diff_diff/had.py::_aggregate_first_difference` | Phase 2a | Medium | | SyntheticDiD: bootstrap cross-language parity anchor against R's default `synthdid::vcov(method="bootstrap")` (refit; rebinds `opts` per draw) or Julia `Synthdid.jl::src/vcov.jl::bootstrap_se` (refit by construction). Same-library validation (placebo-SE tracking, AER §6.3 MC truth) is in place; a cross-language anchor is desirable to bolster the methodology contract. Julia is the cleanest target — minimal wrapping work and refit-native vcov. Tolerance target: 1e-6 on Monte Carlo samples (different BLAS + RNG paths preclude 1e-10). The R-parity fixture from the previous release was deleted because it pinned the now-removed fixed-weight path. | `benchmarks/R/`, `benchmarks/julia/`, `tests/` | follow-up | Low | +| Conley + cluster_ids combined product kernel `K_space(d_ij/h) · 1{cluster_i = cluster_j}`. Phase 2 of the spillover-conley initiative will add this alongside the time-dimension extension (Driscoll-Kraay). Currently raises `NotImplementedError` at both the linalg validator and TWFE early-block. | `linalg.py::_validate_vcov_args`, `twfe.py`, `estimators.py` (DiD/MultiPeriodDiD `fit`) | Phase 2 (spillover-conley) | Medium | +| Conley + survey weights / `survey_design`. Score-reweighted meat `s_i = w_i · X_i · ε_i` is mechanical, but PSU clustering interaction with the spatial kernel and replicate-weights variance under spatial correlation are non-trivial (Bertanha-Imbens 2014 covers cluster-sample but not the explicit Conley case). Phase 5 of the spillover-conley initiative; paper review prerequisite. Currently raises `NotImplementedError`. | `linalg.py::_validate_vcov_args`, `twfe.py`, `estimators.py` | Phase 5 (spillover-conley) | Medium | +| Conley + `absorb=` (arbitrary FE projection beyond TWFE's two-FE within-transformation). 
FWL composability is proven analytically for TWFE's fixed two-FE design but not formally verified for arbitrary `absorb` dimensions; conservatively rejected at fit-time with a redirect to `fixed_effects=` dummies. Lift after empirical verification on multi-FE within-transformations. | `estimators.py::DifferenceInDifferences.fit`, `MultiPeriodDiD.fit` | follow-up (spillover-conley) | Low | +| `SyntheticDiD(vcov_type="conley")` support. Currently raises `TypeError` at `__init__` because SyntheticDiD uses `variance_method ∈ {bootstrap, jackknife, placebo}` rather than the analytical sandwich that Conley plugs into. Wiring would require either reimplementing an analytical sandwich path for SyntheticDiD or designing a spatial-block bootstrap (new methodology, Politis-Romano 1994 territory). | `synthetic_did.py::SyntheticDiD` | follow-up (spillover-conley) | Low | #### Performance diff --git a/benchmarks/R/README.md b/benchmarks/R/README.md new file mode 100644 index 00000000..a04e2e6d --- /dev/null +++ b/benchmarks/R/README.md @@ -0,0 +1,115 @@ +# R `conleyreg` parity benchmark for Conley spatial HAC SE + +`benchmarks/R/generate_conley_golden.R` produces the golden JSON used by +`tests/test_conley_vcov.py::TestConleyParityR` to verify that diff-diff's +`vcov_type="conley"` matches R `conleyreg` (Düsterhöft 2021, CRAN v0.1.9) +to ≤ 1e-6 on three benchmark fixtures. + +## Why R `conleyreg` + +`conleyreg` is the canonical open-source Conley (1999) implementation in R +(Christian Düsterhöft, https://github.com/cdueben/conleyreg). It uses +RcppArmadillo for the inner loops and is widely cited in applied work. +Stata `acreg` (Colella et al. 2019) is the parallel canonical +implementation in the Stata ecosystem; we cite both in REGISTRY but only +parity-test against `conleyreg` because it is free and open source. 
+ +## Earth radius constant + +`conleyreg::haversine_dist` uses **6371.01 km** (mean Earth radius) — see +[`src/distance_functions.cpp`](https://github.com/cdueben/conleyreg/blob/master/src/distance_functions.cpp). +diff-diff's `_CONLEY_EARTH_RADIUS_KM` is set to `6371.01` to match. WGS-84 +equatorial radius is 6378.137 km; the 0.01 km vs 6371.0 delta is +methodologically negligible (Earth mean radius is approximate at many +more digits) but matters for the 1e-6 cross-language parity bound. + +## Regenerating the fixtures + +Requires: +- R installed (`/opt/homebrew/bin/Rscript` on Apple Silicon Mac) +- System libraries: `brew install gdal proj geos pkg-config udunits` + (needed by sf, lwgeom — transitive deps of conleyreg) +- R packages: `Rscript -e 'install.packages(c("conleyreg","sf","lwgeom","jsonlite"))'` + +```bash +cd benchmarks/R +Rscript generate_conley_golden.R +# Produces benchmarks/data/r_conleyreg_conley_golden.json +``` + +The output JSON is **committed to the repo** so CI doesn't need R. Only +re-run when: +- conleyreg is updated (verify version in `meta.tool` field) +- The set of benchmark fixtures changes + +## Skip behavior + +`tests/test_conley_vcov.py::TestConleyParityR` calls +`pytest.skip("Golden JSON not present...")` when the JSON is absent, so +CI passes without R. The 64 internal tests (`TestConleyKernels`, +`TestConleyDistanceMetrics`, `TestConleyReductions`, +`TestConleyDirectHelper`, `TestConleyValidatorHelpers`, +`TestConleyValidationDispatch`, `TestConleyEstimatorIntegration`, +`TestConleyTWFE`, `TestConleyEstimatorValidation`, +`TestConleySetParamsAtomicity`, `TestConleyLinearRegression`, +`TestConleyReductionsAddendum`) verify the implementation independently. 
+ +## Fixtures + +Three haversine fixtures stress different scales / geographic ranges: + +| Fixture | n | k | Cutoff | Stress test | +|---|---|---|---|---| +| `small_haversine` | 50 | 2 | 500 km | Small-n, simple regressor | +| `dense_haversine` | 200 | 3 | 1000 km | Dense panel, 2 covariates, large cutoff | +| `lat_lon_realistic` | 300 | 3 | 200 km | Continental US lat/lon range, 200km cutoff | + +The euclidean code path (`conley_metric="euclidean"`) is verified +internally against `scipy.spatial.distance.cdist` in +`tests/test_conley_vcov.py::TestConleyDistanceMetrics::test_pairwise_distance_euclidean_matches_pdist`. +conleyreg's planar code path requires an `sf` CRS specification, which +adds noise without methodological value for parity testing. + +## JSON schema + +```json +{ + "meta": { + "generated_at": "2026-05-10", + "earth_radius_km": 6371.01, + "tool": "R conleyreg 0.1.9 (Düsterhöft 2021)" + }, + "small_haversine": { + "x": [], + "x_shape": [n, k], + "y": [], + "coords": [], + "coords_shape": [n, 2], + "metric": "haversine", + "cutoff_km": 500.0, + "kernel": "bartlett", + "vcov": [], + "vcov_shape": [k, k], + "n": , + "k": + }, + "dense_haversine": { ... }, + "lat_lon_realistic": { ... } +} +``` + +The R script transposes matrices before `as.vector` flatten so that +NumPy's `np.asarray(...).reshape(shape)` (row-major / C-order) decodes +the same orientation R wrote. Without the transpose, R's column-major +flatten misaligns when reshaped row-major. + +## Known constraints + +- `conleyreg` requires `unit` and `time` columns even with `lag_cutoff=0` + (cross-sectional). The script fakes them with `unit = 1:n, time = 1L`; + conleyreg emits a `Number of time periods: 1. Treating data as + cross-sectional` warning which is informational. 
+- `conleyreg` uses OpenMP for parallelism; on macOS Apple Silicon with + R's default toolchain, the `OpenMP not detected` warning is normal — + the package falls back to single-threaded mode without affecting + numerical output. diff --git a/benchmarks/R/generate_conley_golden.R b/benchmarks/R/generate_conley_golden.R new file mode 100644 index 00000000..0cc957d5 --- /dev/null +++ b/benchmarks/R/generate_conley_golden.R @@ -0,0 +1,108 @@ +#!/usr/bin/env Rscript +# Generate R conleyreg parity golden values for diff-diff Phase 1 Conley SE. +# +# Requires: install.packages("conleyreg") (CRAN v0.1.9+, plus sf/lwgeom and +# the system libs gdal/proj/geos/udunits/pkgconf via brew) +# Output: ../data/r_conleyreg_conley_golden.json +# +# The diff-diff Conley implementation (`diff_diff/linalg.py::_compute_conley_vcov`) +# matches the values in this JSON to atol=1e-6. Earth radius is 6371.01 km +# (matches conleyreg::haversine_dist in src/distance_functions.cpp). + +suppressPackageStartupMessages({ + library(conleyreg) + library(jsonlite) +}) + +EARTH_RADIUS_KM <- 6371.01 # matches diff-diff and conleyreg + +# Build one fixture entry for the JSON. Calls conleyreg, extracts the vcov, +# packs everything in the schema diff-diff's TestConleyParityR expects. +build_fixture <- function(seed, n, k, metric, cutoff_km, kernel, + lat_range, lon_range) { + set.seed(seed) + df <- data.frame( + lat = runif(n, lat_range[1], lat_range[2]), + lon = runif(n, lon_range[1], lon_range[2]) + ) + for (j in seq_len(k - 1)) df[[paste0("x", j)]] <- rnorm(n) + betas <- c(1.0, seq(0.5, 2.0, length.out = k - 1)) + df$y <- betas[1] + + rowSums(sapply(seq_len(k - 1), function(j) betas[j + 1] * df[[paste0("x", j)]])) + + rnorm(n, sd = 0.5) + # conleyreg requires unit + time columns even when lag_cutoff=0; supply + # singleton time series. 
+ df$unit <- seq_len(n) + df$time <- 1L + + formula_str <- if (k == 2) "y ~ x1" else + paste0("y ~ ", paste(paste0("x", seq_len(k - 1)), collapse = " + ")) + # When vcov=TRUE, conleyreg returns the vcov matrix directly (a matrix array). + V <- conleyreg( + formula = as.formula(formula_str), + data = df, + dist_cutoff = cutoff_km, + unit = "unit", + time = "time", + lat = "lat", + lon = "lon", + kernel = kernel, + lag_cutoff = 0, + dist_comp = if (metric == "haversine") "spherical" else "planar", + verbose = FALSE, + vcov = TRUE + ) + V <- unname(as.matrix(V)) + + X <- cbind(1, as.matrix(df[, paste0("x", seq_len(k - 1)), drop = FALSE])) + coords_mat <- as.matrix(df[, c("lat", "lon")]) + # NOTE: R's as.vector on a matrix flattens COLUMN-major; NumPy's reshape + # reads ROW-major (C order). Transpose first so the flattened vector + # decodes correctly when np.asarray(...).reshape((n, 2)) is applied. + list( + x = as.vector(t(X)), + x_shape = c(nrow(X), ncol(X)), + y = df$y, + coords = as.vector(t(coords_mat)), + coords_shape = c(n, 2), + metric = metric, + cutoff_km = cutoff_km, + kernel = kernel, + vcov = as.vector(t(V)), + vcov_shape = dim(V), + n = n, + k = ncol(X) + ) +} + +out <- list( + meta = list( + generated_at = format(Sys.Date(), "%Y-%m-%d"), + earth_radius_km = EARTH_RADIUS_KM, + tool = paste0( + "R conleyreg ", as.character(packageVersion("conleyreg")), + " (Düsterhöft 2021)" + ) + ), + # NOTE: only haversine fixtures are anchored against conleyreg. Its planar + # code path requires a CRS specification (sf object) which is overkill for + # parity testing — diff-diff's euclidean path is already verified bit- + # equivalent against scipy.spatial.distance.cdist in + # tests/test_conley_vcov.py::TestConleyDistanceMetrics::test_pairwise_distance_euclidean_matches_pdist. 
+ small_haversine = build_fixture( + seed = 42, n = 50, k = 2, metric = "haversine", cutoff_km = 500, + kernel = "bartlett", lat_range = c(-30, 30), lon_range = c(-100, 100) + ), + dense_haversine = build_fixture( + seed = 100, n = 200, k = 3, metric = "haversine", cutoff_km = 1000, + kernel = "bartlett", lat_range = c(-45, 45), lon_range = c(-150, 150) + ), + lat_lon_realistic = build_fixture( + seed = 314, n = 300, k = 3, metric = "haversine", cutoff_km = 200, + kernel = "bartlett", lat_range = c(25, 50), lon_range = c(-125, -65) + ) +) + +out_path <- "../data/r_conleyreg_conley_golden.json" +write(toJSON(out, auto_unbox = TRUE, digits = NA, pretty = TRUE), file = out_path) +cat("Wrote", out_path, "\n") diff --git a/benchmarks/data/r_conleyreg_conley_golden.json b/benchmarks/data/r_conleyreg_conley_golden.json new file mode 100644 index 00000000..76781f6c --- /dev/null +++ b/benchmarks/data/r_conleyreg_conley_golden.json @@ -0,0 +1,49 @@ +{ + "meta": { + "generated_at": "2026-05-10", + "earth_radius_km": 6371.01, + "tool": "R conleyreg 0.1.9 (Düsterhöft 2021)" + }, + "small_haversine": { + "x": [1, 0.321925265203947, 1, -0.783838940880375, 1, 1.57572751979198, 1, 0.642899305717316, 1, 0.0897606465996056, 1, 0.276550747291463, 1, 0.679288816055271, 1, 0.0898328865790817, 1, -2.99309008315293, 1, 0.284882953530659, 1, -0.367234642740975, 1, 0.185230564865609, 1, 0.581823727365507, 1, 1.39973682729268, 1, -0.727292059474465, 1, 1.30254263204414, 1, 0.335848119752074, 1, 1.03850609869762, 1, 0.920728568290646, 1, 0.720878162866863, 1, -1.04311893856785, 1, -0.0901863866107067, 1, 0.623518161999544, 1, -0.953523357772344, 1, -0.542828814573857, 1, 0.580996497681682, 1, 0.768178737834591, 1, 0.463767588540167, 1, -0.885776297409679, 1, -1.09978089864786, 1, 1.51270700980493, 1, 0.257921437532031, 1, 0.0884402291595863, 1, -0.120896537539089, 1, -1.19432889516053, 1, 0.611996898040387, 1, -0.217139845746521, 1, -0.182756706331922, 1, 0.93334632857116, 1, 
0.821773110508249, 1, 1.39211637593427, 1, -0.476173923054674, 1, 0.650348560726305, 1, 1.39111045639, 1, -1.1107888794479, 1, -0.860792586877842, 1, -1.13173868085377, 1, -1.4592139995024, 1, 0.0799825532411612, 1, 0.65320433964919], + "x_shape": [50, 2], + "y": [1.76144532040122, 1.13045607314367, 1.28625943647606, 2.24569060369503, 0.711493618920894, 1.19103227987377, 1.12851646709321, 0.983741357312055, -0.402448524325718, 1.20202195576383, 0.803836403195811, 1.14665164640382, 1.04819424575942, 1.44775984830239, -0.194195569694638, 1.46010445258516, 0.911598930937137, 2.87019854952121, 0.779306168550464, 1.42906719071273, -0.268372002942073, 0.219688935976248, 1.37411027409828, 0.0249187536718095, 0.727674285560717, 1.07636880812793, 1.07725356569255, 0.21954487156053, -0.0552621238848328, 0.539867771235041, 2.04016380211423, 0.882522041989278, 1.04425155661247, 1.50099655292044, 1.12276342390783, 0.757441564817283, 0.832770297001651, 1.50937084729389, 1.23180837400243, 1.38465181278463, 1.65300453884859, 0.318073529519447, 1.10283227792078, 1.68083278865088, 0.237671135747089, 1.12629671824518, 0.193634238746124, 0.0538084839484376, 1.38842256489663, 0.798417963239141], + "coords": [24.8883626097813, -33.3145577460527, 26.2245247978717, -30.6503503583372, -12.8316279128194, -20.302917715162, 19.8268575640395, 56.9385551381856, 8.50473113358021, -92.2127017751336, 1.14575694780797, 49.7590772341937, 14.195298878476, 35.455366037786, -21.9200041657314, -65.7471339218318, 9.41953742410988, -47.7824072353542, 12.303887042217, 2.88258693180978, -2.53549342509359, 35.1214549038559, 13.1467350991443, 96.5634395834059, 26.0803348291665, 51.9088535103947, -14.6742705395445, 13.2976848166436, -2.2624306473881, 69.9379437137395, 26.4008713653311, -62.105212919414, 28.6935857031494, -45.7426770590246, -22.9507583007216, 65.6316970475018, -1.50017510633916, 38.6409640777856, 3.61996477469802, -51.8910520710051, 24.2418832378462, -91.4022407960147, -21.6773899365216, 
-71.9041811767966, 29.3335037352517, -56.7229169886559, 26.8000939534977, -4.1202871594578, -25.0537465140224, -60.5179315432906, 0.852707060985267, 43.8711675349623, -6.58779197372496, -98.4230522532016, 24.3442878546193, -24.9020070768893, -3.18182231392711, 2.88154166191816, 20.160255599767, -99.6858891565353, 14.2557370662689, 16.3208005018532, 18.6633084760979, -68.4189583640546, -6.7135030310601, -28.1943388283253, 11.1101837642491, 29.1263756807894, -29.7630996722728, 55.1646725274622, 19.9749648151919, 12.7293683122844, -29.5599511871114, -53.2593202777207, -17.5404616305605, -82.0038967300206, 24.3960844678804, -82.8775870148093, 6.70671860687435, -38.9563261065632, -7.22644556779414, 33.4853029344231, -3.85370490141213, -99.9522206839174, -27.7541380282491, -58.2860086113214, 28.4123948263004, 86.6068254690617, -4.0949250664562, 85.1289497222751, 27.4545957986265, 46.8188602011651, 23.2652943301946, -33.3856033161283, 8.39872616808861, 3.01266596652567, 28.2579966215417, 48.7949292641133, 7.13029243983328, 23.8318480085582], + "coords_shape": [50, 2], + "metric": "haversine", + "cutoff_km": 500, + "kernel": "bartlett", + "vcov": [0.00440187938156653, -0.000246427608920082, -0.000246427608920082, 0.00400965562190147], + "vcov_shape": [2, 2], + "n": 50, + "k": 2 + }, + "dense_haversine": { + "x": [1, 0.028171766216906, -1.28482864535802, 1, -0.356703405777853, -0.141781733351938, 1, 0.852626375409205, 1.43878477137932, 1, 0.513365248224428, -1.0400575406747, 1, 1.0182029971727, -1.02383795145174, 1, -1.02147908474531, 1.09477378757095, 1, -0.561668270939564, -2.0139268834093, 1, -1.01255607823451, -1.04657679799975, 1, -3.02081429901538, 0.833409951344638, 1, 0.332350267716707, 0.608602451829851, 1, 1.24051156628878, 1.08405733116013, 1, 0.671349599277446, 0.172710418320688, 1, -1.33003411112154, 1.51619199122945, 1, -0.850580313718227, -0.217208456191205, 1, -1.78883074241817, -0.108250740924685, 1, 1.23152191404999, -0.357716547672896, 1, 
-0.334399673592727, 0.327044381574813, 1, -0.967322101785947, -2.01294421884823, 1, 0.879370348381199, -0.149694549738939, 1, -0.253578689825057, -0.932191228422762, 1, -1.51787578894428, 0.278413060653678, 1, -0.0238832551874006, 0.81683628452615, 1, 0.0266594733310417, -0.430260264217654, 1, 0.163681200061025, 0.572330038910175, 1, 0.410007939451138, 0.176993753487642, 1, -0.627362599919064, 1.11357173539244, 1, 0.636918003852658, -0.587104911160215, 1, -1.20193478121661, -1.73823332281423, 1, 1.34654241253933, 0.653671428518984, 1, -0.59876731034254, -0.186656416626064, 1, 0.442204437768857, 0.639356782782631, 1, 0.613526408754513, -0.0318413309540883, 1, -0.299096372733788, 1.00223584988356, 1, -1.60058809520475, -0.641066625541131, 1, -0.339466955313178, 0.251037006646425, 1, -0.58130774599116, 0.575035440776607, 1, -0.390362010012251, -0.635022196847298, 1, 0.845514355048774, 0.857283500299097, 1, 0.684727205002902, 1.10488240710102, 1, 0.237713083514964, -2.40522528010656, 1, -0.710621886392092, -0.700964982333803, 1, 2.61331895757405, 0.0284493521609994, 1, -1.62664735142501, 1.71749203322685, 1, -1.60730629300302, -1.12829485950373, 1, 0.340317445805659, 1.0862461279025, 1, 2.72788770711924, 1.30588495425069, 1, -0.32719003951891, -0.988264609350404, 1, -1.25689674826326, -0.227417910585069, 1, -0.431398105936833, -0.142103332944813, 1, 2.54885324760934, 0.396012959847737, 1, -0.771408001996856, -1.93943937564748, 1, -0.507342353723463, -3.32078220508143, 1, -0.270022583786385, 0.942081891576336, 1, 0.74811656342622, 0.0371513947988088, 1, 0.668593810460555, -0.258555013338062, 1, -0.455298660081533, 0.34439414089391, 1, -1.92784522429471, 0.149355230174283, 1, 1.88197706699389, 0.940463733546733, 1, -1.24971112993433, -2.21790191923096, 1, -0.489344968902409, -1.79355875629624, 1, 0.215391285188049, 1.6921068899601, 1, 0.422108673603984, -1.67249614679688, 1, -1.22114827886872, -1.73719283921824, 1, 0.31222782062225, -0.0266348570697463, 1, 
0.886462134092602, 0.0658044222690502, 1, 0.474810692727149, -1.46232343613037, 1, 0.869841354736154, -0.419404700573394, 1, -0.886523795971777, 0.576847013306975, 1, -0.128655531460981, -0.606922355335006, 1, 1.09651049495896, -0.0692195362732068, 1, 0.518029546611845, 1.33394384090484, 1, 0.398033175104538, -0.767742062250313, 1, 1.06665941044532, 0.553194668337453, 1, -0.449540081296166, -1.70865789974147, 1, 0.626101984200589, -0.526728102611897, 1, -0.81883595071451, 2.06376423359093, 1, -0.263516208460582, 0.423728259519932, 1, 0.154575286025624, 1.00855572784296, 1, 0.568445549513191, 1.36114179574455, 1, -2.66015967746067, -0.105954863320484, 1, 1.13653254173055, 0.0415006952291025, 1, 0.421772775929248, 1.11989771848809, 1, 1.35008259715745, 0.239162869721727, 1, 1.10375688508572, -1.0390839362081, 1, 0.647046109762646, -0.491755708782748, 1, 0.175635810206965, 0.384189624874408, 1, 0.653557786718714, -0.147472265361736, 1, -0.065490303769369, -0.650475178917459, 1, 0.645522172667217, 0.0563038939796303, 1, 0.364294036674242, -0.246601223518051, 1, 0.843441568136305, 1.23622156352818, 1, -0.676478865664919, -0.178326896688065, 1, -0.391432903077074, 1.23646957937443, 1, 0.404940644070594, -1.08216683133528, 1, -1.10685869531877, -0.494190200709459, 1, -0.383300408008914, -1.71111303442644, 1, 0.294020248251133, 0.0400580483971007, 1, 1.79306066807253, -0.5611434836846, 1, 0.814032171070863, -2.55736205762071, 1, -1.32002439597496, -0.696778810641331, 1, -1.8554452269695, -1.44628745503064, 1, -1.01856471494322, 0.315855761773287, 1, -1.04511108646899, -0.342747513915665, 1, -0.651287653362283, -1.93135309943573, 1, 0.382682653336886, 0.242821004264204, 1, 1.0318997103495, -0.362767948828951, 1, 0.131074973596078, 2.43272890031743, 1, 0.377191733838296, 0.592091190772483, 1, -1.8876950820172, -0.576200768227518, 1, 0.520219403024209, 0.406628240168577, 1, 0.859153161407287, -0.0452546344912198, 1, -0.802488655089867, 0.440900024502095, 1, 
-0.491960516592225, 2.23537620196202, 1, -0.210954317925969, -0.586154843978132, 1, -0.463676608852214, 0.59936024931904, 1, 0.299479567206212, 1.27423314654812, 1, -0.605473826674668, -0.911771154201932, 1, -0.439012134819359, -1.3754590566489, 1, -0.72075363013408, -0.28772686338876, 1, 0.780805030352143, 0.0814834106043889, 1, -1.22228427577597, 0.337895313938956, 1, 0.891194036418732, 0.566137538918655, 1, 0.253922842885533, -0.934222934352698, 1, -0.0658164266599258, 1.72676981387394, 1, 0.201466032357972, 1.58831492647447, 1, 2.47770051384538, -1.03744660785843, 1, 0.471752794711264, 0.36950741973645, 1, 1.32619808274433, 0.438658120085602, 1, 0.668598485083206, -0.877996042421038, 1, -0.137394413331552, 0.760917628344914, 1, -1.41117169628647, 0.064281984537207, 1, 1.20036310481627, 2.29847904217297, 1, 0.299090107859245, -1.29997798106704, 1, 0.456226882523328, -1.07963723132294, 1, 0.438078444170507, 1.48832346555083, 1, -0.215021138098019, -0.230763632057079, 1, -0.312122476056123, 0.66507385069932, 1, -0.69263041907488, -1.16031708724732, 1, -2.39622968331304, 1.08154085631275, 1, -1.58427493364794, 0.424211895855851, 1, 1.75874823335838, 0.776056326042136, 1, -2.36288678865353, -0.722788717947412, 1, 0.393172067606479, 0.509075410444175, 1, 0.565937864612634, 0.154614974638397, 1, -1.0439903705227, 0.309247239345119, 1, 1.02311329266288, 0.316513693032209, 1, -0.512664014020483, 0.707679895962637, 1, 0.0477366748063942, -0.462435325784938, 1, 1.39287699714616, 1.69830274705661, 1, -0.432499125908555, -0.861985561372884, 1, -0.135167836112898, -1.05182964365006, 1, 1.29773515008454, 0.531518618861102, 1, -0.0707264837276246, 0.0772839934550937, 1, -0.420828194004279, -1.02484108779373, 1, 1.14134035737233, 0.144159289028828, 1, -0.427126484236405, -0.22089623016418, 1, 1.3956696064749, -0.977331495522636, 1, -0.296187169458191, 0.335280231023267, 1, 0.58265961054232, 1.58736662168407, 1, -2.67622935121103, 1.26982587399556, 1, -1.40067900943453, 
2.88733301028683, 1, 3.3041511108123, 0.683238914169781, 1, 0.856777541321684, -0.676976087514788, 1, 1.1610164452066, -0.163870433246229, 1, 0.27893688579298, 0.70961161253956, 1, -0.0135484957349808, -1.05366913851342, 1, -2.67637888685686, -0.956228502027929, 1, 0.12131445442741, -0.945187374215766, 1, -0.500543242093534, 0.224788447345343, 1, 1.37967791098485, 0.359455174379624, 1, -0.405986658005879, -0.663710424680318, 1, 1.39593194453504, -0.126508180360602, 1, 0.738596155462332, 1.64237424330845, 1, 0.736614152769048, -0.507098481511882, 1, -0.73308795363662, -0.518487044338523, 1, -1.89839604988591, 1.34365300461022, 1, 0.44587930138674, 0.0874870159736699, 1, 0.456260862159676, 0.537672736985538, 1, -1.52619720815971, 1.49218637618764, 1, -0.0805826170075358, 0.876792687054939, 1, -0.340010606452059, 0.495885237905853, 1, 0.0536807987542894, -0.247353919919016, 1, -0.691634849035605, -0.728777608675439, 1, -0.303494895361534, 0.874193563035279, 1, -1.19501404838334, -0.760603039658224, 1, 1.01973591561693, 0.402919539067055, 1, -0.478444221973824, -0.531741980733308, 1, -0.659492049486403, 1.19334844262523e-05, 1, 0.678879652793504, 0.818178381494963, 1, -0.105355327414482, 0.269119210145174, 1, -0.453717672446288, -0.38800114178671, 1, 1.34143863167838, -0.324758509654573, 1, 0.985957200815472, 1.68000872905028, 1, -1.1954647751412, -2.88073555840663, 1, -0.574858573244479, 0.622138111065629, 1, 0.981991362192483, -2.20355233220161, 1, -1.65738322502377, -0.27269177033075, 1, -1.11964693231431, -0.922119539049234, 1, -1.17082440660003, -0.306282669743543, 1, -1.13905161112107, 2.5365592399809], + "x_shape": [200, 3], + "y": [-0.58845233881355, 0.531793012331946, 3.67794018220557, -0.436034375797988, -0.136629422694743, 1.92942685653982, -3.01576015679137, -2.01975986630663, 0.769420580275472, 1.77910459286806, 3.79778263627008, 0.997429533565721, 2.37231246592793, -0.937804635184813, -0.0773290629933563, 1.33045463573521, 1.09524472442022, 
-3.78763972360352, 1.49377143541519, -0.895387844891349, 1.6177272570338, 2.53396245081283, -0.0284248408476459, 3.30729169950569, 1.49395504978497, 2.25697662511593, 0.474127772958795, -3.03153242921054, 3.20465021468758, 0.599345306383796, 3.31020449101302, 1.90372605609236, 2.80600265135525, -0.890310529300007, 1.71986147326328, 2.06264107319225, -0.478616965642962, 3.80785400489305, 3.92458304962124, -3.9149284613106, -1.12565608095726, 1.93827147843231, 2.95162333664312, -1.73110393921453, 3.16447351052179, 5.00043463024794, -1.68279480931779, -0.0649577704244136, -0.81170670820207, 1.99279611956251, -2.71834093914925, -6.7374067500273, 2.4959297824187, 0.803360293926327, 0.487006030050027, 1.51671717158752, 1.1788787260298, 4.54289074717719, -3.95675156016987, -3.09947468102732, 4.61801140565442, -2.618186109658, -3.05898381680599, 1.73913568423103, 1.34106045590049, -1.95520420713037, 1.01187220493928, 1.88794172029744, -0.052152993055971, 1.84957286374444, 3.87270185171008, -1.5555280427195, 3.0074764909431, -3.37630734987141, 0.448192258428745, 5.05039318391058, 1.68816616602275, 3.17920940649462, 4.60606078769719, -0.127185742678368, 2.30123348243661, 2.81024253455042, 1.6855673162549, -0.342600597588011, 0.334857821785263, 1.56109015672181, 1.49444343222173, -0.380692666409974, 1.92484304806691, 0.239605812668353, 4.01281712243136, -0.0488422725290896, 2.62472398682033, -0.548174988324588, -0.250737794142774, -3.29252920195756, 1.38655999453537, 0.64287232058512, -4.29971271471019, -0.959943467674003, -2.5667264329298, 1.15026910034173, 0.481792011968331, -2.743042292322, 0.938333369112711, 1.79495246144227, 6.38358248307177, 2.68763653840848, -0.455955631578817, 1.37515742706425, 1.37800646971591, 0.616834275944749, 5.61047463757968, -0.199599513077746, 1.71069276245278, 3.82813220517357, -0.52545022040815, -1.75273865861635, 0.226067190637751, 2.10960346341955, 2.25841650057951, 2.94701558773913, -1.19191659585964, 4.7996018080217, 4.93099150479951, 
0.387388786762341, 1.54927651333478, 2.0164416517779, -0.419695963960503, 1.743345941977, 0.670724151859241, 6.67928705939447, -1.19304334550161, -1.48651600625955, 4.04945248518731, 1.39297210756738, 0.993729613937946, -1.8796692542034, 1.86506043930117, 1.59692490378879, 3.14106164540088, -1.732890449738, 2.00855799982314, 1.7899302802783, 0.427910958995686, 3.30024297622542, 1.98378227065833, -0.0615539760693999, 4.87856319651827, -1.05502631571201, -1.19920221318602, 2.52588209232391, 1.35635472984826, -1.29094858886543, 0.795144222965522, -0.15647423148539, -0.515730446026898, 2.62757175092121, 4.62855526691347, 1.56313264165336, 7.11363968199101, 4.29401280685743, 1.07967682984984, 1.10897940534938, 2.79944455037106, -1.80337378056658, -1.78919117873527, -1.23956293042633, 0.874585340979598, 3.15330149193397, -1.00102160039195, 0.857947138845204, 5.05060253030995, -0.043931627797349, 0.0912058414249606, 3.79170945259366, 2.31156718811615, 2.49986485343328, 3.17344638767825, 3.0834581459061, 2.38942049271204, 0.928849316942931, -0.424933986709507, 2.44005226651779, -2.36510740105742, 2.27818073766901, -0.766364549461974, 1.28768843773978, 3.06745337759762, 1.90474375061296, 0.332453162307477, 0.33917206504784, 5.18563276406838, -4.9960921895681, 2.10371818155757, -3.3834984492309, 0.250163327113514, -2.24891572297654, 0.54321110283666, 6.04921430975736], + "coords": [-17.3010500217788, -39.1211787471548, -21.8094749073498, 136.896850867197, 4.70901899971068, 124.072996736504, -39.9255164642818, 97.0009030541405, -2.83056444721296, -54.155353247188, -1.46063384134322, 113.310097740032, 28.1162355886772, 90.1052994187921, -11.6711516585201, 33.4069922799245, 4.19027355266735, -128.273658757098, -29.6764154057018, -23.5236869193614, 11.2496829335578, -46.6603195527568, 34.3948966311291, 75.5207061069086, -19.7681544022635, -84.3746040249243, -9.13608886068687, -62.3598081758246, 23.6295974068344, -43.2139904005453, 15.211954114493, 39.5885771373287, 
-26.584905423224, 117.759320000187, -12.8227631910704, 73.2098343316466, -12.6472396636382, -12.8332732943818, 17.1261475514621, -139.171366905794, 3.22300385450944, 20.5543555319309, 18.972346088849, -18.2952328585088, 3.45138284610584, 30.1453362219036, 22.4075004179031, 135.414828476496, -7.19086945289746, -69.1706064390019, -29.5721808657981, 47.6515173213556, 24.3271448789164, -127.058635954745, 34.3758228980005, -128.61443774309, 4.41870392533019, -38.9005440054461, -20.0048619043082, -60.8693399000913, -1.05246053310111, 15.3234816156328, 38.5654567438178, -39.0328234294429, -13.6177216423675, 103.706859564409, 40.8741936762817, 36.2372108269483, 17.5746725150384, -30.257993331179, 35.0508185033686, -60.1015415042639, -28.7633479340002, -35.6640490237623, 11.6451765387319, 60.3404809255153, 44.0607722941786, 134.059587190859, -33.2740016933531, 81.8484962917864, -15.2405526861548, -84.1349515132606, 32.8608491993509, 64.8331087315455, 24.9825995438732, 49.2653894238174, 29.4573104660958, 61.8750603636727, 9.29919208399951, -66.1116020288318, -0.789136025123298, 63.6751485290006, 25.2322659920901, 48.2826325111091, 34.5804324676283, -137.696193368174, -26.3057491974905, -131.65755153168, -17.3622692842036, -66.0714921075851, -15.252313669771, -59.4096848974004, -27.1188836800866, 136.983249010518, -23.787512818817, -33.0543843330815, -20.2602005749941, -38.4633639827371, 8.21889483137056, 102.942745853215, -22.1948411618359, 104.170500137843, -33.8861492648721, -54.2006542207673, -24.3084701662883, -110.248186066747, 8.7817763001658, 35.1327359676361, -25.9732299624011, 87.3913528164849, -3.26689397217706, -48.5853881109506, 13.2391074718907, 121.634279075079, 41.4515782892704, -90.732986968942, 15.8758354419842, 88.2255538366735, -4.93667804868892, 76.3808683957905, -12.8003595373593, 123.418454197235, -3.98416894488037, -53.1936886254698, -4.91274207830429, -124.149158061482, -22.9416669742204, 123.360136267729, 17.4915640801191, 136.552147543989, 
-7.89866684004664, 51.3428831705824, -15.5046718195081, 73.494660272263, 6.53082902776077, -17.9949071025476, 42.0299176010303, -115.488174604252, 14.5601119869389, 52.6928346371278, 11.2227945216, 69.4678549887612, 32.0987738342956, -4.80452917981893, 24.7300999285653, -98.6855886178091, 30.0624388852157, 52.7545604389161, -36.7640749225393, -71.1030160775408, -3.64270619815215, -47.5557512138039, 8.94583403365687, -86.9263371219859, 37.7749719657004, -145.104121766053, 43.4541669627652, -36.9711367646232, -41.597767858766, 18.6922205379233, 7.01436608098447, 53.9953823201358, 20.9982751356438, 73.6166311427951, -22.6131837512366, 135.021816683002, -17.9337127623148, -101.072785723954, 21.0120031423867, -52.572929346934, 36.6258938005194, -110.18518935889, -26.1164908739738, 41.1892316071317, -12.7675809734501, -50.7211168063805, -4.65307700447738, 44.7238718625158, 36.5783789707348, -59.1334923170507, -9.95046338299289, -128.630020120181, 1.57137745292857, 48.7171406624839, -33.7284821039066, 77.8810684802011, -42.2868828848004, 16.0080507863313, 24.462494510226, 11.7816292913631, -15.5326426075771, 104.877696116455, -9.946917607449, 45.9677876438946, -41.3052520947531, 135.26572692208, -12.4743028962985, 35.1669145049527, 6.38802754925564, -2.14780988171697, 16.6392217786051, 142.801969614811, 42.4005072307773, -2.89643516298383, 18.1762926676311, 46.551690925844, -43.960904579144, 29.6405368018895, 3.19789158878848, 134.270704910159, 30.2914316416718, -39.5965477684513, 27.6180068193935, 113.456101552583, -37.7160621061921, -13.5374796576798, -23.4981628367677, -1.05989542789757, 41.9390877569094, -11.8153154617175, -41.6094906209037, 35.3724590269849, 37.4765788950026, 31.2430475838482, 20.3603920619935, 85.7638807967305, -26.9301407178864, 16.5805424563587, 30.6180671788752, 80.693501746282, -9.30038874736056, -28.7142982240766, -9.65679665794596, 3.21594290435314, -2.46988017344847, 7.06093283370137, 7.52912230789661, 147.8397623403, -13.2851547980681, 
-21.105773630552, -42.4373388430104, 148.803376452997, 44.5579141192138, 85.9800739679486, 41.1802613222972, 4.77319036144763, 4.64860586682335, 0.806389655917883, -35.8241438283585, 123.491245298646, -23.5876366775483, -70.6752307713032, 32.4087898386642, -97.8429396869615, 21.4435157761909, -29.900623485446, -0.241896193474531, 11.5072152111679, 7.19289766391739, -76.510687591508, -43.5434953356162, -37.244712584652, -2.50831065466627, 24.076064187102, -41.1849244055338, -87.3134064953774, -3.31610253313556, 90.3990022139624, 11.6995291155763, 41.6407246375456, 15.5924358987249, 70.899226795882, -37.1668796427548, -18.3562777005136, -32.0774580258876, 23.9437147742137, 36.7630649032071, -73.1867971597239, -33.9720668946393, -10.884530399926, 20.5976513237692, -99.4155421620235, 40.5339458654635, 34.1683598700911, -41.0979676502757, 137.43078620173, -43.2360631786287, -6.54158131219447, -27.0834785629995, 75.3477992489934, 0.457763539161533, -143.825035030022, 38.3749886602163, -98.7092918716371, -32.5438477587886, 41.9899489497766, -29.7724881512113, -100.50645461306, 9.59646401228383, -43.5831674141809, 28.4289024118334, -94.0720578655601, 30.9043086692691, 119.296848401427, 25.926947419066, -78.6648432025686, -43.2881220895797, 145.483045396395, 17.8278747224249, -143.600778956898, 28.3583803893998, -117.738310410641, 6.15600548917428, -76.5519514912739, -1.79434054065496, 66.7087216861546, -30.4766894085333, -140.179517003708, -36.9669391284697, 14.8049982497469, -30.358586255461, 54.8426044173539, -42.5944394711405, -59.9277308676392, 18.8666528044268, -33.3676618058234, 23.4916359325871, 69.7338455356658, 32.1727301529609, 138.748925481923, -5.6460762117058, 78.1788510968909, -7.45771978748962, 24.5019486173987, 7.70845413208008, -11.138102109544, 29.2470822762698, -43.1071138707921, 26.3825304689817, -34.8234552657232, -15.6462686927989, -88.2656903704628, 41.2138749123551, -108.365888753906, 13.7717648665421, -33.2753569353372, -3.71917627053335, 
-70.1861331705004, 9.54196950886399, 60.9888846287504, -19.0150256920606, -27.6365500409156, 19.6047759382054, -70.330741815269, 38.1057698302902, -29.7066434752196, 15.7445537787862, -90.8060604007915, -28.2654006918892, 98.9795062690973, -13.673955348786, 8.39631913695484, -33.8832421903498, -31.4609429100528, -35.2695403061807, 22.1899698022753, -18.1842120154761, 141.291620931588, 30.4379910835996, 45.0449484866112, 44.3493984569795, -49.9196523567662, -5.65790316089988, -50.5657714325935, -26.7691886471584, 132.730464823544, 41.7484350618906, -35.6662516482174, 14.4348400854506, 19.2971768556163, -18.1694853468798, 3.45470658503473, -34.2664032708853, -108.341833460145, 8.97764411056414, -78.0631760833785, -34.1828502388671, 65.4027294600382, 25.9588814643212, -60.8328666770831], + "coords_shape": [200, 2], + "metric": "haversine", + "cutoff_km": 1000, + "kernel": "bartlett", + "vcov": [0.00130003681362414, -8.5938573296987e-05, 6.67775354920867e-05, -8.5938573296987e-05, 0.00127144478273302, -0.00023808426199593, 6.67775354920867e-05, -0.00023808426199593, 0.00101954442298434], + "vcov_shape": [3, 3], + "n": 200, + "k": 3 + }, + "lat_lon_realistic": { + "x": [1, -0.733376702142813, -2.12348627958831, 1, 1.37988020623228, 0.139785992168409, 1, -0.289939013122012, -2.28990606378477, 1, -1.03806120172436, 1.83529978690293, 1, 0.758779012231026, 0.226144143437339, 1, -0.15289100803927, 0.414103213350561, 1, -1.4475333419132, 0.409847144507884, 1, -0.72225027795233, -0.565093415661093, 1, 1.35904380222457, 1.4238105131244, 1, 0.0447094034057783, 0.30135936411516, 1, -1.25926490454147, 0.248466850230397, 1, -2.91095491466757, -0.616864004406248, 1, -0.373844474783292, 0.625243531326506, 1, 1.35644356266103, -0.00675889342036511, 1, -0.695309719136975, -0.979995552950745, 1, 0.650459955732345, -0.600859968388826, 1, -0.500984937344448, 0.359569399495063, 1, -1.43389072151158, 0.661603911432692, 1, -0.199462136838304, 0.77349044513175, 1, 1.2991275911859, 
-1.58582103329405, 1, -0.466359320509659, -1.7614004099096, 1, 0.0469387189982476, 0.0971343928937522, 1, -0.515531068758281, 0.103171674383164, 1, 0.183118071908697, -0.609781795227423, 1, -0.801160813180672, -1.01912613255821, 1, 0.400251011027922, 0.813783824167568, 1, 0.833019406459542, -0.646078347803931, 1, 2.01191154478013, -0.63096864897505, 1, 0.937653380236549, -0.733715479388184, 1, -0.706026150906619, 0.129378424202141, 1, -0.992325260291196, -0.727724495295198, 1, -0.405887673238853, -0.477480231254182, 1, -0.120107686717427, 2.31757693406101, 1, -0.734522230574548, 0.110431684877986, 1, -0.614696059179813, -0.895341651132664, 1, -1.10427914823073, 1.57391066543, 1, -1.1579078540189, 1.16729339593934, 1, -0.179500257112684, -0.327341481265006, 1, 0.898496374329244, -0.694576241595498, 1, -0.954543960373389, -1.36675225416857, 1, -0.246153300911598, -1.01498511887914, 1, 0.261723012813008, 0.27482631795456, 1, -0.19226276666191, -1.14660267044787, 1, 0.036378592964483, 0.551647906127169, 1, 0.640612545002271, -2.40990669851604, 1, 1.26623088941415, 1.23720846183343, 1, 0.246044005379975, -0.273691557132503, 1, -0.619136619697945, 0.187278494871171, 1, 0.00362122095447357, -0.207960363337257, 1, -0.311077709267196, 0.453892760410138, 1, 0.186080932047629, 2.39978151417216, 1, 0.368868988065764, -0.193285231677879, 1, 1.45255789924073, 1.06522428183559, 1, 0.327975545524117, -0.548627683557319, 1, -2.17087517866026, -0.520396211770451, 1, 1.99907810207623, 1.02470981171527, 1, 0.176184749279201, -0.222113484139704, 1, 0.218419751300436, -1.6317906283885, 1, 0.892976908071762, -1.69376778333846, 1, -0.301717947260058, -0.993564093878735, 1, 0.21905398656254, -0.560203032177327, 1, 0.976821764143017, -2.20701760402861, 1, -0.193479902238184, -2.57982625391472, 1, 1.98117023833912, 0.824945451955705, 1, -0.261104756682526, 0.570239068158389, 1, -0.485017417926701, 0.872625887347424, 1, -0.147931076009545, 0.957498647297589, 1, -0.29675797654885, 
0.649764322013681, 1, 0.169343195074441, 1.59201765164462, 1, -0.186759210316642, -1.08495415147964, 1, 0.553764577233738, 0.459887107014202, 1, 0.477656238046979, 0.680539617045083, 1, -0.460411277655129, -1.24837828141169, 1, 0.791899866955657, -0.326242212327443, 1, -0.947905350972599, 1.28889292843489, 1, -0.131234877140873, -1.3996087962919, 1, -1.29979656637077, -0.883702654284218, 1, 1.07209813059406, -0.726026340930164, 1, 1.45272326087079, -1.75882214612022, 1, -0.496817996337956, -1.6354046000069, 1, 0.586287609919257, 0.432793801172368, 1, -0.793065641122438, 0.523253486739063, 1, -2.50879749081559, 0.203615381245329, 1, -0.426224506291168, 0.815629154229024, 1, 1.87026821525621, 0.16401113096249, 1, 0.256052649052938, 0.877191056774385, 1, -3.27601061925914, 0.362809998470996, 1, 0.474941530027002, 0.0460636210246073, 1, -1.52271383173288, -1.94837165859859, 1, -0.84826513903859, 0.203577770938633, 1, -0.540871484048952, 0.616841869391252, 1, 0.281079516775485, 0.169849977985851, 1, -0.105418888994045, -0.278131191663574, 1, -1.42346465983364, 0.338170727268437, 1, 0.715044735887013, -0.780417574894966, 1, 0.762524084659217, -1.09664045692704, 1, 0.0603969095395259, -0.441860228237729, 1, -0.106764543510285, 0.0375096128058296, 1, 1.11460128240473, 1.11972231472626, 1, 0.651603095362425, 0.191749429453995, 1, 0.991064391774657, -0.341107851572647, 1, -2.44425041250526, 0.595359324256391, 1, 2.18783374927541, -0.0202393078534392, 1, 0.0844618957707153, 1.89490885862973, 1, -1.37622922730019, 0.268256769936968, 1, -0.979458672041826, -2.21369645677442, 1, -0.345677484946142, 1.45124170595474, 1, -0.234246609097858, -1.70055143616487, 1, 0.168094643089989, -0.447567674518987, 1, -1.38773730231241, -0.0570925517837653, 1, -0.872212543583847, 0.377670353550842, 1, 0.320315384864876, -1.64311632982676, 1, -0.418710761805079, -0.779444522927664, 1, -0.298781929974878, -0.384686529555048, 1, 2.677118645489, 0.854221294206251, 1, -1.14180019499395, 
0.680462986055326, 1, 0.628588361467659, 0.277307708168109, 1, 0.653251172359034, 0.689052475390437, 1, -0.875943739934686, -1.0145196129979, 1, -1.40319041862628, -0.579813910438061, 1, -0.723262203241788, -0.449778934169742, 1, -0.358215335057155, 0.301975904418837, 1, -0.0329184343084757, 0.0603797407044058, 1, 1.48195657344234, 1.64358815664189, 1, -0.711504406120115, -0.766473904743387, 1, -0.0845148663071147, -1.73357248465506, 1, 0.501682592789338, 0.415989065440458, 1, 0.325243864099515, 0.840403672499239, 1, -0.656743671768075, -0.839510482414583, 1, 0.294529571435667, 1.95307180940404, 1, -0.626636524632862, 0.97033848124543, 1, -0.625271980296058, 0.586124218300113, 1, -1.03046414346757, 1.20141906178965, 1, -0.210585968560477, 0.318961360284737, 1, 0.524479995587623, -0.904265707460295, 1, 0.935430127036921, -1.24921054436893, 1, 0.452279631244902, 0.445174794896414, 1, 0.777777204021201, -1.05781943986177, 1, 1.13199596310118, 2.179464788614, 1, 1.06597492231859, -1.40080815818865, 1, -1.3266285566927, -0.768051893024269, 1, 0.0629407363557197, 0.381158436578215, 1, -0.766252640199464, -1.58156579088047, 1, -1.06499180629693, 0.038347104800311, 1, -1.2389723822388, -0.640151152417062, 1, -2.45290202213416, 0.496864302118851, 1, -1.94100565851645, -1.35016639875214, 1, 0.369655102491409, 0.939568700362317, 1, 1.01542180757722, 0.161396937972746, 1, 1.03400582255296, 0.393490918794896, 1, 1.84628102336633, 1.53268987586945, 1, -0.184616409066517, -0.164853298820664, 1, 0.909024809605687, -0.795314262316524, 1, 0.786963126893264, -0.296873066601464, 1, 0.258103527003418, 2.19410410085225, 1, 1.20539127592218, 1.64877891705243, 1, 0.425986801320121, -0.228414578477784, 1, 0.131901140066895, 1.84122010420894, 1, 0.0105777975326613, -0.165878003027989, 1, -0.196733679103945, 0.924601123226953, 1, -0.0637451908025361, 0.903134324384731, 1, 0.267648836495588, -0.033018439873604, 1, 0.059165168305436, -0.94132315261496, 1, -0.726591968194937, 0.936172232275835, 
1, -1.06735209625127, 1.00654073974205, 1, -0.617851188156066, 0.101563301055902, 1, -0.455213538104717, 1.23487233532098, 1, -1.70883208454246, -0.562190508415666, 1, -0.663186901659033, -0.64716060148408, 1, 0.47398075686908, -0.442622445642377, 1, 0.256190329122125, 0.608862048829696, 1, -0.754630008741782, 0.459418210761745, 1, 0.134663432139535, 0.115125123906067, 1, -1.32642389174812, -0.232933264663774, 1, 0.776852509360032, -0.189797148703144, 1, 1.14530622544854, 0.0405197570402372, 1, 1.50949565154891, -0.628442365404972, 1, 0.409610608942898, 0.530612759476524, 1, -0.219397666724886, -1.45983538073097, 1, -0.0878309427510092, 1.16083791764505, 1, -1.16811218564098, -0.326546915630821, 1, 0.102228164234088, 0.90354852724444, 1, 0.727880488926864, 0.318670617697535, 1, -0.861563294903555, -2.19113523572011, 1, 0.456663543265053, -1.26574224821958, 1, -0.150486702886032, 0.662935575763581, 1, 0.731461522441988, 0.378104847564821, 1, 0.922192294503685, -0.288349967850497, 1, 1.04302401286353, 0.0860383149396887, 1, 1.48879801162154, -1.48319480662699, 1, -0.588144314290122, -0.671697922126023, 1, -0.953065032787028, 0.241895146135386, 1, -0.371817394961664, -0.0938970870310242, 1, -0.465660402058176, 1.13760355671899, 1, -0.928418601034627, 1.19878815518647, 1, -0.254213331374227, 0.717376361046906, 1, 0.0377172461346727, -0.0809145895958541, 1, 1.36569371911674, -0.975655983013419, 1, 1.83290499025341, -1.09687866925651, 1, -1.19702008512819, 0.620675237607978, 1, 1.59292079107052, -0.574532247719585, 1, 0.375021558730439, 0.2938128809606, 1, -0.219463688885552, 1.00439949315915, 1, 0.807848614565743, -2.19784472350256, 1, 0.285391625194915, -0.627844960212646, 1, 1.40997706507732, 0.0663462862209504, 1, 0.674002584125026, -0.482532025688842, 1, -0.24051397783959, 0.828011487455045, 1, -0.841106912273094, -1.17907285607289, 1, -1.38836431859617, -0.455918739039317, 1, -1.60956303373709, 0.692655725007508, 1, -1.04051507620661, 0.176758242399776, 1, 
-0.782548910207101, 0.399248869776833, 1, -0.397748458875547, -1.80197300323937, 1, -1.95817214101027, -1.53617568513346, 1, 0.0486415358977611, -1.99021823236597, 1, 1.41913764719449, 0.163968609282152, 1, 1.41384743042274, -0.71098891688657, 1, -0.773406887885188, -0.918755152577735, 1, 1.40035675312206, 0.701557445577466, 1, -1.97419154963776, 1.47150566180012, 1, -1.01304506829673, -0.161328394048824, 1, -0.467097266801904, 0.59311938496812, 1, 0.180652451972821, -1.62596026086513, 1, 0.631638419264848, -1.30898092707779, 1, 1.18464199407707, -0.941254025500084, 1, -0.563014427745511, -0.356159641187012, 1, -0.325244127289161, -1.07897529911065, 1, -0.597020536078357, 0.539587095250418, 1, -0.100432324635984, 0.666386551776077, 1, 0.436939061514188, -0.669818106527255, 1, 0.119092911649812, 0.570089108841664, 1, -0.329648363266111, -0.854320255706155, 1, -1.41666732965303, 0.230934515938907, 1, 0.465335740333613, 0.331220708260556, 1, -1.49389703540507, 0.994771793272954, 1, 0.764009014228285, -1.00748234536977, 1, 1.17659009654081, -1.28992566774817, 1, 0.821704829517809, 1.42931271036671, 1, -0.544933172396987, 0.341036889780739, 1, -0.537194017323054, 0.206413628008811, 1, -1.21039817933968, -0.837219816661881, 1, -0.245135916416275, -1.15404267619361, 1, -0.340882172676212, 0.0955835291905908, 1, -0.306914510761417, 0.705592764564526, 1, 0.0643556016504762, 0.313806767421511, 1, -0.140846950488324, 0.212363164082233, 1, -1.26085446856138, 1.1878286360604, 1, -0.448698348258841, -0.352830702347139, 1, -0.219500658557331, -1.49585368803079, 1, 0.611677704999846, 0.22164682321194, 1, -0.0317076355180896, -0.0629594305548813, 1, -1.4159767453801, 0.432671610448451, 1, 0.686192891318254, 0.992539016575678, 1, 0.110523080335676, -1.48265226384703, 1, 0.624691556917521, -0.494736757707694, 1, -1.1430782295994, 0.102616493346277, 1, -0.126749671096103, 0.0894381563244289, 1, -1.01234354223579, 0.0374677352942311, 1, -0.156229300570127, 0.0807932928043504, 1, 
2.12413625859939, 0.642849543376085, 1, -1.01237145168955, 0.408745338178795, 1, -0.978395396069584, -0.00194770136574662, 1, 0.0664297349010324, -0.374224093041413, 1, 0.138291850229392, -0.583885253886584, 1, 1.27019996604254, -0.0647631139596216, 1, 0.701718109365079, -0.384203317118735, 1, -0.720203159917692, -0.447477629016968, 1, -0.0822854997704102, -0.739720204055679, 1, -0.333332229314151, -0.0719844643261962, 1, -0.706169147899963, 1.90281225797165, 1, -1.45915448572243, -1.26976846569193, 1, 0.0727755491314043, 0.667365454330813, 1, -0.0808344680605234, 0.184781391292529, 1, -0.863709467100639, 1.47736659069375, 1, -0.586672480248321, -0.0068336072473826, 1, 0.44243789139275, -2.11481668211358, 1, -0.828467529053971, -0.75660973747225, 1, 0.470806885141186, 0.14405647506138, 1, 1.03928995001502, -0.658563537396914, 1, -0.0973604235622534, -2.89615922400029, 1, 1.94036940516588, -0.925782658089194, 1, 0.982746031050345, -1.02993969735053, 1, -2.53148692878878, -1.63117689326385, 1, -0.102969358321135, 2.12002622975576, 1, 1.27745374673127, 2.37331827627445, 1, 0.360740727359261, 1.11517803781759, 1, -0.439916374232608, -0.957559318958798, 1, -0.925990432415762, 1.10951680615855, 1, 0.652465196470606, 0.374633170657726, 1, -2.1573559945207, 1.58680476457387, 1, 0.825919867314306, 1.06960864831212, 1, 0.0934330860423216, -0.936840151186869, 1, -0.666449222040364, -0.355197282702138, 1, 0.56358273899803, -1.37246345516621, 1, -0.640488047370993, 0.458572047704437, 1, -0.553550622468521, 3.11191789875175, 1, 0.473947856180022, -1.18221453930136, 1, 0.68883716116012, 0.341431443608765, 1, -0.00722162310755496, 0.208758046562473], + "x_shape": [300, 3], + "y": [-3.80566941964591, 1.54908176449145, -3.1278028285857, 3.79048885299884, 1.74823915706338, 1.65246067230496, 1.27960931403029, -0.56592851663509, 5.00497197007389, 1.51653697210384, 1.21803634143176, -1.8854071302669, 1.95549489810405, 2.03225247676587, -0.816623437172049, -0.119665946265096, 
1.77063920632686, 1.6659337084333, 2.38792191813225, -1.50363920001903, -2.55319715043868, 2.24465136601157, 0.897196517072314, 0.760706086004292, -1.79576177261499, 3.33893992427208, -0.952432205892787, 0.673431093536432, 0.464721808339846, 0.327583033720535, -0.658948654610542, -0.0284428048197385, 6.12352854234499, 1.38541414392379, -1.26609520173666, 3.20975211965673, 3.48156016663398, -0.0878074075626057, -0.420000146244977, -2.38507405371416, -1.22199960664539, 2.24222584961093, -1.00746149647909, 1.65638956887161, -3.1472808453951, 4.4702422334243, 0.914334504942389, 0.962050692404464, 0.312466251083892, 1.19380935498264, 5.9186539604933, 0.615461193894697, 3.19792902992371, 0.603857745696335, -0.699003165575888, 3.83943042725527, 0.771312277213186, -2.74689145160771, -2.68344119151899, -1.11657369273573, 0.708978500107075, -2.48170443350493, -4.92018054480692, 2.9077121824938, 1.85964369689357, 2.38217506543879, 2.76447092296257, 2.61253774845637, 4.03495836757325, -1.50715506023402, 1.78765985247861, 2.32058094667293, -1.41019258329476, 1.41842189808207, 3.63286587306791, -1.89578510712116, -1.96164134973559, -0.560867204059109, -2.25125980076631, -3.23660536144353, 1.96117116038708, 1.39952492252274, -0.0390178342491284, 2.2589409739776, 2.11971554976973, 3.01426873396185, 0.801771892347791, 1.47134288655087, -3.25967206481895, 0.72495364417385, 2.4808425907754, 1.43995363008351, 0.275629604396494, 1.63634418835864, -1.03134929344709, -0.493486276439744, -0.134934017555351, 0.354435262194949, 4.61298019325046, 1.04807595609928, -0.0138868057253823, 0.303166237660369, 2.3843092052252, 5.24667451949834, 0.462211287333623, -4.38283905125772, 3.80332798552168, -2.34034031846736, -0.274796754933312, 0.490038176036628, 1.17262728401497, -2.69757760017061, -0.733130705442101, -0.213475074091586, 3.30019221586162, 1.74418306998403, 1.33141673318315, 3.12103046572065, -1.51340084651934, 0.144461352109365, -0.706272786217255, 1.27298829507172, 1.3993791777592, 
5.20165415968215, -0.595290971415798, -2.49180381295223, 2.69744510947268, 2.38713955348104, -0.562298621010308, 4.82408339131965, 1.9154787244646, 0.980186112707269, 3.16722662124447, 1.85762053600634, -1.03476381427997, -0.763327157893114, 1.35830375233652, -0.24818346458318, 5.9653426246782, -1.32414078419191, -1.32482351394368, 1.86292887347662, -2.06362112081128, 1.50226459514104, -1.7242845750087, 1.74954190824079, -2.34255520112018, 3.12825064302851, 1.6428616499954, 1.97489567245407, 3.96734162511977, 0.0357464833850099, -0.154001265201229, 1.054873778512, 5.84592745562272, 6.11197826761049, 0.745813787729375, 4.66113996901086, 0.34896262611051, 3.02865729157064, 2.63640007685476, 0.994401197891844, -0.965496166051436, 2.52728355076406, 1.98929695162357, 0.353264248367459, 2.23738723475045, -0.725056148467462, -0.850849364856482, 0.40225533275581, 2.45859333962349, 0.46557211527261, 1.62887401629234, 0.0277532072691723, 1.16000390524769, 2.00831357607483, -0.530972124324022, 1.28076535928944, -1.66960054354244, 2.53466298151428, 0.322504481629986, 3.28538741278628, 2.04746556289049, -3.66375926354049, -1.204952539235, 2.74231068758653, 1.86924786761776, 0.854154450323026, 1.86259736706521, -1.47106793360864, -0.393734316683511, 0.668806210355772, 0.675852343374485, 3.00071531021607, 3.50394529863021, 1.49412665843846, 1.29734481181329, -0.485370188302451, -0.959357972238169, 1.85564321892084, 1.2594540947968, 1.33734739535483, 2.1457039552267, -2.60911081150499, 0.17824723625664, 2.61185052702884, 0.237258879746757, 2.46668252606793, -1.76677801355537, -0.115037360330894, 1.41298226082516, 1.35210060458591, 1.31329630906411, -3.32306606946237, -3.23637416733209, -2.87988024919408, 2.13547466657754, 0.705142164572028, -2.00008296179729, 3.39941635140683, 4.37108004015502, 1.31941246112636, 1.03709003582194, -1.69938927929919, 0.110697826783455, -0.421864619325722, 0.341604050617123, -1.49089198668935, 2.41421609741128, 1.71834833273861, -0.358375661608223, 
1.72784694796551, -0.60505413565153, 1.05583374375399, 2.28170088845985, 3.76898088532232, -0.556510960073391, -1.27476782489023, 4.06630117410057, 2.49464755540962, 0.576345406485293, -2.13243143004041, -2.10734364625304, 0.705410407876752, 2.46810690770091, 2.22209178852358, 0.881815036179949, 2.25197399212005, -0.445303641438292, -2.68317595987241, 1.46055982281047, 1.25880682489739, 1.20737023077546, 4.23815250303249, -1.64540568505356, -0.0623185562492917, 1.70699009123048, 0.837145252993992, -0.0327376859213911, 0.73989994921216, 4.70490997620916, 1.52191777348483, -0.0484052136287189, 0.0873771436334147, -0.676961371596496, 1.69969926086576, 0.155276981835386, 0.550765903798953, -0.618343208907558, 0.769589106605676, 4.55990110476863, -2.46581003708921, 2.7825276782881, 1.219928195271, 3.1743764700966, 0.489220643530654, -2.56702525157311, -1.53845968034167, 0.44111112906278, 0.845034687656059, -4.76006093747021, 0.201952315151719, -0.984746980156481, -3.68946119562185, 4.44360854392202, 6.39581932142748, 2.89416498626478, -1.28363466805926, 2.92099264651389, 2.14827196021727, 2.44887687339244, 4.37058999545495, -1.58430796450571, 0.60525762036133, -1.55128278651374, 1.52725504793008, 6.82182645000788, -0.55412019363217, 1.13798263409546, 0.455416612386988], + "coords": [27.4707987846341, -112.097537950613, 31.78694540984, -79.3000180041417, 44.163205276709, -113.434817390516, 30.6160606793128, -123.437046436593, 30.0610885256901, -94.0693336678669, 32.5814611103851, -90.027361693792, 31.0204281762708, -116.804101457819, 34.2780609033071, -107.19355346635, 38.7656996259466, -113.537867050618, 43.6763277975842, -75.0515514332801, 34.2208665621001, -78.7827574461699, 27.8930393513292, -98.2080221036449, 31.0593215457629, -84.8737596347928, 30.3485997603275, -116.463817870244, 37.6263862999622, -70.2987830061466, 34.7806577803567, -92.1567228529602, 43.0516442283988, -93.4777944441885, 37.9970584588591, -74.3568859109655, 25.9630828746594, -80.1026646094397, 
33.4307062672451, -103.826644704677, 40.5458139663097, -78.7522445246577, 36.1695896193851, -67.9350565839559, 38.7521583295893, -71.9592513516545, 31.163638655562, -116.121457903646, 25.2586018119473, -87.6491499878466, 39.8962305160239, -83.1143720215186, 33.2770436769351, -121.255144951865, 25.8682190673426, -81.649994472973, 30.0204496947117, -96.1460592132062, 39.5914805470966, -92.5481395982206, 31.1234987981152, -90.6055248528719, 40.0097910838667, -114.238865450025, 25.4482048912905, -117.56826729048, 36.6655631631147, -85.2855930896476, 35.982232313836, -71.3408520119265, 27.7263315976597, -122.72550937254, 48.986199562205, -74.4762556813657, 47.9265625588596, -122.61523227673, 39.2682837729808, -92.4377418868244, 39.5562157325912, -77.7539171231911, 34.5264423754998, -97.9177146777511, 44.4645147072151, -107.138367965817, 35.9329059778247, -85.6361064966768, 46.5867533988785, -65.7786230230704, 41.8954971071798, -107.311327350326, 45.6820789491758, -102.960722204298, 40.1540609076619, -96.6992997331545, 31.9105114496779, -66.4000432286412, 26.4903231000062, -111.630540927872, 31.0115770611446, -65.5354247009382, 44.0549874736462, -83.1921148998663, 40.9433301305398, -65.0058361375704, 39.2907186120283, -101.259493767284, 43.7931650318205, -111.379119833, 31.6285617940594, -100.083948303945, 35.058972297702, -102.70349485334, 39.7602688462939, -77.5795071804896, 47.196706145769, -121.147136851214, 34.6343003620859, -98.8225086173043, 29.9124067358207, -73.122448050417, 35.0738739245571, -88.0069580394775, 43.1170818919782, -83.3890740480274, 29.954110365361, -116.866486873478, 34.2269004962873, -95.725186993368, 27.2848542779684, -124.433894348331, 34.2802436614875, -79.1628553252667, 37.6229547895491, -106.455742279068, 29.0278172702529, -87.1832156646997, 27.5587124342564, -112.511252723634, 34.597504202975, -71.5202422300354, 25.4772420797963, -122.97833295539, 40.6927352421917, -94.6583102922887, 49.2804933863226, -94.6133207343519, 38.6499294138048, 
-99.9453172273934, 45.3195638023317, -93.7002620426938, 40.2108846639749, -123.559088190086, 40.5804936424829, -84.6682383585721, 48.331017390592, -72.4555963510647, 25.2730290638283, -121.396897574887, 25.7271646521986, -110.200698482804, 34.6628600498661, -81.2712460244074, 36.1497240664903, -81.2364272773266, 47.8290642029606, -104.996055285446, 31.7975496524014, -66.7185138212517, 37.2112202981953, -97.77383252047, 31.4754185907077, -122.423586025834, 48.8797316327691, -119.344052225351, 42.791276978096, -65.8809051662683, 31.5055446058977, -68.9724027365446, 25.0383555307053, -111.420584744774, 40.8551363681909, -114.091766737401, 34.1141689918004, -114.699426470324, 38.4234336728696, -83.8329386385158, 38.2432106940541, -104.544960088097, 47.130318696145, -98.8189331628382, 47.7270197414327, -90.5165392579511, 42.366586177377, -79.1417098231614, 33.606773178326, -81.736669219099, 35.8811602520291, -89.7287655714899, 30.0783942511771, -73.1545638712123, 32.7082962496206, -115.506533253938, 38.8667827355675, -115.3876818344, 42.1497523260769, -120.815325342119, 29.5411136932671, -86.246180832386, 43.7223906046711, -109.820866407827, 41.6460823791567, -117.819532649592, 43.6272781633306, -107.468041055836, 27.9264097101986, -69.4817933440208, 40.0765914877411, -98.5776065196842, 44.9947309971321, -108.837687144987, 26.8686834548134, -89.369457452558, 30.0844381796196, -102.690940815955, 34.101767122047, -111.735342349857, 40.8366730320267, -111.867539198138, 47.4467967113014, -85.5896565876901, 47.984753845958, -89.734470336698, 42.7817778370809, -82.8673390764743, 45.1494106033351, -65.5507979216054, 33.1918284238782, -77.8174640750512, 35.7225922227371, -83.5804084222764, 37.2999615850858, -78.1656700978056, 45.2965195581783, -105.774216996506, 47.0481046184432, -95.5315727135167, 43.1131221470423, -114.765558247454, 37.6976456434932, -102.839771397412, 42.2032830945682, -100.849844035693, 41.9812431850005, -116.136099328287, 38.8792202458717, -120.6404693285, 
30.9929617040325, -120.30020003207, 35.1912364945747, -118.26551543083, 39.1413289005868, -110.828843670897, 27.1620237501338, -118.537424076349, 45.175522583304, -97.980769709684, 33.0882331356406, -71.2568906275555, 44.2119730403647, -97.0859011868015, 32.5019138748758, -83.8784661330283, 48.9147514861543, -75.3835704643279, 46.6463613323867, -116.23875762336, 39.4357519806363, -81.7707997141406, 47.7098977251444, -121.481722882017, 30.1393425266724, -70.7505237776786, 27.2565706865862, -110.387556962669, 47.8032924584113, -89.8435984831303, 49.7858557617292, -97.9788100905716, 25.2393307921011, -97.8015420585871, 49.9443241395056, -75.1063386676833, 28.148558747489, -113.349108686671, 46.3306715129875, -98.893117448315, 27.3672870825976, -84.5985232200474, 44.7686270985287, -90.3575590020046, 48.3744598459452, -100.59938818682, 29.0597666229587, -78.3289548382163, 47.5315032934304, -92.9084983328357, 35.6214061612263, -115.381180411205, 38.5052453435492, -81.8572396691889, 43.1281577970367, -67.6236602477729, 38.4820823324844, -96.9934390578419, 31.2062856159173, -102.573190974072, 35.6263919442426, -95.9744152007625, 48.3460695657413, -70.9714778419584, 45.1089485490229, -99.2041874490678, 44.8567858547904, -115.357747180387, 25.8317412459292, -100.569438557141, 42.2695453569759, -79.5259166928008, 29.981039755512, -71.8767891824245, 34.2447574017569, -80.660091903992, 26.5475198801141, -100.183350802399, 26.7553771845996, -100.333937895484, 36.7422755283769, -120.56613383349, 48.9611355529632, -92.8470437694341, 30.9813339496031, -101.145302155055, 27.7330083539709, -67.8074048366398, 42.871752823703, -112.857448277064, 47.2783481935039, -77.2596587566659, 40.435338113457, -108.976760171354, 35.1624992035795, -108.957947324961, 37.8963907656725, -116.705657024868, 25.5605240876321, -98.7051994819194, 47.6823320845142, -89.3285010010004, 47.5217255123425, -77.6172855356708, 28.3890502643771, -66.8311153305694, 37.0943559915759, -124.230765406974, 
39.853042649338, -119.683177755214, 43.734657112509, -107.394110965542, 41.3956094533205, -92.3185892635956, 49.8278201615904, -96.2602159986272, 49.4748610828537, -95.015565068461, 36.2480592913926, -118.37683216203, 48.9176684699487, -111.118808751926, 42.7095484512392, -65.9853237308562, 36.6687697009183, -110.479761306196, 38.7557265232317, -110.66957521718, 46.6747526079416, -124.238504669629, 49.3039369059261, -109.643087442964, 39.495692710625, -81.3116361666471, 37.8163744229823, -107.539578266442, 36.6370194999035, -85.7349885208532, 30.009355104994, -76.9494638964534, 47.4314975610469, -79.2124570673332, 25.868980656378, -76.915792869404, 36.0717893345281, -106.899843378924, 31.9807285093702, -69.5196529198438, 48.1687542865984, -115.090548032895, 43.2847869466059, -107.253166483715, 31.0607317194808, -74.923128564842, 43.7445345101878, -95.6120445020497, 28.3997825870756, -69.201692296192, 34.1939318634104, -67.1949854539707, 39.1521758632734, -120.206458321773, 32.1689292613883, -115.24017425254, 35.7936758431606, -105.979859712534, 48.0434910801705, -108.121977625415, 30.6678467721213, -114.414031454362, 36.3598890660796, -97.4030106840655, 25.4302484565414, -76.1766964849085, 31.2257868179586, -69.4824297819287, 35.7728370698169, -124.327071029693, 40.5724485870451, -102.031641574576, 42.8384863247629, -111.356437928043, 49.5520429103635, -97.4202415300533, 40.2200450422242, -89.7150627244264, 45.6521838612389, -120.834473939613, 26.3565805798862, -124.56268586684, 26.0861994116567, -120.142481504008, 37.483436363982, -86.6459822189063, 31.0433944920078, -90.2461129659787, 37.9076956480276, -87.3448572587222, 42.6039568264969, -84.0855385921896, 41.9519951101393, -90.8793100109324, 44.1796136030462, -81.2579075666144, 32.1813950547948, -103.662921870127, 40.0951536372304, -91.0679896688089, 26.4561033807695, -72.816146207042, 47.6489182503428, -104.106088946573, 31.9020505878143, -70.8190841972828, 40.1415528904181, -68.636861294508, 44.6493807015941, 
-102.747432691976, 44.4551234191749, -113.086186069995, 39.1811771842185, -115.473852800205, 46.4012167183682, -114.625875283964, 35.6004432600457, -95.6936051137745, 45.6347945786547, -67.0380280306563, 34.8306658503134, -96.7439729021862, 26.2414863915183, -70.5461551249027, 40.8951656019781, -115.516242445447, 44.6178857469931, -88.9749061269686, 45.9271430037916, -118.771154466085, 36.9535762700252, -65.4372019832954, 30.7945482782088, -89.7463751398027, 44.7258095431607, -100.242252168246, 47.6978393446188, -118.56607249938, 28.0653597379569, -119.685736037791, 28.1353681290057, -79.1321756737307, 33.6648839467671, -122.053310032934, 43.6736815434415, -71.2178000016138, 29.3359048780985, -112.294620228931, 40.1003922568634, -87.0067395409569, 31.457986793248, -84.3165822606534, 49.2446586547885, -106.612914097495, 35.9065608761739, -73.5474638640881, 44.1398380615283, -124.082810287364, 25.4708945460152, -84.924895837903, 41.8871806468815, -106.096405591816, 34.9150853871834, -73.2601794553921, 35.1797055220231, -83.2305818703026, 33.4463573875837, -66.7318040132523, 40.4766840103548, -76.9164558220655, 37.4098320258781, -91.6692243795842, 29.476062831236, -84.0955441258848, 41.1577843653504, -102.717892099172, 46.6312753385864, -88.1103074923158, 44.0961233864073, -119.438956254162, 47.6377815706655, -70.2221432700753, 27.5785221718252, -121.396933379583, 36.0376698023174, -97.6174479862675, 36.131562088849, -85.6437181448564, 41.888503881637, -84.3965621711686, 46.9924679840915, -118.53335971944, 45.149169646902, -100.304167163558, 33.4233808913268, -68.9160460047424, 38.0395922169555, -86.5758460015059, 32.1553702931851, -103.949608537368, 43.5587515472434, -83.3382963947952, 33.1261120678391, -69.1725475387648, 25.7592206529807, -123.864497346804, 48.475068929838, -83.8670489052311, 49.7120504907798, -100.888913748786, 28.4777149790898, -93.5360603639856, 28.6474500666372, -112.721066763625, 49.5247484592255, -104.175616260618, 41.0594402113929, 
-121.042377799749, 43.6778329254594, -79.4508780818433, 31.4667831582483, -96.2968209711835, 36.7204245936591, -109.251502500847, 37.1144337870646, -100.049575627781, 32.1325673838146, -68.5339817451313, 46.5417650761083, -121.027649720199, 44.7404646489304, -115.534672294743, 29.3185923947021, -122.251707599498, 39.7284219856374, -89.3906695209444], + "coords_shape": [300, 2], + "metric": "haversine", + "cutoff_km": 200, + "kernel": "bartlett", + "vcov": [0.000896574506884131, 9.97084980891442e-05, 0.000170880493698137, 9.97084980891442e-05, 0.0011877992411616, -3.64338758353445e-05, 0.000170880493698137, -3.64338758353445e-05, 0.000756678260461067], + "vcov_shape": [3, 3], + "n": 300, + "k": 3 + } +} diff --git a/diff_diff/conley.py b/diff_diff/conley.py new file mode 100644 index 00000000..02c069e5 --- /dev/null +++ b/diff_diff/conley.py @@ -0,0 +1,271 @@ +"""Conley (1999) spatial HAC helpers for diff-diff. + +This module contains the geographic-distance and kernel helpers that +implement the Conley (1999) spatial heteroskedasticity-and-autocorrelation- +consistent variance estimator: + + Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1} + +The public dispatch (``compute_robust_vcov`` in :mod:`diff_diff.linalg`) +imports :func:`_validate_conley_kwargs` and :func:`_compute_conley_vcov` and +calls them from the ``vcov_type="conley"`` branch. Tests exercise the +inner helpers directly. + +Earth radius constant is 6371.01 km (mean radius), matching R +``conleyreg::haversine_dist`` (Düsterhöft 2021, CRAN v0.1.9). See +``benchmarks/R/README.md`` for the cross-language parity convention. + +Phase 2 will add the time-dimension extension (Driscoll-Kraay product +kernel) and a sparse k-d-tree fast path; both will live in this module. 
+""" + +from __future__ import annotations + +import warnings +from typing import Optional + +import numpy as np + +# Earth's mean radius (km), matching R conleyreg's haversine convention +# (Düsterhöft 2021, conleyreg::haversine_dist in src/distance_functions.cpp, +# CRAN v0.1.9). WGS-84 equatorial radius is 6378.137 km; the 0.01 km delta +# vs 6371.0 is methodologically negligible (Earth mean radius is approximate +# at many more digits) but matters for the 1e-6 cross-language parity bound. +_CONLEY_EARTH_RADIUS_KM = 6371.01 + +# Empirical threshold for warning about dense O(n²) distance matrix memory +_CONLEY_DENSE_WARN_N = 20_000 + + +def _haversine_km( + lat1: np.ndarray, + lon1: np.ndarray, + lat2: np.ndarray, + lon2: np.ndarray, +) -> np.ndarray: + """Vectorized great-circle distance in km between two sets of points. + + Inputs are in DEGREES. NumPy broadcasting applies, so passing + ``lat1=lats[:, None]`` and ``lat2=lats[None, :]`` (with matching + ``lon1``, ``lon2``) yields the full pairwise n×n distance matrix. + + Earth radius is 6371.01 km (mean radius), matching R ``conleyreg``. + """ + lat1_r = np.radians(lat1) + lon1_r = np.radians(lon1) + lat2_r = np.radians(lat2) + lon2_r = np.radians(lon2) + dlat = lat2_r - lat1_r + dlon = lon2_r - lon1_r + a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1_r) * np.cos(lat2_r) * np.sin(dlon / 2.0) ** 2 + # Clip for numerical robustness — antipodal pairs can produce a >1 by ~eps + a = np.clip(a, 0.0, 1.0) + return _CONLEY_EARTH_RADIUS_KM * 2.0 * np.arcsin(np.sqrt(a)) + + +def _pairwise_distance_matrix(coords: np.ndarray, metric) -> np.ndarray: + """Build the dense n×n pairwise distance matrix. + + ``metric`` is one of ``"haversine"`` (lat/lon in degrees, distance in km), + ``"euclidean"`` (any units), or a callable ``f(coords1, coords2) -> n×n``. 
+ """ + if metric == "haversine": + lats = coords[:, 0] + lons = coords[:, 1] + return _haversine_km(lats[:, None], lons[:, None], lats[None, :], lons[None, :]) + if metric == "euclidean": + # Vectorized via squared-distance identity; avoids scipy import path + # while matching scipy.spatial.distance.cdist to ~1e-14 + diff = coords[:, None, :] - coords[None, :, :] + return np.sqrt(np.sum(diff * diff, axis=-1)) + if callable(metric): + return np.asarray(metric(coords, coords), dtype=np.float64) + raise ValueError( + f"conley_metric must be 'haversine', 'euclidean', or callable; got {metric!r}." + ) + + +def _bartlett_kernel(u: np.ndarray) -> np.ndarray: + """Bartlett (linear taper) kernel: K(u) = max(0, 1 - |u|). + + Conley (1999) Eq 3.14 + Andrews (1991). PSD-guaranteed (non-negative + spectral window), so the resulting Conley meat is PSD. + """ + return np.maximum(0.0, 1.0 - np.abs(u)) + + +def _uniform_kernel(u: np.ndarray) -> np.ndarray: + """Uniform (truncated) kernel: K(u) = 1 if |u| <= 1 else 0. + + Cited as White (1980) truncated estimator; Conley (1999) page 11. Easier + to interpret than Bartlett but the spectral window is negative in regions + (Conley 1999 footnote 11), so the resulting meat is NOT guaranteed PSD. + Caller emits ``UserWarning`` if any meat eigenvalue is materially negative. + """ + return (np.abs(u) <= 1.0).astype(np.float64) + + +def _validate_conley_kwargs( + coords: Optional[np.ndarray], + cutoff: Optional[float], + metric, + kernel: str, + n: int, +) -> None: + """Validate the four Conley kwargs against the design's row count. + + Raises + ------ + ValueError + Missing/malformed coords or cutoff; lat/lon out of range under + haversine; unknown kernel/metric; non-finite or non-positive cutoff. + + Warnings + -------- + UserWarning + Emitted when ``n > 20_000`` to flag the dense O(n²) memory cost. 
+ """ + if coords is None: + raise ValueError( + "vcov_type='conley' requires conley_coords (n×2 array of [lat, lon] " + "or projected coords). Pass via TwoWayFixedEffects(conley_coords=...) " + "or compute_robust_vcov(conley_coords=...)." + ) + coords_arr = np.asarray(coords, dtype=np.float64) + if coords_arr.ndim != 2 or coords_arr.shape[1] != 2: + raise ValueError(f"conley_coords must be a 2-D (n, 2) array; got shape {coords_arr.shape}.") + if coords_arr.shape[0] != n: + raise ValueError(f"conley_coords has {coords_arr.shape[0]} rows but X has {n} rows.") + if not np.isfinite(coords_arr).all(): + raise ValueError("conley_coords contains NaN or inf values.") + + if cutoff is None: + raise ValueError( + "vcov_type='conley' requires conley_cutoff_km (a positive finite " + "bandwidth). No defensible default — see Conley 1999 Section 5 " + "for the sensitivity-grid recommendation." + ) + if not np.isfinite(cutoff) or cutoff <= 0: + raise ValueError(f"conley_cutoff_km must be a positive finite number; got {cutoff!r}.") + + if not (metric in ("haversine", "euclidean") or callable(metric)): + raise ValueError( + f"conley_metric must be 'haversine', 'euclidean', or callable; got {metric!r}." + ) + + # Lat/lon range checks under haversine. Skipped for euclidean (user's + # responsibility to pass projected coords with consistent units) and + # for callables (user supplies their own distance function). + if metric == "haversine": + if not ((coords_arr[:, 0] >= -90.0) & (coords_arr[:, 0] <= 90.0)).all(): + raise ValueError( + "conley_metric='haversine' requires latitude in [-90, 90]; " + f"got min={coords_arr[:, 0].min()}, max={coords_arr[:, 0].max()}." + ) + if not ((coords_arr[:, 1] >= -180.0) & (coords_arr[:, 1] <= 180.0)).all(): + raise ValueError( + "conley_metric='haversine' requires longitude in [-180, 180]; " + f"got min={coords_arr[:, 1].min()}, max={coords_arr[:, 1].max()}." 
+ ) + + if kernel not in ("bartlett", "uniform"): + raise ValueError(f"conley_kernel must be 'bartlett' or 'uniform'; got {kernel!r}.") + + if n > _CONLEY_DENSE_WARN_N: + memory_gb = (n * n * 8) / 1e9 + warnings.warn( + f"vcov_type='conley' builds a dense {n}x{n} distance matrix " + f"(~{memory_gb:.1f} GB float64). The sparse k-d-tree fast path is " + "deferred to a follow-up PR.", + UserWarning, + stacklevel=3, + ) + + +def _compute_conley_vcov( + X: np.ndarray, + residuals: np.ndarray, + coords: np.ndarray, + cutoff: float, + metric, + kernel: str, + bread_matrix: np.ndarray, +) -> np.ndarray: + """Conley (1999) spatial HAC sandwich variance. + + Var̂(β) = bread_inv · (Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j') · bread_inv + + Implemented via the vectorized identity ``meat = S' K S`` where + ``S = X * residuals[:, None]`` is the (n, k) score matrix and ``K`` is + the (n, n) kernel matrix. The diagonal contributes the standard White + (1980) HC0 term ``X_i ε_i² X_i'``. + + Inputs are assumed already validated by :func:`_validate_conley_kwargs`; + the helper only does the math. Caller is responsible for the validator. + + Returns + ------- + vcov : ndarray of shape (k, k) + + Notes + ----- + For ``kernel == "uniform"`` the meat is not guaranteed PSD (Conley 1999 + footnote 11); a ``UserWarning`` is emitted if the smallest meat eigenvalue + is materially negative (< -1e-12). + """ + coords_arr = np.asarray(coords, dtype=np.float64) + D = _pairwise_distance_matrix(coords_arr, metric) + + # Apply kernel. Both supported kernels vanish strictly outside the cutoff, + # so explicit zeroing of D > cutoff is unnecessary (the kernel handles it). + u = D / cutoff + if kernel == "bartlett": + K = _bartlett_kernel(u) + elif kernel == "uniform": + K = _uniform_kernel(u) + else: + raise ValueError(f"conley_kernel must be 'bartlett' or 'uniform'; got {kernel!r}.") + + # Score matrix S = X * residuals[:, None] is (n, k). 
Conley meat: + # meat[a, b] = Σ_{i,j} K_{ij} · S_{i,a} · S_{j,b} + # which equals S.T @ K @ S. The K(0) = 1 diagonal contributes the HC0 term. + # Suppress spurious BLAS-level "divide by zero / overflow" warnings on + # macOS Accelerate when K is sparse-ish (most off-diagonals are exactly + # 0 outside the cutoff). The matmul result is mathematically correct; + # the warning is a subnormal-handling false-positive in the AVX path. + # We verify finiteness immediately after the matmul. + S = X * residuals[:, np.newaxis] + with np.errstate(divide="ignore", over="ignore", invalid="ignore"): + meat = S.T @ K @ S + if not np.all(np.isfinite(meat)): + raise ValueError( + "Conley meat contains non-finite values; check residuals and " + "score matrix for NaN/Inf." + ) + + # PSD guard for uniform kernel (Conley 1999 fn 11) + if kernel == "uniform": + eigvals = np.linalg.eigvalsh(meat) + if eigvals.size and eigvals.min() < -1e-12: + warnings.warn( + f"Conley meat with uniform kernel has a negative eigenvalue " + f"({eigvals.min():.2e}); the variance estimator is not " + "guaranteed PSD. Consider conley_kernel='bartlett' (PSD by " + "construction).", + UserWarning, + stacklevel=3, + ) + + # Sandwich via two solves (mirrors _compute_cr2_bm pattern in linalg.py) + try: + temp = np.linalg.solve(bread_matrix, meat) + vcov = np.linalg.solve(bread_matrix, temp.T).T + except np.linalg.LinAlgError as e: + if "Singular" in str(e): + raise ValueError( + "Design matrix is rank-deficient (singular X'X matrix). " + "Cannot compute Conley spatial HAC variance." 
+ ) from e + raise + + return vcov diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index 3b72b5d8..316f84dc 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -147,6 +147,10 @@ def __init__( bootstrap_weights: str = "rademacher", seed: Optional[int] = None, rank_deficient_action: str = "warn", + conley_coords: Optional[Tuple[str, str]] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ): # Resolve vcov_type from the legacy `robust` alias via the shared # helper so __init__ and set_params use identical validation logic. @@ -169,6 +173,12 @@ def __init__( self.bootstrap_weights = bootstrap_weights self.seed = seed self.rank_deficient_action = rank_deficient_action + # Conley spatial-HAC parameters; column names (NOT array values) for + # the coords. Validation happens at fit() when `data` is in scope. + self.conley_coords = conley_coords + self.conley_cutoff_km = conley_cutoff_km + self.conley_metric = conley_metric + self.conley_kernel = conley_kernel self.is_fitted_ = False self.results_ = None @@ -327,6 +337,66 @@ def fit( "HC2/CR2-BM are computed on the full projection." ) + # Reject Conley + absorb in Phase 1. Conley's meat depends only on + # scores X*epsilon, both of which FWL preserves under within- + # transformation, so the math composes cleanly for TWFE's two-FE + # design. But arbitrary absorb dimensions have not been verified + # empirically yet; conservatively reject and tell the user to use + # fixed_effects= dummies for the same FE design. + if absorb and self.vcov_type == "conley": + raise NotImplementedError( + "DifferenceInDifferences(absorb=..., vcov_type='conley') " + "is deferred to a follow-up. Conley + within-transformation " + "for arbitrary absorbed FE dimensions has not been verified; " + "use fixed_effects= dummies for an equivalent FE design " + "with the full projection, or drop absorb= for " + "cross-sectional Conley." 
+ ) + + # Reject Conley + cluster (combined product kernel is Phase 2+) and + # Conley + survey_design (Bertanha-Imbens 2014 territory) early at + # the estimator level so the error message references the user-facing + # kwarg names rather than the internal cluster_ids/weights array. + if self.vcov_type == "conley": + if self.cluster is not None: + raise NotImplementedError( + f"DifferenceInDifferences(cluster={self.cluster!r}, " + "vcov_type='conley') is deferred to Phase 2 (combined " + "product kernel). Drop cluster= for cross-sectional " + "Conley." + ) + if survey_design is not None: + raise NotImplementedError( + "DifferenceInDifferences(survey_design=..., " + "vcov_type='conley') is deferred to Phase 2+ " + "(Bertanha-Imbens 2014). Drop survey_design= for " + "cross-sectional Conley." + ) + if self.conley_coords is None: + raise ValueError( + "vcov_type='conley' requires conley_coords=(, " + ") tuple of column names in the data." + ) + if self.conley_cutoff_km is None: + raise ValueError( + "vcov_type='conley' requires conley_cutoff_km (positive " + "finite bandwidth in km for haversine, or in coord units " + "for euclidean)." + ) + # Validate columns exist; the validator inside compute_robust_vcov + # will check NaN/range/etc on the array values themselves. + _coord_cols = list(self.conley_coords) + if len(_coord_cols) != 2: + raise ValueError( + f"conley_coords must be a 2-tuple of column names; got " + f"{self.conley_coords!r}." + ) + for _col in _coord_cols: + if _col not in data.columns: + raise ValueError( + f"conley_coords references column {_col!r} which " f"is not in `data`." + ) + if absorb: # FWL theorem: demean ALL regressors alongside outcome. # Regressors collinear with absorbed FE (e.g., treatment after @@ -389,6 +459,13 @@ def fit( # For wild bootstrap, we don't need cluster SEs from the initial fit cluster_ids = data[self.cluster].values if self.cluster is not None else None + # Extract Conley coords array (n×2 float64) from the user's data. 
+ # Validation of the column existence and the 2-tuple shape happened + # at the top of fit(); here we only need to materialize the array. + _conley_coords_array = None + if self.vcov_type == "conley" and self.conley_coords is not None: + _conley_coords_array = data[list(self.conley_coords)].to_numpy(dtype=np.float64) + # When survey PSU is present, it overrides cluster for variance estimation effective_cluster_ids = _resolve_effective_cluster( resolved_survey, cluster_ids, self.cluster @@ -430,6 +507,10 @@ def fit( weight_type=survey_weight_type, survey_design=_lr_survey, vcov_type=_fit_vcov_type, + conley_coords=_conley_coords_array, + conley_cutoff_km=self.conley_cutoff_km, + conley_metric=self.conley_metric, + conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=n_absorbed_effects) coefficients = reg.coefficients_ @@ -552,6 +633,8 @@ def _refit_did_absorb(w_r): # stored `self.vcov_type`. vcov_type=_fit_vcov_type, cluster_name=self.cluster, + conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, + conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self._coefficients = coefficients @@ -819,6 +902,10 @@ def get_params(self) -> Dict[str, Any]: "bootstrap_weights": self.bootstrap_weights, "seed": self.seed, "rank_deficient_action": self.rank_deficient_action, + "conley_coords": self.conley_coords, + "conley_cutoff_km": self.conley_cutoff_km, + "conley_metric": self.conley_metric, + "conley_kernel": self.conley_kernel, } def set_params(self, **params) -> "DifferenceInDifferences": @@ -1307,6 +1394,50 @@ def fit( # type: ignore[override] "switch to fixed_effects= dummies for a full-dummy design." ) + # Reject Conley combinations early at the estimator level (see + # DifferenceInDifferences.fit for the matching block and rationale). + if absorb and self.vcov_type == "conley": + raise NotImplementedError( + "MultiPeriodDiD(absorb=..., vcov_type='conley') is deferred " + "to a follow-up. 
Use fixed_effects= dummies for an equivalent " + "FE design with the full projection, or drop absorb= for " + "cross-sectional Conley." + ) + if self.vcov_type == "conley": + if self.cluster is not None: + raise NotImplementedError( + f"MultiPeriodDiD(cluster={self.cluster!r}, " + "vcov_type='conley') is deferred to Phase 2 (combined " + "product kernel). Drop cluster= for cross-sectional " + "Conley." + ) + if survey_design is not None: + raise NotImplementedError( + "MultiPeriodDiD(survey_design=..., vcov_type='conley') " + "is deferred to Phase 2+ (Bertanha-Imbens 2014). Drop " + "survey_design= for cross-sectional Conley." + ) + if self.conley_coords is None: + raise ValueError( + "vcov_type='conley' requires conley_coords=(, " + ") tuple of column names in the data." + ) + if self.conley_cutoff_km is None: + raise ValueError( + "vcov_type='conley' requires conley_cutoff_km (positive " "finite bandwidth)." + ) + _coord_cols_mp = list(self.conley_coords) + if len(_coord_cols_mp) != 2: + raise ValueError( + f"conley_coords must be a 2-tuple of column names; got " + f"{self.conley_coords!r}." + ) + for _col in _coord_cols_mp: + if _col not in data.columns: + raise ValueError( + f"conley_coords references column {_col!r} which " f"is not in `data`." + ) + # Pre-compute non_ref_periods (needed for absorb demeaning) non_ref_periods = [p for p in all_periods if p != reference_period] @@ -1439,6 +1570,13 @@ def fit( # type: ignore[override] # Remap implicit "classical" + cluster to CR1 (legacy backward compat). _fit_vcov_type = self._resolve_effective_vcov_type(effective_cluster_ids) + # Extract Conley coords array (only when vcov_type='conley'; the + # estimator-level guards above already validated the column-name + # tuple against `data`). 
+ _conley_coords_array_mp = None + if _fit_vcov_type == "conley" and self.conley_coords is not None: + _conley_coords_array_mp = data[list(self.conley_coords)].to_numpy(dtype=np.float64) + # Note: Wild bootstrap for multi-period effects is complex (multiple coefficients) # For now, we use analytical inference even if inference="wild_bootstrap" coefficients, residuals, fitted, vcov = solve_ols( @@ -1452,6 +1590,10 @@ def fit( # type: ignore[override] weights=survey_weights, weight_type=survey_weight_type, vcov_type=_fit_vcov_type, + conley_coords=_conley_coords_array_mp, + conley_cutoff_km=self.conley_cutoff_km, + conley_metric=self.conley_metric, + conley_kernel=self.conley_kernel, ) # Compute survey vcov if applicable @@ -1741,6 +1883,8 @@ def _refit_mp_absorb(w_r): n_clusters=( len(np.unique(effective_cluster_ids)) if effective_cluster_ids is not None else None ), + conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, + conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self._coefficients = coefficients diff --git a/diff_diff/guides/llms-full.txt b/diff_diff/guides/llms-full.txt index 6245f62b..8422b013 100644 --- a/diff_diff/guides/llms-full.txt +++ b/diff_diff/guides/llms-full.txt @@ -1883,6 +1883,55 @@ inference = reg.get_inference(coef_index) # -> InferenceResult | `p_value` | `float` | P-value | | `conf_int` | `tuple[float, float]` | Confidence interval | +### Conley Spatial HAC Standard Errors + +Conley (1999) spatial heteroskedasticity-and-autocorrelation-consistent standard +errors for DiD designs with geocoded data. Use when residuals are spatially +correlated (geo experiments, regional shocks, common-supplier effects). +Available on `DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`. 
+ +```python +from diff_diff import TwoWayFixedEffects + +result = TwoWayFixedEffects( + vcov_type="conley", + conley_coords=("lat", "lon"), # column names with lat/lon (degrees) + conley_cutoff_km=200.0, # required; no default + conley_metric="haversine", # or "euclidean", or callable + conley_kernel="bartlett", # or "uniform" +).fit(data, outcome="y", treatment="treated", time="post", unit="unit_id") +print(result.summary()) # variance line: "Conley spatial HAC (bartlett, cutoff=200.0km)" +``` + +**Variance estimator:** + + Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij / h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1} + +**Kernels:** +- `"bartlett"` (default): `K(u) = max(0, 1 - |u|)`. PSD-guaranteed. +- `"uniform"`: `K(u) = 1{|u| ≤ 1}`. Easier to interpret; emits `UserWarning` if the resulting meat has a materially negative eigenvalue. + +**Distance metrics:** +- `"haversine"` (default): great-circle in km, Earth's mean radius 6371 km. Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. +- `"euclidean"`: from projected coordinates; user owns the units. +- `callable(coords1, coords2) -> n×n array`: custom distance for non-geographic networks. + +**No default bandwidth.** `conley_cutoff_km` is required. Conley (1999) Section 5 +recommends a sensitivity grid (e.g., 50, 100, 200, 500 km) and reporting the +SE range. + +**Restrictions in this release:** +- `vcov_type="conley"` + `cluster=` → `NotImplementedError` (combined kernel deferred to Phase 2). +- `vcov_type="conley"` + `weights=` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up). +- `vcov_type="conley"` + `absorb=` → `NotImplementedError` (only TWFE's two-FE within-transformation is supported). +- `SyntheticDiD(vcov_type="conley")` → `TypeError` (uses bootstrap, not analytical sandwich). +- `n > 20_000` emits a `UserWarning` about O(n²) distance-matrix memory. 
+ +**Parity:** matches R `conleyreg` (Düsterhöft 2021, CRAN v0.1.9) to ≤ 1e-6 +on three benchmark fixtures in +`benchmarks/data/r_conleyreg_conley_golden.json`. Earth radius 6371.01 km +matches conleyreg. + ## Rust Backend diff-diff includes an optional Rust backend for performance-critical operations. diff --git a/diff_diff/guides/llms.txt b/diff_diff/guides/llms.txt index 2d14d525..a693ead6 100644 --- a/diff_diff/guides/llms.txt +++ b/diff_diff/guides/llms.txt @@ -76,6 +76,7 @@ Full practitioner guide: call `diff_diff.get_llm_guide("practitioner")` - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html): Rambachan & Roth (2023) sensitivity analysis — robust CI under parallel trends violations, breakdown values - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html): Roth (2022) minimum detectable violation and pre-trends test power curves - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html): Analytical and simulation-based power analysis — MDE, sample size, power curves for study design +- Conley spatial HAC SE (`vcov_type="conley"`) on DifferenceInDifferences/TwoWayFixedEffects/MultiPeriodDiD — Conley (1999) spatial-correlation-aware SEs with haversine/euclidean/callable distance metric and Bartlett/uniform kernel; parity vs R `conleyreg` (Düsterhöft 2021) ## Tutorials diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index e39a487f..cf550880 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -350,6 +350,10 @@ def solve_ols( weights: Optional[np.ndarray] = ..., weight_type: str = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: ... 
@@ -368,6 +372,10 @@ def solve_ols( weights: Optional[np.ndarray] = ..., weight_type: str = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]: ... @@ -386,6 +394,10 @@ def solve_ols( weights: Optional[np.ndarray] = ..., weight_type: str = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Union[ Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]], Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]], @@ -440,6 +452,10 @@ def solve_ols( weights: Optional[np.ndarray] = None, weight_type: str = "pweight", vcov_type: str = "hc1", + conley_coords: Optional[np.ndarray] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ) -> Union[ Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]], Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]], @@ -489,7 +505,7 @@ def solve_ols( Type of weights: "pweight" (inverse selection probability), "fweight" (frequency), or "aweight" (inverse variance). Affects variance estimation but not coefficient computation. - vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, default "hc1" + vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, default "hc1" Variance-covariance family forwarded to :func:`compute_robust_vcov`: - ``"classical"``: non-robust OLS SE, ``sigma_hat^2 * (X'X)^{-1}``. @@ -644,6 +660,10 @@ def solve_ols( column_names=column_names, _skip_rank_check=True, vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) else: # Check for rank deficiency using fast pivoted QR decomposition. 
@@ -693,6 +713,10 @@ def solve_ols( column_names=column_names, _precomputed_rank_info=(rank, dropped_cols, pivot), vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) # Back-transform residuals and compute weighted vcov on original-scale data. @@ -725,6 +749,10 @@ def solve_ols( weights=weights, weight_type=weight_type, vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) vcov_out = _expand_vcov_with_nan(vcov_reduced, _original_X.shape[1], kept_cols) else: @@ -737,6 +765,10 @@ def solve_ols( weights=weights, weight_type=weight_type, vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) if return_fitted: @@ -760,6 +792,10 @@ def _solve_ols_numpy( _precomputed_rank_info: Optional[Tuple[int, np.ndarray, np.ndarray]] = ..., _skip_rank_check: bool = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: ... @@ -776,6 +812,10 @@ def _solve_ols_numpy( _precomputed_rank_info: Optional[Tuple[int, np.ndarray, np.ndarray]] = ..., _skip_rank_check: bool = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]: ... 
@@ -792,6 +832,10 @@ def _solve_ols_numpy( _precomputed_rank_info: Optional[Tuple[int, np.ndarray, np.ndarray]] = ..., _skip_rank_check: bool = ..., vcov_type: str = ..., + conley_coords: Optional[np.ndarray] = ..., + conley_cutoff_km: Optional[float] = ..., + conley_metric: str = ..., + conley_kernel: str = ..., ) -> Union[ Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]], Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]], @@ -810,6 +854,10 @@ def _solve_ols_numpy( _precomputed_rank_info: Optional[Tuple[int, np.ndarray, np.ndarray]] = None, _skip_rank_check: bool = False, vcov_type: str = "hc1", + conley_coords: Optional[np.ndarray] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ) -> Union[ Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]], Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]], @@ -917,6 +965,10 @@ def _solve_ols_numpy( residuals, cluster_ids, vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) vcov = _expand_vcov_with_nan(vcov_reduced, k, kept_cols) else: @@ -931,7 +983,16 @@ def _solve_ols_numpy( # Compute variance-covariance matrix if requested vcov = None if return_vcov: - vcov = _compute_robust_vcov_numpy(X, residuals, cluster_ids, vcov_type=vcov_type) + vcov = _compute_robust_vcov_numpy( + X, + residuals, + cluster_ids, + vcov_type=vcov_type, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, + ) if return_fitted: return coefficients, residuals, fitted, vcov @@ -939,7 +1000,7 @@ def _solve_ols_numpy( return coefficients, residuals, vcov -_VALID_VCOV_TYPES = frozenset({"classical", "hc1", "hc2", "hc2_bm"}) +_VALID_VCOV_TYPES = frozenset({"classical", "hc1", "hc2", "hc2_bm", "conley"}) def _validate_vcov_args( @@ -1016,6 +1077,24 @@ def _validate_vcov_args( "'hc1' 
for weighted HC1, or drop weights for one-way HC2 + " "Bell-McCaffrey. Tracked in TODO.md." ) + if vcov_type == "conley": + # Conley + cluster_ids (combined product kernel) and Conley + weights + # (Bertanha-Imbens 2014) are deferred to Phase 2+. TwoWayFixedEffects + # has its own earlier raise at twfe.py with a TWFE-specific message; + # this is the fallback path for DifferenceInDifferences and + # MultiPeriodDiD callers (and direct compute_robust_vcov use). + if cluster_ids is not None: + raise NotImplementedError( + "vcov_type='conley' with cluster_ids is a Phase 2+ follow-up " + "(combined product kernel). Use vcov_type='hc1' for cluster-" + "robust without spatial HAC, or drop cluster= for Conley." + ) + if weights is not None: + raise NotImplementedError( + "vcov_type='conley' with weights is a Phase 2+ follow-up " + "(Bertanha-Imbens 2014). Drop weights for cross-sectional " + "Conley, or use vcov_type='hc1' for weighted HC1." + ) def resolve_vcov_type( @@ -1071,6 +1150,13 @@ def resolve_vcov_type( return vcov_type +# Conley (1999) spatial HAC helpers live in diff_diff.conley to keep linalg.py +# focused on linear-algebra primitives. Imported here so the dispatch in +# `_compute_robust_vcov_numpy` can route `vcov_type="conley"` without a +# late/local import. +from diff_diff.conley import _compute_conley_vcov, _validate_conley_kwargs # noqa: E402 + + def compute_robust_vcov( X: np.ndarray, residuals: np.ndarray, @@ -1079,9 +1165,14 @@ def compute_robust_vcov( weight_type: str = "pweight", vcov_type: str = "hc1", return_dof: bool = False, + *, + conley_coords: Optional[np.ndarray] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ - Compute variance-covariance matrix under one of four `vcov_type` variants. + Compute variance-covariance matrix under one of five `vcov_type` variants. 
Uses the sandwich estimator: (X'X)^{-1} * meat * (X'X)^{-1}, with the meat matrix determined by the ``vcov_type`` dispatch: @@ -1104,6 +1195,12 @@ def compute_robust_vcov( Pierce-Schott (2016) TWFE application in de Chaisemartin et al. (2026) with ``G=103``. Weighted clustered CR2 is the Phase 2+ follow-up and raises ``NotImplementedError``. + - ``"conley"``: spatial HAC sandwich (Conley 1999 Eq 4.2). Requires + ``conley_coords`` (n×2 array) and ``conley_cutoff_km`` (positive + bandwidth). Cross-sectional one-way only in this release; combining + with ``cluster_ids`` or ``weights`` raises ``NotImplementedError`` + (combined product kernel and Bertanha-Imbens-style weighted Conley + are deferred to Phase 2+). Parameters ---------- @@ -1203,6 +1300,10 @@ def compute_robust_vcov( weight_type=weight_type, vcov_type=vcov_type, return_dof=return_dof, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) raise @@ -1215,6 +1316,10 @@ def compute_robust_vcov( weight_type=weight_type, vcov_type=vcov_type, return_dof=return_dof, + conley_coords=conley_coords, + conley_cutoff_km=conley_cutoff_km, + conley_metric=conley_metric, + conley_kernel=conley_kernel, ) @@ -1484,6 +1589,11 @@ def _compute_robust_vcov_numpy( weight_type: str = "pweight", vcov_type: str = "hc1", return_dof: bool = False, + *, + conley_coords: Optional[np.ndarray] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ NumPy fallback implementation of compute_robust_vcov. @@ -1499,12 +1609,16 @@ def _compute_robust_vcov_numpy( n, k = X.shape - # Bread: (X'WX) or (X'X) depending on whether weights present - if weights is not None: - XtWX = X.T @ (X * weights[:, np.newaxis]) - bread_matrix = XtWX - else: - bread_matrix = X.T @ X + # Bread: (X'WX) or (X'X) depending on whether weights present. 
+ # Suppress spurious BLAS-level subnormal warnings on macOS Accelerate + # for sparse-X designs (e.g., MultiPeriodDiD's event-study dummies). + # Non-finite bread is caught at the downstream np.linalg.solve. + with np.errstate(divide="ignore", over="ignore", invalid="ignore"): + if weights is not None: + XtWX = X.T @ (X * weights[:, np.newaxis]) + bread_matrix = XtWX + else: + bread_matrix = X.T @ X # Effective n for df computation # fweights: sum(w) (frequency expansion) @@ -1517,6 +1631,29 @@ def _compute_robust_vcov_numpy( elif np.any(weights == 0): n_eff = int(np.count_nonzero(weights > 0)) + # ------------------------------------------------------------------ + # Conley (1999) spatial HAC. Cross-sectional only in this release; + # the validator above already raised on conley + cluster_ids and + # conley + weights. + # ------------------------------------------------------------------ + if vcov_type == "conley": + _validate_conley_kwargs(conley_coords, conley_cutoff_km, conley_metric, conley_kernel, n) + # Validator coerces None to a clean array; helper accepts the user's + # input directly (np.asarray inside _compute_conley_vcov is a no-op + # for already-validated arrays). + vcov = _compute_conley_vcov( + X, + residuals, + np.asarray(conley_coords, dtype=np.float64), + float(conley_cutoff_km), # type: ignore[arg-type] + conley_metric, + conley_kernel, + bread_matrix, + ) + if return_dof: + return vcov, np.full(k, n_eff - k, dtype=np.float64) + return vcov + # ------------------------------------------------------------------ # Classical (non-robust) OLS SE. # ------------------------------------------------------------------ @@ -2189,7 +2326,7 @@ class LinearRegression: Resolved survey design for Taylor Series Linearization variance estimation. When provided, weights and weight_type are canonicalized from this object. 
- vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, optional + vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, optional Variance-covariance family. Defaults to the ``robust`` alias (``robust=True`` -> ``"hc1"``, ``robust=False`` -> ``"classical"``). Passing an explicit ``vcov_type`` overrides ``robust`` unless the @@ -2260,6 +2397,10 @@ def __init__( weight_type: str = "pweight", survey_design: object = None, vcov_type: Optional[str] = None, + conley_coords: Optional[np.ndarray] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: str = "haversine", + conley_kernel: str = "bartlett", ): self.include_intercept = include_intercept self.robust = robust @@ -2269,6 +2410,10 @@ def __init__( self.weights = weights self.weight_type = weight_type self.survey_design = survey_design # ResolvedSurveyDesign or None + self.conley_coords = conley_coords + self.conley_cutoff_km = conley_cutoff_km + self.conley_metric = conley_metric + self.conley_kernel = conley_kernel # Resolve vcov_type from the legacy `robust` alias via the shared helper. self.vcov_type = resolve_vcov_type(robust, vcov_type) # Preserve the raw constructor arg (possibly None) so `fit()` can @@ -2434,6 +2579,10 @@ def fit( weights=_fit_weights, weight_type=_fit_weight_type, vcov_type=_fit_vcov_type, + conley_coords=self.conley_coords, + conley_cutoff_km=self.conley_cutoff_km, + conley_metric=self.conley_metric, + conley_kernel=self.conley_kernel, ) # For hc2_bm, compute per-coefficient Bell-McCaffrey DOF. 
Both # the one-way HC2+BM case and the cluster CR2 case are supported; diff --git a/diff_diff/results.py b/diff_diff/results.py index cead3f52..b59ecb90 100644 --- a/diff_diff/results.py +++ b/diff_diff/results.py @@ -52,6 +52,8 @@ def _format_vcov_label( cluster_name: Optional[str], n_clusters: Optional[int], n_obs: Optional[int], + conley_cutoff_km: Optional[float] = None, + conley_kernel: Optional[str] = None, ) -> Optional[str]: """Compose a human-readable variance-family label for summary output. @@ -73,6 +75,11 @@ def _format_vcov_label( return f"CR2 Bell-McCaffrey cluster-robust at {cluster_name}{suffix}" suffix = f", n={n_obs}" if n_obs else "" return f"HC2 + Bell-McCaffrey DOF (one-way{suffix})" + if vcov_type == "conley": + kernel_str = conley_kernel or "bartlett" + if conley_cutoff_km is not None: + return f"Conley spatial HAC ({kernel_str}, cutoff={conley_cutoff_km:.1f}km)" + return f"Conley spatial HAC ({kernel_str})" return None @@ -125,11 +132,14 @@ class DiDResults: bootstrap_distribution: Optional[np.ndarray] = field(default=None, repr=False) # Survey design metadata (SurveyMetadata instance from diff_diff.survey) survey_metadata: Optional[Any] = field(default=None) - # Variance-covariance family: "classical" | "hc1" | "hc2" | "hc2_bm". + # Variance-covariance family: "classical" | "hc1" | "hc2" | "hc2_bm" | "conley". # Plus cluster_name when cluster-robust. Used by summary() to label the # SE family in the output. vcov_type: Optional[str] = field(default=None) cluster_name: Optional[str] = field(default=None) + # Conley spatial-HAC parameters; populated only when vcov_type="conley". 
+ conley_cutoff_km: Optional[float] = field(default=None) + conley_kernel: Optional[str] = field(default=None) def __repr__(self) -> str: """Concise string representation.""" @@ -211,6 +221,8 @@ def summary(self, alpha: Optional[float] = None) -> str: cluster_name=self.cluster_name, n_clusters=self.n_clusters, n_obs=self.n_obs, + conley_cutoff_km=self.conley_cutoff_km, + conley_kernel=self.conley_kernel, ) if label is not None: lines.append(f"{'Variance:':<25} {label:>40}") @@ -446,6 +458,9 @@ class MultiPeriodDiDResults: # Variance-covariance family and cluster column for summary() labeling. vcov_type: Optional[str] = field(default=None) cluster_name: Optional[str] = field(default=None) + # Conley spatial-HAC parameters; populated only when vcov_type="conley". + conley_cutoff_km: Optional[float] = field(default=None) + conley_kernel: Optional[str] = field(default=None) # --- Inference-field aliases (balance/external-adapter compatibility) --- @property @@ -548,6 +563,8 @@ def summary(self, alpha: Optional[float] = None) -> str: cluster_name=self.cluster_name, n_clusters=self.n_clusters, n_obs=self.n_obs, + conley_cutoff_km=self.conley_cutoff_km, + conley_kernel=self.conley_kernel, ) if label is not None: lines.append(f"{'Variance:':<25} {label:>50}") diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index 209ce7b4..fe4e6c9b 100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -9,10 +9,10 @@ import pandas as pd from numpy.linalg import LinAlgError +from diff_diff.bootstrap_utils import generate_rao_wu_weights from diff_diff.estimators import DifferenceInDifferences from diff_diff.linalg import solve_ols from diff_diff.results import SyntheticDiDResults, _SyntheticDiDFitSnapshot -from diff_diff.bootstrap_utils import generate_rao_wu_weights from diff_diff.utils import ( _compute_regularization, _sum_normalize, @@ -164,7 +164,35 @@ def __init__( # Deprecated — accepted for backward compat, ignored with warning lambda_reg: 
Optional[float] = None, zeta: Optional[float] = None, + # Defensive guard against silently-ignored Conley kwargs. SyntheticDiD + # inherits __init__ from DifferenceInDifferences but overrides with + # literal `super().__init__(robust=True, cluster=None, alpha=alpha)`, + # so any user-passed `vcov_type=` or `conley_*=` would be silently + # dropped. Per `feedback_no_silent_failures`, raise loudly. Tracked + # in TODO.md for a follow-up that wires Conley to a non-bootstrap + # variance path on SyntheticDiD. + vcov_type: Optional[str] = None, + conley_coords: Optional[Tuple[str, str]] = None, + conley_cutoff_km: Optional[float] = None, + conley_metric: Optional[str] = None, + conley_kernel: Optional[str] = None, ): + if vcov_type == "conley" or any( + v is not None for v in (conley_coords, conley_cutoff_km, conley_metric, conley_kernel) + ): + raise TypeError( + "SyntheticDiD does not yet support vcov_type='conley' or any " + "conley_* kwargs. SyntheticDiD uses bootstrap/jackknife/placebo " + "variance (variance_method=...), not the analytical sandwich " + "routed through compute_robust_vcov. Tracked in TODO.md as " + "a follow-up." + ) + if vcov_type is not None and vcov_type != "conley": + raise TypeError( + f"SyntheticDiD does not accept vcov_type={vcov_type!r}. " + f"SyntheticDiD's variance is bootstrap/jackknife/placebo " + f"based; configure via variance_method=..." + ) if lambda_reg is not None: warnings.warn( "lambda_reg is deprecated and ignored. Regularization is now " @@ -357,10 +385,9 @@ def fit( # type: ignore[override] # surface even when the value is mathematically a no-op. fpc_col = survey_design.fpc if fpc_col not in data.columns: - raise ValueError( - f"FPC column '{fpc_col}' not found in data" - ) + raise ValueError(f"FPC column '{fpc_col}' not found in data") import dataclasses as _dc + warnings.warn( "SurveyDesign(fpc=...) 
is a no-op on " "variance_method='placebo': permutation tests are " @@ -498,9 +525,13 @@ def fit( # type: ignore[override] # covariate residualization doesn't shuffle row order, so the # collapse is invariant to which view we group on. from diff_diff.survey import collapse_survey_to_unit_level + all_units_for_bootstrap = list(control_units) + list(treated_units) resolved_survey_unit = collapse_survey_to_unit_level( - resolved_survey, data, unit, all_units_for_bootstrap, + resolved_survey, + data, + unit, + all_units_for_bootstrap, ) # Front-door FPC validation for implicit-PSU Rao-Wu (PR #355 # R8 P1). When psu is None but fpc is set, @@ -563,12 +594,8 @@ def fit( # type: ignore[override] # ω_eff normalization likewise), so switching to resolved weights # doesn't change their numerics. n_control_for_split = len(control_units) - w_control = resolved_survey_unit.weights[:n_control_for_split].astype( - np.float64 - ) - w_treated = resolved_survey_unit.weights[n_control_for_split:].astype( - np.float64 - ) + w_control = resolved_survey_unit.weights[:n_control_for_split].astype(np.float64) + w_treated = resolved_survey_unit.weights[n_control_for_split:].astype(np.float64) # Front-door positive-mass guard (PR #355 R7 P1). Survey weights # are non-negative post-resolve() (survey.py L171-L176 rejects # negatives), but all-zero mass on either arm is reachable — the @@ -797,13 +824,10 @@ def fit( # type: ignore[override] # ``resolved_survey_unit`` carries the per-unit strata/psu/fpc # arrays ordered as [control..., treated...] to match the # downstream variance-method column layout. 
- _full_design_survey = ( - resolved_survey_unit is not None - and ( - resolved_survey_unit.strata is not None - or resolved_survey_unit.psu is not None - or resolved_survey_unit.fpc is not None - ) + _full_design_survey = resolved_survey_unit is not None and ( + resolved_survey_unit.strata is not None + or resolved_survey_unit.psu is not None + or resolved_survey_unit.fpc is not None ) if _full_design_survey: _n_c = len(control_units) @@ -818,24 +842,16 @@ def fit( # type: ignore[override] else None ) _psu_control = ( - resolved_survey_unit.psu[:_n_c] - if resolved_survey_unit.psu is not None - else None + resolved_survey_unit.psu[:_n_c] if resolved_survey_unit.psu is not None else None ) _psu_treated = ( - resolved_survey_unit.psu[_n_c:] - if resolved_survey_unit.psu is not None - else None + resolved_survey_unit.psu[_n_c:] if resolved_survey_unit.psu is not None else None ) _fpc_control = ( - resolved_survey_unit.fpc[:_n_c] - if resolved_survey_unit.fpc is not None - else None + resolved_survey_unit.fpc[:_n_c] if resolved_survey_unit.fpc is not None else None ) _fpc_treated = ( - resolved_survey_unit.fpc[_n_c:] - if resolved_survey_unit.fpc is not None - else None + resolved_survey_unit.fpc[_n_c:] if resolved_survey_unit.fpc is not None else None ) else: _strata_control = None @@ -864,10 +880,7 @@ def fit( # type: ignore[override] _placebo_use_survey_path = ( self.variance_method == "placebo" and resolved_survey_unit is not None - and ( - resolved_survey_unit.strata is not None - or resolved_survey_unit.psu is not None - ) + and (resolved_survey_unit.strata is not None or resolved_survey_unit.psu is not None) ) # NOTE: the FPC no-op warning for placebo is emitted earlier # (before ``_resolve_survey_for_fit``); ``resolved_survey_unit.fpc`` @@ -879,9 +892,7 @@ def fit( # type: ignore[override] # strata is declared. PSU-without-strata is treated as a single # stratum (Rust & Rao 1996 JK1 form) inside # ``_jackknife_se_survey``. 
- _jackknife_use_survey_path = ( - _full_design_survey and self.variance_method == "jackknife" - ) + _jackknife_use_survey_path = _full_design_survey and self.variance_method == "jackknife" # Synthesize a single stratum for PSU/FPC-without-strata designs # so the placebo / jackknife survey paths can treat them as the @@ -890,12 +901,8 @@ def fit( # type: ignore[override] # methods; the original `_strata_*` arrays stay None so other # code paths (REGISTRY, metadata) see the true design. if _full_design_survey and _strata_control is None: - _strata_control_eff: np.ndarray = np.zeros( - len(control_units), dtype=np.int64 - ) - _strata_treated_eff: np.ndarray = np.zeros( - len(treated_units), dtype=np.int64 - ) + _strata_control_eff: np.ndarray = np.zeros(len(control_units), dtype=np.int64) + _strata_treated_eff: np.ndarray = np.zeros(len(treated_units), dtype=np.int64) else: _strata_control_eff = _strata_control # type: ignore[assignment] _strata_treated_eff = _strata_treated # type: ignore[assignment] @@ -1067,9 +1074,7 @@ def fit( # type: ignore[override] # overall mean for singleton strata) is not implemented # for SDID jackknife; reject upfront rather than silently # treating it as ``"remove"``. - _lonely_psu_mode = getattr( - resolved_survey_unit, "lonely_psu", "remove" - ) + _lonely_psu_mode = getattr(resolved_survey_unit, "lonely_psu", "remove") if _lonely_psu_mode not in ("remove", "certainty"): raise NotImplementedError( f"SurveyDesign(lonely_psu={_lonely_psu_mode!r}) is " @@ -1263,9 +1268,7 @@ def fit( # type: ignore[override] # Unit-level positional join onto ``_loo_unit_ids`` is well- # defined only for the unit-level path. 
if inference_method == "jackknife": - self.results_._loo_granularity = ( - "psu" if _jackknife_use_survey_path else "unit" - ) + self.results_._loo_granularity = "psu" if _jackknife_use_survey_path else "unit" else: self.results_._loo_granularity = None # Only populate unit-level LOO bookkeeping when the granularity @@ -1444,12 +1447,9 @@ def _bootstrap_se( # same subset every draw). Returns NaN SE — same shape as PR #351's # n_successful=0 raise but caught upstream as NaN. Recovered from # 91082e5:diff_diff/synthetic_did.py. - if ( - _use_rao_wu - and resolved_survey.psu is not None - and resolved_survey.strata is None - ): + if _use_rao_wu and resolved_survey.psu is not None and resolved_survey.strata is None: from numpy import unique as _unique + n_psu = len(_unique(resolved_survey.psu)) if n_psu < 2: return np.nan, np.array([]) @@ -1512,8 +1512,10 @@ def _bootstrap_se( # mean would corrupt the bootstrap distribution because # fit-time ATT uses the survey-weighted mean (PR #355 # R2 P0). - if (_use_rao_wu or _pweight_only) and rw_treated_draw is not None and ( - rw_control_draw.sum() == 0 or rw_treated_draw.sum() == 0 + if ( + (_use_rao_wu or _pweight_only) + and rw_treated_draw is not None + and (rw_control_draw.sum() == 0 or rw_treated_draw.sum() == 0) ): continue @@ -1528,10 +1530,14 @@ def _bootstrap_se( # set (PR #352), else unweighted. if rw_treated_draw is not None and rw_treated_draw.sum() > 0: Y_boot_pre_t_mean = np.average( - Y_boot_pre_t, axis=1, weights=rw_treated_draw, + Y_boot_pre_t, + axis=1, + weights=rw_treated_draw, ) Y_boot_post_t_mean = np.average( - Y_boot_post_t, axis=1, weights=rw_treated_draw, + Y_boot_post_t, + axis=1, + weights=rw_treated_draw, ) else: Y_boot_pre_t_mean = np.mean(Y_boot_pre_t, axis=1) @@ -1868,9 +1874,7 @@ def _placebo_variance_se( # equivalent, but the warm-start matches R's exact iterates # for bit-identical SE under the R-parity test. 
if init_omega is not None: - pseudo_omega_init = _sum_normalize( - init_omega[pseudo_control_idx] - ) + pseudo_omega_init = _sum_normalize(init_omega[pseudo_control_idx]) else: pseudo_omega_init = None pseudo_omega = compute_sdid_unit_weights( @@ -2037,9 +2041,7 @@ def _placebo_variance_se_survey( for _ in range(replications): try: pseudo_treated_parts = [] - for h, n_treated_h in zip( - unique_treated_strata, treated_counts_per_stratum - ): + for h, n_treated_h in zip(unique_treated_strata, treated_counts_per_stratum): controls_in_h = control_idx_per_stratum[h] pseudo_treated_h = rng.choice( controls_in_h, size=int(n_treated_h), replace=False @@ -2140,9 +2142,7 @@ def _placebo_variance_se_survey( stacklevel=3, ) - se = np.sqrt((n_successful - 1) / n_successful) * np.std( - placebo_estimates_arr, ddof=1 - ) + se = np.sqrt((n_successful - 1) / n_successful) * np.std(placebo_estimates_arr, ddof=1) return se, placebo_estimates_arr def _jackknife_se( @@ -2450,9 +2450,7 @@ def _jackknife_se_survey( # within strata by ``SurveyDesign.resolve`` (see survey.py # L308-L320 ``nest=False`` validation), so a (stratum, psu) pair # uniquely identifies a PSU. - unique_strata_all = np.unique( - np.concatenate([strata_control, strata_treated]) - ) + unique_strata_all = np.unique(np.concatenate([strata_control, strata_treated])) # Short-circuit: unstratified single-PSU design. ``strata_*`` arrays # are always populated after ``_resolve_survey_for_fit``, so a @@ -2499,9 +2497,7 @@ def _jackknife_se_survey( treated_in_h_mask = strata_treated == h psus_in_h_control = psu_control_eff[control_in_h_mask] psus_in_h_treated = psu_treated_eff[treated_in_h_mask] - psus_in_h = np.unique( - np.concatenate([psus_in_h_control, psus_in_h_treated]) - ) + psus_in_h = np.unique(np.concatenate([psus_in_h_control, psus_in_h_treated])) n_h = len(psus_in_h) if n_h < 2: # Singleton-stratum handling. 
R12 P1 fix: distinguish @@ -2564,9 +2560,7 @@ def _jackknife_se_survey( stratum_has_undefined_replicate = True undefined_replicate_stratum = h undefined_replicate_psu = j - undefined_replicate_reason = ( - "PSU contains no units in either arm" - ) + undefined_replicate_reason = "PSU contains no units in either arm" break # All treated removed → LOO yields an undefined SDID @@ -2605,9 +2599,7 @@ def _jackknife_se_survey( stratum_has_undefined_replicate = True undefined_replicate_stratum = h undefined_replicate_psu = j - undefined_replicate_reason = ( - "kept treated survey mass is zero" - ) + undefined_replicate_reason = "kept treated survey mass is zero" break Y_pre_t_mean = np.average( Y_pre_treated[:, treated_kept_mask], @@ -2633,18 +2625,14 @@ def _jackknife_se_survey( stratum_has_undefined_replicate = True undefined_replicate_stratum = h undefined_replicate_psu = j - undefined_replicate_reason = ( - "SDID estimator raised on the LOO panel" - ) + undefined_replicate_reason = "SDID estimator raised on the LOO panel" break if not np.isfinite(tau_j): stratum_has_undefined_replicate = True undefined_replicate_stratum = h undefined_replicate_psu = j - undefined_replicate_reason = ( - "SDID estimator returned non-finite τ̂" - ) + undefined_replicate_reason = "SDID estimator returned non-finite τ̂" break tau_loo_h.append(float(tau_j)) @@ -2658,9 +2646,7 @@ def _jackknife_se_survey( if len(tau_loo_h) == n_h: tau_bar_h = np.mean(tau_loo_h) - ss_h = float( - np.sum((np.asarray(tau_loo_h) - tau_bar_h) ** 2) - ) + ss_h = float(np.sum((np.asarray(tau_loo_h) - tau_bar_h) ** 2)) total_variance += (1.0 - f_h) * (n_h - 1) / n_h * ss_h any_stratum_contributed = True tau_loo_all.extend(tau_loo_h) diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index 7409b621..c2b8f909 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -143,6 +143,54 @@ def fit( # type: ignore[override] "the full projection." 
) + # Conley + TWFE: Conley meat = S.T @ K @ S survives FWL because it + # depends only on scores X*epsilon. FWL preserves both the + # residualized X and the residuals epsilon, so spatial-HAC on the + # within-transformed design equals spatial-HAC on a full-dummy + # design — UNLIKE hc2/hc2_bm which need the hat matrix. + # However, TWFE auto-clusters at unit by default + # (twfe.py:205-216), and Conley + cluster is deferred to Phase 2 + # (combined product kernel). When the user explicitly passes + # cluster=..., reject early here for a TWFE-specific message; the + # linalg validator's NotImplementedError fires later for non-TWFE + # call paths. + if self.vcov_type == "conley" and self.cluster is not None: + raise NotImplementedError( + f"TwoWayFixedEffects(cluster={self.cluster!r}, " + "vcov_type='conley') is deferred to Phase 2 (combined " + "product kernel). Drop cluster= for cross-sectional " + "Conley; the unit auto-cluster default is also disabled " + "when vcov_type='conley'." + ) + if self.vcov_type == "conley": + if survey_design is not None: + raise NotImplementedError( + "TwoWayFixedEffects(survey_design=..., " + "vcov_type='conley') is deferred to Phase 2+ " + "(Bertanha-Imbens 2014). Drop survey_design= for " + "cross-sectional Conley." + ) + if self.conley_coords is None: + raise ValueError( + "vcov_type='conley' requires conley_coords=(, " + ") tuple of column names in the data." + ) + if self.conley_cutoff_km is None: + raise ValueError( + "vcov_type='conley' requires conley_cutoff_km (positive " "finite bandwidth)." + ) + _twfe_coord_cols = list(self.conley_coords) + if len(_twfe_coord_cols) != 2: + raise ValueError( + f"conley_coords must be a 2-tuple of column names; got " + f"{self.conley_coords!r}." + ) + for _col in _twfe_coord_cols: + if _col not in data.columns: + raise ValueError( + f"conley_coords references column {_col!r} which " f"is not in `data`." 
+ ) + # Check for staggered treatment timing and warn if detected self._check_staggered_treatment(data, treatment, time, unit) @@ -212,6 +260,13 @@ def fit( # type: ignore[override] # Explicit classical + analytical inference: drop the auto-cluster # so the validator doesn't reject ``cluster_ids + classical``. cluster_var = None + elif self.vcov_type == "conley": + # Conley + TWFE: disable the auto-cluster default. Conley + cluster + # is deferred to Phase 2; the user wants spatial-HAC at the unit + # level via the kernel, not cluster-robust at the unit level. The + # explicit-cluster case is rejected upstream; reaching here means + # cluster=None. + cluster_var = None else: cluster_var = unit @@ -298,6 +353,16 @@ def fit( # type: ignore[override] # remapped vcov_type disagrees; the remapped `vcov_type` is the # single source of truth. _fit_vcov_type = self._resolve_effective_vcov_type(survey_cluster_ids) + + # Materialize Conley coords from data (validated above; this is just + # array extraction). NOTE: data passed to LinearRegression is the + # within-transformed matrix, but coords are still in the ORIGINAL + # row order — within-transformation preserves row ordering, so the + # coords align with the demeaned X 1:1. 
+ _twfe_conley_coords = None + if _fit_vcov_type == "conley" and self.conley_coords is not None: + _twfe_conley_coords = data[list(self.conley_coords)].to_numpy(dtype=np.float64) + if self.rank_deficient_action == "error": reg = LinearRegression( include_intercept=False, @@ -308,6 +373,10 @@ def fit( # type: ignore[override] weight_type=survey_weight_type, survey_design=_lr_survey_twfe, vcov_type=_fit_vcov_type, + conley_coords=_twfe_conley_coords, + conley_cutoff_km=self.conley_cutoff_km, + conley_metric=self.conley_metric, + conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=df_adjustment) else: # Suppress generic warning, TWFE provides context-specific messages below @@ -324,6 +393,10 @@ def fit( # type: ignore[override] weight_type=survey_weight_type, survey_design=_lr_survey_twfe, vcov_type=_fit_vcov_type, + conley_coords=_twfe_conley_coords, + conley_cutoff_km=self.conley_cutoff_km, + conley_metric=self.conley_metric, + conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=df_adjustment) coefficients = reg.coefficients_ @@ -492,6 +565,8 @@ def _refit_twfe(w_r): # remapped hc1 under the legacy alias path, not self.vcov_type. 
vcov_type=_fit_vcov_type, cluster_name=_twfe_cluster_label, + conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, + conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self.is_fitted_ = True diff --git a/docs/doc-deps.yaml b/docs/doc-deps.yaml index c325f5f9..61527729 100644 --- a/docs/doc-deps.yaml +++ b/docs/doc-deps.yaml @@ -741,6 +741,15 @@ sources: - path: docs/api/utils.rst type: api_reference + diff_diff/conley.py: + drift_risk: medium + docs: + - path: docs/methodology/REGISTRY.md + section: "ConleySpatialHAC" + type: methodology + - path: benchmarks/R/README.md + type: methodology + diff_diff/utils.py: drift_risk: medium docs: diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 076632d0..c8d124df 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -2916,6 +2916,80 @@ should be a deliberate user choice. --- +## ConleySpatialHAC + +**Primary source:** Conley, T. G. (1999). GMM Estimation with Cross-Sectional Dependence. *Journal of Econometrics* 92(1), 1-45. DOI: 10.1016/S0304-4076(98)00084-0 + +**Secondary sources:** +- Andrews, D. W. K. (1991). Heteroskedasticity and autocorrelation consistent covariance matrix estimation. *Econometrica* 59(3), 817-858. +- Düsterhöft, C. (2021). conleyreg: Estimations using Conley Standard Errors. CRAN R package, https://github.com/cdueben/conleyreg. Our parity benchmark target. +- Colella, F., Lalive, R., Sakalli, S. O., & Thoenig, M. (2019). Inference with Arbitrary Clustering. IZA DP No. 12584. Stata `acreg` reference implementation; cited as the parallel canonical implementation in the Stata ecosystem (not parity-tested here). + +**Scope:** Cross-sectional spatial heteroskedasticity-and-autocorrelation-consistent +standard errors for OLS / TWFE when residuals are spatially correlated. Extends +White (1980) HC0 by allowing pairwise correlation that decays with geographic +distance. 
Available on `DifferenceInDifferences`, `TwoWayFixedEffects`, +`MultiPeriodDiD` via `vcov_type="conley"` plus `conley_coords`, +`conley_cutoff_km`, `conley_metric`, `conley_kernel`. `SyntheticDiD` is +explicitly excluded (it uses bootstrap/jackknife/placebo variance, not the +analytical sandwich); `SyntheticDiD(vcov_type="conley")` raises `TypeError`. +Phase 1: cross-sectional only; Phase 2 will add the time dimension +(Driscoll-Kraay) and a sparse k-d-tree fast path. + +**Variance estimator (Conley 1999 Eq 4.2 in pairwise-distance form, OLS specialization):** + + Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij / h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1} + +where `d_ij` is the geographic distance, `h` is the user-supplied bandwidth +(`conley_cutoff_km`), and `K(·)` is the kernel. The `i = j` diagonal contributes +the standard White HC0 term `X_i ε_i² X_i'`. + +**Kernel functions:** +- `conley_kernel="bartlett"` (default): `K(u) = max(0, 1 - |u|)`. Conley 1999 Eq 3.14, Andrews 1991. PSD-guaranteed (non-negative spectral window). +- `conley_kernel="uniform"`: `K(u) = 1{|u| ≤ 1}`. Spectral window negative in regions (Conley 1999 footnote 11) — meat not guaranteed PSD; implementation emits `UserWarning` if any meat eigenvalue < `-1e-12`. + +**Distance metrics:** +- `conley_metric="haversine"` (default): great-circle in km using Earth's mean radius (6371.01 km, matching R `conleyreg`). Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. +- `conley_metric="euclidean"`: Euclidean from projected coords. Skips lat/lon range checks (user owns the projection's units). +- `conley_metric=callable(coords1, coords2) -> n×n array`: custom distance for non-geographic networks. + +**Note:** No default bandwidth. Conley 1999 does not propose a plug-in selector; +the empirical example (Section 5) uses a sensitivity grid. Implementation +requires `conley_cutoff_km` to be supplied; `None` raises `ValueError` (per the +project's no-silent-failures rule). 
Practitioners should rerun on a coarse cutoff +grid (e.g., 50, 100, 200, 500 km) and report the SE range, mirroring Conley's +Section 5 robustness check. + +**Note (FWL composability):** Unlike `vcov_type="hc2"` and `vcov_type="hc2_bm"`, +which depend on the full hat matrix and therefore reject TWFE within-transformation, +Conley's meat depends only on scores `X_i·ε_i`. FWL preserves both the residualized +`X` and the residuals `ε`, so the spatial-HAC sandwich computed on the +within-transformed design equals the sandwich on the full-dummy design. +`TwoWayFixedEffects(vcov_type="conley", ...)` is therefore supported. + +**Note (R conleyreg parity):** diff-diff's Conley implementation matches R +`conleyreg` (Düsterhöft 2021, CRAN v0.1.9) to ≤ 1e-6 on three benchmark +fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`). Earth radius +constant is 6371.01 km (mean radius), matching +`conleyreg::haversine_dist`. Regeneration: +`cd benchmarks/R && Rscript generate_conley_golden.R`. + +**Edge cases / restrictions:** +- `vcov_type="conley"` + `cluster=` → `NotImplementedError` (combined kernel deferred to Phase 2) +- `vcov_type="conley"` + `weights=` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up) +- `vcov_type="conley"` + `absorb=` → `NotImplementedError` (only TWFE's two-FE within is supported in Phase 1; arbitrary `absorb` dimensions are deferred) +- `SyntheticDiD(vcov_type="conley")` → `TypeError` (SyntheticDiD uses bootstrap/jackknife/placebo variance, not the analytical sandwich; tracked in TODO.md) +- `n > 20_000`: emits `UserWarning` about O(n²) distance-matrix memory +- `conley_cutoff_km ≤ 0`, `nan`, or `inf`: rejected with `ValueError`. The HC0 reduction at h→0 is documented but not the sanctioned path; users should pass `vcov_type="hc1"` +- Identical coordinates (`d_ij = 0` for `i ≠ j`): `K(0) = 1`, contributing the full HC0 weight per Conley 1999 page 19. 
Documented behavior; no warning + +**Reference implementations:** +- R: `conleyreg::conleyreg(...)` (Düsterhöft 2021, CRAN v0.1.9) — **parity benchmark for diff-diff** +- Stata: `acreg dep indeps, latitude(lat) longitude(lon) spatial dist(km) bartlett` (Colella et al. 2019) — academic spec source for the cluster-flexible variance, not parity-tested here +- MATLAB: `ols_spatial_HAC.m` (Hsiang 2010) + +--- + ## Survey Data Support Survey-weighted estimation allows correct population-level inference from data diff --git a/docs/methodology/papers/butts-2021-review.md b/docs/methodology/papers/butts-2021-review.md new file mode 100644 index 00000000..2850fff9 --- /dev/null +++ b/docs/methodology/papers/butts-2021-review.md @@ -0,0 +1,301 @@ +# Paper Review: Difference-in-Differences with Spatial Spillovers + +**Authors:** Kyle Butts +**Citation:** Butts, K. (2023). Difference-in-Differences with Spatial Spillovers. arXiv:2105.03737v3 (originally posted 2021; v3 dated June 13, 2023). https://arxiv.org/abs/2105.03737 +**PDF reviewed:** papers/2105.03737v3.pdf (35 pages) +**Review date:** 2026-05-09 + +--- + +## CRITICAL FINDING — Paper Does Not Use Kernel-Weighted Exposure + +The Phase 3 brief describes the canonical estimator as a kernel-weighted exposure regressor `E_it = Sum_{j != i} w_ij * D_jt` added to the design matrix. **Butts (2021) v3 does NOT actually propose this form.** Butts proposes a **ring / distance-bin indicator** estimator and references kernel/spatial-decay exposure only in a footnote as one example of how a generic exposure mapping `h_i(D-vector)` could be defined. + +The paper's headline estimators are: +- Equation (5): single "near treatment" ring indicator `S_i` interacted with `(1 - D_i)`. +- Equation (6): multiple concentric ring indicators `Ring_{ij}` interacted with `(1 - D_it)`. +- Equation (8): TVA application uses 4 distance bins `{(0,50], (50,100], (100,150], (150,200]}` miles with ring indicators. 
+ +The text on page 6, footnote 8, says only: *"For example, h_i(D-vector) could be an indicator for being within a certain distance of a treated unit. Additionally, exposure could be defined as a spatial decay function where exposure is declining in the distance of treated units."* This is a generic remark about possible exposure-mapping families inside the potential-outcomes framework (Section 2), not a proposed estimator. The paper's identification result (Proposition 2.3) and all empirical specifications use **ring/indicator** rather than continuous kernel exposure. + +**Implications for diff-diff Phase 3:** +- If Phase 3 implements the ring-indicator estimator, Butts (2021) IS the canonical source. The decomposition `tau_total - tau_spill(0)` (Equations (1)-(2)), Proposition 2.3, and the rings construction (Equation (6)) are exactly what to cite. +- If Phase 3 implements a continuous kernel-weighted `E_it = Sum_{j != i} w_ij * D_jt`, that specification is **NOT** from this paper. It is closer to Clarke (2017) MPRA paper or to design-based / linear-in-means peer-effects literature (Manski 1993, Goldsmith-Pinkham and Imbens 2013) referenced in Butts' Section 1.1. A separate paper review (Clarke 2017) is needed to source the kernel form, OR the diff-diff API should be reframed around ring/distance bins as the primary spec with kernel as an extension. +- The user-facing API design should be reconsidered. `spillover_kernel="exp"|"inverse_distance"|"power"|callable` does not map to anything in Butts (2021). What maps is `spillover_rings=[(0, 50), (50, 100), (100, 150), (150, 200)]` (or a single ring for a one-indicator spec) plus the "far-away" cutoff `d-bar`. +- The `direct_effect` and `spillover_effect` terminology can still be derived from Butts: `tau_total` (direct, from Proposition 2.3) and `tau_spill(s)` for s in {0, 1} or per-ring. Butts uses `tau_total` for treated-unit effects and `gamma_0` for the spillover-on-control coefficient (Equation 5). 
+ +The remainder of this review documents what Butts ACTUALLY proposes (the ring-indicator estimator) so the diff-diff REGISTRY entry for Phase 3 is accurate. + +--- + +## Methodology Registry Entry + +*Formatted to match docs/methodology/REGISTRY.md structure. Heading levels and labels align with existing entries — copy the `## SpilloverDiD` section into the appropriate category in the registry.* + +## SpilloverDiD (ring-indicator) + +**Primary source:** Butts, K. (2023). Difference-in-Differences with Spatial Spillovers. arXiv:2105.03737v3. + +**Scope:** A two-period (Section 2-3) and staggered (Section 5) DiD setting in which treatment is assigned by an administrative boundary but treatment effects spill over onto nearby control units. The estimator separately identifies (i) the **total effect on the treated** `tau_total = E[Y_i1(1, h_i(D-vector)) - Y_i1(0, 0-vector) | D_i = 1]` and (ii) **spillover effects on control units** `tau_spill(0)`, by adding "near-treatment" ring indicators interacted with `(1 - D_it)` to the canonical TWFE regression. Identification is non-parametric in the spillover function — the researcher does not need to specify how spillovers decay over space, only the maximum distance `d-bar` past which spillovers do not occur. + +**Key implementation requirements:** + +*Notation (Section 2, page 6):* +- `D_i` ∈ {0,1}: treatment status; `D_it = D_i * 1{t=1}`. +- `D-vector` ∈ {0,1}^n: full vector of treatment statuses across all units. +- `h_i(D-vector)`: non-negative scalar- or vector-valued **exposure mapping** — generic representation of how unit `i` is affected by spillovers from the other units' treatments. `h_i(0-vector) = 0-vector` by definition. +- Potential outcome: `Y_it(D_i, h_i(D-vector))` — depends on own treatment AND on the exposure mapping. +- `S_i`: indicator for `min_{j: D_j=1} d(i,j) <= d-bar` (i.e., `i` is within `d-bar` miles of the nearest treated unit). Note `D_i = 1` implies `S_i = 1`. 
+- `Ring_{ij}`: indicator for unit `i` being in the `j`-th distance ring from treatment. + +*Assumption checks / warnings:* +- **Assumption 1 (Random Sampling)**: `{Y_i0, Y_i1}_{i=1..n}` is i.i.d. panel. +- **Assumption 2 (No Anticipation)**: `Y_i0(D, h) = Y_i0(0, 0)` for all D and h. The pre-treatment outcome does not depend on future treatment / exposure. +- **Assumption 3 (Parallel Counterfactual Trends)**: counterfactual trends do NOT depend on `D_i`: + `E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 1] = E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 0]`. + (Note: this is a STRONGER assumption than ordinary parallel trends — it requires parallel trends in the absence of all treatment AND zero exposure, not merely absence of own treatment. Reduces to standard parallel trends when SUTVA holds, since then every unit has zero exposure.) +- **Assumption 5 (Spillovers Are Local)**: there exists `d-bar` such that + (i) `min_{j: D_j=1} d(i,j) > d-bar => h_i(D-vector) = 0-vector` (spillovers vanish past `d-bar`); and + (ii) there exist treated and control units with `min_{j: D_j=1} d(i,j) > d-bar` (i.e., the sample contains far-away units that can serve as a clean control group). +- **Assumption 6 (Total Effect Parallel Trends)**: counterfactual trends do not depend on `D_i` AND `S_i`: + `E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 1] = E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 0, S_i = 0]`. +- **Assumption 7 (Spillover Effect Parallel Trends)**: for `s = 0, 1`, + `E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 1] = E[Y_i1(0, 0-vector) - Y_i0(0, 0-vector) | D_i = 0, S_i = s]`. + Required to estimate `gamma_0` (spillover on control) from Equation (5). STRONGER than Assumption 6. +- **Assumption 8 (Parallel Counterfactual Trends, Staggered, Equation 9)**: for all `i, t`, `Y_it(0, 0-vector) = mu_i + lambda_t + epsilon_it` with `E[epsilon_it] = 0`. Imposes a unit + time additive structure on untreated/unexposed potential outcomes (Section 5). 
Stronger than Assumption 3. +- **Assumption 9 (No Anticipation, Staggered)**: for all `(i,t)` with `D_it = 0` and `h_i(D-vector_t) = 0-vector`, `Y_it(D, h) = Y_it(0, 0)`. +- Warn that choice of `d-bar` is a researcher decision — Butts notes (page 13) "each value of `d-bar` corresponds to a different effective control group and hence a different parallel trends assumption" — and that data-driven selection under stricter assumptions is in the companion paper Butts (2021b) "Difference-in-Differences with Geocoded Microdata" (the "ring method" paper). +- Warn that using `S_i = 1` close-to-treatment as the control group (which is common in border-RD applied work) magnifies spillover bias, since these units experience the largest spillover effects (Section 4.1 / page 22). + +*Treatment effects definitions (Section 2.1, page 7-8):* + +Switching effect: +``` +tau_{i,switch}(h-vector) := Y_i1(1, h-vector) - Y_i1(0, h-vector) +tau_switch(h-vector) := E[ Y_i1(1, h_i(D-vector)) - Y_i1(0, h_i(D-vector)) | D_i = 1, h_i(D-vector) = h-vector ] +``` +Effect of changing only `i`'s treatment, holding exposure fixed at `h-vector`. Policy-relevant for local policymakers. + +Total effect on the treated (the diff-diff Phase 3 `direct_effect`): +``` +tau_total := E[ Y_i1(1, h_i(D-vector)) - Y_i1(0, 0-vector) | D_i = 1 ] +``` +Effect of going from no-treatment-no-exposure world to enacted treatment vector. Policy-relevant for national policymakers. + +Spillover effect: +``` +tau_{i,spill}(D_i, h_i(D-vector)) := Y_i1(D_i, h_i(D-vector)) - Y_i1(D_i, 0-vector) +tau_spill(D) := E[ Y_i1(D, h_i(D-vector)) - Y_i1(D, 0-vector) | D_i = D ] +``` +Average over all treated/control units regardless of whether they actually experience non-zero exposure. 
+ +Algebraic identity (page 9, unnumbered): +``` +[Y_i1(1, h_i(D-vector)) - Y_i1(0, h_i(D-vector))] [SWITCHING EFFECT] += [Y_i1(1, 0-vector) - Y_i1(0, 0-vector)] [DIRECT EFFECT] ++ [Y_i1(1, h_i(D-vector)) - Y_i1(1, 0-vector)] [SPILLOVER ON TREATED] +- [Y_i1(0, h_i(D-vector)) - Y_i1(0, 0-vector)] [SPILLOVER ON CONTROL] +``` +The first two terms together equal the TOTAL EFFECT ON TREATED. + +*Bias decomposition of the canonical DiD estimand (Proposition 2.1, Equations 1-2):* + +Under Assumptions 1, 2, and 3 (Parallel Counterfactual Trends): +``` +E[Y_i1 - Y_i0 | D_i = 1] - E[Y_i1 - Y_i0 | D_i = 0] (1) + (Difference-in-Differences) += tau_total - tau_spill(0) (2) +``` +The standard DiD estimator is biased for `tau_total` by `-tau_spill(0)` — the average spillover effect onto control units. If spillovers and total effect have the same sign, the canonical DiD is **attenuated**; opposite signs, **inflated**. Butts then estimates the canonical TWFE (Equation 3): +``` +y_it = tau * D_it + mu_i + lambda_t + epsilon_it (3) +``` +and shows `tau-hat` is the sample analog of (1), hence biased for `tau_total` whenever spillovers exist. + +*Identification of the total effect (Proposition 2.3, Equation 4):* + +Under Assumptions 1, 2 (random sampling), Assumption 5 (Spillovers Are Local) and Assumption 6 (Total Effect Parallel Trends): +``` +E[Y_i1 - Y_i0 | D_i = 1] - E[Y_i1 - Y_i0 | D_i = 0, S_i = 0] = tau_total (4) +``` +Conditioning on `S_i = 0` uses only "far-away" control units, which by Assumption 5 have `h_i(D-vector) = 0-vector` and so identify the counterfactual trend cleanly. **Crucially, the researcher does NOT need to know the form of the exposure mapping** — only an indicator for "close to treatment" within distance `d-bar`. + +*Estimator equation — single ring (Equation 5):* + +``` +Y_it = tau * D_it + gamma_0 * (1 - D_it) * S_i + mu_i + lambda_t + epsilon_it (5) +``` +- `tau-hat` consistent for `tau_total` under Assumption 7 (Spillover Effect Parallel Trends). 
+- `gamma_0-hat` averages spillover effects across **all** control units with `S_i = 1` (exposed AND unexposed — the `(1 - D_it)` interaction zeroes out treated units), specifically `E[gamma_0-hat] = E[tau_{i,spill}(0) | S_i = 1, D_i = 0]`. This is NOT `tau_spill(0)` — and may be attenuated towards zero if the indicator captures many units with no actual spillover exposure. + +*Estimator equation — multiple rings (Equation 6):* + +``` +y_it = tau * D_it + sum_{j=1..n_rings} (1 - D_it) * Ring_{ij} * delta_j + mu_i + lambda_t + epsilon_it (6) +``` +- Each `delta_j` estimates the average `tau_{i,spill}(0)` for control units inside ring `j`. +- `tau-hat` continues to identify `tau_total` because the rings are collinear with the "single big S_i" indicator. +- Multiple rings better trace the spatial decay function as the number of rings grows and ring widths shrink — semi-parametric (Section 3.2, page 15). +- Bias-variance trade-off: more rings = less bias on the spillover function but smaller cell counts and noisier estimates. Clarke (2017) proposes cross-validation; Butts (2021b) proposes data-driven selection under a stricter parallel-trends assumption (footnote 14, 15). +- Caveat (page 16, end of Section 3): "spillover effects are additive in the number of nearby treated units... summarizing exposure by the distance to the closest treated unit fails to capture important information." If spillovers are additive in nearby treated units, count of treated units within each ring is preferred, but then bias is fully removed only if the exposure mapping is correctly specified — which the paper otherwise does not require. + +*Estimator equation — staggered TWFE / event-study (Section 5, Table 2):* + +Two-stage imputation estimator following Gardner (2021): +1. Estimate `Y_it = mu_i + lambda_t + u_it` on observations with `D_it = 0` AND `S_it = 0` (untreated AND unexposed). Compute residuals `Y-tilde_it := Y_it - mu-hat_i - lambda-hat_t`. +2. Regress `Y-tilde_it` on treatment + spillover dummies.
+ +Table 2 (page 24) gives the second-stage variables for each estimand: + +| Estimand | Included Variables | +|----------|-------------------| +| Total Effect | `D_it` | +| Total Effect (Event Study) | `D^k_{it}` dummies | +| Spillover Effect on Control | `S_it (1 - D_it)` or `Ring_{it,j} (1 - D_it)` | +| Spillover Effect on Control (Event Study) | `S^k_{it} (1 - D_it)` or `Ring^k_{it,j} (1 - D_it)` | + +where `D^k_{it} := D_i * 1{K_it = k}` and `K_it` is years since treatment turned on. + +**TWFE bias under staggered + spillover (page 22):** TWFE is a weighted sum of 2x2 DiDs (Goodman-Bacon 2018, Sun-Abraham 2020, de Chaisemartin-D'Haultfœuille 2019). Spillover bias enters each 2x2 with the same sign, but Goodman-Bacon weights can be negative — so the SIGN of `tau_spill` no longer determines the sign of the bias on the staggered TWFE. This makes spillover bias under staggered timing harder to sign than in the 2x2 case. + +*Standard errors (Section 3.1, page 13):* + +- **Cluster by unit `i`** to allow for serial correlation across periods. +- **Conley spatial HAC** (Conley 1999): "since assumption 5 is predicated on the fact that nearby places affect one another, we should account for such spatial correlation by allowing for spatial correlations following Conley (1999). More recent work by Ferman (2020) shows that for large-n asymptotics to be used, the structure of spatial correlation must be limited in that errors are assumed to be uncorrelated after a certain cutoff distance. A natural candidate for this cutoff would be `d-bar` used for the creation of `S_i`." +- TVA application (Table 1, page 19) uses Conley SEs with cutoff 200 miles. +- Two-way clustering (unit + time) is NOT explicitly recommended — Butts goes straight from clustering by unit to Conley spatial-HAC. +- For the staggered two-stage estimator, inference accounts for the first-stage estimation following Gardner (2021); implemented in `did2s` R/Stata package (Butts 2021a). 
+ +*Edge cases:* +- **No nearby control units (Assumption 5(ii) fails)**: `tau_total` cannot be identified from Proposition 2.3. Detection: count units with `S_i = 0` and `D_i = 0`; if zero, error. Handling: error or warn; recommend smaller `d-bar` if the user has chosen too large a cutoff. +- **`d-bar` too small** (some spillover-affected units classified as `S_i = 0`): residual bias remains in `tau-hat`. Detection: not detectable from data alone without an explicit decay model; Butts argues bias is small because spillovers decay over distance. Handling: sensitivity analysis across multiple `d-bar` values. +- **`d-bar` too large** (`S_i = 0` units share fewer characteristics with treated units): increases variance and may worsen parallel-trends on the control group. Detection: parallel-pre-trends test on the chosen `S_i = 0` group. Handling: bias-variance trade-off; smaller `d-bar` reduces variance. +- **Single-ring `S_i` indicator covers many unaffected units**: `gamma_0-hat` is attenuated towards zero. Detection: consistent near-zero estimates for the spillover coefficient when the ring is wide. Handling: switch to multiple concentric rings (Equation 6). +- **Spillovers extend past the largest ring**: `tau-hat` from Equation (6) remains biased. Detection: the outermost ring's `delta_j` is statistically different from zero. Handling: extend the outermost ring or signal to user. +- **Additive spillovers in number of treated neighbors**: distance-to-nearest-treated rings under-identify; recommend count-of-treated-in-ring instead, but this re-introduces functional-form dependence (Section 3.2 end / page 16). +- **Staggered timing with negative Goodman-Bacon weights**: ordinary TWFE can flip the sign of the spillover bias; use the two-stage Gardner-style estimator (Section 5). + +*Algorithm (two-period, multiple rings):* +1. Compute distance from every unit `i` to the nearest treated unit: `d_i := min_{j: D_j = 1} d(i, j)`. +2. 
User supplies `d-bar` (max spillover distance) and a list of inner ring breakpoints `[r_0=0 < r_1 < ... < r_K = d-bar]`. +3. Build `Ring_{ij} := 1{r_{j-1} < d_i <= r_j}` for `j = 1, ..., K` (half-open bins, matching the paper's `(0,50], (50,100]` convention, so a boundary distance falls in exactly one ring). Treated units have `d_i = 0` and are excluded from all rings (since the regressor multiplies by `(1 - D_it)`). +4. Augment design matrix with treatment dummy `D_it` and `(1 - D_it) * Ring_{ij}` for `j = 1, ..., K`. +5. Fit TWFE via partial-out / FW projection. +6. Read `tau-hat` (direct/total effect) and `delta_j-hat` (per-ring spillover-on-control effects). +7. Compute Conley spatial-HAC SEs with cutoff `d-bar` (recommended) or larger. +8. Optional pre-trends test: regress pre-period outcome differences on `D_i` and the rings to verify parallel trends on each subgroup. + +*Algorithm (staggered, two-stage following Gardner 2021):* +1. Build `D_it` (treatment indicator) and `S_it` (within `d-bar` of nearest treated unit at time `t`). +2. Subset to `D_it = 0` AND `S_it = 0`. Estimate `Y_it = mu_i + lambda_t + u_it` on this subsample. +3. Compute residuals `Y-tilde_it := Y_it - mu-hat_i - lambda-hat_t` for all observations. +4. Regress `Y-tilde_it` on the second-stage variables in Table 2. +5. Compute SEs accounting for the first-stage estimation (GMM-style; see Gardner 2021). + +**Reference implementation(s):** +- R: companion package `did2s` for the two-stage staggered estimator (Butts 2021a). https://github.com/kylebutts/did2s (cited in references and footnote 22). +- Stata: `did2s` Stata port also referenced. +- Software for the two-period ring estimator (Equations 5-6) is not separately distributed; it can be implemented directly via `lm` / `feols` after constructing ring indicators. + +**Requirements checklist (for diff-diff Phase 3 mapping to Butts 2021):** +- [ ] Distance computation utility: `d(i, j)` between every pair (or at least `min_{j: D_j=1} d(i, j)` for each `i`).
+- [ ] Ring-indicator builder taking `(distance_array, ring_breakpoints, d_bar)` and producing `(Ring_{ij})` matrix and `S_i` indicator. +- [ ] TWFE / two-way FE estimator that accepts arbitrary additional regressors interacted with `(1 - D_it)`. +- [ ] Result object exposing `direct_effect = tau_total`, per-ring `spillover_effects[j]`, `d_bar` cutoff used. +- [ ] Conley spatial-HAC SE (Phase 1 deliverable) integrated as the recommended SE option. +- [ ] Pre-trends test on the chosen `S_i = 0` control group. +- [ ] Two-stage staggered variant (deferrable to Phase 3+). + +--- + +## Implementation Notes + +### Data Structure Requirements +- **Distance input**: either a precomputed `d_ij` matrix (NxN, dense or sparse), OR per-unit `(latitude, longitude)` columns from which great-circle distance is computed. The user-facing API should accept either: `spillover_distance="dist_matrix"` (column-name pointing to a precomputed array) OR `spillover_coords=("lat", "lon")` (paired column names with on-the-fly Haversine). +- **Ring breakpoints**: a sorted list of distance bin edges; defaults must be context-dependent (Butts uses 50-mile bins for the TVA application but cautions this is application-specific). +- **`d-bar`**: scalar; defaults should default to `max(ring_breakpoints)`. +- Treatment indicator `D_it` and panel structure unchanged from base TWFE. + +### Computational Considerations +- Construction of `Ring_{ij}` is O(n_treated * n) per period if computed via `min_{j: D_j=1} d(i, j)`. For panel data with time-varying treatment, recompute at each `t`. +- For large `n` (~10^5+), a k-d tree / ball tree on treated-unit coordinates yields `min_{j: D_j=1} d(i, j)` in O(n log n_treated) — Phase 2 sparse k-d-tree fast path (Conley spatial-HAC) can be reused. +- Once ring indicators are built, the augmented TWFE adds at most K + 1 columns (`D`, plus K ring interactions), so the Frisch-Waugh / partial-out workflow is O(n_obs * (K + 1)). 
Should be a thin wrapper around existing `TwoWayFixedEffects`. + +### Tuning Parameters + +| Parameter | Type | Default | Selection Method | +|-----------|------|---------|-----------------| +| `spillover_d_bar` | float | None (required) | User-supplied. Bias-variance trade-off (Butts pages 13-14); Butts (2021b) gives data-driven CV under stricter assumptions. Default could be the largest `ring_breakpoints` value. | +| `spillover_rings` | list of floats OR int | `[d_bar]` | User-supplied breakpoints, OR an int that auto-builds equal-width rings. Single-ring (Equation 5) by default; multi-ring (Equation 6) when list has 2+ values. | +| `spillover_distance` | str (column name) OR ndarray | None | Either precomputed nearest-treated distance per unit, OR a full distance matrix. | +| `spillover_coords` | tuple (str, str) | None | Pair of column names for on-the-fly Haversine if `spillover_distance` not supplied. | +| `spillover_ring_method` | "nearest" or "count" | "nearest" | "nearest": indicator per nearest-treated ring (Butts default). "count": count of treated units within each ring (re-introduces functional-form dependence; flagged in Section 3.2 end). | +| `vcov` | str | "conley" | Recommended. Cluster-by-unit also supported (Section 3.1). Cutoff defaults to `spillover_d_bar`. | + +### Relation to Existing diff-diff Estimators +- **Builds on** `TwoWayFixedEffects`. The augmented model is just TWFE with extra ring-indicator columns interacted with `(1 - D_it)`. +- **Result object**: should expose `direct_effect = tau_total = tau-hat` (treatment dummy coefficient), `spillover_effects = {ring_label: delta_j-hat}` dict (or a results table), and `d_bar` actually used. Standard errors should attach Conley spatial-HAC by default. +- **Conley SE (Phase 1)** is a hard prerequisite — Butts explicitly recommends it. +- **Sparse k-d-tree (Phase 2)** is reusable for ring-indicator construction. 
+- **Staggered variant** can be added later as a wrapper around an existing two-stage / imputation estimator (e.g., `ImputationDiD` in the diff-diff catalog corresponds to Borusyak-Jaravel-Spiess 2021 / Gardner 2021); the interaction with spillover indicators is described in Table 2. + +### Relation to Other Reviews in this Initiative +- **vs Clarke 2017 (clarke-2017-review.md, if produced):** Butts cites Clarke (2017) in his Section 1.1 as proposing a similar ring-method spec and referenced for the cross-validation technique for choosing the number of rings (footnote 15). Butts' contribution is the formal non-parametric **identification result** (Proposition 2.3) tying rings to a well-defined target estimand `tau_total`, plus the staggered extension (Section 5). If Phase 3 wants to support continuous-kernel exposure, that is closer to Clarke's stepwise `R^k(i,t) = 1{(k-1) h <= X_i < k h}` indicator construction OR to the spatial-decay-function suggestion in Butts' footnote 8 — but neither paper provides a full identification result for a continuous kernel `E_it = sum_j w_ij D_jt`. +- **vs Butts 2023 (butts-2023-review.md, the JUE Insight on geocoded microdata):** the JUE paper (Butts 2021b in this references list) is **complementary** to the present paper. The 2021b paper is the "ring method" paper proper — it adds a data-driven CV procedure for choosing the number / width of rings under a stricter parallel-trends assumption, in the single-treatment-point setting with continuous distance. The present paper (Butts 2021/2023, arXiv:2105.03737) is the **identification + multi-treated-unit + staggered** paper. They are cited together in published applications and in the `did2s` package documentation. Phase 3's REGISTRY entry should cite both. +- **vs Conley 1999 (conley-1999-review.md):** Butts (Section 3.1 page 13) explicitly recommends Conley spatial-HAC SEs in conjunction with the augmented TWFE, with cutoff equal to `d-bar`. 
He cites Ferman (2020) for the requirement that errors be uncorrelated past a finite cutoff for large-n asymptotics. The diff-diff Phase 1 Conley implementation should be the default `vcov` for SpilloverDiD. +- **vs Borusyak-Hull 2023 (design-based formula-instrument critique):** Butts does NOT directly anticipate the Borusyak-Hull (2023) critique (Borusyak-Hull is not in the references). However, Butts' framework is **model-based** (parallel trends on potential outcomes) rather than **design-based** (assumptions on treatment-assignment mechanism). The closest discussion is page 5: *"Those papers' identification results rely on design-based assumptions around the treatment-assignment mechanism, while this paper relies on model-based assumptions, based on a modified parallel-trends assumption, for identification in non-experimental settings."* So Butts is aware of the design-based / model-based dichotomy and positions his work in the model-based camp; whether the Borusyak-Hull critique applies to the ring-indicator spec is a separate question that the diff-diff REGISTRY entry should flag. + +### Empirical illustration — Tennessee Valley Authority (Section 4) +- Application: revisits Kline and Moretti (2014) on the long-run effects of the 1934-WWII TVA federal investment program on agriculture and manufacturing employment. +- Data: county-level decadal outcomes 1940-2000 (long run, Panel A) and 1940-1960 (short run, Panel B). 5/4 column specification. +- Specification (Equation 8): first-differenced two-period DiD with rings at `{(0, 50], (50, 100], (100, 150], (150, 200]}` miles from the TVA boundary, X covariates from Kline and Moretti (1940 controls). +- SEs: Conley with 200-mile cutoff (Conley 1999). +- Findings (Table 1, Panel A long run): + - Agriculture employment: standard DiD = -5.1% per decade. With spillovers controlled, total effect = -7.4% per decade. 
**Spillover bias = +2.3 pp; canonical DiD UNDERESTIMATED the agricultural decline by ~40%.** Spillover-on-control coefficients are negative (-3.7%, -1.6%, -3.0%, -1.6% per decade across the 4 distance bins) — consistent with farm-worker out-migration to higher-paying TVA manufacturing jobs. + - Manufacturing employment: standard DiD = +5.6% per decade. With spillovers controlled, total effect = +3.5% per decade. **Spillover bias = +2.1 pp; canonical DiD OVERESTIMATED the manufacturing gain by ~40%.** Spillover coefficients are negative (-2.0%, -2.5%, -3.3%, -3.0%), consistent with "urban shadow" effects whereby firms relocate INTO the TVA from neighboring areas. +- Interpretation (page 21): "the long-run spillovers cause the original estimates to be about 40 percent too small for agriculture employment and 40 percent too large for manufacturing employment." +- Useful design choice for diff-diff T22 tutorial DGP: a TVA-style bias-correction percentage of ~40% is large enough to be visible without being implausible. + +### Empirical illustration — Opportunity Zones (Appendix B) +- Application: revisits Chen, Glaeser, and Wessel (2021) on the 2017 federal Opportunity Zone program's effect on home prices. +- Two competing identification strategies in the literature: + - "Not-selected" (eligible-but-rejected as control, Equation B.1): Treat x Post = 0.30%* (s.e. 0.17%). + - "Neighboring" (geographically nearby Census tracts as control, Equation B.2): Treat x Post = 0.65%*** (s.e. 0.25%). +- Augmented spec (Equation B.3) adds Within-1/2-mi and Within-1-mi indicators, recovering 0.18% (s.e. 0.17%) for treatment + -1.06%*** for <1/2 mi spillover and -0.74%*** for 1/2-1 mi spillover. +- Reconciles the two literatures: the neighboring spec is biased upward by negative spillovers on adjacent tracts. Page 31 footnote 25: upper-bound effect size lowered from ~1.15% to ~0.65%. 
+ +### Empirical illustration — Community Health Centers (Appendix C) +- Application: revisits Bailey and Goodman-Bacon (2015) on 1965-1974 federal community health center mortality effects. +- Uses the staggered two-stage estimator from Section 5 with `did2s`. +- Spillover indicator: within 25 miles of a treated county, time-varying. +- Result (Figure C1): "no spillover effect is estimated to be significantly different from zero which suggests that the effects of community health centers are very local. Since there are near zero spillover effects, the total effect estimates marked in Figure C1 as diamonds maintain the same shape as the author's original estimates with estimates between 15-30 fewer deaths per 100,000 persons" (page 34). +- Useful for the diff-diff tutorial as a NEGATIVE example (spillover NOT contaminating original estimate) — completes the asymmetry between the TVA case (large bias) and the CHC case (no bias). + +### Critique / limitations Butts acknowledges +- "A limitation of this research is in deciding how wide and how many rings to include in estimation. Concurrent work by Butts (2021b) discusses data-driven ring selection under a more stringent parallel trends assumption that does not readily apply in the context of large geographic units such as counties." (page 25, Discussion section). +- Stronger parallel-trends assumption: Assumption 3 / 6 / 7 require parallel trends not just in absence of own treatment but in absence of all treatment AND zero exposure (page 7) — this is fundamentally untestable without stronger structure. +- Trade-off between `d-bar` choice and quality of the `S_i = 0` control group (page 12-14): a wider `d-bar` reduces spillover-bias but may worsen parallel trends on the remaining control group ("each value of `d-bar` corresponds to a different effective control group and hence a different parallel trends assumption"). 
+- Identification for the "switching effect" `tau_switch(h-vector)` (Proposition 2.2, page 11) requires either parametrization of the spillover function or constraints on heterogeneity — Butts argues this is far harder than identifying `tau_total`. +- Counts-in-ring vs nearest-treated-ring: counts re-introduce functional-form dependence (page 16, end of Section 3); the no-functional-form claim of the paper depends on using nearest-treated rings. + +--- + +## Gaps and Uncertainties + +- **No continuous-kernel exposure regressor in the paper.** The Phase 3 brief assumes `E_it = sum_{j != i} w_ij D_jt` with a kernel `w_ij = K(d_ij / h)` is the canonical Butts spec. It is NOT — the paper uses ring/distance-bin indicators throughout. Footnote 8 (page 6) is the only reference to a "spatial decay function" exposure mapping, and it is illustrative of the abstract framework, not a proposed estimator. If Phase 3 wants the kernel form, the source must be either (a) Clarke (2017) MPRA, (b) the design-based / linear-in-means peer-effects literature (Manski 1993, Goldsmith-Pinkham and Imbens 2013), or (c) the Phase 3 plan should fold the kernel form into a future-work section while implementing the ring spec from Butts 2021. + +- **Bandwidth selection guidance is sparse.** Butts (2021/2023) does not give a default `d-bar` or default ring breakpoints — these are contextual. He defers data-driven selection to Butts (2021b) under stronger parallel-trends. For Phase 3, the diff-diff API will need explicit user-supplied defaults (e.g., quantiles of the distance distribution, application examples). + +- **Counts-in-ring vs nearest-treated-ring is under-specified.** Butts mentions both forms in passing but the algorithmic prescription is for the nearest-treated case. The Phase 3 API should either (i) support only nearest-treated rings and document the limitation, or (ii) support both and warn that counts re-impose functional-form constraints. 
+ +- **Equation (8) is two-period first-differenced**, not a panel TWFE. Phase 3 may want to support both: (a) panel + time FEs + ring indicators (Equation 6), and (b) two-period first-differenced + ring indicators (Equation 8). Butts uses (b) for the TVA application but states (a) is equivalent. + +- **Ring construction across panel periods**: ring membership is computed at the unit-level in the two-period spec (Section 3) but at the unit-period level in the staggered spec (Section 5). The diff-diff implementation will need a clear convention for "is unit `i` within `d-bar` of a unit treated by time `t`?" vs "is unit `i` within `d-bar` of a unit ever treated?". Butts uses the time-varying definition for the staggered case (Table 2 caption / page 24). + +- **No published code for the two-period ring estimator.** `did2s` covers the staggered two-stage estimator only. The two-period ring spec (Equations 5-6) is shown in worked applications but not packaged as a standalone routine — Phase 3 will need to implement it from scratch. + +- **Borusyak-Hull (2023) connection**: not cited in this paper. The diff-diff REGISTRY caveats section should add a note on whether the ring-indicator approach inherits the formula-instrument / exposure-mapping concerns of Borusyak-Hull when researchers parametrize the rings or move to count-of-treated-neighbors. + +- **Pre-trends testing protocol**: Butts does not give an explicit pre-trends test for the augmented spec, but the standard event-study extension is straightforward — replace `D_it` and `(1 - D_it) Ring_{it,j}` by their event-time interactions and inspect pre-period leads (analog of Section 5 Table 2 staggered approach). Worth flagging in the REGISTRY entry as the standard diagnostic. 
+ +- **Page-reference precision check.** Equation numbers preserved verbatim from PDF: (1)-(2) Proposition 2.1 page 9, (3) canonical TWFE page 9, (4) Proposition 2.3 page 12, (5) single-ring estimator page 14, (6) multi-ring estimator page 15, (7) Kline-Moretti baseline page 17, (8) TVA augmented spec page 19, (9) staggered parallel-trends page 23, (B.1)-(B.3) Opportunity Zones pages 31-32, (C.1) CHC event study page 32. Theorem / proposition numbers: Proposition 2.1, 2.2, 2.3 (no Theorem labels in this paper). Assumptions: 1-9 sequential. diff --git a/docs/methodology/papers/butts-2023-review.md b/docs/methodology/papers/butts-2023-review.md new file mode 100644 index 00000000..69cdbc30 --- /dev/null +++ b/docs/methodology/papers/butts-2023-review.md @@ -0,0 +1,226 @@ +# Paper Review: JUE Insight: Difference-in-Differences with Geocoded Microdata + +**Authors:** Kyle Butts +**Citation:** Butts, K. (2023). JUE Insight: Difference-in-Differences with Geocoded Microdata. *Journal of Urban Economics*, 133, 103493. DOI: 10.1016/j.jue.2022.103493 +**PDF reviewed:** papers/1-s2.0-S0094119022000705-main.pdf +**Review date:** 2026-05-09 + +--- + +## Methodology Registry Entry + +*Formatted to match docs/methodology/REGISTRY.md structure.* + +## SpilloverDiD-GeocodedMicrodata (Ring Method + Nonparametric Treatment-Effect Curve) + +**Primary source:** Butts, K. (2023). JUE Insight: Difference-in-Differences with Geocoded Microdata. *Journal of Urban Economics*, 133, 103493. + +**Scope:** Spatially-targeted DiD with geocoded microdata. Treatment occurs at a *point* in space (e.g. a foreclosed home, a new transit stop, a sex-offender residence) and the researcher observes panel or cross-sectional outcomes at neighboring locations indexed by their distance `Dist_i = d(theta_i, theta-bar)`. The paper formalizes the "ring method" used by applied work (Currie et al. 2015; Linden and Rockoff 2008; Gerardi et al. 2015; Campbell et al. 
2011) that compares an inner "treated" ring to an outer "control" ring, derives the assumptions under which it identifies the average treatment effect on the treated, exposes the bias when these assumptions fail (Fig. 2 panels b-d), and proposes a nonparametric partitioning-based-least-squares estimator of the entire treatment-effect curve `tau(Dist_i)` based on Cattaneo et al. (2019a, 2019b). Paper is 8 pages and treats the no-staggered-timing single-shock case. + +**Note (scope vs. diff-diff Phase 3 plan):** This paper does NOT use the kernel-weighted-exposure regressor `E_it = sum_{j != i} w_ij D_jt` of the broader spillover-DiD literature. The estimand is `tau(Dist_i)`, a function of distance from the *one* treatment point, identified ring-by-ring under "Local Parallel Trends" (Assumption 2). Diff-diff's Phase 3 kernel choices (`exp`, `inverse_distance`, `adjacency`, `power`) correspond to this paper's "ring" indicator (`adjacency` with one cutoff, or a partition of cutoffs); the nonparametric estimator is conceptually a binsreg-style flexible ring kernel rather than a continuous-decay weighting. The "indirect / spillover ATT" decomposition language we plan to expose belongs to Butts (2021) "Difference-in-differences estimation with spatial spillovers" (arXiv:2105.03737, cited in this paper's references) - NOT this JUE Insight paper. See "Gaps and Uncertainties" below. + +**Key implementation requirements:** + +*Assumption checks / warnings:* +- **Assumption 1 (Random Sampling):** observed data `{Y_{i,1}, Y_{i,0}, Dist_i}` is i.i.d. (paper p. 3). +- **Assumption 2 (Local Parallel Trends, p. 3):** for a maximum distance `d-bar`, `lambda(d) = lambda(d')` for all positive `d, d' <= d-bar`. That is, in the absence of treatment, outcomes evolve identically at every distance from treatment within `d-bar`. Stronger than the standard parallel-trends assumption (which only requires equality between treated and control rings *on average*). 
+- **Assumption 3 (Average Parallel Trends, p. 3):** `E[lambda_d | 0 <= d <= d_t] = E[lambda_d | d_t < d <= d_c]`. Holds *between* the treated and control rings on average. Weaker than Assumption 2. +- **Assumption 4 (Correct d_t, p. 3):** the chosen treatment-ring outer-edge `d_t` satisfies (i) `tau(d) > 0` for all `d <= d_t` and `tau(d) = 0` for all `d > d_t`, AND (ii) `F(d_c) - F(d_t) > 0` (positive density of control units beyond `d_t`). +- **Assumption 5 (d_t is within d_c, p. 4):** there exists `d_t` with `0 < d_t < d_c` such that Assumption 4 holds AND `F(d_c) - F(d_t) > 0` (positive mass between any candidate `d_t` and `d_c`). +- Failure modes that MUST be warned about (Fig. 2 / Proposition 1 part (ii)): + - **Treated ring too wide:** `d_t > d_t-true` -> control units inside `d_t` average a zero treatment effect into the "treated" mean, biasing `tau-hat` *toward zero* (attenuation). Fig. 2(b). + - **Treated ring too narrow:** `d_t < d_t-true` -> the "control" ring `d_c \ d_t` contains units that ARE treated, so the counterfactual trend `lambda` is contaminated upward by treatment effects, biasing `tau-hat` *upward* in absolute magnitude. Fig. 2(c). + - **Robustness check via different rings:** Fig. 2(d) shows wider-ring and narrower-ring "robustness checks" can BOTH return the same biased estimate as the original mis-specified spec. Wider rings are NOT a robustness check. +- Warn when the user supplies only one ring (single `d_t`, single `d_c`) without justification: paper recommends nonparametric estimation when `d_t` is not known a priori from theory. + +*Estimator equation (Equation 1 - underlying outcome model, p. 3):* + + Y_it = mu_i + tau_i * 1{t=1} + lambda_i * 1{t=1} + u_it + +where: +- `mu_i` is unit-specific time-invariant fixed effect. +- `tau_i = tau(Dist_i) + tilde-tau_i` is unit `i`'s treatment effect, with `tau(d)` the systematic component varying with distance and `tilde-tau_i = tau_i - tau(Dist_i)` an idiosyncratic deviation.
+- `lambda_i = lambda(Dist_i) + tilde-lambda_i` is unit `i`'s common-trend component, also split into a distance-dependent systematic part and an idiosyncratic deviation. +- `u_it` is an idiosyncratic error term. + +*Equation 2 (rewritten model, p. 3):* + + Y_it = mu_i + tau(Dist_i) * 1{t=1} + lambda(Dist_i) * 1{t=1} + epsilon_it + +where `epsilon = u_it + tilde-tau_i + tilde-lambda_i` is uncorrelated with `Dist_i` by construction. + +*Ring-method estimator (Equation 3, p. 3):* + +For a chosen treated cutoff `d_t` and control outer-edge `d_c`, define `D_i = {i : 0 <= Dist_i <= d_t}` and `D_c = {i : d_t < Dist_i <= d_c}`. On the subsample `D = D_i union D_c`, estimate: + + Delta Y_it = beta_0 + beta_1 * 1_{i in D_i} + u_it (3) + +`beta_1-hat` is the difference-in-differences estimator with expectation: + + E[beta_1-hat] = E[Delta Y_it | D_i] - E[Delta Y_it | D_c] + +*Decomposition of beta_1 (Proposition 1, p. 4):* + +Under model (2), `beta_1-hat` decomposes into: + + E[beta_1-hat] = (E[tau(Dist_i) | D_i] - E[tau(Dist_i) | D_c]) + + (E[lambda(Dist_i) | D_i] - E[lambda(Dist_i) | D_c]) + ^^^ Difference in Treatment Effect ^^^ ^^^ Difference in Trends ^^^ + +- (i) ALWAYS holds (algebraic decomposition). +- (ii) Under Local Parallel Trends (Assumption 2) OR Average Parallel Trends between `D_i` and `D_c` (Assumption 3), the "Difference in Trends" term collapses to zero, leaving: + + E[beta_1-hat] = E[tau(Dist_i) | D_i] - E[tau(Dist_i) | D_c] + +- (iii) If additionally `d_t` satisfies Assumption 4, then `E[tau(Dist_i) | D_c] = 0` and `E[beta_1-hat] = tau-bar` (average treatment effect on the affected). + +*Nonparametric partitioning-based estimator (Section 4, p. 4-5):* + +Following Cattaneo et al. (2019a, 2019b), partition the support `[0, d_c]` into `L` quantile-spaced intervals `D_1, ..., D_L` of `Dist_i`. 
Per-interval mean: + + bar-Delta-Y_j := (1 / n_j) * sum_{i in D_j} Delta Y_it + +Estimator for `E[Delta Y_it | Dist_i]`: + + bar-Delta-Y_it-hat := sum_{j=1}^{L} 1_{i in D_j} * bar-Delta-Y_j + +This paper uses degree-0 polynomials (constant within each interval), with `n_j ~ n / L`. + +*Treatment-effect-curve estimator under Assumption 5 (Section 4, p. 4):* + +Within the "control" interval `D_L` (the outermost ring), the average is: + + bar-Delta-Y_L ->^p lambda as L -> infinity, n -> infinity + +(under Local Parallel Trends + Assumption 5: the last bin is left-bounded by some `d_t' > d_t-true`, so `tau(Dist) = 0` in `D_L`). + +Per-interval treatment-effect estimator: + + tau-hat_j := bar-Delta-Y_j - bar-Delta-Y_L + +with population limit: + + tau-hat_j ->^p E[tau(Dist) | Dist in D_j] + lambda - lambda + = E[tau(Dist) | Dist in D_j] + +*Proposition 2 (Consistency of Nonparametric Estimator, p. 4):* + +Given units follow model (2) and `d_c` satisfies Local Parallel Trends and Assumption 5, as `n -> infinity` and `L -> infinity`: + + tau-hat = sum_{j=1}^{L} tau-hat_j * 1_{i in D_j} ->^{unif} tau(Dist) + +i.e. uniform convergence to the treatment-effect curve. Proof Appendix A.2 invokes Cattaneo et al. (2019b) for uniform convergence and underlying smoothness conditions. + +*Standard errors (Section 4, p. 5; Footnote 10):* + +- For `bar-Delta-Y_j`, Cattaneo et al. (2019a) provide robust standard errors that account for the additional randomness of *quantile-estimated* bin endpoints. Implemented in Stata/R `binsreg`. +- For `tau-hat_j = bar-Delta-Y_j - bar-Delta-Y_L`: the SE on the *difference of means* across two disjoint intervals is `sqrt(sigma_j^2 + sigma_L^2)`, where each `sigma_j` is the Cattaneo et al. (2019a) `binsreg` SE for the corresponding bin. +- Inference: form `t-stat = tau-hat_j / SE(tau-hat_j)` and use the standard normal distribution. +- **Footnote 10:** "There may be concerned that the standard errors need to adjust for spatial correlation. 
However, this is not the case under Assumption 2 as this implies the error term is uncorrelated with distance." So the paper does NOT recommend Conley spatial HAC SEs *for this estimator under Assumption 2*. (This is in tension with diff-diff Phase 1 Conley SE guidance for spillover settings - see "Gaps and Uncertainties" below.) + +*Remark 1 (Overall Average Treatment Effect, p. 5):* + +A practitioner may wish to "pool" the significant `tau-hat_j` rings into a single average. But inference on this back-of-envelope average is NOT valid because the number of significant rings is itself a random variable - "model selection makes inference a very difficult problem (Leeb and Pötscher, 2005)". A potential workaround is sample-splitting cross-validation: half the data picks the inner ring, the other half estimates the average effect. + +*Remark 2 (Covariates, p. 5):* + +`binsreg` allows for covariates `X` in the model with valid inference. The "common neighborhood trends" assumption then must hold conditional on `X` (Sant'Anna and Zhao, 2020). + +*Remark 3 (Choosing d_c, p. 5):* + +The method still requires the researcher to specify `d_c` (the outer-edge / sample boundary). Recommendation: use pre-treatment periods (`t = -2, -1`) on a large sample to estimate `tau(Dist)` under the null and choose `d_c` as the largest distance where the estimated curve is approximately flat. Functions as a pre-trends test. + +*Edge cases:* +- **Knife-edge ring choice (Fig. 2a):** when `d_t = d_t-true` exactly, ring estimator is unbiased - in practice this is unlikely without prior theory. +- **`tau(d)` non-monotonic / sign-changing (e.g. negative hyper-local + positive at intermediate distance):** paper p. 4-5 - "the average effect could be near zero across signs". Pooled ring estimate masks heterogeneity. Nonparametric curve is essential here. +- **`tau(d)` exactly cancels `lambda(d)`:** Fig. 4 shows visual pre-trends-like check using `tau-hat_j` for `j` close to `L`.
If the bins past the outer `d_t` are all near zero, this is *suggestive* (not conclusive) evidence that Local Parallel Trends holds. +- **No untreated mass at large `d` (`F(d_c) - F(d_t) = 0`):** Assumption 4 part (ii) fails; cannot identify `tau-bar`. +- **Cross-sectional data (Linden-Rockoff application):** identification requires the alternative assumption that the *composition* of homes at a given distance does not change over time. First-differencing replaced by separate before/after nonparametric estimators differenced (Online Appendix). +- **Density of `Dist`:** quantile-spaced bins automatically allocate `n_j ~ n/L`; no special handling needed for sparse-distance regions. + +*Algorithm:* +1. Construct `Dist_i = d(theta_i, theta-bar)` (Euclidean distance from each unit's location to the treatment point). +2. Choose outer-edge `d_c` from theory or via Remark 3 (largest distance where pre-treatment `tau(Dist)` is flat). +3. Restrict sample to `{i : Dist_i <= d_c}`. +4. Compute first differences `Delta Y_i = Y_{i,1} - Y_{i,0}` (cross-sectional case: separate before/after estimators). +5. Choose `L` via Cattaneo et al. (2019a) `binsreg` data-driven optimal-`L` selector (variance-bias trade-off integrated over the quantile distribution). +6. Compute quantile bin edges of `Dist_i` at probabilities `(1/L), (2/L), ..., 1`. +7. Compute per-bin `bar-Delta-Y_j = mean(Delta Y_i | i in D_j)` and per-bin SE via `binsreg` formulas. +8. Set the outermost bin `D_L` as the local "control": `tau-hat_j = bar-Delta-Y_j - bar-Delta-Y_L` for `j = 1, ..., L-1`. +9. SE: `sqrt(sigma_j^2 + sigma_L^2)`. +10. Plot `(D_j, tau-hat_j)` with confidence bands; visual pre-trends check is implicit in bins near `D_L`. + +**Reference implementation(s):** +- Stata/R: `binsreg` (Cattaneo, Crump, Farrell, Feng) - https://nppackages.github.io/binsreg/. Used directly per Section 4. +- R: Butts maintains `did2s` (cited in Phase 3 plan); for the JUE Insight estimator, `binsreg` is the working tool. 
+- Code/data for the JUE Insight paper: not explicitly cited in the paper text. Butts personal page (https://kylebutts.com/) typically hosts replication code; not verifiable from the PDF alone. + +**Requirements checklist:** +- [ ] Geocoded microdata: each unit has a `(lat, lon)` or projected `(x, y)`. +- [ ] A single treatment point `theta-bar`. (Multiple treatment points = the broader spillover-exposure case in Butts 2021, NOT this paper.) +- [ ] Panel data with two periods (or cross-sectional with unchanging composition). +- [ ] Sufficient density of units across `[0, d_c]` for `binsreg` quantile bins to be well-populated. +- [ ] Either prior theory pinning `d_t` (parametric ring) OR a flat region in pre-treatment `tau(Dist)` (nonparametric ring). + +--- + +## Implementation Notes + +### Data Structure Requirements + +- Inputs: per-unit `(Y_{i,0}, Y_{i,1}, lat_i, lon_i)` plus a treatment point `(lat_T, lon_T)`. Compute `Dist_i = sqrt((lat_i - lat_T)^2 + (lon_i - lon_T)^2)` (or great-circle / projected). The paper uses Euclidean distance on the unit circle in the Monte Carlo (Equation 4) and presumably miles in the application. +- Output: a `tau-hat(Dist)` step function over `L` bins plus the bin endpoints, plus pointwise SE per bin. +- Cross-sectional case: needs unchanging composition of units at each distance over time (Online Appendix). Not generally available in the typical diff-diff panel test fixtures. + +### Computational Considerations + +- The paper's nonparametric estimator is `O(n)` for binning + `O(L)` for bin means + `O(n)` for inference. Total `O(n + L) = O(n)`. Optimal `L*` chosen by `binsreg` is typically `L* ~ n^{1/3}` to `n^{1/5}` depending on smoothness of `tau`. +- Quantile bin construction is `O(n log n)` (sort). +- Compared to the "kernel-weighted exposure" approach in Phase 3 (cost `O(n^2)` for the full pairwise weight matrix `w_ij`), the ring/binsreg approach is much cheaper for large `n` because it never builds a pairwise object. 
+ +### Tuning Parameters + +| Parameter | Type | Default | Selection Method | +|-----------|------|---------|-----------------| +| `d_c` (outer-edge) | float (distance units) | None - REQUIRED | Theory, OR Remark 3 (largest `d` with flat pre-treatment curve). | +| `L` (number of bins) | int | None - data-driven | `binsreg` optimal-`L` selector (Cattaneo et al. 2019a). Typically 5-30. | +| `polynomial_degree` | int | 0 (constant within bin) | Section 4: degree 0 is sufficient under uniform consistency (Footnote 8). Higher degrees reduce bias but increase variance. | +| `kernel` | enum | "uniform" (rings) | Paper does NOT use kernels in the smooth-decay sense. The closest mapping in diff-diff Phase 3 is `adjacency` (uniform indicator inside each ring). Diff-diff's `exp` / `inverse_distance` / `power` choices come from Butts 2021, not this paper. | +| `bandwidth h` | float | n/a (uses bins, not h) | This estimator does NOT have a single bandwidth `h`. The "bandwidth" is the bin width, set automatically by quantile spacing once `L` is chosen. | + +### Relation to Existing diff-diff Estimators + +- **Phase 3 mapping:** the "ring method" with one inner ring + one outer ring is exactly the case `kernel="adjacency"` with two cutoffs `(d_t, d_c)` over a single treatment point. The nonparametric estimator extends this to `L` rings. The paper's strongest contribution is the warning that single-ring spec is only unbiased at the knife-edge `d_t = d_t-true` and that wider/narrower "robustness checks" can replicate the same bias. +- **Direct ATT / spillover ATT decomposition:** the paper's `tau(Dist)` curve is a *direct-effect-as-a-function-of-distance* curve. There is no separate "indirect / spillover ATT" parameter in this paper because the treatment is a *point*, not a discrete set of treated units with neighbors. The Butts-2023 setup is the limit of a Butts-2021 setup with a single treated `j*` and infinite neighbors. 
The parameter that diff-diff Phase 3 calls "indirect / spillover ATT" is from the Butts (2021) "Difference-in-differences estimation with spatial spillovers" working paper (arXiv:2105.03737), not this JUE Insight. +- **Conley SE (Phase 1):** Footnote 10 explicitly notes that Conley spatial HAC is NOT needed when Local Parallel Trends holds, because the assumption itself implies the error is uncorrelated with distance. Diff-diff users who *suspect* spatial autocorrelation in the residuals (i.e. who do NOT trust Local Parallel Trends fully) should still combine `binsreg` SEs with Conley-style spatial HAC. The two are not mutually exclusive. +- **Tutorial T22:** the paper's empirical illustration (Linden-Rockoff 2008 sex-offender arrival, Fig. 4) is an excellent T22 anchor: + - Single point treatment. + - `tau(d)` is large negative within ~0.1 mi, then noise around zero out to ~0.3 mi. + - The naive "1/10 mi treated, 1/10-1/3 mi control" ring spec yields ~`-7.5%` (Fig. 4a). + - The nonparametric `binsreg` estimator yields ~`-20%` in the very nearest bins (Fig. 4b, panel "Nonparametric Approach"). + - This is the canonical "ring is too wide -> attenuated estimate" lesson. +- **DGP for T22:** Equation 4 of the paper: + + p_{it} = 1 + tau(Dist_i) * 1_{t=1} + beta_Lat * Lat_i * 1_{t=1} + beta_Lon * Lon_i * 1_{t=1} + epsilon_it + + with `Lat_i, Lon_i ~ N(0, 0.036)` (units on the unit circle), `beta_Lat, beta_Lon ~ N(0, 0.036)` determining how the price levels evolve, `lambda ~ N(0, 0.025)` the constant common trend, `epsilon_it ~ N(0, 0.036)` idiosyncratic. Treatment-effect curves used in Monte Carlo (Table 1): + - `tau_1(Dist) = 0.15 * 1_{Dist<0.4}` (constant within ring - favorable to ring method). + - `tau_2(Dist) = (0.5 * (0.8 - Dist)^2) * 1_{Dist<0.8}` (smooth decline to zero). + - `tau_3(Dist) = (-0.15 + 1.2875*Dist - 1.375*Dist^2) * 1_{Dist<0.8}` (negative-then-positive: ring averages near zero despite real heterogeneity). 
+ - `tau_4(Dist) = (0.5 * (0.8 - Dist)^2) * 1_{Dist<0.25}` (very-narrow effect; many unaffected units). +- **Diff-diff coverage of binsreg:** diff-diff currently has no `binsreg` integration. Implementing the full Butts (2023) nonparametric estimator would require either bundling Cattaneo et al.'s `binsreg` SE formulas or providing a simplified equal-bin-spacing approximation with bootstrap SEs. + +--- + +## Gaps and Uncertainties + +- **This paper does NOT provide the exposure regressor formulation.** Diff-diff Phase 3's plan to add `E_it = sum_{j != i} w_ij * D_jt` with kernel choices (`exp`, `inverse_distance`, `adjacency`, `power`) traces to **Butts (2021) "Difference-in-differences estimation with spatial spillovers" (arXiv:2105.03737)**, which is cited in this JUE Insight's references but is a SEPARATE paper. Phase 3 should also pull and review Butts (2021) for the direct-vs-indirect ATT decomposition we plan to expose. The JUE Insight version of the decomposition is "treatment effect as a function of distance from a single point" - not "direct ATT vs spillover ATT" in the multi-treated-unit sense. +- **Bandwidth `h` selection:** the paper does NOT use a kernel bandwidth `h` (the "bandwidth" mentioned in Fig. 3 is for the *graphical visualization* via Local Polynomial Kernel Density, not for the Section-4 estimator). Diff-diff Phase 3 needs separate guidance on selecting `h` for the smooth-kernel weights `w_ij = exp(-d_ij/h)` etc.; that guidance is NOT in this paper. +- **Identification with multiple treatment points:** the paper assumes ONE treatment point `theta-bar` (Section 3, paragraph 1: "Treatment occurs at a location theta-bar"). If multiple points exist, distances `Dist_i` to the *nearest* treated point cannot resolve compound exposure; the estimand becomes ambiguous. Phase 3's target case (multiple treated units with overlapping neighborhoods) is OUT OF SCOPE for this JUE Insight. 
+- **Conley SE recommendation:** Footnote 10 says spatial HAC is unnecessary under Assumption 2. But Phase 1 of the spillover-Conley initiative is *adding* Conley SE as a robustness option. The two are not contradictory: Conley SE is for the case where the user is *not certain* that the error term is distance-uncorrelated. We should document Footnote 10 as the "default off" rationale: under strict Local Parallel Trends, classic SEs suffice; users who suspect residual spatial correlation can opt into Conley. +- **No formal Hausman-style test for `d_t`:** the paper proposes a *visual* pre-trends-style check via Fig. 4(b) (bins past the true `d_t-true` should be flat near zero). There is no formal test statistic, p-value, or critical value. Diff-diff could expose a "flat-tail" diagnostic that bootstraps the joint hypothesis "`tau-hat_j = 0` for `j` in the outer K bins" but this would extrapolate beyond the paper. +- **No staggered treatment timing:** the paper is single-shock, two-period. Combining with multi-period staggered designs (de Chaisemartin-D'Haultfœuille / Callaway-Sant'Anna / Sun-Abraham) is not addressed. Phase 3 must decide whether to support `binsreg` per cohort, or per (cohort, post-period) cell, or use a single pooled cross-section. +- **Reference implementation:** the paper does not cite a specific GitHub repo or replication archive in the main text. JUE Insight policy generally requires data + code on JUE's online supplement; the citation lists DOI 10.1016/j.jue.2022.103493 and the paper says supplementary material is available "in the online version". Diff-diff implementation should not rely on Butts' code being available; the equations in Section 3-4 plus `binsreg` documentation are sufficient. +- **`binsreg` Python equivalent:** as of this paper's publication, there was no first-class Python implementation of Cattaneo et al. (2019a) `binsreg`. 
Diff-diff's options are (a) call the R/Stata `binsreg` via a subprocess (heavy dependency), (b) re-implement the bin-mean + per-bin SE manually (loses Cattaneo et al.'s data-driven `L*` and quantile-randomness adjustment), or (c) defer to Butts (2021) exposure-regressor formulation as Phase 3's primary entry point and mark the binsreg-style nonparametric ring method as a "Phase 3.5" follow-up. +- **Density assumption:** unlike de Chaisemartin et al. 2026's `f_{D_2}(0) > 0` assumption, this paper does not formalize a positive-density-at-boundary requirement on `f_{Dist}(0)`. In practice, very few units near `Dist = 0` will inflate the SE on `tau-hat_1` but should not bias the estimate. +- **Cross-sectional support:** the Linden-Rockoff application uses cross-sectional data, but the paper relegates the alternative identification argument (composition unchanged over time, separate-then-differenced nonparametric estimators) to the Online Appendix not visible in the main 8-page PDF. Diff-diff should treat panel as primary; cross-sectional as a follow-up. diff --git a/docs/methodology/papers/clarke-2017-review.md b/docs/methodology/papers/clarke-2017-review.md new file mode 100644 index 00000000..a0db69c5 --- /dev/null +++ b/docs/methodology/papers/clarke-2017-review.md @@ -0,0 +1,282 @@ +# Paper Review: Estimating Difference-in-Differences in the Presence of Spillovers + +**Authors:** Damian Clarke +**Citation:** Clarke, D. (2017). Estimating Difference-in-Differences in the Presence of Spillovers. MPRA Paper No. 81604. https://mpra.ub.uni-muenchen.de/81604/ +**PDF reviewed:** papers/MPRA_paper_81604.pdf +**Review date:** 2026-05-09 + +--- + +## Methodology Registry Entry + +*Formatted to match docs/methodology/REGISTRY.md structure. Heading levels and labels align with existing entries.* + +## SpilloverRobustDiD + +**Primary source:** Clarke, D. (2017). Estimating Difference-in-Differences in the Presence of Spillovers. MPRA Paper No. 81604. 
+ +**Scope:** Difference-in-Differences estimation when SUTVA fails locally - i.e., when the treatment status of one unit (or one cluster) leaks into outcomes of nearby "close to treated" units. The estimator augments a standard two-period DD with one or more "close to treatment" indicators `R^k(i,t)` defined over distance bins `[(k-1)h, kh)` of an observable `X(i,t)` (geographic distance, network distance, ethnic distance, etc.). It separately recovers (i) the average treatment effect on the treated (`ATT = α`) and (ii) the average treatment effect on the close-to-treated (`ATC = β_k`), under monotonicity and a fixed bandwidth `h`. The optimal bandwidth `h*` and maximum spillover distance `d = kh` are chosen by data-driven RMSE-minimising leave-one-out (or k-fold) cross-validation. A multidimensional generalisation (Section 3.3) lets `R(i,t)` depend on a vector `X(i,t)` (e.g. distance interacted with vehicle ownership). + +**Key implementation requirements:** + +*Assumption checks / warnings:* +- Two-period panel `t in {0, 1}`. Treatment occurs only between periods; `D(i,0) = 0` for all `i`. Multi-period and staggered settings are NOT formally treated in this paper - the propositions are stated for the two-period case (Equation 1, Equation 7). For staggered designs, downstream applications add unit and time fixed effects to a binary `T_{it}` (see Equation 22 in the empirical example), but no theoretical results extend Proposition 1 to the staggered case. +- `D(i,t) = 1` and `R(i,t) != 0` are mutually exclusive (a unit cannot be both treated and "close to treated"). Implementations must enforce `R(i,t) = 0` whenever `D(i,1) = 1`. +- Assumption 1 (parallel trends in treatment and control): `E[Y^0(i,1) - Y^0(i,0) | D(i,1)=1, R(i,1)=0] = E[Y^0(i,1) - Y^0(i,0) | D(i,1)=0, R(i,1)=0]`. +- Assumption 2 (parallel trends in close and control): `E[Y^0(i,1) - Y^0(i,0) | D(i,1)=0, R(i,1)=1] = E[Y^0(i,1) - Y^0(i,0) | D(i,1)=0, R(i,1)=0]`. 
+- Assumption 3 (SUTVA holds for some units): there exists a non-empty subset `j in J subset N` for whom potential outcomes `(Y_j^0, Y_j^1)` are independent of `D = {0,1}` for all `i != j`. Practically: at least some "far" units must be unaffected by spillovers, otherwise `ATT` is not identified by this method (Footnote 5). +- Assumption 4A (assignment to close-to-treatment depends on observable `X(i,t)`): there exists a deterministic rule `δ(X(i,t)) = 1{X(i,t) < d}` with `d` a fixed scalar cutoff. Loosened in Section 3.3 to multidimensional `δ : X -> {0,1}`. +- Assumption 5 (monotonicity of spillovers in distance `X(i,t)`): the parameters on `R^k(i,t)` for all `k in 1,...,K` behave monotonically in distance. This is required for the bias arguments around Equation 19 to attenuate `|E[β_j^k]| <= |β_j|`. +- The propagation function need NOT be parametric (Proposition 1 says LS controlling "parametrically or non-parametrically" for `R(i,t) = 1{X(i,t) <= d}` is consistent), but the `R^k(i,t)` partition (Equation 11) IS the paper's parametric workhorse. +- Warn when any observation satisfies `D(i,t) = 1` AND `R(i,t) = 1` simultaneously (mutual-exclusivity violation); warn when `N_{R_kT} = 0` for a candidate bin (no close-to-treated observations - that bin is uninformative). +- Warn when treated and close-to-treated zones overlap (e.g. proximity-based `X(i,t)` for treated units is undefined). Treated units have `R(i,t) = 0` by construction; this must be enforced upstream of regression. 
+- `α`: ATT (direct treatment effect). +- `β`: ATC (average treatment effect on the close-to-treated). +- `ε(i,t) = η(i) - E[η(i)|D(i,1), R(i,1)] + v(i,t)` with `E[ε(i,t) · {1, D(i,1), R(i,1), D(i,t), R(i,t)}] = 0` from Assumptions 2 and 5. + +*Estimating equation generalised to `K` distance bins (Equation 12 - the operational form):* + + Y(i,t) = μ + τ·D(i,1) + + γ_1·R^1(i,1) + ... + γ_K·R^K(i,1) + + δ·t + α·D(i,t) + + β_1·R^1(i,t) + ... + β_K·R^K(i,t) + ε(i,t) + +Each `β_k` captures the spillover effect on units in distance bin `k`. The `γ_k` are pre-period close-fixed-effects. + +*Distance-bin (exposure) construction (Equations 10-11):* + +Given an observable distance `X_i` to the nearest treated unit and a bandwidth `h`, define mutually exclusive indicators + + R(i,t) = R^1(i,t) + R^2(i,t) + ... + R^K(i,t) (10) + +with + + R^k(i,t) = { 1 if X_i >= (k-1)·h and X_i < k·h for all k in 1,...,K (11) + { 0 otherwise + +`X_i` is treated as time-invariant in the empirical example (county-to-treated-state-border distance) but the framework allows time-varying `X(i,t)`. The summed indicator `R(i,t)` corresponds to the binary "close" variable in Equation 7; the matrix `R_M(i,t)` (full set of `K` indicators) corresponds to Equation 12. + +*Kernel/weight choices (Section 3 and Section 4.1):* +- Clarke does NOT propose continuous kernels (Gaussian, Epanechnikov, exponential, inverse-distance) as the operational construction. The workhorse exposure mapping in Equations 10-11 is a STEPWISE / RING (donut) partition: contiguous, mutually-exclusive distance bins of fixed width `h`. Inside each ring the spillover effect `β_k` is constant; across rings it can vary monotonically. +- Section 3.3 discusses multidimensional exposure mappings via an assignment set `T = {x in X : δ(x) = 1}` (Equation 21), permitting interactions of distance with binary covariates (e.g. vehicle ownership). The functional form is "context-specific, ideally driven by economic theory" (page 17). 
+- Section 4.1 Monte Carlo Model 3 generates spillovers via an exponential function `γ · exp(-dist)` for `0 < dist <= 10` (page 20). The paper shows that the stepwise bin estimator still gives correct test size even when the true DGP is exponential (model misspecification robustness, Section 4.1, Table 1) - so the ring partition is the proposed estimator and continuous kernels appear only inside the simulation DGPs against which the estimator is evaluated. +- Adjacency / network / ethnic distance / message-strength: any univariate `X(i,t)` measure plugs into Equations 10-11. The Introduction (page 3) explicitly lists "euclidean space, ethnic distance, edges between nodes in a network, strength of messaging transmission, travel time" as legitimate `X` candidates. + +**For the diff-diff Phase 3 implementation note:** the user-facing API (`spillover_kernel=`, `spillover_distance=`, `spillover_bandwidth=`) is consistent with this paper IF `spillover_kernel="ring"` is the default and continuous kernels (`"exponential"`, `"inverse_distance"`, `"adjacency"`) are documented as DEVIATIONS supported as engineering convenience. Clarke's paper does not endorse continuous kernels as the primary construction. + +*Bandwidth `h` and maximum distance `d` selection (Section 3.2, Equation 20):* + +Optimal bandwidth `h*` minimises the leave-one-out cross-validation criterion + + CV(h) = (1/N) · Σ_{i=1}^N ( Y_i - Ŷ*(X_i(h); h, θ̂_{-i}) )² (20) + +with + + h*_{CV} = argmin_h CV(h) + +`Ŷ*` depends explicitly on `h` because the matrix of regressors `X_i(h)` includes the `R^k(i,t)` indicators, which themselves depend on `h`. The procedure is: + +1. For each candidate `h` in a discrete grid (e.g. `2km, 4km, ..., 40km` in the text-messaging example): + a. Build `R^k(i,t)` for `k = 1, ..., K(h)` where `K(h) = ceil(max(X)/h)` or the user's chosen ceiling. + b. 
Run the iterative procedure (Section 3.1, page 14) to determine the smallest `K` such that the marginal coefficient `β_K` is statistically zero. + c. Compute `CV(h)` as the LOOCV RMSE of the chosen specification. +2. Pick `h* = argmin_h CV(h)`. + +For large `N`, LOOCV is computationally infeasible; a `k`-fold variant (10-fold in the application, page 29) is recommended. Appendix Figure A2 / Appendix Table A1 documents that `k`-fold and LOOCV select identical `h*` in simulation; LOOCV reports lower RMSE values but identical argmin. + +**Default bandwidth?** Clarke does NOT recommend a numerical default. The whole point of the cross-validated procedure is to remove the researcher's degree of freedom. Instead, the implementation should expose the search grid and a CV mode (LOOCV vs k-fold) and surface the chosen `h*` in the result object. + +*Maximum spillover distance `d` (iterative procedure, Section 3.1, pages 13-14):* +1. Estimate Equation 7 with a single close-to-treatment indicator `R^1(i,t)` (and pre-period `R^1(i,1)`). +2. Test `H_0: β_1 = 0` vs `H_1: β_1 != 0`. The t-statistic is `t_{β̂_1^1} = (β̂_1^1 - β_1) / s.e.(β̂_1^1)` and is asymptotically standard normal under the null. +3. If rejected (spillovers present at distance `<= h`), augment with `R^2(i,t)` and test `H_0: β_2 = 0`. Continue until the marginal `β̂_{k+1}` fails to reject zero. +4. Conclude `d = k·h` where `k` is the last ring with rejected null. Equivalently, `R(i,t) = 1{X(i,t) <= k·h}`. + +Appendix C ("Spillovers as a Nuisance Parameter") provides an alternative iterative procedure that compares successive treatment-effect estimates `α̂^k` rather than spillover coefficients `β̂_k`, using a Zellner (1962) seemingly-unrelated-regression Chi-squared test (`H_0: α^{k-1} = α^k`). This variant is appropriate when spillovers are nuisance parameters and only `α` is of empirical interest. 
+ +*Bias of naive DD that ignores spillovers (Equation 15):* + + Bias(α̂) = E[α̂|X] - α = -β · ( N_{R_T} / (N_T - N_{D_T}) ) (15) + +where `N_T` is observations with `t = 1`, `N_{D_T}` is treated observations at `t = 1`, and `N_{R_T}` is close-to-treated observations at `t = 1`. The bias is proportional to `β` (the spillover effect) times the fraction of the control group contaminated by spillovers. With `K` rings the bias generalises to (Equation 16): + + E[α̂|X] = α - Σ_{k=1}^K β_k · ( N_{R_kT} / (N_T - N_{D_T}) ) (16) + +When `j` rings are included and `K - j` are omitted, the residual biases on the included `α̂^j` and `β̂_j^k` are (Equations 18-19): + + Bias(α̂^k) = -β_{k+1} · ( N_{R_{k+1}T} / (N_T - N_{D_T} - Σ_{l=1}^k N_{R_lT}) ) - ... - β_K · ( N_{R_KT} / (N_T - N_{D_T} - Σ_{l=1}^k N_{R_lT}) ) + Bias(β̂_j^k) = same denominator, applied to omitted-ring coefficients + +**Practical use (REGISTRY-relevant):** `Bias(α̂)` is signed; if spillovers `β_k` are same-sign as `α`, the naive estimator UNDERESTIMATES `|α|` (attenuation). If opposite-sign, it OVERESTIMATES `|α|`. Naive DD is unbiased in only two cases: (i) `β_k = 0` for all `k` (no spillovers); (ii) `N_{R_kT} = 0` for all `k` (no close-to-treated units exist). + +*Identification (Proposition 1, page 9):* + +> Under Assumptions 1 to 4A, the ATT and ATC can be consistently estimated by least squares when controlling, parametrically or non-parametrically, for `R(i,t) = 1{X(i,t) <= d}`. + +Proof in Appendix B pages 44-45. Key identifying conditions beyond standard DD: +1. SUTVA holds on a non-empty subset (Assumption 3) - i.e. some "far" units are unaffected by spillovers. This replaces the standard DD requirement that SUTVA holds GLOBALLY. +2. Close-to-treatment status `R(i,t)` is determined by an observable rule on `X(i,t)` (Assumption 4A or 4B) - violations of SUTVA must be observable. +3. Close-to-treatment status is mutually exclusive with treatment. +4. 
Assumptions 1 and 2: parallel trends extend to BOTH treated and close-to-treated cohorts vs. far-untreated controls. Note that no parallel-trends assumption is needed BETWEEN treated and close-to-treated (Footnote 4) - they may have direct interactions. +5. Maximum spillover distance `d` is correctly identified (via the iterative procedure or CV) - misspecification of `d` propagates as omitted-variable bias per Equations 18-19. + +Proposition 2 (page 17) generalises Proposition 1 to multidimensional `X` under Assumption 4B; the proof reduces to Proposition 1 once `δ(x) = 1_{x in T}` provides the close-to-treatment indicator. + +*Decomposition: direct vs. indirect (spillover) effects:* + +ATT (direct effect, Equation 8): + + ATT = E[Y^1(i,1) - Y^0(i,1) | D(i,1) = 1] = α + +ATC (indirect/spillover effect on close-to-treated, Equation 9): + + ATC = E[Y^1(i,1) - Y^0(i,1) | R(i,1) = 1] = β (single-bin form) + = β_k (for the `k`-th distance bin under Equation 12) + +These are recovered by the SAME OLS regression of Equation 12. There is no separate identifying step for `α` vs. `β_k` - both are read directly off the coefficient vector, exploiting the mutual exclusivity of `D(i,1)` and `R^k(i,1)` in the design matrix. + +For diff-diff's Phase 3 result object: `result.direct_effect = α` (coefficient on `D(i,t)`) and `result.spillover_effect` should expose the FULL VECTOR `[β_1, ..., β_K]` (Equation 12), not a single scalar `β`. A scalar `result.spillover_effect = β` is only correct when the true `K = 1`, which is the case the binary Equation 7 covers but is the EXCEPTION rather than the typical operational specification. The empirical illustration uses `K = 4` (Table 2) and `K in {1, 2}` (Table 3, depending on ban type). + +*Standard errors (paper-level recommendation):* +- Clarke does NOT propose Conley (1999) HAC standard errors as the primary inference tool. 
Equation 22 (the empirical specification) and Tables 2-3 use STATE-LEVEL CLUSTERED standard errors (page 26: "Standard errors are clustered by state, and observations are weighted by county population"). +- The Conclusion (page 30) cites Bertrand, Duflo and Mullainathan (2004), Cameron, Gelbach and Miller (2008), and Cameron and Miller (2015) as the inference literature relevant to DD - all CLUSTER-ROBUST, not Conley HAC. +- For the spillover-augmented spec, two-way clustering (state × time) or unit + time clustering is sensible because the close-to-treated units in a state are spatially correlated by construction. The paper does not formalise this; it is an implementation choice. +- Conley (1999) HAC SE on top of Clarke-style exposure regressors is consistent with the spirit of the paper (spatial correlation in residuals after controlling for spillovers) but is NOT what Clarke himself recommends. **For diff-diff Phase 3:** if Conley SE is offered alongside the Clarke exposure regressor, it should be advertised as a COMPLEMENTARY method (Phase 1 inference) rather than Clarke's prescription. Document this clearly. +- The iterative test of `β_{k+1} = 0` uses the standard t-statistic with cluster-robust SE (page 14, no explicit small-sample correction). +- The `α^{k-1} = α^k` SUR test in Appendix C uses the Zellner (1962) Chi-squared distribution. + +*Edge cases:* +- **No close-to-treated units (`N_{R_kT} = 0` for all k):** naive DD is unbiased per Equation 15. Detection: count observations satisfying `R(i,t) = 1` post-period; if zero, fall back to standard DD with a warning. +- **No far units (Assumption 3 fails):** ATT is NOT identified by this method (Footnote 5). Detection: if the iterative procedure runs to `K = K_max` without ever failing to reject `β_k = 0`, the maximum spillover distance has not been bounded. Emit error. +- **Treated and close-to-treated zones overlap:** mathematically excluded (`D(i,1) = 1 implies R(i,1) = 0`). If user data has overlap (e.g. 
a county that is both treated and within `h` of another treated state), enforce `R = 0` on `D = 1` units and warn. +- **Non-monotonic spillovers (Assumption 5 failure):** the bias bounds `0 <= |E[β̂_j^k]| <= |β_j|` (page 14, Footnote 9) no longer hold; the iterative test of `β_{k+1} = 0` may falsely terminate. Clarke notes that monotonicity can be loosened to "spillovers do not fade out at a certain distance and then reappear at a greater distance" (page 12). Detection is hard in finite samples; document as a maintained assumption. +- **Multidimensional spillovers (Section 3.3):** the `R(i,t) = f(X_1, X_2)` parameterisation (e.g. Equation in Section 3.3) avoids the curse of dimensionality only when `f` is parametric. The paper uses `R(i,t) = X_1 · [β_{0,1} X_2^1 + ... + β_{0,K} X_2^K] + (1 - X_1) · [β_{1,1} X_2^1 + ... + β_{1,K} X_2^K]` - separate distance bins for binary `X_1 in {0,1}`. The iterative procedure runs separately on each `X_1` slice. +- **Sparse spillover region (small `N_{R_kT}`):** estimates of `β_k` are imprecise; the iterative test of `β_k = 0` may fail to reject the null even when true `β_k != 0` (Type II error). The CV procedure may select a smaller `h` than the true bandwidth in this regime. Page 22 (Footnote 12 on Table 1, Model 3): when spillovers reach 5% of the population, average `h*` underestimates the true 10-unit DGP cutoff. +- **Bandwidth grid does not bracket optimal `h`:** if `argmin CV(h)` is at the boundary of the grid, the search space is too narrow. Re-run with extended grid. +- **Time-varying `X(i,t)`:** the framework permits it (Assumption 4A is stated for `X(i,t)`), but the empirical example treats `X` as time-invariant. Implementation can support either; document that `R(i,0) = 0` is fixed by construction (treatment hasn't occurred yet), so time variation only affects `R(i,1)`. 
+- **Computational scaling for LOOCV with large N:** Section 3.2 (page 16) acknowledges `O(N²)` complexity if a vector `h_CV*` is searched (different `h` per iteration). Defaults: scalar `h` constant across iterations, k-fold (10-fold) substitute for LOOCV. + +*Algorithm (Section 3.1 stepwise + Section 3.2 bandwidth optimisation):* +1. Build distance variable `X_i` from raw geographic / network coordinates. For geographic, "average distance from county to nearest treated state border" (page 25) is the recipe used in the empirical example. Treated units have `X_i = 0` and `R(i,t) = 0` by construction. +2. Choose bandwidth grid (e.g. `2km, 4km, ..., 40km`). For each `h`: + a. Build `R^k(i,t)` for `k = 1, 2, ..., K_max(h)` per Equation 11. + b. Initialise `K = 1`. Fit Equation 12 with `R^1(i,t)` only (plus pre-period `R^1(i,1)` fixed effects). + c. Test `H_0: β_K = 0`. If rejected, set `K <- K + 1` and refit with the new ring; loop. Otherwise stop and record `K(h)`. + d. Compute LOOCV (or k-fold) RMSE `CV(h)` with the chosen `K(h)` rings. +3. Select `h* = argmin_h CV(h)`. +4. Refit Equation 12 at `h = h*`, `K = K(h*)`. Output: + - `α̂` = direct ATT (coefficient on `D(i,t)`). + - `β̂_1, ..., β̂_K` = spillover/ATC by distance bin. + - `d = K(h*) · h*` = maximum spillover distance. + - Standard errors clustered (Clarke uses state-level in the empirical example). + +*Algorithm variant (Appendix C - spillovers as nuisance):* +1. Run Step 2 above but compare successive `α̂^{k-1}` vs `α̂^k` instead of `β̂_k`. +2. Use Zellner (1962) SUR Chi-squared test for `H_0: α^{k-1} = α^k`. +3. Stop when the test fails to reject; report `α̂^k`. +4. This variant does NOT report individual `β_k`; appropriate when spillovers are uninteresting per se. + +**Reference implementation(s):** +- Stata, Matlab, R: code at `https://github.com/damianclarke/cdifdif` (footnote 1 of paper). Companion command name: `cdifdif`. 
+- The implementation is described as "[automating] this methodology in various languages" (Conclusion, page 31). diff-diff's Phase 3 is the Python equivalent. + +**Requirements checklist:** +- [ ] Two-period DiD baseline that supports an extra design-matrix column for `R^k(i,t)` indicators. +- [ ] Distance / network / arbitrary-`X(i,t)` input support: a `spillover_distance=` array (N x 1 for unidimensional, N x P for multidimensional). +- [ ] Ring/bin partition (`spillover_kernel="ring"` or equivalent) per Equations 10-11. This is the paper's PRIMARY construction. +- [ ] Bandwidth selection via LOOCV (small N) and k-fold CV (large N) per Equation 20. Expose `bandwidth_grid=`, `cv_folds=` (None for LOOCV). +- [ ] Iterative `β_K = 0` test loop with cluster-robust t-statistic (Section 3.1). +- [ ] Result fields: `direct_effect = α̂`, `spillover_effect = [β̂_1, ..., β̂_K]` (vector), `optimal_bandwidth = h*`, `max_spillover_distance = K · h*`, `cv_rmse = CV(h*)`. +- [ ] Cluster-robust SE by state / unit / two-way as the DEFAULT inference (Clarke's own choice). +- [ ] Mutual-exclusivity enforcement: warn / coerce if a unit appears in both `D(i,1) = 1` and `R(i,1) = 1` rows. +- [ ] Warning for `K_max` reached without test failure (Assumption 3 likely violated). +- [ ] Multidimensional `R(i,t) = f(X_1, X_2)` extension (Section 3.3) - low priority; document as future work for Phase 3. +- [ ] Optional: SUR Chi-squared test variant from Appendix C (`spillover_target="alpha_only"`). +- [ ] If continuous kernels (`exponential`, `inverse_distance`, `adjacency`) are exposed in the API, document them as Clarke-DEVIATIONS in REGISTRY.md - they are NOT in the paper's primary construction. +- [ ] If Conley (1999) HAC SE is exposed, document it as a Phase 1 inference complement, NOT as Clarke's prescription. + +--- + +## Implementation Notes + +### Data Structure Requirements +- Two-period panel: unit id, time id (`t in {0, 1}`), outcome `Y`, treatment `D in {0, 1}`. 
Treatment status is panel-invariant per unit-period within `t = 1`. +- Distance / exposure source: an `(N, 1)` array of distances (or higher-dimensional `(N, P)` for the Section 3.3 generalisation). Required at `t = 1`; can be time-varying. Treated units must have `R = 0`. +- Optional weights (the empirical example uses county-population weights, page 26). +- Recommended schema (sklearn-style): + - `did = SpilloverRobustDiD(spillover_distance=X, bandwidth_grid=[2,4,...,40], cv_folds=10)` + - `did.fit(data, formula="Y ~ D | unit + time")` -> result has `direct_effect`, `spillover_effect` (vector), `optimal_bandwidth`, `max_spillover_distance`. + +### Computational Considerations +- Building the `R^k(i,t)` matrix: O(N) per bin per bandwidth candidate; O(N · K(h) · |H_grid|) total. +- LOOCV: O(N²) in the worst case (N regression refits per bandwidth). 10-fold CV reduces this to O(10 · N · |H_grid|). +- The iterative `β_K = 0` test adds a small constant factor per `K`. +- The empirical example uses `N = 149,328` observations (3,111 counties × 48 months) and reports 10-fold CV taking minutes (page 29 - computationally demanding given LOOCV was infeasible). +- For multidimensional spillovers (Section 3.3): curse of dimensionality. Don't expose this in the first pass. + +### Tuning Parameters + +| Parameter | Type | Default | Selection Method | +|-----------|------|---------|-----------------| +| `spillover_kernel` | string | `"ring"` (paper-faithful) | `"ring"` per Equations 10-11. Continuous kernels (`"exponential"`, `"inverse_distance"`, `"adjacency"`) are deviations - document accordingly. | +| `spillover_distance` | array `(N,)` or `(N, P)` | required | User input - geographic distance, network distance, etc. Must be non-negative and finite for non-treated units; ignored for treated units. | +| `spillover_bandwidth` | float or "auto" | `"auto"` (CV) | Equation 20 LOOCV or k-fold CV when `"auto"`. User-supplied scalar overrides CV (e.g. for replication). 
| +| `bandwidth_grid` | list[float] | data-derived | Default to a uniform grid spanning `[X.min(), X.max()]` with ~20 points; user-overridable. Paper uses `2km..40km` step `2km` for the empirical example. | +| `cv_folds` | int or None | 10 | None -> LOOCV; recommended 10-fold for `N > 1000`. | +| `iterative_test_alpha` | float | 0.05 | t-test significance level for `β_k = 0` rings. | +| `cluster_var` | string | `"unit"` | Cluster-robust SE; user can specify a different clustering var (state in the empirical example). Two-way `["unit", "time"]` accepted. | +| `max_iterations` | int | `len(bandwidth_grid)` | Cap to detect Assumption 3 failure. | + +### Relation to Existing diff-diff Estimators +- `TwoWayFixedEffects` is the natural host for Phase 3. `SpilloverRobustDiD` is conceptually `TwoWayFixedEffects` + an extra group of design-matrix columns (`R^k(i,t)` and `R^k(i,1)` fixed effects) + a CV-based bandwidth selector + result-object spillover fields. +- `result.direct_effect = α̂` (scalar coefficient on `D(i,t)`) and `result.spillover_effect` is a `K`-length vector (or named dict by ring) - the multi-bin form is the operational case in Clarke's empirical example, NOT the scalar binary case. +- `Conley HAC` (Phase 1 deliverable) is COMPLEMENTARY but not Clarke-prescribed. The paper uses cluster-robust SE. Document Conley HAC as an alternative-inference layer. +- `CallawaySantAnna`, `SunAbraham`, `MultiPeriodDiD` - none of Clarke's results extend formally to staggered designs. The empirical example (Equation 22) uses `T_{im}` (binary contemporaneous treatment) with month and county fixed effects, but no propositions formalise the staggered case. **Phase 3 should ship two-period only**; staggered support requires a follow-up methodology contribution beyond this paper. +- `BaconDecomposition` and `SyntheticDiD` are unrelated. 
+ +### T22 tutorial design hints +The empirical example (Section 4.2, Abouk and Adams 2013 text-messaging bans) is a near-ideal tutorial: +- 49 states, 3,111 counties, 48 months (Jan 2007 - Dec 2010). Outcome: log fatal SVSO accidents + 1. +- Three ban types: strong (primary enforcement), weak (secondary enforcement), handheld. Treatment indicator switches on at state-month of enactment. +- Distance: county-centroid to nearest treated-state border (km); Section 4.2 also offers travel-time-over-roads. +- Optimal bandwidths: 30km (weak ban, 30km spillover distance), 6km (handheld ban, 12km spillover distance), no spillover (strong ban). Table 3 reports. +- Magnitudes: weak-ban ATT = 7.6%, ATC[0-30km] = 5.4%; handheld ATT = -7.7% (n.s.), ATC[0-6km] = -11.1%, ATC[6-12km] = -5.3%. +- Pre-trend testing: not formally extended in the paper (two-period model). The MC simulations in Section 4.1 evaluate test SIZE under correctly- and mis-specified spillover bins; that infrastructure can support a tutorial demonstration of selection-via-CV. + +**Synthetic DGP for tutorial (Monte Carlo Model 1, Section 4.1):** + + y_{it} = α + β·T_{it} + Σ_{j=1}^{4} γ_j·close_{it,((j-1)×5,j×5]} + φ_t + λ_i + ε_{it} + +with 5km bins, `θ = (β, γ_1, γ_2, γ_3, γ_4) = (10, 5, 4, 3, 2)`, `ε ~ N(0, σ)` for `σ in {1, 2, 5}`, treatment switched on for 20% of sample in period 2, spillovers reaching 5%/10%/25% of population. Naive DD recovers `β̂ ≈ 9.56` (10% spillover), spillover-robust DD recovers `β̂ ≈ 10.00`. Closed-form bias check (Footnote 12): `E[β̂] = 10 - 5(0.025/0.8) - 4(0.025/0.8) - 3(0.025/0.8) - 2(0.025/0.8) = 9.5625`, matching simulation to 2 decimal places. + +### Critique / limitations Clarke himself acknowledges +1. **Two-period only.** No formal extension to staggered or multi-period DD (Section 2 specifies `t in {0, 1}`). The empirical example sidesteps this by using county-month panel data with `T_{it}` and standard FEs. +2. 
**Maintained Assumption 3 (some far units unaffected).** If all control units are spilled into, ATT is unidentified. The iterative procedure cannot detect this directly; it would simply run to `K_max` without failing to reject. +3. **Maintained Assumption 5 (monotonicity).** Loosened mildly (page 12) but still required. Non-monotonic spillovers (e.g. tipping-point effects) violate the iterative-stopping logic. +4. **Bin partition is arbitrary at small scales.** A bandwidth `h = 5km` forces all units in `[0, 5km)` to share the same `β_1`. The CV procedure picks `h*` by minimising prediction error, but does not address the fundamental discreteness of the bin model. Section 4.1 Models 2 and 3 (irregular and exponential DGPs) demonstrate the estimator's robustness, but the construction is still piecewise-constant. +5. **Multidimensional `X` requires functional-form choice (Section 3.3).** Nonparametric multivariate `R(i,t)` is not provided; user must specify a parametric form. +6. **Standard error inference is not a contribution of this paper.** The Conclusion (page 30) defers to Bertrand-Duflo-Mullainathan / Cameron-Gelbach-Miller / Cameron-Miller for cluster-robust SE; nothing is proven about valid inference under the specific spillover-augmented spec. The companion Phase 1 Conley (1999) HAC work fills part of this gap but is independent of Clarke. +7. **Application-specific spillover magnitudes are small in absolute terms.** Even when statistically significant, the spillover-corrected ATT (Table 3) is "not statistically distinguishable" from the naive ATT (Section 4.2, page 27) when spillovers reach a small fraction of the control group. The bias formula (Equation 15) explains this: small `N_{R_T} / (N_T - N_{D_T})` shrinks the bias regardless of `β`. + +--- + +## Gaps and Uncertainties + +1. **Inference under the iterative procedure.** Clarke uses a sequence of t-tests to determine `K`. 
Sequential testing inflates Type I error in principle (the chance that SOME ring fails to reject is higher than `α`). The paper does not propose a multiple-testing correction. The CV-based approach (Equation 20) is a partial remedy but is itself a model-selection procedure with no formal post-selection inference guarantee. +2. **Conley (1999) HAC vs cluster-robust.** Clarke chooses cluster-robust SE in the empirical example without explicit comparison to spatial HAC. For the diff-diff Phase 3 deliverable that BUNDLES Conley HAC (Phase 1) with the Clarke exposure regressor (Phase 3), there is no theoretical guarantee in either Conley (1999) or Clarke (2017) that the combination is valid. Both pieces are jointly used by the practitioner literature (e.g. Almond-Edlund-Palme 2009, cited at page 3) but a clean theoretical statement is absent. Document as a maintained-assumption in REGISTRY.md. +3. **Choice of distance metric.** The paper says distance can be "euclidean, ethnic, network, messaging-strength, travel-time" but does not provide guidance on which metric to choose for a given application. Practitioners must justify on substantive grounds. +4. **`d̲` for the bandwidth grid.** No formula or default for the grid. Practitioners default to `h_min = 2km` and `h_max ~= mean distance to nearest treated unit` based on the empirical example's `2..40 km` grid. The PYTHON IMPLEMENTATION should derive a sensible default grid from the data range and document the heuristic. +5. **Parallel trends in the spillover spec.** Assumptions 1 and 2 are stated for the binary `R(i,t)`. The K-bin generalisation (Equation 12) implicitly extends parallel trends to each `R^k(i,t)` group: `E[Y^0(i,1) - Y^0(i,0) | R^k(i,1) = 1] = E[Y^0(i,1) - Y^0(i,0) | far-control]` for every `k`. This is a STRONGER assumption than the binary case (parallel trends must hold separately at every distance). The paper does not flag this explicitly. +6. 
**Pre-trend testing in the spillover-augmented spec.** No formal pre-trend test is proposed. The iterative `β_K = 0` test is forward-looking (post-period spillovers) and does not test parallel-trends. Implementations should clearly distinguish "spillover-iteration test" (Clarke) from "event-study placebo test" (standard DD pre-trends). +7. **Multidimensional bandwidth grid (Section 3.3).** Curse of dimensionality is acknowledged but no remedy is proposed. Future-work flag. +8. **Heterogeneous spillover effects.** The paper assumes `β_k` is constant within ring `k`. Spillover heterogeneity by unit characteristic (e.g. urban vs rural counties, demographics) is not formalised; the multidimensional Section 3.3 example introduces ONE interaction (`X_1` binary). Practitioners interested in spillover heterogeneity beyond a single binary stratifier are on their own. +9. **Continuous kernel implementation choice.** If diff-diff Phase 3 ships `spillover_kernel="exponential"` for engineering convenience (Phase 3 plan mentions exponential, ring/donut, inverse-distance, adjacency), this DEVIATES from Clarke's proposed estimator. Justify in REGISTRY.md as `**Note (deviation from Clarke 2017):**` since exponential / inverse-distance are operational shortcuts for the multi-bin partition - not formally derived by the paper. The `"ring"` default is paper-faithful. diff --git a/docs/methodology/papers/colella-et-al-2019-review.md b/docs/methodology/papers/colella-et-al-2019-review.md new file mode 100644 index 00000000..b877a26e --- /dev/null +++ b/docs/methodology/papers/colella-et-al-2019-review.md @@ -0,0 +1,286 @@ +# Paper Review: Inference with Arbitrary Clustering + +**Authors:** Fabrizio Colella, Rafael Lalive, Seyhun Orcan Sakalli, Mathias Thoenig +**Citation:** Colella, F., Lalive, R., Sakalli, S. O., & Thoenig, M. (2019). Inference with Arbitrary Clustering. IZA Discussion Paper No. 12584. 
https://docs.iza.org/dp12584.pdf +**PDF reviewed:** papers/dp12584.pdf (34 pages: 1 cover, 1 colophon, 1 abstract, pp. 2-21 main text + references, pp. 22-32 figures and tables, p. 32 Appendix A.1) +**Review date:** 2026-05-09 + +--- + +## Methodology Registry Entry + +*Formatted to match docs/methodology/REGISTRY.md structure. Heading levels and labels align with existing entries - copy the `## {EstimatorName}` section into the appropriate category in the registry.* + +## acreg-compatible-Conley-HAC + +**Primary source:** Colella, F., Lalive, R., Sakalli, S. O., & Thoenig, M. (2019). Inference with Arbitrary Clustering. IZA DP No. 12584. + +**Scope:** A sandwich variance-covariance (VCV) estimator for OLS and 2SLS that allows arbitrary dependence of the errors across observations in space (or network) and across time. The estimator generalises Conley (1999) by letting the user specify the dependence structure as an n*T x n*T matrix `S` whose `(it, js)` entry is any number indicating how strongly observation `i` at time `t` is correlated with observation `j` at time `s`. In the special case where `S` is binary with entries equal to 1 iff units share at least one cluster, the estimator coincides with the Cameron-Gelbach-Miller (2011) multiway-cluster estimator. The paper's companion Stata package `acreg` is the parity benchmark for diff-diff Phase 1 (`vcov_method="conley"`) and Phase 2 (two-way space-time HAC). + +**Key implementation requirements:** + +*Assumption checks / warnings:* +- Linear model `y = X*beta + epsilon` (OLS) or 2SLS with `o > m` excluded instruments (Section 2, p. 5-6). +- The dependence-structure matrix `S` is provided externally - the paper does NOT estimate or test `S`. The user supplies either (i) a distance cutoff producing binary `S` (uniform kernel), or (ii) a full bilateral distance matrix to which a kernel decay is applied, or (iii) a directly-supplied `S`. +- `S` always includes self-links; main diagonal is ones (Section 2, p. 4-5). 
+- `S` may vary over time (i.e., `s_{itjs}` for any `(i,t,j,s)` quadruple); arbitrary cross-section + time + space-time interactions are allowed (Section 2, pp. 4-5, 7). +- The estimator is consistent under a small number of regularity conditions inherited from White (1980) and Cameron et al. (2011); the paper does NOT spell out the conditions formally and refers to those references. +- Spatial correction matters for inference ONLY when BOTH the outcome AND the regressor exhibit spatial autocorrelation (Section 3.1.3, "Spatial correlation in the outcome variable", p. 13-14, Table 2). If only one of the two is spatially correlated, robust SEs are already approximately correct. This insight contradicts Kelly (2019) and supports running the test on residual-times-regressor moments rather than residual moments alone. +- "There is no universal distance threshold that minimizes the likelihood of Type 1 error for all treatments (or covariates) in a model" (p. 15, "Optimal correction threshold" section). Implementations must take cutoff as a user input. + +*Variance estimator (Section 2, OLS case, p. 5):* + +The theoretical OLS VCV is + + VCV(b_OLS) = (X'X)^{-1} X' Omega X (X'X)^{-1} + +with `Omega = E(eps eps' | X)`. The proposed plug-in estimator is the sandwich + + VCV_hat(b_OLS) = (X'X)^{-1} X' (S * (e e')) X (X'X)^{-1} + +where `e = y - X b_OLS` are estimated residuals and `*` denotes elementwise (Hadamard) product. The "meat" of the sandwich is + + X' (S * (e e')) X = sum_{i=1..n} sum_{t=1..T} sum_{j=1..n} sum_{s=1..T} x_{it} e_{it} e_{js} x_{js} s_{itjs} (Section 2, p. 5) + +The 2SLS analogue (Section 2, p. 6-7) uses `X_hat = Z (Z'Z)^{-1} Z' X` (i.e., projected regressors) and residuals `u = y - X b_2SLS`: + + VCV(b_2SLS) = (X_hat' X_hat)^{-1} X_hat' Omega X_hat (X_hat' X_hat)^{-1} + +with the meat + + X_hat' (S * (u u')) X_hat = sum_{i=1..n} sum_{t=1..T} sum_{j=1..n} sum_{s=1..T} x_hat_{it} u_{it} u_{js} x_hat_{js} s_{itjs} (Section 2, p. 
7) + +Note: residuals are formed from the original (NOT projected) regressors, but the meat is sandwiched by `X_hat` rather than `X`. This is the standard 2SLS sandwich form. + +The paper does NOT number the variance equations; in this review they are referred to as "the OLS sandwich" and "the 2SLS sandwich". Equations 1-12 in the paper are application equations (DGPs for the Monte Carlo study), not variance formulas. + +*Kernel functions (Section 3.1.1 footnote 5, Section 3.1.3, Footnote 9):* + +The paper distinguishes the *kernel* used by the user-supplied dependence matrix `S` from the *kernel decay* used in the Monte Carlo DGP. Both are explicit: + +- **Uniform kernel (default in baseline simulations).** Footnote 5 (page 8): "we adopt a uniform spatial decay kernel in our simulations. We have explored Bartlett-type kernels as well and find that results are fairly comparable to those we present here." Implementation: `s_{ij} = 1` if `dist_ij <= distcut`, else `0`. The DGP itself adds the *idiosyncratic* shock and the *share* of neighbours hit; the matrix `S` used for the variance correction is binary `dist <= cutoff`. + +- **Bartlett kernel (used in the Kelly 2019 replication, Footnote 9 of Section 3.1.2 on page 12 and the Section 3.1.3 outcome-randomization passage on page 13):** Footnote 9 (p. 12) writes the Bartlett rule for spreading a randomly generated variable across cluster members: + + Y_{i,sc} = sum_{j != i, j in cluster of i} [1 - (dist_ij / distcut)] * Y_j + X_{i,sc} = sum_{j != i, j in cluster of i} [1 - (dist_ij / distcut)] * X_j + + This is the Bartlett (linear-decay) taper `K(d) = max(0, 1 - d/h)`. Outside the cutoff `K = 0`. The same form applies to the variance kernel when `acreg` is invoked with a Bartlett option. + +- **acreg's documented kernel set (paper p. 15):** "our proposed estimator's companion statistical package (`acreg`) allows users to provide a bilateral-distance matrix of any metric between observations." 
Then "the distance threshold used for error correction can be defined as *effective distance* between observations in terms of time or cost of travel (flight, road, or walking) distance." The paper does NOT explicitly state which kernel `acreg` defaults to, but the simulations report results with both *uniform* and *Bartlett* and the wording "Then, using our proposed estimator, we correct for the spatial correlation in the model using different distance thresholds" implies the cutoff parameter is the primary knob and the kernel taper is secondary. + +*Default in acreg:* The paper text consistently treats UNIFORM (binary `S`) as the baseline. Kernel option and exact taper need to be confirmed against the `acreg` source. + +*Bandwidth / cutoff selection (Section 3.1.3, p. 14-16):* + +- The cutoff is REQUIRED user input. There is NO data-driven default in the paper or in `acreg`. The Footnote 7 (p. 11) states: "Our estimator requires as input either a distance cutoff value or an adjacency matrix showing which observations are within the same spatial clusters." +- Section 3.1.3 (p. 14-15) demonstrates that the *true* DGP cutoff (168 km in the Monte Carlo, "50 counties on average per cluster") delivers null-rejection closest to the nominal 5%. Cutoffs both larger (242, 327, 478 km) and smaller (56, 82, 117 km) yield slightly higher rejection rates: respectively 6.3%, 7.3%, 9.1% above and 7.5%, 9.1%, 10.5% below. +- Practitioner guidance (p. 16): "We suggest that researchers correct standard errors with varying distance thresholds (and potentially using different distance metrics) and select as the baseline the threshold that provides the largest standard errors for a given model." For multiple outcomes, "select a correction threshold that provides the largest standard errors for most of the variables of interest as the baseline." +- "There is no universal distance threshold that minimizes the likelihood of Type 1 error for all treatments (or covariates) in a model" (p. 
15). When multiple treatment variables are present, the optimal cutoff for each may differ. The recommendation is conservatism: take the cutoff that produces the LARGEST SE. +- Existing diagnostics (Moran's I, Geary's C) test for univariate spatial autocorrelation but "fall short on providing insights on the optimal threshold for error correction" (p. 16) - they do not look at joint spatial distribution of two variables. + +*Two-way (space x time) mode (Section 2, pp. 4-5, 7):* + +The paper treats space and time symmetrically inside the matrix `S`. A panel observation is a `(i, t)` pair; the dependence matrix is `n*T x n*T` with entries `s_{itjs}`. The paper writes (page 5): "S allows for varying link strength, such that entries could range from 0 to 1, and S may change over time t. We also always include self-links in S, so its main diagonal contains ones." This permits: + +- Same-unit-different-time `s_{itis}` (a time-only kernel along the unit's history) +- Different-unit-same-time `s_{itjt}` (a space-only kernel at time `t`) +- Different-unit-different-time `s_{itjs}` (full space-time interaction; kernel may decay in BOTH spatial distance and temporal lag, possibly as a product) + +The paper does NOT prescribe a particular product structure; it explicitly says (Section 2, page 6 paragraph 2): "the flexibility of our structure allows accounting for not only cross-section dependence and time dependence but also interactions between the two, capturing changes in the strength of the correlation that can be due to alterations in the link structure over time or any kind of decay between two moments in time t and s." + +For the Phase-2 implementation in diff-diff, the natural product form is + + K(dist_ij, |t-s|) = K_space(dist_ij / h_space) * K_time(|t-s| / h_time) + +with `K_space, K_time` Bartlett or uniform; the paper sanctions this as one valid choice but does NOT mandate it. 
Practical `acreg` users supply BOTH a spatial distance cutoff AND a temporal lag cutoff (verify against `acreg` source). + +*Treatment of fixed effects (Section 2, p. 5):* + +"X is a matrix of k linearly independent components that could include a long list of dummies for each unit, in case we are interested in the within estimates." + +This is the ONLY mention of fixed effects in the paper. Implications: +- The paper assumes fixed effects are handled by *dummy expansion* into `X` (i.e., FEs become columns of the design matrix). +- The paper does NOT discuss within-transformation (FW partialling-out), absorption, or singleton-dropping. +- The paper does NOT discuss small-sample DOF corrections. +- For diff-diff parity: implement Conley by *first* applying the same fixed-effect treatment as the baseline OLS (within-transformation OK if it produces the same `b_OLS` and the same residuals `e`), then plugging into the sandwich. The acreg parity must use the same FE handling acreg uses. + +*Singleton handling, zero-variance handling, degrees of freedom:* + +The paper is **silent** on: +- Singleton observations (an observation forming its own cluster). +- Zero-variance covariates and collinearity. +- Small-sample DOF corrections (the Cameron-Miller (2015) `(G-1)/G * (n-1)/(n-k)` correction is NOT mentioned). +- Multiplicative scaling of the variance by `n / (n-k)`, `(n-1)/(n-k)`, or any related factor. + +This is a parity gap relative to acreg - implementers must consult acreg source. See Gaps section. + +*Algorithm (Section 2 + Section 3.1):* + +1. Estimate `b_OLS = (X'X)^{-1} X' y`. (Or 2SLS: form `X_hat = Z (Z'Z)^{-1} Z' X`, then `b_2SLS = (X_hat' X_hat)^{-1} X_hat' y`.) +2. Compute residuals `e = y - X b_OLS` (or `u = y - X b_2SLS` for 2SLS - note that residuals use `X` not `X_hat`). +3. Construct dependence matrix `S` (n*T x n*T) using one of: + a. User-supplied bilateral-distance matrix `D_{itjs}` plus user-supplied cutoff `h` and (optionally) kernel choice. 
Uniform: `s_{itjs} = 1{D_{itjs} <= h}`. Bartlett: `s_{itjs} = max(0, 1 - D_{itjs}/h)`. + b. User-supplied adjacency / cluster matrix. + c. Combined space-time: `s_{itjs} = K_space(d_ij / h_s) * K_time(|t-s| / h_t)`. +4. Form the meat: `X' (S * (e e')) X` for OLS, or `X_hat' (S * (u u')) X_hat` for 2SLS, computed by the explicit double sum (n^2 * T^2 terms; see numerical conventions below). +5. Sandwich: `VCV_hat = (X'X)^{-1} * meat * (X'X)^{-1}` for OLS, or `(X_hat' X_hat)^{-1} * meat * (X_hat' X_hat)^{-1}` for 2SLS. +6. Standard errors are sqrt(diag(VCV_hat)). The paper does NOT specify a t-distribution or normal critical-value convention; the simulations all use the 5% nominal level under a normal benchmark. + +**Reference implementation:** +- Stata: `acreg` companion package, downloadable at https://acregstata.weebly.com/ (footnote on page 1 of the abstract). +- Authors thank "Samuel Bazzi, Nicolas Berman, Richard Bluhm, Johannes Buggle, Mathieu Couttenier, David Drukker, Ruben Durante, Ruben Enikopolov, Elena Esposito, Matthew Jackson, Melanie Krause, Eleonora Patacchini" - implies extensive review of the package by Stata insiders. +- Options to capture (from the paper text - VERBATIM check against `acreg` source required): + - **Distance cutoff:** scalar, in the units of the supplied distance metric (the simulations use kilometres). REQUIRED. + - **Distance metric:** lat/lon great-circle (default in geocoded simulations), euclidean, or any user-supplied bilateral-distance matrix. + - **Bilateral-distance matrix override:** the user can provide a full n x n matrix of any metric (including effective distance, travel cost, network adjacency). + - **Kernel:** uniform vs Bartlett (paper explored both; default per simulations is uniform). + - **Spatial dimension:** lat-lon coordinates of each observation OR adjacency matrix. + - **Time dimension:** panel time identifier and (optionally) a temporal lag cutoff for two-way mode. 
+ - **Endogenous regressors / instruments:** standard 2SLS option; the paper repeatedly emphasises that acreg supports IV/2SLS as a key contribution beyond Conley (1999) (which is OLS-only). + - **Outside instruments:** "We also allow users to specify outside instruments, a requirement that is very important for applied papers but that seems overlooked or not discussed in the more theory-driven spatial econometrics literature." (p. 4) + +**Numerical conventions critical for parity:** +- **No DOF correction in the paper.** The paper writes the meat as a plain double sum; no leading factor of `n/(n-k)` or `(G-1)/G`. Implementers MUST verify whether `acreg`'s Stata source applies any such factor; this is the most likely source of a numerical-parity break. +- **Distance unit convention.** Paper's simulations use kilometres throughout (56, 82, 117, 168, 242, 327, 478 km cutoffs in Section 3.1.3). The cutoff parameter `h` is interpreted in the SAME unit as the bilateral-distance matrix; if `acreg` ships its own great-circle helper, that helper's earth-radius constant must match (typical 6371 km vs 6378.137 km vs 6371.009 km can drift by ~0.1%). +- **Hadamard product semantics.** `S * (e e')` is elementwise. Implementers should beware of off-by-one in the time loop: when both `i = j` and `t = s`, the term is `e_{it}^2 * x_{it} x_{it}' * 1` (since self-links are 1); this matches White (1980) HC0 along the diagonal. There is NO HC1, HC2, or HC3 adjustment in the paper. +- **2SLS residuals** are formed using ORIGINAL regressors `X`, not projected `X_hat`: `u = y - X b_2SLS` (page 7). This is the standard convention; mis-applying it (using `X_hat`) would drive a large parity gap. +- **Symmetry of `S`.** The paper does not formally restrict `S` to be symmetric, but the meat formula `X'(S * ee')X` is well-defined either way. For typical spatial cutoffs, `S` is symmetric. For directed networks (e.g., citation graphs), `S` may be asymmetric; verify acreg's behaviour. 
+- **Diagonal entries of `S`.** Self-links are 1 (Section 2, page 5: "the main diagonal contains ones"). This is essential - dropping the diagonal would zero out the HC0 contribution. +- **Numerical stability.** The double sum has O(n^2 * T^2) terms; the paper says nothing about tree-based acceleration. acreg is presumed dense. + +**Requirements checklist:** +- [ ] Implement OLS sandwich `VCV_hat(b_OLS) = (X'X)^{-1} X' (S * ee') X (X'X)^{-1}`. +- [ ] Implement 2SLS sandwich `VCV_hat(b_2SLS) = (X_hat' X_hat)^{-1} X_hat' (S * uu') X_hat (X_hat' X_hat)^{-1}`. +- [ ] Default kernel: uniform indicator `1{d <= h}`. Optional: Bartlett `max(0, 1 - d/h)`. +- [ ] Required input: cutoff `h` (scalar, in units of the distance metric). +- [ ] Default distance metric: great-circle (haversine) on lat/lon. Optional: user-supplied bilateral-distance matrix of any metric. +- [ ] Optional time dimension with a separate temporal cutoff; product kernel `K_space * K_time`. +- [ ] Self-links: `s_{itit} = 1` always. +- [ ] No DOF rescaling in the base formula (verify against acreg). +- [ ] Reduces to HC0 when `h = 0` and no ties at distance 0 (i.e., `S = I`). Verify against acreg. +- [ ] Reduces to cluster-robust when `S = block-diagonal indicator(same cluster)`. Verify against acreg. + +--- + +## Implementation Notes + +### Data Structure Requirements + +- **Spatial:** lat/lon coordinates per observation, OR a user-supplied bilateral-distance matrix (n x n), OR an adjacency / cluster-membership matrix. +- **Temporal:** panel time identifier (for the two-way mode). The paper allows `S` to vary over time (i.e., `s_{itjs}` may differ from `s_{itjt}` for the same pair `(i, j)`); this is the key flexibility beyond Conley (1999). +- **Network:** any object encoding pairwise relatedness (coauthorship, ethnicity, language - see Section 1, p. 3-4 motivation). + +### Computational Considerations + +- The paper does NOT discuss complexity. The double sum in the meat is O(n^2 * T^2) terms. 
For n=3,141 counties and T=1 (cross-section), ~10 million pairs; tractable. For panel datasets with n=10^4 and T=20, the naive computation is 4*10^10 pairs - impractical without a sparse path. +- Phase-2 of diff-diff plans a k-d-tree fast path. The paper offers NO algorithmic details for this; implementers must provide their own sparse pruning (e.g., spatial range query for `dist_ij <= h`). +- Memory: `S` is n*T x n*T but in the uniform/Bartlett cutoff case is sparse; the meat can be computed without materialising `S`. + +### Tuning Parameters + +| Parameter | Type | Default | Selection Method | +|----------------|----------------|----------------------------|------------------| +| `cutoff` (h) | float (km) | NONE (REQUIRED) | User-chosen; paper recommends sensitivity analysis over multiple cutoffs and selecting the cutoff that yields the LARGEST SE for the variable of interest (p. 16) | +| `kernel` | enum {uniform, bartlett} | `uniform` (per simulations) | User-specified; uniform is the simulation default. Bartlett gives smoother decay | +| `distance_metric` | enum {haversine_km, euclidean, custom} | `haversine_km` for lat/lon | User-specified | +| `time_cutoff` | int (periods) | NONE (defaults to no time dependence) | User-chosen for panel mode | +| `time_kernel` | enum {uniform, bartlett} | matches `kernel` | User-specified | +| `dependence_matrix` | n x n array | NONE (auto-built from coords + cutoff) | Override path for custom topology (e.g., adjacency, network) | + +### Relation to Existing diff-diff Estimators + +- **Phase 1 parity target:** `vcov_method="conley"` on TWFE must match acreg to <=1e-6 on at least 2-3 fixtures. The `coords=("lat","lon")` and `cutoff_km=` parameters map directly onto acreg's lat/lon + cutoff inputs. +- **Reduces to HC0** when the cutoff is small enough that `S = I` (no neighbour pairs). 
The paper does not state this explicitly, but the meat formula collapses to `X' diag(e^2) X` in that case, which is HC0 (White 1980, equation referenced page 4). +- **Reduces to one-way clustering** when `S = block-diagonal indicator(same cluster)` (see Section 2, p. 6: Cameron et al. 2011 "can be embedded in this framework"). For multiway clustering, the paper says (page 6): "Multiway clustering assumes a particular *regularity condition* in the clustering structure ... However, in many real-life settings, this particular clustering structure may not hold." The acreg estimator is more flexible and the reduction to multiway clustering is approximate (binary `S` with the union-of-clusters structure). +- **Cluster + spatial joint mode:** The paper does NOT formally combine cluster-robust with spatial-HAC. However, since `S` is arbitrary, one can construct `S` as the elementwise OR of the cluster-indicator matrix and the spatial-cutoff matrix; this gives a joint estimator. acreg likely exposes both options - verify. + +### Parity Test Plan + +The paper's empirical fixtures (Section 3.1, page 7-15) are the natural acreg-parity targets: + +- **Spatial (Section 3.1):** N=3,141 US counties, NHGIS 2000 data; `Y_c = log median earnings 2000`. `Policy_c` = randomly drawn binary placebo shock (top quartile of a normal random variable). Spatially correlated version: cutoff 56 km (5 counties/cluster average), Bartlett decay across cluster members (Footnote 9, p. 12). Section 3.1 reports null-rejection rates for several estimators (Table 1: 5.5% for acreg OLS, 5.3% for acreg 2SLS, both close to nominal 5%). +- **Section 3.1.3 sensitivity grid:** cutoffs in {56, 82, 117, 168, 242, 327, 478} km; null-rejection rates from {10.5%, 9.1%, 7.5%, 5.9%, 6.3%, 7.3%, 9.1%}. The cell (cutoff=168 km, OLS, single treatment) gives 5.9% - this is a natural acreg-fixture target since it is the "true threshold" of the DGP. 
+- **Network (Section 3.2):** top 50 IDEAS RePEc authors by coauthor count; Y = log citations; covariates = log articles, gender, age, age^2; productivity-shock placebo with first-degree coauthor decay. With sample size 1000, acreg null-rejection is 5.5% OLS / 6.2% 2SLS. + +acreg invocations to replicate (illustrative; verify exact syntax against the package): + +- `acreg log_med_earnings policy_sc {controls}, spatial latitude(lat) longitude(lon) dist(168)` for the single-treatment N=3141 fixture, Table 1 row (7). +- `acreg log_med_earnings policy_sc_end {controls} (policy_end = policy), spatial latitude(lat) longitude(lon) dist(168)` for the 2SLS Table 1 row (8). +- A network fixture using a coauthorship adjacency matrix (acreg syntax for network input not specified in the paper text; consult package docs). + +For each fixture, the parity test should compare: +- Coefficient estimates (must be IDENTICAL since they are plain OLS / 2SLS). +- The full VCV matrix (must agree to <=1e-6 in elementwise abs / rel). +- Standard errors (sqrt of diagonal) - same tolerance. +- t-statistics and p-values (downstream checks). + +A reduction-to-HC0 fixture (cutoff=0 with no zero-distance pairs) would validate the diagonal-only special case; a reduction-to-cluster fixture (using a block-diagonal `S`) would validate the cluster-robust-equivalence claim. + +--- + +## Gaps and Uncertainties + +**1. acreg's exact default kernel and option syntax.** +The paper text tells us the simulations primarily use a UNIFORM (binary cutoff) kernel and that Bartlett is mentioned as "comparable" (Footnote 5, p. 8) and is used in the Kelly-2019 replication (Footnote 9, p. 12). The Bartlett formula `1 - dist/distcut` for `dist <= distcut` is given verbatim. But the paper does NOT specify which kernel is the *default option* in `acreg`, nor the exact option name. Implementers must consult the acreg Stata source (https://acregstata.weebly.com/) or its `.ado` / `.sthlp` files. 
Most likely option names: `bartlett`, `uniform`, or `kernel(...)`. + +**2. Degrees-of-freedom / small-sample correction.** +The paper writes the variance estimator with no leading scalar - i.e., the meat is the bare double sum and the bread is `(X'X)^{-1}`. There is NO mention of: +- `(n - 1) / (n - k)` (HC1-style correction) +- `(G - 1) / G` (cluster-robust correction) +- `T_eff` (effective DOF for the time dimension) +- Bell-McCaffrey adjustment + +`acreg` may or may not apply such a factor. For diff-diff Phase 1 parity, this is the SINGLE most likely break point. Recommend running a 1-fixture test with deliberate, known leading-factor candidates (1, n/(n-k), (n-1)/(n-k), G/(G-1)) to identify which factor acreg uses. + +**3. Distance metric internals (haversine vs Vincenty).** +"Spatial distance" is referred to throughout but the great-circle formula and earth-radius constant are NOT specified. Different defaults: +- Stata `geodist` ado: 6378.137 km (WGS-84 equatorial radius), Vincenty by default. +- Stata `geonear`: 6371 km, haversine. +- diff-diff Phase 1 plan: 6371 km haversine. +A 0.1% radius difference compounds to ~0.1% SE drift, which would break <=1e-6 parity. Verify acreg's specific distance helper before pinning the constant. + +**4. Two-way (space x time) kernel structure.** +The paper sanctions arbitrary `S`, including space-time interactions, but does NOT prescribe a specific two-way construction. acreg's two-way mode (Phase 2 reference) is implementation-defined. Most likely: a product kernel `K_space(d_ij / h_s) * K_time(|t-s| / h_t)`. Other candidates: max-norm `K(max(d/h_s, t/h_t))`, sum kernel `K_s + K_t`, or Driscoll-Kraay-style time-block kernel. Verify against acreg. + +**5. Treatment of fixed effects.** +The paper's only statement (Section 2, page 5) is that `X` "could include a long list of dummies for each unit". 
No discussion of: +- Within-transformation / partialling-out (FW theorem) +- Singleton observations (clusters of size 1 inside the cutoff) +- Singleton dropping +- Absorbed FEs with > k columns where the user expects `(X'X)^{-1}` to be regularised + +For diff-diff, the Phase 1 plan is to apply Conley to the partialled-out residuals from a within transformation. This must produce identical SEs to dummy expansion + acreg only if the number of dummies and the residualisation are bit-identical. Confirm with parity fixture using a small panel. + +**6. Singleton handling.** +Not discussed. acreg may silently include singletons (each contributes only the diagonal HC0 term) or may warn / drop. diff-diff convention so far is to warn-and-keep; align with whatever acreg does. + +**7. Zero-variance / near-collinear regressors.** +Not discussed. acreg presumably inherits Stata's collinearity-drop rule (`_rmcoll`); diff-diff will need to mirror this for parity (or have a clearly-documented deviation). + +**8. Multiplicative scaling of the variance.** +There is one footnote (Footnote 7, p. 11) on input requirements: "Our estimator requires as input either a distance cutoff value or an adjacency matrix showing which observations are within the same spatial clusters." No leading scalar is mentioned. But Stata variance commands often output `e(V) * c(N) / (c(N) - c(rank))` as the default; acreg may or may not follow this. Identifiable only by running acreg and checking. + +**9. Reference equations are not numbered.** +The paper does NOT number the OLS or 2SLS variance formulas. Equations 1-12 are the application DGPs. Implementers should cite "Section 2 OLS sandwich" or "Section 2 2SLS sandwich" rather than an equation number. The `X' (S * (ee')) X` form on page 5 and the `X_hat' (S * (uu')) X_hat` form on page 7 are the key references. + +**10. No formal asymptotic theory.** +The paper notes that consistency follows from White (1980) and Cameron et al. 
(2011) under regularity conditions, but does NOT prove a theorem of its own. Critical values are normal `1.96` throughout. There is no t-distribution adjustment, no Hausdorff fix, no Imbens-Kolesar Bell-McCaffrey. This is consistent with the paper being a "proof of concept" framing (abstract: "As a proof of concept, we conduct Monte Carlo simulations"). + +**11. Comparison to Conley (1999).** +The paper's stated contributions vs Conley (1999) (page 2) are: (a) extending to 2SLS / IV with outside instruments, (b) allowing "the metric in a flexible way: In addition to spatial distance, our approach can deal with travel distance, travel costs, contiguity and any concept of distance in a network". Conley (1999) is OLS-only and uses only spatial Euclidean / geographic distance with a fixed kernel. The acreg variance formula reduces to Conley's HAC when (i) cross-section only (T=1), (ii) `S` is built from spatial Euclidean distance with a uniform or Bartlett kernel, and (iii) the user-supplied bilateral-distance matrix is the geographic distance. + +**12. Replication artifacts.** +The paper does not ship a public replication archive with the IZA DP; the website is https://acregstata.weebly.com/. Authors thank Drukker (the Stata core team), implying the package is well-vetted but not necessarily archived. Implementers should treat the live `.ado` file as the canonical reference and pin a specific version SHA / date in the parity-fixture metadata. + +**13. Practical guidance not formalised.** +Section 3.1.3 (p. 14-16) is labelled "A Practitioner's Guide" but offers heuristic observations rather than testable propositions: +- "Spatial correlation has to be present in BOTH the outcome variable AND the variable of interest for an increase in the likelihood of Type 1 error" (p. 14). This contradicts Kelly (2019) (which the paper cites and pushes back on) - if true, this is a USEFUL practical screen but it is NOT a formal theorem. 
+- "Select as the baseline the threshold that provides the largest standard errors" (p. 16). A pragmatic heuristic, not a justified rule. +- "Researchers, as a healthy practice, [should] be transparent about their choice of baseline distance threshold and report the robustness of their findings to correcting the standard errors in their models using a wide range of distance thresholds" (p. 16). Strong endorsement of sensitivity-analysis output - diff-diff should make multi-cutoff sweeps easy. + +**14. Appendix A: Kelly (2019) replication.** +Figure A.1 (page 32) replicates Kelly (2019)'s "fake spatial correlation" Monte Carlo with the IZA DGP. Conventional robust SEs reject ~40-45%; acreg reduces this to ~10-15% (still above nominal but a substantial improvement). This artifact is useful as a stress-test fixture but not a tight parity benchmark. + +**15. Network kernels (acreg with adjacency matrix).** +The paper accepts an adjacency matrix as input (Footnote 7, p. 11). For coauthorship the network is Section 3.2's first-degree-neighbour shock structure (Footnote 11, p. 17): "We adopt a setting where shocks are correlated in coauthor neighborhoods of degree 1. Larger neighborhoods and decay in shocks can be accommodated in our estimator as well." How acreg accommodates "decay in shocks" via the adjacency input is NOT spelled out - presumably the user can supply a weighted adjacency matrix with entries in [0, 1] rather than binary. Phase 3 of diff-diff will need to clarify this. diff --git a/docs/methodology/papers/conley-1999-review.md b/docs/methodology/papers/conley-1999-review.md new file mode 100644 index 00000000..abd49c49 --- /dev/null +++ b/docs/methodology/papers/conley-1999-review.md @@ -0,0 +1,285 @@ +# Paper Review: GMM Estimation with Cross Sectional Dependence + +**Authors:** Timothy G. Conley +**Citation:** Conley, T. G. (1999). GMM Estimation with Cross Sectional Dependence. *Journal of Econometrics*, 92(1), 1-45. 
DOI: 10.1016/S0304-4076(98)00084-0 +**PDF reviewed:** papers/1-s2.0-S0304407698000840-main.pdf +**Review date:** 2026-05-09 + +--- + +## Methodology Registry Entry + +*Formatted to match docs/methodology/REGISTRY.md structure. Heading levels and labels align with existing entries - copy the `## ConleySpatialHAC` section into the appropriate category in the registry.* + +## ConleySpatialHAC + +**Primary source:** Conley, T. G. (1999). GMM Estimation with Cross Sectional Dependence. *Journal of Econometrics*, 92(1), 1-45. + +**Scope:** Cross-sectional spatial heteroskedasticity-and-autocorrelation-consistent (HAC) covariance matrix estimator for GMM/OLS when observations are realizations of a random field on a Euclidean space (typically R^2). Each observation `i` is associated with a location `s_i` and the dependence between `X_{s_i}, X_{s_j}` is allowed to be a (decreasing) function of the "economic distance" `d_{ij}`. The estimator is the spatial analog of Newey-West (1987) / Andrews (1991): a kernel-weighted sum of pairwise outer products, truncated beyond a user-supplied bandwidth. Conley distinguishes two settings: (i) Section 3, locations on an integer lattice with exactly observed distances; (ii) Section 4, real-valued locations with bounded measurement error in distances. For diff-diff Phase 1, only the OLS-just-identified specialization (Section 5 empirical example) is needed. + +**Why this paper is foundational for diff-diff:** every spatial-HAC SE used in modern applied DiD work traces back to this paper's three core ingredients — (a) a kernel-weighted pairwise meat with bandwidth `h`, (b) consistency under a polynomially-decaying mixing condition with bandwidth growth rate `o(n^{1/3})`, and (c) tolerance to bounded measurement error in distances (so applied users can use approximate haversine on lat/lon). Stata's `acreg` (Colella et al. 
2019), `conleyreg` (Düsterhöft 2021), and the Hsiang (2010) MATLAB code all implement specializations of Equation 3.13 / 4.2. + +**Key implementation requirements:** + +*Assumption checks / warnings:* +- Random-field framing: each observation `i` is associated with a position `s_i` in R^2 (or general R^l), and `X_{s_i}` is a stationary random field with mixing coefficients that decay in inter-point distance (Section 2 page 5; Section 3.1.1 page 6). +- Sample region `Λ_τ` must be a sequence of finite closed convex regions that grow uniformly in at least two non-opposing directions (Assumption A1, page 8). Footnote 5 (page 4) flags the importance of two-direction growth: with growth along one direction only the data could be handled like a time series. +- Mixing condition on the random field: `α_{k,l}(n)` is the standard `σ`-algebra mixing coefficient with index sets of size `≤ k`, `≤ l` and minimum Euclidean distance `≥ n` (Equation 3.1, page 7). The required tail decay is `α_{1,∞}(m)^{δ/(2+δ)} = o(m^{-2})` and `Σ_m m·α_{k,l}(m) < ∞` for `k+l ≤ 4` (conditions B1-B3 page 9). Practical interpretation: the autocovariance function must die off polynomially in distance. +- Moment condition: for some `δ > 0`, `E[ ‖g(X_s; β)‖^{2+δ} ] < ∞` (B3, page 9). For OLS this becomes `E[ ‖x ε‖^{2+δ} ] < ∞`. +- Sampling/directing process `W_s` (Section 3.1.2 page 6): `W_s` is independent of the underlying random field `X_s`, stationary, mixing, with `E W_s = λ` and zero otherwise. This represents the irregular spacing of agents on the lattice (`H ∩ Λ_τ`); it can also accommodate cluster sampling so long as it is independent of `X`. Section 6 (page 22) flags this independence as a substantive restriction; "endogenous locations" violate it. +- Real-valued locations (Section 4): require a minimum economic distance `d_0` between distinct agents (Assumption D4 "hard core" point process, page 14) and bounded mixing measurement error in distances (Assumptions E1-E2, page 15). 
+- Warn (do NOT fit silently) when a user supplies `conley_cutoff_km = 0` (or `h = 0`): the estimator collapses to White HC0 (with no diagonal-only adjustment for the `j=k=0` double-counting subtraction, see Equation 3.13). +- Warn when the user-supplied cutoff exceeds roughly half the diameter of the sample region. Conley does not state a hard rule but the empirical example (Section 5, page 21) sweeps from 60-90 (truncated window) and 75-225 (Bartlett window) on a sample of 95 countries with distance metric centered around 50-150 between economically close pairs (Table 1, page 20). + +*Variance estimator (Equation 3.13 page 12, as implemented for OLS):* + +For the GMM moment process `Y_{m,n}(β) := g(X_{s_i}; β)` indexed by lattice coordinates `(m,n)`: + + Ĉ_τ = (1/T_τ) · Σ_{j=0}^{L_M} Σ_{k=0}^{L_N} Σ_{m=j+1}^{M} Σ_{n=k+1}^{N} K_{MN}(j,k) + · [ Y_{m,n}(b_τ) Y_{m-j, n-k}(b_τ)' + Y_{m-j, n-k}(b_τ) Y_{m,n}(b_τ)' ] + - (1/T_τ) · Σ_{m=1}^{M} Σ_{n=1}^{N} Y_{m,n}(b_τ) Y_{m,n}(b_τ)' (3.13) + +The second term subtracts the doubled `j = k = 0` contribution introduced by the symmetrized first sum (page 11 above Equation 3.13). For OLS, `Y_{m,n}(b) := x_i (y_i - x_i' b) = x_i ε̂_i`. The "meat" matrix `Ĉ_τ` is then sandwiched: + + Var̂(β̂) = (X'X)^{-1} · Ĉ_τ · (X'X)^{-1} (just-identified GMM, Section 5 framing) + +This is the spatial analog of the time-series HAC estimator (Newey-West 1987; Andrews 1991): a weighted sum of pairwise outer products with weights `K_{MN}(j,k)` that vanish when the lattice index gap exceeds `L_M` (in the M-direction) or `L_N` (in the N-direction). 
+ +For real-valued locations with possible measurement error in distances (Equation 4.2 page 18): + + C̃_τ = (1/T_τ) · Σ_{j=0}^{L_M} Σ_{k=0}^{L_N} Σ_{m̃=j+1}^{M̃} Σ_{ñ=k+1}^{Ñ} K(j/L_M, k/L_N) + · [ Y_{m̃,ñ}(b_τ) Y_{m̃-j, ñ-k}(b_τ)' + Y_{m̃-j, ñ-k}(b_τ) Y_{m̃,ñ}(b_τ)' ] + - (1/T_τ) · Σ_{m̃=1}^{M̃} Σ_{ñ=1}^{Ñ} Y_{m̃,ñ}(b_τ) Y_{m̃,ñ}(b_τ)' (4.2) + +where `K(·,·)` is a bounded continuous function on `[-1,1] × [-1,1]` with absolutely summable Fourier coefficients and `K(0,0) = 1`. The plane is partitioned into squares of side `d̄ < d_0` and observations are relabeled by their square coordinates `(m̃, ñ)`. (For diff-diff this is the relevant form: locations are real-valued lat/lon, distances are computed pairwise rather than via lattice indexing.) + +In a pairwise pseudo-distance form that is more familiar to applied users (and that diff-diff will implement directly): + + C̃_τ = Σ_{i, j} K(d_{ij} / h) · X_i ε̂_i ε̂_j X_j' + +with `K(0) = 1` (so the `i = j` term contributes `X_i ε̂_i² X_i'`, which equals the White HC0 contribution). This is the form Conley sketches in Section 4.3 (Equation 4.2 plus the "pairwise products at a given distance" remark on page 19) and is the form used by every downstream Stata/R/MATLAB implementation. The exact mapping: with a 2-D Bartlett window `K(j/L_M, k/L_N) = (1 - |j|/L_M)(1 - |k|/L_N)` (Equation 3.14), the kernel is separable in M and N coordinates; in pairwise form it becomes a function of the L_∞ distance scaled by `(L_M, L_N)`. For a single isotropic cutoff `h` and Euclidean (or great-circle) distance, the standard practitioner specialization is `K(d_{ij}/h)`. + +where: +- `d_{ij}` = distance between units `i` and `j` (Conley: economic distance, possibly distorted by bounded mixing measurement error) +- `K(·)` = kernel function +- `h` = bandwidth (cutoff). 
Conley uses `(L_M, L_N)` for direction-specific bandwidths +- `X_i ε̂_i` = OLS score for observation `i` +- `T_τ` = number of observations in sample region `Λ_τ` (Conley uses `T_τ` for sample size, NOT `n`) + +*Kernel functions (Section 3.3.1 pages 11-12, Section 4.3 page 18):* + +The class of kernels Conley considers must satisfy (conditions C1, page 12; restated p.18 for the real-valued case): +- Uniformly bounded: `|K_{MN}(j,k)| ≤ const` +- Convergence to one at the origin: `K_{MN}(j,k) → 1` as `τ → ∞` for each fixed `(j,k)`. In the scaled form `K(j/L_M, k/L_N)`, this is `K(0,0) = 1`. +- Bandwidth growth: `L_M = o(M^{1/3})` and `L_N = o(N^{1/3})` (condition C1, page 12). This is the spatial analog of Andrews' `1/4` rate restriction in the time series case. +- For PSD point estimates (Section 3.3.1, p.12): the weights must correspond to a non-negative spectral window, equivalently the Fourier transform of `K(·,·)` must be non-negative. Bartlett, Parzen and similar windows from time-series HAC literature (see Priestley 1981) inherit this property. + +Specific kernels named in the paper: + +- **Truncated (uniform) window** (page 11, cited as White 1984's truncated estimator): + + K_{MN}(j,k) = 1{|j| < L_M, |k| < L_N} + + Easy to construct from imprecise distance information ("constant over the distances within a category"), but its spectral window (the Fourier transform) is "negative in some regions" (footnote 11, page 11) so it is NOT generally PSD. + +- **Bartlett window (Newey-West, 2-D product form)** (Equation 3.14, page 12): + + K_{MN}(j,k) = (1 - |j|/L_M)(1 - |k|/L_N) · 1{|j| < L_M, |k| < L_N} + + Its Fourier transform is non-negative (page 12), so `Ĉ_τ` is PSD with this weighting. The empirical example (Section 5, page 21) uses this kernel with truncation points 75-225 alongside a truncated window with cutoffs 60-90. + +- **General Bartlett-class** (sketched, not formalized): "many others (see, e.g., Priestley 1981) for other examples" (page 18). 
+ +- **Pairwise (1-D) Bartlett (the form diff-diff will implement)** is not explicitly written in the paper. The 2-D `(1 - |j|/L_M)(1 - |k|/L_N)` is the only PSD kernel formula in the paper. The 1-D form `K(u) = max(0, 1 - |u|)` is the standard practitioner specialization (Hsiang 2010; Colella et al. 2019) and reduces to Conley's 2-D Bartlett along the M-axis when `L_N → ∞` (or vice versa). See "Gaps" section. + +Bandwidth selection (Section 3.3, Section 4.3): +- **Rate condition (consistency)**: `L_M = o(M^{1/3})`, `L_N = o(N^{1/3})` (C1, page 12). Bandwidth must grow with sample size but slower than the cube root. +- **No plug-in / cross-validation rule given.** Conley does NOT supply a data-driven bandwidth selector. The empirical example (Section 5 page 21) reports estimates over a coarse grid and notes "the qualitative results discussed below are robust to changes in this cutoff value and/or window specification." This is the standard sensitivity check approach that downstream packages (acreg, conleyreg) inherit. +- **Practical guidance from the empirical example (page 21)**: "Some idea of the relative magnitude of this truncation value of 75 is afforded by Table 1." Table 1 (page 20) shows the economic distances: USA-Mexico = 32, UK-France = 53, USA-Japan = 119, USA-Algeria = 141, USA-Pakistan = 218. So a cutoff of 75 includes near neighbors (USA-Mexico, UK-France) but excludes distant pairs. The implementation should expose `conley_cutoff_km` directly (no auto-selection) and document Conley's robustness-grid recommendation. + +*Edge cases (compiled across paper):* +- **`d_{ij} = 0` for `i ≠ j` (multiple observations at same coordinates)**: page 19 - "If measurements of economic distances do not locate agents in distinct locations, this strategy to get PSD estimates cannot always be implemented. However, `C̃_τ` will still remain consistent as long as the measurement errors satisfy conditions E1 and E2." 
Page 18 (eq 4.2 footnote): when there are multiple observations with the same index (large `d̄` or non-distinct distance measurements), the bracketed term `[Y_{m̃,ñ}(b_τ)_1 Y_{m̃-j, ñ-k}(b_τ)_1' + Y_{m̃-j, ñ-k}(b_τ)_1 Y_{m̃,ñ}(b_τ)_1']` gets expanded to all the cross products at that distance. Practical handling: the implementation should sum all `X_i ε̂_i ε̂_j X_j'` cross terms at zero distance the same way as nonzero distance. +- **No spatial dependence (`α_{k,l}(m) = 0` for all `m > 0`)**: Conley does NOT explicitly note this case, but inspection of Equation 3.13 with `K(j,k) → 1{j=k=0}` (i.e., cutoff `h = 0` with no ties) reduces to `(1/T_τ) Σ_m Σ_n Y_{m,n} Y_{m,n}' = (1/n) Σ_i x_i ε̂_i² x_i'`, which is White (1980) HC0 (the meat matrix). So at `h = 0` and no spatial ties Conley reduces to HC0 (page 19 names HC0 as "Eicker, 1967; Huber, 1967; White, 1980"). +- **Ties in distance (multiple cluster-like structure)**: when `K(d_{ij}/h)` is the uniform indicator and `d_{ij}` is `0` for units in the same group and `> h` otherwise, Conley reduces to cluster-robust (CR0) - specifically to the score-outer-product cluster sum without the small-sample `G/(G-1)` finite-sample correction. This reduction is not explicit in the paper but is implied by Equation 3.13 with `K = 1{d_{ij} = 0}`. +- **Imperfect distance measurement** (Section 4): bounded measurement error in `d_{ij}` is OK as long as E1-E2 hold; the estimator stays consistent (Proposition 5, page 19). This justifies practitioner use of approximate haversine distance from lat/lon. +- **PSD failure with truncated kernel**: "This estimator will not always be PSD, unfortunately, since the spectral window corresponding to the step function space domain window (its Fourier transform) will be negative in some regions" (footnote 11, page 11). Implementation should warn (or fall back to Bartlett) if the user requests `conley_kernel="uniform"` and the resulting matrix has negative eigenvalues. 
+- **Endogenous locations**: Section 6 (page 22) flags `W_s ⊥ X_s` (independence of sampling/directing process from random field) as a substantive restriction. "Allowing for endogenous locations is likely to require an explicit model of how locations and variables in the moment conditions are jointly determined." DiD spillover applications where treatment assignment is correlated with location violate this; the variance estimator may be inconsistent. +- **Sample region "very nearly a line"** (page 28 in proof): the count of "far apart" terms in the variance bound is `MN - 4 · min(L_M, L_N) - 1`, which is the worst case. Two-direction growth of `Λ_τ` (A1) is required to control these boundary terms. + +*GMM moment condition and estimator (Section 3.2 pages 7-8):* + +Population moment condition (Equation 3.2): + + E[ g(X_s; β) ] = 0 + +where `β` is `k × 1` and `g: R^l × B → R^v` with `v ≥ k`. The `v - k` overidentifying restrictions are exploited via the GMM objective (Equation 3.3): + + J(b)_τ = [ (1/T_τ) Σ_{i=1}^{T_τ} g(X_{s_i}; b) ]' Ω_τ [ (1/T_τ) Σ_{i=1}^{T_τ} g(X_{s_i}; b) ] (3.3) + +minimized over `b ∈ B`. For OLS (Section 5 page 19), `g(x_i, y_i; β) = x_i (y_i - x_i' β)`, `v = k`, `Ω_τ = I` (just-identified), and the minimizer is the OLS estimator. + +*Consistency conditions (Proposition 1 page 8):* + +Under conditions A1-A3, `b_τ → β` in probability as `τ → ∞`: +- **A1** (sample region): `Λ_τ` grows uniformly in two non-opposing directions as `τ → ∞`. (Page 8.) +- **A2** (weighting matrix): `Ω_τ → Ω` in probability where `Ω` is positive-definite. (Page 8.) +- **A3** (regularity): `X_s` and `W_s` are mixing; `g(·; b)` is Borel measurable for all `b ∈ B`; `g(X; ·)` is continuous on `B` for all `X ∈ R^l`; first-moment continuous on `B`. (Page 8 with footnote 7 defining first-moment continuity.) 
+ +*Asymptotic distribution (Proposition 2 page 10, Equation 3.11):* + + sqrt(T_τ) · (b_τ - β) ⇒ N(0, D_0 λ^{-1} V D_0') as τ → ∞ (3.11) + +where `D_0` and `V` are the GMM asymptotic-variance ingredients: + + D_0 = { E[Dg(X_s; β)]' Ω E[Dg(X_s; β)] }^{-1} E[Dg(X_s; β)]' Ω + V = Σ_{s ∈ Z^2} cov(Y_0(β), Y_s(β)) + +(page 10 Equation 3.12: "the asymptotic covariance matrix of the moment conditions that needs to be estimated is `C ≡ λ^{-1} V`.") `λ = E W_s` is the limiting fraction of lattice points sampled, accounting for the irregular spacing. + +For OLS just-identified (Section 5 page 19): `β̂ = (X'X)^{-1} X'y`, score = `x_i ε_i`, `Dg = -x_i x_i'`, so + + Var̂(β̂) = (X'X / T_τ)^{-1} · λ^{-1} V̂ · (X'X / T_τ)^{-1} / T_τ + = (X'X)^{-1} · Ĉ_τ · (X'X)^{-1} + +with `Ĉ_τ` from Equation 3.13. (The `λ` factor is absorbed by the lattice indexing; in the pairwise form `Σ_{i,j} K(d_{ij}/h) · ...` it does not appear.) + +*Consistency of the covariance matrix estimator (Proposition 3 page 12):* + +Under conditions A1-A3, B1-B5, and C1-C3: + + Ĉ_τ → C in probability as τ → ∞ + +where the relevant conditions are: +- **B1**: `Σ_{m=1}^∞ m · α_{k,l}(m) < ∞` for `k + l ≤ 4` (page 9). +- **B2**: `α_{1,∞}(m) = o(m^{-2})` (page 9). +- **B3**: For some `δ > 0`, `E[‖g(X_s; β)‖^{2+δ}] < ∞` and `Σ_m m · α_{1,1}(m)^{δ/(2+δ)} < ∞` (page 9). +- **B4**: `Dg(X_s; b)` is Borel measurable for all `b ∈ B`, continuous on `B` for all `X ∈ R^l`, first-moment continuous; `E Dg(X_s; β)` exists and is full rank (page 10, condition stated atop). +- **B5**: `V = Σ_{s ∈ Z^2} cov(Y_0(β), Y_s(β))` is non-singular (page 10). +- **C1**: `K_{MN}(j,k)` are uniformly bounded; `K_{MN}(j,k) → 1` as `τ → ∞` (M, N → ∞); `L_M = o(M^{1/3})` and `L_N = o(N^{1/3})` (page 12). +- **C2**: For some `δ > 0`, `E[‖g(X_s; β)‖^{4+δ}] < ∞` and `α_{∞,∞}(m)^{δ/(2+δ)} = o(m^{-4})` for both `X_s` and `W_s` (page 12). Note this is the **strengthened** moment / mixing condition relative to B2-B3. 
+- **C3**: `E sup_B ‖Y_{m,n}(b)‖² < ∞` and `E sup_B ‖(∂/∂b)[Y_{m,n}(b)]‖² < ∞` (page 12). This bounds the score and its gradient uniformly in `b`, so plug-in `b_τ → β` does not destroy consistency. + +For real-valued locations with measurement error (Proposition 5 page 19): under A1-A4 (where A4 is uniform growth in two non-opposing directions for the constructed lattice region `Λ_τ*`), B1-B5, C1-C3, D1-D6 (point process assumptions, page 14), E1-E2 (bounded mixing measurement error, page 15), `C̃_τ → C` in probability. + +The point-process assumptions D1-D6 (page 14) are: +- **D1**: `X(s)` and the point process `Φ` are independent. +- **D2**: `Φ` is simple (zero or one points at each location w.p.1). +- **D3**: `0 < EΦ(A) < ∞` for all `A` with finite Lebesgue measure. +- **D4**: `Φ` is "hard-core" — no points within `d_0` of each other w.p.1. +- **D5**: `Φ` is stationary. +- **D6**: `Φ` is mixing, with a rate that links to the random field's mixing via a relabeled lattice process `ψ_p = Φ(A_p)`. + +*Empirical example numerics (Section 5, Table 2 page 21):* + +Cross-country growth regression on 95 countries (Barro 1991 specification): `growth(1960-85) ~ GDP60 + SEC60 + PRIM60 + g^c/Y + REV + ASSASS + PPI60DEV + Africa + LatinAmerica`. Economic distance = transportation cost between countries (Conley & Ligon 1995). Standard errors computed three ways: + +| Variable | Point estimate | IID S.E. | HET S.E. | Spatial S.E. 
| +|-------------|----------------|----------|----------|--------------| +| Constant | 0.0333 | 0.0063 | 0.0070 | 0.0053 | +| GDP60 | -0.0066 | 0.0010 | 0.0009 | 0.0008 | +| SEC60 | 0.0124 | 0.0106 | 0.0077 | **0.0019**\*\* | +| PRIM60 | 0.0274 | 0.0060 | 0.0060 | 0.0065 | +| g^c/Y | -0.0959 | 0.0260 | 0.0269 | 0.0359 | +| REV | -0.0208 | 0.0085 | 0.0081 | 0.0072 | +| ASSASS | -0.0024 | 0.0029 | 0.0018 | **0.0009**\*\* | +| PPI60DEV | -0.0139 | 0.0051 | 0.0048 | **0.0064**\* | +| Africa | -0.0107 | 0.0038 | 0.0041 | 0.0046 | +| Latin America | -0.0137 | 0.0033 | 0.0032 | 0.0028 | + +Spatial SEs use a truncated window with cutoff 75 (countries less than 75 units apart are nonzero, more distant are zero). Sweep range: 60-90 (truncated) and 75-225 (Bartlett). Six of nine spatial SEs are SMALLER than IID/HET counterparts. Conley emphasizes (page 21): "spatial dependence does not imply that standard errors will rise. ... asymptotic variances may be smaller with spatially dependent data, just as asymptotic variances can be lower for dependent time series averages than independent series averages." This is a non-obvious finding for practitioners and worth surfacing in diff-diff documentation/tutorials. + +*Algorithm (pairwise form for OLS, the diff-diff implementation target):* +1. Fit OLS: `β̂ = (X'X)^{-1} X'y`, residuals `ε̂ = y - Xβ̂`. +2. For each pair `(i, j)` of observations: + a. Compute `d_{ij}` via the configured metric (haversine for lat/lon in km; euclidean for projected coords; user callable allowed). + b. Compute kernel weight `w_{ij} = K(d_{ij} / h)` where `h = conley_cutoff_km`. + c. Skip pair if `w_{ij} == 0` (sparse fast path: pairs with `d_{ij} ≥ h` for kernels with compact support). +3. Form the meat: `S = Σ_{i,j} w_{ij} · (x_i ε̂_i)(x_j ε̂_j)'`. Note: with the standard Bartlett or uniform kernel and `K(0) = 1`, the `i = j` term contributes `x_i ε̂_i² x_i'` which is the HC0 diagonal. +4. Form the bread: `B = (X'X)^{-1}`. +5. 
Sandwich: `Var̂(β̂) = B · S · B`. +6. Optional: project to nearest PSD if eigendecomposition reveals negative eigenvalues (only for non-PSD kernels like uniform/truncated). + +For the lattice form (Equation 3.13), the weighting array `K_{MN}(j,k)` is indexed by lattice gaps, not pairwise distances. diff-diff's pairwise form is mathematically equivalent when the kernel is separable but is the more natural API for irregular real-valued locations. + +**Reference implementation(s):** +- **Stata `acreg`** (Colella, Lalive, Sakalli & Thoenig 2019, working paper "Inference with Arbitrary Clustering"): implements the pairwise form with Bartlett or uniform kernel. Cited as the modern Stata-canonical reference for diff-diff Phase 1. +- **Stata `conleyreg`** (Düsterhöft 2021, SSC archive): MATA-coded; supports haversine distance directly. +- **MATLAB `ols_spatial_HAC.m`** (Hsiang 2010, supplementary code for Hsiang 2010 PNAS): the original applied-econ reference implementation used by climate / development economists. +- **R `conleyreg`** (Düsterhöft 2021, CRAN port). +- **No reference R implementation of Conley appears in CRAN as of the paper's 1999 publication.** Modern R alternatives include `lfe::felm` (with `cmethod="cgm2"`, which is cluster-robust not Conley) and standalone Conley scripts circulated by Hsiang and Fetzer. + +The paper itself does NOT distribute code. Conley's Section 5 empirical example is reported numerically in Table 2 (page 21) but the underlying replication routine is not in the paper. + +**Requirements checklist:** +- [ ] Coordinates supplied as two columns (lat, lon) or `(x, y)` projected. +- [ ] Distance metric configured (haversine for lat/lon; euclidean for projected; callable for custom). +- [ ] Cutoff `conley_cutoff_km > 0` (or unitless `conley_cutoff` for euclidean). Document that `h = 0` reduces to HC0. +- [ ] Kernel choice `conley_kernel ∈ {"bartlett", "uniform"}`. Bartlett is PSD by construction; uniform is not in general (warn). 
+- [ ] Score outer products `x_i ε̂_i` computed identically to HC0 path. +- [ ] Robustness sweep: document that practitioners should report estimates at multiple cutoffs (Conley Section 5 standard). +- [ ] If `conley_kernel="uniform"` and the resulting variance has any negative eigenvalues, warn or fall back to Bartlett. + +--- + +## Implementation Notes + +### Data Structure Requirements +- New required columns when `vcov_method="conley"`: two coordinate columns named via `conley_coords=("lat","lon")`. Both must be finite floats. Reject NaN/Inf (no silent dropping, per `feedback_no_silent_failures`). +- Pre-fit checks: confirm coordinate columns exist and are numeric; confirm `conley_cutoff_km > 0`; confirm `conley_kernel ∈ {"bartlett", "uniform"}` (or callable signature); validate metric callable returns nonnegative scalar for two coordinate vectors. +- For two-way space x time (Phase 2 scope), additional time-key column needed; not in Phase 1. + +### Computational Considerations +- **Dense distance matrix is O(n²)** in both compute and memory. Conley's discussion (Section 4.3) frames this in terms of lattice-square indexing rather than pairwise distance, but the practitioner-canonical pairwise form realizes the full O(n²) cost. +- **Memory**: For `n = 10000` units and `float64`, the dense distance matrix is `8 · 10^8` bytes = 800 MB; the pairwise outer-product accumulator is `O(n² · k)` floats. Phase 1 should warn at `n > 5000` and refuse (or require explicit override) at `n > 50000`. +- **Sparse fast path (Phase 2)**: With Bartlett or uniform kernel, pairs at `d_{ij} ≥ h` contribute zero. A k-d tree (`scipy.spatial.cKDTree.query_ball_tree` with radius `h`) returns only neighbor pairs, reducing complexity to `O(n · k_avg)` where `k_avg` is the average number of neighbors within `h`. This matches the `acreg` "neighbors-only" inner loop. +- **Parallelization**: pairwise sums are embarrassingly parallel by `i`. 
Rust backend (Phase 2+) can process row-blocks in parallel. +- **PSD projection**: For non-PSD kernels (uniform), eigendecomposition + clamping negative eigenvalues to zero is `O(k³)` where `k` is the regressor count - cheap compared to the meat formation. + +### Tuning Parameters + +| Parameter | Type | Default | Selection Method | +|-----------|------|---------|-----------------| +| `vcov_method` | str | `"hc0"` | Set to `"conley"` to activate. | +| `conley_coords` | tuple of 2 str | `None` | User specifies the two column names for lat/lon (or projected x/y). Required when `vcov_method="conley"`. | +| `conley_cutoff_km` | float | `None` (no default) | User-supplied. Conley does not provide a plug-in selector. Recommend a robustness sweep (3-5 values spanning the relevant economic-distance range). For Phase 1, error if not supplied. | +| `conley_kernel` | str | `"bartlett"` | `"bartlett"` is PSD by construction (Conley Eq 3.14 page 12) and is the practitioner default. `"uniform"` matches Conley's "truncated window" (page 11) but may fail PSD; emit warning. | +| `conley_metric` | str or callable | `"haversine"` | `"haversine"` for lat/lon (km); `"euclidean"` for projected coords (units = whatever the coord units are - so if coords are degrees, cutoff is in degrees); a callable `(coord_i, coord_j) -> float` for custom metrics (e.g., travel time, network distance). | + +### Relation to Existing diff-diff Estimators +- **Composes with `compute_robust_vcov` in `diff_diff/linalg.py`**: Conley is a new value of `vcov_method` alongside `"hc0"`, `"hc1"`, `"cluster"`, `"crv1"`. The bread `(X'X)^{-1}` is unchanged; only the meat formation differs. +- **Reduces to HC0 when `conley_cutoff_km = 0` and no spatial ties**: with `K(d/0) = 1{d ≤ 0}` and all distinct units having `d_{ij} > 0`, the only nonzero terms are `i = j`, recovering the HC0 meat `Σ_i x_i ε̂_i² x_i'`. 
(Document this reduction; do NOT silently turn `cutoff=0` into HC0 - error out and tell the user to use `vcov_method="hc0"` directly.) +- **Differs from cluster-robust (`crv1`)**: cluster-robust uses a discrete group indicator `g_i = g_j` in place of `K(d_{ij}/h) = 1`. Conley with `conley_kernel="uniform"` and a cutoff that isolates exactly each cluster is the closest analog, but Conley is more general: it accommodates continuous-distance attenuation (Bartlett) and overlapping spatial neighborhoods (a unit can be "near" multiple others without belonging to a single cluster). +- **TWFE compatibility (Phase 1 scope)**: Conley replaces the meat in the standard sandwich. TWFE estimator `(X'X)^{-1} X'y` with absorbed fixed effects produces residuals `ε̂_i`; Conley's pairwise outer product over `(x_i ε̂_i, x_j ε̂_j)` is well-defined regardless of whether the fixed effects were absorbed or dummied. Phase 2 will extend to two-way space x time clustering (Cameron-Gelbach-Miller-style multi-way Conley). +- **Distance metrics**: paper is agnostic - Conley names "transportation costs" (Section 5 page 20), "physical distance" (page 2), "weather correlation" (page 3), "travel time" (page 2). Haversine vs euclidean is an applied-implementation choice not flagged in the paper. diff-diff's `conley_metric="haversine"` is the standard choice for lat/lon and matches Hsiang's MATLAB / Colella et al.'s `acreg`; `"euclidean"` is appropriate for projected coordinates. + +--- + +## Gaps and Uncertainties + +- **No 1-D pairwise kernel formula in the paper.** The paper only writes the 2-D product Bartlett (Equation 3.14, page 12). The applied-econ practitioner form `K(u) = max(0, 1 - |u|)` evaluated at `u = d_{ij}/h` is conventional but not derived in Conley 1999. Implementation should cite Conley 1999 for the framework, but credit the 1-D pairwise specialization to the downstream literature (Hsiang 2010, Colella et al. 2019). 
Page 18's general statement about `K(·,·)` "bounded continuous on [-1,1] x [-1,1]" with absolutely summable Fourier coefficients is the closest formal authority for kernels other than the explicit Bartlett product. + +- **Bandwidth selection is left open.** Conley's only formal restriction is `L_M, L_N = o((MN)^{1/3})` (page 12). The empirical example (page 21) sweeps the cutoff over a coarse grid and reports robustness. There is no Andrews (1991) plug-in selector or cross-validation procedure in the paper. The implementing engineer must either expose `conley_cutoff_km` as a required user-supplied parameter (Phase 1 plan) or implement a practitioner heuristic separately (e.g., median nearest-neighbor distance times a multiplier) and document it as a diff-diff convenience, not a Conley-1999 result. + +- **Haversine vs euclidean.** Conley works in R^2 with euclidean distance throughout (page 4 "Euclidean space, taken for the sake of exposition to be R^2"). The applied literature on country / county / household data routinely uses haversine on (lat, lon) - this is implicit in Hsiang (2010) and Colella et al. (2019). The paper does not address whether haversine satisfies the regularity conditions, but since haversine is just euclidean on the sphere and the mixing conditions are stated in terms of distance decay, the substantive content carries over for cutoffs small relative to the Earth's radius. Document this as an applied convention, not a theorem. + +- **Ties in distance (`d_{ij} = 0`, `i ≠ j`).** Page 19 says the estimator stays consistent under E1-E2 but the PSD-by-construction guarantee fails. Practitioner workflow: the user must assign nearby-but-distinct coordinates (Conley's example: "may be sensible to arbitrarily assign nearby but distinct locations to observations within a city"). diff-diff Phase 1 should NOT silently jitter; instead, error or warn. Defer auto-jittering to Phase 2 if requested. 
+ +- **OLS is just-identified GMM (page 19, Section 5).** The paper's full GMM machinery covers overidentified moment conditions; Conley uses OLS in Section 5 to simplify the comparison vs HC0. For Phase 1 (TWFE OLS), the GMM `D_0 = (X'X)^{-1}` and `Ω = I` specializations are exactly the right form. The full GMM `Ω_τ` weighting matrix (Equation 3.3 page 7) is NOT relevant to diff-diff Phase 1; it would apply only if diff-diff added a 2SLS / GMM estimator (out of scope). + +- **`λ = E W_s` factor in Equation 3.12.** In the lattice formulation, `C = λ^{-1} V` accounts for the fraction of lattice points actually sampled. In the practitioner pairwise form `Σ_{i,j} K(d_{ij}/h) X_i ε̂_i ε̂_j X_j'`, this factor is absorbed by the change of indexing (sum over actual observations rather than over lattice points). The implementing engineer should NOT multiply by `1/λ` in the pairwise form; this is already handled by summing over the realized sample. + +- **PSD failure for the truncated/uniform kernel** (footnote 11, page 11). Conley's exact wording: "This estimator will not always be PSD, unfortunately, since the spectral window corresponding to the step function space domain window (its Fourier transform) will be negative in some regions." Implementation guidance: when `conley_kernel="uniform"`, compute the eigenvalues of `Var̂(β̂)` after sandwich and if `min(eig) < 0`, either (a) warn and proceed (matches `acreg`), (b) clamp to PSD via eigendecomposition + zero-floor, or (c) redirect to Bartlett. Phase 1 plan: warn and proceed (option a) to match downstream-tool expectations. + +- **The empirical example uses cross-country growth regressions (page 20)**, NOT a DiD or panel setup. Conley does not work out the panel TWFE specialization in the paper. 
The diff-diff Phase 1 implementation extends Conley's machinery to TWFE OLS (which is a linear regression with absorbed fixed effects) - this is mechanically straightforward but the methodological extension warrants a citation to a downstream paper (e.g., Cameron-Miller 2015 review article, or Bester-Conley-Hansen 2011 spatial cluster bootstrap) in REGISTRY.md. + +- **Sample size / boundary-effect caveats.** The proof of Proposition 3 (pages 26-31) relies on the boundary terms `Σ_{s_i ∈ Λ_τ - Λ_τ*} g(X_{s_i}; b)` shrinking faster than the interior. For finite samples on irregular regions (e.g., a country with concave coastline), the interior approximation may be less tight. The paper does not give a finite-sample correction. Phase 1 should pass `T_τ - k` (regressors-adjusted) as the divisor in any HC1-style finite-sample correction, mirroring HC0/HC1 conventions; HC0 (no correction) is the canonical Conley form per the paper. + +- **Two-way (space x time) HAC**. Conley 1999 only treats cross-section. The diff-diff Phase 2 spec extends to space × time (e.g., panel DiD where units have both spatial proximity and serial correlation across periods). The natural generalization — a product kernel `K_space(d_{ij}/h_s) · K_time(|t_i - t_j|/h_t)` — is implicit in Conley's framework (Section 3.3 page 10 mentions "two-dimensional" spectral density estimation; the lattice in Conley's setup is already 2-D over `(m, n)` so a 3-D extension is mechanically straightforward) but is NOT formally proved. Phase 2 should cite either Hansen (2007) or Driscoll-Kraay (1998) for the panel-data extension, not Conley alone. + +- **No bandwidth-selection theorem.** Conley's bandwidth restriction (`o(n^{1/3})`) is purely a consistency rate; it gives no MSE-optimal rule. Andrews (1991) provides a plug-in selector for time series HAC under stronger assumptions; an analogous spatial plug-in does not appear in this paper. 
Modern alternatives (Bester-Conley-Hansen 2011 spatial cluster bootstrap; Müller 2014 worst-case bandwidth selection) are downstream developments not covered here. Document `conley_cutoff_km` as a user-required parameter with a robustness-sweep recommendation. + +- **Acknowledgments + provenance** (page 23): Paper "is taken from my Ph.D. thesis at the University of Chicago. An earlier version circulated with the title 'Econometric Modelling of Cross Sectional Dependence.'" Lars Hansen, James Heckman, José Scheinkman acknowledged. NSF / Searle / Reid fellowships funded. The empirical example draws on the unpublished working paper Conley & Ligon (1995) "Economic distance, spillovers, and growth." This connects directly to the diff-diff Phase 3 spillover-regressor work — Conley & Ligon's notion of "spillover via economic distance" is the conceptual foundation for the bias-side spillover discussion in our Phase 3 plan. + +- **Bolthausen (1982) CLT dependency.** The asymptotic-normality result (Proposition 2) leans on Bolthausen's central limit theorem for stationary mixing random fields on regular lattices (page 9; cited in proofs page 25). The Bolthausen reference is "On the central limit theorem for stationary mixing random fields" *Annals of Probability* 10, 1047-1050. For implementation correctness this is not load-bearing, but anyone porting the proofs (e.g., for a panel space-time extension) needs the Bolthausen technical inputs, which include rectangular sample regions (page 25, paragraph above mixing-coefficient definition `π(Λ_1, Λ_2)`). Footnote 9 (page 10) flags that "extension to non-rectangular `Λ_τ` is straightforward" but tedious. + +- **Treatment of `K(0,0) = 1` in Equation 3.13.** The summation indices in Equation 3.13 are `j = 0, ..., L_M`, `k = 0, ..., L_N`, `m = j+1, ..., M`, `n = k+1, ..., N`. 
The `(j, k) = (0, 0)` term contributes `Σ_{m,n} K_{MN}(0,0) Y_{m,n} Y_{m,n}'` doubled (because of the symmetric `[Y Y' + Y' Y]` term), which is then de-duplicated by the explicit subtraction `-(1/T_τ) Σ Y_{m,n} Y_{m,n}'`. After cancellation the diagonal `i = j` contribution is exactly `Σ_i Y_i Y_i' = Σ_i x_i ε̂_i² x_i'` (the HC0 meat). The implementing engineer must reproduce this de-duplication carefully in the pairwise form: the formula `Σ_{i,j} K(d_{ij}/h) X_i ε̂_i ε̂_j X_j'` over **all** ordered pairs `(i, j)` (including `i = j`) automatically gives the right diagonal contribution `K(0) · X_i ε̂_i² X_i' = X_i ε̂_i² X_i'` (since `K(0) = 1`) and the right off-diagonal `K(d_{ij}/h) · X_i ε̂_i ε̂_j X_j'` for `i ≠ j` summed over `(i, j)` and `(j, i)` (both directions). No separate de-duplication needed in the pairwise form. Phase 1 unit test: assert that at `h → 0+` (no spatial ties), the Conley meat equals the HC0 meat to machine precision. diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py new file mode 100644 index 00000000..e7f4cb92 --- /dev/null +++ b/tests/test_conley_vcov.py @@ -0,0 +1,1019 @@ +"""Tests for Conley (1999) spatial HAC variance estimator. + +Phase 1 scope: pure-numerics helpers (kernels, distance metrics, direct +sandwich helper) and the dispatch-level validator. Estimator-level +integration tests, set_params atomicity, and Stata acreg parity land in +later Phase 1 checkpoints (Steps 4-6 of the plan). 
+""" + +import warnings + +import numpy as np +import pytest + +from diff_diff.conley import ( + _CONLEY_EARTH_RADIUS_KM, + _bartlett_kernel, + _compute_conley_vcov, + _haversine_km, + _pairwise_distance_matrix, + _uniform_kernel, + _validate_conley_kwargs, +) +from diff_diff.linalg import ( + LinearRegression, + compute_robust_vcov, + solve_ols, +) + +# --------------------------------------------------------------------------- +# Shared fixtures (small synthetic OLS dataset with geocoords) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def small_ols_with_coords(): + """20-row OLS dataset with synthetic lat/lon. Used across helper tests.""" + rng = np.random.default_rng(seed=42) + n = 20 + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = rng.standard_normal(n) + y = X @ np.array([1.0, 2.0]) + eps + coefs, *_ = np.linalg.lstsq(X, y, rcond=None) + residuals = y - X @ coefs + bread = X.T @ X + coords = np.column_stack( + [ + rng.uniform(-30, 30, n), # lat + rng.uniform(-100, 100, n), # lon + ] + ) + return X, residuals, coords, bread + + +# --------------------------------------------------------------------------- +# TestConleyKernels — Bartlett / uniform shape and boundary behavior +# --------------------------------------------------------------------------- + + +class TestConleyKernels: + def test_bartlett_at_zero(self): + np.testing.assert_allclose(_bartlett_kernel(np.array([0.0])), 1.0) + + def test_bartlett_at_one(self): + np.testing.assert_allclose(_bartlett_kernel(np.array([1.0])), 0.0) + + def test_bartlett_above_one_zero(self): + u = np.array([1.5, 2.0, 100.0]) + np.testing.assert_allclose(_bartlett_kernel(u), np.zeros(3)) + + def test_bartlett_negative_arg_symmetric(self): + """Bartlett uses |u|, so K(-0.3) == K(0.3).""" + np.testing.assert_allclose( + _bartlett_kernel(np.array([-0.3])), _bartlett_kernel(np.array([0.3])) + ) + + def test_uniform_kernel_at_boundary(self): + """Uniform 
kernel is closed on the right: K(1) = 1, K(1+eps) = 0.""" + np.testing.assert_allclose(_uniform_kernel(np.array([1.0])), 1.0) + + def test_uniform_kernel_above_one_zero(self): + np.testing.assert_allclose(_uniform_kernel(np.array([1.0001, 2.0, 100.0])), np.zeros(3)) + + def test_uniform_kernel_at_zero_one(self): + np.testing.assert_allclose(_uniform_kernel(np.array([0.0])), 1.0) + + def test_bartlett_psd_on_random_distances(self): + """Bartlett-weighted Gram matrix has all eigenvalues >= -tol.""" + rng = np.random.default_rng(seed=11) + n = 25 + coords = rng.uniform(0, 1, size=(n, 2)) + diff = coords[:, None, :] - coords[None, :, :] + D = np.sqrt((diff * diff).sum(axis=-1)) + K = _bartlett_kernel(D / 0.3) + eigvals = np.linalg.eigvalsh(0.5 * (K + K.T)) # ensure symmetric + assert eigvals.min() > -1e-12 + + +# --------------------------------------------------------------------------- +# TestConleyDistanceMetrics — haversine, euclidean, callable +# --------------------------------------------------------------------------- + + +class TestConleyDistanceMetrics: + def test_haversine_known_pair_one_degree_equator(self): + """1° longitude at the equator = 2π·R/360 ≈ 111.195 km (R=6371).""" + d = _haversine_km(np.array(0.0), np.array(0.0), np.array(0.0), np.array(1.0)) + expected = 2 * np.pi * _CONLEY_EARTH_RADIUS_KM / 360.0 + np.testing.assert_allclose(d, expected, atol=1e-9) + + def test_haversine_zero_self_distance(self): + d = _haversine_km(np.array(45.0), np.array(-122.0), np.array(45.0), np.array(-122.0)) + np.testing.assert_allclose(d, 0.0, atol=1e-12) + + def test_haversine_symmetric(self): + d_ab = _haversine_km(np.array(40.7), np.array(-74.0), np.array(34.0), np.array(-118.2)) + d_ba = _haversine_km(np.array(34.0), np.array(-118.2), np.array(40.7), np.array(-74.0)) + np.testing.assert_allclose(d_ab, d_ba, atol=1e-12) + + def test_haversine_pole_to_equator(self): + """North pole to equator at any longitude = π/2 · R = ~10007.5 km.""" + d = 
_haversine_km(np.array(90.0), np.array(0.0), np.array(0.0), np.array(0.0)) + expected = np.pi * _CONLEY_EARTH_RADIUS_KM / 2.0 + np.testing.assert_allclose(d, expected, atol=1e-9) + + def test_haversine_broadcasting_pairwise(self): + """Broadcasting (n, 1) vs (1, n) yields the n×n distance matrix.""" + coords = np.array([[0.0, 0.0], [0.0, 1.0], [0.0, 2.0]]) + lats = coords[:, 0] + lons = coords[:, 1] + D = _haversine_km(lats[:, None], lons[:, None], lats[None, :], lons[None, :]) + assert D.shape == (3, 3) + np.testing.assert_allclose(np.diag(D), 0.0, atol=1e-12) + # D[0, 2] should be 2 * D[0, 1] for collinear-equator points + np.testing.assert_allclose(D[0, 2], 2.0 * D[0, 1], rtol=1e-10) + + def test_pairwise_distance_haversine(self): + coords = np.array([[0.0, 0.0], [0.0, 1.0], [10.0, 0.0]]) + D = _pairwise_distance_matrix(coords, "haversine") + assert D.shape == (3, 3) + np.testing.assert_allclose(D, D.T, atol=1e-12) + np.testing.assert_allclose(np.diag(D), 0.0, atol=1e-12) + + def test_pairwise_distance_euclidean_matches_pdist(self): + """Euclidean path matches scipy.spatial.distance squareform exactly.""" + from scipy.spatial.distance import pdist, squareform + + rng = np.random.default_rng(seed=7) + coords = rng.uniform(0, 100, size=(15, 2)) + D = _pairwise_distance_matrix(coords, "euclidean") + D_scipy = squareform(pdist(coords, metric="euclidean")) + np.testing.assert_allclose(D, D_scipy, atol=1e-12) + + def test_pairwise_distance_callable(self): + """A user-supplied callable is dispatched and its output preserved.""" + coords = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]) + + def constant_metric(c1, c2): + n1 = len(c1) + n2 = len(c2) + return np.full((n1, n2), 5.0) + + D = _pairwise_distance_matrix(coords, constant_metric) + np.testing.assert_allclose(D, np.full((3, 3), 5.0)) + + def test_pairwise_distance_unknown_metric_raises(self): + """Unknown metric strings raise ValueError from the dispatcher.""" + with pytest.raises(ValueError, 
match="conley_metric"): + _pairwise_distance_matrix(np.zeros((3, 2)), "manhattan") + + +# --------------------------------------------------------------------------- +# TestConleyValidatorHelpers — direct calls to _validate_conley_kwargs +# --------------------------------------------------------------------------- + + +class TestConleyValidatorHelpers: + def test_missing_coords_raises(self): + with pytest.raises(ValueError, match="conley_coords"): + _validate_conley_kwargs( + coords=None, cutoff=100.0, metric="haversine", kernel="bartlett", n=10 + ) + + def test_missing_cutoff_raises(self): + with pytest.raises(ValueError, match="conley_cutoff_km"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=None, + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_zero_cutoff_raises(self): + with pytest.raises(ValueError, match="positive finite"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=0.0, + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_negative_cutoff_raises(self): + with pytest.raises(ValueError, match="positive finite"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=-5.0, + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_nan_cutoff_raises(self): + with pytest.raises(ValueError, match="positive finite"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=float("nan"), + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_inf_cutoff_raises(self): + with pytest.raises(ValueError, match="positive finite"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=float("inf"), + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_3d_coords_raises(self): + with pytest.raises(ValueError, match=r"\(n, 2\)"): + _validate_conley_kwargs( + coords=np.zeros((10, 3)), + cutoff=100.0, + metric="haversine", + kernel="bartlett", + n=10, + ) + + def test_coord_n_mismatch_raises(self): + with pytest.raises(ValueError, match="rows 
but X has"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=100.0, + metric="haversine", + kernel="bartlett", + n=15, + ) + + def test_nan_coord_raises(self): + bad = np.zeros((10, 2)) + bad[3, 1] = np.nan + with pytest.raises(ValueError, match="NaN or inf"): + _validate_conley_kwargs( + coords=bad, cutoff=100.0, metric="haversine", kernel="bartlett", n=10 + ) + + def test_lat_out_of_range_raises_haversine(self): + coords = np.array([[91.0, 0.0]] + [[0.0, 0.0]] * 9) + with pytest.raises(ValueError, match=r"latitude in \[-90, 90\]"): + _validate_conley_kwargs( + coords=coords, cutoff=100.0, metric="haversine", kernel="bartlett", n=10 + ) + + def test_lon_out_of_range_raises_haversine(self): + coords = np.array([[0.0, 200.0]] + [[0.0, 0.0]] * 9) + with pytest.raises(ValueError, match=r"longitude in \[-180, 180\]"): + _validate_conley_kwargs( + coords=coords, cutoff=100.0, metric="haversine", kernel="bartlett", n=10 + ) + + def test_lat_out_of_range_skipped_for_euclidean(self): + """Projected coords are unconstrained — euclidean skips lat/lon checks.""" + coords = np.array([[5000.0, 12000.0]] * 10) # any units + # Should not raise + _validate_conley_kwargs( + coords=coords, cutoff=100.0, metric="euclidean", kernel="bartlett", n=10 + ) + + def test_unknown_kernel_raises(self): + with pytest.raises(ValueError, match="conley_kernel"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=100.0, + metric="haversine", + kernel="gaussian", + n=10, + ) + + def test_unknown_metric_raises(self): + with pytest.raises(ValueError, match="conley_metric"): + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=100.0, + metric="manhattan", + kernel="bartlett", + n=10, + ) + + def test_callable_metric_accepted(self): + """Callable distance metric passes validation (delegated to caller).""" + _validate_conley_kwargs( + coords=np.zeros((10, 2)), + cutoff=100.0, + metric=lambda c1, c2: np.zeros((len(c1), len(c2))), + kernel="bartlett", + n=10, + ) + + 
def test_n_above_warn_threshold_warns(self): + with pytest.warns(UserWarning, match="dense"): + _validate_conley_kwargs( + coords=np.zeros((20_001, 2)), + cutoff=100.0, + metric="euclidean", + kernel="bartlett", + n=20_001, + ) + + def test_n_below_warn_threshold_no_warning(self): + with warnings.catch_warnings(): + warnings.simplefilter("error") # any warning becomes an error + _validate_conley_kwargs( + coords=np.zeros((100, 2)), + cutoff=100.0, + metric="euclidean", + kernel="bartlett", + n=100, + ) + + +# --------------------------------------------------------------------------- +# TestConleyDirectHelper — _compute_conley_vcov correctness +# --------------------------------------------------------------------------- + + +class TestConleyDirectHelper: + def test_returns_psd_with_bartlett(self, small_ols_with_coords): + X, residuals, coords, bread = small_ols_with_coords + vcov = _compute_conley_vcov( + X, + residuals, + coords, + cutoff=2000.0, + metric="haversine", + kernel="bartlett", + bread_matrix=bread, + ) + eigvals = np.linalg.eigvalsh(0.5 * (vcov + vcov.T)) + assert eigvals.min() > -1e-10 + + def test_symmetric_vcov(self, small_ols_with_coords): + X, residuals, coords, bread = small_ols_with_coords + vcov = _compute_conley_vcov( + X, + residuals, + coords, + cutoff=2000.0, + metric="haversine", + kernel="bartlett", + bread_matrix=bread, + ) + np.testing.assert_allclose(vcov, vcov.T, atol=1e-10) + + def test_shape_matches_bread(self, small_ols_with_coords): + X, residuals, coords, bread = small_ols_with_coords + vcov = _compute_conley_vcov( + X, + residuals, + coords, + cutoff=1500.0, + metric="haversine", + kernel="bartlett", + bread_matrix=bread, + ) + k = X.shape[1] + assert vcov.shape == (k, k) + + def test_uniform_kernel_negative_eigenvalue_warns(self): + """Construct a degenerate setup that produces a uniform-kernel + meat with a small negative eigenvalue. Verifies the PSD-warning + path. 
The setup uses two clusters of identical-coordinate points so + the uniform-kernel meat reduces to a known structure that is + numerically borderline.""" + rng = np.random.default_rng(seed=1) + n = 30 + # Mix of identical-coord pairs; uniform kernel sums full pairs + coords = np.repeat(rng.uniform(0, 1, size=(n // 2, 2)), 2, axis=0) + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = rng.standard_normal(n) + bread = X.T @ X + # No assertion on the exact meat — only that the PSD path is + # exercised. The warning may or may not fire depending on numerical + # condition; this test mainly ensures the code path runs without error. + with warnings.catch_warnings(): + warnings.simplefilter("always") + _compute_conley_vcov( + X, + eps, + coords, + cutoff=10.0, + metric="euclidean", + kernel="uniform", + bread_matrix=bread, + ) + + +# --------------------------------------------------------------------------- +# TestConleyReductions — Bartlett+tiny cutoff → HC0 meat; etc. +# --------------------------------------------------------------------------- + + +class TestConleyReductions: + def test_tiny_cutoff_distinct_coords_yields_HC0_meat(self): + """When the bandwidth is much smaller than the minimum pairwise + distance, Conley's kernel is ~0 off-diagonal and 1 on-diagonal, so + the meat reduces to Σ ε_i² x_i x_i' = HC0 meat. 
+ """ + rng = np.random.default_rng(seed=3) + n = 15 + # Distinct coords with min pairwise distance >> 0 + coords = np.column_stack([np.arange(n) * 100.0, np.arange(n) * 100.0]) + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = rng.standard_normal(n) + bread = X.T @ X + + # HC0 meat (Σ x_i x_i' u_i²) — no DOF correction applied + meat_hc0 = X.T @ (X * (eps**2)[:, None]) + bread_inv = np.linalg.solve(bread, np.eye(2)) + vcov_hc0 = bread_inv @ meat_hc0 @ bread_inv + + vcov_conley = _compute_conley_vcov( + X, + eps, + coords, + cutoff=1.0, # << minimum pairwise distance + metric="euclidean", + kernel="bartlett", + bread_matrix=bread, + ) + np.testing.assert_allclose(vcov_conley, vcov_hc0, atol=1e-12) + + def test_uniform_kernel_tiny_cutoff_yields_HC0_meat(self): + """Same reduction with the uniform kernel.""" + rng = np.random.default_rng(seed=5) + n = 12 + coords = np.column_stack([np.arange(n) * 100.0, np.arange(n) * 100.0]) + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = rng.standard_normal(n) + bread = X.T @ X + + meat_hc0 = X.T @ (X * (eps**2)[:, None]) + bread_inv = np.linalg.solve(bread, np.eye(2)) + vcov_hc0 = bread_inv @ meat_hc0 @ bread_inv + + vcov_conley = _compute_conley_vcov( + X, + eps, + coords, + cutoff=0.5, + metric="euclidean", + kernel="uniform", + bread_matrix=bread, + ) + np.testing.assert_allclose(vcov_conley, vcov_hc0, atol=1e-12) + + def test_huge_cutoff_NOT_HC0(self, small_ols_with_coords): + """When cutoff dwarfs all pairwise distances, K -> ones(n, n) and + meat = (X·ε)' ones (X·ε) which is the rank-1 outer product of summed + scores — NOT HC0. 
This is the all-correlated limit.""" + X, residuals, coords, bread = small_ols_with_coords + vcov_conley = _compute_conley_vcov( + X, + residuals, + coords, + cutoff=1e9, + metric="euclidean", + kernel="uniform", + bread_matrix=bread, + ) + # HC0 for comparison + meat_hc0 = X.T @ (X * (residuals**2)[:, None]) + bread_inv = np.linalg.solve(bread, np.eye(X.shape[1])) + vcov_hc0 = bread_inv @ meat_hc0 @ bread_inv + # They should differ noticeably + assert not np.allclose(vcov_conley, vcov_hc0, atol=1e-6) + + def test_dispatch_matches_direct_helper(self, small_ols_with_coords): + """compute_robust_vcov(vcov_type='conley', ...) returns the same + vcov as a direct call to _compute_conley_vcov on the same inputs. + Atol=1e-14 (bit-equivalence).""" + X, residuals, coords, bread = small_ols_with_coords + vcov_dispatch = compute_robust_vcov( + X, + residuals, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=2000.0, + conley_metric="haversine", + conley_kernel="bartlett", + ) + vcov_direct = _compute_conley_vcov( + X, + residuals, + coords, + 2000.0, + "haversine", + "bartlett", + bread, + ) + np.testing.assert_allclose(vcov_dispatch, vcov_direct, atol=1e-14, rtol=1e-14) + + def test_dispatch_returns_dof_when_requested(self, small_ols_with_coords): + """return_dof=True returns (vcov, dof_vec) tuple where dof = n - k.""" + X, residuals, coords, _ = small_ols_with_coords + out = compute_robust_vcov( + X, + residuals, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=2000.0, + return_dof=True, + ) + assert isinstance(out, tuple) and len(out) == 2 + _vcov, dof = out + n, k = X.shape + np.testing.assert_array_equal(dof, np.full(k, n - k, dtype=np.float64)) + + +class TestConleyValidationDispatch: + """Validation tests at the compute_robust_vcov dispatch level.""" + + @pytest.fixture + def fit_inputs(self): + rng = np.random.default_rng(seed=0) + n = 12 + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + residuals = rng.standard_normal(n) + 
coords = rng.uniform(-10, 10, size=(n, 2)) + return X, residuals, coords + + def test_conley_in_valid_set(self): + """Sanity: 'conley' is in the canonical _VALID_VCOV_TYPES set.""" + from diff_diff.linalg import _VALID_VCOV_TYPES + + assert "conley" in _VALID_VCOV_TYPES + + def test_conley_with_cluster_raises(self, fit_inputs): + X, residuals, coords = fit_inputs + with pytest.raises(NotImplementedError, match="conley.*cluster_ids"): + compute_robust_vcov( + X, + residuals, + cluster_ids=np.arange(len(X)) // 3, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=100.0, + ) + + def test_conley_with_weights_raises(self, fit_inputs): + X, residuals, coords = fit_inputs + with pytest.raises(NotImplementedError, match="conley.*weights"): + compute_robust_vcov( + X, + residuals, + weights=np.ones(len(X)), + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=100.0, + ) + + def test_conley_without_coords_raises(self, fit_inputs): + X, residuals, _ = fit_inputs + with pytest.raises(ValueError, match="conley_coords"): + compute_robust_vcov( + X, + residuals, + vcov_type="conley", + conley_cutoff_km=100.0, + ) + + def test_conley_without_cutoff_raises(self, fit_inputs): + X, residuals, coords = fit_inputs + with pytest.raises(ValueError, match="conley_cutoff_km"): + compute_robust_vcov( + X, + residuals, + vcov_type="conley", + conley_coords=coords, + ) + + +class TestConleyLinearRegression: + """Step 3 smoke tests: LinearRegression and solve_ols thread Conley + kwargs to compute_robust_vcov. 
Covers both the higher-level + LinearRegression API and the lower-level solve_ols entrypoint.""" + + @pytest.fixture + def fit_data(self): + rng = np.random.default_rng(seed=42) + n = 25 + X = rng.standard_normal(size=(n, 2)) + y = X @ np.array([1.0, 2.0]) + rng.standard_normal(n) + coords = rng.uniform(-30, 30, size=(n, 2)) + return X, y, coords + + def test_linear_regression_conley_runs(self, fit_data): + X, y, coords = fit_data + reg = LinearRegression( + vcov_type="conley", + include_intercept=True, + conley_coords=coords, + conley_cutoff_km=2000.0, + ).fit(X, y) + assert reg.vcov_ is not None + assert reg.vcov_.shape == (3, 3) # +1 for intercept + # Diagonal entries are SE^2 — must be finite and positive + diag = np.diag(reg.vcov_) + assert np.all(np.isfinite(diag)) + assert np.all(diag > 0) + + def test_linear_regression_conley_matches_direct(self, fit_data): + """LinearRegression(vcov_type='conley', ...) ⇔ compute_robust_vcov direct + call produces the same vcov on the same X (with intercept added).""" + X, y, coords = fit_data + reg = LinearRegression( + vcov_type="conley", + include_intercept=True, + conley_coords=coords, + conley_cutoff_km=2000.0, + ).fit(X, y) + # Reproduce X with intercept that LinearRegression built internally + X_intercept = np.column_stack([np.ones(X.shape[0]), X]) + coefs, *_ = np.linalg.lstsq(X_intercept, y, rcond=None) + residuals = y - X_intercept @ coefs + vcov_direct = compute_robust_vcov( + X_intercept, + residuals, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=2000.0, + ) + np.testing.assert_allclose(reg.vcov_, vcov_direct, atol=1e-10, rtol=1e-10) + + def test_solve_ols_conley_path(self, fit_data): + """solve_ols(vcov_type='conley', ...) 
returns finite vcov.""" + X, y, coords = fit_data + coefs, residuals, vcov = solve_ols( + X, + y, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=2000.0, + skip_rank_check=True, + ) + assert vcov is not None + assert np.all(np.isfinite(np.diag(vcov))) + + +class TestConleyEstimatorIntegration: + """Step 4 smoke tests: DifferenceInDifferences and MultiPeriodDiD accept + vcov_type='conley' with the conley_* kwargs and produce finite SEs. + Also tests that summary() prints the Conley label.""" + + @pytest.fixture + def two_period_panel(self): + rng = np.random.default_rng(seed=11) + n_units = 40 + units = np.arange(n_units) + treated = (units < 20).astype(int) + rows = [] + for u in units: + lat = rng.uniform(-30, 30) + lon = rng.uniform(-100, 100) + for t in [0, 1]: + y = 1.0 + 0.5 * t + (1.0 if (treated[u] and t == 1) else 0.0) + rng.normal(0, 0.5) + rows.append( + {"unit": u, "time": t, "y": y, "treated": treated[u], "lat": lat, "lon": lon} + ) + import pandas as pd + + return pd.DataFrame(rows) + + def test_did_basic_with_conley(self, two_period_panel): + """DifferenceInDifferences fits with vcov_type='conley' and produces + finite SE > 0.""" + from diff_diff import DifferenceInDifferences + + df = two_period_panel.copy() + df["did"] = df["treated"] * df["time"] + result = DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(df, outcome="y", treatment="treated", time="time") + assert np.isfinite(result.se) and result.se > 0 + assert result.vcov_type == "conley" + assert result.conley_cutoff_km == 2000.0 + assert result.conley_kernel == "bartlett" + + def test_did_summary_includes_conley_label(self, two_period_panel): + from diff_diff import DifferenceInDifferences + + df = two_period_panel.copy() + result = DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=1500.0, + ).fit(df, outcome="y", treatment="treated", time="time") + out = 
result.summary() + assert "Conley spatial HAC" in out + assert "1500" in out + assert "bartlett" in out + + def test_multi_period_did_with_conley(self, two_period_panel): + from diff_diff import MultiPeriodDiD + + # Build a 4-period panel for MultiPeriodDiD + rng = np.random.default_rng(seed=13) + n_units = 30 + rows = [] + for u in range(n_units): + lat = rng.uniform(-30, 30) + lon = rng.uniform(-100, 100) + treated = u < 15 + for t in range(4): + y = 0.2 * t + (1.0 if (treated and t >= 2) else 0.0) + rng.normal(0, 0.5) + rows.append( + {"unit": u, "time": t, "y": y, "treated": int(treated), "lat": lat, "lon": lon} + ) + import pandas as pd + + df_mp = pd.DataFrame(rows) + result = MultiPeriodDiD( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(df_mp, outcome="y", treatment="treated", time="time", reference_period=1) + assert np.isfinite(result.avg_se) and result.avg_se > 0 + assert result.vcov_type == "conley" + + +class TestConleyTWFE: + """Step 5: TwoWayFixedEffects with Conley SE. + + TWFE composes with Conley because the meat depends only on scores X*epsilon, + both of which FWL preserves under within-transformation. This is UNLIKE + hc2/hc2_bm which depend on the full hat matrix and are rejected on TWFE. 
+ """ + + @pytest.fixture + def panel(self): + """Build a 2-period panel with geocoords for TWFE testing.""" + rng = np.random.default_rng(seed=17) + n_units = 30 + rows = [] + for u in range(n_units): + lat = rng.uniform(-30, 30) + lon = rng.uniform(-100, 100) + treated = u < 15 + unit_fe = rng.normal(0, 0.3) + for t in range(2): + time_fe = 0.5 if t == 1 else 0.0 + effect = 1.0 if (treated and t == 1) else 0.0 + y = unit_fe + time_fe + effect + rng.normal(0, 0.4) + rows.append( + {"unit": u, "time": t, "y": y, "treated": int(treated), "lat": lat, "lon": lon} + ) + import pandas as pd + + return pd.DataFrame(rows) + + def test_twfe_conley_runs(self, panel): + from diff_diff import TwoWayFixedEffects + + result = TwoWayFixedEffects( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") + assert np.isfinite(result.se) and result.se > 0 + assert result.vcov_type == "conley" + assert result.conley_cutoff_km == 2000.0 + assert result.cluster_name is None # auto-cluster disabled under conley + + def test_twfe_conley_with_explicit_cluster_raises(self, panel): + """User explicitly setting cluster=... 
with conley should raise.""" + from diff_diff import TwoWayFixedEffects + + with pytest.raises(NotImplementedError, match="conley"): + TwoWayFixedEffects( + vcov_type="conley", + cluster="unit", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") + + def test_twfe_conley_FWL_invariance(self, panel): + """TWFE Conley SE matches DifferenceInDifferences with same kwargs + (verifies FWL composability — Conley meat survives within-transformation + because it depends only on scores X*epsilon).""" + from diff_diff import DifferenceInDifferences, TwoWayFixedEffects + + twfe_result = TwoWayFixedEffects( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") + # DiD equivalent: simple 2x2, no FE within-transformation + did_result = DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time") + # ATT estimates should be similar (panel structure differs only in FE handling). + # We don't expect bit-equivalence — DiD without FE absorbs unit FE + # into the residuals while TWFE removes them. The key invariance is + # that the SE families are both finite and reasonable. 
+ assert np.isfinite(twfe_result.se) and twfe_result.se > 0 + assert np.isfinite(did_result.se) and did_result.se > 0 + + +class TestConleyEstimatorValidation: + """Step 4 validation: estimator-level rejections for invalid combinations.""" + + @pytest.fixture + def df(self): + import pandas as pd + + rng = np.random.default_rng(seed=2) + n = 20 + return pd.DataFrame( + { + "unit": np.arange(n), + "time": np.tile([0, 1], n // 2), + "y": rng.standard_normal(n), + "treated": np.tile([0, 1], n // 2), + "lat": rng.uniform(-30, 30, n), + "lon": rng.uniform(-100, 100, n), + "stratum": np.tile([0, 1, 2, 3], n // 4), + } + ) + + def test_did_conley_with_cluster_raises(self, df): + from diff_diff import DifferenceInDifferences + + with pytest.raises(NotImplementedError, match="cluster.*conley"): + DifferenceInDifferences( + vcov_type="conley", + cluster="stratum", + conley_coords=("lat", "lon"), + conley_cutoff_km=100.0, + ).fit(df, outcome="y", treatment="treated", time="time") + + def test_did_conley_without_coords_raises(self, df): + from diff_diff import DifferenceInDifferences + + with pytest.raises(ValueError, match="conley_coords"): + DifferenceInDifferences( + vcov_type="conley", + conley_cutoff_km=100.0, + ).fit(df, outcome="y", treatment="treated", time="time") + + def test_did_conley_without_cutoff_raises(self, df): + from diff_diff import DifferenceInDifferences + + with pytest.raises(ValueError, match="conley_cutoff_km"): + DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + ).fit(df, outcome="y", treatment="treated", time="time") + + def test_did_conley_unknown_coord_column_raises(self, df): + from diff_diff import DifferenceInDifferences + + with pytest.raises(ValueError, match="not in `data`"): + DifferenceInDifferences( + vcov_type="conley", + conley_coords=("missing_lat", "lon"), + conley_cutoff_km=100.0, + ).fit(df, outcome="y", treatment="treated", time="time") + + def test_did_conley_with_absorb_raises(self, df): + from 
diff_diff import DifferenceInDifferences + + with pytest.raises(NotImplementedError, match="absorb.*conley"): + DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=100.0, + ).fit(df, outcome="y", treatment="treated", time="time", absorb=["unit"]) + + def test_synthetic_did_conley_raises(self): + from diff_diff import SyntheticDiD + + with pytest.raises(TypeError, match="conley"): + SyntheticDiD(vcov_type="conley") # type: ignore[call-arg] + + def test_synthetic_did_conley_kwarg_raises(self): + from diff_diff import SyntheticDiD + + with pytest.raises(TypeError, match="conley"): + SyntheticDiD(conley_cutoff_km=100.0) # type: ignore[call-arg] + + +class TestConleySetParamsAtomicity: + """set_params atomicity for Conley fields. Per + feedback_transactional_set_params: invalid multi-kwarg call must not + leave the estimator in a partial state.""" + + def test_unknown_kwarg_raises_no_mutation(self): + from diff_diff import DifferenceInDifferences + + est = DifferenceInDifferences(conley_coords=("lat", "lon"), conley_cutoff_km=100.0) + # Pre-call snapshot + before_cutoff = est.conley_cutoff_km + before_kernel = est.conley_kernel + # set_params with valid + unknown key → must raise & not mutate + with pytest.raises(ValueError, match="Unknown parameter"): + est.set_params(conley_cutoff_km=200.0, garbage_field="x") + # Verify state did NOT change + assert est.conley_cutoff_km == before_cutoff + assert est.conley_kernel == before_kernel + + def test_valid_kwargs_apply(self): + from diff_diff import DifferenceInDifferences + + est = DifferenceInDifferences(conley_coords=("lat", "lon"), conley_cutoff_km=100.0) + est.set_params(conley_cutoff_km=250.0, conley_kernel="uniform") + assert est.conley_cutoff_km == 250.0 + assert est.conley_kernel == "uniform" + + +class TestConleyParityR: + """R conleyreg parity for the Conley spatial HAC implementation. 
+ + Skips when the golden JSON is absent (CI's isolated-install job copies + only tests/, not benchmarks/). Local regeneration: + cd benchmarks/R && Rscript generate_conley_golden.R + """ + + GOLDEN_PATH = "benchmarks/data/r_conleyreg_conley_golden.json" + PARITY_TOL = 1e-6 # Phase 1 success criterion + + @pytest.fixture(scope="class") + def golden(self): + import json + from pathlib import Path + + repo_root = Path(__file__).resolve().parent.parent + path = repo_root / self.GOLDEN_PATH + if not path.exists(): + pytest.skip( + f"Golden JSON not present at {path}; run " + "`cd benchmarks/R && Rscript generate_conley_golden.R` to generate. " + "Requires conleyreg R package + sf/lwgeom + system libs gdal/proj/geos/udunits." + ) + return json.loads(path.read_text()) + + def _check_fixture(self, golden, name): + entry = golden[name] + X = np.asarray(entry["x"], dtype=np.float64).reshape(entry["x_shape"]) + y = np.asarray(entry["y"], dtype=np.float64) + coords = np.asarray(entry["coords"], dtype=np.float64).reshape(entry["coords_shape"]) + vcov_expected = np.asarray(entry["vcov"], dtype=np.float64).reshape(entry["vcov_shape"]) + + coefs, *_ = np.linalg.lstsq(X, y, rcond=None) + residuals = y - X @ coefs + vcov_got = compute_robust_vcov( + X, + residuals, + vcov_type="conley", + conley_coords=coords, + conley_cutoff_km=entry["cutoff_km"], + conley_metric=entry["metric"], + conley_kernel=entry["kernel"], + ) + np.testing.assert_allclose( + vcov_got, vcov_expected, atol=self.PARITY_TOL, rtol=self.PARITY_TOL + ) + + def test_parity_small_haversine(self, golden): + self._check_fixture(golden, "small_haversine") + + def test_parity_dense_haversine(self, golden): + self._check_fixture(golden, "dense_haversine") + + def test_parity_lat_lon_realistic(self, golden): + self._check_fixture(golden, "lat_lon_realistic") + + +class TestConleyReductionsAddendum: + """Additional reduction tests not covered by the helper-direct class. 
+ + Placeholder: the helper-direct class already covers the essential + reductions (HC0 at tiny cutoff, K=ones at huge cutoff, etc.). + Kept here so future test expansions have a clear class to attach to. + """ + + def test_diagonal_of_meat_equals_HC0_contribution(self): + """For any kernel, K(0/h) = 1 so the diagonal contribution to the + meat is exactly the HC0 term Σ_i X_i ε_i² X_i'.""" + rng = np.random.default_rng(seed=9) + n = 20 + coords = rng.uniform(0, 1000, size=(n, 2)) + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = rng.standard_normal(n) + # Build kernel with cutoff between min and max pairwise distance + D = _pairwise_distance_matrix(coords, "euclidean") + cutoff = float(D[D > 0].min() * 0.001) # ensure off-diagonal kernel is 0 + # With this cutoff, the Bartlett kernel is 1 on the diagonal and 0 off, + # so meat == HC0. + S = X * eps[:, None] + meat_full = S.T @ _bartlett_kernel(D / cutoff) @ S + meat_hc0 = X.T @ (X * (eps**2)[:, None]) + np.testing.assert_allclose(meat_full, meat_hc0, atol=1e-12) From 7c0afa7d8b230a5e2ad1aaaf2dbcde09198c77dd Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 12:17:17 -0400 Subject: [PATCH 2/9] Tighten SyntheticDiD set_params and TWFE Conley + wild_bootstrap guards Closes two no-silent-failure gaps surfaced after the initial Phase 1 commit: 1. SyntheticDiD.set_params() now mirrors __init__'s Conley rejection contract. The constructor correctly raises TypeError on vcov_type="conley" / conley_*=... but set_params() previously only checked hasattr(self, key) and silently accepted those kwargs because SyntheticDiD inherits the conley_* attributes from DifferenceInDifferences. A user calling est.set_params(vcov_type="conley", conley_cutoff_km=100.0) followed by est.fit(...) would have gotten the bootstrap/jackknife/placebo variance silently with no Conley computation -- forbidden by feedback_no_silent_failures. 
The new set_params() rejects non-None vcov_type and any non-None conley_* kwarg with TypeError before mutation; None values for these keys are passthrough no-ops so get_params() -> set_params() round-trips cleanly. SyntheticDiD.get_params() now surfaces the inherited conley_* keys with None values for sklearn-style API consistency. 2. TwoWayFixedEffects(vcov_type="conley", inference="wild_bootstrap") now raises NotImplementedError. Conley analytical spatial-HAC and wild cluster bootstrap are different inference paths; combining them would route the bootstrap branch with cluster_ids=None (TWFE auto-cluster is disabled under Conley) and fail with a non-targeted error inside wild_bootstrap_se. Use inference='analytical' for Conley spatial HAC, or vcov_type='hc1' with inference='wild_bootstrap'. 3. DifferenceInDifferences and MultiPeriodDiD class docstrings now list vcov_type="conley" in the enum and document the four conley_* params (previously the Conley path was documented in REGISTRY/CHANGELOG/llms.txt but the in-code class docs still listed only {classical, hc1, hc2, hc2_bm}). 4. New tests: - SyntheticDiD().set_params(vcov_type="conley") raises TypeError + state unchanged - SyntheticDiD().set_params(conley_cutoff_km=100.0) raises + state unchanged - SyntheticDiD().get_params() includes conley_* keys with None values; round-trip set_params(**get_params()) is a no-op - TwoWayFixedEffects(vcov_type="conley", inference="wild_bootstrap") raises 5. TODO.md entries added for deferred follow-ups: callable-conley_metric shape/finiteness/symmetry validation, common Conley estimator-validator helper extraction, and a stronger TWFE Conley FWL invariance test that actually compares TWFE-within Conley to a full-dummy FE design (the current test only asserts finite SEs). 74 Conley tests pass (70 prior + 4 new); 261 tests pass on the targeted regression surface (test_conley_vcov, test_estimators_vcov_type, test_linalg, test_linalg_hc2_bm). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- TODO.md | 3 ++ diff_diff/estimators.py | 38 +++++++++++++++++++++-- diff_diff/synthetic_did.py | 48 +++++++++++++++++++++++++++++ diff_diff/twfe.py | 16 ++++++++++ tests/test_conley_vcov.py | 62 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index 59f7dacb..4de2eb22 100644 --- a/TODO.md +++ b/TODO.md @@ -117,6 +117,9 @@ Deferred items from PR reviews that were not addressed before merge. | Conley + survey weights / `survey_design`. Score-reweighted meat `s_i = w_i · X_i · ε_i` is mechanical, but PSU clustering interaction with the spatial kernel and replicate-weights variance under spatial correlation are non-trivial (Bertanha-Imbens 2014 covers cluster-sample but not the explicit Conley case). Phase 5 of the spillover-conley initiative; paper review prerequisite. Currently raises `NotImplementedError`. | `linalg.py::_validate_vcov_args`, `twfe.py`, `estimators.py` | Phase 5 (spillover-conley) | Medium | | Conley + `absorb=` (arbitrary FE projection beyond TWFE's two-FE within-transformation). FWL composability is proven analytically for TWFE's fixed two-FE design but not formally verified for arbitrary `absorb` dimensions; conservatively rejected at fit-time with a redirect to `fixed_effects=` dummies. Lift after empirical verification on multi-FE within-transformations. | `estimators.py::DifferenceInDifferences.fit`, `MultiPeriodDiD.fit` | follow-up (spillover-conley) | Low | | `SyntheticDiD(vcov_type="conley")` support. Currently raises `TypeError` at `__init__` because SyntheticDiD uses `variance_method ∈ {bootstrap, jackknife, placebo}` rather than the analytical sandwich that Conley plugs into. Wiring would require either reimplementing an analytical sandwich path for SyntheticDiD or designing a spatial-block bootstrap (new methodology, Politis-Romano 1994 territory). 
| `synthetic_did.py::SyntheticDiD` | follow-up (spillover-conley) | Low | +| Validate user-supplied callable `conley_metric` for shape `(n, n)`, finiteness, non-negativity, and symmetry. Currently `np.asarray(metric(coords, coords))` is accepted unchecked; a malformed callable produces opaque matmul errors and a non-symmetric distance matrix produces a non-symmetric vcov. CI reviewer flagged as P2 M3 in PR #(spillover-conley). | `diff_diff/conley.py::_pairwise_distance_matrix`, `_compute_conley_vcov` | follow-up (spillover-conley) | Low | +| Extract common Conley estimator-level validation helper. Today `cluster=`, `survey_design=`, `conley_coords=`, and `conley_cutoff_km=` checks are duplicated across `DifferenceInDifferences.fit` (estimators.py:~370-400), `MultiPeriodDiD.fit` (estimators.py:~1395-1455), and `TwoWayFixedEffects.fit` (twfe.py:~165-205). A future Conley-feature change risks updating one estimator but not the others. CI reviewer flagged as P2 MT1. | `diff_diff/estimators.py`, `diff_diff/twfe.py` | follow-up (spillover-conley) | Low | +| Strengthen `tests/test_conley_vcov.py::TestConleyTWFE::test_twfe_conley_FWL_invariance` to actually verify FWL equivalence between TWFE-within Conley and a full-dummy-FE design (build the dummy regression explicitly and compare the ATT coefficient + Conley SE). The current test only asserts both fits produce finite SEs — the name overstates the assertion. CI reviewer flagged as P2 DT3. | `tests/test_conley_vcov.py` | follow-up (spillover-conley) | Low | #### Performance diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index 316f84dc..ae654ae6 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -57,7 +57,7 @@ class DifferenceInDifferences: ``vcov_type``: with ``"hc1"`` dispatches to CR1 (Liang-Zeger); with ``"hc2_bm"`` dispatches to CR2 Bell-McCaffrey (Pustejovsky-Tipton 2018 symmetric-sqrt + Satterthwaite DOF). 
- vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, optional + vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, optional Variance-covariance family. Defaults to the ``robust`` alias. - ``"classical"``: non-robust OLS SEs, ``sigma_hat^2 * (X'X)^{-1}``. @@ -69,6 +69,11 @@ class DifferenceInDifferences: with ``cluster=``, Pustejovsky-Tipton (2018) CR2 cluster-robust. (Note: ``MultiPeriodDiD`` does NOT yet support ``cluster=`` with ``"hc2_bm"`` — see ``MultiPeriodDiD`` docstring and REGISTRY.md.) + - ``"conley"``: Conley (1999) spatial-HAC sandwich. Requires + ``conley_coords`` (lat/lon column tuple) and ``conley_cutoff_km`` + (positive bandwidth — REQUIRED, no default per the no-silent-failures + rule). Combining with ``cluster=``, ``survey_design=``, or ``absorb=`` + raises ``NotImplementedError`` (deferred to Phase 2+). alpha : float, default=0.05 Significance level for confidence intervals. inference : str, default="analytical" @@ -88,6 +93,20 @@ class DifferenceInDifferences: - "warn": Issue warning and drop linearly dependent columns (default) - "error": Raise ValueError - "silent": Drop columns silently without warning + conley_coords : tuple of (str, str), optional + Column-name tuple ``(lat_col, lon_col)`` for Conley spatial HAC SE. + Required when ``vcov_type="conley"``; raises ``ValueError`` otherwise. + conley_cutoff_km : float, optional + Positive finite bandwidth in km (haversine) or coord units (euclidean). + Required when ``vcov_type="conley"``; no default per Conley 1999 + Section 5 sensitivity-grid recommendation. + conley_metric : str, default "haversine" + Distance metric: ``"haversine"`` (lat/lon, km), ``"euclidean"`` (any + units), or a callable ``(coords1, coords2) -> n×n``. + conley_kernel : str, default "bartlett" + Kernel function: ``"bartlett"`` (PSD-guaranteed, default) or + ``"uniform"`` (emits ``UserWarning`` if the meat has a materially + negative eigenvalue per Conley 1999 footnote 11). 
Attributes ---------- @@ -1055,7 +1074,7 @@ class MultiPeriodDiD(DifferenceInDifferences): ``TODO.md``; also documented as a Note in ``docs/methodology/REGISTRY.md`` under the HeterogeneousAdoptionDiD requirements-checklist block. - vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, optional + vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, optional Variance-covariance family. Defaults to the ``robust`` alias. - ``"classical"``: non-robust OLS SEs, ``sigma_hat^2 * (X'X)^{-1}``. @@ -1066,8 +1085,23 @@ class MultiPeriodDiD(DifferenceInDifferences): - ``"hc2_bm"``: one-way HC2 + Imbens-Kolesar (2016) Satterthwaite DOF per coefficient plus a contrast-aware DOF for the post-period-average ATT. **Unsupported with** ``cluster=`` — see ``cluster`` above. + - ``"conley"``: Conley (1999) spatial-HAC sandwich. Requires + ``conley_coords`` and ``conley_cutoff_km``. Combining with + ``cluster=``, ``survey_design=``, or ``absorb=`` raises + ``NotImplementedError`` (deferred to Phase 2+). alpha : float, default=0.05 Significance level for confidence intervals. + conley_coords : tuple of (str, str), optional + Column-name tuple ``(lat_col, lon_col)`` for Conley spatial HAC SE. + Required when ``vcov_type="conley"``. + conley_cutoff_km : float, optional + Positive finite bandwidth for Conley spatial HAC. Required when + ``vcov_type="conley"`` (no default per Conley 1999 sensitivity-grid). + conley_metric : str, default "haversine" + Distance metric for Conley: ``"haversine"`` (lat/lon, km), + ``"euclidean"``, or a callable. + conley_kernel : str, default "bartlett" + Conley kernel: ``"bartlett"`` (PSD-guaranteed) or ``"uniform"``. 
Attributes ---------- diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index fe4e6c9b..a3d98c4c 100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -2706,6 +2706,16 @@ def get_params(self) -> Dict[str, Any]: "variance_method": self.variance_method, "n_bootstrap": self.n_bootstrap, "seed": self.seed, + # Conley kwargs are inherited from DifferenceInDifferences.__init__ + # but rejected by SyntheticDiD's __init__ / set_params (Conley uses + # the analytical sandwich, SyntheticDiD uses bootstrap variance). + # Surface them here as None for sklearn-style API consistency; any + # non-None value is rejected by set_params/__init__. + "vcov_type": None, + "conley_coords": None, + "conley_cutoff_km": None, + "conley_metric": None, + "conley_kernel": None, } def set_params(self, **params) -> "SyntheticDiD": @@ -2715,16 +2725,54 @@ def set_params(self, **params) -> "SyntheticDiD": post-update state, the instance is rolled back to the pre-call values so a raised ``ValueError`` leaves the object consistent with its pre-call configuration. + + Mirrors ``__init__``'s defensive rejection of ``vcov_type`` / + ``conley_*`` non-None values: SyntheticDiD uses bootstrap/jackknife/ + placebo variance, not the analytical sandwich, so any Conley kwarg + would be silently ignored otherwise (forbidden by + ``feedback_no_silent_failures``). Tracked in TODO.md for a follow-up + that wires Conley to a non-bootstrap variance path. """ + # Reject Conley kwargs / non-None vcov_type before any mutation — + # mirrors __init__'s contract. Empty/None values are permitted so + # round-tripping get_params() back through set_params() is a no-op. + _conley_keys = ("conley_coords", "conley_cutoff_km", "conley_metric", "conley_kernel") + if params.get("vcov_type") is not None and params["vcov_type"] != "conley": + raise TypeError( + f"SyntheticDiD does not accept vcov_type={params['vcov_type']!r}. 
" + "SyntheticDiD's variance is bootstrap/jackknife/placebo based; " + "configure via variance_method=..." + ) + if params.get("vcov_type") == "conley" or any( + k in params and params[k] is not None for k in _conley_keys + ): + raise TypeError( + "SyntheticDiD does not yet support vcov_type='conley' or any " + "conley_* kwargs. SyntheticDiD uses bootstrap/jackknife/placebo " + "variance (variance_method=...), not the analytical sandwich " + "routed through compute_robust_vcov. Tracked in TODO.md as " + "a follow-up." + ) # Deprecated parameter names — emit warning and ignore _deprecated = {"lambda_reg", "zeta"} + # Conley kwargs are not stored as instance attributes; surfacing them + # in get_params() returns None unconditionally. set_params() with None + # values for these keys is a no-op (the rejection above only fires on + # non-None values). + _silent_conley_passthrough = {"vcov_type", *_conley_keys} # Snapshot original values for transactional rollback on validation failure. _rollback: Dict[str, Any] = {} for key in params: + if key in _silent_conley_passthrough: + continue if key not in _deprecated and hasattr(self, key): _rollback[key] = getattr(self, key) try: for key, value in params.items(): + if key in _silent_conley_passthrough: + # No-op: explicitly None passthrough for round-trip + # get_params() -> set_params() consistency. + continue if key in _deprecated: warnings.warn( f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda " diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index c2b8f909..9148b234 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -162,6 +162,22 @@ def fit( # type: ignore[override] "Conley; the unit auto-cluster default is also disabled " "when vcov_type='conley'." ) + # Conley + wild_bootstrap: Conley is an analytical spatial-HAC + # variance and wild cluster bootstrap is a different inference path + # that resamples residuals within clusters. 
There is no clean + # composition — the two methods target different things — and + # combining them would either silently drop one or fail downstream + # (TWFE auto-cluster is disabled under Conley, so the bootstrap + # would receive cluster_ids=None and fail with a non-targeted + # error in `wild_bootstrap_se`). Reject early. + if self.vcov_type == "conley" and self.inference == "wild_bootstrap": + raise NotImplementedError( + "TwoWayFixedEffects(vcov_type='conley', inference='wild_bootstrap') " + "is not supported: Conley is an analytical spatial-HAC variance and " + "wild cluster bootstrap is a different inference path. Use " + "inference='analytical' for Conley spatial HAC, or use " + "vcov_type='hc1' with inference='wild_bootstrap'." + ) if self.vcov_type == "conley": if survey_design is not None: raise NotImplementedError( diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py index e7f4cb92..27f6dbc5 100644 --- a/tests/test_conley_vcov.py +++ b/tests/test_conley_vcov.py @@ -798,6 +798,22 @@ def test_twfe_conley_with_explicit_cluster_raises(self, panel): conley_cutoff_km=2000.0, ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") + def test_twfe_conley_with_wild_bootstrap_raises(self, panel): + """Conley analytical spatial-HAC and wild cluster bootstrap are + different inference paths; combining them would either silently drop + one or fail downstream (TWFE auto-cluster is disabled under Conley, + so the bootstrap would receive cluster_ids=None). Reject early. 
+ Closes CI reviewer P1 CQ2.""" + from diff_diff import TwoWayFixedEffects + + with pytest.raises(NotImplementedError, match="wild_bootstrap"): + TwoWayFixedEffects( + vcov_type="conley", + inference="wild_bootstrap", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") + def test_twfe_conley_FWL_invariance(self, panel): """TWFE Conley SE matches DifferenceInDifferences with same kwargs (verifies FWL composability — Conley meat survives within-transformation @@ -905,6 +921,52 @@ def test_synthetic_did_conley_kwarg_raises(self): with pytest.raises(TypeError, match="conley"): SyntheticDiD(conley_cutoff_km=100.0) # type: ignore[call-arg] + def test_synthetic_did_set_params_conley_raises(self): + """SyntheticDiD.set_params(vcov_type='conley') must raise (mirrors + __init__'s contract — closes the silent-bypass gap CI reviewer flagged + as P1 CQ1).""" + from diff_diff import SyntheticDiD + + est = SyntheticDiD() + # Snapshot pre-call state + before_variance = est.variance_method + before_n_boot = est.n_bootstrap + before_zeta = est.zeta_omega + + with pytest.raises(TypeError, match="conley"): + est.set_params(vcov_type="conley") + # Verify nothing mutated + assert est.variance_method == before_variance + assert est.n_bootstrap == before_n_boot + assert est.zeta_omega == before_zeta + + def test_synthetic_did_set_params_conley_kwarg_raises(self): + from diff_diff import SyntheticDiD + + est = SyntheticDiD() + with pytest.raises(TypeError, match="conley"): + est.set_params(conley_cutoff_km=100.0) + # Verify the conley attr stays None (rejected before mutation) + assert getattr(est, "conley_cutoff_km", None) is None + + def test_synthetic_did_get_params_includes_conley_keys(self): + """get_params() / set_params() round-trip must include the inherited + conley_* keys with None values for sklearn-style API consistency + (CI reviewer P2 CQ3).""" + from diff_diff import SyntheticDiD + + est = 
SyntheticDiD(variance_method="placebo", n_bootstrap=10) + params = est.get_params() + assert "vcov_type" in params and params["vcov_type"] is None + assert "conley_coords" in params and params["conley_coords"] is None + assert "conley_cutoff_km" in params and params["conley_cutoff_km"] is None + assert "conley_metric" in params and params["conley_metric"] is None + assert "conley_kernel" in params and params["conley_kernel"] is None + # Round-trip: passing None values back into set_params is a no-op + est.set_params(**params) + assert est.variance_method == "placebo" + assert est.n_bootstrap == 10 + class TestConleySetParamsAtomicity: """set_params atomicity for Conley fields. Per From d51bba9ac3b22618dcb0a2ead7e77a86d31e3498 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 13:08:32 -0400 Subject: [PATCH 3/9] Reject Conley on panel estimators; remove dead conley result-class wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CI Codex review of PR #411 (P1#1 + P1#2): P1#1 — Panel estimators with vcov_type="conley" silently produced wrong SE because cross-sectional Conley over (unit, time) rows treated same- unit cross-time pairs as d_ij=0 -> K=1, mishandling the space-time HAC. Phase 1 supports cross-sectional Conley only; reject panel fits at fit-time on DifferenceInDifferences, TwoWayFixedEffects, and MultiPeriodDiD with NotImplementedError. Practitioners pre-collapse to per-unit first-differences and call compute_robust_vcov directly. Phase 2 will add the space-time product kernel (Driscoll-Kraay) and lift the rejection. Granular Conley-arg validation collapsed into the single unconditional reject (cluster/absorb/coords/cutoff combinations all hit the same path). P1#2 — conley_metric was dropped at the result boundary and _format_vcov_label hard-coded "km" for the cutoff label even when metric was "euclidean". 
With panels rejected, the conley_cutoff_km / conley_kernel fields on DiDResults / MultiPeriodDiDResults are now unreachable; remove the dead fields, the dead arg passes from estimators.py / twfe.py, and the dead "conley" branch in _format_vcov_label. Tests added: TWFE / DiD / MPD panel-rejection regressions, including a repeated-coords-across-periods regression per the CI reviewer's recommendation. 70 Conley tests + 401 targeted regression tests pass. REGISTRY / CHANGELOG / llms.txt / README / TODO updated to reflect that the only supported Phase 1 Conley path is direct LinearRegression / compute_robust_vcov on a single-period design. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 2 +- README.md | 2 +- TODO.md | 8 +- diff_diff/estimators.py | 127 ++++++----------------- diff_diff/guides/llms.txt | 2 +- diff_diff/linalg.py | 2 - diff_diff/results.py | 34 +++--- diff_diff/twfe.py | 79 +++----------- docs/methodology/REGISTRY.md | 51 ++++++--- tests/test_conley_vcov.py | 196 +++++++++++++++++------------------ 10 files changed, 197 insertions(+), 306 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2dfe729..56131862 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- **Conley (1999) spatial-HAC standard errors via `vcov_type="conley"`** on `DifferenceInDifferences`, `TwoWayFixedEffects`, and `MultiPeriodDiD` (Phase 1 of the spillover-conley initiative). 
New keyword-only kwargs on `__init__`: `conley_coords=(, )` (column-name tuple from `data`), `conley_cutoff_km=` (positive finite bandwidth in km for haversine, or coord units for euclidean — REQUIRED, no default per the no-silent-failures contract), `conley_metric="haversine"|"euclidean"|callable` (default `"haversine"`; great-circle uses Earth's mean radius 6371.01 km matching R `conleyreg`), `conley_kernel="bartlett"|"uniform"` (default `"bartlett"` is PSD-guaranteed; `"uniform"` emits `UserWarning` if the meat has a materially negative eigenvalue per Conley 1999 footnote 11). Variance estimator `Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1}` (Conley 1999 Eq 4.2). FWL composes cleanly because the meat depends only on scores `X·ε`, both of which within-transformation preserves — `TwoWayFixedEffects(vcov_type="conley", ...)` is supported, UNLIKE `hc2`/`hc2_bm` which need the full hat matrix. TWFE auto-cluster-at-unit is disabled when `vcov_type="conley"`; explicit `cluster=` raises `NotImplementedError` (combined product kernel deferred to Phase 2). `n > 20_000` emits a `UserWarning` about the dense O(n²) distance-matrix memory; sparse k-d-tree fast path is queued for Phase 2. `SyntheticDiD(vcov_type="conley")` raises `TypeError` (uses bootstrap variance, not analytical sandwich); `set_params` mirrors the constructor rejection. `vcov_type="conley"` + `weights=` / `survey_design=` / `absorb=` raises `NotImplementedError` (Bertanha-Imbens 2014 weighted-Conley + arbitrary FE projection are deferred to follow-up phases). `TwoWayFixedEffects(vcov_type="conley", inference="wild_bootstrap")` raises `NotImplementedError` (Conley analytical spatial-HAC and wild cluster bootstrap are different inference paths). 
Helpers live in new module `diff_diff/conley.py` (`_haversine_km`, `_pairwise_distance_matrix`, `_bartlett_kernel`, `_uniform_kernel`, `_validate_conley_kwargs`, `_compute_conley_vcov`); `compute_robust_vcov` in `diff_diff/linalg.py` imports the dispatch helpers. R `conleyreg` parity (Düsterhöft 2021, CRAN v0.1.9) on three benchmark fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`, regenerable via `benchmarks/R/generate_conley_golden.R`); observed max abs diff 5.7e-16. Earth radius 6371.01 km matches `conleyreg::haversine_dist`. Test file `tests/test_conley_vcov.py` skips parity cleanly when the JSON is absent. `result.summary()` prints `"Conley spatial HAC (bartlett, cutoff=200.0km)"` via the extended `_format_vcov_label` helper. New REGISTRY section `## ConleySpatialHAC`. Tracked on `BRIEFING.md` as Phase 1 of the 6-phase initiative (Phase 2: two-way space×time + sparse fast path; Phase 3: ring-indicator spillover-aware DiD per Butts 2021; Phase 4a/4b: mechanical extension to IF-aggregation and sandwich-derived estimators; Phase 5: survey design support). +- **Conley (1999) spatial-HAC standard errors via `vcov_type="conley"`** on cross-sectional `LinearRegression` / `compute_robust_vcov` (Phase 1 of the spillover-conley initiative). Keyword arguments: `conley_coords` (n × 2 array of lat/lon or projected coords), `conley_cutoff_km=` (positive finite bandwidth in km for haversine, or coord units for euclidean — REQUIRED, no default per the no-silent-failures contract), `conley_metric="haversine"|"euclidean"|callable` (default `"haversine"`; great-circle uses Earth's mean radius 6371.01 km matching R `conleyreg`), `conley_kernel="bartlett"|"uniform"` (default `"bartlett"` is PSD-guaranteed; `"uniform"` emits `UserWarning` if the meat has a materially negative eigenvalue per Conley 1999 footnote 11). Variance estimator `Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1}` (Conley 1999 Eq 4.2). 
**Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`) reject `vcov_type="conley"` at fit-time with `NotImplementedError`** — Phase 1's cross-sectional Conley does not handle the time dimension. Applying it over (unit, time) rows would treat same-unit cross-time pairs as `d_ij = 0 → K = 1`, mishandling the space-time HAC. Practitioners needing Conley with a panel design should pre-collapse to per-unit first-differences and call `compute_robust_vcov` directly on a single-period regression. Phase 2 will add the space-time product kernel (Driscoll-Kraay) for full panel support. `SyntheticDiD(vcov_type="conley")` raises `TypeError` (uses bootstrap variance, not analytical sandwich); `set_params` mirrors the constructor rejection. `vcov_type="conley"` + `cluster_ids=` / `weights=` / `survey_design=` raises `NotImplementedError` (combined product kernel + Bertanha-Imbens 2014 weighted-Conley deferred to follow-up phases). `n > 20_000` emits a `UserWarning` about the dense O(n²) distance-matrix memory; sparse k-d-tree fast path is queued for Phase 2. Helpers live in new module `diff_diff/conley.py` (`_haversine_km`, `_pairwise_distance_matrix`, `_bartlett_kernel`, `_uniform_kernel`, `_validate_conley_kwargs`, `_compute_conley_vcov`); `compute_robust_vcov` in `diff_diff/linalg.py` imports the dispatch helpers. R `conleyreg` parity (Düsterhöft 2021, CRAN v0.1.9) on three benchmark fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`, regenerable via `benchmarks/R/generate_conley_golden.R`); observed max abs diff 5.7e-16. Earth radius 6371.01 km matches `conleyreg::haversine_dist`. Test file `tests/test_conley_vcov.py` skips parity cleanly when the JSON is absent. New REGISTRY section `## ConleySpatialHAC`. 
Tracked on `BRIEFING.md` as Phase 1 of the 6-phase initiative (Phase 2: space-time product kernel + sparse fast path + panel-estimator support; Phase 3: ring-indicator spillover-aware DiD per Butts 2021; Phase 4a/4b: mechanical extension to IF-aggregation and sandwich-derived estimators; Phase 5: survey design support). - **Tutorial 21: HAD Pre-test Workflow** (`docs/tutorials/21_had_pretest_workflow.ipynb`) — composite pre-test walkthrough for `HeterogeneousAdoptionDiD` building on Tutorial 20's brand-campaign framing. Uses a 60-DMA × 8-week panel close in shape to T20's but with the dose distribution drawn from `Uniform[$0.01K, $50K]` (vs T20's `[$5K, $50K]`); the true support is strictly positive but very near zero, chosen so the QUG step in `did_had_pretest_workflow` fails-to-reject `H0: d_lower = 0` in this finite sample and the verdict text fires the load-bearing "Assumption 7 deferred" pivot for the upgrade-arc narrative. (HAD's `design="auto"` selector — a separate min/median heuristic at `had.py::_detect_design`, NOT the QUG p-value — independently lands on the `continuous_at_zero` identification path with target `WAS` on this panel because `d.min() < 0.01 * median(|d|)`. The QUG test and the design selector are independent rules that point to the same identification path here.) 
Walks through three surfaces: (a) `did_had_pretest_workflow(aggregate="overall")` on a two-period collapse, where the verdict explicitly flags Step 2 (Assumption 7 pre-trends) as not run because a single pre-period structurally cannot support a pre-trends test, and the structural fields `pretrends_joint` / `homogeneity_joint` are both `None`; (b) `did_had_pretest_workflow(aggregate="event_study")` on the full multi-period panel, where the verdict reads "TWFE admissible under Section 4 assumptions" because all three testable diagnostics (QUG + joint pre-trends Stute over 3 horizons + joint homogeneity Stute over 4 horizons) fail-to-reject — non-rejection evidence under finite-sample power and test specification, not proof that the identifying assumptions hold; and (c) a side panel exercising both `yatchew_hr_test` null modes — `null="linearity"` (default, paper Theorem 7) vs `null="mean_independence"` (Phase 4 R-parity with R `YatchewTest::yatchew_test(order=0)`) — on the within-pre-period first-difference paired with post-period dose, illustrating the stricter null's larger residual variance (`sigma2_lin` 7.01 vs 6.53) and smaller p-value (0.29 vs 0.49). Companion drift-test file `tests/test_t21_had_pretest_workflow_drift.py` (16 tests pinning panel composition, both verdict pivots, structural anchors on both paths, deterministic QUG / Yatchew statistics, bootstrap p-value tolerance bands per `feedback_bootstrap_drift_tests_need_backend_tolerance`, and `HAD(design="auto")` resolution to `continuous_at_zero` on this panel). T20's "Composite pretest workflow" Extensions bullet updated with a forward-pointer to T21. T22 weighted/survey HAD tutorial remains queued as a separate notebook PR. - **`ChaisemartinDHaultfoeuille.by_path` and `paths_of_interest` now compose with `survey_design`** for analytical Binder TSL SE and replicate-weight bootstrap variance. 
The `NotImplementedError` gate at `chaisemartin_dhaultfoeuille.py:1233-1239` is replaced by a per-path multiplier-bootstrap-only gate (`survey_design + n_bootstrap > 0` under by_path / paths_of_interest still raises, since the survey-aware perturbation pivot for path-restricted IFs is methodologically underived). Per-path SE routes through the existing `_survey_se_from_group_if` cell-period allocator: the per-period IF (`U_pp_l_path`) is built with non-path switcher-side contributions skipped (control contributions are unchanged, matching the joiners/leavers IF convention; preserves the row-sum identity `U_pp.sum(axis=1) == U`), cohort-recentered via `_cohort_recenter_per_period`, then expanded to observations as `psi_i = U_pp[g_i, t_i] · (w_i / W_{g_i, t_i})`. Replicate-weight designs unconditionally use the cell allocator (Class A contract from PR #323). New `_refresh_path_inference` helper post-call refreshes `safe_inference` on every populated entry across `multi_horizon_inference`, `placebo_horizon_inference`, `path_effects`, and `path_placebos` so all four surfaces use the same final `df_survey` after per-path replicate fits append `n_valid` to the shared accumulator. Path-enumeration ranking under `survey_design` remains unweighted (group-cardinality, not population-weight mass). Lonely-PSU policy stays sample-wide, not per-path. Telescope invariant: on a single-path panel, per-path SE matches the global non-by_path survey SE bit-exactly. **No R parity** — R `did_multiplegt_dyn` does not support survey weighting; this is a Python-only methodology extension. The global non-by_path TSL multiplier-bootstrap path is unaffected (anti-regression test `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathSurveyDesignAnalytical::test_global_survey_plus_n_bootstrap_still_works` locks the per-path-only scope of the new gate). 
Cross-surface invariants regression-tested at `TestByPathSurveyDesignAnalytical` (~17 tests across gate / dispatch / analytical SE / replicate-weight SE / per-path placebos / `trends_linear` composition / unobserved-path warnings / final-df refresh regressions) and `TestByPathSurveyDesignTelescope`. See `docs/methodology/REGISTRY.md` §`ChaisemartinDHaultfoeuille` `Note (Phase 3 by_path ...)` → "Per-path survey-design SE" for the full contract. - **Inference-field aliases on staggered result classes** for adapter / external-consumer compatibility. Read-only `@property` aliases expose the flat `att` / `se` / `conf_int` / `p_value` / `t_stat` names (matching `DiDResults` / `TROPResults` / `SyntheticDiDResults` / `HeterogeneousAdoptionDiDResults`) on every result class that previously only carried prefixed canonical fields: `CallawaySantAnnaResults`, `StackedDiDResults`, `EfficientDiDResults`, `ChaisemartinDHaultfoeuilleResults`, `StaggeredTripleDiffResults`, `WooldridgeDiDResults`, `SunAbrahamResults`, `ImputationDiDResults`, `TwoStageDiDResults` (mapping to `overall_*`); `ContinuousDiDResults` (mapping to `overall_att_*`, ATT-side as the headline, ACRT-side accessible unchanged via `overall_acrt_*`); `MultiPeriodDiDResults` (mapping to `avg_*`). `ContinuousDiDResults` additionally exposes `overall_se` / `overall_conf_int` / `overall_p_value` / `overall_t_stat` aliases for naming consistency with the rest of the staggered family. Aliases are pure read-throughs over the canonical fields — no recomputation, no behavior change — so the `safe_inference()` joint-NaN contract (per CLAUDE.md "Inference computation") is inherited automatically (NaN canonical → NaN alias, locked at `tests/test_result_aliases.py::test_pattern_b_aliases_propagate_nan`). The native `overall_*` / `overall_att_*` / `avg_*` fields remain canonical for documentation and computation. 
Motivated by the `balance.interop.diff_diff.as_balance_diagnostic()` adapter (`facebookresearch/balance` PR #465) which calls `getattr(res, "se", None)` / `getattr(res, "conf_int", None)` without a fallback chain — pre-alias, every staggered result class returned `None` on those keys, silently dropping `se` and `conf_int` from the adapter's diagnostic dict. 23 alias-mechanic + balance-adapter regression tests at `tests/test_result_aliases.py`. Patch-level (additive on stable surfaces). diff --git a/README.md b/README.md index 2f32e91a..9cbc76b7 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`. - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design -- Conley spatial HAC SE (`vcov_type="conley"`) on DifferenceInDifferences/TwoWayFixedEffects/MultiPeriodDiD - Conley (1999) spatial-correlation-aware SEs with parity vs R `conleyreg` +- Conley spatial HAC SE (`vcov_type="conley"`) on cross-sectional `LinearRegression` / `compute_robust_vcov` - Conley (1999) spatial-correlation-aware SEs with parity vs R `conleyreg` ## Survey Support diff --git a/TODO.md b/TODO.md index 4de2eb22..e64509a0 100644 --- a/TODO.md +++ b/TODO.md @@ -113,13 +113,11 @@ Deferred items from PR reviews that were not addressed before merge. | `HeterogeneousAdoptionDiD` time-varying dose on event study: Phase 2b REJECTS panels where `D_{g,t}` varies within a unit for `t >= F` (the aggregation uses `D_{g, F}` as the single regressor for all horizons, paper Appendix B.2 constant-dose convention). 
A follow-up PR could add a time-varying-dose estimator for these panels; current behavior is front-door rejection with a redirect to `ChaisemartinDHaultfoeuille`. | `diff_diff/had.py::_validate_had_panel_event_study` | Phase 2b | Low | | `HeterogeneousAdoptionDiD` repeated-cross-section support: paper Section 2 defines HAD on panel OR repeated cross-section, but Phase 2a is panel-only. RCS inputs (disjoint unit IDs between periods) are rejected by the balanced-panel validator with the generic "unit(s) do not appear in both periods" error. A follow-up PR will add an RCS identification path based on pre/post cell means (rather than unit-level first differences), with its own validator and a distinct `data_mode` / API surface. | `diff_diff/had.py::_validate_had_panel`, `diff_diff/had.py::_aggregate_first_difference` | Phase 2a | Medium | | SyntheticDiD: bootstrap cross-language parity anchor against R's default `synthdid::vcov(method="bootstrap")` (refit; rebinds `opts` per draw) or Julia `Synthdid.jl::src/vcov.jl::bootstrap_se` (refit by construction). Same-library validation (placebo-SE tracking, AER §6.3 MC truth) is in place; a cross-language anchor is desirable to bolster the methodology contract. Julia is the cleanest target — minimal wrapping work and refit-native vcov. Tolerance target: 1e-6 on Monte Carlo samples (different BLAS + RNG paths preclude 1e-10). The R-parity fixture from the previous release was deleted because it pinned the now-removed fixed-weight path. | `benchmarks/R/`, `benchmarks/julia/`, `tests/` | follow-up | Low | -| Conley + cluster_ids combined product kernel `K_space(d_ij/h) · 1{cluster_i = cluster_j}`. Phase 2 of the spillover-conley initiative will add this alongside the time-dimension extension (Driscoll-Kraay). Currently raises `NotImplementedError` at both the linalg validator and TWFE early-block. 
| `linalg.py::_validate_vcov_args`, `twfe.py`, `estimators.py` (DiD/MultiPeriodDiD `fit`) | Phase 2 (spillover-conley) | Medium | -| Conley + survey weights / `survey_design`. Score-reweighted meat `s_i = w_i · X_i · ε_i` is mechanical, but PSU clustering interaction with the spatial kernel and replicate-weights variance under spatial correlation are non-trivial (Bertanha-Imbens 2014 covers cluster-sample but not the explicit Conley case). Phase 5 of the spillover-conley initiative; paper review prerequisite. Currently raises `NotImplementedError`. | `linalg.py::_validate_vcov_args`, `twfe.py`, `estimators.py` | Phase 5 (spillover-conley) | Medium | -| Conley + `absorb=` (arbitrary FE projection beyond TWFE's two-FE within-transformation). FWL composability is proven analytically for TWFE's fixed two-FE design but not formally verified for arbitrary `absorb` dimensions; conservatively rejected at fit-time with a redirect to `fixed_effects=` dummies. Lift after empirical verification on multi-FE within-transformations. | `estimators.py::DifferenceInDifferences.fit`, `MultiPeriodDiD.fit` | follow-up (spillover-conley) | Low | +| Conley space-time product kernel + panel-estimator wire-up. Phase 1 rejects `vcov_type="conley"` on `DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD` at fit-time because cross-sectional Conley over (unit, time) rows mishandles same-unit cross-time pairs (`d_ij = 0 → K = 1`). Phase 2 will add `K(d_ij, |s-t|) = K_space(d_ij/h_space) · K_time(|s-t|/h_time)` (Driscoll-Kraay) and lift the rejection. | `linalg.py`, `conley.py`, `estimators.py`, `twfe.py` | Phase 2 (spillover-conley) | High | +| Conley + cluster_ids combined product kernel `K_space(d_ij/h) · 1{cluster_i = cluster_j}`. Phase 2 of the spillover-conley initiative will add this alongside the time-dimension extension. Currently raises `NotImplementedError` at the linalg validator (cross-sectional Conley + cluster). 
| `linalg.py::_validate_vcov_args` | Phase 2 (spillover-conley) | Medium | +| Conley + survey weights / `survey_design`. Score-reweighted meat `s_i = w_i · X_i · ε_i` is mechanical, but PSU clustering interaction with the spatial kernel and replicate-weights variance under spatial correlation are non-trivial (Bertanha-Imbens 2014 covers cluster-sample but not the explicit Conley case). Phase 5 of the spillover-conley initiative; paper review prerequisite. Currently raises `NotImplementedError` at the linalg validator. | `linalg.py::_validate_vcov_args` | Phase 5 (spillover-conley) | Medium | | `SyntheticDiD(vcov_type="conley")` support. Currently raises `TypeError` at `__init__` because SyntheticDiD uses `variance_method ∈ {bootstrap, jackknife, placebo}` rather than the analytical sandwich that Conley plugs into. Wiring would require either reimplementing an analytical sandwich path for SyntheticDiD or designing a spatial-block bootstrap (new methodology, Politis-Romano 1994 territory). | `synthetic_did.py::SyntheticDiD` | follow-up (spillover-conley) | Low | | Validate user-supplied callable `conley_metric` for shape `(n, n)`, finiteness, non-negativity, and symmetry. Currently `np.asarray(metric(coords, coords))` is accepted unchecked; a malformed callable produces opaque matmul errors and a non-symmetric distance matrix produces a non-symmetric vcov. CI reviewer flagged as P2 M3 in PR #(spillover-conley). | `diff_diff/conley.py::_pairwise_distance_matrix`, `_compute_conley_vcov` | follow-up (spillover-conley) | Low | -| Extract common Conley estimator-level validation helper. Today `cluster=`, `survey_design=`, `conley_coords=`, and `conley_cutoff_km=` checks are duplicated across `DifferenceInDifferences.fit` (estimators.py:~370-400), `MultiPeriodDiD.fit` (estimators.py:~1395-1455), and `TwoWayFixedEffects.fit` (twfe.py:~165-205). A future Conley-feature change risks updating one estimator but not the others. CI reviewer flagged as P2 MT1. 
| `diff_diff/estimators.py`, `diff_diff/twfe.py` | follow-up (spillover-conley) | Low | -| Strengthen `tests/test_conley_vcov.py::TestConleyTWFE::test_twfe_conley_FWL_invariance` to actually verify FWL equivalence between TWFE-within Conley and a full-dummy-FE design (build the dummy regression explicitly and compare the ATT coefficient + Conley SE). The current test only asserts both fits produce finite SEs — the name overstates the assertion. CI reviewer flagged as P2 DT3. | `tests/test_conley_vcov.py` | follow-up (spillover-conley) | Low | #### Performance diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index ae654ae6..17d57ec2 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -356,66 +356,30 @@ def fit( "HC2/CR2-BM are computed on the full projection." ) - # Reject Conley + absorb in Phase 1. Conley's meat depends only on - # scores X*epsilon, both of which FWL preserves under within- - # transformation, so the math composes cleanly for TWFE's two-FE - # design. But arbitrary absorb dimensions have not been verified - # empirically yet; conservatively reject and tell the user to use - # fixed_effects= dummies for the same FE design. - if absorb and self.vcov_type == "conley": + # Reject vcov_type='conley' on DifferenceInDifferences entirely. + # DiD is intrinsically a two-period panel design (the validator + # above enforces time has both 0 and 1 values). Cross-sectional + # Conley over (unit, t=0) ∪ (unit, t=1) rows is methodologically + # wrong: same-unit cross-time pairs have d_ij = 0 -> K(0/h) = 1, + # giving them full covariance weight as if they were one clustered + # pair, while cross-unit pairs are weighted only by spatial + # distance with no time-lag handling. That is neither documented + # Conley 1999 nor a documented space-time HAC. 
Phase 1 supports + # cross-sectional Conley only via direct compute_robust_vcov on a + # single-period regression; Phase 2 will add a space-time product + # kernel / Driscoll-Kraay estimator. if self.vcov_type == "conley": raise NotImplementedError( - "DifferenceInDifferences(absorb=..., vcov_type='conley') " - "is deferred to a follow-up. Conley + within-transformation " - "for arbitrary absorbed FE dimensions has not been verified; " - "use fixed_effects= dummies for an equivalent FE design " - "with the full projection, or drop absorb= for " - "cross-sectional Conley." + "DifferenceInDifferences(vcov_type='conley') is deferred " + "to Phase 2 (space-time product kernel / Driscoll-Kraay). " + "Phase 1 supports cross-sectional Conley only via direct " + "compute_robust_vcov on a single-period design; " + "DifferenceInDifferences is intrinsically a two-period " + "panel — pre-collapse to per-unit first-differences and " + "call compute_robust_vcov directly, or wait for the " + "Phase 2 panel extension." ) - # Reject Conley + cluster (combined product kernel is Phase 2+) and - # Conley + survey_design (Bertanha-Imbens 2014 territory) early at - # the estimator level so the error message references the user-facing - # kwarg names rather than the internal cluster_ids/weights array. - if self.vcov_type == "conley": - if self.cluster is not None: - raise NotImplementedError( - f"DifferenceInDifferences(cluster={self.cluster!r}, " - "vcov_type='conley') is deferred to Phase 2 (combined " - "product kernel). Drop cluster= for cross-sectional " - "Conley." - ) - if survey_design is not None: - raise NotImplementedError( - "DifferenceInDifferences(survey_design=..., " - "vcov_type='conley') is deferred to Phase 2+ " - "(Bertanha-Imbens 2014). Drop survey_design= for " - "cross-sectional Conley." - ) - if self.conley_coords is None: - raise ValueError( - "vcov_type='conley' requires conley_coords=(<lat_col>, " "<lon_col>) tuple of column names in the data."
- ) - if self.conley_cutoff_km is None: - raise ValueError( - "vcov_type='conley' requires conley_cutoff_km (positive " - "finite bandwidth in km for haversine, or in coord units " - "for euclidean)." - ) - # Validate columns exist; the validator inside compute_robust_vcov - # will check NaN/range/etc on the array values themselves. - _coord_cols = list(self.conley_coords) - if len(_coord_cols) != 2: - raise ValueError( - f"conley_coords must be a 2-tuple of column names; got " - f"{self.conley_coords!r}." - ) - for _col in _coord_cols: - if _col not in data.columns: - raise ValueError( - f"conley_coords references column {_col!r} which " f"is not in `data`." - ) - if absorb: # FWL theorem: demean ALL regressors alongside outcome. # Regressors collinear with absorbed FE (e.g., treatment after @@ -652,8 +616,6 @@ def _refit_did_absorb(w_r): # stored `self.vcov_type`. vcov_type=_fit_vcov_type, cluster_name=self.cluster, - conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, - conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self._coefficients = coefficients @@ -1437,41 +1399,18 @@ def fit( # type: ignore[override] "FE design with the full projection, or drop absorb= for " "cross-sectional Conley." ) + # MultiPeriodDiD is intrinsically a multi-period panel estimator; + # cross-sectional Conley does not apply (same rationale as + # DifferenceInDifferences.fit's panel guard above). Phase 2 will + # add a documented space-time HAC. if self.vcov_type == "conley": - if self.cluster is not None: - raise NotImplementedError( - f"MultiPeriodDiD(cluster={self.cluster!r}, " - "vcov_type='conley') is deferred to Phase 2 (combined " - "product kernel). Drop cluster= for cross-sectional " - "Conley." - ) - if survey_design is not None: - raise NotImplementedError( - "MultiPeriodDiD(survey_design=..., vcov_type='conley') " - "is deferred to Phase 2+ (Bertanha-Imbens 2014). Drop " - "survey_design= for cross-sectional Conley." 
- ) - if self.conley_coords is None: - raise ValueError( - "vcov_type='conley' requires conley_coords=(, " - ") tuple of column names in the data." - ) - if self.conley_cutoff_km is None: - raise ValueError( - "vcov_type='conley' requires conley_cutoff_km (positive " "finite bandwidth)." - ) - _coord_cols_mp = list(self.conley_coords) - if len(_coord_cols_mp) != 2: - raise ValueError( - f"conley_coords must be a 2-tuple of column names; got " - f"{self.conley_coords!r}." - ) - for _col in _coord_cols_mp: - if _col not in data.columns: - raise ValueError( - f"conley_coords references column {_col!r} which " f"is not in `data`." - ) - + raise NotImplementedError( + "MultiPeriodDiD(vcov_type='conley') is deferred to Phase 2 " + "(space-time product kernel / Driscoll-Kraay). Phase 1 " + "supports cross-sectional Conley only via direct " + "compute_robust_vcov on a single-period design; " + "MultiPeriodDiD is intrinsically a multi-period panel." + ) # Pre-compute non_ref_periods (needed for absorb demeaning) non_ref_periods = [p for p in all_periods if p != reference_period] @@ -1917,8 +1856,6 @@ def _refit_mp_absorb(w_r): n_clusters=( len(np.unique(effective_cluster_ids)) if effective_cluster_ids is not None else None ), - conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, - conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self._coefficients = coefficients diff --git a/diff_diff/guides/llms.txt b/diff_diff/guides/llms.txt index a693ead6..de547a0b 100644 --- a/diff_diff/guides/llms.txt +++ b/diff_diff/guides/llms.txt @@ -76,7 +76,7 @@ Full practitioner guide: call `diff_diff.get_llm_guide("practitioner")` - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html): Rambachan & Roth (2023) sensitivity analysis — robust CI under parallel trends violations, breakdown values - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html): Roth (2022) minimum detectable 
violation and pre-trends test power curves - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html): Analytical and simulation-based power analysis — MDE, sample size, power curves for study design -- Conley spatial HAC SE (`vcov_type="conley"`) on DifferenceInDifferences/TwoWayFixedEffects/MultiPeriodDiD — Conley (1999) spatial-correlation-aware SEs with haversine/euclidean/callable distance metric and Bartlett/uniform kernel; parity vs R `conleyreg` (Düsterhöft 2021) +- Conley spatial HAC SE (`vcov_type="conley"`) on cross-sectional `LinearRegression` / `compute_robust_vcov` — Conley (1999) spatial-correlation-aware SEs with haversine/euclidean/callable distance metric and Bartlett/uniform kernel; parity vs R `conleyreg` (Düsterhöft 2021). Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`) reject `vcov_type="conley"` at fit-time; Phase 2 will add the space-time product kernel for panel support ## Tutorials diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index cf550880..5efde1da 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -3001,8 +3001,6 @@ def get_inference( and self.survey_design.uses_replicate_variance ) if effective_df is not None and effective_df <= 0 and not _is_replicate: - import warnings - warnings.warn( f"Degrees of freedom is non-positive (df={effective_df}). " "Using normal distribution instead of t-distribution for inference.", diff --git a/diff_diff/results.py b/diff_diff/results.py index b59ecb90..a758ae91 100644 --- a/diff_diff/results.py +++ b/diff_diff/results.py @@ -52,13 +52,15 @@ def _format_vcov_label( cluster_name: Optional[str], n_clusters: Optional[int], n_obs: Optional[int], - conley_cutoff_km: Optional[float] = None, - conley_kernel: Optional[str] = None, ) -> Optional[str]: """Compose a human-readable variance-family label for summary output. Returns None when vcov_type is not recognized so the caller can skip the - line silently (backward-compat). 
+ line silently (backward-compat). vcov_type='conley' is intentionally + not labeled here: DifferenceInDifferences / MultiPeriodDiD / TwoWayFixedEffects + all reject vcov_type='conley' at fit-time (Phase 1 supports cross-sectional + Conley only via direct compute_robust_vcov / LinearRegression), so a + Conley label cannot be reached on these result classes. """ if vcov_type == "classical": return "Classical OLS SEs (non-robust)" @@ -75,11 +77,6 @@ def _format_vcov_label( return f"CR2 Bell-McCaffrey cluster-robust at {cluster_name}{suffix}" suffix = f", n={n_obs}" if n_obs else "" return f"HC2 + Bell-McCaffrey DOF (one-way{suffix})" - if vcov_type == "conley": - kernel_str = conley_kernel or "bartlett" - if conley_cutoff_km is not None: - return f"Conley spatial HAC ({kernel_str}, cutoff={conley_cutoff_km:.1f}km)" - return f"Conley spatial HAC ({kernel_str})" return None @@ -132,14 +129,15 @@ class DiDResults: bootstrap_distribution: Optional[np.ndarray] = field(default=None, repr=False) # Survey design metadata (SurveyMetadata instance from diff_diff.survey) survey_metadata: Optional[Any] = field(default=None) - # Variance-covariance family: "classical" | "hc1" | "hc2" | "hc2_bm" | "conley". + # Variance-covariance family: "classical" | "hc1" | "hc2" | "hc2_bm". # Plus cluster_name when cluster-robust. Used by summary() to label the - # SE family in the output. + # SE family in the output. vcov_type='conley' is rejected at fit-time + # for all panel estimators (DifferenceInDifferences/MultiPeriodDiD/TWFE) + # in Phase 1; the supported Conley path is direct LinearRegression / + # compute_robust_vcov on a cross-sectional design, which uses its own + # result class. vcov_type: Optional[str] = field(default=None) cluster_name: Optional[str] = field(default=None) - # Conley spatial-HAC parameters; populated only when vcov_type="conley". 
- conley_cutoff_km: Optional[float] = field(default=None) - conley_kernel: Optional[str] = field(default=None) def __repr__(self) -> str: """Concise string representation.""" @@ -221,8 +219,6 @@ def summary(self, alpha: Optional[float] = None) -> str: cluster_name=self.cluster_name, n_clusters=self.n_clusters, n_obs=self.n_obs, - conley_cutoff_km=self.conley_cutoff_km, - conley_kernel=self.conley_kernel, ) if label is not None: lines.append(f"{'Variance:':<25} {label:>40}") @@ -456,11 +452,11 @@ class MultiPeriodDiDResults: n_bootstrap: Optional[int] = field(default=None) n_clusters: Optional[int] = field(default=None) # Variance-covariance family and cluster column for summary() labeling. + # vcov_type='conley' is rejected at fit-time for MultiPeriodDiD (Phase 1 + # supports cross-sectional Conley only via direct compute_robust_vcov); + # see _format_vcov_label. vcov_type: Optional[str] = field(default=None) cluster_name: Optional[str] = field(default=None) - # Conley spatial-HAC parameters; populated only when vcov_type="conley". - conley_cutoff_km: Optional[float] = field(default=None) - conley_kernel: Optional[str] = field(default=None) # --- Inference-field aliases (balance/external-adapter compatibility) --- @property @@ -563,8 +559,6 @@ def summary(self, alpha: Optional[float] = None) -> str: cluster_name=self.cluster_name, n_clusters=self.n_clusters, n_obs=self.n_obs, - conley_cutoff_km=self.conley_cutoff_km, - conley_kernel=self.conley_kernel, ) if label is not None: lines.append(f"{'Variance:':<25} {label:>50}") diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index 9148b234..d0eeff46 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -143,69 +143,24 @@ def fit( # type: ignore[override] "the full projection." ) - # Conley + TWFE: Conley meat = S.T @ K @ S survives FWL because it - # depends only on scores X*epsilon. 
FWL preserves both the - # residualized X and the residuals epsilon, so spatial-HAC on the - # within-transformed design equals spatial-HAC on a full-dummy - # design — UNLIKE hc2/hc2_bm which need the hat matrix. - # However, TWFE auto-clusters at unit by default - # (twfe.py:205-216), and Conley + cluster is deferred to Phase 2 - # (combined product kernel). When the user explicitly passes - # cluster=..., reject early here for a TWFE-specific message; the - # linalg validator's NotImplementedError fires later for non-TWFE - # call paths. - if self.vcov_type == "conley" and self.cluster is not None: - raise NotImplementedError( - f"TwoWayFixedEffects(cluster={self.cluster!r}, " - "vcov_type='conley') is deferred to Phase 2 (combined " - "product kernel). Drop cluster= for cross-sectional " - "Conley; the unit auto-cluster default is also disabled " - "when vcov_type='conley'." - ) - # Conley + wild_bootstrap: Conley is an analytical spatial-HAC - # variance and wild cluster bootstrap is a different inference path - # that resamples residuals within clusters. There is no clean - # composition — the two methods target different things — and - # combining them would either silently drop one or fail downstream - # (TWFE auto-cluster is disabled under Conley, so the bootstrap - # would receive cluster_ids=None and fail with a non-targeted - # error in `wild_bootstrap_se`). Reject early. - if self.vcov_type == "conley" and self.inference == "wild_bootstrap": + # Reject Conley on TWFE entirely. TWFE is intrinsically a multi- + # period panel estimator; cross-sectional Conley does not apply + # (same rationale as DifferenceInDifferences.fit's panel guard: + # same-unit cross-time pairs have d_ij=0 -> K=1, which together + # with within-transformed residuals that sum to zero per unit + # produces an anti-correlated cancellation, not the documented + # cross-sectional Conley meat). 
Phase 1 supports Conley only via + direct compute_robust_vcov on a single-period design; Phase 2 + will add a documented space-time HAC (Driscoll-Kraay product + kernel + sparse k-d-tree fast path). + if self.vcov_type == "conley": raise NotImplementedError( - "TwoWayFixedEffects(vcov_type='conley', inference='wild_bootstrap') " "is not supported: Conley is an analytical spatial-HAC variance and " "wild cluster bootstrap is a different inference path. Use " "inference='analytical' for Conley spatial HAC, or use " "vcov_type='hc1' with inference='wild_bootstrap'." + "TwoWayFixedEffects(vcov_type='conley') is deferred to " + "Phase 2 (space-time product kernel / Driscoll-Kraay). " + "Phase 1 supports cross-sectional Conley only via direct " + "compute_robust_vcov on a single-period design; " + "TwoWayFixedEffects is intrinsically a multi-period panel." ) - if self.vcov_type == "conley": - if survey_design is not None: - raise NotImplementedError( - "TwoWayFixedEffects(survey_design=..., " - "vcov_type='conley') is deferred to Phase 2+ " - "(Bertanha-Imbens 2014). Drop survey_design= for " - "cross-sectional Conley." - ) - if self.conley_coords is None: - raise ValueError( - "vcov_type='conley' requires conley_coords=(<lat_col>, " "<lon_col>) tuple of column names in the data." - ) - if self.conley_cutoff_km is None: - raise ValueError( - "vcov_type='conley' requires conley_cutoff_km (positive " "finite bandwidth)." - ) - _twfe_coord_cols = list(self.conley_coords) - if len(_twfe_coord_cols) != 2: - raise ValueError( - f"conley_coords must be a 2-tuple of column names; got " - f"{self.conley_coords!r}." - ) - for _col in _twfe_coord_cols: - if _col not in data.columns: - raise ValueError( - f"conley_coords references column {_col!r} which " f"is not in `data`.
- ) # Check for staggered treatment timing and warn if detected self._check_staggered_treatment(data, treatment, time, unit) @@ -581,8 +536,6 @@ def _refit_twfe(w_r): # remapped hc1 under the legacy alias path, not self.vcov_type. vcov_type=_fit_vcov_type, cluster_name=_twfe_cluster_label, - conley_cutoff_km=self.conley_cutoff_km if _fit_vcov_type == "conley" else None, - conley_kernel=self.conley_kernel if _fit_vcov_type == "conley" else None, ) self.is_fitted_ = True diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index c8d124df..ca742b85 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -2926,15 +2926,26 @@ should be a deliberate user choice. - Colella, F., Lalive, R., Sakalli, S. O., & Thoenig, M. (2019). Inference with Arbitrary Clustering. IZA DP No. 12584. Stata `acreg` reference implementation; cited as the parallel canonical implementation in the Stata ecosystem (not parity-tested here). **Scope:** Cross-sectional spatial heteroskedasticity-and-autocorrelation-consistent -standard errors for OLS / TWFE when residuals are spatially correlated. Extends +standard errors for OLS when residuals are spatially correlated. Extends White (1980) HC0 by allowing pairwise correlation that decays with geographic -distance. Available on `DifferenceInDifferences`, `TwoWayFixedEffects`, -`MultiPeriodDiD` via `vcov_type="conley"` plus `conley_coords`, -`conley_cutoff_km`, `conley_metric`, `conley_kernel`. `SyntheticDiD` is -explicitly excluded (it uses bootstrap/jackknife/placebo variance, not the -analytical sandwich); `SyntheticDiD(vcov_type="conley")` raises `TypeError`. -Phase 1: cross-sectional only; Phase 2 will add the time dimension -(Driscoll-Kraay) and a sparse k-d-tree fast path. +distance. Phase 1 supports cross-sectional Conley only via direct +`compute_robust_vcov` / `LinearRegression` on a single-period design — pass +`vcov_type="conley"` plus `conley_coords` (n × 2 array) and `conley_cutoff_km`. 
+ +**Panel estimators (`DifferenceInDifferences`, `MultiPeriodDiD`, +`TwoWayFixedEffects`) reject `vcov_type="conley"` at fit-time** with +`NotImplementedError`. They are intrinsically multi-period panel designs: +applying cross-sectional Conley over (unit, time) rows would treat same-unit +cross-time pairs as `d_ij = 0 → K = 1`, giving them full covariance weight as +if they were one clustered pair, while cross-unit pairs are weighted only by +spatial distance with no time-lag handling. That is neither documented Conley +1999 nor a documented space-time HAC. Practitioners needing Conley with a +panel design should pre-collapse to per-unit first-differences and call +`compute_robust_vcov` directly. `SyntheticDiD` is also excluded (it uses +bootstrap/jackknife/placebo variance, not the analytical sandwich); +`SyntheticDiD(vcov_type="conley")` raises `TypeError`. Phase 2 will add the +time dimension (Driscoll-Kraay product kernel) and a sparse k-d-tree fast +path. **Variance estimator (Conley 1999 Eq 4.2 in pairwise-distance form, OLS specialization):** @@ -2960,12 +2971,18 @@ project's no-silent-failures rule). Practitioners should rerun on a coarse cutof grid (e.g., 50, 100, 200, 500 km) and report the SE range, mirroring Conley's Section 5 robustness check. -**Note (FWL composability):** Unlike `vcov_type="hc2"` and `vcov_type="hc2_bm"`, -which depend on the full hat matrix and therefore reject TWFE within-transformation, -Conley's meat depends only on scores `X_i·ε_i`. FWL preserves both the residualized -`X` and the residuals `ε`, so the spatial-HAC sandwich computed on the -within-transformed design equals the sandwich on the full-dummy design. -`TwoWayFixedEffects(vcov_type="conley", ...)` is therefore supported. +**Note (FWL composability, Phase 1 status):** Conley's meat depends only on +scores `X_i·ε_i`, which FWL preserves under within-transformation. 
In +principle this means the spatial-HAC sandwich composes with TWFE's within- +transformation when applied to a single-period cross-sectional residualization +(unlike `vcov_type="hc2"` / `vcov_type="hc2_bm"`, whose leverage corrections +depend on the full hat matrix and reject TWFE outright). Phase 1 nevertheless +rejects `TwoWayFixedEffects(vcov_type="conley")` because TWFE is intrinsically +a multi-period panel estimator: the FWL-residualized design still has multiple +rows per unit at distinct (unit, time) coordinates, and Phase 1's +cross-sectional Conley does not handle the time dimension. Phase 2's +space-time product kernel (Driscoll-Kraay) is the correct contract for +panel TWFE. **Note (R conleyreg parity):** diff-diff's Conley implementation matches R `conleyreg` (Düsterhöft 2021, CRAN v0.1.9) to ≤ 1e-6 on three benchmark @@ -2975,10 +2992,10 @@ constant is 6371.01 km (mean radius), matching `cd benchmarks/R && Rscript generate_conley_golden.R`. **Edge cases / restrictions:** -- `vcov_type="conley"` + `cluster=` → `NotImplementedError` (combined kernel deferred to Phase 2) -- `vcov_type="conley"` + `weights=` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up) -- `vcov_type="conley"` + `absorb=` → `NotImplementedError` (only TWFE's two-FE within is supported in Phase 1; arbitrary `absorb` dimensions are deferred) +- Panel estimators (`DifferenceInDifferences`, `MultiPeriodDiD`, `TwoWayFixedEffects`) `+ vcov_type="conley"` → `NotImplementedError` at fit-time. 
Phase 1 supports cross-sectional Conley only; Phase 2 will add the space-time product kernel - `SyntheticDiD(vcov_type="conley")` → `TypeError` (SyntheticDiD uses bootstrap/jackknife/placebo variance, not the analytical sandwich; tracked in TODO.md) +- Cross-sectional `LinearRegression` / `compute_robust_vcov` `+ vcov_type="conley"` `+ cluster_ids=` → `NotImplementedError` (combined kernel deferred to Phase 2) +- Cross-sectional `LinearRegression` / `compute_robust_vcov` `+ vcov_type="conley"` `+ weights=` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up) - `n > 20_000`: emits `UserWarning` about O(n²) distance-matrix memory - `conley_cutoff_km ≤ 0`, `nan`, or `inf`: rejected with `ValueError`. The HC0 reduction at h→0 is documented but not the sanctioned path; users should pass `vcov_type="hc1"` - Identical coordinates (`d_ij = 0` for `i ≠ j`): `K(0) = 1`, contributing the full HC0 weight per Conley 1999 page 19. Documented behavior; no warning diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py index 27f6dbc5..32d392e0 100644 --- a/tests/test_conley_vcov.py +++ b/tests/test_conley_vcov.py @@ -684,41 +684,47 @@ def two_period_panel(self): return pd.DataFrame(rows) - def test_did_basic_with_conley(self, two_period_panel): - """DifferenceInDifferences fits with vcov_type='conley' and produces - finite SE > 0.""" + def test_did_with_conley_raises(self, two_period_panel): + """DifferenceInDifferences + vcov_type='conley' is rejected + unconditionally. DiD is intrinsically a two-period panel; cross- + sectional Conley over (unit, t=0) ∪ (unit, t=1) rows would treat + same-unit cross-time pairs as d_ij=0 -> K=1, mishandling the space- + time HAC. Phase 2 will add the space-time product kernel; Phase 1's + supported Conley path is direct compute_robust_vcov on a single- + period design. Closes CI reviewer P1 #1. 
+ """ from diff_diff import DifferenceInDifferences df = two_period_panel.copy() - df["did"] = df["treated"] * df["time"] - result = DifferenceInDifferences( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=2000.0, - ).fit(df, outcome="y", treatment="treated", time="time") - assert np.isfinite(result.se) and result.se > 0 - assert result.vcov_type == "conley" - assert result.conley_cutoff_km == 2000.0 - assert result.conley_kernel == "bartlett" + with pytest.raises(NotImplementedError, match="DifferenceInDifferences.*conley"): + DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(df, outcome="y", treatment="treated", time="time") - def test_did_summary_includes_conley_label(self, two_period_panel): + def test_did_with_conley_repeated_coords_raises(self, two_period_panel): + """Per CI reviewer P1 #1 recommendation: regression test where + coordinates repeat across multiple periods. The fit must reject + rather than silently produce wrong SE.""" from diff_diff import DifferenceInDifferences - df = two_period_panel.copy() - result = DifferenceInDifferences( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=1500.0, - ).fit(df, outcome="y", treatment="treated", time="time") - out = result.summary() - assert "Conley spatial HAC" in out - assert "1500" in out - assert "bartlett" in out - - def test_multi_period_did_with_conley(self, two_period_panel): + # Confirm the fixture has time-invariant coords per unit. 
+ coord_var = two_period_panel.groupby("unit")[["lat", "lon"]].nunique() + assert (coord_var.values == 1).all(), "Fixture coords must be time-invariant" + + with pytest.raises(NotImplementedError, match="conley"): + DifferenceInDifferences( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(two_period_panel, outcome="y", treatment="treated", time="time") + + def test_multi_period_did_with_conley_raises(self): + """MultiPeriodDiD is intrinsically a panel estimator; vcov_type='conley' + is rejected end-to-end. Closes CI reviewer P1 #1.""" from diff_diff import MultiPeriodDiD - # Build a 4-period panel for MultiPeriodDiD rng = np.random.default_rng(seed=13) n_units = 30 rows = [] @@ -734,26 +740,28 @@ def test_multi_period_did_with_conley(self, two_period_panel): import pandas as pd df_mp = pd.DataFrame(rows) - result = MultiPeriodDiD( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=2000.0, - ).fit(df_mp, outcome="y", treatment="treated", time="time", reference_period=1) - assert np.isfinite(result.avg_se) and result.avg_se > 0 - assert result.vcov_type == "conley" + with pytest.raises(NotImplementedError, match="MultiPeriodDiD.*conley"): + MultiPeriodDiD( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(df_mp, outcome="y", treatment="treated", time="time", reference_period=1) class TestConleyTWFE: - """Step 5: TwoWayFixedEffects with Conley SE. - - TWFE composes with Conley because the meat depends only on scores X*epsilon, - both of which FWL preserves under within-transformation. This is UNLIKE - hc2/hc2_bm which depend on the full hat matrix and are rejected on TWFE. + """TwoWayFixedEffects rejects vcov_type='conley' end-to-end. + + TWFE is intrinsically a multi-period panel estimator. Cross-sectional + Conley over (unit, time) rows would treat same-unit cross-time pairs as + d_ij=0 -> K=1, mishandling the space-time HAC. 
The supported Phase 1 + path for Conley with FE is to demean externally (single-period collapse) + and call compute_robust_vcov directly. Phase 2 will add a space-time + product kernel / Driscoll-Kraay estimator. Closes CI reviewer P1 #1. """ @pytest.fixture def panel(self): - """Build a 2-period panel with geocoords for TWFE testing.""" + """Build a 2-period panel with geocoords for TWFE rejection tests.""" rng = np.random.default_rng(seed=17) n_units = 30 rows = [] @@ -773,21 +781,20 @@ def panel(self): return pd.DataFrame(rows) - def test_twfe_conley_runs(self, panel): + def test_twfe_conley_raises(self, panel): + """TWFE + vcov_type='conley' is rejected unconditionally.""" from diff_diff import TwoWayFixedEffects - result = TwoWayFixedEffects( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=2000.0, - ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") - assert np.isfinite(result.se) and result.se > 0 - assert result.vcov_type == "conley" - assert result.conley_cutoff_km == 2000.0 - assert result.cluster_name is None # auto-cluster disabled under conley + with pytest.raises(NotImplementedError, match="TwoWayFixedEffects.*conley"): + TwoWayFixedEffects( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") def test_twfe_conley_with_explicit_cluster_raises(self, panel): - """User explicitly setting cluster=... with conley should raise.""" + """User explicitly setting cluster=... 
with conley still raises (the + outer panel-rejection raise fires first).""" from diff_diff import TwoWayFixedEffects with pytest.raises(NotImplementedError, match="conley"): @@ -799,14 +806,11 @@ def test_twfe_conley_with_explicit_cluster_raises(self, panel): ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") def test_twfe_conley_with_wild_bootstrap_raises(self, panel): - """Conley analytical spatial-HAC and wild cluster bootstrap are - different inference paths; combining them would either silently drop - one or fail downstream (TWFE auto-cluster is disabled under Conley, - so the bootstrap would receive cluster_ids=None). Reject early. - Closes CI reviewer P1 CQ2.""" + """Conley + wild_bootstrap on TWFE raises (the outer panel-rejection + fires before the inference-mode check).""" from diff_diff import TwoWayFixedEffects - with pytest.raises(NotImplementedError, match="wild_bootstrap"): + with pytest.raises(NotImplementedError, match="conley"): TwoWayFixedEffects( vcov_type="conley", inference="wild_bootstrap", @@ -814,29 +818,24 @@ def test_twfe_conley_with_wild_bootstrap_raises(self, panel): conley_cutoff_km=2000.0, ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") - def test_twfe_conley_FWL_invariance(self, panel): - """TWFE Conley SE matches DifferenceInDifferences with same kwargs - (verifies FWL composability — Conley meat survives within-transformation - because it depends only on scores X*epsilon).""" - from diff_diff import DifferenceInDifferences, TwoWayFixedEffects + def test_twfe_conley_repeated_coords_across_periods_raises(self, panel): + """Per CI reviewer P1 #1 recommendation: regression test where + coordinates repeat across multiple periods. 
Without the panel + rejection, cross-sectional Conley would silently produce wrong SE + because pairs (i, t1) <-> (i, t2) have d_ij = 0 -> K = 1.""" + from diff_diff import TwoWayFixedEffects - twfe_result = TwoWayFixedEffects( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=2000.0, - ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") - # DiD equivalent: simple 2x2, no FE within-transformation - did_result = DifferenceInDifferences( - vcov_type="conley", - conley_coords=("lat", "lon"), - conley_cutoff_km=2000.0, - ).fit(panel, outcome="y", treatment="treated", time="time") - # ATT estimates should be similar (panel structure differs only in FE handling). - # We don't expect bit-equivalence — DiD without FE absorbs unit FE - # into the residuals while TWFE removes them. The key invariance is - # that the SE families are both finite and reasonable. - assert np.isfinite(twfe_result.se) and twfe_result.se > 0 - assert np.isfinite(did_result.se) and did_result.se > 0 + # Each unit's lat/lon is constant across t=0 and t=1 in the fixture. + # Confirm via grouping that coords are time-invariant. + coord_var = panel.groupby("unit")[["lat", "lon"]].nunique() + assert (coord_var.values == 1).all(), "Fixture coords must be time-invariant" + + with pytest.raises(NotImplementedError, match="conley"): + TwoWayFixedEffects( + vcov_type="conley", + conley_coords=("lat", "lon"), + conley_cutoff_km=2000.0, + ).fit(panel, outcome="y", treatment="treated", time="time", unit="unit") class TestConleyEstimatorValidation: @@ -860,49 +859,44 @@ def df(self): } ) - def test_did_conley_with_cluster_raises(self, df): + def test_did_conley_combinations_all_raise(self, df): + """Every DifferenceInDifferences + vcov_type='conley' combination + rejects unconditionally (DiD is intrinsically a two-period panel; + cross-sectional Conley is unsafe over (unit, time) rows). Asserts + the reject regardless of cluster=, absorb=, or missing coords/cutoff. 
+ Closes CI reviewer P1 #1. + """ from diff_diff import DifferenceInDifferences - with pytest.raises(NotImplementedError, match="cluster.*conley"): + # cluster + conley + with pytest.raises(NotImplementedError, match="conley"): DifferenceInDifferences( vcov_type="conley", cluster="stratum", conley_coords=("lat", "lon"), conley_cutoff_km=100.0, ).fit(df, outcome="y", treatment="treated", time="time") - - def test_did_conley_without_coords_raises(self, df): - from diff_diff import DifferenceInDifferences - - with pytest.raises(ValueError, match="conley_coords"): + # missing conley_coords + with pytest.raises(NotImplementedError, match="conley"): DifferenceInDifferences( vcov_type="conley", conley_cutoff_km=100.0, ).fit(df, outcome="y", treatment="treated", time="time") - - def test_did_conley_without_cutoff_raises(self, df): - from diff_diff import DifferenceInDifferences - - with pytest.raises(ValueError, match="conley_cutoff_km"): + # missing conley_cutoff_km + with pytest.raises(NotImplementedError, match="conley"): DifferenceInDifferences( vcov_type="conley", conley_coords=("lat", "lon"), ).fit(df, outcome="y", treatment="treated", time="time") - - def test_did_conley_unknown_coord_column_raises(self, df): - from diff_diff import DifferenceInDifferences - - with pytest.raises(ValueError, match="not in `data`"): + # unknown coord column (data validation skipped — outer reject fires first) + with pytest.raises(NotImplementedError, match="conley"): DifferenceInDifferences( vcov_type="conley", conley_coords=("missing_lat", "lon"), conley_cutoff_km=100.0, ).fit(df, outcome="y", treatment="treated", time="time") - - def test_did_conley_with_absorb_raises(self, df): - from diff_diff import DifferenceInDifferences - - with pytest.raises(NotImplementedError, match="absorb.*conley"): + # absorb + conley + with pytest.raises(NotImplementedError, match="conley"): DifferenceInDifferences( vcov_type="conley", conley_coords=("lat", "lon"), From 
30887fd827e86e5f25bbb5b1c02cf414e32139e7 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 14:17:44 -0400 Subject: [PATCH 4/9] Reject Conley + survey at LinearRegression entry; align stale docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CI Codex review of PR #411 (P1 + P2): P1 — LinearRegression(vcov_type="conley", survey_design=...) silently bypassed the documented Conley+survey rejection. The downstream _validate_vcov_args (inside compute_robust_vcov) rejects this combination, but LinearRegression.fit() sets return_vcov=False on the solve_ols call when survey vcov is needed (so the linalg validator never runs), and the survey vcov path then overwrites vcov_ with a non-Conley variance under a Conley request. Front-door the rejection at LinearRegression entry so the contract is enforced uniformly. New regression test test_linear_regression_conley_with_survey_design_raises locks the new guard with a make_pweight_design fixture. P2 — DifferenceInDifferences and MultiPeriodDiD docstrings still listed "conley" as a vcov_type option and described the conley_* kwargs as if the path were reachable. Updated both to spell out the Phase 1 panel rejection and point users at compute_robust_vcov / LinearRegression for cross-sectional Conley. Rewrote the llms-full.txt Conley section around the LinearRegression / compute_robust_vcov surface, replaced the panel- estimator example with a cross-sectional one, and listed the panel rejection in the restrictions table. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/estimators.py | 67 ++++++++++++++++------------------ diff_diff/guides/llms-full.txt | 40 ++++++++++++-------- diff_diff/linalg.py | 19 ++++++++++ tests/test_conley_vcov.py | 24 ++++++++++++ 4 files changed, 99 insertions(+), 51 deletions(-) diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index 17d57ec2..f000a822 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -57,7 +57,7 @@ class DifferenceInDifferences: ``vcov_type``: with ``"hc1"`` dispatches to CR1 (Liang-Zeger); with ``"hc2_bm"`` dispatches to CR2 Bell-McCaffrey (Pustejovsky-Tipton 2018 symmetric-sqrt + Satterthwaite DOF). - vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, optional + vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, optional Variance-covariance family. Defaults to the ``robust`` alias. - ``"classical"``: non-robust OLS SEs, ``sigma_hat^2 * (X'X)^{-1}``. @@ -69,11 +69,15 @@ class DifferenceInDifferences: with ``cluster=``, Pustejovsky-Tipton (2018) CR2 cluster-robust. (Note: ``MultiPeriodDiD`` does NOT yet support ``cluster=`` with ``"hc2_bm"`` — see ``MultiPeriodDiD`` docstring and REGISTRY.md.) - - ``"conley"``: Conley (1999) spatial-HAC sandwich. Requires - ``conley_coords`` (lat/lon column tuple) and ``conley_cutoff_km`` - (positive bandwidth — REQUIRED, no default per the no-silent-failures - rule). Combining with ``cluster=``, ``survey_design=``, or ``absorb=`` - raises ``NotImplementedError`` (deferred to Phase 2+). + + ``vcov_type="conley"`` (Conley 1999 spatial-HAC) is **rejected** at + fit-time on ``DifferenceInDifferences`` in Phase 1 because DiD is + intrinsically a two-period panel design, and Phase 1's cross- + sectional Conley does not handle the time dimension. The supported + Phase 1 path for Conley is direct ``compute_robust_vcov`` / + ``LinearRegression`` on a single-period regression. 
Phase 2 will + add the space-time product kernel (Driscoll-Kraay) and lift the + rejection. alpha : float, default=0.05 Significance level for confidence intervals. inference : str, default="analytical" @@ -93,20 +97,13 @@ class DifferenceInDifferences: - "warn": Issue warning and drop linearly dependent columns (default) - "error": Raise ValueError - "silent": Drop columns silently without warning - conley_coords : tuple of (str, str), optional - Column-name tuple ``(lat_col, lon_col)`` for Conley spatial HAC SE. - Required when ``vcov_type="conley"``; raises ``ValueError`` otherwise. - conley_cutoff_km : float, optional - Positive finite bandwidth in km (haversine) or coord units (euclidean). - Required when ``vcov_type="conley"``; no default per Conley 1999 - Section 5 sensitivity-grid recommendation. - conley_metric : str, default "haversine" - Distance metric: ``"haversine"`` (lat/lon, km), ``"euclidean"`` (any - units), or a callable ``(coords1, coords2) -> n×n``. - conley_kernel : str, default "bartlett" - Kernel function: ``"bartlett"`` (PSD-guaranteed, default) or - ``"uniform"`` (emits ``UserWarning`` if the meat has a materially - negative eigenvalue per Conley 1999 footnote 11). + conley_coords, conley_cutoff_km, conley_metric, conley_kernel + Accepted by the constructor for sklearn-style API symmetry, but + ``vcov_type="conley"`` is rejected at fit-time on + ``DifferenceInDifferences`` (see ``vcov_type`` above). Use direct + ``compute_robust_vcov`` / ``LinearRegression`` on a single-period + regression for cross-sectional Conley in Phase 1; Phase 2 will lift + the panel rejection. Attributes ---------- @@ -1036,7 +1033,7 @@ class MultiPeriodDiD(DifferenceInDifferences): ``TODO.md``; also documented as a Note in ``docs/methodology/REGISTRY.md`` under the HeterogeneousAdoptionDiD requirements-checklist block. 
- vcov_type : {"classical", "hc1", "hc2", "hc2_bm", "conley"}, optional + vcov_type : {"classical", "hc1", "hc2", "hc2_bm"}, optional Variance-covariance family. Defaults to the ``robust`` alias. - ``"classical"``: non-robust OLS SEs, ``sigma_hat^2 * (X'X)^{-1}``. @@ -1047,23 +1044,21 @@ class MultiPeriodDiD(DifferenceInDifferences): - ``"hc2_bm"``: one-way HC2 + Imbens-Kolesar (2016) Satterthwaite DOF per coefficient plus a contrast-aware DOF for the post-period-average ATT. **Unsupported with** ``cluster=`` — see ``cluster`` above. - - ``"conley"``: Conley (1999) spatial-HAC sandwich. Requires - ``conley_coords`` and ``conley_cutoff_km``. Combining with - ``cluster=``, ``survey_design=``, or ``absorb=`` raises - ``NotImplementedError`` (deferred to Phase 2+). + + ``vcov_type="conley"`` (Conley 1999 spatial-HAC) is **rejected** at + fit-time on ``MultiPeriodDiD`` in Phase 1 because MultiPeriodDiD is + intrinsically a multi-period panel estimator and Phase 1's cross- + sectional Conley does not handle the time dimension. The supported + Phase 1 path for Conley is direct ``compute_robust_vcov`` / + ``LinearRegression`` on a single-period regression. Phase 2 will + add the space-time product kernel (Driscoll-Kraay) and lift the + rejection. alpha : float, default=0.05 Significance level for confidence intervals. - conley_coords : tuple of (str, str), optional - Column-name tuple ``(lat_col, lon_col)`` for Conley spatial HAC SE. - Required when ``vcov_type="conley"``. - conley_cutoff_km : float, optional - Positive finite bandwidth for Conley spatial HAC. Required when - ``vcov_type="conley"`` (no default per Conley 1999 sensitivity-grid). - conley_metric : str, default "haversine" - Distance metric for Conley: ``"haversine"`` (lat/lon, km), - ``"euclidean"``, or a callable. - conley_kernel : str, default "bartlett" - Conley kernel: ``"bartlett"`` (PSD-guaranteed) or ``"uniform"``. 
+ conley_coords, conley_cutoff_km, conley_metric, conley_kernel + Accepted by the constructor for sklearn-style API symmetry, but + ``vcov_type="conley"`` is rejected at fit-time on ``MultiPeriodDiD`` + (see ``vcov_type`` above). Attributes ---------- diff --git a/diff_diff/guides/llms-full.txt b/diff_diff/guides/llms-full.txt index 8422b013..ce9f4756 100644 --- a/diff_diff/guides/llms-full.txt +++ b/diff_diff/guides/llms-full.txt @@ -1886,21 +1886,31 @@ inference = reg.get_inference(coef_index) # -> InferenceResult ### Conley Spatial HAC Standard Errors Conley (1999) spatial heteroskedasticity-and-autocorrelation-consistent standard -errors for DiD designs with geocoded data. Use when residuals are spatially -correlated (geo experiments, regional shocks, common-supplier effects). -Available on `DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`. - -```python -from diff_diff import TwoWayFixedEffects - -result = TwoWayFixedEffects( +errors. Use when residuals are spatially correlated (geo experiments, regional +shocks, common-supplier effects). **Phase 1 supports cross-sectional Conley only**, +via direct `compute_robust_vcov` / `LinearRegression` on a single-period design. +**Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, +`MultiPeriodDiD`) reject `vcov_type="conley"` at fit-time** with +`NotImplementedError`: cross-sectional Conley over `(unit, time)` rows would +treat same-unit cross-time pairs as `d_ij = 0 → K = 1`, mishandling the +space-time HAC. Practitioners pre-collapse to per-unit first-differences and +call `compute_robust_vcov` directly. Phase 2 will add the space-time product +kernel (Driscoll-Kraay) and lift the panel rejection. + +```python +import numpy as np +from diff_diff.linalg import LinearRegression + +# Cross-sectional design: 1 row per unit, n × 2 lat/lon coords. 
+reg = LinearRegression( vcov_type="conley", - conley_coords=("lat", "lon"), # column names with lat/lon (degrees) + include_intercept=True, + conley_coords=coords, # n × 2 array of (lat, lon) in degrees conley_cutoff_km=200.0, # required; no default conley_metric="haversine", # or "euclidean", or callable conley_kernel="bartlett", # or "uniform" -).fit(data, outcome="y", treatment="treated", time="post", unit="unit_id") -print(result.summary()) # variance line: "Conley spatial HAC (bartlett, cutoff=200.0km)" +).fit(X, y) +se = np.sqrt(np.diag(reg.vcov_)) ``` **Variance estimator:** @@ -1912,7 +1922,7 @@ print(result.summary()) # variance line: "Conley spatial HAC (bartlett, cutoff= - `"uniform"`: `K(u) = 1{|u| ≤ 1}`. Easier to interpret; emits `UserWarning` if the resulting meat has a materially negative eigenvalue. **Distance metrics:** -- `"haversine"` (default): great-circle in km, Earth's mean radius 6371 km. Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. +- `"haversine"` (default): great-circle in km, Earth's mean radius 6371.01 km (matching R `conleyreg`). Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. - `"euclidean"`: from projected coordinates; user owns the units. - `callable(coords1, coords2) -> n×n array`: custom distance for non-geographic networks. @@ -1921,9 +1931,9 @@ recommends a sensitivity grid (e.g., 50, 100, 200, 500 km) and reporting the SE range. **Restrictions in this release:** -- `vcov_type="conley"` + `cluster=` → `NotImplementedError` (combined kernel deferred to Phase 2). -- `vcov_type="conley"` + `weights=` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up). -- `vcov_type="conley"` + `absorb=` → `NotImplementedError` (only TWFE's two-FE within-transformation is supported). +- Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`) `+ vcov_type="conley"` → `NotImplementedError` at fit-time. Phase 2 adds the space-time product kernel. 
+- `LinearRegression(vcov_type="conley", cluster_ids=...)` → `NotImplementedError` (combined kernel deferred to Phase 2). +- `LinearRegression(vcov_type="conley", weights=...)` / `survey_design=` → `NotImplementedError` (Bertanha-Imbens 2014 territory; Phase 5 follow-up). - `SyntheticDiD(vcov_type="conley")` → `TypeError` (uses bootstrap, not analytical sandwich). - `n > 20_000` emits a `UserWarning` about O(n²) distance-matrix memory. diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index 5efde1da..10138895 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -2534,6 +2534,25 @@ def fit( stacklevel=2, ) + # Reject vcov_type='conley' + survey_design at LinearRegression entry. + # The downstream `_validate_vcov_args` rejects this combination inside + # `compute_robust_vcov`, but `LinearRegression.fit()` skips that path + # entirely when the survey design needs survey variance (return_vcov + # is set to False on the solve_ols call), and the survey vcov path + # would silently overwrite the result with a non-Conley variance + # under a Conley request. Front-door the rejection here so the + # contract is enforced uniformly. Phase 5 (Bertanha-Imbens 2014 + # weighted-Conley) will lift this; Phase 1 supports cross-sectional + # unweighted Conley only. + if _fit_vcov_type == "conley" and _use_survey_vcov: + raise NotImplementedError( + "LinearRegression(vcov_type='conley', survey_design=...) " + "is deferred to Phase 5 (Bertanha-Imbens 2014 weighted-" + "Conley). Phase 1 supports cross-sectional unweighted " + "Conley only via compute_robust_vcov / LinearRegression " + "without a survey design." + ) + # Resolve effective fit-time weights/weight_type WITHOUT mutating # self. When a survey design is present, canonicalize weights from # the design so coefficient estimation and survey vcov agree. 
diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py index 32d392e0..f4e306a8 100644 --- a/tests/test_conley_vcov.py +++ b/tests/test_conley_vcov.py @@ -659,6 +659,30 @@ def test_solve_ols_conley_path(self, fit_data): assert vcov is not None assert np.all(np.isfinite(np.diag(vcov))) + def test_linear_regression_conley_with_survey_design_raises(self, fit_data): + """LinearRegression(vcov_type='conley', survey_design=...) must raise + NotImplementedError before fitting. Without the front-door guard, + LinearRegression.fit() silently bypasses the documented Conley+survey + rejection: it sets `return_vcov=False` on the solve_ols call when + survey vcov is needed, skipping the linalg validator, and the survey + vcov path then overwrites `vcov_` with a non-Conley variance under a + Conley request. Phase 5 will lift this rejection (Bertanha-Imbens 2014 + weighted-Conley); Phase 1 is unweighted only. + """ + from diff_diff.survey import make_pweight_design + + X, y, coords = fit_data + n = X.shape[0] + survey = make_pweight_design(np.ones(n)) + with pytest.raises(NotImplementedError, match="conley.*survey"): + LinearRegression( + vcov_type="conley", + include_intercept=True, + conley_coords=coords, + conley_cutoff_km=2000.0, + survey_design=survey, + ).fit(X, y) + class TestConleyEstimatorIntegration: """Step 4 smoke tests: DifferenceInDifferences and MultiPeriodDiD accept From 75df4d2d0150cb572da541fd26e6d684ba49bb33 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 14:37:38 -0400 Subject: [PATCH 5/9] Drop overstated Bartlett PSD claim; apply indefiniteness guard to both kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CI Codex review of PR #411 (P1 + P3): P1 — The implementation called the radial 1-D Bartlett kernel "PSD-guaranteed" by citing Conley 1999 Eq 3.14 + Andrews 1991. 
Conley's explicit PSD Bartlett formula (Eq 3.14, page 12) is the 2-D separable product window `(1 - |j|/L_M)(1 - |k|/L_N)` indexed on a lattice; the 1-D radial form on pairwise distance that diff-diff and R `conleyreg` implement is a practitioner specialization (Hsiang 2010, Colella et al. 2019) that is not explicitly written in the paper and is therefore not formally PSD-guaranteed. Reframe the kernel docstring in practitioner-specialization terms, drop the PSD-guaranteed claim, and lift the meat-eigenvalue PSD guard out of the uniform-only branch so it fires for both supported kernels. The warning message now names the active kernel and explicitly states neither radial form is formally PSD. New regression test test_indefinite_meat_warning_fires_for_bartlett locks the lifted guard by patching `_bartlett_kernel` to return an aggressively indefinite matrix and asserting the warning surfaces with the kernel name. P3 — Stale wording cleanup: - conley.py:128-132 missing-coords error message pointed at TwoWayFixedEffects(conley_coords=...) even though TWFE rejects Conley in Phase 1; redirect to LinearRegression / compute_robust_vcov. - TestConleyEstimatorIntegration class docstring claimed panel estimators accept Conley and print a label; rewrote to describe fit-time panel rejection. Doc surfaces (REGISTRY ConleySpatialHAC kernel section, llms-full.txt kernels block, CHANGELOG `conley_kernel` description, conley-1999-review.md PSD-failure note) updated to reflect the both-kernels guard and the radial-specialization framing.
Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 2 +- diff_diff/conley.py | 65 ++++++++++------ diff_diff/guides/llms-full.txt | 6 +- docs/methodology/REGISTRY.md | 6 +- docs/methodology/papers/conley-1999-review.md | 5 +- tests/test_conley_vcov.py | 77 ++++++++++++++++++- 6 files changed, 130 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56131862..e9ed8f48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- **Conley (1999) spatial-HAC standard errors via `vcov_type="conley"`** on cross-sectional `LinearRegression` / `compute_robust_vcov` (Phase 1 of the spillover-conley initiative). Keyword arguments: `conley_coords` (n × 2 array of lat/lon or projected coords), `conley_cutoff_km=` (positive finite bandwidth in km for haversine, or coord units for euclidean — REQUIRED, no default per the no-silent-failures contract), `conley_metric="haversine"|"euclidean"|callable` (default `"haversine"`; great-circle uses Earth's mean radius 6371.01 km matching R `conleyreg`), `conley_kernel="bartlett"|"uniform"` (default `"bartlett"` is PSD-guaranteed; `"uniform"` emits `UserWarning` if the meat has a materially negative eigenvalue per Conley 1999 footnote 11). Variance estimator `Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1}` (Conley 1999 Eq 4.2). **Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`) reject `vcov_type="conley"` at fit-time with `NotImplementedError`** — Phase 1's cross-sectional Conley does not handle the time dimension. Applying it over (unit, time) rows would treat same-unit cross-time pairs as `d_ij = 0 → K = 1`, mishandling the space-time HAC. Practitioners needing Conley with a panel design should pre-collapse to per-unit first-differences and call `compute_robust_vcov` directly on a single-period regression. 
Phase 2 will add the space-time product kernel (Driscoll-Kraay) for full panel support. `SyntheticDiD(vcov_type="conley")` raises `TypeError` (uses bootstrap variance, not analytical sandwich); `set_params` mirrors the constructor rejection. `vcov_type="conley"` + `cluster_ids=` / `weights=` / `survey_design=` raises `NotImplementedError` (combined product kernel + Bertanha-Imbens 2014 weighted-Conley deferred to follow-up phases). `n > 20_000` emits a `UserWarning` about the dense O(n²) distance-matrix memory; sparse k-d-tree fast path is queued for Phase 2. Helpers live in new module `diff_diff/conley.py` (`_haversine_km`, `_pairwise_distance_matrix`, `_bartlett_kernel`, `_uniform_kernel`, `_validate_conley_kwargs`, `_compute_conley_vcov`); `compute_robust_vcov` in `diff_diff/linalg.py` imports the dispatch helpers. R `conleyreg` parity (Düsterhöft 2021, CRAN v0.1.9) on three benchmark fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`, regenerable via `benchmarks/R/generate_conley_golden.R`); observed max abs diff 5.7e-16. Earth radius 6371.01 km matches `conleyreg::haversine_dist`. Test file `tests/test_conley_vcov.py` skips parity cleanly when the JSON is absent. New REGISTRY section `## ConleySpatialHAC`. Tracked on `BRIEFING.md` as Phase 1 of the 6-phase initiative (Phase 2: space-time product kernel + sparse fast path + panel-estimator support; Phase 3: ring-indicator spillover-aware DiD per Butts 2021; Phase 4a/4b: mechanical extension to IF-aggregation and sandwich-derived estimators; Phase 5: survey design support). +- **Conley (1999) spatial-HAC standard errors via `vcov_type="conley"`** on cross-sectional `LinearRegression` / `compute_robust_vcov` (Phase 1 of the spillover-conley initiative). 
Keyword arguments: `conley_coords` (n × 2 array of lat/lon or projected coords), `conley_cutoff_km=` (positive finite bandwidth in km for haversine, or coord units for euclidean — REQUIRED, no default per the no-silent-failures contract), `conley_metric="haversine"|"euclidean"|callable` (default `"haversine"`; great-circle uses Earth's mean radius 6371.01 km matching R `conleyreg`), `conley_kernel="bartlett"|"uniform"` (default `"bartlett"` evaluated on pairwise distance `d_ij/h`, matching R `conleyreg`; both kernels emit a `UserWarning` if the resulting meat has a materially negative eigenvalue. Conley 1999's explicit PSD Bartlett formula is the 2-D separable product window on a lattice (Eq 3.14); the 1-D radial pairwise specialization that diff-diff and R `conleyreg` implement is a practitioner convention that is not formally PSD-guaranteed). Variance estimator `Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij/h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1}` (Conley 1999 Eq 4.2). **Panel estimators (`DifferenceInDifferences`, `TwoWayFixedEffects`, `MultiPeriodDiD`) reject `vcov_type="conley"` at fit-time with `NotImplementedError`** — Phase 1's cross-sectional Conley does not handle the time dimension. Applying it over (unit, time) rows would treat same-unit cross-time pairs as `d_ij = 0 → K = 1`, mishandling the space-time HAC. Practitioners needing Conley with a panel design should pre-collapse to per-unit first-differences and call `compute_robust_vcov` directly on a single-period regression. Phase 2 will add the space-time product kernel (Driscoll-Kraay) for full panel support. `SyntheticDiD(vcov_type="conley")` raises `TypeError` (uses bootstrap variance, not analytical sandwich); `set_params` mirrors the constructor rejection. `vcov_type="conley"` + `cluster_ids=` / `weights=` / `survey_design=` raises `NotImplementedError` (combined product kernel + Bertanha-Imbens 2014 weighted-Conley deferred to follow-up phases). 
`n > 20_000` emits a `UserWarning` about the dense O(n²) distance-matrix memory; sparse k-d-tree fast path is queued for Phase 2. Helpers live in new module `diff_diff/conley.py` (`_haversine_km`, `_pairwise_distance_matrix`, `_bartlett_kernel`, `_uniform_kernel`, `_validate_conley_kwargs`, `_compute_conley_vcov`); `compute_robust_vcov` in `diff_diff/linalg.py` imports the dispatch helpers. R `conleyreg` parity (Düsterhöft 2021, CRAN v0.1.9) on three benchmark fixtures (`benchmarks/data/r_conleyreg_conley_golden.json`, regenerable via `benchmarks/R/generate_conley_golden.R`); observed max abs diff 5.7e-16. Earth radius 6371.01 km matches `conleyreg::haversine_dist`. Test file `tests/test_conley_vcov.py` skips parity cleanly when the JSON is absent. New REGISTRY section `## ConleySpatialHAC`. Tracked on `BRIEFING.md` as Phase 1 of the 6-phase initiative (Phase 2: space-time product kernel + sparse fast path + panel-estimator support; Phase 3: ring-indicator spillover-aware DiD per Butts 2021; Phase 4a/4b: mechanical extension to IF-aggregation and sandwich-derived estimators; Phase 5: survey design support). - **Tutorial 21: HAD Pre-test Workflow** (`docs/tutorials/21_had_pretest_workflow.ipynb`) — composite pre-test walkthrough for `HeterogeneousAdoptionDiD` building on Tutorial 20's brand-campaign framing. Uses a 60-DMA × 8-week panel close in shape to T20's but with the dose distribution drawn from `Uniform[$0.01K, $50K]` (vs T20's `[$5K, $50K]`); the true support is strictly positive but very near zero, chosen so the QUG step in `did_had_pretest_workflow` fails-to-reject `H0: d_lower = 0` in this finite sample and the verdict text fires the load-bearing "Assumption 7 deferred" pivot for the upgrade-arc narrative. 
(HAD's `design="auto"` selector — a separate min/median heuristic at `had.py::_detect_design`, NOT the QUG p-value — independently lands on the `continuous_at_zero` identification path with target `WAS` on this panel because `d.min() < 0.01 * median(|d|)`. The QUG test and the design selector are independent rules that point to the same identification path here.) Walks through three surfaces: (a) `did_had_pretest_workflow(aggregate="overall")` on a two-period collapse, where the verdict explicitly flags Step 2 (Assumption 7 pre-trends) as not run because a single pre-period structurally cannot support a pre-trends test, and the structural fields `pretrends_joint` / `homogeneity_joint` are both `None`; (b) `did_had_pretest_workflow(aggregate="event_study")` on the full multi-period panel, where the verdict reads "TWFE admissible under Section 4 assumptions" because all three testable diagnostics (QUG + joint pre-trends Stute over 3 horizons + joint homogeneity Stute over 4 horizons) fail-to-reject — non-rejection evidence under finite-sample power and test specification, not proof that the identifying assumptions hold; and (c) a side panel exercising both `yatchew_hr_test` null modes — `null="linearity"` (default, paper Theorem 7) vs `null="mean_independence"` (Phase 4 R-parity with R `YatchewTest::yatchew_test(order=0)`) — on the within-pre-period first-difference paired with post-period dose, illustrating the stricter null's larger residual variance (`sigma2_lin` 7.01 vs 6.53) and smaller p-value (0.29 vs 0.49). Companion drift-test file `tests/test_t21_had_pretest_workflow_drift.py` (16 tests pinning panel composition, both verdict pivots, structural anchors on both paths, deterministic QUG / Yatchew statistics, bootstrap p-value tolerance bands per `feedback_bootstrap_drift_tests_need_backend_tolerance`, and `HAD(design="auto")` resolution to `continuous_at_zero` on this panel). 
T20's "Composite pretest workflow" Extensions bullet updated with a forward-pointer to T21. T22 weighted/survey HAD tutorial remains queued as a separate notebook PR. - **`ChaisemartinDHaultfoeuille.by_path` and `paths_of_interest` now compose with `survey_design`** for analytical Binder TSL SE and replicate-weight bootstrap variance. The `NotImplementedError` gate at `chaisemartin_dhaultfoeuille.py:1233-1239` is replaced by a per-path multiplier-bootstrap-only gate (`survey_design + n_bootstrap > 0` under by_path / paths_of_interest still raises, since the survey-aware perturbation pivot for path-restricted IFs is methodologically underived). Per-path SE routes through the existing `_survey_se_from_group_if` cell-period allocator: the per-period IF (`U_pp_l_path`) is built with non-path switcher-side contributions skipped (control contributions are unchanged, matching the joiners/leavers IF convention; preserves the row-sum identity `U_pp.sum(axis=1) == U`), cohort-recentered via `_cohort_recenter_per_period`, then expanded to observations as `psi_i = U_pp[g_i, t_i] · (w_i / W_{g_i, t_i})`. Replicate-weight designs unconditionally use the cell allocator (Class A contract from PR #323). New `_refresh_path_inference` helper post-call refreshes `safe_inference` on every populated entry across `multi_horizon_inference`, `placebo_horizon_inference`, `path_effects`, and `path_placebos` so all four surfaces use the same final `df_survey` after per-path replicate fits append `n_valid` to the shared accumulator. Path-enumeration ranking under `survey_design` remains unweighted (group-cardinality, not population-weight mass). Lonely-PSU policy stays sample-wide, not per-path. Telescope invariant: on a single-path panel, per-path SE matches the global non-by_path survey SE bit-exactly. **No R parity** — R `did_multiplegt_dyn` does not support survey weighting; this is a Python-only methodology extension. 
The global non-by_path TSL multiplier-bootstrap path is unaffected (anti-regression test `tests/test_chaisemartin_dhaultfoeuille.py::TestByPathSurveyDesignAnalytical::test_global_survey_plus_n_bootstrap_still_works` locks the per-path-only scope of the new gate). Cross-surface invariants regression-tested at `TestByPathSurveyDesignAnalytical` (~17 tests across gate / dispatch / analytical SE / replicate-weight SE / per-path placebos / `trends_linear` composition / unobserved-path warnings / final-df refresh regressions) and `TestByPathSurveyDesignTelescope`. See `docs/methodology/REGISTRY.md` §`ChaisemartinDHaultfoeuille` `Note (Phase 3 by_path ...)` → "Per-path survey-design SE" for the full contract. - **Inference-field aliases on staggered result classes** for adapter / external-consumer compatibility. Read-only `@property` aliases expose the flat `att` / `se` / `conf_int` / `p_value` / `t_stat` names (matching `DiDResults` / `TROPResults` / `SyntheticDiDResults` / `HeterogeneousAdoptionDiDResults`) on every result class that previously only carried prefixed canonical fields: `CallawaySantAnnaResults`, `StackedDiDResults`, `EfficientDiDResults`, `ChaisemartinDHaultfoeuilleResults`, `StaggeredTripleDiffResults`, `WooldridgeDiDResults`, `SunAbrahamResults`, `ImputationDiDResults`, `TwoStageDiDResults` (mapping to `overall_*`); `ContinuousDiDResults` (mapping to `overall_att_*`, ATT-side as the headline, ACRT-side accessible unchanged via `overall_acrt_*`); `MultiPeriodDiDResults` (mapping to `avg_*`). `ContinuousDiDResults` additionally exposes `overall_se` / `overall_conf_int` / `overall_p_value` / `overall_t_stat` aliases for naming consistency with the rest of the staggered family. 
Aliases are pure read-throughs over the canonical fields — no recomputation, no behavior change — so the `safe_inference()` joint-NaN contract (per CLAUDE.md "Inference computation") is inherited automatically (NaN canonical → NaN alias, locked at `tests/test_result_aliases.py::test_pattern_b_aliases_propagate_nan`). The native `overall_*` / `overall_att_*` / `avg_*` fields remain canonical for documentation and computation. Motivated by the `balance.interop.diff_diff.as_balance_diagnostic()` adapter (`facebookresearch/balance` PR #465) which calls `getattr(res, "se", None)` / `getattr(res, "conf_int", None)` without a fallback chain — pre-alias, every staggered result class returned `None` on those keys, silently dropping `se` and `conf_int` from the adapter's diagnostic dict. 23 alias-mechanic + balance-adapter regression tests at `tests/test_result_aliases.py`. Patch-level (additive on stable surfaces). diff --git a/diff_diff/conley.py b/diff_diff/conley.py index 02c069e5..bfe673a7 100644 --- a/diff_diff/conley.py +++ b/diff_diff/conley.py @@ -86,10 +86,18 @@ def _pairwise_distance_matrix(coords: np.ndarray, metric) -> np.ndarray: def _bartlett_kernel(u: np.ndarray) -> np.ndarray: - """Bartlett (linear taper) kernel: K(u) = max(0, 1 - |u|). - - Conley (1999) Eq 3.14 + Andrews (1991). PSD-guaranteed (non-negative - spectral window), so the resulting Conley meat is PSD. + """Bartlett (linear taper) kernel on pairwise distance: K(u) = max(0, 1 - |u|). + + This is the radial 1-D specialization of Conley (1999)'s Bartlett window + that R ``conleyreg`` (Düsterhöft 2021), Stata ``acreg`` (Colella et al. + 2019), and Hsiang (2010) all use as their Bartlett path. 
Conley's + explicit PSD formula (Eq 3.14, page 12) is the **2-D separable product + window** ``K(j, k) = (1 - |j|/L_M)(1 - |k|/L_N)`` indexed on a lattice; + the 1-D radial form on pairwise distance is a practitioner specialization + that is not explicitly written in the paper and is therefore **not + PSD-guaranteed**. The caller checks the resulting meat for indefiniteness + and emits a ``UserWarning`` if the smallest eigenvalue is materially + negative (regardless of kernel). """ return np.maximum(0.0, 1.0 - np.abs(u)) @@ -99,7 +107,7 @@ def _uniform_kernel(u: np.ndarray) -> np.ndarray: Cited as White (1980) truncated estimator; Conley (1999) page 11. Easier to interpret than Bartlett but the spectral window is negative in regions - (Conley 1999 footnote 11), so the resulting meat is NOT guaranteed PSD. + (Conley 1999 footnote 11), so the resulting meat is not guaranteed PSD. Caller emits ``UserWarning`` if any meat eigenvalue is materially negative. """ return (np.abs(u) <= 1.0).astype(np.float64) @@ -128,8 +136,10 @@ def _validate_conley_kwargs( if coords is None: raise ValueError( "vcov_type='conley' requires conley_coords (n×2 array of [lat, lon] " - "or projected coords). Pass via TwoWayFixedEffects(conley_coords=...) " - "or compute_robust_vcov(conley_coords=...)." + "or projected coords). Pass via LinearRegression(conley_coords=...) " + "or compute_robust_vcov(conley_coords=...) on a cross-sectional " + "design (Phase 1 supports cross-sectional Conley only; panel " + "estimators are deferred to Phase 2)." ) coords_arr = np.asarray(coords, dtype=np.float64) if coords_arr.ndim != 2 or coords_arr.shape[1] != 2: @@ -209,9 +219,13 @@ def _compute_conley_vcov( Notes ----- - For ``kernel == "uniform"`` the meat is not guaranteed PSD (Conley 1999 - footnote 11); a ``UserWarning`` is emitted if the smallest meat eigenvalue - is materially negative (< -1e-12). 
+ Neither the uniform kernel (negative spectral regions, Conley 1999 + footnote 11) nor the **radial 1-D Bartlett** specialization implemented + here is PSD-guaranteed. Conley's explicit PSD formula (Eq 3.14) is the + 2-D separable product window on a lattice; the radial pairwise form is + a practitioner specialization (R ``conleyreg``, Stata ``acreg``, Hsiang + 2010) that is not formally PSD. We emit a ``UserWarning`` if the smallest + meat eigenvalue is materially negative (< -1e-12) regardless of kernel. """ coords_arr = np.asarray(coords, dtype=np.float64) D = _pairwise_distance_matrix(coords_arr, metric) @@ -243,18 +257,25 @@ def _compute_conley_vcov( "score matrix for NaN/Inf." ) - # PSD guard for uniform kernel (Conley 1999 fn 11) - if kernel == "uniform": - eigvals = np.linalg.eigvalsh(meat) - if eigvals.size and eigvals.min() < -1e-12: - warnings.warn( - f"Conley meat with uniform kernel has a negative eigenvalue " - f"({eigvals.min():.2e}); the variance estimator is not " - "guaranteed PSD. Consider conley_kernel='bartlett' (PSD by " - "construction).", - UserWarning, - stacklevel=3, - ) + # PSD guard. Neither the uniform kernel (Conley 1999 fn 11) nor the + # radial 1-D Bartlett specialization is formally PSD-guaranteed — + # Conley's explicit PSD Bartlett formula (Eq 3.14) is the 2-D separable + # product window, not the 1-D radial pairwise form that R `conleyreg`, + # Stata `acreg`, and this implementation use. Check both kernels. + eigvals = np.linalg.eigvalsh(meat) + if eigvals.size and eigvals.min() < -1e-12: + warnings.warn( + f"Conley meat with conley_kernel={kernel!r} has a materially " + f"negative eigenvalue ({eigvals.min():.2e}); the variance " + "estimator is not guaranteed PSD on this design. 
Both " + "supported kernels (radial bartlett and uniform) are " + "practitioner specializations of Conley 1999 and are not " + "formally PSD-guaranteed; consider varying conley_cutoff_km " + "or reviewing the design for collinearity / degenerate " + "residual structure.", + UserWarning, + stacklevel=3, + ) # Sandwich via two solves (mirrors _compute_cr2_bm pattern in linalg.py) try: diff --git a/diff_diff/guides/llms-full.txt b/diff_diff/guides/llms-full.txt index ce9f4756..4de6b279 100644 --- a/diff_diff/guides/llms-full.txt +++ b/diff_diff/guides/llms-full.txt @@ -1918,8 +1918,10 @@ se = np.sqrt(np.diag(reg.vcov_)) Var̂(β) = (X'X)^{-1} · ( Σ_{i,j} K(d_ij / h) · X_i ε_i ε_j X_j' ) · (X'X)^{-1} **Kernels:** -- `"bartlett"` (default): `K(u) = max(0, 1 - |u|)`. PSD-guaranteed. -- `"uniform"`: `K(u) = 1{|u| ≤ 1}`. Easier to interpret; emits `UserWarning` if the resulting meat has a materially negative eigenvalue. +- `"bartlett"` (default): `K(u) = max(0, 1 - |u|)` on pairwise distance `d_ij/h`. The radial 1-D form, matching R `conleyreg` / Stata `acreg`. Conley 1999's explicit PSD-guaranteed Bartlett formula (Eq 3.14) is the 2-D **separable product** window on a lattice; the 1-D radial specialization that diff-diff implements is a practitioner convention and is not formally PSD-guaranteed. +- `"uniform"`: `K(u) = 1{|u| ≤ 1}`. Easier to interpret. + +Both kernels: `UserWarning` is emitted if the resulting meat has a materially negative eigenvalue (< -1e-12) — neither kernel is formally PSD-guaranteed in the radial 1-D pairwise-distance form. **Distance metrics:** - `"haversine"` (default): great-circle in km, Earth's mean radius 6371.01 km (matching R `conleyreg`). Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index ca742b85..16c35f90 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -2956,8 +2956,10 @@ where `d_ij` is the geographic distance, `h` is the user-supplied bandwidth the standard White HC0 term `X_i ε_i² X_i'`. **Kernel functions:** -- `conley_kernel="bartlett"` (default): `K(u) = max(0, 1 - |u|)`. Conley 1999 Eq 3.14, Andrews 1991. PSD-guaranteed (non-negative spectral window). -- `conley_kernel="uniform"`: `K(u) = 1{|u| ≤ 1}`. Spectral window negative in regions (Conley 1999 footnote 11) — meat not guaranteed PSD; implementation emits `UserWarning` if any meat eigenvalue < `-1e-12`. +- `conley_kernel="bartlett"` (default): `K(u) = max(0, 1 - |u|)` evaluated on the pairwise distance `d_ij/h`. The radial 1-D form on pairwise distance, matching R `conleyreg`, Stata `acreg` (Colella et al. 2019), and Hsiang (2010). +- `conley_kernel="uniform"`: `K(u) = 1{|u| ≤ 1}`. Conley 1999 page 11; spectral window negative in regions (footnote 11). + +**Note (deviation / source specialization):** Conley 1999's explicitly PSD-guaranteed Bartlett formula (Eq 3.14, page 12) is the **2-D separable product window** `K(j, k) = (1 - |j|/L_M)(1 - |k|/L_N)` indexed on a lattice. The 1-D radial form on pairwise distance that diff-diff implements (matching R `conleyreg`) is a practitioner specialization that is not explicitly written in the paper and is therefore **not formally PSD-guaranteed**. We apply the same indefiniteness check to both kernels: a `UserWarning` is emitted if any meat eigenvalue is materially negative (< `-1e-12`). **Distance metrics:** - `conley_metric="haversine"` (default): great-circle in km using Earth's mean radius (6371.01 km, matching R `conleyreg`). Validates `lat ∈ [-90, 90]`, `lon ∈ [-180, 180]`. 
diff --git a/docs/methodology/papers/conley-1999-review.md b/docs/methodology/papers/conley-1999-review.md index abd49c49..011d96ee 100644 --- a/docs/methodology/papers/conley-1999-review.md +++ b/docs/methodology/papers/conley-1999-review.md @@ -268,7 +268,10 @@ The paper itself does NOT distribute code. Conley's Section 5 empirical example - **`λ = E W_s` factor in Equation 3.12.** In the lattice formulation, `C = λ^{-1} V` accounts for the fraction of lattice points actually sampled. In the practitioner pairwise form `Σ_{i,j} K(d_{ij}/h) X_i ε̂_i ε̂_j X_j'`, this factor is absorbed by the change of indexing (sum over actual observations rather than over lattice points). The implementing engineer should NOT multiply by `1/λ` in the pairwise form; this is already handled by summing over the realized sample. -- **PSD failure for the truncated/uniform kernel** (footnote 11, page 11). Conley's exact wording: "This estimator will not always be PSD, unfortunately, since the spectral window corresponding to the step function space domain window (its Fourier transform) will be negative in some regions." Implementation guidance: when `conley_kernel="uniform"`, compute the eigenvalues of `Var̂(β̂)` after sandwich and if `min(eig) < 0`, either (a) warn and proceed (matches `acreg`), (b) clamp to PSD via eigendecomposition + zero-floor, or (c) redirect to Bartlett. Phase 1 plan: warn and proceed (option a) to match downstream-tool expectations. +- **PSD failure for both supported kernels under the radial 1-D specialization.** Two distinct sources: + - *Uniform/truncated kernel* (footnote 11, page 11): Conley's exact wording: "This estimator will not always be PSD, unfortunately, since the spectral window corresponding to the step function space domain window (its Fourier transform) will be negative in some regions." 
+ - *Radial 1-D Bartlett* (the form diff-diff implements, matching R `conleyreg` / Stata `acreg`): Conley's explicit PSD-guaranteed Bartlett formula (Eq 3.14, page 12) is the 2-D **separable product window** `(1 - |j|/L_M)(1 - |k|/L_N)`, NOT the 1-D radial form on pairwise distance. The radial specialization is a practitioner convention (see "Pairwise (1-D) Bartlett" line above) that is not formally PSD-guaranteed. + Implementation guidance: under either kernel, compute the eigenvalues of the meat after sandwich and if `min(eig) < -1e-12`, either (a) warn and proceed (matches `acreg` and `conleyreg`), (b) clamp to PSD via eigendecomposition + zero-floor, or (c) redirect to a separable 2-D product kernel (Phase 2 + space-time extension). Phase 1 plan: warn and proceed (option a) for both kernels to match downstream-tool expectations. - **The empirical example uses cross-country growth regressions (page 20)**, NOT a DiD or panel setup. Conley does not work out the panel TWFE specialization in the paper. The diff-diff Phase 1 implementation extends Conley's machinery to TWFE OLS (which is a linear regression with absorbed fixed effects) - this is mechanically straightforward but the methodological extension warrants a citation to a downstream paper (e.g., Cameron-Miller 2015 review article, or Bester-Conley-Hansen 2011 spatial cluster bootstrap) in REGISTRY.md. diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py index f4e306a8..8e9d5f65 100644 --- a/tests/test_conley_vcov.py +++ b/tests/test_conley_vcov.py @@ -407,6 +407,71 @@ def test_uniform_kernel_negative_eigenvalue_warns(self): bread_matrix=bread, ) + def test_indefinite_meat_warning_fires_for_bartlett(self): + """Both kernels (radial 1-D bartlett and uniform) are practitioner + specializations of Conley 1999 and are NOT formally PSD-guaranteed + (Conley's explicit PSD formula is the 2-D separable product window, + Eq 3.14, not the 1-D radial form). 
The PSD guard must therefore + fire for bartlett too, not just uniform. + + Forces the indefinite path by monkey-patching `_bartlett_kernel` to + return a kernel matrix with an aggressive negative off-diagonal, + making the resulting meat indefinite. Verifies the warning surfaces + with the kernel name in the message. + """ + from diff_diff import conley as conley_mod + + rng = np.random.default_rng(seed=11) + n = 6 + coords = rng.uniform(0, 1, size=(n, 2)) + X = np.column_stack([np.ones(n), rng.standard_normal(n)]) + eps = np.ones(n) # non-zero residuals so meat is non-zero + bread = X.T @ X + + # Patch the bartlett kernel to return a sign-pattern that DEFINITELY + # produces an indefinite meat. The native bartlett is non-negative; + # injecting large negative off-diagonals breaks the + # PSD-by-non-negative-window heuristic. + original = conley_mod._bartlett_kernel + + def _indefinite(u: np.ndarray) -> np.ndarray: + base = np.eye(u.shape[0]) + # Aggressive negative off-diagonals; this kernel is itself + # indefinite and so is S.T @ K @ S for generic S. 
+ for i in range(u.shape[0]): + for j in range(u.shape[0]): + if i != j: + base[i, j] = -10.0 + return base + + try: + conley_mod._bartlett_kernel = _indefinite + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + conley_mod._compute_conley_vcov( + X, + eps, + coords, + cutoff=10.0, + metric="euclidean", + kernel="bartlett", + bread_matrix=bread, + ) + # Verify a PSD warning fired naming the bartlett kernel + psd_warnings = [ + msg + for msg in w + if issubclass(msg.category, UserWarning) + and "bartlett" in str(msg.message) + and "negative eigenvalue" in str(msg.message) + ] + assert len(psd_warnings) >= 1, ( + f"Expected a UserWarning naming kernel='bartlett' and " + f"'negative eigenvalue'; got {[str(m.message) for m in w]}" + ) + finally: + conley_mod._bartlett_kernel = original + # --------------------------------------------------------------------------- # TestConleyReductions — Bartlett+tiny cutoff → HC0 meat; etc. @@ -685,9 +750,15 @@ def test_linear_regression_conley_with_survey_design_raises(self, fit_data): class TestConleyEstimatorIntegration: - """Step 4 smoke tests: DifferenceInDifferences and MultiPeriodDiD accept - vcov_type='conley' with the conley_* kwargs and produce finite SEs. - Also tests that summary() prints the Conley label.""" + """Panel-estimator rejection tests for vcov_type='conley'. + + DiD and MultiPeriodDiD reject Conley at fit-time in Phase 1 because + cross-sectional Conley over (unit, time) rows mishandles same-unit + cross-time pairs (d_ij = 0 -> K = 1). The supported Phase 1 path for + Conley is direct compute_robust_vcov / LinearRegression on a single- + period regression. Phase 2 will add the space-time product kernel and + lift the rejection. 
+ """ @pytest.fixture def two_period_panel(self): From df11d250f18a7a832db3f407d5001e659515e92c Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 15:03:34 -0400 Subject: [PATCH 6/9] Document conley_* kwargs in compute_robust_vcov docstring Address P3 doc-drift finding from CI Codex review of PR #411 R-final. The function-level docstring already lists Conley in the dispatch description, but the Parameters section's `vcov_type` enum line and the four `conley_*` keyword-only kwargs were not separately documented. Added `"conley"` to the enum and four new parameter entries (``conley_coords``, ``conley_cutoff_km``, ``conley_metric``, ``conley_kernel``) describing the cross-sectional-only Phase 1 contract and the both-kernel indefiniteness warning. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/linalg.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index 10138895..2f36163c 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -1218,7 +1218,30 @@ def compute_robust_vcov( weight_type : str, default "pweight" Weight type: "pweight", "fweight", or "aweight". vcov_type : str, default "hc1" - One of ``"classical"``, ``"hc1"``, ``"hc2"``, ``"hc2_bm"``. + One of ``"classical"``, ``"hc1"``, ``"hc2"``, ``"hc2_bm"``, + ``"conley"`` (see top-level docstring above for the dispatch + contract). + conley_coords : ndarray of shape (n, 2), optional, keyword-only + Required when ``vcov_type="conley"``. Two-column array of + ``[lat, lon]`` (degrees, for ``conley_metric="haversine"``) or + projected coordinates (for ``conley_metric="euclidean"`` or a + callable metric). Raises ``ValueError`` when missing under Conley. + conley_cutoff_km : float, optional, keyword-only + Required when ``vcov_type="conley"``. Positive finite bandwidth in + km (haversine) or coord units (euclidean / callable). 
No default + per Conley 1999 Section 5's sensitivity-grid recommendation; + raises ``ValueError`` when missing under Conley. + conley_metric : str, default "haversine", keyword-only + Distance metric for Conley. ``"haversine"`` (lat/lon → km, Earth + radius 6371.01 matching R ``conleyreg``), ``"euclidean"`` (any + units), or a callable ``f(coords1, coords2) -> n×n``. + conley_kernel : str, default "bartlett", keyword-only + Conley kernel on pairwise distance ``d_ij/h``. ``"bartlett"`` is + the radial 1-D specialization (matching R ``conleyreg``); + ``"uniform"`` is the truncated indicator. Both kernels emit a + ``UserWarning`` if the resulting meat is materially indefinite — + neither is formally PSD-guaranteed in the radial form (see + ``docs/methodology/REGISTRY.md`` § ConleySpatialHAC for details). return_dof : bool, default False When True, returns ``(vcov, dof_vec)`` tuple. ``dof_vec`` is a length-k array of per-coefficient degrees of freedom. For ``classical``, From c16753dbc9fa29b8e89acf7a404a09e407366c08 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 15:14:16 -0400 Subject: [PATCH 7/9] Remove dead Conley scaffolding from panel estimators; align paper review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two P3 findings from CI Codex review of PR #411: P3 (Maintainability) — DiD/MultiPeriodDiD/TWFE all reject vcov_type="conley" unconditionally at fit-time, but each fit() still materialized `_conley_coords_array` from data and forwarded `conley_coords`, `conley_cutoff_km`, `conley_metric`, `conley_kernel` to LinearRegression / solve_ols. Those code paths were unreachable behind the unconditional NotImplementedError raise. Removed the dead extraction + arg-passes from all three estimators. The constructor still accepts the conley_* kwargs for sklearn-style API symmetry (set_params/get_params round-trip works); they have no effect on the panel paths. 
P3 (Documentation) — `docs/methodology/papers/conley-1999-review.md` Requirements checklist and Tuning Parameters table still said the Bartlett kernel is "PSD by construction" and only flagged uniform as needing the negative-eigenvalue warning. Updated both surfaces to spell out the radial 1-D pairwise specialization vs Conley's explicit 2-D separable PSD lattice formula (Eq 3.14) and to apply the warning to both kernels — matching the registry and the runtime contract. 271 targeted regression tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/estimators.py | 22 ------------------- diff_diff/twfe.py | 17 -------------- docs/methodology/papers/conley-1999-review.md | 6 ++--- 3 files changed, 3 insertions(+), 42 deletions(-) diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index f000a822..464baae3 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -439,13 +439,6 @@ def fit( # For wild bootstrap, we don't need cluster SEs from the initial fit cluster_ids = data[self.cluster].values if self.cluster is not None else None - # Extract Conley coords array (n×2 float64) from the user's data. - # Validation of the column existence and the 2-tuple shape happened - # at the top of fit(); here we only need to materialize the array. 
- _conley_coords_array = None - if self.vcov_type == "conley" and self.conley_coords is not None: - _conley_coords_array = data[list(self.conley_coords)].to_numpy(dtype=np.float64) - # When survey PSU is present, it overrides cluster for variance estimation effective_cluster_ids = _resolve_effective_cluster( resolved_survey, cluster_ids, self.cluster @@ -487,10 +480,6 @@ def fit( weight_type=survey_weight_type, survey_design=_lr_survey, vcov_type=_fit_vcov_type, - conley_coords=_conley_coords_array, - conley_cutoff_km=self.conley_cutoff_km, - conley_metric=self.conley_metric, - conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=n_absorbed_effects) coefficients = reg.coefficients_ @@ -1538,13 +1527,6 @@ def fit( # type: ignore[override] # Remap implicit "classical" + cluster to CR1 (legacy backward compat). _fit_vcov_type = self._resolve_effective_vcov_type(effective_cluster_ids) - # Extract Conley coords array (only when vcov_type='conley'; the - # estimator-level guards above already validated the column-name - # tuple against `data`). 
- _conley_coords_array_mp = None - if _fit_vcov_type == "conley" and self.conley_coords is not None: - _conley_coords_array_mp = data[list(self.conley_coords)].to_numpy(dtype=np.float64) - # Note: Wild bootstrap for multi-period effects is complex (multiple coefficients) # For now, we use analytical inference even if inference="wild_bootstrap" coefficients, residuals, fitted, vcov = solve_ols( @@ -1558,10 +1540,6 @@ def fit( # type: ignore[override] weights=survey_weights, weight_type=survey_weight_type, vcov_type=_fit_vcov_type, - conley_coords=_conley_coords_array_mp, - conley_cutoff_km=self.conley_cutoff_km, - conley_metric=self.conley_metric, - conley_kernel=self.conley_kernel, ) # Compute survey vcov if applicable diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index d0eeff46..f715a566 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -325,15 +325,6 @@ def fit( # type: ignore[override] # single source of truth. _fit_vcov_type = self._resolve_effective_vcov_type(survey_cluster_ids) - # Materialize Conley coords from data (validated above; this is just - # array extraction). NOTE: data passed to LinearRegression is the - # within-transformed matrix, but coords are still in the ORIGINAL - # row order — within-transformation preserves row ordering, so the - # coords align with the demeaned X 1:1. 
- _twfe_conley_coords = None - if _fit_vcov_type == "conley" and self.conley_coords is not None: - _twfe_conley_coords = data[list(self.conley_coords)].to_numpy(dtype=np.float64) - if self.rank_deficient_action == "error": reg = LinearRegression( include_intercept=False, @@ -344,10 +335,6 @@ def fit( # type: ignore[override] weight_type=survey_weight_type, survey_design=_lr_survey_twfe, vcov_type=_fit_vcov_type, - conley_coords=_twfe_conley_coords, - conley_cutoff_km=self.conley_cutoff_km, - conley_metric=self.conley_metric, - conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=df_adjustment) else: # Suppress generic warning, TWFE provides context-specific messages below @@ -364,10 +351,6 @@ def fit( # type: ignore[override] weight_type=survey_weight_type, survey_design=_lr_survey_twfe, vcov_type=_fit_vcov_type, - conley_coords=_twfe_conley_coords, - conley_cutoff_km=self.conley_cutoff_km, - conley_metric=self.conley_metric, - conley_kernel=self.conley_kernel, ).fit(X, y, df_adjustment=df_adjustment) coefficients = reg.coefficients_ diff --git a/docs/methodology/papers/conley-1999-review.md b/docs/methodology/papers/conley-1999-review.md index 011d96ee..9d71ef8c 100644 --- a/docs/methodology/papers/conley-1999-review.md +++ b/docs/methodology/papers/conley-1999-review.md @@ -214,10 +214,10 @@ The paper itself does NOT distribute code. Conley's Section 5 empirical example - [ ] Coordinates supplied as two columns (lat, lon) or `(x, y)` projected. - [ ] Distance metric configured (haversine for lat/lon; euclidean for projected; callable for custom). - [ ] Cutoff `conley_cutoff_km > 0` (or unitless `conley_cutoff` for euclidean). Document that `h = 0` reduces to HC0. -- [ ] Kernel choice `conley_kernel ∈ {"bartlett", "uniform"}`. Bartlett is PSD by construction; uniform is not in general (warn). +- [ ] Kernel choice `conley_kernel ∈ {"bartlett", "uniform"}`. 
Conley's explicit PSD Bartlett (Eq 3.14) is the 2-D separable lattice product window; the radial 1-D pairwise Bartlett that diff-diff and R `conleyreg` implement is a practitioner specialization that is **not** formally PSD-guaranteed. Uniform is also not PSD in general. Apply the negative-eigenvalue warning to **both** kernels. - [ ] Score outer products `x_i ε̂_i` computed identically to HC0 path. - [ ] Robustness sweep: document that practitioners should report estimates at multiple cutoffs (Conley Section 5 standard). -- [ ] If `conley_kernel="uniform"` and the resulting variance has any negative eigenvalues, warn or fall back to Bartlett. +- [ ] If the resulting Conley meat / variance has any materially negative eigenvalues (under either Bartlett or uniform), warn the user (the implementation does this for both kernels). --- @@ -242,7 +242,7 @@ The paper itself does NOT distribute code. Conley's Section 5 empirical example | `vcov_method` | str | `"hc0"` | Set to `"conley"` to activate. | | `conley_coords` | tuple of 2 str | `None` | User specifies the two column names for lat/lon (or projected x/y). Required when `vcov_method="conley"`. | | `conley_cutoff_km` | float | `None` (no default) | User-supplied. Conley does not provide a plug-in selector. Recommend a robustness sweep (3-5 values spanning the relevant economic-distance range). For Phase 1, error if not supplied. | -| `conley_kernel` | str | `"bartlett"` | `"bartlett"` is PSD by construction (Conley Eq 3.14 page 12) and is the practitioner default. `"uniform"` matches Conley's "truncated window" (page 11) but may fail PSD; emit warning. | +| `conley_kernel` | str | `"bartlett"` | `"bartlett"` evaluated on pairwise distance `d_ij/h` is the practitioner default, matching R `conleyreg` and Stata `acreg`; this radial 1-D form is a specialization of Conley's explicit 2-D separable PSD-guaranteed Bartlett (Eq 3.14, page 12) and is not formally PSD-guaranteed itself. 
`"uniform"` matches Conley's "truncated window" (page 11) and is also not PSD in general (footnote 11). Emit a warning under either kernel when the resulting meat has a materially negative eigenvalue. | | `conley_metric` | str or callable | `"haversine"` | `"haversine"` for lat/lon (km); `"euclidean"` for projected coords (units = whatever the coord units are - so if coords are degrees, cutoff is in degrees); a callable `(coord_i, coord_j) -> float` for custom metrics (e.g., travel time, network distance). | ### Relation to Existing diff-diff Estimators From 5806b33a7424d7dda4aab48632bedee2f6306563 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 10 May 2026 15:41:05 -0400 Subject: [PATCH 8/9] Final P3 cleanup: remove unreachable TWFE conley auto-cluster + fix resolve_vcov_type docstring Address two P3 documentation/maintenance findings from CI Codex review of PR #411 R-final: - twfe.py:234-240: an `elif self.vcov_type == "conley"` branch in the cluster-resolution chain disabled the auto-cluster default for Conley. Unreachable behind the unconditional Conley reject at twfe.py:146-163. Removed. - linalg.py::resolve_vcov_type docstring: the "Rules" section and Returns docstring listed only `{"classical", "hc1", "hc2", "hc2_bm"}`, omitting `"conley"` even though `_VALID_VCOV_TYPES` includes it and the runtime path already accepts it. Updated both surfaces; the source of truth pointer (`_VALID_VCOV_TYPES`) is now named explicitly so future enum additions don't need a docstring sweep. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/linalg.py | 7 ++++--- diff_diff/twfe.py | 7 ------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index 2f36163c..e4b96df0 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -1112,8 +1112,9 @@ def resolve_vcov_type( - If ``vcov_type`` is ``None``: map ``robust=True`` to ``"hc1"`` and ``robust=False`` to ``"classical"``. 
- - If ``vcov_type`` is supplied: it must be one of - ``{"classical", "hc1", "hc2", "hc2_bm"}``. + - If ``vcov_type`` is supplied: it must be one of the values in the + module-level ``_VALID_VCOV_TYPES`` set, namely + ``{"classical", "hc1", "hc2", "hc2_bm", "conley"}``. - If ``robust=False`` is supplied together with a non-``"classical"`` ``vcov_type``, raise ``ValueError`` - the combination is ambiguous. @@ -1127,7 +1128,7 @@ def resolve_vcov_type( Returns ------- str - One of ``"classical"``, ``"hc1"``, ``"hc2"``, ``"hc2_bm"``. + One of ``"classical"``, ``"hc1"``, ``"hc2"``, ``"hc2_bm"``, ``"conley"``. Raises ------ diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index f715a566..8fefa831 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -231,13 +231,6 @@ def fit( # type: ignore[override] # Explicit classical + analytical inference: drop the auto-cluster # so the validator doesn't reject ``cluster_ids + classical``. cluster_var = None - elif self.vcov_type == "conley": - # Conley + TWFE: disable the auto-cluster default. Conley + cluster - # is deferred to Phase 2; the user wants spatial-HAC at the unit - # level via the kernel, not cluster-robust at the unit level. The - # explicit-cluster case is rejected upstream; reaching here means - # cluster=None. 
- cluster_var = None else: cluster_var = unit From 5acef60cb5418859d9fab662bb3c1cc85642aa1a Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 11 May 2026 17:25:04 -0400 Subject: [PATCH 9/9] Align public docstrings with Phase 1 Conley contract; drop redundant guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address P2/P3 findings from CI Codex review of PR #411 R-rebased: P2 — Public docstrings for the cross-sectional supported APIs and the rejected panel surfaces were missing or stale: - `LinearRegression.__init__` and `solve_ols()` docstrings now document `conley_coords`, `conley_cutoff_km`, `conley_metric`, `conley_kernel` (the four newly added kwargs) plus the cluster/weights/survey rejection contract, mirroring the `compute_robust_vcov` docstring. - `SyntheticDiD` class docstring gains a `Notes (Conley spatial-HAC rejection)` block stating that `vcov_type` and `conley_*` kwargs raise `TypeError` at __init__ / set_params, with the bootstrap-variance rationale. - `TwoWayFixedEffects` class docstring gains a paragraph on the Phase 1 panel rejection (parallel to the existing HC2/Bell-McCaffrey paragraph), including the sklearn-style API-symmetry rationale for why constructor kwargs are inherited from DifferenceInDifferences. P3 — Two cleanups: - Removed the redundant `MultiPeriodDiD(absorb=..., vcov_type="conley")` pre-guard; the unconditional Conley reject immediately after covered the same path and the special-cased message was misleading (it told users to "drop absorb=" even though dropping absorb= would NOT make Conley available on MultiPeriodDiD). - Renamed `test_bartlett_psd_on_random_distances` to `test_bartlett_kernel_finite_and_in_unit_interval`. The original name encoded a stronger property than the methodology contract guarantees (radial 1-D Bartlett is a practitioner specialization, not PSD- guaranteed). 
The renamed test asserts finite / symmetric / [0, 1]- bounded instead; the both-kernel indefiniteness warning is locked separately by `test_indefinite_meat_warning_fires_for_bartlett`. Doc surfaces: - `docs/methodology/papers/colella-et-al-2019-review.md`: updated the Phase 1 parity target paragraph to state that Phase 1 ships only cross-sectional Conley with R `conleyreg` parity; Stata `acreg` TWFE parity is a Phase 2 target. - PR body summary rewritten to match the shipped contract: cross- sectional `LinearRegression` / `compute_robust_vcov` supported, DiD / MultiPeriodDiD / TWFE reject at fit-time. Co-Authored-By: Claude Opus 4.7 (1M context) --- diff_diff/estimators.py | 14 ++--- diff_diff/linalg.py | 55 +++++++++++++++++++ diff_diff/synthetic_did.py | 13 +++++ diff_diff/twfe.py | 14 +++++ .../papers/colella-et-al-2019-review.md | 2 +- tests/test_conley_vcov.py | 19 +++++-- 6 files changed, 102 insertions(+), 15 deletions(-) diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py index 464baae3..1cab604f 100644 --- a/diff_diff/estimators.py +++ b/diff_diff/estimators.py @@ -1374,19 +1374,13 @@ def fit( # type: ignore[override] "switch to fixed_effects= dummies for a full-dummy design." ) - # Reject Conley combinations early at the estimator level (see - # DifferenceInDifferences.fit for the matching block and rationale). - if absorb and self.vcov_type == "conley": - raise NotImplementedError( - "MultiPeriodDiD(absorb=..., vcov_type='conley') is deferred " - "to a follow-up. Use fixed_effects= dummies for an equivalent " - "FE design with the full projection, or drop absorb= for " - "cross-sectional Conley." - ) # MultiPeriodDiD is intrinsically a multi-period panel estimator; # cross-sectional Conley does not apply (same rationale as # DifferenceInDifferences.fit's panel guard above). Phase 2 will - # add a documented space-time HAC. + # add a documented space-time HAC. 
The rejection is unconditional + # — `absorb` and other Conley-adjacent kwargs cannot make + # MultiPeriodDiD Conley-compatible because the panel structure is + # the load-bearing reason Phase 1 cannot apply Conley here. if self.vcov_type == "conley": raise NotImplementedError( "MultiPeriodDiD(vcov_type='conley') is deferred to Phase 2 " diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index e4b96df0..02e864d8 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -520,6 +520,30 @@ def solve_ols( raises ``NotImplementedError`` because the BM DOF helper is inconsistent with ``solve_ols``'s WLS transform. Tracked in ``TODO.md``. + - ``"conley"``: Conley (1999) spatial-HAC sandwich. Requires + ``conley_coords`` (n × 2 array) and ``conley_cutoff_km`` (positive + bandwidth, no default per Conley 1999 Section 5's sensitivity-grid + recommendation). Combining with ``cluster_ids`` or ``weights`` + raises ``NotImplementedError`` (combined product kernel + Bertanha- + Imbens 2014 weighted-Conley deferred to Phase 2+). Cross-sectional + one-way only. + conley_coords : ndarray of shape (n, 2), optional + Required when ``vcov_type="conley"``. Two-column array of + ``[lat, lon]`` (degrees, for ``conley_metric="haversine"``) or + projected coordinates (for ``conley_metric="euclidean"`` / callable + metric). + conley_cutoff_km : float, optional + Required when ``vcov_type="conley"``. Positive finite bandwidth in + km (haversine) or coord units (euclidean / callable). + conley_metric : {"haversine", "euclidean", callable}, default "haversine" + Distance metric. Haversine uses Earth's mean radius 6371.01 km + (matching R ``conleyreg``). Euclidean treats coords as already + projected. Callable signature ``(coords1, coords2) -> n×n``. + conley_kernel : {"bartlett", "uniform"}, default "bartlett" + Kernel evaluated on pairwise distance ``d_ij/h``. 
Both kernels emit + a ``UserWarning`` if the resulting meat is materially indefinite; + the radial 1-D Bartlett (matching R ``conleyreg``) is not formally + PSD-guaranteed — see :func:`compute_robust_vcov`. Returns ------- @@ -2363,6 +2387,37 @@ class LinearRegression: sandwich, the class stores per-coefficient BM Satterthwaite DOF (``self._bm_dof``) and threads it into ``get_inference``. + For ``"conley"`` (Conley 1999 spatial-HAC) the supported Phase 1 + path is the cross-sectional `LinearRegression` / `compute_robust_vcov` + surface; requires ``conley_coords`` (n × 2 array) and a positive + ``conley_cutoff_km``. Combining ``vcov_type="conley"`` with + ``cluster_ids``, ``weights``, or ``survey_design`` raises + ``NotImplementedError`` (combined product kernel + Bertanha-Imbens + 2014 weighted-Conley deferred to Phase 2+). The panel DiD / + MultiPeriodDiD / TwoWayFixedEffects estimators reject + ``vcov_type="conley"`` at fit-time entirely in Phase 1. + conley_coords : ndarray of shape (n, 2), optional + Required when ``vcov_type="conley"``. Two-column array of + ``[lat, lon]`` (degrees, for ``conley_metric="haversine"``) or + projected coordinates (for ``conley_metric="euclidean"`` / callable + metric). Raises ``ValueError`` when missing under Conley. + conley_cutoff_km : float, optional + Required when ``vcov_type="conley"``. Positive finite bandwidth in + km (haversine) or coord units (euclidean / callable). No default + per Conley 1999 Section 5's sensitivity-grid recommendation. + conley_metric : {"haversine", "euclidean", callable}, default "haversine" + Distance metric. Haversine uses Earth's mean radius 6371.01 km + matching R ``conleyreg``. Euclidean treats the coords as already + projected. Callable signature ``(coords1, coords2) -> n×n``. + conley_kernel : {"bartlett", "uniform"}, default "bartlett" + Kernel evaluated on pairwise distance ``d_ij/h``. 
``"bartlett"`` is + the radial 1-D specialization (matching R ``conleyreg``); + ``"uniform"`` is the truncated indicator. Both kernels emit a + ``UserWarning`` if the resulting meat is materially indefinite — + neither is formally PSD-guaranteed in the radial pairwise form + (Conley 1999's explicit PSD Bartlett formula is the 2-D separable + product window, Eq 3.14, not the 1-D radial pairwise form). + Attributes ---------- coefficients_ : ndarray diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index a3d98c4c..91f0167f 100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -86,6 +86,19 @@ class SyntheticDiD(DifferenceInDifferences): Random seed for reproducibility. If None (default), results will vary between runs. + Notes (Conley spatial-HAC rejection) + ------------------------------------ + SyntheticDiD does not support the Conley (1999) spatial-HAC analytical + sandwich. Passing ``vcov_type="conley"`` or any non-``None`` Conley + keyword (``conley_coords``, ``conley_cutoff_km``, ``conley_metric``, + ``conley_kernel``) to ``__init__`` or ``set_params`` raises + ``TypeError``. Rationale: SyntheticDiD's variance is derived from + bootstrap / jackknife / placebo resampling (Arkhangelsky et al. 2021 + Algorithms 2–4), not the sandwich identity Conley plugs into. Adding + Conley support would require either an analytical SDID sandwich path + or a spatial-block bootstrap (Politis-Romano 1994 territory). Tracked + as a follow-up in ``TODO.md``. + Attributes ---------- results_ : SyntheticDiDResults diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index 8fefa831..da9157f2 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -67,6 +67,20 @@ class TwoWayFixedEffects(DifferenceInDifferences): ``TODO.md`` under Methodology/Correctness; also documented in ``docs/methodology/REGISTRY.md``. 
+ **Conley spatial-HAC (``vcov_type="conley"``) is rejected at fit-time + in Phase 1.** TwoWayFixedEffects is intrinsically a multi-period panel + estimator and Phase 1's cross-sectional Conley does not handle the + time dimension — applying it over (unit, time) rows would treat same- + unit cross-time pairs as ``d_ij = 0 → K = 1``, mishandling the space- + time HAC. The supported Phase 1 path for Conley is direct + ``compute_robust_vcov`` / ``LinearRegression`` on a single-period + regression. The ``conley_*`` kwargs are inherited from + ``DifferenceInDifferences.__init__`` for sklearn-style API symmetry + (``get_params`` / ``set_params`` round-trip), but + ``TwoWayFixedEffects(vcov_type="conley").fit(...)`` raises + ``NotImplementedError``. Phase 2 will add the space-time product kernel + (Driscoll-Kraay) and lift the rejection. + Warning: TWFE can be biased with staggered treatment timing and heterogeneous treatment effects. Consider using more robust estimators (e.g., Callaway-Sant'Anna) for diff --git a/docs/methodology/papers/colella-et-al-2019-review.md b/docs/methodology/papers/colella-et-al-2019-review.md index b877a26e..ebd4d758 100644 --- a/docs/methodology/papers/colella-et-al-2019-review.md +++ b/docs/methodology/papers/colella-et-al-2019-review.md @@ -190,7 +190,7 @@ This is a parity gap relative to acreg - implementers must consult acreg source. ### Relation to Existing diff-diff Estimators -- **Phase 1 parity target:** `vcov_method="conley"` on TWFE must match acreg to <=1e-6 on at least 2-3 fixtures. The `coords=("lat","lon")` and `cutoff_km=` parameters map directly onto acreg's lat/lon + cutoff inputs. +- **Phase 1 parity target (UPDATED):** Phase 1 ships `vcov_type="conley"` on **cross-sectional** `compute_robust_vcov` / `LinearRegression` only, with parity verified against R `conleyreg` (Düsterhöft 2021) to ≤1e-6 on three benchmark fixtures. 
Panel estimators (`DifferenceInDifferences`, `MultiPeriodDiD`, `TwoWayFixedEffects`) reject `vcov_type="conley"` at fit-time because the radial 1-D pairwise Conley does not handle the time dimension — applying it over (unit, time) rows would treat same-unit cross-time pairs as `d_ij = 0 → K = 1`, mishandling the space-time HAC. **Stata `acreg` parity for TWFE / panel space-time Conley is a Phase 2 target**, alongside the Driscoll-Kraay product-kernel implementation. The `coords` and `cutoff_km` parameter mapping below is still accurate for the cross-sectional path. - **Reduces to HC0** when the cutoff is small enough that `S = I` (no neighbour pairs). The paper does not state this explicitly, but the meat formula collapses to `X' diag(e^2) X` in that case, which is HC0 (White 1980, equation referenced page 4). - **Reduces to one-way clustering** when `S = block-diagonal indicator(same cluster)` (see Section 2, p. 6: Cameron et al. 2011 "can be embedded in this framework"). For multiway clustering, the paper says (page 6): "Multiway clustering assumes a particular *regularity condition* in the clustering structure ... However, in many real-life settings, this particular clustering structure may not hold." The acreg estimator is more flexible and the reduction to multiway clustering is approximate (binary `S` with the union-of-clusters structure). - **Cluster + spatial joint mode:** The paper does NOT formally combine cluster-robust with spatial-HAC. However, since `S` is arbitrary, one can construct `S` as the elementwise OR of the cluster-indicator matrix and the spatial-cutoff matrix; this gives a joint estimator. acreg likely exposes both options - verify. 
diff --git a/tests/test_conley_vcov.py b/tests/test_conley_vcov.py index 8e9d5f65..65c13524 100644 --- a/tests/test_conley_vcov.py +++ b/tests/test_conley_vcov.py @@ -83,16 +83,27 @@ def test_uniform_kernel_above_one_zero(self): def test_uniform_kernel_at_zero_one(self): np.testing.assert_allclose(_uniform_kernel(np.array([0.0])), 1.0) - def test_bartlett_psd_on_random_distances(self): - """Bartlett-weighted Gram matrix has all eigenvalues >= -tol.""" + def test_bartlett_kernel_finite_and_in_unit_interval(self): + """Bartlett-weighted kernel matrix on random pairwise distances is + finite, symmetric, and bounded in [0, 1]. We do NOT assert PSD here: + the radial 1-D Bartlett on pairwise distance is a practitioner + specialization of Conley 1999 (matching R conleyreg) and is NOT + formally PSD-guaranteed — see REGISTRY ConleySpatialHAC. The + runtime path emits a UserWarning if the resulting Conley meat is + materially indefinite; that contract is locked separately in + ``test_indefinite_meat_warning_fires_for_bartlett``. + """ rng = np.random.default_rng(seed=11) n = 25 coords = rng.uniform(0, 1, size=(n, 2)) diff = coords[:, None, :] - coords[None, :, :] D = np.sqrt((diff * diff).sum(axis=-1)) K = _bartlett_kernel(D / 0.3) - eigvals = np.linalg.eigvalsh(0.5 * (K + K.T)) # ensure symmetric - assert eigvals.min() > -1e-12 + assert K.shape == (n, n) + assert np.all(np.isfinite(K)) + assert np.all(K >= 0.0) + assert np.all(K <= 1.0) + np.testing.assert_allclose(K, K.T, atol=1e-15) # ---------------------------------------------------------------------------