From f90731a113db31b1cc9912f6709e1f7a266ccce3 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 7 May 2026 15:28:43 -0400 Subject: [PATCH 1/5] fix: make sure zip file descriptors are properly closed --- .gitignore | 1 + package_python_function/nested_zip_loader.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 68bc17f..413a7e1 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +.opencode diff --git a/package_python_function/nested_zip_loader.py b/package_python_function/nested_zip_loader.py index ffd52d7..215b475 100644 --- a/package_python_function/nested_zip_loader.py +++ b/package_python_function/nested_zip_loader.py @@ -45,7 +45,8 @@ def load_nested_zip() -> None: nested_zip_path = Path(__file__).parent / '.dependencies.zip' - zipfile.ZipFile(str(nested_zip_path), 'r').extractall(str(staging_package_path)) + with zipfile.ZipFile(str(nested_zip_path), "r") as nested_zip: + nested_zip.extractall(str(staging_package_path)) # The idea here is that we don't rename the path until everything has been successfuly extracted. # This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction, @@ -65,4 +66,4 @@ def load_nested_zip() -> None: sys.path[0] = str(target_package_path) importlib.reload(sys.modules[__name__]) -load_nested_zip() \ No newline at end of file +load_nested_zip() From d9ddb3cb35e1d1c560ba0f3d9633d1a2568bed6f Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 7 May 2026 14:15:29 -0400 Subject: [PATCH 2/5] fix: use file locking to avoid race conditions on nested zip extraction --- package_python_function/nested_zip_loader.py | 41 ++++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/package_python_function/nested_zip_loader.py b/package_python_function/nested_zip_loader.py index 215b475..d43d320 100644 --- a/package_python_function/nested_zip_loader.py +++ b/package_python_function/nested_zip_loader.py @@ -24,34 +24,43 @@ """ def load_nested_zip() -> None: - from pathlib import Path + import fcntl + import importlib import sys import tempfile - import importlib + from pathlib import Path temp_path = Path(tempfile.gettempdir()) target_package_path = temp_path / "package-python-function" - if not target_package_path.exists(): - import zipfile - import shutil - import os + # We use manual locks here to allow target_package_path to stay static, + # but avoid race conditions when multiple processes try to run this + # function at the same time. + lock_path = temp_path / ".package-python-function.lock" + + with open(lock_path, "w") as lock_file: + fcntl.flock(lock_file, fcntl.LOCK_EX) + + if not target_package_path.exists(): + import zipfile + import shutil + import os - staging_package_path = temp_path / ".stage.package-python-function" + staging_package_path = temp_path / ".stage.package-python-function" - if staging_package_path.exists(): - shutil.rmtree(str(staging_package_path)) + if staging_package_path.exists(): + shutil.rmtree(str(staging_package_path)) - nested_zip_path = Path(__file__).parent / '.dependencies.zip' + nested_zip_path = Path(__file__).parent / ".dependencies.zip" - with zipfile.ZipFile(str(nested_zip_path), "r") as nested_zip: - nested_zip.extractall(str(staging_package_path)) + with zipfile.ZipFile(str(nested_zip_path), "r") as nested_zip: + nested_zip.extractall(str(staging_package_path)) - # The idea here is that we don't rename the path until everything has been successfuly extracted. - # This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction, - # we won't try and use the incomplete extraction. - os.rename(str(staging_package_path), str(target_package_path)) + # The idea here is that we don't rename the path until everything has been successfuly extracted. + # This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction, + # we won't try and use the incomplete extraction. + os.rename(str(staging_package_path), str(target_package_path)) # Lambda sets up the sys.path like this: # ['/var/task', '/opt/python/lib/python3.13/site-packages', '/opt/python', From 1300f00b72ec3bea2e6debce3f3aac3126d46bd0 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 7 May 2026 14:58:08 -0400 Subject: [PATCH 3/5] test: add unit tests for nested_zip_loader --- package_python_function/nested_zip_loader.py | 2 +- tests/test_nested_zip_loader.py | 79 ++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tests/test_nested_zip_loader.py diff --git a/package_python_function/nested_zip_loader.py b/package_python_function/nested_zip_loader.py index d43d320..1ddc404 100644 --- a/package_python_function/nested_zip_loader.py +++ b/package_python_function/nested_zip_loader.py @@ -57,7 +57,7 @@ def load_nested_zip() -> None: with zipfile.ZipFile(str(nested_zip_path), "r") as nested_zip: nested_zip.extractall(str(staging_package_path)) - # The idea here is that we don't rename the path until everything has been successfuly extracted. + # The idea here is that we don't rename the path until everything has been successfully extracted. # This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction, # we won't try and use the incomplete extraction. os.rename(str(staging_package_path), str(target_package_path)) diff --git a/tests/test_nested_zip_loader.py b/tests/test_nested_zip_loader.py new file mode 100644 index 0000000..d08da8e --- /dev/null +++ b/tests/test_nested_zip_loader.py @@ -0,0 +1,79 @@ +import importlib +import multiprocessing +import shutil +import sys +import tempfile +import zipfile +from pathlib import Path + +import pytest + +# We have to use importlib here because nested_zip_loader calls load_nested_zip +# at IMPORT TIME, which causes us a world of hurt in these tests if we try to +# import it "normally" here. +LOADER_PATH = Path(__file__).parent.parent / "package_python_function" / "nested_zip_loader.py" +PKG_NAME = "_test_nested_zip" + +def _make_deps_zip(path: Path) -> None: + with zipfile.ZipFile(path, "w") as zf: + zf.writestr(f"{PKG_NAME}/__init__.py", "LOADED = True\n") + +@pytest.fixture() +def lambda_env(tmp_path, monkeypatch): + """Simulate a Lambda-like layout: a task dir with a package whose __init__.py + is the nested_zip_loader code, and a .dependencies.zip with the 'real' code.""" + task_dir = tmp_path / "task" + pkg_dir = task_dir / PKG_NAME + pkg_dir.mkdir(parents=True) + shutil.copy(LOADER_PATH, pkg_dir / "__init__.py") + _make_deps_zip(pkg_dir / ".dependencies.zip") + + tmp_dir = tmp_path / "tmp" + tmp_dir.mkdir() + monkeypatch.setenv("TMPDIR", str(tmp_dir)) + tempfile.tempdir = None + + monkeypatch.syspath_prepend(str(task_dir)) + + yield tmp_path + + sys.modules.pop(PKG_NAME, None) + tempfile.tempdir = None + +def test_cold_start_extracts(lambda_env): + mod = importlib.import_module(PKG_NAME) + assert mod.LOADED is True + assert (lambda_env / "tmp" / "package-python-function").exists() + +def test_warm_start_skips_extraction(lambda_env): + target_pkg = lambda_env / "tmp" / "package-python-function" / PKG_NAME + target_pkg.mkdir(parents=True) + (target_pkg / "__init__.py").write_text("LOADED = 'warm'\n") + + mod = importlib.import_module(PKG_NAME) + assert mod.LOADED == "warm" + +def test_stale_staging_cleaned(lambda_env): + staging = lambda_env / "tmp" / ".stage.package-python-function" + staging.mkdir(parents=True) + (staging / "stale.txt").write_text("leftover") + + importlib.import_module(PKG_NAME) + assert not staging.exists() + +def _worker(task_dir): + import importlib + import sys + + sys.path.insert(0, task_dir) + assert importlib.import_module(PKG_NAME).LOADED is True + +def test_concurrent_no_race(lambda_env): + ctx = multiprocessing.get_context("forkserver") + procs = [ctx.Process(target=_worker, args=(str(lambda_env / "task"),)) for _ in range(2)] + for p in procs: + p.start() + for p in procs: + p.join(timeout=10) + assert p.exitcode == 0, "A race condition occured while extracting." + assert (lambda_env / "tmp" / "package-python-function").exists() From c5d4bcf60600c29f5af7976b2621557493e53016 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 7 May 2026 14:37:25 -0500 Subject: [PATCH 4/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- tests/test_nested_zip_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_nested_zip_loader.py b/tests/test_nested_zip_loader.py index d08da8e..38a40de 100644 --- a/tests/test_nested_zip_loader.py +++ b/tests/test_nested_zip_loader.py @@ -75,5 +75,5 @@ def test_concurrent_no_race(lambda_env): p.start() for p in procs: p.join(timeout=10) - assert p.exitcode == 0, "A race condition occured while extracting." + assert p.exitcode == 0, "A race condition occurred while extracting." assert (lambda_env / "tmp" / "package-python-function").exists() From 8e6476f50fac4e504ebedccbda185b5b469be957 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 7 May 2026 14:37:42 -0500 Subject: [PATCH 5/5] Spelling Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- package_python_function/nested_zip_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package_python_function/nested_zip_loader.py b/package_python_function/nested_zip_loader.py index 1ddc404..342f003 100644 --- a/package_python_function/nested_zip_loader.py +++ b/package_python_function/nested_zip_loader.py @@ -58,7 +58,7 @@ def load_nested_zip() -> None: nested_zip.extractall(str(staging_package_path)) # The idea here is that we don't rename the path until everything has been successfully extracted. - # This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction, + # This is expected to be an atomic operation. That way, if AWS terminates us during the extraction, # we won't try and use the incomplete extraction. os.rename(str(staging_package_path), str(target_package_path))