Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
165 commits
Select commit Hold shift + click to select a range
750bdbe
Add complete benchmark platform plans for Nature Methods paper
trissim Dec 19, 2025
5febdae
chore: tighten benchmark platform plans
trissim Dec 20, 2025
5f1b5e9
Add research findings for benchmark platform datasets and pipelines
trissim Dec 23, 2025
c3532ba
Add BBBC microscope handlers for benchmark datasets
trissim Dec 23, 2025
08fe25d
Add TESTED BBBC microscope handlers for benchmark datasets
trissim Dec 23, 2025
2aef1af
refactor: microscope detection, metadata caching, and signal batching
trissim Dec 24, 2025
3bc7445
feat: benchmark platform for OpenHCS and tool comparison
trissim Dec 24, 2025
ece9962
feat(benchmark): Add CellProfiler → OpenHCS converter with LLM absorp…
trissim Dec 24, 2025
a259e8d
wip(gui): [EXPERIMENTAL] Geometry tracking for flash overlays
trissim Dec 24, 2025
7c75a70
fix(converter): LLM-inferred contract/category, remove LLM fallback
trissim Dec 24, 2025
5a3c3e7
feat(benchmark): Complete CellProfiler to OpenHCS converter with 88 a…
trissim Dec 24, 2025
fef610e
Fix CellProfiler parameter mapping and kwargs passing
trissim Dec 24, 2025
fd0bb49
Recategorize CellProfiler functions with correct variable_components …
trissim Dec 24, 2025
6d13d43
Fix CellProfiler function categorization with correct variable_compon…
trissim Dec 24, 2025
728edda
Fix UnmixColors categorization: channel_operation → image_operation
trissim Dec 24, 2025
8f2a836
Fix dimensional flexibility in absorbed functions
trissim Dec 24, 2025
fee0c33
Fix pipeline generator and parameter mappings
trissim Dec 24, 2025
191183b
Fix IdentifyTertiaryObjects parameter mapping
trissim Dec 24, 2025
78dc00a
docs: Add comprehensive CellProfiler refactor plan
trissim Dec 27, 2025
811b1cb
docs: Move CellProfiler refactor plan to plans folder
trissim Dec 27, 2025
17e1608
docs: Move CellProfiler refactor plan to root plans folder
trissim Dec 27, 2025
4169c07
Merge main into benchmark-platform
trissim Feb 16, 2026
ece0bb4
Add CellProfiler integration architecture design document
trissim Feb 16, 2026
94f6064
Add CellProfiler to OpenHCS architecture mapping document
trissim Feb 16, 2026
7fd0918
Add comprehensive 'Context for New Agents' section to CellProfiler de…
trissim Feb 16, 2026
e6c50a9
Refactor FunctionStep artifact runtime
trissim Apr 25, 2026
f1badcf
Snapshot FunctionStep runtime plans
trissim Apr 25, 2026
dc979d2
Keep materialization collision metadata coherent
trissim Apr 25, 2026
64fed65
Centralize function pattern transforms
trissim Apr 25, 2026
85fb077
Make compiled step plans typed source of truth
trissim Apr 25, 2026
55988ac
Write validation results into compiled plans
trissim Apr 25, 2026
29e1bf6
Tighten compiled plan runtime typing
trissim Apr 25, 2026
62885ea
Compile FunctionStep invocation graphs
trissim Apr 25, 2026
ba266ee
Plan runtime value artifact upgrade
trissim Apr 25, 2026
d8f563f
Centralize callable compiler contracts
trissim Apr 25, 2026
e8cc88f
Normalize FunctionStep patterns before compilation
trissim Apr 25, 2026
51c4bbd
Compile artifacts through typed graph
trissim Apr 25, 2026
aee4c34
Validate runtime artifact values by kind
trissim Apr 25, 2026
d721ee5
Update runtime artifact upgrade progress
trissim Apr 25, 2026
62b9933
Record runtime values in typed store
trissim Apr 25, 2026
31cf196
Document runtime value store progress
trissim Apr 25, 2026
14637b8
Require typed records for artifact inputs
trissim Apr 26, 2026
cc73fe8
Resolve artifact VFS access through typed store queries
trissim Apr 26, 2026
54aa247
Plan compiler runtime refactor passes
trissim Apr 26, 2026
1fede4b
Use existing materialization presets for artifact defaults
trissim Apr 26, 2026
56c2590
Clarify compiler snapshot roadmap
trissim Apr 26, 2026
1bdc0ad
Add compiler step snapshots
trissim Apr 26, 2026
eccf3a5
Introduce compiler compilation sessions
trissim Apr 26, 2026
dd65f62
Validate FunctionStep contracts from compiled plans
trissim Apr 26, 2026
14af9ee
Add native runtime artifact values
trissim Apr 26, 2026
4368467
Compile CellProfiler artifact symbols
trissim Apr 26, 2026
7709981
Add CellProfiler runtime adapter view
trissim Apr 26, 2026
243cff4
Refactor compiler runtime contracts
trissim Apr 26, 2026
ea2575a
Prune compiler contract ceremony
trissim Apr 26, 2026
573ea34
Collapse compiler and path direction authorities
trissim Apr 26, 2026
09d0383
Simplify path direction resolution
trissim Apr 26, 2026
3c9a382
Collapse path IO directory helper symmetry
trissim Apr 26, 2026
86366f8
Add generic runtime artifact semantics
trissim Apr 26, 2026
069d52f
Remove flattened runtime semantic compatibility
trissim Apr 26, 2026
a1b0e12
Clean runtime compiler advisor findings
trissim Apr 26, 2026
afb46e4
Replace structural protocols with nominal ABCs
trissim Apr 26, 2026
191a2f2
Wire CellProfiler module runtime adapter
trissim Apr 26, 2026
0d6d31f
Register runtime payload capabilities explicitly
trissim Apr 27, 2026
8bb6897
Clean core CellProfiler runtime functions
trissim Apr 27, 2026
fadd6b5
Execute generated CellProfiler artifact flow
trissim Apr 27, 2026
43f7377
Wire converted cppipe execution and expand CP semantics
trissim Apr 27, 2026
12028ac
Refactor CellProfiler runtime image flow
trissim Apr 27, 2026
2033031
Add typed FunctionStep source bindings
trissim Apr 27, 2026
3394e3c
Type CellProfiler source binding contracts
trissim Apr 27, 2026
ddc9429
Compile CellProfiler setup schema into source bindings
trissim Apr 27, 2026
d07d7e2
Make compiled step edges explicit
trissim Apr 27, 2026
56a64fa
Make source binding plans pickleable
trissim Apr 27, 2026
eb28f24
Resolve typed runtime source bindings
trissim Apr 27, 2026
5a46067
Promote metadata rules into source bindings
trissim Apr 27, 2026
09a46cf
Compile NamesAndTypes image-set match plans
trissim Apr 27, 2026
1f66cc8
Unblock module-level GrayToColor conversion
trissim Apr 27, 2026
3b92e63
Execute BBBC021 converted pipeline acceptance
trissim Apr 28, 2026
dd1bef9
Validate CellProfiler tabular outputs end to end
trissim Apr 28, 2026
a8f72cc
Expose canonical cppipe references and source schema
trissim Apr 28, 2026
cada91c
Restore absorbed CellProfiler import surfaces
trissim Apr 28, 2026
0554b93
Track shipped cppipe corpus expectations
trissim Apr 28, 2026
7e02167
Accept canonical BBBC021 cppipe corpus
trissim Apr 28, 2026
ead1508
Expand real pipeline acceptance coverage
trissim Apr 28, 2026
258d682
Move pipeline image schema into core
trissim Apr 28, 2026
37f226e
Support order-based CellProfiler image matching
trissim Apr 28, 2026
77181c7
Infer CellProfiler artifact contracts
trissim Apr 28, 2026
6fd2dae
Deduplicate CellProfiler setting binding
trissim Apr 28, 2026
6dfa82f
Share generated cppipe pipeline context
trissim Apr 28, 2026
0cd550f
Model NamesAndTypes assignment layouts
trissim Apr 28, 2026
8ce1c10
Strengthen CellProfiler runtime contracts
trissim Apr 28, 2026
0ccb769
Strengthen absorbed CellProfiler function registry
trissim Apr 28, 2026
c3f67cd
Require absorbed functions in generated pipelines
trissim Apr 28, 2026
385a172
Fail loudly on unresolved CellProfiler contracts
trissim Apr 28, 2026
3a7844a
Add CellProfiler compatibility coverage matrix
trissim Apr 28, 2026
ec300e2
Resolve absorbed CellProfiler processing contracts
trissim Apr 28, 2026
9c9b0a2
Infer generic CellProfiler artifact semantics
trissim Apr 28, 2026
15b7b97
Support CellProfiler source selector filters
trissim Apr 28, 2026
7e302e0
Canonicalize legacy CellProfiler module identities
trissim Apr 28, 2026
ca05187
Support legacy CellProfiler measurement pipelines
trissim Apr 28, 2026
be09b15
Support source-bound CellProfiler object artifacts
trissim Apr 28, 2026
b453de2
Lower legacy CellProfiler LoadImages schema
trissim Apr 28, 2026
257e62d
Support typed area occupied measurements
trissim Apr 28, 2026
552ad9f
Support typed CellProfiler UnmixColors outputs
trissim Apr 28, 2026
3c66221
Allow CellProfiler workspace name rebinding
trissim Apr 28, 2026
fabef5e
Support mixed CellProfiler area occupied rows
trissim Apr 28, 2026
27bb0d4
Add legacy CellProfiler Align support
trissim Apr 28, 2026
df51f0a
Support typed CellProfiler overlay outline rows
trissim Apr 28, 2026
df7dcc4
Ignore legacy empty CellProfiler settings
trissim Apr 28, 2026
2a83b28
Support typed CellProfiler FilterObjects relabel outputs
trissim Apr 28, 2026
315a65d
Support CellProfiler FilterObjects measurement rules
trissim Apr 28, 2026
f742c54
Materialize CellProfiler source schemas as OpenHCS workspaces
trissim Apr 28, 2026
63eff96
Advance CellProfiler source-bound execution
trissim Apr 28, 2026
2891507
Cover CellProfiler LoadImages sidecar sources
trissim Apr 28, 2026
5cbe7ab
Type CellProfiler symbols by workspace kind
trissim Apr 28, 2026
cb9dc65
Track official ExampleHuman corpus fixture
trissim Apr 28, 2026
42f5179
Support CellProfiler object measurement execution
trissim Apr 28, 2026
16385a3
Support exact CellProfiler source filters
trissim Apr 28, 2026
73ccf18
Expand CellProfiler example pipeline compatibility
trissim Apr 28, 2026
1a48dce
Support CellProfiler relationship measurement consumers
trissim Apr 28, 2026
fd99cad
Broaden CellProfiler runtime compatibility
trissim Apr 29, 2026
380a50f
Broaden CellProfiler runtime compatibility
trissim Apr 29, 2026
90f47f7
Move CellProfiler image payload semantics into core
trissim Apr 29, 2026
b8d4ab8
Broaden CellProfiler color pipeline execution
trissim Apr 29, 2026
526dbb5
Support CellProfiler hex color literals
trissim Apr 29, 2026
234f35d
Align CellProfiler mask geometry
trissim Apr 29, 2026
b400a56
Fix CellProfiler rule-row source bindings
trissim Apr 29, 2026
402dd78
Support CellProfiler object measurement vectors
trissim Apr 29, 2026
e89993d
Align CellProfiler source and object measurement domains
trissim Apr 29, 2026
31fbee3
Expand official CellProfiler runtime coverage
trissim Apr 29, 2026
1cbd64c
Add official CellProfiler neighbors coverage
trissim Apr 29, 2026
bdd6153
Broaden CellProfiler official pipeline coverage
trissim Apr 29, 2026
c380f41
Support FilterObjects per-parent selection
trissim Apr 29, 2026
be61068
Support mixed image bundles for CellProfiler Tile
trissim Apr 29, 2026
66f8196
Support Vitra source schema and numpy sources
trissim Apr 29, 2026
e90ca2d
Support YeastPatches illumination pipeline
trissim Apr 29, 2026
388685d
Support official worm straightening pipelines
trissim Apr 29, 2026
1fd4708
Add opt-in CellProfiler corpus execution test
trissim Apr 29, 2026
f78b506
Use converted CellProfiler pipelines in benchmark adapter
trissim Apr 29, 2026
f2dd7c8
Merge remote-tracking branch 'openhcsdev/main' into benchmark-platform
trissim Apr 29, 2026
bad8d4a
Expand CellProfiler execution validation
trissim Apr 29, 2026
ea4045c
Tighten CellProfiler export validation
trissim Apr 29, 2026
6c2dc67
Derive CellProfiler corpus coverage from discovered cppipes
trissim Apr 29, 2026
f1e4b73
Classify cppipe module coverage by role
trissim Apr 29, 2026
be01829
Track CellProfiler source module coverage
trissim Apr 29, 2026
046d225
Promote module artifact contracts to OpenHCS core
trissim Apr 29, 2026
5571bd3
Generalize pipeline image schema naming
trissim Apr 29, 2026
6d87df5
Use OpenHCS callable metadata for generated contracts
trissim Apr 29, 2026
f39e132
Collapse CellProfiler contract metadata wrapper
trissim Apr 29, 2026
565a643
Resolve absorbed contracts through OpenHCS enum
trissim Apr 29, 2026
99d033b
Reuse setting binding for illumination kwargs
trissim Apr 29, 2026
823bf67
Rename source binding current image payload
trissim Apr 29, 2026
b8d9e02
Generalize runtime input binding requests
trissim Apr 29, 2026
5858b0e
Tighten converted pipeline export validation
trissim Apr 29, 2026
ae3442c
Derive CSV export kinds from artifact payload shape
trissim Apr 29, 2026
ac1c433
Rename module execution source-binding image
trissim Apr 29, 2026
68801d1
Centralize source metadata key matching
trissim Apr 29, 2026
c506678
Reuse declarative settings binding for illumination modules
trissim Apr 29, 2026
36e9ff1
Move image schema builder into core
trissim Apr 29, 2026
401e818
Centralize source metadata component matching
trissim Apr 29, 2026
8d0c4d9
Move source image type artifact kind into core
trissim Apr 29, 2026
40a1333
Promote source schema metadata into core runtime
trissim Apr 29, 2026
10360e6
Promote runtime export and query semantics
trissim Apr 29, 2026
a457e4e
Promote runtime execution validation
trissim Apr 29, 2026
ea0d6f1
Add runtime output equivalence checks
trissim Apr 29, 2026
a6d0012
Add CellProfiler parity benchmark runner
trissim Apr 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
65 changes: 65 additions & 0 deletions benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Public API for the benchmark platform."""

import openhcs as _openhcs_dependency_bootstrap

from benchmark.contracts.dataset import DatasetSpec, AcquiredDataset
from benchmark.contracts.metric import MetricCollector
from benchmark.contracts.tool_adapter import (
BenchmarkResult,
ToolAdapter,
ToolAdapterError,
ToolExecutionError,
ToolNotInstalledError,
ToolVersionError,
)
from benchmark.datasets.registry import BBBC021_SINGLE_PLATE, get_dataset_spec, DATASET_REGISTRY
from benchmark.datasets.acquire import acquire_dataset, DatasetAcquisitionError
from benchmark.metrics.time import TimeMetric
from benchmark.metrics.memory import MemoryMetric
from benchmark.pipelines.registry import (
PipelineSpec,
NUCLEI_SEGMENTATION,
get_pipeline_spec,
PIPELINE_REGISTRY,
)
from benchmark.adapters.openhcs import OpenHCSAdapter
from benchmark.adapters.cellprofiler import CellProfilerAdapter
from benchmark.runner import (
CellProfilerCompatibilityResult,
run_benchmark,
run_cellprofiler_compatibility_benchmark,
)

__all__ = [
# Contracts
"DatasetSpec",
"AcquiredDataset",
"MetricCollector",
"BenchmarkResult",
"ToolAdapter",
"ToolAdapterError",
"ToolExecutionError",
"ToolNotInstalledError",
"ToolVersionError",
# Datasets
"DatasetAcquisitionError",
"acquire_dataset",
"BBBC021_SINGLE_PLATE",
"DATASET_REGISTRY",
"get_dataset_spec",
# Pipelines
"PipelineSpec",
"NUCLEI_SEGMENTATION",
"PIPELINE_REGISTRY",
"get_pipeline_spec",
# Metrics
"TimeMetric",
"MemoryMetric",
# Adapters
"CellProfilerAdapter",
"OpenHCSAdapter",
# Runner
"CellProfilerCompatibilityResult",
"run_benchmark",
"run_cellprofiler_compatibility_benchmark",
]
6 changes: 6 additions & 0 deletions benchmark/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Tool adapters."""

from benchmark.adapters.cellprofiler import CellProfilerAdapter
from benchmark.adapters.openhcs import OpenHCSAdapter

__all__ = ["CellProfilerAdapter", "OpenHCSAdapter"]
194 changes: 194 additions & 0 deletions benchmark/adapters/cellprofiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""Native CellProfiler tool adapter."""

from __future__ import annotations

import shutil
import subprocess
from contextlib import ExitStack
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from benchmark.adapters.cppipe_source import (
CPPipeSourceRequest,
resolve_cppipe_source,
)
from benchmark.contracts.metric import MetricCollector
from benchmark.contracts.tool_adapter import (
BenchmarkResult,
ToolAdapter,
ToolExecutionError,
ToolNotInstalledError,
)
from openhcs.core.runtime_equivalence import RuntimeOutputSnapshot


@dataclass(frozen=True, slots=True)
class CellProfilerRunRequest:
"""Authoritative native CellProfiler run request."""

dataset_path: Path
pipeline_name: str
pipeline_params: dict[str, Any]
metrics: tuple[MetricCollector, ...]
output_dir: Path

@property
def dataset_id(self) -> str:
return str(self.pipeline_params.get("dataset_id", self.dataset_path.name))

@property
def timeout_seconds(self) -> float | None:
value = self.pipeline_params.get("cellprofiler_timeout_seconds")
if value is None:
return None
return float(value)

@property
def cppipe_source(self) -> CPPipeSourceRequest:
return CPPipeSourceRequest.from_pipeline_params(
dataset_id=self.dataset_id,
output_dir=self.output_dir,
pipeline_params=self.pipeline_params,
)


class CellProfilerAdapter(ToolAdapter):
    """Run a native CellProfiler `.cppipe` as the semantic reference tool.

    The adapter shells out to the CellProfiler command-line runner.  The
    subprocess failure modes handled here (missing executable, non-zero exit
    status, timeout) are reported through the benchmark's ``Tool*Error``
    exceptions instead of leaking raw ``subprocess`` errors.
    """

    name = "CellProfiler"

    def __init__(self, executable: str | Path | None = None) -> None:
        """Create the adapter.

        Args:
            executable: Explicit path to the CellProfiler runner.  When falsy,
                the executable is looked up on ``PATH`` each time it is needed.
        """
        self._configured_executable = Path(executable) if executable else None
        # Overwritten by validate_installation() with the reported version.
        self.version = "unknown"

    def validate_installation(self) -> None:
        """Check that the CellProfiler command-line runner is available.

        Runs ``<executable> --version`` and stores the reported text (stdout,
        falling back to stderr) in ``self.version``.

        Raises:
            ToolNotInstalledError: The executable cannot be found.
            ToolExecutionError: The version query times out or exits with a
                non-zero status.
        """
        executable = self._cellprofiler_executable()
        try:
            result = subprocess.run(
                [str(executable), "--version"],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except FileNotFoundError as exc:
            raise ToolNotInstalledError(
                f"CellProfiler executable not found: {executable}"
            ) from exc
        except subprocess.TimeoutExpired as exc:
            # Keep a hung executable inside the adapter's error hierarchy.
            raise ToolExecutionError(
                f"Timed out after {exc.timeout} seconds querying the "
                f"CellProfiler version from {executable}"
            ) from exc
        if result.returncode != 0:
            raise ToolExecutionError(
                "Failed to query CellProfiler version:\n"
                + _subprocess_output(result)
            )
        self.version = (result.stdout or result.stderr).strip() or "unknown"

    def run(
        self,
        dataset_path: Path,
        pipeline_name: str,
        pipeline_params: dict[str, Any],
        metrics: list[Any],
        output_dir: Path,
    ) -> BenchmarkResult:
        """Execute a native CellProfiler pipeline headlessly.

        Args:
            dataset_path: Directory passed to CellProfiler as its input.
            pipeline_name: Benchmark pipeline name; used to name the output
                directory and recorded in the result.
            pipeline_params: Options selecting the ``.cppipe`` source, plus
                the optional ``dataset_id`` and
                ``cellprofiler_timeout_seconds`` entries.
            metrics: ``MetricCollector`` instances; each is entered as a
                context manager around the subprocess call.
            output_dir: Root directory for the resolved pipeline reference and
                the native CellProfiler outputs.

        Returns:
            A successful ``BenchmarkResult`` pointing at the native output
            directory, with provenance about the pipeline and its outputs.

        Raises:
            ToolNotInstalledError: The CellProfiler executable cannot be found.
            ToolExecutionError: A metric is not a ``MetricCollector``, the
                pipeline source cannot be resolved, or CellProfiler times out
                or exits with a non-zero status.
        """
        request = CellProfilerRunRequest(
            dataset_path=Path(dataset_path),
            pipeline_name=pipeline_name,
            # Copy so the frozen request does not alias the caller's dict.
            pipeline_params=dict(pipeline_params),
            metrics=self._validated_metric_collectors(metrics),
            output_dir=Path(output_dir),
        )
        request.output_dir.mkdir(parents=True, exist_ok=True)
        source = resolve_cppipe_source(request.cppipe_source)
        native_output_root = (
            request.output_dir
            / f"{request.dataset_path.name}_{request.pipeline_name}_native_cellprofiler"
        )
        native_output_root.mkdir(parents=True, exist_ok=True)
        # CellProfiler CLI: -c headless, -r run the pipeline, -p pipeline file,
        # -i input directory, -o output directory.
        command = (
            str(self._cellprofiler_executable()),
            "-c",
            "-r",
            "-p",
            str(source.path),
            "-i",
            str(request.dataset_path),
            "-o",
            str(native_output_root),
        )

        # ExitStack leaves every entered metric even when the run raises.
        with ExitStack() as stack:
            for metric in request.metrics:
                stack.enter_context(metric)
            try:
                result = subprocess.run(
                    command,
                    capture_output=True,
                    text=True,
                    timeout=request.timeout_seconds,
                    check=False,
                )
            except FileNotFoundError as exc:
                raise ToolNotInstalledError(
                    f"CellProfiler executable not found: {command[0]}"
                ) from exc
            except subprocess.TimeoutExpired as exc:
                # The configured timeout is an adapter feature, so its expiry
                # is reported as a tool execution failure.
                raise ToolExecutionError(
                    "Native CellProfiler execution timed out after "
                    f"{exc.timeout} seconds."
                ) from exc
            if result.returncode != 0:
                raise ToolExecutionError(
                    "Native CellProfiler execution failed:\n"
                    + _subprocess_output(result)
                )

        snapshot = RuntimeOutputSnapshot.from_output_root(native_output_root)
        provenance: dict[str, Any] = {
            "cellprofiler_version": self.version,
            "pipeline_source": "native_cppipe",
            "cppipe_path": str(source.path),
            "csv_output_count": len(snapshot.tables),
            "image_output_count": len(snapshot.images),
        }
        if source.reference_url is not None:
            provenance["cppipe_reference_url"] = source.reference_url
        return BenchmarkResult(
            tool_name=self.name,
            dataset_id=request.dataset_id,
            pipeline_name=request.pipeline_name,
            metrics={
                metric.name: metric.get_result()
                for metric in request.metrics
            },
            output_path=native_output_root,
            success=True,
            error_message=None,
            provenance=provenance,
        )

    def _cellprofiler_executable(self) -> Path:
        """Return the configured executable, or the ``cellprofiler`` found on PATH.

        Raises:
            ToolNotInstalledError: No executable was configured and none is on
                ``PATH``.
        """
        if self._configured_executable is not None:
            return self._configured_executable
        executable = shutil.which("cellprofiler")
        if executable is None:
            raise ToolNotInstalledError(
                "CellProfiler executable not found in PATH."
            )
        return Path(executable)

    def _validated_metric_collectors(
        self,
        metrics: list[Any],
    ) -> tuple[MetricCollector, ...]:
        """Return ``metrics`` as a tuple after checking every element's type.

        Raises:
            ToolExecutionError: An element is not a ``MetricCollector``.
        """
        validated_metrics: list[MetricCollector] = []
        for metric in metrics:
            if not isinstance(metric, MetricCollector):
                raise ToolExecutionError(
                    f"Metric {metric} does not extend MetricCollector"
                )
            validated_metrics.append(metric)
        return tuple(validated_metrics)


def _subprocess_output(result: subprocess.CompletedProcess[str]) -> str:
stdout = (result.stdout or "").strip()
stderr = (result.stderr or "").strip()
return "\n".join(part for part in (stdout, stderr) if part)
129 changes: 129 additions & 0 deletions benchmark/adapters/cppipe_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Shared .cppipe source resolution for benchmark adapters."""

from __future__ import annotations

from collections.abc import Callable, Mapping
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
from urllib.request import urlopen

from benchmark.contracts.tool_adapter import ToolExecutionError
from benchmark.datasets.registry import get_dataset_spec


# Callable that makes a referenced .cppipe available locally; it is invoked as
# materializer(reference_url, target_dir) and returns the local file path.
CPPipeReferenceMaterializer = Callable[[str, Path], Path]


@dataclass(frozen=True, slots=True)
class CPPipeSourceRequest:
"""Typed request for resolving a CellProfiler pipeline source."""

dataset_id: str
output_dir: Path
cppipe_path: Path | None = None
reference_url: str | None = None
reference_index: int | None = None

@classmethod
def from_pipeline_params(
cls,
*,
dataset_id: str,
output_dir: Path,
pipeline_params: Mapping[str, Any],
) -> "CPPipeSourceRequest":
cppipe_value = pipeline_params.get("cppipe_path") or pipeline_params.get(
"cppipe_file"
)
reference_url = pipeline_params.get("cppipe_reference_url")
reference_index = pipeline_params.get("cppipe_reference_index")
return cls(
dataset_id=dataset_id,
output_dir=Path(output_dir),
cppipe_path=Path(cppipe_value) if cppipe_value else None,
reference_url=str(reference_url) if reference_url is not None else None,
reference_index=(
int(reference_index) if reference_index is not None else None
),
)

def __post_init__(self) -> None:
if not self.dataset_id:
raise ValueError("CPPipeSourceRequest.dataset_id cannot be empty.")
object.__setattr__(self, "output_dir", Path(self.output_dir))
if self.reference_url == "":
raise ValueError("CPPipeSourceRequest.reference_url cannot be empty.")


@dataclass(frozen=True, slots=True)
class CPPipeSourceResolution:
"""Resolved CellProfiler pipeline source."""

path: Path
reference_url: str | None = None


def resolve_cppipe_source(
    request: CPPipeSourceRequest,
    *,
    materialize_reference: CPPipeReferenceMaterializer | None = None,
) -> CPPipeSourceResolution:
    """Turn a source request into a local ``.cppipe`` path.

    Resolution order: an explicit local file, then an explicit reference URL,
    then the dataset's registered reference at ``reference_index``.  References
    are materialized under ``<output_dir>/cppipe_references``.

    Args:
        request: The source request to resolve.
        materialize_reference: Optional replacement for the default
            downloader, called as ``(reference_url, target_dir)``.

    Raises:
        ToolExecutionError: The local file does not exist, or the request
            names no source at all.
    """
    fetch = materialize_reference or materialize_cppipe_reference

    local_path = request.cppipe_path
    if local_path is not None:
        if local_path.exists():
            return CPPipeSourceResolution(local_path)
        raise ToolExecutionError(f".cppipe file not found: {local_path}")

    url = request.reference_url
    index = request.reference_index
    if url is None and index is not None:
        url = reference_cppipe_url(request.dataset_id, index)
    if url is None:
        raise ToolExecutionError(
            "CellProfiler pipeline execution requires cppipe_path, cppipe_file, "
            "cppipe_reference_url, or cppipe_reference_index."
        )

    reference_dir = request.output_dir / "cppipe_references"
    materialized_path = fetch(url, reference_dir)
    return CPPipeSourceResolution(materialized_path, reference_url=url)


def reference_cppipe_url(dataset_id: str, reference_index: int) -> str:
    """Resolve one canonical .cppipe URL from the dataset registry.

    Args:
        dataset_id: Identifier passed to ``get_dataset_spec``.
        reference_index: Zero-based position in the dataset's
            ``reference_cppipe_urls``.  Negative values are rejected instead
            of silently wrapping around to the end of the list.

    Returns:
        The reference URL stored at ``reference_index``.

    Raises:
        ToolExecutionError: ``dataset_id`` is unknown, or ``reference_index``
            is outside ``0 <= index < len(reference_cppipe_urls)``.
    """
    try:
        dataset_spec = get_dataset_spec(dataset_id)
    except KeyError as exc:
        raise ToolExecutionError(
            f"Unknown dataset id {dataset_id!r} for cppipe reference lookup."
        ) from exc
    reference_urls = dataset_spec.reference_cppipe_urls
    # Explicit bounds check: plain indexing would accept -1 and return the
    # last reference, contradicting the "out of range" contract below.
    if not 0 <= reference_index < len(reference_urls):
        raise ToolExecutionError(
            f"Dataset {dataset_id!r} exposes "
            f"{len(reference_urls)} cppipe references; "
            f"index {reference_index} is out of range."
        )
    return reference_urls[reference_index]


def materialize_cppipe_reference(
    reference_url: str,
    target_dir: Path,
) -> Path:
    """Download one canonical .cppipe file into a stable local path.

    The file is stored in ``target_dir`` under the last segment of the URL
    path (``reference.cppipe`` when the path has none).  An existing file of
    that name is treated as a cache hit and returned without any download.

    The payload is first written to a sibling ``.part`` file and renamed into
    place only once the write has completed.  A failed or interrupted write
    therefore does not leave a truncated file at the cache path, where later
    calls would mistake it for a valid cached pipeline.

    Args:
        reference_url: URL of the reference pipeline.
        target_dir: Cache directory; created if missing.

    Returns:
        Path of the cached pipeline file inside ``target_dir``.
    """
    target_dir.mkdir(parents=True, exist_ok=True)
    parsed = urlparse(reference_url)
    filename = Path(parsed.path).name or "reference.cppipe"
    target_path = target_dir / filename
    if target_path.exists():
        return target_path
    with urlopen(reference_url) as response:  # noqa: S310
        payload = response.read()
    partial_path = target_path.with_name(f"{filename}.part")
    try:
        partial_path.write_bytes(payload)
        # Rename within the same directory so the cache entry appears whole.
        partial_path.replace(target_path)
    except BaseException:
        # Do not leave a stray partial file behind on failure.
        partial_path.unlink(missing_ok=True)
        raise
    return target_path
Loading
Loading