Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
"numpy>=1.26.0",
"portalocker>=2.8.0", # Cross-platform file locking
"metaclass-registry",
"imageio>=2.37.0",
"zarr>=2.18.0,<3.0", # Required for ZarrStorageBackend
"ome-zarr>=0.11.0", # Required for OME-ZARR HCS compliance
]
Expand Down Expand Up @@ -197,4 +198,4 @@ ignore = [
]

[tool.ruff.per-file-ignores]
"__init__.py" = ["F401"] # unused imports
"__init__.py" = ["F401"] # unused imports
15 changes: 13 additions & 2 deletions src/polystore/disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import shutil
import importlib
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, Union

Expand All @@ -23,7 +24,7 @@

def optional_import(module_name):
try:
return __import__(module_name)
return importlib.import_module(module_name)
except ImportError:
return None

Expand All @@ -44,6 +45,7 @@ def optional_import(module_name):
cupy = get_cupy()
tf = get_tf()
tifffile = optional_import("tifffile")
imageio = optional_import("imageio.v3")

# Optional arraybridge integration for memory conversion
try:
Expand Down Expand Up @@ -99,6 +101,7 @@ def _register_formats(self):

# Complex formats - use custom handlers
(FileFormat.TIFF, tifffile, self._tiff_writer, self._tiff_reader),
(FileFormat.RASTER_IMAGE, imageio, self._image_writer, self._image_reader),
(FileFormat.TEXT, True, self._text_writer, self._text_reader),
(FileFormat.JSON, True, self._json_writer, self._json_reader),
(FileFormat.CSV, True, self._csv_writer, self._csv_reader),
Expand Down Expand Up @@ -164,6 +167,14 @@ def _tiff_reader(self, path):
else:
return tifffile.imread(str(path))

def _image_writer(self, path, data, **kwargs):
"""Write standard raster images using imageio."""
imageio.imwrite(path, np.asarray(data))

def _image_reader(self, path):
Comment on lines +170 to +174
Copy link

Copilot AI Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raster image support is introduced here, but the test suite doesn’t cover saving/loading any of the new extensions (e.g., .png/.jpg/.bmp). Please add pytest coverage that round-trips a small array through at least one raster format and asserts the extension is registered/usable (and ideally verifies case-insensitive extension handling, e.g., '.PNG').

Copilot uses AI. Check for mistakes.
"""Read standard raster images using imageio."""
return imageio.imread(path)
Comment on lines +174 to +176
Copy link

Copilot AI Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DiskStorageBackend.load() calls the registered reader as reader(disk_path, **kwargs). The new _image_reader does not accept **kwargs, so any non-empty kwargs (even benign ones) will raise a TypeError when loading raster images. Please update _image_reader to accept **kwargs (and either ignore them or pass supported options through to imageio.imread).

Suggested change
def _image_reader(self, path):
"""Read standard raster images using imageio."""
return imageio.imread(path)
def _image_reader(self, path, **kwargs):
"""Read standard raster images using imageio."""
return imageio.imread(path, **kwargs)

Copilot uses AI. Check for mistakes.

def _text_writer(self, path, data, **kwargs):
"""Write text data to file. Accepts and ignores extra kwargs for compatibility."""
path.write_text(str(data))
Expand Down Expand Up @@ -261,7 +272,7 @@ def load(self, file_path: Union[str, Path], **kwargs) -> Any:
ext = disk_path.suffix.lower()

if not self.format_registry.is_registered(ext):
raise ValueError(f"No writer registered for extension '{ext}'")
raise ValueError(f"No reader registered for extension '{ext}'")

try:
reader = self.format_registry.get_reader(ext)
Expand Down
11 changes: 10 additions & 1 deletion src/polystore/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class FileFormat(Enum):

# Image formats
TIFF = "tiff"
RASTER_IMAGE = "raster_image"

# Data formats
CSV = "csv"
Expand All @@ -44,14 +45,22 @@ def extensions(self):
FileFormat.TENSORFLOW: [".tf"],
FileFormat.ZARR: [".zarr"],
FileFormat.TIFF: [".tif", ".tiff"],
FileFormat.RASTER_IMAGE: [".bmp", ".gif", ".jpeg", ".jpg", ".png"],
FileFormat.CSV: [".csv"],
FileFormat.JSON: [".json"],
FileFormat.TEXT: [".txt"],
FileFormat.ROI: [".roi.zip"],
}

# Default image extensions
DEFAULT_IMAGE_EXTENSIONS = {".tif", ".tiff", ".TIF", ".TIFF"}
DEFAULT_IMAGE_EXTENSIONS = {
extension
for extensions in (
FILE_FORMAT_EXTENSIONS[FileFormat.TIFF],
FILE_FORMAT_EXTENSIONS[FileFormat.RASTER_IMAGE],
)
for extension in extensions
}


def get_format_from_extension(ext: str) -> FileFormat:
Expand Down
8 changes: 7 additions & 1 deletion src/polystore/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ def list_files(
if self._memory_store[dir_key] is not None:
raise NotADirectoryError(f"Path is not a directory: {directory}")

lowercase_extensions = (
None if extensions is None else {extension.lower() for extension in extensions}
)
result = []
dir_prefix = dir_key + "/" if not dir_key.endswith("/") else dir_key

Expand All @@ -159,7 +162,10 @@ def list_files(
filename = Path(rel_path).name
# If pattern is None, match all files
if pattern is None or fnmatch(filename, pattern):
if not extensions or Path(filename).suffix in extensions:
if (
lowercase_extensions is None
or Path(filename).suffix.lower() in lowercase_extensions
):
# Calculate depth for breadth-first sorting
depth = rel_path.count('/')
result.append((Path(path), depth))
Expand Down
6 changes: 5 additions & 1 deletion src/polystore/virtual_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ def list_files(self, directory: Union[str, Path], pattern: Optional[str] = None,
logger.info(f" relative_dir_str='{relative_dir_str}'")
logger.info(f" mapping has {len(self._mapping_cache)} entries")

lowercase_extensions = (
None if extensions is None else {ext.lower() for ext in extensions}
)

# Filter paths in this directory
results = []
for virtual_relative in self._mapping_cache.keys():
Expand All @@ -230,7 +234,7 @@ def list_files(self, directory: Union[str, Path], pattern: Optional[str] = None,
vpath = Path(virtual_relative)
if pattern and not fnmatch(vpath.name, pattern):
continue
if extensions and vpath.suffix not in extensions:
if lowercase_extensions and vpath.suffix.lower() not in lowercase_extensions:
continue

# Return absolute path
Expand Down
11 changes: 11 additions & 0 deletions tests/test_memory_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ def test_list_files_with_extension_filter(self):
npy_files = self.backend.list_files("/test", extensions={".npy"})
assert len(npy_files) == 2

def test_list_files_extension_filter_is_case_insensitive(self):
"""Test extension filtering matches backend contract case-insensitively."""
self.backend.save(np.array([1]), "/test/image.TIF")
self.backend.save(np.array([2]), "/test/image.tif")
self.backend.save("text", "/test/notes.TXT")

tif_files = self.backend.list_files("/test", extensions={".tif"})

assert len(tif_files) == 2
assert {path.name for path in tif_files} == {"image.TIF", "image.tif"}

def test_list_files_recursive(self):
"""Test recursive file listing."""
# Create files in multiple levels
Expand Down
Loading