diff --git a/CHANGELOG.md b/CHANGELOG.md index bd6454c..18e6095 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/). +## [1.3.1] - 2026-04-27 + +### New `auto_pysmccnet` param: `rename` +- Added a `rename` boolean flag to the pysmccnet pipeline. + - When set to `False`, the pipeline preserves the original dataset column names instead of automatically prepending datatype prefixes (column names must be strictly unique across omics). + +### Datasets Module: +- Fixed a user-reported error ahead of the Multi Omics NETwork Analysis Workshop (MONET) at the University of Colorado Anschutz Medical Campus. + ## [1.3.0] - 2026-04-01 ### Network Module (`bioneuralnet.network`) diff --git a/README.md b/README.md index 4237266..94c38b8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Documentation](https://img.shields.io/badge/docs-read%20the%20docs-blue.svg)](https://bioneuralnet.readthedocs.io/en/latest/) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17503083.svg)](https://doi.org/10.5281/zenodo.17503083) -## Welcome to BioNeuralNet 1.3.0 +## Welcome to BioNeuralNet 1.3.1 ![BioNeuralNet Logo](assets/logo_update.png) diff --git a/bioneuralnet/__init__.py b/bioneuralnet/__init__.py index ef68588..c28ea7d 100644 --- a/bioneuralnet/__init__.py +++ b/bioneuralnet/__init__.py @@ -13,7 +13,7 @@ """ -__version__ = "1.3.0" +__version__ = "1.3.1" # submodules to enable direct imports such as `from bioneuralnet import utils` from . 
import utils diff --git a/bioneuralnet/datasets/monet/phenotype.csv b/bioneuralnet/datasets/monet/phenotype.csv index 370a64e..3066a52 100644 --- a/bioneuralnet/datasets/monet/phenotype.csv +++ b/bioneuralnet/datasets/monet/phenotype.csv @@ -1,4 +1,5 @@ -0 +"phenotype" +0 1 0 0 diff --git a/bioneuralnet/network/pysmccnet/pipeline.py b/bioneuralnet/network/pysmccnet/pipeline.py index c9cf1d9..42de58c 100644 --- a/bioneuralnet/network/pysmccnet/pipeline.py +++ b/bioneuralnet/network/pysmccnet/pipeline.py @@ -18,7 +18,7 @@ from ...utils.logger import get_logger logger = get_logger(__name__) -def auto_pysmccnet(X: List[Union[pd.DataFrame, np.ndarray]], Y: Union[pd.DataFrame, np.ndarray], AdjustedCovar: Optional[pd.DataFrame] = None, preprocess: bool = False, Kfold: int = 5, subSampNum: int = 100, DataType: Optional[List[str]] = None, BetweenShrinkage: float = 2.0, ScalingPen: List[float] = [0.1, 0.1], saving_dir: str = os.getcwd(), tuneLength: int = 5, tuneRangeCCA: List[float] = [0.1, 0.5], tuneRangePLS: List[float] = [0.5, 0.9], EvalMethod: str = 'accuracy', ncomp_pls: int = 3, seed: int = 123, CutHeight: float = 1 - 0.1**10, min_size: int = 10, max_size: int = 100, summarization: str = "NetSHy", precomputed_fold_data: Optional[dict] = None, device: Optional[torch.device] = "cpu", dtype: torch.dtype = torch.float64) -> dict: +def auto_pysmccnet(X: List[Union[pd.DataFrame, np.ndarray]], Y: Union[pd.DataFrame, np.ndarray], AdjustedCovar: Optional[pd.DataFrame] = None, preprocess: bool = False, Kfold: int = 5, subSampNum: int = 100, DataType: Optional[List[str]] = None, BetweenShrinkage: float = 2.0, ScalingPen: List[float] = [0.1, 0.1], saving_dir: str = os.getcwd(), tuneLength: int = 5, tuneRangeCCA: List[float] = [0.1, 0.5], tuneRangePLS: List[float] = [0.5, 0.9], EvalMethod: str = 'accuracy', ncomp_pls: int = 3, seed: int = 123, CutHeight: float = 1 - 0.1**10, min_size: int = 10, max_size: int = 100, summarization: str = "NetSHy", precomputed_fold_data: 
Optional[dict] = None, device: Optional[torch.device] = "cpu", dtype: torch.dtype = torch.float64, rename: bool = True) -> dict: """Automated SmCCNet workflow with GPU acceleration. Runs the complete SmCCNet pipeline supporting both CCA (continuous phenotype) and PLS (binary phenotype) modes. The workflow includes optional preprocessing, cross-validation for penalty tuning, subsampling for stability selection, and final network construction. @@ -48,6 +48,7 @@ def auto_pysmccnet(X: List[Union[pd.DataFrame, np.ndarray]], Y: Union[pd.DataFra precomputed_fold_data (dict | None): Precomputed CV folds to bypass internal fold generation. device (torch.device | cpu): PyTorch device; if None, automatically selects GPU if available. dtype (torch.dtype): PyTorch data type for computations. + rename (bool): If True, prefix datatype to column names; if False, use original column names. Returns: @@ -73,20 +74,38 @@ def auto_pysmccnet(X: List[Union[pd.DataFrame, np.ndarray]], Y: Union[pd.DataFra if DataType is None: DataType = [f"Omics{i+1}" for i in range(len(X))] + rename_mapping = {} + + # validating uniqueness if rename is False + if not rename: + all_original_columns = [] + for data_obj in X: + if hasattr(data_obj, 'columns'): + all_original_columns.extend(list(data_obj.columns)) + else: + all_original_columns.extend([f"Feat{j+1}" for j in range(data_obj.shape[1])]) + + if len(all_original_columns) != len(set(all_original_columns)): + raise ValueError("Overlapping column names detected across omics. 
Set 'rename=True' or rename your columns manually.") + feature_labels = [] for i, data_obj in enumerate(X): prefix = DataType[i] if hasattr(data_obj, 'columns'): - # DataFrame: use real column names with DataType prefix - labels = [f"{prefix}_{col}" for col in data_obj.columns] + og_cols = data_obj.columns + labels = [f"{prefix}_{col}" for col in og_cols] else: - # Numpy array: fallback to generic - labels = [f"{prefix}_Feat{j+1}" for j in range(data_obj.shape[1])] + og_cols = [f"Feat{j+1}" for j in range(data_obj.shape[1])] + labels = [f"{prefix}_{col}" for col in og_cols] feature_labels.extend(labels) + if not rename: + for prefixed_col, og_col in zip(labels, og_cols): + rename_mapping[prefixed_col] = og_col + # preprocessing if preprocess: print("\n--------------------------------------------------") @@ -465,8 +484,10 @@ def auto_pysmccnet(X: List[Union[pd.DataFrame, np.ndarray]], Y: Union[pd.DataFra for i in range(len(X)): feature_labels.extend([f"{DataType[i]}_Feat{j+1}" for j in range(X[i].shape[1])]) - Abar = get_abar(Ws_final, feature_label=feature_labels) + if rename_mapping: + feature_labels = [rename_mapping.get(label, label) for label in feature_labels] + Abar = get_abar(Ws_final, feature_label=feature_labels) if not os.path.exists(saving_dir): os.makedirs(saving_dir) diff --git a/docs/source/conf.py b/docs/source/conf.py index 181f17b..7229c0e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,7 +12,7 @@ try: release = metadata.version("bioneuralnet") except metadata.PackageNotFoundError: - release = "1.3.0" + release = "1.3.1" project = "BioNeuralNet" version = release diff --git a/setup.cfg b/setup.cfg index 8d9f9e8..883d1b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = bioneuralnet -version = 1.3.0 +version = 1.3.1 author = Vicente Ramos author_email = vicente.ramos@ucdenver.edu description = A Graph Neural Network based Multi-Omics Network Data Analysis Tool