diff --git a/.Rbuildignore b/.Rbuildignore
index 39e8c0c..d713df3 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -19,3 +19,6 @@
 ^\\.github$
 ^\\.git$
 ^codecov\.yml$
+^vignettes/articles$
+^data_aux$
+^python(/.*)?$
\ No newline at end of file
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 115be99..423dbfc 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -2,8 +2,14 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main, master]
+    branches: [main, default]
   pull_request:
+    branches: [main, default]
+  # Only push, pull_request(_target) and workflow_run accept a `branches`
+  # filter; release and workflow_dispatch reject the key, so none is set.
+  release:
+    types: [published]
+  workflow_dispatch:
 
 name: R-CMD-check.yaml
 
@@ -48,6 +54,7 @@ jobs:
         with:
           upload-snapshots: true
           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
+          error-on: '"error"'
 
       - name: Test coverage
         if: runner.os == 'Linux' && matrix.config.r == 'release'
diff --git a/.github/workflows/Rminversion.yaml b/.github/workflows/Rminversion.yaml
deleted file mode 100644
index 5b2efde..0000000
--- a/.github/workflows/Rminversion.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-name: Test minimal R version
-
-on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
-
-jobs:
-  r-cmd-check:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest]
-        r-version: ['3.5.0', '3.6.0', '4.0.0', '4.1.0', '4.2.0', '4.3.0', '4.4.0','4.5.0']
-
-    name: R ${{ matrix.r-version }} on ${{ matrix.os }}
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up R ${{ matrix.r-version }}
-        uses: r-lib/actions/setup-r@v2
-        with:
-          r-version: ${{ matrix.r-version }}
-
-      - name: Install package dependencies
-        uses: r-lib/actions/setup-r-dependencies@v2
-        with:
-          extra-packages: rcmdcheck
-          needs: check
-
-      - name: Run R CMD check
-        run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
-        shell: Rscript {0}
diff --git a/.github/workflows/bioc-check.yml b/.github/workflows/bioc-check.yml
index a9b7fda..b2c11bc 100644
--- a/.github/workflows/bioc-check.yml
+++ b/.github/workflows/bioc-check.yml
@@ -2,9 +2,14 @@ name: Bioconductor Check
 
 on:
   push:
-    branches: [main, master]
+    branches: [main, default]
   pull_request:
-    branches: [main, master]
+    branches: [main, default]
+  # Only push, pull_request(_target) and workflow_run accept a `branches`
+  # filter; release and workflow_dispatch reject the key, so none is set.
+  release:
+    types: [published]
+  workflow_dispatch:
 
 jobs:
   bioccheck:
@@ -13,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        r-version: ['4.4.0','4.5.0']
+        r-version: ['4.5.0']
 
     steps:
       - uses: actions/checkout@v4
@@ -37,4 +42,4 @@ jobs:
 
       - name: Run BiocCheck
         run: BiocCheck::BiocCheck(".", quit_with_status=TRUE)
-        shell: Rscript {0}
\ No newline at end of file
+        shell: Rscript {0}
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index bfc9f4d..bceb80e 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -2,11 +2,14 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main, master]
+    branches: [default]
   pull_request:
+    branches: [default]
   release:
     types: [published]
+    # `branches` is not a valid key for release events; no filter is set.
   workflow_dispatch:
+    # `branches` is not a valid key for workflow_dispatch; no filter is set.
 
 name: pkgdown.yaml
 
diff --git a/.gitignore b/.gitignore
index 953bcfd..32e172a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,11 +9,13 @@
 docs
 renv.lock
 .obsidian/
-/inst/Paper/data/ 
-/inst/Paper/localjobs/ 
 markeR.BiocCheck/ 
 /doc/
 /Meta/
 markeR.Rproj
 .DS_Store
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store 
+inst/doc
+data_aux
+markeR.Rcheck 
+/python/.venv
diff --git a/DESCRIPTION b/DESCRIPTION
index 5475c78..f9c3058 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: markeR
 Title: An R Toolkit for Evaluating Gene Signatures as Phenotypic Markers
-Version: 0.99.2 
+Version: 1.3
 Authors@R: 
     c(
       person("Rita", "Martins-Silva", 
@@ -14,17 +14,18 @@ Authors@R:
                comment=c(ORCID="0000-0002-1215-0538"))
     )
 Description: 
-    markeR provides a suite of methods for using gene sets (signatures) to quantify and evaluate the 
-    extent to which a given gene signature marks a specific phenotype. The package implements various 
-    scoring, enrichment and classification approaches, along with tools to compute 
-    performance metrics and visualize results, making it a valuable resource for transcriptomics research (bulk RNA-seq).
+    markeR is an R package that provides a modular and extensible framework for the systematic evaluation of gene sets as 
+    phenotypic markers using transcriptomic data. The package is designed to support both quantitative analyses and visual exploration of 
+    gene set behaviour across experimental and clinical phenotypes. It implements multiple methods, including score-based and enrichment 
+    approaches, and also allows the exploration of expression behaviour of individual genes. In addition, users can assess the 
+    similarity of their own gene sets against established collections (e.g., those from MSigDB), facilitating biological interpretation.
 License: Artistic-2.0
 biocViews: GeneExpression, Transcriptomics, Visualization, Software, GeneSetEnrichment, Classification
 Encoding: UTF-8
 Language: en-GB
 LazyData: false
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Additional_repositories: https://bioconductor.org/packages/release/bioc
 Imports:
     circlize,
@@ -46,7 +47,6 @@ Imports:
     limma,
     ggrepel,
     effectsize,
-    patchwork,
     msigdbr,
     tibble
 Suggests: 
@@ -59,11 +59,14 @@ Suggests:
     rmarkdown, 
     roxygen2,
     mockery,
-    covr
+    covr,
+    magick,
+    BiocStyle
 Config/testthat/edition: 3
 Depends: 
-    R (>= 3.5.0)
+    R (>= 4.5.0)
 URL: https://diseasetranscriptomicslab.github.io/markeR/,
      https://github.com/DiseaseTranscriptomicsLab/markeR
 BugReports: https://github.com/DiseaseTranscriptomicsLab/markeR/issues
 VignetteBuilder: knitr
+Config/Needs/website: rmarkdown
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 99ef319..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,180 +0,0 @@
-The Artistic License 2.0
-
-Copyright (c) 2000-2006, The Perl Foundation.
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-This license establishes the terms under which a given free software Package
-may be copied, modified, distributed, and/or redistributed. The intent is that
-the Copyright Holder maintains some artistic control over the development of
-that Package while still keeping the Package available as open source and free
-software.
-
-You are always permitted to make arrangements wholly outside of this license
-directly with the Copyright Holder of a given Package. If the terms of this
-license do not permit the full use that you propose to make of the Package,
-you should contact the Copyright Holder and seek a different licensing
-arrangement.
-
-Definitions
-
-"Copyright Holder" means the individual(s) or organization(s) named in the
-copyright notice for the entire Package.
-
-"Contributor" means any party that has contributed code or other material to
-the Package, in accordance with the Copyright Holder&apos;s procedures.
-
-"You" and "your" means any person who would like to copy, distribute, or
-modify the Package.
-
-"Package" means the collection of files distributed by the Copyright Holder,
-and derivatives of that collection and/or of those files. A given Package may
-consist of either the Standard Version, or a Modified Version.
-
-"Distribute" means providing a copy of the Package or making it accessible to
-anyone else, or in the case of a company or organization, to others outside of
-your company or organization.
-
-"Distributor Fee" means any fee that you charge for Distributing this Package
-or providing support for this Package to another party. It does not mean
-licensing fees.
-
-"Standard Version" refers to the Package if it has not been modified, or has
-been modified only in ways explicitly requested by the Copyright Holder.
-
-"Modified Version" means the Package, if it has been changed, and such changes
-were not explicitly requested by the Copyright Holder.
-
-"Original License" means this Artistic License as Distributed with the
-Standard Version of the Package, in its current version or as it may be
-modified by The Perl Foundation in the future.
-
-"Source" form means the source code, documentation source, and configuration
-files for the Package.
-
-"Compiled" form means the compiled bytecode, object code, binary, or any other
-form resulting from mechanical transformation or translation of the Source
-form.
-
-Permission for Use and Modification Without Distribution
-
-(1) You are permitted to use the Standard Version and create and use Modified
-Versions for any purpose without restriction, provided that you do not
-Distribute the Modified Version.
-
-Permissions for Redistribution of the Standard Version
-
-(2) You may Distribute verbatim copies of the Source form of the Standard
-Version of this Package in any medium without restriction, either gratis or
-for a Distributor Fee, provided that you duplicate all of the original
-copyright notices and associated disclaimers. At your discretion, such
-verbatim copies may or may not include a Compiled form of the Package.
-
-(3) You may apply any bug fixes, portability changes, and other modifications
-made available from the Copyright Holder. The resulting Package will still be
-considered the Standard Version, and as such will be subject to the Original
-License.
-
-Distribution of Modified Versions of the Package as Source
-
-(4) You may Distribute your Modified Version as Source (either gratis or for a
-Distributor Fee, and with or without a Compiled form of the Modified Version)
-provided that you clearly document how it differs from the Standard Version,
-including, but not limited to, documenting any non-standard features,
-executables, or modules, and provided that you do at least ONE of the
-following:
-
-(a) make the Modified Version available to the Copyright Holder of the
-Standard Version, under the Original License, so that the Copyright Holder may
-include your modifications in the Standard Version.
-
-(b) ensure that installation of your Modified Version does not prevent the
-user installing or running the Standard Version. In addition, the Modified
-Version must bear a name that is different from the name of the Standard
-Version.
-
-(c) allow anyone who receives a copy of the Modified Version to make the
-Source form of the Modified Version available to others under
-
-(i) the Original License or
-
-(ii) a license that permits the licensee to freely copy, modify and
-redistribute the Modified Version using the same licensing terms that apply to
-the copy that the licensee received, and requires that the Source form of the
-Modified Version, and of any works derived from it, be made freely available
-in that license fees are prohibited but Distributor Fees are allowed.
-
-Distribution of Compiled Forms of the Standard Version or Modified Versions
-without the Source
-
-(5) You may Distribute Compiled forms of the Standard Version without the
-Source, provided that you include complete instructions on how to get the
-Source of the Standard Version. Such instructions must be valid at the time of
-your distribution. If these instructions, at any time while you are carrying
-out such distribution, become invalid, you must provide new instructions on
-demand or cease further distribution. If you provide valid instructions or
-cease distribution within thirty days after you become aware that the
-instructions are invalid, then you do not forfeit any of your rights under
-this license.
-
-(6) You may Distribute a Modified Version in Compiled form without the Source,
-provided that you comply with Section 4 with respect to the Source of the
-Modified Version.
-
-Aggregating or Linking the Package
-
-(7) You may aggregate the Package (either the Standard Version or Modified
-Version) with other packages and Distribute the resulting aggregation provided
-that you do not charge a licensing fee for the Package. Distributor Fees are
-permitted, and licensing fees for other components in the aggregation are
-permitted. The terms of this license apply to the use and Distribution of the
-Standard or Modified Versions as included in the aggregation.
-
-(8) You are permitted to link Modified and Standard Versions with other works,
-to embed the Package in a larger work of your own, or to build stand-alone
-binary or bytecode versions of applications that include the Package, and
-Distribute the result without restriction, provided the result does not expose
-a direct interface to the Package.
-
-Items That are Not Considered Part of a Modified Version
-
-(9) Works (including, but not limited to, modules and scripts) that merely
-extend or make use of the Package, do not, by themselves, cause the Package to
-be a Modified Version. In addition, such works are not considered parts of the
-Package itself, and are not subject to the terms of this license.
-
-General Provisions
-
-(10) Any use, modification, and distribution of the Standard or Modified
-Versions is governed by this Artistic License. By using, modifying or
-distributing the Package, you accept this license. Do not use, modify, or
-distribute the Package, if you do not accept this license.
-
-(11) If your Modified Version has been derived from a Modified Version made by
-someone other than you, you are nevertheless required to ensure that your
-Modified Version complies with the requirements of this license.
-
-(12) This license does not grant you the right to use any trademark, service
-mark, tradename, or logo of the Copyright Holder.
-
-(13) This license includes the non-exclusive, worldwide, free-of-charge patent
-license to make, have made, use, offer to sell, sell, import and otherwise
-transfer the Package with respect to any patent claims licensable by the
-Copyright Holder that are necessarily infringed by the Package. If you
-institute patent litigation (including a cross-claim or counterclaim) against
-any party alleging that the Package constitutes direct or contributory patent
-infringement, then this Artistic License to you shall terminate on the date
-that such litigation is filed.
-
-(14) Disclaimer of Warranty:
-
-THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS&apos;
-AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE
-DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW,
-NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE
-PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/NAMESPACE b/NAMESPACE
index 70ccfba..45c11f7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,11 +20,7 @@ export(plotNESlollipop)
 export(plotPCA)
 export(plotVolcano)
 export(runGSEA)
-import(RColorBrewer)
-import(fgsea)
 import(ggplot2)
-import(ggpubr)
-import(grid)
 importFrom(ComplexHeatmap,Heatmap)
 importFrom(ComplexHeatmap,HeatmapAnnotation)
 importFrom(ComplexHeatmap,draw)
@@ -33,28 +29,17 @@ importFrom(circlize,colorRamp2)
 importFrom(edgeR,DGEList)
 importFrom(effectsize,eta_squared)
 importFrom(fgsea,fgsea)
+importFrom(fgsea,plotEnrichment)
 importFrom(ggh4x,facet_grid2)
-importFrom(ggplot2,aes)
 importFrom(ggplot2,element_blank)
 importFrom(ggplot2,element_line)
-importFrom(ggplot2,element_rect)
 importFrom(ggplot2,element_text)
-importFrom(ggplot2,facet_wrap)
-importFrom(ggplot2,geom_hline)
-importFrom(ggplot2,geom_line)
-importFrom(ggplot2,geom_point)
 importFrom(ggplot2,geom_text)
 importFrom(ggplot2,geom_tile)
-importFrom(ggplot2,geom_vline)
 importFrom(ggplot2,ggplot)
-importFrom(ggplot2,ggtitle)
 importFrom(ggplot2,labs)
 importFrom(ggplot2,margin)
-importFrom(ggplot2,scale_color_manual)
 importFrom(ggplot2,scale_fill_gradientn)
-importFrom(ggplot2,scale_shape_manual)
-importFrom(ggplot2,theme)
-importFrom(ggplot2,theme_bw)
 importFrom(ggplot2,theme_minimal)
 importFrom(ggpubr,annotate_figure)
 importFrom(ggpubr,ggarrange)
@@ -78,6 +63,7 @@ importFrom(pROC,roc)
 importFrom(reshape2,melt)
 importFrom(rstatix,t_test)
 importFrom(scales,hue_pal)
+importFrom(scales,rescale)
 importFrom(scales,squish)
 importFrom(stats,TukeyHSD)
 importFrom(stats,anova)
diff --git a/NEWS.md b/NEWS.md
index 00b710c..4d66e94 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,69 @@
+# markeR 1.1.2 (12 Mar, 2026) 
+
+## Minor Changes
+- Moved Python bridge scripts from `inst/python/` to a top-level `python/` 
+  directory, as these are supplementary scripts not part of the R package itself.
+- Added `requirements.txt` to the `python/` directory listing all needed 
+  Python dependencies (`rpy2`, `pandas`, `numpy`, and optionally 
+  `ipython` and `jupyter`) for easier environment setup. 
+- Removed redundant code snippets from the Python bridge scripts.
+
+# markeR 1.1.1 (11 Mar, 2026) 
+ 
+- Added `p.adjust.method` parameter across all functions performing or
+  depending on multiple testing correction, allowing users to specify
+  any correction method supported by `stats::p.adjust()`, beyond the default 
+  Benjamini-Hochberg FDR.
+- Added Python bridge scripts in `inst/python/` for users who wish to call
+  markeR from a Python environment via `rpy2`. Includes a tutorial workflow
+  script and a generic command-line wrapper capable of invoking any exported
+  markeR function. See `inst/python/README.md` for installation and usage.
+
+# markeR 1.0.0 (31 Oct, 2025)
+
+- Official Bioconductor Release.
+
+# markeR 0.99.5 (17 Sep, 2025)
+
+- Minor fix in `.onAttach()` to avoid errors when checking `ggplot2` version and ensure the startup warning works correctly.
+
+# markeR 0.99.4 (17 Sep, 2025)
+
+## General
+- Addressed feedback from the Bioconductor review process with updates to documentation and vignette style.  
+
+## Documentation and vignette
+- Updated vignette style to **Bioconductor’s BiocStyle** with automatic table of contents.  
+- Improved vignette content with small corrections.
+- Revised dataset documentation by adding explicit `usage: data(object)` entries.  
+
+## Functions
+- Updated `geneset_similarity()` color handling: replaced the single `color_values` parameter with three new parameters — `color`, `neutral_color`, and `cold_color`, for more interpretable visualization.  
+
+# markeR 0.99.3 (21 Aug, 2025)
+
+## Package size and structure
+- Reduced package size below the 5 MB limit by converting long vignettes into `pkgdown` articles and keeping only a shorter vignette in the package.
+- Moved `inst/Paper` to a dedicated `paper` branch for better repository organization.
+- Removed unnecessary `LICENSE` file (already declared in `DESCRIPTION`).
+
+## Documentation
+- Added a concise main vignette (`markeR`) with installation, introduction, and a basic workflow.
+- Converted three longer vignettes into `pkgdown` articles (linked at the end of the main vignette).
+- Added runnable examples for `VariableAssociation`. 
+
+## NAMESPACE and dependencies
+- Replaced broad imports with `importFrom()` for most packages (except `ggplot2`, retained as full import).
+- Removed unused `patchwork` import.
+- Added missing imports from `stats` and `grDevices` to resolve `R CMD check` notes.
+
+## Code quality
+- Replaced all `sapply()` calls with `vapply()`.
+- Replaced `1:...` usage with `seq_len()` or `seq_along()`.
+- Standardized assignment to `<-` instead of `=`.
+- Fixed some redundant `stop()`/`warning()` conditions to provide clearer input validation.
+- Addressed “no visible binding” notes by using `.data$` or `utils::globalVariables()`.
+
 # markeR 0.99.2 (23 Jul, 2025)
 
 * Minor fixes in documentation
diff --git a/R/CalculateScores.R b/R/CalculateScores.R
index 5aae247..88df3b6 100644
--- a/R/CalculateScores.R
+++ b/R/CalculateScores.R
@@ -113,11 +113,12 @@ CalculateScores <- function(data, metadata, gene_sets,
                             method = c("ssGSEA", "logmedian","ranking", "all")) {
   data <- as.data.frame(data) # Ensure data is a data frame
   method <- match.arg(method)  # Validate method input
-
-  if (!is.data.frame(data)) stop("Error: data must be a data-frame")
-  if (!is.null(metadata) && !is.data.frame(metadata)) stop(
-    "Error: metadata must be a data-frame")
-  if (!is.list(gene_sets)) stop("Error: gene_sets must be a list")
+ 
+  
+  if (!is.data.frame(data) || (!is.null(metadata) && !is.data.frame(metadata)) || !is.list(gene_sets)) {
+    stop("Invalid input: 'data' must be a data-frame, 'metadata' (if provided) must be a data-frame, and 'gene_sets' must be a list.")
+  }
+  
 
   # Change first column name to default name "sample", for merging purposes
   if (!is.null(metadata)) colnames(metadata)[1] <- "sample"
diff --git a/R/CalculateScores_Ranking.R b/R/CalculateScores_Ranking.R
index 3e242e4..cc7f1fb 100644
--- a/R/CalculateScores_Ranking.R
+++ b/R/CalculateScores_Ranking.R
@@ -52,11 +52,19 @@
 CalculateScores_Ranking <- function(data, metadata = NULL, gene_sets) {
   data <- as.data.frame(data) # Ensure data is a data frame
   ResultsList <- list()
-
-  if (!is.data.frame(data)) stop("Error: data must be a data frame")
-  if (!is.null(metadata) && !is.data.frame(metadata)) stop(
-    "Error: metadata must be a data frame")
-  if (!is.list(gene_sets)) stop("Error: gene_sets must be a list")
+ 
+  
+  if (!is.data.frame(data) || (!is.null(metadata) && !is.data.frame(metadata)) || !is.list(gene_sets)) {
+    stop(
+      paste(
+        if (!is.data.frame(data)) "Error: 'data' must be a data frame." else NULL,
+        if (!is.null(metadata) && !is.data.frame(metadata)) "Error: 'metadata' must be a data frame." else NULL,
+        if (!is.list(gene_sets)) "Error: 'gene_sets' must be a list." else NULL,
+        collapse = " "
+      )
+    )
+  }
+  
 
   # Change first column name to default name "sample" for merging purposes
   if (!is.null(metadata)) colnames(metadata)[1] <- "sample"
@@ -78,12 +86,16 @@ CalculateScores_Ranking <- function(data, metadata = NULL, gene_sets) {
 
         signaturegenes_up <- signature[signature[,2] == 1, 1]
         signaturegenes_down <- signature[signature[,2] == -1, 1]
-
-        # Apply getRanking function to each sample (column)
-        rankings_up <- sapply(colnames(data),
-                              function(sample) getRanking(data, sample, signaturegenes_up))
-        rankings_down <- sapply(colnames(data),
-                                function(sample) getRanking(data, sample, signaturegenes_down))
+ 
+        
+        rankings_up <- vapply(colnames(data),
+                              function(sample) getRanking(data, sample, signaturegenes_up),
+                              numeric(1))
+        
+        rankings_down <- vapply(colnames(data),
+                                function(sample) getRanking(data, sample, signaturegenes_down),
+                                numeric(1))
+        
 
         ranking_final <- (rankings_up - rankings_down) / length(universe_genes)
         ranking_final <- data.frame(sample = colnames(data), score = ranking_final)
@@ -93,11 +105,11 @@ CalculateScores_Ranking <- function(data, metadata = NULL, gene_sets) {
         message(paste0("Considering unidirectional gene signature mode for signature ", sig))
 
         signaturegenes <- signature[, 1]
-
-        # Apply getRanking function to each sample (column)
-        rankings <- sapply(colnames(data),
-                           function(sample) getRanking(data, sample, signaturegenes))
-
+ 
+        rankings <- vapply(colnames(data),
+                           function(sample) getRanking(data, sample, signaturegenes),
+                           numeric(1))
+        
         ranking_final <- rankings / length(universe_genes)
         ranking_final <- data.frame(sample = colnames(data), score = ranking_final)
 
@@ -107,9 +119,10 @@ CalculateScores_Ranking <- function(data, metadata = NULL, gene_sets) {
 
       signaturegenes <- signature
 
-      # Apply getRanking function to each sample (column)
-      rankings <- sapply(colnames(data),
-                         function(sample) getRanking(data, sample, signaturegenes))
+      # Apply getRanking function to each sample (column) 
+      rankings <- vapply(colnames(data),
+                         function(sample) getRanking(data, sample, signaturegenes),
+                         numeric(1))
 
       ranking_final <- rankings / length(universe_genes)
       ranking_final <- data.frame(sample = colnames(data), score = ranking_final)
@@ -169,7 +182,7 @@ getRanking <- function(data, sample, geneset) {
   # Order from least to most expressed
   expressiongene <- expressiongene[order(expressiongene, decreasing = FALSE)]
   ranking <- match(geneset, names(expressiongene))  # Find gene positions in ordered list
-  ranking <- as.vector(na.omit(ranking))  # Remove missing genes
+  ranking <- as.vector(stats::na.omit(ranking))  # Remove missing genes
 
   return(sum(ranking))  # Return sum of ranks
 }
diff --git a/R/CalculateScores_logmedian.R b/R/CalculateScores_logmedian.R
index 6a77d99..b654b51 100644
--- a/R/CalculateScores_logmedian.R
+++ b/R/CalculateScores_logmedian.R
@@ -93,13 +93,13 @@ calculateScore_logmedian_bidirectional <- function(data, signature) {
   signaturegenes_down <- signature[signature[, 2] == -1, ]
 
   # Compute log-median scores for upregulated genes
-  data_subset_up <- na.omit(subset(log2(data + 1), row.names(data) %in% signaturegenes_up[, 1]))
-  data_subset_up <- data_subset_up - apply(data_subset_up, 1, median)
+  data_subset_up <- stats::na.omit(subset(log2(data + 1), row.names(data) %in% signaturegenes_up[, 1]))
+  data_subset_up <- data_subset_up - apply(data_subset_up, 1, stats::median)
   score_up <- colSums(data_subset_up) / nrow(data_subset_up)
 
   # Compute log-median scores for downregulated genes
-  data_subset_down <- na.omit(subset(log2(data + 1), row.names(data) %in% signaturegenes_down[, 1]))
-  data_subset_down <- data_subset_down - apply(data_subset_down, 1, median)
+  data_subset_down <- stats::na.omit(subset(log2(data + 1), row.names(data) %in% signaturegenes_down[, 1]))
+  data_subset_down <- data_subset_down - apply(data_subset_down, 1, stats::median)
   score_down <- colSums(data_subset_down) / nrow(data_subset_down)
 
   score <- score_up - score_down
@@ -125,9 +125,9 @@ calculateScore_logmedian_unidirectional <- function(data, signature) {
   data <- as.data.frame(data) # Ensure data is a data frame
   if (is.data.frame(signature)) signature <- as.vector(signature[, 1])
 
-  data_subset <- na.omit(subset(log2(data + 1), row.names(data) %in% signature))
+  data_subset <- stats::na.omit(subset(log2(data + 1), row.names(data) %in% signature))
   # Center gene in its log2 median
-  data_subset <- data_subset - apply(data_subset, 1, median)
+  data_subset <- data_subset - apply(data_subset, 1, stats::median)
   # Normalize by signature size
   dfScore <- colSums(data_subset) / nrow(data_subset)
   dfScore <- data.frame(sample = names(dfScore), score = dfScore)
diff --git a/R/CalculateScores_ssGSEA.R b/R/CalculateScores_ssGSEA.R
index a192b87..c8e9700 100644
--- a/R/CalculateScores_ssGSEA.R
+++ b/R/CalculateScores_ssGSEA.R
@@ -169,10 +169,11 @@ CalculateScores_ssGSEA_bidirectional <- function(data, signature) {
   ResultsList <- list()
 
   mtx <- log2(data)
-  mtx <- as.matrix(mtx)
-
-  up_genes <- subset(signature, Signal == 1)$Gene
-  down_genes <- subset(signature, Signal == -1)$Gene
+  mtx <- as.matrix(mtx) 
+  
+  up_genes <- signature[signature[["Signal"]] == 1, "Gene"]
+  down_genes <- signature[signature[["Signal"]] == -1, "Gene"]
+  
 
   ################## ssGSEA for UP genes ##################
 
diff --git a/R/CohenD_IndividualGenes.R b/R/CohenD_IndividualGenes.R
index 4589ea0..4b0d93d 100644
--- a/R/CohenD_IndividualGenes.R
+++ b/R/CohenD_IndividualGenes.R
@@ -166,8 +166,8 @@ CohenD_IndividualGenes <- function(data, metadata,
     if(n1 < 2 || n2 < 2) return(NA)
     m1 <- mean(x)
     m2 <- mean(y)
-    s1 <- sd(x)
-    s2 <- sd(y)
+    s1 <- stats::sd(x)
+    s2 <- stats::sd(y)
     pooled_sd <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2))
     if (pooled_sd == 0) return(NA)
     d <- (m1 - m2) / pooled_sd
@@ -205,8 +205,8 @@ CohenD_IndividualGenes <- function(data, metadata,
     if (is.null(title)) title <- paste("Cohen's d for variable", condition_var,
                                        "(", paste(class, collapse = ", "), " vs others)")
 
-    barplot <- ggplot2::ggplot(effect_values, ggplot2::aes(y = reorder(Gene, CohensD),
-                                                           x = CohensD)) +
+    barplot <- ggplot2::ggplot(effect_values, ggplot2::aes(y = stats::reorder(.data$Gene, .data$CohensD),
+                                                           x = .data$CohensD)) +
       ggplot2::geom_bar(stat = "identity", fill = fillcolor) +
       #ggplot2::coord_flip()  +
       coord_cartesian(xlim = c(0, max(effect_values$CohensD) + 0.1))+
diff --git a/R/CorrelationHeatmap.R b/R/CorrelationHeatmap.R
index b103512..82e85b5 100644
--- a/R/CorrelationHeatmap.R
+++ b/R/CorrelationHeatmap.R
@@ -138,7 +138,7 @@ CorrelationHeatmap <- function(data, metadata = NULL, genes, separate.by = NULL,
 
   # Subset data to selected genes
   #data <- data[rownames(data) %in% genes, , drop = FALSE]
-  data <- na.omit(as.data.frame(data[genes,])) # to keep input order
+  data <- stats::na.omit(as.data.frame(data[genes,])) # to keep input order
 
   if (!is.null(separate.by) && is.null(metadata)) {
     stop("separate.by is not NULL but metadata is missing. Please specify metadata.")
diff --git a/R/FPR_Simulation.R b/R/FPR_Simulation.R
index e22566e..126e44a 100644
--- a/R/FPR_Simulation.R
+++ b/R/FPR_Simulation.R
@@ -1,3 +1,4 @@
+utils::globalVariables(c( "cohen", "method", "contrast" ))
 #' FPR Simulation Plot
 #'
 #' This function simulates false positive rates (FPR) by generating simulated
@@ -43,7 +44,11 @@
 #'   grid layout. If `NULL`, layout is auto-calculated.
 #' @param nrow Integer. Number of rows for arranging signature plots in a grid
 #'   layout. If `NULL`, layout is auto-calculated.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
+#'   \code{"bonferroni"}, \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or
+#'   \code{"none"}. Passed to \code{\link[stats]{p.adjust}}.
 #'
 #' @return Invisibly returns a list containing:
 #'   \describe{
@@ -103,14 +108,14 @@
 #' )
 #'
 #' @import ggplot2
-#' @import ggpubr
+#' @importFrom ggpubr ggarrange annotate_figure
 #' @export
 #'
 FPR_Simulation <- function(data, metadata, original_signatures, Variable,
-                           gene_list = NULL, number_of_sims=10, title=NULL,
+                           gene_list = NULL, number_of_sims=100, title=NULL,
                            widthTitle = 30, titlesize = 12,  pointSize = 2,
                            labsize = 10,mode = c( "none","simple","medium","extensive"),
-                           ColorValues=NULL, ncol=NULL, nrow=NULL) {
+                           ColorValues=NULL, ncol=NULL, nrow=NULL, p.adjust.method="BH") {
   data <- as.data.frame(data) # Ensure data is a data frame
   if (is.null(gene_list)) gene_list <- row.names(data)
 
@@ -127,7 +132,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
       results <- suppressMessages(CohenF_allConditions(data = data,
                                                        metadata = metadata,
                                                        gene_sets = original_signatures,
-                                                       variable = Variable ))
+                                                       variable = Variable, p.adjust.method = p.adjust.method ))
       cohentype <- "f"
 
     } else {
@@ -137,7 +142,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
         results <- suppressMessages(CohenF_allConditions(data = data,
                                                          metadata = metadata,
                                                          gene_sets = original_signatures,
-                                                         variable = Variable ))
+                                                         variable = Variable, p.adjust.method = p.adjust.method ))
         cohentype <- "f"
 
       } else {
@@ -145,7 +150,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
         results <- suppressMessages(CohenD_allConditions(data = data,
                                                          metadata = metadata,
                                                          gene_sets = original_signatures,
-                                                         variable = Variable, mode = mode))
+                                                         variable = Variable, mode = mode, p.adjust.method = p.adjust.method))
         cohentype <- "d"
 
       }
@@ -204,7 +209,8 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
 
     # Generate simulated signatures based on the current signature
     simulatedsigs <- list()
-    for (sim in 1:number_of_sims) {
+    # seq_len() iterates zero times when number_of_sims is 0, whereas 1:number_of_sims would yield c(1, 0)
+    for (sim in seq_len(number_of_sims)) {
       cur_model_sig <- cur_sig  # copy the current signature
       cur_model_sig$Gene <- sample(gene_list, nrow(cur_sig))  # simulate by sampling genes
       simulatedsigs[[paste0("sim", sim)]] <- cur_model_sig
@@ -217,14 +223,14 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
         metadata = metadata,
         gene_sets = simulatedsigs,
         variable = Variable,
-        mode = mode
+        mode = mode, p.adjust.method = p.adjust.method
       ))
     } else {
       results2 <- suppressMessages(CohenF_allConditions(
         data = data,
         metadata = metadata,
         gene_sets = simulatedsigs,
-        variable = Variable
+        variable = Variable, p.adjust.method = p.adjust.method
       ))
     }
 
@@ -239,7 +245,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
       } else if (cohentype=="f"){
         cohen_mat <- sig_data$CohenF
       } else {
-        stop("Error: results2 format not valid.")
+        stop("Error: results format not valid.")
       }
 
       padj_mat <- sig_data$padj
@@ -272,35 +278,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
 
     # needed to define the quantile dashed lines
     final_df$method <- factor(final_df$method, levels = methods)
-
-    #
-    # # Restructure simulation results into a list (one data frame per method)
-    # restructured <- lapply(methods, function(m) {
-    #   data.frame(
-    #     CohensD = sapply(results2, function(sim) sim$CohenD[m, 1]),
-    #     Pval    = sapply(results2, function(sim) sim$PValue[m, 1])
-    #   )
-    # })
-    # names(restructured) <- methods
-
-    # Combine simulation data into one long-format data frame
-    # sim_data <- do.call(rbind, lapply(methods, function(m) {
-    #   df <- restructured[[m]]
-    #   df$Method <- m
-    #   df$Shape <- ifelse(df$Pval < 0.05, "Significant", "Not Significant")
-    #   df
-    # }))
-    # # Set Method as a factor to control order in the plot
-    # sim_data$Method <- factor(sim_data$Method, levels = methods)
-
-    # Compute only the 95th percentile (top 5% threshold) for each method
-    # q_data <- do.call(rbind, lapply(methods, function(m) {
-    #   cd_vals <- final_df_simulated$cohen[final_df_simulated$method == m]
-    #   data.frame(
-    #     Method = m,
-    #     q_high = as.numeric(quantile(cd_vals, 0.95, na.rm = TRUE))
-    #   )
-    # }))
+ 
 
     # Calculate FPR for each Original observation
     final_df$FPR <- NA
@@ -322,7 +300,7 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
       for (mt in methods) {
         subset_df <- final_df[final_df$method == mt & final_df$contrast == ct, ]
         if (nrow(subset_df) == 0) next
-        q95 <- quantile(subset_df$cohen, 0.95, na.rm = TRUE)
+        q95 <- stats::quantile(subset_df$cohen, 0.95, na.rm = TRUE)
         xpos <- which(methods == mt)
         q_data <- rbind(q_data, data.frame(
           method = mt, contrast = ct, q_high = q95,
@@ -334,8 +312,9 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
 
     # Ensuring the label is always on top
     # Compute max cohen per method + contrast across both Simulated and Original
-    all_max <- aggregate(cohen ~ method + contrast, data = final_df, FUN = max)
+     all_max <- stats::aggregate(cohen ~ method + contrast, data = final_df, FUN = max)
 
+    
     # Extract FPR values from Original rows
     original_df <- final_df[final_df$type == "Original", ]
 
@@ -359,19 +338,19 @@ FPR_Simulation <- function(data, metadata, original_signatures, Variable,
     # Build the plot for the current signature
     p <- ggplot2::ggplot() +
       geom_jitter(data = final_df[final_df$type == "Simulated",],
-                  aes(y = cohen, x = method, color = type),
+                  aes(y = .data$cohen, x = .data$method, color = .data$type),
                   width = 0.3, height = 0,size = pointSize, alpha = 0.5) +
-      geom_violin(data = final_df, aes(y = cohen, x = method),
+      geom_violin(data = final_df, aes(y = .data$cohen, x = .data$method),
                   fill = "#F0F0F0", color = "black", alpha = 0.5) +
       geom_jitter(data = final_df[final_df$type == "Original",],
                   aes(y = cohen, x = method, color = type),
                   width = 0.3, height = 0, size = pointSize, alpha = 1) +
       geom_text(data = all_max,
-                aes(x = method, y = y, label = label),
+                aes(x = .data$method, y = .data$y, label = .data$label),
                 size = 3,
                 inherit.aes = FALSE) +
       geom_segment(data = q_data,
-                   aes(x = xmin, xend = xmax, y = q_high, yend = q_high),
+                   aes(x = .data$xmin, xend = .data$xmax, y = .data$q_high, yend = .data$q_high),
                    linetype = "dashed", color = "red", inherit.aes = FALSE) +
       labs(title = wrap_title(sig, widthTitle),
            y = ifelse(cohentype == "d", "|Cohen's d|", "|Cohen's f|"),
diff --git a/R/GSEA_VariableAssociation.R b/R/GSEA_VariableAssociation.R
index d4603da..159d815 100644
--- a/R/GSEA_VariableAssociation.R
+++ b/R/GSEA_VariableAssociation.R
@@ -57,55 +57,17 @@
 #'   removed before analysis, leading to a loss of data to be fitted in the
 #'   model.
 #' @param printplt Boolean specifying if plot is to be printed. Default: `TRUE`.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+#'   \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+#'   Passed to \code{\link[stats]{p.adjust}}. 
+#'   
 #' @return A list with two elements:
 #'   - `data`: A data frame containing the GSEA results, including normalized
 #'   enrichment scores (NES), adjusted p-values, and contrasts.
 #'   - `plot`: A ggplot2 object visualizing the GSEA results as a lollipop plot.
-#'
-#'
-#' @examples
-#' # Simulate gene expression data (genes as rows, samples as columns)
-#' set.seed(42)
-#' expr <- as.data.frame(matrix(rnorm(500), nrow = 50, ncol = 10))
-#' rownames(expr) <- paste0("Gene", 1:50)
-#' colnames(expr) <- paste0("Sample", 1:10)
-#'
-#' # Simulate metadata (categorical and continuous)
-#' metadata <- data.frame(
-#'   sampleID = paste0("Sample", 1:10),
-#'   Group = rep(c("A", "B"), each = 5),
-#'   Age = sample(20:60, 10),
-#'   row.names = colnames(expr)
-#' )
-#'
-#' # Define a toy gene set: one gene set only for discovery mode!
-#' gene_set <- list(
-#'   Signature1 = paste0("Gene", 1:10)
-#' )
-#'
-#' # Score-based association (e.g., logmedian)
-#' res_score <- VariableAssociation(
-#'   method = "logmedian",
-#'   data = expr,
-#'   metadata = metadata,
-#'   cols = c("Group", "Age"),
-#'   gene_set = gene_set
-#' )
-#' print(res_score$Overall)
-#' print(res_score$plot)
-#'
-#' # GSEA-based association (if GSEA_VariableAssociation is available)
-#' # res_gsea <- VariableAssociation(
-#' #   method = "GSEA",
-#' #   data = expr,
-#' #   metadata = metadata,
-#' #   cols = "Group",
-#' #   gene_set = gene_set
-#' # )
-#' # print(res_gsea$data)
-#' print(res_score$plot)
-#'
+#' 
 #' @keywords internal
 GSEA_VariableAssociation <- function(data, metadata, cols, stat=NULL,
                                      mode=c("simple","medium","extensive"),
@@ -113,7 +75,8 @@ GSEA_VariableAssociation <- function(data, metadata, cols, stat=NULL,
                                      signif_color = "red", saturation_value=NULL,
                                      sig_threshold = 0.05, widthlabels=18,
                                      labsize=10, titlesize=14, pointSize=5,
-                                     ignore_NAs = FALSE, printplt =TRUE) {
+                                     ignore_NAs = FALSE, printplt =TRUE,
+                                     p.adjust.method = "BH") {
   data <- as.data.frame(data) # Ensure data is a data frame
   mode <- match.arg(mode)
   metadata <- metadata[, cols %in% colnames(metadata), drop = FALSE]
@@ -139,7 +102,7 @@ GSEA_VariableAssociation <- function(data, metadata, cols, stat=NULL,
     if (variable_types[var] == "Numeric") {
 
       # Use a model matrix for continuous variables
-      design <- model.matrix(as.formula(paste("~1+", var)), data = metadata)
+      design <- stats::model.matrix(as.formula(paste("~1+", var)), data = metadata)
 
       DEGs_var <- calculateDE(data = data, metadata = metadata,
                               modelmat =  design, contrasts = c(var),
@@ -172,15 +135,19 @@ GSEA_VariableAssociation <- function(data, metadata, cols, stat=NULL,
   combined_results$Contrast <- cont_vec
 
   # correct adjusted p value to correct for multiple testing for the contrasts?
-  combined_results$padj <- p.adjust(combined_results$padj, method = "BH")
+  combined_results$padj <- stats::p.adjust(combined_results$padj, method = p.adjust.method)
 
 
   combined_results_toreturn <- combined_results
 
 
   # Ensure contrast ordering
-  combined_results$Contrast <- sapply(combined_results$Contrast,
-                                      function(x) wrap_title(x, widthlabels))
+  # vapply() enforces exactly one string per contrast and returns character(0)
+  # for empty input, where sapply() would silently return a list instead.
+  combined_results$Contrast <- vapply(combined_results$Contrast,
+                                      function(x) wrap_title(x, widthlabels),
+                                      FUN.VALUE = character(1))
+  
   combined_results$Contrast <- factor(combined_results$Contrast,
                                       levels = combined_results$Contrast[order(combined_results$NES)])
 
@@ -196,17 +163,17 @@ GSEA_VariableAssociation <- function(data, metadata, cols, stat=NULL,
   }
 
 
-  plot <- ggplot2::ggplot(combined_results, ggplot2::aes(x = NES, y = Contrast,
-                                                         fill = -log10(padj))) +
+  plot <- ggplot2::ggplot(combined_results, ggplot2::aes(x = .data$NES, y = .data$Contrast,
+                                                         fill = -log10(.data$padj))) +
     ggplot2::geom_segment(ggplot2::aes(
-      yend = Contrast,
+      yend = .data$Contrast,
       xend = 0,
-      linetype = ifelse(stat_used == "B" & NES < 0, "dashed", "solid"),
-      color = ifelse(stat_used == "B" & NES < 0, "grey", "black")
+      linetype = ifelse(.data$stat_used == "B" & .data$NES < 0, "dashed", "solid"),
+      color = ifelse(.data$stat_used == "B" & .data$NES < 0, "grey", "black")
     ), size = .5) +
     ggplot2::geom_point(ggplot2::aes(
       stroke = 1.2,
-      color = ifelse(stat_used == "B" & NES < 0, "grey", "black")
+      color = ifelse(.data$stat_used == "B" & .data$NES < 0, "grey", "black")
     ), shape = 21, size = pointSize) +
      ggplot2::scale_fill_gradient2(low = nonsignif_color,
                                   mid = nonsignif_color,
@@ -309,7 +276,8 @@ generate_all_contrasts <- function(levels, mode = "simple") {
 
   # 3. Groupwise comparisons (extensive mode)
   group_contrasts <- c()
-  for (i in 1:(n-1)) {
+  # seq_len(max(0, n - 1)) iterates zero times when n <= 1, whereas 1:(n-1) would count downwards
+  for (i in seq_len(max(0, n - 1))) {
     left_groups <- combn(levels, i, simplify = FALSE)  # Subsets for the first group
     for (left in left_groups) {
       right <- setdiff(levels, left)  # Remaining elements for the second group
diff --git a/R/Heatmap_Cohen.R b/R/Heatmap_Cohen.R
index 03840c9..89aac9d 100644
--- a/R/Heatmap_Cohen.R
+++ b/R/Heatmap_Cohen.R
@@ -33,6 +33,7 @@
 #' @param ColorValues A character vector specifying the colors for the gradient
 #'   fill in the heatmaps. Default is \code{c("#F9F4AE", "#B44141")}.
 #' @param title Title for the grid of plots.
+#' 
 #' @return A list with two elements:
 #' \describe{
 #'   \item{plt}{A combined heatmap arranged in a grid using \code{ggpubr::ggarrange}.}
@@ -55,14 +56,13 @@
 #' @seealso \code{\link{CohenD_allConditions}},
 #'   \code{\link{CohenF_allConditions}}
 #'
-#' @importFrom ggplot2 ggplot geom_tile geom_text labs scale_fill_gradientn
-#'   theme_minimal element_text element_blank element_line margin
+#' @import ggplot2 
 #' @importFrom ggpubr ggarrange
 #'
 #' @keywords internal
 Heatmap_Cohen <- function(cohenlist, nrow = NULL, ncol = NULL, limits = NULL,
                           widthTitle = 22, titlesize = 12, ColorValues = NULL,
-                          title=NULL ) {
+                          title=NULL) {
 
   cohentype <- ifelse("CohenD" %in% names(cohenlist[[1]]), "d",
                       ifelse("CohenF" %in% names(cohenlist[[1]]), "f", NULL))
@@ -112,9 +112,9 @@ Heatmap_Cohen <- function(cohenlist, nrow = NULL, ncol = NULL, limits = NULL,
     limits <- if (is.null(limits)) c(0 , max(long_data$Cohen, na.rm = TRUE)) else limits
 
     # Create heatmap using ggplot2
-    p <- ggplot2::ggplot(long_data, ggplot2::aes(x = Var2, y = Var1, fill = Cohen)) +
+    p <- ggplot2::ggplot(long_data, ggplot2::aes(x = .data$Var2, y = .data$Var1, fill = .data$Cohen)) +
       ggplot2::geom_tile() +
-      ggplot2::geom_text(aes(label = label), color = "black", size = 3) +
+      ggplot2::geom_text(aes(label = .data$label), color = "black", size = 3) +
       ggplot2::scale_fill_gradientn(colors = ColorValues, limits = limits) +
       ggplot2::labs(title = signature_title, x = NULL, y = NULL, fill =
                       ifelse(cohentype=="d", "|Cohen\'s d|", "|Cohen\'s f|")) +
@@ -202,7 +202,12 @@ Heatmap_Cohen <- function(cohenlist, nrow = NULL, ncol = NULL, limits = NULL,
 #' groups.
 #' - `"extensive"`: All possible groupwise contrasts, ensuring balance in the
 #' number of terms on each side.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+#'   \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+#'   Passed to \code{\link[stats]{p.adjust}}. 
+#'   
 #' @return A named list where each element corresponds to a gene signature. Each
 #'   signature element is a list with three components:
 #' \describe{
@@ -228,7 +233,7 @@ Heatmap_Cohen <- function(cohenlist, nrow = NULL, ncol = NULL, limits = NULL,
 #'
 #' @keywords internal
 CohenD_allConditions <- function(data, metadata, gene_sets, variable,
-                                 mode = c("simple","medium","extensive")) {
+                                 mode = c("simple","medium","extensive"), p.adjust.method = "BH") {
   data <- as.data.frame(data) # Ensure data is a data frame
   # Step 1: Check if variable exists in metadata
   if (!variable %in% colnames(metadata)) {
@@ -266,8 +271,8 @@ CohenD_allConditions <- function(data, metadata, gene_sets, variable,
                                        quantitative_var = "score", mode=mode)
 
       # Convert to named vectors (column names = comparisons)
-      cohen_d_results[[method]] <- setNames(cohen_results$CohenD, cohen_results$contrast)
-      p_value_results[[method]] <- setNames(cohen_results$PValue, cohen_results$contrast)
+      cohen_d_results[[method]] <- stats::setNames(cohen_results$CohenD, cohen_results$contrast)
+      p_value_results[[method]] <- stats::setNames(cohen_results$PValue, cohen_results$contrast)
     }
 
     # Convert lists to data frames
@@ -293,8 +298,8 @@ CohenD_allConditions <- function(data, metadata, gene_sets, variable,
     }
   }
 
-  # Step 2: Apply BH correction within each method
-  all_padj <- lapply(all_pvalues, function(pvals) p.adjust(pvals, method = "BH"))
+  # Step 2: Apply multiple-testing correction (p.adjust.method) within each method
+  all_padj <- lapply(all_pvalues, function(pvals) stats::p.adjust(pvals, method = p.adjust.method))
 
   # Step 3: Store corrected p-values back into result_list
   index_tracker <- list()  # Track index position for each method
@@ -345,8 +350,8 @@ cohen_d <- function(x, y) {
   if(n1 < 2 || n2 < 2) return(NA)
   m1 <- mean(x)
   m2 <- mean(y)
-  s1 <- sd(x)
-  s2 <- sd(y)
+  s1 <- stats::sd(x)
+  s2 <- stats::sd(y)
   pooled_sd <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2))
   if (pooled_sd == 0) return(NA)
   d <- (m1 - m2) / pooled_sd
@@ -403,9 +408,7 @@ compute_cohen_d <- function(dfScore, variable, quantitative_var="score",
     dfScore_subset <- create_contrast_column(dfScore, variable, pair)
     group1 <- levels(dfScore_subset$cohentest)[1]
     group2 <- levels(dfScore_subset$cohentest)[2]
-
-    # group1 <- unique(dfScore_subset$cohentest)[1]
-    # group2 <- unique(dfScore_subset$cohentest)[2]
+ 
 
     x <- dfScore_subset[dfScore_subset[["cohentest"]] == group1,
                         quantitative_var, drop = TRUE]
@@ -498,6 +501,12 @@ flatten_results <- function(nested_list) {
 #'   downregulated).
 #' @param variable A string specifying the categorical variable in
 #'   \code{metadata} used to model the gene signature scores.
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+#'   \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+#'   Passed to \code{\link[stats]{p.adjust}}. 
+#'   
 #' @return A named list where each element corresponds to a gene signature. Each
 #'   signature element is a list with three components:
 #' \describe{
@@ -511,7 +520,7 @@ flatten_results <- function(nested_list) {
 #' }
 #'
 #' @keywords internal
-CohenF_allConditions <- function(data, metadata, gene_sets, variable ) {
+CohenF_allConditions <- function(data, metadata, gene_sets, variable, p.adjust.method = "BH" ) {
   data <- as.data.frame(data) # Ensure data is a data frame
   # Step 1: Check if variable exists in metadata
   if (!variable %in% colnames(metadata)) {
@@ -555,8 +564,8 @@ CohenF_allConditions <- function(data, metadata, gene_sets, variable ) {
       results_var <- compute_cohens_f_pval(model, type)
 
       # Convert to named vectors (column names = comparisons)
-      cohen_f_results[[method]] <- setNames(results_var["Cohen_f"], variable)
-      p_value_results[[method]] <- setNames(results_var["P_Value"], variable)
+      cohen_f_results[[method]] <- stats::setNames(results_var["Cohen_f"], variable)
+      p_value_results[[method]] <- stats::setNames(results_var["P_Value"], variable)
     }
 
     # Convert lists to data frames
@@ -582,8 +591,8 @@ CohenF_allConditions <- function(data, metadata, gene_sets, variable ) {
     }
   }
 
-  # Step 2: Apply BH correction within each method
-  all_padj <- lapply(all_pvalues, function(pvals) p.adjust(pvals, method = "BH"))
+  # Step 2: Apply multiple-testing correction (p.adjust.method) within each method
+  all_padj <- lapply(all_pvalues, function(pvals) stats::p.adjust(pvals, method = p.adjust.method))
 
   # Step 3: Store corrected p-values back into result_list
   index_tracker <- list()  # Track index position for each method
diff --git a/R/IndividualGenes_Violins.R b/R/IndividualGenes_Violins.R
index 19186ca..13b7c70 100644
--- a/R/IndividualGenes_Violins.R
+++ b/R/IndividualGenes_Violins.R
@@ -149,7 +149,7 @@ IndividualGenes_Violins <- function(data, metadata=NULL, genes,GroupingVariable,
   plt <- plt + ggplot2::geom_violin(alpha=0.4)
 
   # Add median summary crossbar.
-  plt <- plt + ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
+  plt <- plt + ggplot2::stat_summary(fun = stats::median, fun.min = stats::median, fun.max = stats::median,
                                  geom = "crossbar", width = 0.25,
                                  position = ggplot2::position_dodge(width = 0.13))
 
diff --git a/R/PlotScores.R b/R/PlotScores.R
index cc8ed03..0dc36cf 100644
--- a/R/PlotScores.R
+++ b/R/PlotScores.R
@@ -1,3 +1,5 @@
+utils::globalVariables(c("score"))
+
 #' Plot gene signature scores using various methods.
 #'
 #' Computes and visualizes gene signature scores using one or more methods,
@@ -112,7 +114,12 @@
 #' @param cor Correlation method for numeric variables. One of `"pearson"`
 #'   (default), `"spearman"`, or `"kendall"`. Only applies when the variable is
 #'   numeric and `method != "all"`.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+#'   \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+#'   Passed to \code{\link[stats]{p.adjust}}. Only used if `method == "all"`.
+#'   
 #' @return Depending on `method`:
 #'
 #'   If `method = "all"`, returns a list with `heatmap` and `volcano` ggplot objects.
@@ -239,7 +246,8 @@ PlotScores <- function(data, metadata, gene_sets,
                        cond_cohend = NULL, pvalcalc = FALSE,
                        mode = c("simple","medium","extensive"),
                        widthlegend=22, sig_threshold=0.05, cohen_threshold=0.5,
-                       colorPalette="Set3", cor=c("pearson","spearman","kendall")) {
+                       colorPalette="Set3", cor=c("pearson","spearman","kendall"),
+                       p.adjust.method="BH") {
   data <- as.data.frame(data) # Ensure data is a data frame
   method <- match.arg(method)
   mode <- match.arg(mode)
@@ -252,13 +260,13 @@ PlotScores <- function(data, metadata, gene_sets,
     if (type =="Numeric"){
 
       cohenlist <- CohenF_allConditions(data = data, metadata = metadata,
-                                        gene_sets = gene_sets, variable = Variable )
+                                        gene_sets = gene_sets, variable = Variable, p.adjust.method = p.adjust.method )
 
     } else {
 
       cohenlist <- CohenD_allConditions(data = data, metadata = metadata,
                                         gene_sets = gene_sets, variable = Variable,
-                                        mode = mode)
+                                        mode = mode, p.adjust.method = p.adjust.method )
 
     }
 
@@ -499,11 +507,11 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
 
       ColorValues <- if (is.null(ColorValues)) "#ECBD78" else ColorValues
 
-      p <- ggplot2::ggplot(df, ggplot2::aes(x = score)) +
+      p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$score)) +
         ggplot2::geom_density(fill = ColorValues, alpha = 0.5) +
         ggplot2::labs(title = "Density Plot of Score", x = xlab, y = "Density") +
         # add points below density
-        ggplot2::geom_rug(ggplot2::aes(x = score), color=ColorValues, sides = "b",
+        ggplot2::geom_rug(ggplot2::aes(x = .data$score), color=ColorValues, sides = "b",
                           alpha = 0.8, size = .5, length = grid::unit(0.035, "npc"))
 
       # Customize the plot appearance.
@@ -596,7 +604,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
     p <- p + ggplot2::geom_violin(alpha = 0.5, scale = "width")
 
     # Add median summary crossbar.
-    p <- p + ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
+    p <- p + ggplot2::stat_summary(fun = stats::median, fun.min = stats::median, fun.max = stats::median,
                                    geom = "crossbar", width = 0.25,
                                    position = ggplot2::position_dodge(width = 0.13))
 
@@ -624,7 +632,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
 
           line1 <- wrap_title(paste0("Cohen's d = ",
                                      format(signif(cohen_d_results, digits=3),
-                                            scientific = TRUE)),
+                                            scientific = FALSE)),
                               width = widthTitle)
           line2 <- wrap_title(paste0("p = ", format(signif(p_val, digits=3),
                                                     scientific = TRUE)),
@@ -634,7 +642,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
         } else {
           subtitle <- wrap_title(paste0("Cohen's d = ",
                                         format(signif(cohen_d_results, digits=3),
-                                               scientific = TRUE)),
+                                               scientific = FALSE)),
                                  width = widthTitle)
         }
 
@@ -663,7 +671,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
 
             line1 <- wrap_title(paste0("Cohen's d = ",
                                        format(signif(cohen_d_results, digits = 3),
-                                              scientific = TRUE)),
+                                              scientific = FALSE)),
                                 width = widthTitle)
             line2 <- wrap_title(paste0("p = ",
                                        format(signif(p_val, digits = 3), scientific = TRUE)),
@@ -672,7 +680,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
           } else {
             subtitle <- wrap_title(paste0("Cohen's d = ",
                                           format(signif(cohen_d_results, digits = 3),
-                                                 scientific = TRUE)),
+                                                 scientific = FALSE)),
                                    width = widthTitle)
           }
 
@@ -693,7 +701,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
 
             line1 <- wrap_title(paste0("Cohen's f = ",
                                        format(signif(results_var["Cohen_f"], digits = 3),
-                                              scientific = TRUE)), width = widthTitle)
+                                              scientific = FALSE)), width = widthTitle)
             line2 <- wrap_title(paste0("p = ",
                                        format(signif(results_var["P_Value"], digits = 3),
                                               scientific = TRUE)), width = widthTitle)
@@ -701,7 +709,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
           } else {
             subtitle <- wrap_title(paste0("Cohen's f = ",
                                           format(signif(results_var["Cohen_f"], digits = 3),
-                                                 scientific = TRUE)), width = widthTitle)
+                                                 scientific = FALSE)), width = widthTitle)
           }
 
         }
@@ -718,7 +726,7 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
     if (ConnectGroups && !is.null(ColorVariable)) {
       p <- p + ggplot2::stat_summary(ggplot2::aes_string(group = ColorVariable,
                                                          color = ColorVariable),
-                                     fun.y = median, geom = "line", size = 1.5,
+                                     fun.y = stats::median, geom = "line", size = 1.5,
                                      alpha = 0.75,
                                      show.legend = FALSE)
     }
@@ -784,16 +792,16 @@ PlotScores_Categorical <- function(data, metadata, gene_sets,
   if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
 
   # Create label for y axis based on method.
-  if (method == "ssGSEA") {
-    ylab <- "ssGSEA Enrichment Score"
-  } else if (method == "logmedian") {
-    ylab <- "Normalised Signature Score"
-  } else if (method == "ranking") {
-    ylab <- "Signature Genes' Ranking"
-  }
+  # if (method == "ssGSEA") {
+  #   ylab <- "ssGSEA Enrichment Score"
+  # } else if (method == "logmedian") {
+  #   ylab <- "Normalised Signature Score"
+  # } else if (method == "ranking") {
+  #   ylab <- "Signature Genes' Ranking"
+  # }
 
   combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
+                                           left = grid::textGrob(paste0("Gene Set's Score (", method, ")"),
                                                                  rot = 90, vjust = 1,
                                                                  gp = grid::gpar(cex = 1.3,
                                                                                  fontsize = labsize)),
@@ -1023,7 +1031,7 @@ PlotScores_Numeric <- function(data,
       if (pvalcalc) {
         line1 <- wrap_title(paste0("Cohen's f = ",
                                    format(signif(results_var["Cohen_f"], digits=3),
-                                          scientific = TRUE)), width = widthTitle)
+                                          scientific = FALSE)), width = widthTitle)
         line2 <- wrap_title(paste0("p = ",
                                    format(signif(results_var["P_Value"], digits=3),
                                           scientific = TRUE)), width = widthTitle)
@@ -1032,7 +1040,7 @@ PlotScores_Numeric <- function(data,
       } else {
         subtitle <- wrap_title(paste0("Cohen's f = ",
                                       format(signif(results_var["Cohen_f"],
-                                                    digits=3), scientific = TRUE)),
+                                                    digits=3), scientific = FALSE)),
                                width = widthTitle)
       }
 
@@ -1091,16 +1099,16 @@ PlotScores_Numeric <- function(data,
   if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
 
   # Create label for y axis based on method.
-  if (method == "ssGSEA") {
-    ylab <- "ssGSEA Enrichment Score"
-  } else if (method == "logmedian") {
-    ylab <- "Normalised Signature Score"
-  } else if (method == "ranking") {
-    ylab <- "Signature Genes' Ranking"
-  }
+  # if (method == "ssGSEA") {
+  #   ylab <- "ssGSEA Enrichment Score"
+  # } else if (method == "logmedian") {
+  #   ylab <- "Normalised Signature Score"
+  # } else if (method == "ranking") {
+  #   ylab <- "Signature Genes' Ranking"
+  # }
 
   combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
+                                           left = grid::textGrob(paste0("Gene Set's Score (", method, ")"),
                                                                  rot = 90,
                                                                  vjust = 1,
                                                                  gp = grid::gpar(cex = 1.3,
diff --git a/R/ROCAUC_Scores.R b/R/ROCAUC_Scores.R
index f2e3eb0..e8aa3b0 100644
--- a/R/ROCAUC_Scores.R
+++ b/R/ROCAUC_Scores.R
@@ -123,7 +123,7 @@ ROCAUC_Scores_Calculate <- function(data, metadata, gene_sets, method = c("logme
 #'@param title Title for the grid of plots.
 #'@return A `ggplot2` or `ggarrange` object containing the ROC curve plots.
 #'
-#'@importFrom ggplot2 ggplot geom_line aes labs theme scale_color_manual
+#'@import ggplot2
 #'@importFrom ggpubr annotate_figure ggarrange
 #'
 #'@examples
@@ -204,7 +204,7 @@ ROC_Scores <- function(data,
       }
 
       # Create the ROC plot with all methods on the same plot
-      p <- ggplot2::ggplot(combined_df, ggplot2::aes(x = FPR, y = TPR, color = Method)) +
+      p <- ggplot2::ggplot(combined_df, ggplot2::aes(x = .data$FPR, y = .data$TPR, color = .data$Method)) +
         ggplot2::geom_line(size = 1) +  # Plot all ROC curves on the same plot
         ggplot2::scale_color_manual(values = colors) +  # Ensure correct color mapping for each method
         ggplot2::labs(title = wrap_title(signature,widthTitle),
@@ -225,9 +225,9 @@ ROC_Scores <- function(data,
                                       length.out = length(auc_values)))  # Adjust the vertical positions
 
       p <- p + ggplot2::geom_label(data = auc_texts,
-                                  ggplot2::aes(x = x, y = y,
-                                               label = paste0("AUC ", Method, " = ", round(AUC, 2), ""),
-                                               color = Method),
+                                  ggplot2::aes(x = .data$x, y = .data$y,
+                                               label = paste0("AUC ", .data$Method, " = ", round(.data$AUC, 2), ""),
+                                               color = .data$Method),
                                   size = 3,
                                   vjust = 0,  # Adjust vertical position
                                   hjust = 1,  # Adjust horizontal position to align to the right
@@ -436,9 +436,9 @@ AUC_Scores <- function(data, metadata, gene_sets,
     limits <- if (is.null(limits)) c(0.5 , 1) else limits
 
     # Create heatmap using ggplot2
-    p <- ggplot2::ggplot(long_data, ggplot2::aes(x = Method, y = Contrast, fill = AUC)) +
+    p <- ggplot2::ggplot(long_data, ggplot2::aes(x = .data$Method, y = .data$Contrast, fill = .data$AUC)) +
       ggplot2::geom_tile() +
-      ggplot2::geom_text(aes(label = label), color = "black", size = 3) +
+      ggplot2::geom_text(aes(label = .data$label), color = "black", size = 3) +
       ggplot2::scale_fill_gradientn(colors = ColorValues, limits = limits) +
       ggplot2::labs(title = signature_title, x = NULL, y = NULL, fill = "AUC") +
       ggplot2::theme_bw() +
diff --git a/R/ROCandAUCplot.R b/R/ROCandAUCplot.R
index 3a727cc..bca12a7 100644
--- a/R/ROCandAUCplot.R
+++ b/R/ROCandAUCplot.R
@@ -259,9 +259,9 @@ ROCandAUCplot <- function(data, metadata,
 
     legend_position <- if (length(unique(roc_df[[group_var]])) > 1 && group_var != "All") "bottom" else "none"
 
-    roc_plot_local <- ggplot2::ggplot(roc_df, ggplot2::aes(x = FPR, y = TPR, color = Group, group = Group)) +
+    roc_plot_local <- ggplot2::ggplot(roc_df, ggplot2::aes(x = .data$FPR, y = .data$TPR, color = .data$Group, group = .data$Group)) +
       ggplot2::geom_line(size = 1) +
-      ggplot2::facet_wrap(~ Gene, scales = "free", ncol = roc_params_local$ncol, nrow = roc_params_local$nrow) +
+      ggplot2::facet_wrap(~ .data$Gene, scales = "free", ncol = roc_params_local$ncol, nrow = roc_params_local$nrow) +
       ggplot2::theme_minimal() +
       ggplot2::labs(
         title = final_title,
@@ -356,7 +356,7 @@ ROCandAUCplot <- function(data, metadata,
 
     fillcolor <- ifelse(is.null(auc_params$colors), "#3B415B", auc_params$colors[1])
 
-    barplot <- ggplot2::ggplot(auc_sorted, ggplot2::aes(y = reorder(Gene, AUC), x = AUC)) +
+    barplot <- ggplot2::ggplot(auc_sorted, ggplot2::aes(y = stats::reorder(.data$Gene, .data$AUC), x = .data$AUC)) +
       ggplot2::geom_bar(stat = "identity", fill = fillcolor) +
       #ggplot2::coord_flip()  +
       coord_cartesian(xlim = c(0.5, 1))+
diff --git a/R/Score_VariableAssociation.R b/R/Score_VariableAssociation.R
index 5cdfdc1..8c38b61 100644
--- a/R/Score_VariableAssociation.R
+++ b/R/Score_VariableAssociation.R
@@ -161,7 +161,12 @@ create_contrast_column <- function(metadata, variable_name, contrast) {
 #' @param color_palette A string specifying the color palette for discrete
 #' variables. Default: `"Set2"`.
 #' @param printplt Boolean specifying if plot is to be printed. Default: `TRUE`.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
+#'   \code{"bonferroni"}, \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or
+#'   \code{"none"}. Passed to \code{\link[stats]{p.adjust}}.
+#'   
 #' @return A list with:
 #'   - `Overall`: Data frame of effect sizes and p-values for each contrasted
 #'   phenotypic variable.
@@ -207,7 +212,8 @@ Score_VariableAssociation <- function(data,
                                       discrete_colors=NULL,
                                       continuous_color = "#8C6D03",
                                       color_palette = "Set2",
-                                      printplt =TRUE){
+                                      printplt =TRUE, 
+                                      p.adjust.method = "BH"){
   method <- match.arg(method)  # Validate method input
   mode <- match.arg(mode)
   data <- as.data.frame(data) # Ensure data is a data frame
@@ -296,8 +302,7 @@ Score_VariableAssociation <- function(data,
   # Would happen if we have only numeric variables
   if (nrow(df_results_contrast)!=0){
 
-    df_results_contrast$padj <- p.adjust(df_results_contrast$PValue,
-                                         method = "BH")
+    df_results_contrast$padj <- stats::p.adjust(df_results_contrast$PValue,  method = p.adjust.method)
 
     if(is.null(saturation_value)){
       if (min(df_results_contrast$padj)>sig_threshold){
@@ -313,20 +318,24 @@ Score_VariableAssociation <- function(data,
     ########### CONTRAST MODE PLOT ############
 
     # Ensure contrast ordering
-    df_results_contrast$Contrast <- sapply(df_results_contrast$Contrast,
-                                           function(x) wrap_title(
-                                             x, widthlabels))
+    # df_results_contrast$Contrast <- sapply(df_results_contrast$Contrast,
+    #                                        function(x) wrap_title(
+    #                                          x, widthlabels))
+    df_results_contrast$Contrast <- vapply(df_results_contrast$Contrast,
+                                           function(x) wrap_title(x, widthlabels),
+                                           FUN.VALUE = character(1))
+    
     df_results_contrast$Contrast <- factor(df_results_contrast$Contrast,
                                            levels = df_results_contrast$Contrast
                                            [order(df_results_contrast$CohenD)])
 
 
     plot_contrasts <- ggplot2::ggplot(df_results_contrast,
-                                      ggplot2::aes(x = CohenD,
-                                                   y = Contrast,
-                                                   fill = -log10(padj))) +
+                                      ggplot2::aes(x = .data$CohenD,
+                                                   y = .data$Contrast,
+                                                   fill = -log10(.data$padj))) +
       ggplot2::geom_segment(ggplot2::aes(
-        yend = Contrast,
+        yend = .data$Contrast,
         xend = 0,
         linetype =  "solid",
         color =  "black"
@@ -359,7 +368,7 @@ Score_VariableAssociation <- function(data,
         axis.text = ggplot2::element_text(size = labsize),
         axis.title.y = element_text(face = "bold")
       ) +
-      ggplot2::facet_grid(Variable ~.,   scales = "free", switch = "y",
+      ggplot2::facet_grid(.data$Variable ~.,   scales = "free", switch = "y",
                           space = "free" ) +
       theme(strip.background =element_rect(fill="white"))
 
@@ -381,10 +390,10 @@ Score_VariableAssociation <- function(data,
   ########### OVERALL MODE PLOT ############
 
   plot_overall <- ggplot2::ggplot(df_results_overall,
-                                  ggplot2::aes(x = Cohen_f, y = Variable,
-                                               fill = -log10(P_Value))) +
+                                  ggplot2::aes(x = .data$Cohen_f, y = .data$Variable,
+                                               fill = -log10(.data$P_Value))) +
     ggplot2::geom_segment(ggplot2::aes(
-      yend = Variable,
+      yend = .data$Variable,
       xend = 0,
       linetype =  "solid",
       color =  "black"
@@ -455,7 +464,7 @@ Score_VariableAssociation <- function(data,
         colors <- discrete_colors[[var]]
       } else {
         num_levels <- length(unique(df_ranking[[var]]))
-        colors <- colorRampPalette(RColorBrewer::brewer.pal(
+        colors <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(
           8, color_palette))(num_levels)
 
       }
diff --git a/R/VariableAssociation.R b/R/VariableAssociation.R
index fb6572e..a321be0 100644
--- a/R/VariableAssociation.R
+++ b/R/VariableAssociation.R
@@ -31,34 +31,29 @@ identify_variable_type <- function(df, cols = NULL) {
   if (is.null(cols)) return("Unknown")
 
   if (!is.null(cols)) df <- df[, cols, drop = FALSE]
-
-  variable_types <- sapply(names(df), function(col_name) {
-
+ 
+  
+  variable_types <- vapply(names(df), function(col_name) {
     col <- df[[col_name]]
     unique_vals <- length(unique(col))
-
+    
     if (is.numeric(col) | is.integer(col)) {
- #     if (unique_vals > 10) {
-        return("Numeric")
-      # } else if (unique_vals == 2) {
-      #   return("Categorical Bin")
-      # } else {
-      #   return("Categorical Multi")
-      # }
+      return("Numeric")
     } else if (is.character(col) || is.factor(col)) {
       if (unique_vals == 2) {
         return("Categorical Bin")
       } else if (unique_vals > 10) {
-        warning(paste0("Warning: Number of unique values in '", col_name, "'
-                       is too high (>10). Consider removing this variable
-                       from the analysis."))
+        warning(paste0("Warning: Number of unique values in '", col_name,
+                       "' is too high (>10). Consider removing this variable ",
+                       "from the analysis."))
         return("Categorical Multi")
       } else {
         return("Categorical Multi")
       }
     }
     return("Unknown")
-  }, USE.NAMES = TRUE)
+  }, FUN.VALUE = character(1), USE.NAMES = TRUE)
+  
 
   return(variable_types)
 }
@@ -101,7 +96,12 @@ identify_variable_type <- function(df, cols = NULL) {
 #' @param categorical_multi The statistical test for multi-level categorical
 #' variables.
 #'   Options: `"anova"` (default) or `"kruskal-wallis"`.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
+#'   \code{"bonferroni"}, \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or
+#'   \code{"none"}. Passed to \code{\link[stats]{p.adjust}}.
+#'   
 #' @return A named list (one entry per variable being analysed) where each
 #' element is a data frame with:
 #'   - **Metric**: The test statistic (correlation coefficient, t-statistic,
@@ -135,7 +135,7 @@ identify_variable_type <- function(df, cols = NULL) {
 compute_stat_tests <- function(df, target_var, cols = NULL,
                                numeric = "pearson",
                                categorical_bin = "t.test",
-                               categorical_multi = "anova") {
+                               categorical_multi = "anova", p.adjust.method="BH") {
 
   # Ensure only one method is selected per variable type
   if (length(numeric) > 1 | length(categorical_bin) > 1 |
@@ -194,7 +194,7 @@ compute_stat_tests <- function(df, target_var, cols = NULL,
         test_df <- rbind(test_df, tukey_df)
 
       } else if (categorical_multi == "kruskal-wallis") {
-        test_result <- kruskal.test(df[[target_var]] ~ df[[var]])
+        test_result <- stats::kruskal.test(df[[target_var]] ~ df[[var]])
         test_df <- data.frame(metric = test_result$statistic,
                               p_value = test_result$p.value)
         row.names(test_df) <- "Kruskal-Wallis"
@@ -207,7 +207,7 @@ compute_stat_tests <- function(df, target_var, cols = NULL,
     # scientific notation
     test_df$metric <- formatC(test_df$metric, format = "e", digits = 2)
     # correct for multiple testing per variable
-    test_df$p_value <- p.adjust(test_df$p_value, method = "BH")
+    test_df$p_value <- stats::p.adjust(test_df$p_value,  method = p.adjust.method)
     test_df$p_value <- formatC(test_df$p_value, format = "e", digits = 3)
 
 
@@ -240,7 +240,6 @@ compute_stat_tests <- function(df, target_var, cols = NULL,
 #'   - `"ranking"`
 #'   - `"GSEA"`
 #'
-#' @section Shared Arguments (All Methods):
 #' @param data A data frame with gene expression data (genes as rows,
 #' samples as columns).
 #' @param metadata A data frame containing sample metadata; the first column
@@ -273,7 +272,12 @@ compute_stat_tests <- function(df, target_var, cols = NULL,
 #' (`"B"` or `"t"`). Auto-detected if `NULL`.
 #' @param ignore_NAs (GSEA only) Logical. If `TRUE`, rows with NA metadata are
 #' removed. Default: `FALSE`.
-#'
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
+#'   \code{"bonferroni"}, \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or
+#'   \code{"none"}. Passed to \code{\link[stats]{p.adjust}}.
+#'   
 #' @return A list with method-specific results and ggplot2-based visualizations:
 #'
 #' **For score-based methods (`logmedian`, `ssGSEA`, `ranking`):**
@@ -297,6 +301,49 @@ compute_stat_tests <- function(df, target_var, cols = NULL,
 #' scores (NES), adjusted p-values, and contrasts.
 #' - `plot`: A ggplot2 lollipop plot of GSEA enrichment across contrasts.
 #'
+#' @examples
+#' # Simulate gene expression data (genes as rows, samples as columns)
+#' set.seed(42)
+#' expr <- as.data.frame(matrix(rnorm(500), nrow = 50, ncol = 10))
+#' rownames(expr) <- paste0("Gene", 1:50)
+#' colnames(expr) <- paste0("Sample", 1:10)
+#'
+#' # Simulate metadata (categorical and continuous)
+#' metadata <- data.frame(
+#'   sampleID = paste0("Sample", 1:10),
+#'   Group = rep(c("A", "B"), each = 5),
+#'   Age = sample(20:60, 10),
+#'   row.names = colnames(expr)
+#' )
+#'
+#' # Define a toy gene set: one gene set only for discovery mode!
+#' gene_set <- list(
+#'   Signature1 = paste0("Gene", 1:10)
+#' )
+#'
+#' # Score-based association (e.g., logmedian)
+#' res_score <- VariableAssociation(
+#'   method = "logmedian",
+#'   data = expr,
+#'   metadata = metadata,
+#'   cols = c("Group", "Age"),
+#'   gene_set = gene_set
+#' )
+#' print(res_score$Overall)
+#' print(res_score$plot)
+#'
+#' # GSEA-based association (if GSEA_VariableAssociation is available)
+#' # res_gsea <- VariableAssociation(
+#' #   method = "GSEA",
+#' #   data = expr,
+#' #   metadata = metadata,
+#' #   cols = "Group",
+#' #   gene_set = gene_set
+#' # )
+#' # print(res_gsea$data)
+#' print(res_score$plot)
+#'
+#'
 #' @export
 VariableAssociation <- function(method = c("ssGSEA", "logmedian",
                                            "ranking", "GSEA"),
@@ -318,7 +365,8 @@ VariableAssociation <- function(method = c("ssGSEA", "logmedian",
                                 discrete_colors = NULL,
                                 continuous_color = "#8C6D03",
                                 color_palette = "Set2",
-                                printplt = TRUE) {
+                                printplt = TRUE, 
+                                p.adjust.method = "BH") {
   method <- match.arg(method)
   mode <- match.arg(mode)
   data <- as.data.frame(data) # Ensure data is a data frame
@@ -338,7 +386,8 @@ VariableAssociation <- function(method = c("ssGSEA", "logmedian",
       labsize = labsize,
       titlesize = titlesize,
       pointSize = pointSize,
-      ignore_NAs = ignore_NAs
+      ignore_NAs = ignore_NAs,
+      p.adjust.method = p.adjust.method
     )
 
   } else if (method %in% c("ssGSEA", "logmedian", "ranking")) {
@@ -360,7 +409,8 @@ VariableAssociation <- function(method = c("ssGSEA", "logmedian",
       discrete_colors = discrete_colors,
       continuous_color = continuous_color,
       color_palette = color_palette,
-      printplt = printplt
+      printplt = printplt,
+      p.adjust.method = p.adjust.method
     )
   }
 
diff --git a/R/Volcano_Cohen.R b/R/Volcano_Cohen.R
index c6870b5..fabd21c 100644
--- a/R/Volcano_Cohen.R
+++ b/R/Volcano_Cohen.R
@@ -39,7 +39,7 @@
 #'
 #' @seealso \code{\link{CohenD_allConditions}}
 #'
-#' @importFrom ggplot2 ggplot geom_point geom_vline geom_hline facet_wrap labs scale_color_manual scale_shape_manual theme_bw ggtitle theme element_text element_rect
+#' @import ggplot2 
 #' @importFrom RColorBrewer brewer.pal
 #' @keywords internal
 Volcano_Cohen <- function(cohenlist,
@@ -88,12 +88,16 @@ Volcano_Cohen <- function(cohenlist,
   final_df <- do.call(rbind, rows)
 
   # Wrap long signature names
-  final_df$signature <- sapply(final_df$signature,
-                               function(x) wrap_title(x, widthlegend))
+  # final_df$signature <- sapply(final_df$signature,
+  #                              function(x) wrap_title(x, widthlegend))
+  
+  final_df$signature <- vapply(final_df$signature,
+                               function(x) wrap_title(x, widthlegend),
+                               character(1))
 
   # Handle colors
   if (is.null(ColorValues)) {
-    ColorValues <- colorRampPalette(RColorBrewer::brewer.pal(12, colorPalette))(
+    ColorValues <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(12, colorPalette))(
       length(unique(final_df$signature)))
   } else {
     if (!is.null(ColorValues[["volcano"]])) {
@@ -105,13 +109,13 @@ Volcano_Cohen <- function(cohenlist,
   }
 
   # Generate plot
-  plt <- ggplot2::ggplot(final_df, ggplot2::aes(x = abs(cohen),
-                                                y = -log10(padj),
-                                                shape = method)) +
+  plt <- ggplot2::ggplot(final_df, ggplot2::aes(x = abs(.data$cohen),
+                                                y = -log10(.data$padj),
+                                                shape = .data$method)) +
     ggplot2::geom_point(colour = "black", size = pointSize) +
-    ggplot2::geom_point(ggplot2::aes(colour = signature),
+    ggplot2::geom_point(ggplot2::aes(colour = .data$signature),
                         size = pointSize - 1.5) +
-    ggplot2::facet_wrap(. ~ contrast, scales = "free") +
+    ggplot2::facet_wrap(. ~ .data$contrast, scales = "free") +
     ggplot2::geom_hline(yintercept = -log10(sig_threshold),
                         linetype = "dashed",
                         color = "black", size = 0.5) +
diff --git a/R/calculateDE.R b/R/calculateDE.R
index 41c9801..d2c4c30 100644
--- a/R/calculateDE.R
+++ b/R/calculateDE.R
@@ -106,53 +106,46 @@ calculateDE <- function(data, metadata=NULL, variables=NULL, modelmat = NULL,
     variables <- setdiff(variables, "")  # Remove empty strings
     return(variables)
   }
-
-
+ 
   # Validate inputs
-  if (!is.matrix(data) && !is.data.frame(data)) stop(
-    "Error: 'data' must be a matrix or a data frame.")
-  if (is.null(rownames(data))) stop(
-    "Error: 'data' must have row names corresponding to gene identifiers.")
-  if (!is.null(metadata) && !is.data.frame(metadata)) stop(
-    "Error: 'metadata' must be a data frame.")
-  if (!is.null(metadata) && (ncol(data) != nrow(metadata))) stop(
-    "Error: Number of samples in 'data' does not match number of rows in 'metadata'.")
-
-  # add "." after each variable and remove spaces
-  # Important to avoid errors in design matrix
-  # if (!is.null(metadata)){
-  #   #metadata <- as.data.frame(lapply(metadata, function(x) paste0(".", x)))
-  #   #colnames(metadata) <- paste0(colnames(metadata),".")
-  #   metadata <- as.data.frame(lapply(metadata, function(x) gsub(" ", "", x)))
-  # }
-  # if(!is.null(variables)){
-  #   variables <- gsub(" ", "", variables)
-  #   #variables <- paste0(variables,".")
-  # }
-  #if(!is.null(modelmat)) colnames(modelmat) <- gsub(" ", "", colnames(modelmat))
-
-
-
-  # Reorder and subset metadata to match data
-  # counts: matrix or data frame with column names as sample IDs
-  # metadata: data frame with at least one column containing sample IDs
-
-  # 1. Find the metadata column that best matches column names of count matrix
-  sample_ids <- colnames(data)
-  best_match_col <- which.max(sapply(metadata, function(col) sum(sample_ids %in% col)))
-
-  # 2. Extract matched column
-  matched_col <- metadata[[best_match_col]]
-
-  # 3. Subset metadata to only those samples present in the count matrix
-  metadata_matched <- metadata[matched_col %in% sample_ids, ]
-
-  # 4. Reorder metadata to match column order of count matrix
-  rownames(metadata_matched) <- metadata_matched[[best_match_col]]
-  # drop = FALSE to preserve data frame format
-  metadata_matched <- metadata_matched[sample_ids, , drop = FALSE]
-  metadata <- metadata_matched
-
+  if ((!is.matrix(data) && !is.data.frame(data)) || is.null(rownames(data))) {
+    stop("Error: 'data' must be a matrix or data frame with row names corresponding to gene identifiers.")
+  }
+  
+  if (!is.null(metadata)) {
+    if (!is.data.frame(metadata) || ncol(data) != nrow(metadata)) {
+      stop(
+        "Error with 'metadata': must be a data frame, and the number of rows must match the number of samples in 'data'."
+      )
+    }
+  }
+  
+   
+  if (!is.null(metadata)){
+    # Reorder and subset metadata to match data
+    # counts: matrix or data frame with column names as sample IDs
+    # metadata: data frame with at least one column containing sample IDs
+    
+    # 1. Find the metadata column that best matches column names of count matrix
+    sample_ids <- colnames(data)
+    #best_match_col <- which.max(sapply(metadata, function(col) sum(sample_ids %in% col)))
+    best_match_col <- which.max(vapply(metadata, function(col) sum(sample_ids %in% col), numeric(1)))
+    
+    # print message saying which column was used to match samples 
+    message("Using metadata column '", colnames(metadata)[best_match_col], "' to match samples (data column names).")
+    
+    # 2. Extract matched column
+    matched_col <- metadata[[best_match_col]]
+    
+    # 3. Subset metadata to only those samples present in the count matrix
+    metadata_matched <- metadata[matched_col %in% sample_ids, ]
+    
+    # 4. Reorder metadata to match column order of count matrix
+    rownames(metadata_matched) <- metadata_matched[[best_match_col]]
+    # drop = FALSE to preserve data frame format
+    metadata_matched <- metadata_matched[sample_ids, , drop = FALSE]
+    metadata <- metadata_matched
+  }
 
 
   if (ignore_NAs & !is.null(variables)) {
@@ -167,27 +160,16 @@ calculateDE <- function(data, metadata=NULL, variables=NULL, modelmat = NULL,
   # Construct design matrix
   design_matrix <- tryCatch({
 
-    if (!is.null(modelmat)) {
-      if (!is.matrix(modelmat)) stop("Error: 'modelmat' must be a matrix.")
-      if (nrow(modelmat) != ncol(data)) stop(
-        "Error: Rows in 'modelmat' must match the number of samples in 'data'.
-        Check if your metadata has any NAs or consider using ignore_NAs = TRUE.")
-      modelmat
-    # } else if (!is.null(lmexpression)) {
-    #   lmexpression <- as.formula(lmexpression, env = parent.frame())
-    #   design_matrix <- model.matrix(lmexpression, data = metadata)
-    #   vars <- extract_variables(lmexpression)
-    #   #colnames(design_matrix) <- gsub("^Condition","",colnames(design_matrix))
-    #
-    #   colnames(design_matrix) <-  remove_prefix(colnames(design_matrix), vars)
-    #   colnames(design_matrix) <- gsub(" ", "", colnames(design_matrix))
-    #   #colnames(design_matrix) <- sub("^[^.]*\\.", "", colnames(design_matrix))
-    #   design_matrix
+    if (!is.null(modelmat)) { 
+      if (!is.matrix(modelmat) || (nrow(modelmat) != ncol(data))) {
+        stop("Error: 'modelmat' must be a matrix with rows equal to the number of samples in 'data'.
+       Check if your metadata has any NAs or consider using ignore_NAs = TRUE.")
+      }
+
+      modelmat 
     } else {
       design_formula <- as.formula(paste("~0+", paste(variables, collapse = " + ")))
-      design_matrix <- model.matrix(design_formula, data = metadata)
-      #colnames(design_matrix) <- gsub("^Condition","",colnames(design_matrix))
-      #colnames(design_matrix) <- sub("^[^.]*\\.", "", colnames(design_matrix)) # remove the variable name
+      design_matrix <- stats::model.matrix(design_formula, data = metadata) 
       colnames(design_matrix) <-   remove_prefix(colnames(design_matrix), variables)
       colnames(design_matrix) <- gsub(" ", "", colnames(design_matrix)) # remove spaces
       design_matrix
diff --git a/R/data.R b/R/data.R
index 748e00a..2c8557a 100644
--- a/R/data.R
+++ b/R/data.R
@@ -15,6 +15,7 @@
 #'   for senescent samples, "young" for proliferative).}
 #' }
 #'
+#' 
 #' @source \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE63577}
 #'
 #' @references  Marthandan S, Priebe S, Baumgart M, Groth M et al. Similarities
@@ -24,14 +25,15 @@
 #' @references  Marthandan S, Baumgart M, Priebe S, Groth M et al. Conserved
 #'   Senescence Associated Genes and Pathways in Primary Human Fibroblasts
 #'   Detected by RNA-Seq. PLoS One 2016;11(5):e0154531. PMID: 27140416
-#'
-#' @keywords datasets
+#'   
+#' @usage data(metadata_example)
 "metadata_example"
 
 #' Gene Expression Counts for Marthandan et al. (2016) RNA-Seq Data
 #'
-#' A numeric matrix containing filtered and normalized gene expression data from
-#' the Marthandan et al. (2016) study (GEO accession GSE63577).
+#' A numeric matrix containing filtered and normalized (non log-transformed) 
+#' gene expression data from the Marthandan et al. (2016) study (GEO accession 
+#' GSE63577).
 #'
 #' Raw FASTQ files were downloaded using `fasterq-dump` (v2.11.0) and processed
 #' in a reproducible conda environment (Python v3.11.5). Quality control was
@@ -39,8 +41,7 @@
 #' Pseudo-alignment to the RefSeq transcriptome (NCBI release 109) was performed
 #' using kallisto (v0.44.0). Genes with low expression (mean count < 70 in all
 #' conditions) were filtered out. Count normalization factors were calculated
-#' with `edgeR::calcNormFactors`, and log2-transformed values were obtained via
-#' `limma::voom`.
+#' with `edgeR::calcNormFactors`.
 #'
 #' Intermediate time points for HFF and MRC5 cell lines were excluded, resulting
 #' in a final dataset with 45 high-quality samples across proliferative,
@@ -52,7 +53,7 @@
 #'
 #' @format A numeric matrix with rows as genes (gene symbols) and columns as
 #'   samples (sample IDs).
-#'
+#'  
 #' @source \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE63577}
 #'
 #' @references Marthandan S, Priebe S, Baumgart M, Groth M et al. Similarities
@@ -63,8 +64,8 @@
 #'   Senescence Associated Genes and Pathways in Primary Human Fibroblasts
 #'   Detected by RNA-Seq.
 #' *PLoS One* 2016;11(5):e0154531. PMID: 27140416
-#'
-#' @keywords datasets
+#' 
+#' @usage data(counts_example)
 "counts_example"
 
 #' Example Gene Sets for Cellular Senescence
@@ -75,15 +76,15 @@
 #'   curated gene set of commonly reported senescence markers,
 #'   with directionality (+1 or -1).}
 #'   \item{REACTOME_Senescence}{Character vector of gene symbols. The
-#'   REACTOME_CELLULAR_SENESCENCE from MSigDB pathway. No directionality.}
+#'   REACTOME_CELLULAR_SENESCENCE from the MSigDB database. No directionality.}
 #'   \item{HernandezSegura}{A data frame with columns `gene` and `direction`.
 #'   A gene set from Hernandez-Segura et al. (2017), with directionality (+1 or -1).}
 #' }
-#'
+#' 
 #' @references Hernandez-Segura A, de Jong TV, Melov S, Guryev V, Campisi J,
 #'   Demaria M. Unmasking Transcriptional Heterogeneity in Senescent Cells.
 #' *Curr Biol.* 2017 Sep 11;27(17):2652-2660.e4. doi: 10.1016/j.cub.2017.07.033.
 #' Epub 2017 Aug 30. PMID: 28844647; PMCID: PMC5788810.
-#' @keywords datasets
+#' @usage data(genesets_example)
 "genesets_example"
 
diff --git a/R/geneset_similarity.R b/R/geneset_similarity.R
index 828d326..2079ee6 100644
--- a/R/geneset_similarity.R
+++ b/R/geneset_similarity.R
@@ -21,12 +21,31 @@
 #'   Odds Ratio required for a gene set to be included in the plot. Default is
 #'   1.
 #' @param pval_threshold (only if method == "odds_ratio" only) Numeric. Maximum
-#'   adjusted p-value to show a label. Default is 0.05.
-#' @param limits Numeric vector of length 2. Limits for color scale.
+#'   adjusted p-value required for a gene set to be included in the plot.
+#'   Default is 0.05.
+#' @param limits Numeric vector of length 2. Limits for color scale. If `NULL`,
+#'   it is automatically set to c(0,1) for Jaccard or the range of OR for odds
+#'   ratio.
 #' @param title_size Integer specifying the font size for the plot title.
 #'   Default is `12`.
-#' @param color_values Character vector of colors used for the fill gradient.
-#'   Default is `c("#F9F4AE", "#B44141")`.
+#' @param color Character. The color for the maximum of the scale. Default is
+#'   `"#B44141"` (red).
+#'   - If `method = "jaccard"`, the scale goes from `neutral_color` to `color`.
+#'   - If `method = "odds_ratio"` and any OR >= 1, the scale ends at `color`.
+#'   - If `method = "odds_ratio"` and all OR <= 1, `color` is not used; instead, the scale
+#'   runs from `cold_color` (minimum) to `neutral_color` (OR = 1, if present;
+#'   otherwise `neutral_color` is the maximum).
+#' @param neutral_color Character. The neutral reference color. Default is
+#'   `white`.
+#'   - If `method = "jaccard"`, this is the minimum of the scale.
+#'   - If `method = "odds_ratio"` and any OR >= 1, this corresponds to OR = 1 if such values exist; otherwise it is the minimum of the scale.
+#'   - If `method = "odds_ratio"` and all OR <= 1, this corresponds to OR = 1 if such values exist; otherwise it is the maximum of the scale (with `cold_color` as the minimum).
+#' @param cold_color Character. The color for values below OR = 1 (only used
+#'   when `method = "odds_ratio"`). Default is `"#4173B4"` (blue).
+#'   - If `method = "odds_ratio"` and any OR < 1, the scale runs from `cold_color`
+#'   (minimum) to `neutral_color` (OR = 1 if present; otherwise `neutral_color`
+#'   is the maximum).
+#'   - Ignored if `method = "jaccard"` or if all OR >= 1.
 #' @param title Optional. Custom title for the plot. If `NULL`, the title
 #'   defaults to `"Signature Overlap"`.
 #' @param jaccard_threshold (only if method == "jaccard" only) Numeric. Minimum
@@ -45,13 +64,13 @@
 #'   \describe{
 #'     \item{\code{plot}}{The \pkg{ggplot2} object of the similarity heatmap.}
 #'     \item{\code{data}}{The data frame object containing the similarity
-#'     scores aper pair of gene sets.}
+#'     scores per pair of gene sets.}
 #'   }
 #'
 #' @import ggplot2
 #' @importFrom tibble tibble
 #' @importFrom msigdbr msigdbr
-#' @importFrom scales squish
+#' @importFrom scales squish rescale
 #'
 #' @examples
 #' # Create two simple gene signatures
@@ -97,7 +116,9 @@ geneset_similarity <- function(
     pval_threshold = 0.05,
     limits = NULL,
     title_size = 12,
-    color_values = c("#F9F4AE", "#B44141"),
+    color = "#B44141",         # color for the maximum of the scale
+    neutral_color = "white",   # neutral reference color
+    cold_color = "#4173B4",       # color for OR < 1 when applicable
     title = NULL,
     jaccard_threshold = 0,
     msig_subset = NULL,
@@ -107,11 +128,11 @@ geneset_similarity <- function(
   if (is.null(signatures) || length(signatures) == 0) {
     stop("You must provide at least one signature.")
   }
-  if (!is.list(signatures) || !all(sapply(signatures, is.character))) {
+  if (!is.list(signatures) || !all(vapply(signatures, is.character, logical(1)))) {
     stop("Signatures must be a named list of character vectors.")
   }
   if (!is.null(other_user_signatures) && (!is.list(other_user_signatures) ||
-                                          !all(sapply(other_user_signatures, is.character)))) {
+                                          !all(vapply(other_user_signatures, is.character, logical(1))))) {
     stop("Other user signatures must be a named list of character vectors.")
   }
   if (!is.null(collection) && !is.character(collection)) {
@@ -135,14 +156,15 @@ geneset_similarity <- function(
   if (!is.null(limits) && (!is.numeric(limits) || length(limits) != 2)) {
     stop("limits must be a numeric vector of length 2.")
   }
+  
+  if (!is.null(limits) && any(limits < 0 | !is.finite(limits))) {
+    warning("Limits contain negative, or non-finite values. Ensure limits are positive finite numbers.")
+  }
 
   if (!is.numeric(title_size) || title_size <= 0) {
     stop("title_size must be a positive numeric value.")
   }
-
-  if (!is.character(color_values) || length(color_values) < 2) {
-    stop("color_values must be a character vector with two colors.")
-  }
+ 
 
   if (!is.null(title) && !is.character(title)) {
     stop("title must be a character string or NULL.")
@@ -229,14 +251,15 @@ geneset_similarity <- function(
         d <- length(setdiff(universe, union(sig1, sig2)))
 
         cont_tbl <- matrix(c(a, b, c, d), nrow = 2)
-        ft <- fisher.test(cont_tbl)
+        ft <- stats::fisher.test(cont_tbl)
 
         score <- log10(ft$estimate)
-        if (!is.na(ft$p.value) && ft$p.value <= pval_threshold && ft$estimate >= or_threshold) {
-          label <- sprintf("%.1f", score)
-        } else {
-          label <- ""
-        }
+        # if (!is.na(ft$p.value) && ft$p.value <= pval_threshold && ft$estimate >= or_threshold) {
+        #   label <- sprintf("%.1f", 10^score) # to show non log values in heatmap
+        #   #label <- sprintf("%.1f", score)
+        # } else {
+        #   label <- ""
+        # }
         pval <- ft$p.value
       }
 
@@ -244,7 +267,7 @@ geneset_similarity <- function(
         Reference_Signature = ref_name,
         Compared_Signature = comp_name,
         Score = score,
-        Label = label,
+        #Label = label,
         Pval = pval,
         stringsAsFactors = FALSE
       )
@@ -259,19 +282,24 @@ geneset_similarity <- function(
 
   if (metric == "odds_ratio") {
     # Filter groups where any 10^Score >= threshold
+    # keep_rows <- by(similarity_df, similarity_df$Compared_Signature, function(group) {
+    #   any(10^group$Score >= or_threshold, na.rm = TRUE)
+    # })
+
     keep_rows <- by(similarity_df, similarity_df$Compared_Signature, function(group) {
-      any(10^group$Score >= or_threshold, na.rm = TRUE)
+      any(10^group$Score >= or_threshold & group$Pval <= pval_threshold, na.rm = TRUE)
     })
-
+    
+    
     kept_signatures <- names(keep_rows[keep_rows])
     similarity_df <- similarity_df[similarity_df$Compared_Signature %in% kept_signatures, , drop = FALSE]
 
     # Add Label column
-    similarity_df$Label <- ifelse(
-      similarity_df$Pval <= pval_threshold,
-      sprintf("%.1f", similarity_df$Score),
-      ""
-    )
+    # similarity_df$Label <- ifelse(
+    #   similarity_df$Pval <= pval_threshold,
+    #   sprintf("%.1f", similarity_df$Score),
+    #   ""
+    # )
   }
 
   if (metric == "jaccard" && jaccard_threshold > 0) {
@@ -285,42 +313,170 @@ geneset_similarity <- function(
   }
 
   data <- similarity_df
-
-  similarity_df$Reference_Signature <- sapply(similarity_df$Reference_Signature,
-                                              function(x) wrap_title(x, width_text))
-  similarity_df$Compared_Signature <- sapply(similarity_df$Compared_Signature,
-                                             function(x) wrap_title(x, width_text))
-
+ 
+  
+  
+  if (nrow(similarity_df) == 0) {
+    stop("No signatures passed the filtering criteria.")  
+  }
+  
+  similarity_df$Reference_Signature <- vapply(similarity_df$Reference_Signature,
+                                              function(x) wrap_title(x, width_text),
+                                              character(1))
+  similarity_df$Compared_Signature <- vapply(similarity_df$Compared_Signature,
+                                             function(x) wrap_title(x, width_text),
+                                             character(1))
+# 
+#   if (is.null(limits)) {
+#     if (metric == "jaccard") {
+#       limits <- c(0, 1)
+#     } else {
+#       # For odds ratio, we set limits based on the data 
+#     limits <- c(min(similarity_df$Score[is.finite(similarity_df$Score)], na.rm = TRUE), max(similarity_df$Score, na.rm = TRUE))
+#      
+#   }
+#   }
+#   
+#   
+# 
+#   plt <- ggplot(similarity_df, aes(x = .data$Reference_Signature,
+#                                    y = .data$Compared_Signature, fill = .data$Score)) +
+#     geom_tile(color = "white") +
+#     #geom_text(aes(label = .data$Label), color = "black") +
+#     scale_fill_gradientn(colors = color_values, limits = limits,
+#                          oob = scales::squish, na.value = na_color) +
+#     labs(
+#       x = "",
+#       y = "Compared Signature",
+#       fill = ifelse(metric == "jaccard", "Jaccard Index", "log10(OR)"),
+#       title = ifelse(is.null(title), paste("Signature Overlap (", metric, ")"), title)
+#     ) +
+#     theme_minimal() +
+#     theme(
+#       axis.text.x = element_text(angle = 45, hjust = 1),
+#       plot.title = element_text(hjust = 0.5, size = title_size)
+#     )
+# 
+#   plt
+# 
+#   invisible(list(plot=plt,
+#                  data=data))
+    
+  
+  # ----------------------------
+  # Safe handling of limits (user provides OR)
+  # ----------------------------
   if (is.null(limits)) {
     if (metric == "jaccard") {
       limits <- c(0, 1)
-    } else {
-      # For odds ratio, we set limits based on the data
-      # but ensure they are at least 0 to avoid negative log10 values
-    limits <- c(0, max(similarity_df$Score, na.rm = TRUE))
-  }
+    } else { # odds_ratio
+       
+      # Extract finite scores
+      finite_scores <- similarity_df$Score[is.finite(similarity_df$Score)]
+      
+      # Identify if there are any OR < 1 (logOR < 0)
+      has_below1 <- any(finite_scores < 0)
+      
+      # Compute padding for -Inf (original OR = 0)
+      if (has_below1) {
+        # Place -Inf one log unit below the minimum finite score < 0
+        min_below <- min(finite_scores[finite_scores < 0])
+        pad_value <- min_below - 1
+      } else {
+        # All OR >= 1 → map -Inf slightly above the maximum score, then invert sign
+        max_score <- max(finite_scores)
+        pad_value <- -(max_score + 1)
+      }
+      
+      # Replace -Inf in Score with computed padding
+      similarity_df$Score[is.infinite(similarity_df$Score) & similarity_df$Score < 0] <- pad_value
+      
+      # Convert back from log
+      OR_values <- 10^similarity_df$Score  
+      
+      # Replace any zero or negative OR with a small number
+      OR_values[OR_values <= 0] <- 1e-6
+      #similarity_df$Score[is.infinite(similarity_df$Score) & similarity_df$Score < 0] <- log10(1e-6)
+      
+      # Compute limits
+      limits <- c(min(OR_values, na.rm = TRUE), max(OR_values, na.rm = TRUE))
+    }
   }
 
-  plt <- ggplot(similarity_df, aes(x = Reference_Signature,
-                                   y = Compared_Signature, fill = Score)) +
+  
+  # Convert OR limits to log space (Score already log10 OR)
+  log_limits <- if (metric == "odds_ratio") log10(limits) else limits
+  if (min(log_limits) == max(log_limits)) {
+    log_limits <- log_limits + c(-0.01, 0.01)  # small padding
+  } 
+  zero <- 0  # neutral color at OR = 1 → log10(1) = 0
+  
+  # ----------------------------
+  # Define fill colors
+  # ----------------------------
+  if (metric == "jaccard") {
+    fill_colors <- c(neutral_color, color)
+    fill_values <- c(log_limits[1], log_limits[2])
+  } else if (metric == "odds_ratio") {
+    min_lim <- log_limits[1]
+    max_lim <- log_limits[2]
+    
+    if (min_lim >= zero) {
+      fill_colors <- c(neutral_color, color)
+      fill_values <- c(min_lim, max_lim)
+    } else if (max_lim <= zero) {
+      fill_colors <- c(cold_color, neutral_color)
+      fill_values <- c(min_lim, max_lim)
+    } else {
+      fill_colors <- c(cold_color, neutral_color, color)
+      fill_values <- c(min_lim, zero, max_lim)
+    }
+    
+    # ----------------------------
+    # Safe legend breaks in OR space
+    # ----------------------------
+    valid_OR <- limits[limits > 0 & is.finite(limits)]
+    if (length(valid_OR) == 0) valid_OR <- 1  # fallback
+    
+    log_breaks <- 10^seq(floor(log10(min(valid_OR))), ceiling(log10(max(valid_OR))))
+    log_breaks <- log_breaks[log_breaks >= min(valid_OR) & log_breaks <= max(valid_OR)]
+  }
+  
+  # ----------------------------
+  # Build plot
+  # ----------------------------
+  plt <- ggplot(similarity_df, aes(
+    x = .data$Reference_Signature,
+    y = .data$Compared_Signature,
+    fill = .data$Score)) +
     geom_tile(color = "white") +
-    geom_text(aes(label = Label), color = "black") +
-    scale_fill_gradientn(colors = color_values, limits = limits,
-                         oob = scales::squish, na.value = na_color) +
+    scale_fill_gradientn(
+      colors = fill_colors,
+      values = scales::rescale(fill_values),
+      limits = log_limits,
+      oob = scales::squish,
+      na.value = na_color,
+      trans = "identity",  # already logged
+      breaks = if (metric == "odds_ratio") log10(log_breaks) else waiver(),
+      labels = if (metric == "odds_ratio") log_breaks else waiver()
+    ) +
     labs(
       x = "",
       y = "Compared Signature",
-      fill = ifelse(metric == "jaccard", "Jaccard Index", "log10(OR)"),
-      title = ifelse(is.null(title), paste("Signature Overlap (", metric, ")"), title)
+      fill = ifelse(metric == "jaccard", "Jaccard Index", "Odds Ratio"),
+      title = ifelse(is.null(title), "Signature Overlap", title)
     ) +
     theme_minimal() +
     theme(
       axis.text.x = element_text(angle = 45, hjust = 1),
       plot.title = element_text(hjust = 0.5, size = title_size)
     )
-
-  plt
-
-  invisible(list(plot=plt,
-                 data=data))
+   
+  if (metric == "odds_ratio") {
+    data$Score <- 10^data$Score  # convert back to OR for data output
+  }
+  
+  invisible(list(plot = plt, data = data))
+  
+  
 }
diff --git a/R/plotCombinedGSEA.R b/R/plotCombinedGSEA.R
index 675b4a0..617d1ef 100644
--- a/R/plotCombinedGSEA.R
+++ b/R/plotCombinedGSEA.R
@@ -43,7 +43,7 @@
 #' plotCombinedGSEA(GSEA_results, sig_threshold = 0.05, PointSize = 4)
 #'
 #' @import ggplot2
-#' @import RColorBrewer
+#' @importFrom RColorBrewer brewer.pal
 #' @export
 plotCombinedGSEA <- function(GSEA_results, sig_threshold = 0.05, PointSize = 4,
                              widthlegend=16) {
@@ -65,15 +65,19 @@ plotCombinedGSEA <- function(GSEA_results, sig_threshold = 0.05, PointSize = 4,
   # Create a color palette for pathways
   # RColorBrewer has palettes for discrete color scales
 
-  pathway_colors <- colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))(length(unique(combined_data$pathway)))
+  pathway_colors <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))(length(unique(combined_data$pathway)))
 
-  combined_data$pathway <- sapply(combined_data$pathway, function(x) wrap_title(x, widthlegend))
+  # combined_data$pathway <- sapply(combined_data$pathway, function(x) wrap_title(x, widthlegend))
+  combined_data$pathway <- vapply(combined_data$pathway,
+                                  function(x) wrap_title(x, widthlegend),
+                                  character(1))
+  
 
   # Create the plot
-  plot <- ggplot2::ggplot(combined_data, ggplot2::aes(x = NES, y = logpadj,
-                                                      shape = contrast)) +
+  plot <- ggplot2::ggplot(combined_data, ggplot2::aes(x = .data$NES, y = .data$logpadj,
+                                                      shape = .data$contrast)) +
     ggplot2::geom_point(colour="black", size = PointSize) +
-    ggplot2::geom_point(ggplot2::aes(colour = factor(pathway)) ,
+    ggplot2::geom_point(ggplot2::aes(colour = factor(.data$pathway)) ,
                         size = PointSize-2.5) +
     ggplot2::geom_hline(yintercept = -log10(sig_threshold), linetype = "dashed",
                         color = "black", size = .5)  +
diff --git a/R/plotGSEAenrichment.R b/R/plotGSEAenrichment.R
index bdfd29c..ea79520 100644
--- a/R/plotGSEAenrichment.R
+++ b/R/plotGSEAenrichment.R
@@ -53,7 +53,9 @@
 #' plot <- plotCombinedGSEA(GSEA_results, sig_threshold = 0.05, PointSize = 7)
 #' print(plot)
 #'
-#' @import ggplot2 ggpubr fgsea
+#' @import ggplot2  
+#' @importFrom ggpubr ggarrange
+#' @importFrom fgsea plotEnrichment
 #' @export
 plotGSEAenrichment <- function(GSEA_results, DEGList, gene_sets, widthTitle = 24,
                                grid = FALSE, nrow=NULL, ncol=NULL, titlesize=12) {
@@ -85,7 +87,7 @@ plotGSEAenrichment <- function(GSEA_results, DEGList, gene_sets, widthTitle = 24
       stat_used <- gsea_row$stat_used
 
       # order ranks by stat used
-      ranks <- setNames(deg_df[,stat_used, drop=TRUE], rownames(deg_df))
+      ranks <- stats::setNames(deg_df[,stat_used, drop=TRUE], rownames(deg_df))
       ranks <- sort(ranks, decreasing = TRUE)
 
       nes_value <- round(gsea_row$NES, 2)
@@ -118,7 +120,7 @@ plotGSEAenrichment <- function(GSEA_results, DEGList, gene_sets, widthTitle = 24
         directions <- as.numeric(gs[[2]])
         ranks_adjusted <- ranks
         idx <- which(names(ranks_adjusted) %in% gs_genes)
-        lookup <- setNames(directions, gs_genes)
+        lookup <- stats::setNames(directions, gs_genes)
         ranks_adjusted[idx] <- ranks_adjusted[idx] * lookup[names(ranks_adjusted)[idx]]
 
         plot <- fgsea::plotEnrichment(gs_genes, sort(ranks_adjusted, decreasing = TRUE))
diff --git a/R/plotNESlollipop.R b/R/plotNESlollipop.R
index 8f30d75..16a0500 100644
--- a/R/plotNESlollipop.R
+++ b/R/plotNESlollipop.R
@@ -95,7 +95,7 @@
 #'
 #' @import ggplot2
 #' @importFrom ggpubr annotate_figure ggarrange
-#' @import grid
+#' @importFrom grid textGrob gpar
 #' @export
 plotNESlollipop <- function(GSEA_results,
                             signif_color = "red", nonsignif_color = "white",
@@ -121,15 +121,19 @@ plotNESlollipop <- function(GSEA_results,
     }
 
     # Ensure contrast ordering
-    res$pathway <- sapply(res$pathway, function(x) wrap_title(x, widthlabels))
+    # res$pathway <- sapply(res$pathway, function(x) wrap_title(x, widthlabels))
+    res$pathway <- vapply(res$pathway,
+                          function(x) wrap_title(x, widthlabels),
+                          FUN.VALUE = character(1))
+    
     res$pathway <- factor(res$pathway, levels = res$pathway[order(res$NES)])
 
-    plot <- ggplot2::ggplot(res, ggplot2::aes(x = NES, y = pathway, fill = -log10(padj))) +
+    plot <- ggplot2::ggplot(res, ggplot2::aes(x = .data$NES, y = .data$pathway, fill = -log10(.data$padj))) +
 
       # Add a condition for dashed lines and points for B statistic and negative NES
-      ggplot2::geom_segment(ggplot2::aes(yend = pathway,
+      ggplot2::geom_segment(ggplot2::aes(yend = .data$pathway,
                                          xend = 0,
-                                         linetype = ifelse(stat_used == "B" & NES < 0,
+                                         linetype = ifelse(.data$stat_used == "B" & .data$NES < 0,
                                                            "dashed", "solid")),
                             size = .5,
                             color = ifelse(res$stat_used == "B" & res$NES < 0,
@@ -138,7 +142,7 @@ plotNESlollipop <- function(GSEA_results,
         shape = 21,
         stroke = 1.2,
         size = 4,
-        ggplot2::aes(color = ifelse(stat_used == "B" & NES < 0,
+        ggplot2::aes(color = ifelse(.data$stat_used == "B" & .data$NES < 0,
                                     "darkgrey", "black")) # Points color for B and negative NES
       ) +
 
diff --git a/R/plotPCA.R b/R/plotPCA.R
index 3ce34b0..c816788 100644
--- a/R/plotPCA.R
+++ b/R/plotPCA.R
@@ -118,13 +118,14 @@ plotPCA <- function(data, metadata=NULL, genes=NULL, scale=FALSE, center=TRUE,
 
   if (nPCs > ncol(PCAcounts)) stop("Error: Number of genes too low for number of chosen PCs. Please reduce number of PCs.")
 
-  PCAcounts <-  cbind(PCAcounts[,1:nPCs],y$samples)
-
+  #PCAcounts <-  cbind(PCAcounts[,1:nPCs],y$samples)
+  PCAcounts <- cbind(PCAcounts[, seq_len(nPCs), drop = FALSE], y$samples)
+  
   pltList <- list()
 
   for (pc in PCs){
     pc <- unlist(pc)
-    ev = PCAdata$sdev^2
+    ev <-  PCAdata$sdev^2
     pc_x <- round(100*ev[pc[1]]/sum(ev),2)
     pc_y <- round(100*ev[pc[2]]/sum(ev),2)
 
diff --git a/R/plotVolcano.R b/R/plotVolcano.R
index 609cc49..c8da073 100644
--- a/R/plotVolcano.R
+++ b/R/plotVolcano.R
@@ -262,7 +262,7 @@ plotVolcano <- function(DEResultsList, genes = NULL, N = NULL,
 
           # Add to plot
           p <- p + ggplot2::geom_point(data = plot_data,
-                                       aes(color = Direction),
+                                       aes(color = .data$Direction),
                                        size = pointSize,
                                        alpha = 0.8) +
             ggplot2::scale_color_manual(values = c(
@@ -281,7 +281,11 @@ plotVolcano <- function(DEResultsList, genes = NULL, N = NULL,
       ## Annotate top N genes if requested
       if (!is.null(N)) {
         genes_stat <- fit[order(fit[, x], decreasing = TRUE), ]
-        annotationgenes <- row.names(genes_stat)[c(1:N, (nrow(genes_stat) - N + 1):nrow(genes_stat))]
+        # annotationgenes <- row.names(genes_stat)[c(1:N, (nrow(genes_stat) - N + 1):nrow(genes_stat))]
+        annotationgenes <- row.names(genes_stat)[
+          c(seq_len(N), seq_len(N) + nrow(genes_stat) - N)
+        ]
+        
         p <- p +
           ggplot2::geom_point(data = fit[annotationgenes, ],
                               color = highlightcolor, size = pointSize) +
diff --git a/R/runGSEA.R b/R/runGSEA.R
index 38ab412..d80279c 100644
--- a/R/runGSEA.R
+++ b/R/runGSEA.R
@@ -28,7 +28,13 @@
 #'   number of contrasts tested per signature and provides more stringent
 #'   control of false discovery rate across multiple comparisons. If `FALSE`,
 #'   the function only corrects for the number of gene sets.
-#'
+#'   
+#' @param p.adjust.method Character string specifying the method to use for
+#'   multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+#'   default), \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"},
+#'   \code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+#'   Passed to \code{\link[stats]{p.adjust}}. 
+#'   
 #' @param nPermSimple Number of permutations in the simple fgsea implementation
 #'   for preliminary estimation of P-values. Parameter from fgsea.
 #'
@@ -57,7 +63,7 @@
 #'
 #' @importFrom fgsea fgsea
 #' @export
-runGSEA <- function(DEGList, gene_sets, stat = NULL, ContrastCorrection=FALSE, nPermSimple=10000) {
+runGSEA <- function(DEGList, gene_sets, stat = NULL, ContrastCorrection=FALSE, nPermSimple=10000, p.adjust.method="BH") {
 
   # Initialize storage for results across contrasts
   results_by_contrast <- list()
@@ -80,7 +86,7 @@ runGSEA <- function(DEGList, gene_sets, stat = NULL, ContrastCorrection=FALSE, n
       }
 
       # Create the ranking vector for GSEA
-      ranks <- setNames(deg_df[[current_stat]], rownames(deg_df))
+      ranks <- stats::setNames(deg_df[[current_stat]], rownames(deg_df))
 
       if (current_stat=="t") {
 
@@ -150,7 +156,7 @@ runGSEA <- function(DEGList, gene_sets, stat = NULL, ContrastCorrection=FALSE, n
     combined_df <- do.call(rbind, Map(cbind, results_by_contrast, df_name = names(results_by_contrast)))
 
     # Step 2: Adjust p-values across all data
-    combined_df$padj <- p.adjust(combined_df$pval, method = "BH")
+    combined_df$padj <- stats::p.adjust(combined_df$pval,  method = p.adjust.method)
 
     # Step 3: Split back into the original list structure
     list_of_dfs <- split(combined_df, combined_df$df_name)
@@ -163,7 +169,7 @@ runGSEA <- function(DEGList, gene_sets, stat = NULL, ContrastCorrection=FALSE, n
 
     # Step 1: Adjust p-values for each data frame individually
     results_by_contrast <- lapply(results_by_contrast, function(df) {
-      df$padj <- p.adjust(df$pval, method = "BH")  # Adjust p-values per data frame
+      df$padj <- stats::p.adjust(df$pval,  method = p.adjust.method)  # Adjust p-values per data frame
       return(df)
     })
 
diff --git a/R/ssGSEA_alternative.R b/R/ssGSEA_alternative.R
index f76acf7..c5642a3 100644
--- a/R/ssGSEA_alternative.R
+++ b/R/ssGSEA_alternative.R
@@ -79,52 +79,55 @@ ssGSEA_alternative <- function(X,
                                scale = TRUE,
                                norm = FALSE,
                                single = TRUE) {
-  row_names = rownames(X)
-  num_genes = nrow(X)
-  gene_sets = lapply(gene_sets, function(genes) { which(row_names %in% genes) })
+  row_names <-  rownames(X)
+  num_genes <-  nrow(X)
+  gene_sets <-  lapply(gene_sets, function(genes) { which(row_names %in% genes) })
 
   # Ranks for genes
-  R = colRanking(X, ties.method = 'average')
+  R <-  colRanking(X, ties.method = 'average')
 
   # Calculate enrichment score (es) for each sample (column)
-  es = apply(R, 2, function(R_col) {
-    gene_ranks = order(R_col, decreasing = TRUE)
-
-    # Calc es for each gene set
-    es_sample = sapply(gene_sets, function(gene_set_idx) {
-      # pos: match (within the gene set)
-      # neg: non-match (outside the gene set)
-      indicator_pos = gene_ranks %in% gene_set_idx
-      indicator_neg = !indicator_pos
-
-      rank_alpha  = (R_col[gene_ranks] * indicator_pos) ^ alpha
-
-      step_cdf_pos = cumsum(rank_alpha)    / sum(rank_alpha)
-      step_cdf_neg = cumsum(indicator_neg) / sum(indicator_neg)
-
-      step_cdf_diff = step_cdf_pos - step_cdf_neg
-
-      # Normalize by gene number
-      if (scale) step_cdf_diff = step_cdf_diff / num_genes
-
-      # Use ssGSEA or not
-      if (single) {
-        sum(step_cdf_diff)
-      } else {
-        step_cdf_diff[which.max(abs(step_cdf_diff))]
-      }
-    })
+  es <-  apply(R, 2, function(R_col) {
+    gene_ranks <-  order(R_col, decreasing = TRUE)
+ 
+    
+    es_sample <- vapply(
+      gene_sets,
+      function(gene_set_idx) {
+        # pos: match (within the gene set)
+        # neg: non-match (outside the gene set)
+        indicator_pos <- gene_ranks %in% gene_set_idx
+        indicator_neg <- !indicator_pos
+        
+        rank_alpha <- (R_col[gene_ranks] * indicator_pos) ^ alpha
+        
+        step_cdf_pos <- cumsum(rank_alpha)    / sum(rank_alpha)
+        step_cdf_neg <- cumsum(indicator_neg) / sum(indicator_neg)
+        
+        step_cdf_diff <- step_cdf_pos - step_cdf_neg
+        
+        if (scale) step_cdf_diff <- step_cdf_diff / num_genes
+        
+        if (single) {
+          sum(step_cdf_diff)
+        } else {
+          step_cdf_diff[which.max(abs(step_cdf_diff))]
+        }
+      },
+      numeric(1)  # <- expected output per iteration
+    )
+    
     unlist(es_sample)
-  })
+  }) 
 
-  if (length(gene_sets) == 1) es = matrix(es, nrow = 1)
+  if (length(gene_sets) == 1) es <-  matrix(es, nrow = 1)
 
   # Normalize by absolute diff between max and min
-  if (norm) es = es / diff(range(es))
+  if (norm) es <-  es / diff(range(es))
 
   # Prepare output
-  rownames(es) = names(gene_sets)
-  colnames(es) = colnames(X)
+  rownames(es) <-  names(gene_sets)
+  colnames(es) <-  colnames(X)
   return(es)
 }
 
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..d34382c
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,13 @@
+.onAttach <- function(libname, pkgname) {
+  # Attach hook: newest ggplot2 version the startup note treats as tested.
+  tested_max <- "3.5.2"
+  # NULL when the installed ggplot2 version cannot be looked up.
+  installed <- tryCatch(utils::packageVersion("ggplot2"), error = function(e) NULL)
+  if (is.null(installed) || installed <= tested_max) {
+    return(invisible(NULL))
+  }
+  packageStartupMessage(
+    "Warning: markeR has been tested with ggplot2 <= 3.5.2. ",
+    "Using newer versions may cause incompatibilities."
+  )
+}
diff --git a/README.Rmd b/README.Rmd
index 032ecd7..da4d6b3 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -17,22 +17,23 @@ knitr::opts_chunk$set(
 
 <!-- badges: start -->
 
-![](https://img.shields.io/badge/status-development-yellowgreen)
+<!--![](https://img.shields.io/badge/status-development-yellowgreen)-->
 [![Pkgdown](https://img.shields.io/badge/docs-pkgdown-blue.svg)](https://diseasetranscriptomicslab.github.io/markeR/)
-[![Minimal R Version](https://img.shields.io/badge/min%20R-4.4.0-blue.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/Rminversion.yaml)
+![Minimal R Version](https://img.shields.io/badge/min%20R-4.5.0-blue.svg)
 [![codecov](https://codecov.io/gh/DiseaseTranscriptomicsLab/markeR/graph/badge.svg?token=7T1I4JCJG6)](https://codecov.io/gh/DiseaseTranscriptomicsLab/markeR)
 <!-- [![R-CMD-check](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/R-CMD-check.yaml)-->
 <!-- [![Bioconductor Check](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/bioc-check.yml/badge.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/bioc-check.yml) -->
 
 <!-- badges: end -->
 
-**markeR** provides a suite of methods for using gene sets (signatures) to quantify and evaluate the extent to which a given gene signature marks a specific phenotype from gene expression data. The package implements various scoring, enrichment and classification approaches, along with tools to compute performance metrics and visualize results.
+**`markeR`** is an R package that provides a modular and extensible framework for the systematic evaluation of gene sets as phenotypic markers using transcriptomic data. The package is designed to support both quantitative analyses and visual exploration of gene set behaviour across experimental and clinical phenotypes.
 
-> **To cite markeR please use:** 
+> **To cite `markeR` please use:** 
 >
->   Martins-Silva R, Kaizeler A, Barbosa-Morais N (2025). _markeR: an R Toolkit for Evaluating Gene Sets as Phenotypic Markers_. Gulbenkian Institute for Molecular Medicine, Faculdade de Medicina, Universidade de Lisboa, Lisbon, Portugal. R package version 0.99.2, https://github.com/DiseaseTranscriptomicsLab/markeR.
+> Martins-Silva R, Kaizeler A, Barbosa-Morais NL (2025). _markeR: An R Toolkit for Evaluating Gene Signatures as Phenotypic Markers_. doi:10.18129/B9.bioc.markeR, R package version 1.3, https://bioconductor.org/packages/markeR.
  
-`inst/Paper/` — Contains all scripts and materials used in the original markeR paper to reproduce analyses and figures.
+The folder `inst/Paper/` is in the **paper** branch and contains all scripts and materials used in the original `markeR` paper to reproduce analyses and figures.  You can browse it [here](https://github.com/DiseaseTranscriptomicsLab/markeR/tree/paper/inst/Paper).
+
 
 ![](man/figures/Workflow.png)
 
@@ -51,12 +52,25 @@ knitr::opts_chunk$set(
   - [4. Visualisation and Evaluation](#4-visualisation-and-evaluation)  
   - [5. Individual Gene Exploration (Optional)](#5-individual-gene-exploration-optional)  
   - [6. Compare with Reference Gene Sets (Optional)](#6-compare-with-reference-gene-sets-optional)  
+- [Python Bridge](#python-bridge)
 - [Contact](#contact)
 
 
 ## Installation 
 
-The latest development release of markeR from [GitHub](https://github.com/) can be installed with:
+
+Install the latest release from Bioconductor:
+
+```{r, eval=FALSE}
+# Install from Bioconductor
+if (!requireNamespace("BiocManager", quietly = TRUE))
+    install.packages("BiocManager")
+BiocManager::install("markeR")
+library(markeR)
+```
+ 
+
+Or install the latest development release of `markeR` from [GitHub](https://github.com/) with:
    
 ``` r
 # install.packages("devtools")
@@ -67,25 +81,17 @@ devtools::install_github("DiseaseTranscriptomicsLab/markeR@*release")
 
 The following tutorials are available:
 
+* [Introduction to markeR][tutorial-introduction]
 * [Benchmarking Mode][tutorial-benchmarking]
 * [Discovery Mode][tutorial-discovery]
 * [Signature Similarity][tutorial-signaturesimilarity]
 
 ## Requirements
 
-This package is officially supported on (based on a GitHub Actions workflow that tests against multiple `R` versions):
-
-- `R` `4.4.x`
-- `R` `4.5.x` 
-
-However, due to `Bioconductor` submission requirements, `R` version `4.5.0` will be listed as required upon package installation in future releases.
-
-⚠️ Compatibility with older R versions depends on the specific versions of dependencies installed. Older versions of `R` (including `R` `3.5.x`, `3.6.x`, `4.0.x`, `4.1.x`, `4.2.x`, and `4.3.x`) may work, but are not officially supported due to upstream dependency constraints. In some cases, installing older versions of dependencies (e.g., via `renv`, `CRAN` snapshots, or `checkpoint`) can restore compatibility. 
+This package is officially supported on `R >= 4.5.0`. ⚠️ Older versions of `R` may work, but are not officially supported due to upstream dependency constraints. In some cases, installing older versions of dependencies (e.g., via `renv`, `CRAN` snapshots, or `checkpoint`) can restore compatibility. 
 
 ## Common Workflow
 
-`markeR` provides a modular pipeline to quantify transcriptomic signatures and assess their association with phenotypic or clinical variables. The typical workflow includes the following steps:
-
 ### 1. Input Requirements 
 
 Depending on the analysis mode, inputs vary slightly.
@@ -122,7 +128,10 @@ gene_sets
 ```
   
 * **Expression Data Frame**:  
-  A filtered and normalised gene expression data frame (genes × samples). Row names must be gene identifiers, and column names must match the sample IDs in the metadata.
+  A filtered and normalised, non-log-transformed gene expression matrix (genes × samples). Row names must be gene identifiers; column names must match sample IDs in the metadata. 
+  
+  **Warning:** If you are using microarray data or outputs from common RNA-seq pipelines (*e.g.*, edgeR), note that the expression values may already be log2-normalised. The input to `markeR` must necessarily be **non-log-transformed**. If your data are log2-transformed, you can revert them by applying `2^data`.
+
 
 ```{r example-expression-matrix, echo=FALSE}
 # Simulate expression matrix: 10 genes × 5 samples
@@ -138,7 +147,7 @@ head(expr_df)
 ```
 
 * **Sample Metadata**:  
-  A data frame with annotations for each sample, with the sample ID in the first column. The row names must match the column names of the expression matrix.
+  A data frame with samples as rows and annotations as columns. The first column should contain sample IDs matching the expression matrix column names.
 
 ```{r example-metadata, echo=FALSE}
 # Simulate sample metadata
@@ -155,78 +164,88 @@ metadata
 
 ### 2. Select Mode of Analysis
 
-* **Discovery Mode**:
-  Explore how a single, well-characterised gene set relates to a specific variable of interest. Suitable for hypothesis generation or signature projection.
+`markeR` provides two modes of operation:
 
-* **Benchmarking Mode**:
-  Evaluate one or more gene sets against multiple metadata variables using a standardised scoring and effect size framework. This mode provides comprehensive visualisations and comparisons across methods.
+* **Benchmarking**:
+evaluates gene sets' performance in marking a metadata variable, *i.e.*, a phenotype, returning comparative visualisations across scoring and enrichment methods.
+
+* **Discovery**:
+examines the relationship between a gene set and one or more variables of interest, suitable for exploratory or hypothesis-generating analyses. 
 
 ### 3. Choose a Quantification Approach
 
-`markeR` supports two complementary strategies for quantifying the association between gene sets and phenotypes:
+Two complementary strategies are implemented for quantifying associations between gene sets and phenotypes:
 
 #### 3.1 Score-Based Approach
 
-This strategy generates a **single numeric score per sample**, reflecting the activity of a gene set. It enables flexible downstream analyses, including comparisons across phenotypic groups.
-
-Three scoring methods are available:
 
-* **Log2-median**: Calculates the median log2 expression of the genes in the set. Sensitive to absolute shifts in expression.
+**Each sample** is assigned a score summarising the collective expression of a gene set in that sample. Scores can be visualised using built-in functions, or used directly in downstream analyses (*e.g.*, comparisons between phenotypic groups of samples, correlations with numerical phenotypes).
 
-* **Ranking**: Ranks all genes within each sample and averages the ranks of gene set members. Captures relative ordering rather than magnitude.
+Available methods:
 
-* **ssGSEA**: Computes a single-sample gene set enrichment score using the ssGSEA algorithm. Reflects the coordinated up- or down-regulation of the set in each sample.
+* **Log2-median**: mean of the across-sample normalised log2 median-centred expression levels of the genes in the set; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
 
-These methods vary in assumptions and sensitivity. Robust gene sets are expected to perform consistently across all three.
+* **Ranking**: mean expression rank of gene set members in each sample; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset, and normalised by the number of genes in the set. 
 
-#### 3.2 Enrichment-Based Approach
+* **ssGSEA**: single-sample gene set enrichment score using ssGSEA; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
 
-This approach uses a classical **gene set enrichment analysis (GSEA)** framework to evaluate whether the gene set is significantly overrepresented at the top or bottom of a ranked list of genes (e.g., ranked by fold change or correlation with phenotype).
+Gene sets that are robust phenotypic markers are expected to yield consistently high scores across methods.
 
-* **GSEA**: Computes a Normalised Enrichment Score (NES) for each contrast or variable of interest, adjusting for gene set size and multiple testing.
+#### 3.2 Enrichment-Based Approach
 
-Use this approach when interested in collective behaviour of gene sets in relation to ranked differential signals.
+Enrichment-based methods implement **Gene Set Enrichment Analysis (GSEA)**. Genes are ranked according to differential expression statistics, and a Normalised Enrichment Score (NES) per variable of interest is computed, accompanied by a p-value adjusted for multiple hypothesis testing.
 
 ### 4. Visualisation and Evaluation
 
-In **Benchmarking Mode**, `markeR` offers a range of visual summaries:
+In **Benchmarking Mode**, `markeR` offers a range of visual summaries: 
 
-* Violin or scatter plots showing score distributions by phenotype
-* Volcano plots and heatmaps based on effect sizes (Cohen’s *d* or *f*)
-* ROC curves and AUC values 
-* Null distribution testing using random gene sets matched for size and directionality
+* Violin plots of score distributions by categorical phenotype;
+* Scatter plots of association between scores and numerical phenotypes;
+* Volcano plots and heatmaps of scores or differential gene set expression based on effect sizes (Cohen’s *d* or *f*);
+* ROC curves and respective AUC values of gene sets' phenotypic classification performance;
+* Violin plots of effect size distributions (Cohen’s *d*) for pairwise group differences in scores, for original and simulated gene sets;
+* Plots summarising NES alongside adjusted p-values (*e.g.*, lollipop plots); 
+* GSEA plots showing running enrichment scores across ranked gene lists.
 
-In **Discovery Mode**, the output focuses on a single gene set:
 
-* Score distributions by phenotype
-* Pairwise contrasts (Cohen’s *d*) and overall effect sizes (Cohen’s *f*)
-* Enrichment score summaries (NES) with adjusted p-values (e.g., lollipop plots)
+In **Discovery Mode**, the output focuses on a single gene set:
 
-Benchmarking mode offers the most comprehensive set of features and allows users to seamlessly move from discovery to benchmarking mode once a variable of interest has been identified and further testing is required. The main difference from Discovery mode is that Benchmarking is designed to evaluate multiple gene sets simultaneously, whereas Discovery mode focuses on quantifying a single, robust gene set.
+* Score distributions stratified by variable;
+* Effect sizes for pairwise and multiple-group differences (Cohen's *d* and *f*, respectively);
+* Cross-variable summaries of NES and adjusted p-values (*e.g.*, lollipop plots).
 
-### 5. Individual Gene Exploration (Optional)
+The Benchmarking Mode offers the most comprehensive set of features. Users can seamlessly move from Discovery to Benchmarking once a variable of interest has been identified and further testing is required. Benchmarking is designed to evaluate multiple gene sets simultaneously, whereas Discovery focuses on the performance of a single gene set.
 
-To better understand the contribution of individual genes within a gene set and identify whether specific genes drive the overall signal, `markeR` offers a suite of gene-level exploratory analyses, including:
+### 5. Individual Gene Exploration  
+   
+To better understand the contribution of individual genes within a gene set, and identify whether specific genes drive the set's collective signal, `markeR` provides `VisualiseIndividualGenes`. Available options include:
 
-* Expression heatmaps of genes across samples and groups
-* Violin plots showing expression distributions of individual genes
-* Correlation heatmaps to reveal co-expression patterns among genes in the set
-* ROC curves and AUC values for individual genes to evaluate their discriminatory power
-* Effect size calculations (Cohen’s *d*) per gene to quantify differential expression
-* Principal Component Analysis (PCA) on gene set genes to assess variance explained and sample clustering
+* Expression heatmaps of genes across samples or groups of samples;
+* Violin plots showing cross-sample expression distributions of individual genes; 
+* Heatmaps of pairwise cross-sample expression correlation between genes in the set;
+* ROC curves and AUC values to evaluate single genes' performance as phenotypic markers;
+* Effect size estimation (Cohen’s *d*) of expression differences between groups of samples;
+* Principal Component Analysis (PCA) of expression of genes in the set, to evaluate which genes dominate collective variance and how samples separate according to the gene set's expression.
 
-### 6. Compare with Reference Gene Sets (Optional)
+### 6. Compare with Reference Gene Sets  
 
-`markeR` allows comparison of user-defined gene sets to reference sets (e.g., from MSigDB) using:
+`markeR` also supports comparison of user-defined gene sets against reference collections (e.g., MSigDB). Two complementary similarity metrics are implemented:
 
 * **Jaccard Index**:
-  Measures gene overlap relative to union size.
+the ratio of the number of genes in common over the total number of genes in the two sets.
 
-* **Log Odds Ratio (logOR)**:
-  Computes enrichment using a user-defined gene universe and Fisher’s exact test.
+* **Log Odds Ratio (logOR)** from Fisher’s exact test of association between gene sets, given a specified gene universe.
 
-Filters can be applied based on similarity thresholds (e.g., minimum Jaccard, OR, or p-value).
- 
+Filters can be applied based on similarity thresholds (e.g., minimum Jaccard, OR, or Fisher's test p-value).
+
+## Python Bridge
+
+For users who prefer Python, a lightweight bridge is available in
+`python/` that allows calling any `markeR` function from a Python
+environment via [`rpy2`](https://rpy2.github.io/). It includes a tutorial
+workflow script and a generic command-line wrapper. See
+[`python/README.md`](python/README.md) for installation
+instructions and usage examples.
 
 
 ## Contact 
@@ -236,7 +255,7 @@ Filters can be applied based on similarity thresholds (e.g., minimum Jaccard, OR
 **Rita Martins-Silva**  
 Email: [rita.silva@medicina.ulisboa.pt](mailto:rita.silva@medicina.ulisboa.pt)
 
- 
-[tutorial-benchmarking]: https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_BenchmarkingMode.html
-[tutorial-discovery]: https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_DiscoveryMode.html
-[tutorial-signaturesimilarity]: https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_GeneSetSimilarity.html
+[tutorial-introduction]: https://diseasetranscriptomicslab.github.io/markeR/articles/markeR.html
+[tutorial-benchmarking]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html
+[tutorial-discovery]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_DiscoveryMode.html
+[tutorial-signaturesimilarity]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_GeneSetSimilarity.html
diff --git a/README.md b/README.md
index a7266d4..af60464 100644
--- a/README.md
+++ b/README.md
@@ -5,32 +5,34 @@
 
 <!-- badges: start -->
 
-![](https://img.shields.io/badge/status-development-yellowgreen)
+<!--![](https://img.shields.io/badge/status-development-yellowgreen)-->
+
 [![Pkgdown](https://img.shields.io/badge/docs-pkgdown-blue.svg)](https://diseasetranscriptomicslab.github.io/markeR/)
-[![Minimal R
-Version](https://img.shields.io/badge/min%20R-4.4.0-blue.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/Rminversion.yaml)
+![Minimal R
+Version](https://img.shields.io/badge/min%20R-4.5.0-blue.svg)
 [![codecov](https://codecov.io/gh/DiseaseTranscriptomicsLab/markeR/graph/badge.svg?token=7T1I4JCJG6)](https://codecov.io/gh/DiseaseTranscriptomicsLab/markeR)
 <!-- [![R-CMD-check](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/R-CMD-check.yaml)-->
 <!-- [![Bioconductor Check](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/bioc-check.yml/badge.svg)](https://github.com/DiseaseTranscriptomicsLab/markeR/actions/workflows/bioc-check.yml) -->
 
 <!-- badges: end -->
 
-**markeR** provides a suite of methods for using gene sets (signatures)
-to quantify and evaluate the extent to which a given gene signature
-marks a specific phenotype from gene expression data. The package
-implements various scoring, enrichment and classification approaches,
-along with tools to compute performance metrics and visualize results.
+**`markeR`** is an R package that provides a modular and extensible
+framework for the systematic evaluation of gene sets as phenotypic
+markers using transcriptomic data. The package is designed to support
+both quantitative analyses and visual exploration of gene set behaviour
+across experimental and clinical phenotypes.
 
-> **To cite markeR please use:**
+> **To cite `markeR` please use:**
 >
-> Martins-Silva R, Kaizeler A, Barbosa-Morais N (2025). *markeR: an R
-> Toolkit for Evaluating Gene Sets as Phenotypic Markers*. Gulbenkian
-> Institute for Molecular Medicine, Faculdade de Medicina, Universidade
-> de Lisboa, Lisbon, Portugal. R package version 0.99.2,
-> <https://github.com/DiseaseTranscriptomicsLab/markeR>.
+> Martins-Silva R, Kaizeler A, Barbosa-Morais NL (2025). *markeR: An R
+> Toolkit for Evaluating Gene Signatures as Phenotypic Markers*.
+> <doi:10.18129/B9.bioc.markeR>, R package version 1.3,
+> <https://bioconductor.org/packages/markeR>.
 
-`inst/Paper/` — Contains all scripts and materials used in the original
-markeR paper to reproduce analyses and figures.
+The folder `inst/Paper/` is in the **paper** branch and contains all
+scripts and materials used in the original `markeR` paper to reproduce
+analyses and figures. You can browse it
+[here](https://github.com/DiseaseTranscriptomicsLab/markeR/tree/paper/inst/Paper).
 
 ![](man/figures/Workflow.png)
 
@@ -51,12 +53,23 @@ markeR paper to reproduce analyses and figures.
     (Optional)](#5-individual-gene-exploration-optional)  
   - [6. Compare with Reference Gene Sets
     (Optional)](#6-compare-with-reference-gene-sets-optional)  
+- [Python Bridge](#python-bridge)
 - [Contact](#contact)
 
 ## Installation
 
-The latest development release of markeR from
-[GitHub](https://github.com/) can be installed with:
+Install the latest release from Bioconductor:
+
+``` r
+# Install from Bioconductor
+if (!requireNamespace("BiocManager", quietly = TRUE))
+    install.packages("BiocManager")
+BiocManager::install("markeR")
+library(markeR)
+```
+
+Or install the latest development release of `markeR` from
+[GitHub](https://github.com/) with:
 
 ``` r
 # install.packages("devtools")
@@ -67,38 +80,25 @@ devtools::install_github("DiseaseTranscriptomicsLab/markeR@*release")
 
 The following tutorials are available:
 
+- [Introduction to
+  markeR](https://diseasetranscriptomicslab.github.io/markeR/articles/markeR.html)
 - [Benchmarking
-  Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_BenchmarkingMode.html)
+  Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html)
 - [Discovery
-  Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_DiscoveryMode.html)
+  Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_DiscoveryMode.html)
 - [Signature
-  Similarity](https://diseasetranscriptomicslab.github.io/markeR/articles/Tutorial_GeneSetSimilarity.html)
+  Similarity](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_GeneSetSimilarity.html)
 
 ## Requirements
 
-This package is officially supported on (based on a GitHub Actions
-workflow that tests against multiple `R` versions):
-
-- `R` `4.4.x`
-- `R` `4.5.x`
-
-However, due to `Bioconductor` submission requirements, `R` version
-`4.5.0` will be listed as required upon package installation in future
-releases.
-
-⚠️ Compatibility with older R versions depends on the specific versions
-of dependencies installed. Older versions of `R` (including `R` `3.5.x`,
-`3.6.x`, `4.0.x`, `4.1.x`, `4.2.x`, and `4.3.x`) may work, but are not
-officially supported due to upstream dependency constraints. In some
-cases, installing older versions of dependencies (e.g., via `renv`,
-`CRAN` snapshots, or `checkpoint`) can restore compatibility.
+This package is officially supported on `R >= 4.5.0`. ⚠️ Older versions
+of `R` may work, but are not officially supported due to upstream
+dependency constraints. In some cases, installing older versions of
+dependencies (e.g., via `renv`, `CRAN` snapshots, or `checkpoint`) can
+restore compatibility.
 
 ## Common Workflow
 
-`markeR` provides a modular pipeline to quantify transcriptomic
-signatures and assess their association with phenotypic or clinical
-variables. The typical workflow includes the following steps:
-
 ### 1. Input Requirements
 
 Depending on the analysis mode, inputs vary slightly.
@@ -128,9 +128,15 @@ gene_sets
 ```
 
 - **Expression Data Frame**:  
-  A filtered and normalised gene expression data frame (genes ×
-  samples). Row names must be gene identifiers, and column names must
-  match the sample IDs in the metadata.
+  A filtered and normalised, non-log-transformed, gene expression matrix
+  (genes × samples). Row names must be gene identifiers; column names
+  must match sample IDs in the metadata.
+
+  **Warning:** If you are using microarray data or outputs from common
+  RNA-seq pipelines (*e.g.*, edgeR), note that the expression values may
+  already be log2-normalised. The input to `markeR` must necessarily be
+  **non-log-transformed**. If your data are log2-transformed, you can
+  revert them by applying `2^data`.
 
 ``` r
 head(expr_df)
@@ -144,9 +150,9 @@ head(expr_df)
 ```
 
 - **Sample Metadata**:  
-  A data frame with annotations for each sample, with the sample ID in
-  the first column. The row names must match the column names of the
-  expression matrix.
+  A data frame with samples as rows and annotations as columns. The
+  first column should contain sample IDs matching the expression matrix
+  column names.
 
 ``` r
 metadata
@@ -160,109 +166,133 @@ metadata
 
 ### 2. Select Mode of Analysis
 
-- **Discovery Mode**: Explore how a single, well-characterised gene set
-  relates to a specific variable of interest. Suitable for hypothesis
-  generation or signature projection.
+`markeR` provides two modes of operation:
+
+- **Benchmarking**: evaluates gene sets’ performance in marking a
+  metadata variable, *i.e.*, a phenotype, returning comparative
+  visualisations across scoring and enrichment methods.
 
-- **Benchmarking Mode**: Evaluate one or more gene sets against multiple
-  metadata variables using a standardised scoring and effect size
-  framework. This mode provides comprehensive visualisations and
-  comparisons across methods.
+- **Discovery**: examines the relationship between a gene set and one or
+  more variables of interest, suitable for exploratory or
+  hypothesis-generating analyses.
 
 ### 3. Choose a Quantification Approach
 
-`markeR` supports two complementary strategies for quantifying the
-association between gene sets and phenotypes:
+Two complementary strategies are implemented for quantifying
+associations between gene sets and phenotypes:
 
 #### 3.1 Score-Based Approach
 
-This strategy generates a **single numeric score per sample**,
-reflecting the activity of a gene set. It enables flexible downstream
-analyses, including comparisons across phenotypic groups.
+**Each sample** is assigned a score summarising the collective expression
+of a gene set in that sample. Scores can be visualised using built-in
+functions, or used directly in downstream analyses (*e.g.*, comparisons
+between phenotypic groups of samples, correlations with numerical
+phenotypes).
 
-Three scoring methods are available:
+Available methods:
 
-- **Log2-median**: Calculates the median log2 expression of the genes in
-  the set. Sensitive to absolute shifts in expression.
+- **Log2-median**: mean of the across-sample normalised log2
+  median-centred expression levels of the genes in the set; for
+  bidirectional gene sets, the sample score is the partial score for the
+  subset of putatively upregulated genes minus that of the downregulated
+  subset.
 
-- **Ranking**: Ranks all genes within each sample and averages the ranks
-  of gene set members. Captures relative ordering rather than magnitude.
+- **Ranking**: mean expression rank of gene set members in each sample;
+  for bidirectional gene sets, the sample score is the partial score for
+  the subset of putatively upregulated genes minus that of the
+  downregulated subset, and normalised by the number of genes in the
+  set.
 
-- **ssGSEA**: Computes a single-sample gene set enrichment score using
-  the ssGSEA algorithm. Reflects the coordinated up- or down-regulation
-  of the set in each sample.
+- **ssGSEA**: single-sample gene set enrichment score using ssGSEA; for
+  bidirectional gene sets, the sample score is the partial score for the
+  subset of putatively upregulated genes minus that of the downregulated
+  subset.
 
-These methods vary in assumptions and sensitivity. Robust gene sets are
-expected to perform consistently across all three.
+Gene sets that are robust phenotypic markers are expected to yield
+consistently high scores across methods.
 
 #### 3.2 Enrichment-Based Approach
 
-This approach uses a classical **gene set enrichment analysis (GSEA)**
-framework to evaluate whether the gene set is significantly
-overrepresented at the top or bottom of a ranked list of genes (e.g.,
-ranked by fold change or correlation with phenotype).
-
-- **GSEA**: Computes a Normalised Enrichment Score (NES) for each
-  contrast or variable of interest, adjusting for gene set size and
-  multiple testing.
-
-Use this approach when interested in collective behaviour of gene sets
-in relation to ranked differential signals.
+Enrichment-based methods implement **Gene Set Enrichment Analysis
+(GSEA)**. Genes are ranked according to differential expression
+statistics, and a Normalised Enrichment Score (NES) per variable of
+interest is computed, accompanied by a p-value adjusted for multiple
+hypothesis testing.
 
 ### 4. Visualisation and Evaluation
 
 In **Benchmarking Mode**, `markeR` offers a range of visual summaries:
 
-- Violin or scatter plots showing score distributions by phenotype
-- Volcano plots and heatmaps based on effect sizes (Cohen’s *d* or *f*)
-- ROC curves and AUC values
-- Null distribution testing using random gene sets matched for size and
-  directionality
+- Violin plots of score distributions by categorical phenotype;
+- Scatter plots of association between scores and numerical phenotypes;
+- Volcano plots and heatmaps of scores or differential gene set
+  expression based on effect sizes (Cohen’s *d* or *f*);
+- ROC curves and respective AUC values of gene sets’ phenotypic
+  classification performance;
+- Violin plots of effect size distributions (Cohen’s *d*) for pairwise
+  group differences in scores, for original and simulated gene sets;
+- Plots summarising NES alongside adjusted p-values (*e.g.*, lollipop
+  plots);
+- GSEA plots showing running enrichment scores across ranked gene lists.
 
 In **Discovery Mode**, the output focuses on a single gene set:
 
-- Score distributions by phenotype
-- Pairwise contrasts (Cohen’s *d*) and overall effect sizes (Cohen’s
-  *f*)
-- Enrichment score summaries (NES) with adjusted p-values (e.g.,
-  lollipop plots)
+- Score distributions stratified by variable;
+- Effect sizes for pairwise and multiple-group differences (Cohen’s *d*
+  and *f*, respectively);
+- Cross-variable summaries of NES and adjusted p-values (*e.g.*,
+  lollipop plots).
 
-Benchmarking mode offers the most comprehensive set of features and
-allows users to seamlessly move from discovery to benchmarking mode once
+The Benchmarking Mode offers the most comprehensive set of features.
+Users can seamlessly move from Discovery to Benchmarking once
 a variable of interest has been identified and further testing is
-required. The main difference from Discovery mode is that Benchmarking
-is designed to evaluate multiple gene sets simultaneously, whereas
-Discovery mode focuses on quantifying a single, robust gene set.
+required. Benchmarking is designed to evaluate multiple gene sets
+simultaneously, whereas Discovery focuses on the performance of a single
+gene set.
 
-### 5. Individual Gene Exploration (Optional)
+### 5. Individual Gene Exploration
 
 To better understand the contribution of individual genes within a gene
-set and identify whether specific genes drive the overall signal,
-`markeR` offers a suite of gene-level exploratory analyses, including:
-
-- Expression heatmaps of genes across samples and groups
-- Violin plots showing expression distributions of individual genes
-- Correlation heatmaps to reveal co-expression patterns among genes in
-  the set
-- ROC curves and AUC values for individual genes to evaluate their
-  discriminatory power
-- Effect size calculations (Cohen’s *d*) per gene to quantify
-  differential expression
-- Principal Component Analysis (PCA) on gene set genes to assess
-  variance explained and sample clustering
+set, and identify whether specific genes drive the set’s collective
+signal, `markeR` provides `VisualiseIndividualGenes`. Available options
+include:
+
+- Expression heatmaps of genes across samples or groups of samples;
+- Violin plots showing cross-sample expression distributions of
+  individual genes;
+- Heatmaps of pairwise cross-sample expression correlation between genes
+  in the set;
+- ROC curves and AUC values to evaluate single genes’ performance as
+  phenotypic markers;
+- Effect size estimation (Cohen’s *d*) of expression differences between
+  groups of samples;
+- Principal Component Analysis (PCA) of expression of genes in the set,
+  to evaluate which genes dominate collective variance and how samples
+  separate according to the gene set’s expression.
+
+### 6. Compare with Reference Gene Sets
+
+`markeR` also supports comparison of user-defined gene sets against
+reference collections (e.g., MSigDB). Two complementary similarity
+metrics are implemented:
+
+- **Jaccard Index**: the ratio of the number of genes in common over the
+  total number of genes in the two sets.
+
+- **Log Odds Ratio (logOR)** from Fisher’s exact test of association
+  between gene sets, given a specified gene universe.
 
-### 6. Compare with Reference Gene Sets (Optional)
-
-`markeR` allows comparison of user-defined gene sets to reference sets
-(e.g., from MSigDB) using:
-
-- **Jaccard Index**: Measures gene overlap relative to union size.
+Filters can be applied based on similarity thresholds (e.g., minimum
+Jaccard, OR, or Fisher’s test p-value).
 
-- **Log Odds Ratio (logOR)**: Computes enrichment using a user-defined
-  gene universe and Fisher’s exact test.
+## Python Bridge
 
-Filters can be applied based on similarity thresholds (e.g., minimum
-Jaccard, OR, or p-value).
+For users who prefer Python, a lightweight bridge is available in
+`python/` that allows calling any `markeR` function from a Python
+environment via [`rpy2`](https://rpy2.github.io/). It includes a
+tutorial workflow script and a generic command-line wrapper. See
+[`python/README.md`](python/README.md) for installation
+instructions and usage examples.
 
 ## Contact
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
index b7206c0..2f0132b 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -11,7 +11,7 @@ badges: true
 home:
   sidebar:
       structure: [links, license, community, citation, authors, dev]
-
+  
 reference:
   - title: "Package Help Page"
     contents:
diff --git a/docs/authors.html b/docs/authors.html
index cd6f9f9..45a8ed8 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -7,9 +7,7 @@
 
     <a class="navbar-brand me-2" href="index.html">markeR</a>
 
-
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.9.3</small>
-
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.99.2</small>
 
 
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
@@ -17,12 +15,13 @@
     </button>
 
     <div id="navbar" class="collapse navbar-collapse ms-3">
-      <ul class="navbar-nav me-auto"><li class="nav-item"><a class="nav-link" href="reference/index.html">Reference</a></li>
+      <ul class="navbar-nav me-auto"><li class="nav-item"><a class="nav-link" href="articles/markeR.html">Get started</a></li>
+<li class="nav-item"><a class="nav-link" href="reference/index.html">Reference</a></li>
 <li class="nav-item dropdown">
   <button class="nav-link dropdown-toggle" type="button" id="dropdown-articles" data-bs-toggle="dropdown" aria-expanded="false" aria-haspopup="true">Articles</button>
-  <ul class="dropdown-menu" aria-labelledby="dropdown-articles"><li><a class="dropdown-item" href="articles/Tutorial_BenchmarkingMode.html">Benchmarking Mode Tutorial</a></li>
-    <li><a class="dropdown-item" href="articles/Tutorial_DiscoveryMode.html">Discovery Mode Tutorial</a></li>
-    <li><a class="dropdown-item" href="articles/Tutorial_GeneSetSimilarity.html">Gene Set Similarity Tutorial</a></li>
+  <ul class="dropdown-menu" aria-labelledby="dropdown-articles"><li><a class="dropdown-item" href="articles/Article_BenchmarkingMode.html">Benchmarking Mode Tutorial</a></li>
+    <li><a class="dropdown-item" href="articles/Article_DiscoveryMode.html">Discovery Mode Tutorial</a></li>
+    <li><a class="dropdown-item" href="articles/Article_GeneSetSimilarity.html">Gene Set Similarity Tutorial</a></li>
   </ul></li>
 <li class="nav-item"><a class="nav-link" href="news/index.html">Changelog</a></li>
       </ul><ul class="navbar-nav"><li class="nav-item"><form class="form-inline" role="search">
@@ -63,24 +62,18 @@ <h2>Authors</h2>
 
     <div class="section level2">
       <h2 id="citation">Citation</h2>
-      <p><small class="dont-index">Source: <a href="https://github.com/DiseaseTranscriptomicsLab/markeR/blob/HEAD/inst/CITATION" class="external-link"><code>inst/CITATION</code></a></small></p>
-
-      <p>Martins-Silva R, Kaizeler A, Barbosa-Morais N (2025).
-<em>markeR: an R Toolkit for Evaluating Gene Sets as Phenotypic Markers</em>.
-Gulbenkian Institute for Molecular Medicine, Faculdade de Medicina, Universidade de Lisboa, Lisbon, Portugal.
-R package version 0.9.3, <a href="https://github.com/DiseaseTranscriptomicsLab/markeR" class="external-link">https://github.com/DiseaseTranscriptomicsLab/markeR</a>.
+      <p><small class="dont-index">Source: <a href="https://github.com/DiseaseTranscriptomicsLab/markeR/blob/HEAD/DESCRIPTION" class="external-link"><code>DESCRIPTION</code></a></small></p>
 
+      <p>Martins-Silva R, Kaizeler A, Barbosa-Morais NL (2025).
+<em>markeR: An R Toolkit for Evaluating Gene Signatures as Phenotypic Markers</em>.
+R package version 0.99.2, <a href="https://diseasetranscriptomicslab.github.io/markeR/">https://diseasetranscriptomicslab.github.io/markeR/</a>.
 </p>
       <pre>@Manual{,
-  title = {markeR: an R Toolkit for Evaluating Gene Sets as Phenotypic Markers},
-  author = {Rita Martins-Silva and Alexandre Kaizeler and Nuno L. Barbosa-Morais},
-  organization = {Gulbenkian Institute for Molecular Medicine, Faculdade de Medicina, Universidade de Lisboa},
-  address = {Lisbon, Portugal},
+  title = {markeR: An R Toolkit for Evaluating Gene Signatures as Phenotypic Markers},
+  author = {Rita Martins-Silva and Alexandre Kaizeler and Nuno Luís Barbosa-Morais},
   year = {2025},
-
-  note = {R package version 0.9.3},
- 
-  url = {https://github.com/DiseaseTranscriptomicsLab/markeR},
+  note = {R package version 0.99.2},
+  url = {https://diseasetranscriptomicslab.github.io/markeR/},
 }</pre>
     </div>
 
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 177ec2d..fbd9b0c 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -2,11 +2,11 @@ pandoc: '3.4'
 pkgdown: 2.1.3
 pkgdown_sha: ~
 articles:
-  Tutorial_BenchmarkingMode: Tutorial_BenchmarkingMode.html
-  Tutorial_DiscoveryMode: Tutorial_DiscoveryMode.html
-  Tutorial_GeneSetSimilarity: Tutorial_GeneSetSimilarity.html
-last_built: 2025-07-02T17:46Z
-
+  articles/Article_BenchmarkingMode: Article_BenchmarkingMode.html
+  articles/Article_DiscoveryMode: Article_DiscoveryMode.html
+  articles/Article_GeneSetSimilarity: Article_GeneSetSimilarity.html
+  markeR: markeR.html
+last_built: 2025-08-18T18:23Z
 urls:
   reference: https://diseasetranscriptomicslab.github.io/markeR/reference
   article: https://diseasetranscriptomicslab.github.io/markeR/articles
diff --git a/inst/Paper/Figures/Figs/Figure1_Methods.png b/inst/Paper/Figures/Figs/Figure1_Methods.png
deleted file mode 100644
index 7543603..0000000
Binary files a/inst/Paper/Figures/Figs/Figure1_Methods.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure2_GeneSets_Datasets.png b/inst/Paper/Figures/Figs/Figure2_GeneSets_Datasets.png
deleted file mode 100644
index ae085bc..0000000
Binary files a/inst/Paper/Figures/Figs/Figure2_GeneSets_Datasets.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure3_Scores.png b/inst/Paper/Figures/Figs/Figure3_Scores.png
deleted file mode 100644
index 01dea3b..0000000
Binary files a/inst/Paper/Figures/Figs/Figure3_Scores.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure4_Enrichment_alternative.png b/inst/Paper/Figures/Figs/Figure4_Enrichment_alternative.png
deleted file mode 100644
index 865cee0..0000000
Binary files a/inst/Paper/Figures/Figs/Figure4_Enrichment_alternative.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure5_ComparisonSigs.png b/inst/Paper/Figures/Figs/Figure5_ComparisonSigs.png
deleted file mode 100644
index d4dc750..0000000
Binary files a/inst/Paper/Figures/Figs/Figure5_ComparisonSigs.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure6_tradeoffs.png b/inst/Paper/Figures/Figs/Figure6_tradeoffs.png
deleted file mode 100644
index fcafb3f..0000000
Binary files a/inst/Paper/Figures/Figs/Figure6_tradeoffs.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/Figure7_GTEx.png b/inst/Paper/Figures/Figs/Figure7_GTEx.png
deleted file mode 100644
index 8729f82..0000000
Binary files a/inst/Paper/Figures/Figs/Figure7_GTEx.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure1_Preprocessing.png b/inst/Paper/Figures/Figs/SupFigure1_Preprocessing.png
deleted file mode 100644
index c703da5..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure1_Preprocessing.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure2_GeneSets_Function_Localisation.png b/inst/Paper/Figures/Figs/SupFigure2_GeneSets_Function_Localisation.png
deleted file mode 100644
index a536f9d..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure2_GeneSets_Function_Localisation.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure3_Scores_SenescenceStressors.png b/inst/Paper/Figures/Figs/SupFigure3_Scores_SenescenceStressors.png
deleted file mode 100644
index 50ae6b9..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure3_Scores_SenescenceStressors.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure4_Scores_AUCCohen.png b/inst/Paper/Figures/Figs/SupFigure4_Scores_AUCCohen.png
deleted file mode 100644
index b483a8e..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure4_Scores_AUCCohen.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure5_FPRsimulations.png b/inst/Paper/Figures/Figs/SupFigure5_FPRsimulations.png
deleted file mode 100644
index 9a51222..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure5_FPRsimulations.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure6_GTEx_SenMayo.png b/inst/Paper/Figures/Figs/SupFigure6_GTEx_SenMayo.png
deleted file mode 100644
index 671439b..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure6_GTEx_SenMayo.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figs/SupFigure7_GTEx_SenMayo_Custom.png b/inst/Paper/Figures/Figs/SupFigure7_GTEx_SenMayo_Custom.png
deleted file mode 100644
index 94a670c..0000000
Binary files a/inst/Paper/Figures/Figs/SupFigure7_GTEx_SenMayo_Custom.png and /dev/null differ
diff --git a/inst/Paper/Figures/Figures_Paper_Main.Rmd b/inst/Paper/Figures/Figures_Paper_Main.Rmd
deleted file mode 100644
index a6624ce..0000000
--- a/inst/Paper/Figures/Figures_Paper_Main.Rmd
+++ /dev/null
@@ -1,3352 +0,0 @@
----
-title: "Main Paper Figures"
-author: "Rita Martins-Silva"
-date: "05/05/2025"
-output: html_document
----
-
-
-# Main Figures {.tabset .tabset-pills}
-
-## Set up {.tabset}
-
-### Libraries
-
-```{r}
-library("ggplot2")
-library("colorspace")
-library("scales")
-library("scater") 
-library("reshape2")
-library("markeR")
-library("ggbreak")
-library("ggnewscale")
-library("dplyr")
-library("tidyr")  
-library(patchwork)
-library("ggpubr") 
-library(purrr)
-```
-
-### Functions
-
-```{r}
-# Your Cohen's d function
-compute_cohens_d <- function(x, y) {
-  n1 <- length(x)
-  n2 <- length(y)
-  if(n1 < 2 || n2 < 2) return(NA)
-  m1 <- mean(x)
-  m2 <- mean(y)
-  s1 <- sd(x)
-  s2 <- sd(y)
-  pooled_sd <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2))
-  if (pooled_sd == 0) return(NA)
-  d <- abs((m1 - m2) / pooled_sd)
-  return(d)
-}
-```
-
-```{r}
-wrap_title <- function(title, width = 30) {
-  if (nchar(title) <= width) {
-    return(title)  # No need to wrap if it fits
-  }
-  
-  wrapped_title <- ""
-  while (nchar(title) > width) {
-    # Find positions of capital letters and symbols near the wrap point
-    capital_pos <- gregexpr("[A-Z]", title)[[1]]
-    symbol_pos <- gregexpr("(_|-|:|\\+|\\\\|/|\\*|\\.|,|;|\\?|!)", title)[[1]]
-    
-    # Check for symbol breaks within the last few characters (width - 5 to width)
-    valid_symbol_breaks <- symbol_pos[symbol_pos >= (width - 5) & symbol_pos <= width]
-    
-    if (length(valid_symbol_breaks) > 0) {
-      # If a suitable symbol is found, break at the first valid symbol
-      break_at <- valid_symbol_breaks[1]
-    } else {
-      # If no suitable symbol, look for capital letters within the same range
-      valid_capital_breaks <- capital_pos[capital_pos >= (width - 5) & capital_pos <= width]
-      
-      if (length(valid_capital_breaks) > 0) {
-        # If a capital letter is found, break just before the capital letter
-        break_at <- valid_capital_breaks[1] - 1
-      } else {
-        # If no suitable symbol or capital letter, break at width
-        break_at <- width
-      }
-    }
-    
-    # Append the wrapped line
-    wrapped_title <- paste0(wrapped_title, substr(title, 1, break_at), "\n")
-    
-    # Update title with the remaining text after the break
-    title <- substr(title, break_at + 1, nchar(title))
-  }
-  
-  # Add the remaining part of the title
-  wrapped_title <- paste0(wrapped_title, title)
-  
-  return(wrapped_title)
-}
-```
-
-```{r}
-# Adaptation of ROC_Scores, to not include the title of the signature
-
-ROC_Scores <- function(data, metadata, gene_sets, method = c("logmedian","ssGSEA","ranking","all"), variable,
-                       colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"), grid = TRUE, spacing_annotation=0.3, ncol=NULL, nrow=NULL, mode=c("simple","medium","extensive"), widthTitle = 18, title=NULL, titlesize=12) {
-  
-  data_ROCAUC <- ROCAUC_Scores_Calculate(data = data, metadata = metadata, gene_sets = gene_sets, method = method, variable = variable, mode = mode)
-  
-  plot_list <- list()
-  
-  for (signature in names(data_ROCAUC[[1]])) {  # Iterate over signatures
-    for (contrast in names(data_ROCAUC[[1]][[signature]])) {  # Iterate over contrasts
-      
-      # Initialize an empty data frame to store all methods
-      combined_df <- data.frame()
-      auc_values <- list()
-      
-      for (method_name in names(data_ROCAUC)) {  # Iterate over methods
-        
-        if (length(names(data_ROCAUC)) == 1){
-          if (is.na(colors[method_name])) names(colors) <- method_name #if the user changed the color to only one, not named
-        }
-        
-        
-        roc_data <- data_ROCAUC[[method_name]][[signature]][[contrast]]
-        
-        # Create a data frame with FPR, TPR, and Method
-        temp_df <- data.frame(
-          FPR = rev(1 - roc_data$ROC$specificities),
-          TPR = rev(roc_data$ROC$sensitivities),
-          Method = method_name
-        )
-        
-        # Combine into one large data frame
-        combined_df <- rbind(combined_df, temp_df)
-        
-        # Calculate AUC for this method and contrast
-        auc_value <- roc_data$AUC
-        auc_values[[method_name]] <- auc_value
-        
-      }
-      
-      # Create the ROC plot with all methods on the same plot
-      p <- ggplot2::ggplot(combined_df, ggplot2::aes(x = FPR, y = TPR, color = Method)) +
-        ggplot2::geom_line(size = 1) +  # Plot all ROC curves on the same plot
-        ggplot2::scale_color_manual(values = colors) +  # Ensure correct color mapping for each method
-        ggplot2::labs(title = wrap_title(contrast,widthTitle), x = "False Positive Rate", y = "True Positive Rate") +
-        ggplot2::theme_classic() +
-        ggplot2::theme(legend.position = "none") + # Remove the default legend
-        ggplot2::geom_abline(linetype = "dashed", color = "gray") +
-        ggplot2::theme( plot.title = ggplot2::element_text(hjust = 0.5, size=titlesize ),
-                        plot.subtitle = ggplot2::element_text(hjust = 0.5, size=titlesize-1.5))
-      
-      # Add AUC text labels to the bottom-right corner
-      auc_texts <- data.frame(Method = names(auc_values),
-                              AUC = unlist(auc_values),
-                              x = rep(1, length(auc_values)),  # Place all text at x = 1 (right edge)
-                              y = seq(0.05, spacing_annotation, length.out = length(auc_values)))  # Adjust the vertical positions
-      
-      p <- p + ggplot2::geom_label(data = auc_texts,
-                                   ggplot2::aes(x = x, y = y, label = paste0("AUC ", Method, " = ", round(AUC, 2), ""), color = Method),
-                                   size = 3,
-                                   vjust = 0,  # Adjust vertical position
-                                   hjust = 1,  # Adjust horizontal position to align to the right
-                                   inherit.aes = FALSE,
-                                   fill = "white")  # Prevent inheritance of global aes from the main plot
-      
-      
-      # Store plot
-      plot_list[[paste(signature, contrast)]] <- p
-    }
-  }
-  
-  
-  
-  if (grid) {
-    
-    # Get number of signatures and contrasts
-    n_signatures <- length(names(data_ROCAUC[[1]]))
-    n_contrasts <- length(names(data_ROCAUC[[1]][[names(data_ROCAUC[[1]])[1]]]))
-    
-    # Case 1: If both nrow and ncol are provided, use them as is
-    if (!is.null(nrow) && !is.null(ncol)) {
-      # Use the provided nrow and ncol values as is
-      combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow, align = "h")
-      
-      # Case 2: If only one of nrow or ncol is provided, adjust the other for a square-like grid
-    } else if (!is.null(nrow) || !is.null(ncol)) {
-      if (is.null(nrow)) {
-        nrow <- ceiling(length(plot_list) / ncol)  # Calculate nrow to make the grid more square-like
-      }
-      if (is.null(ncol)) {
-        ncol <- ceiling(length(plot_list) / nrow)  # Calculate ncol to make the grid more square-like
-      }
-      combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow, align = "h")
-      
-      # Case 3: If neither nrow nor ncol are provided, use the number of signatures and contrasts
-    } else {
-      nrow <- n_signatures
-      ncol <- n_contrasts
-      
-      # If either nrow or ncol is 1, adjust the grid to make it more square
-      if (nrow == 1 || ncol == 1) {
-        nrow <- ceiling(sqrt(length(plot_list)))
-        ncol <- ceiling(length(plot_list) / nrow)
-      }
-      combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow, align = "h")
-    }
-    # Add the title to the grid if provided
-    if (!is.null(title)) {
-      combined_plot <- ggpubr::annotate_figure(combined_plot, top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    }
-    
-    return(combined_plot)
-    
-  } else {
-    return(plot_list)
-  }
-}
-```
-
-```{r}
-PlotScores_Categorical_adapted <- function(data, metadata, gene_sets,
-                                           method = c("ssGSEA", "logmedian", "ranking"),
-                                           ColorVariable = NULL, GroupingVariable = NULL,
-                                           ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                                           widthTitle = 10, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 2,
-                                           xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                                           widthlegend=22, cohen_threshold=0.6, colorPalette="Set3") {
-  
-  method <- match.arg(method)
-  
-  ResultsList <- CalculateScores(data = data,
-                                 metadata = metadata,
-                                 gene_sets = gene_sets,
-                                 method = method)
-  
-  # if grouping variable is NULL, then the function displays a density / distribution of scores
-  if (is.null(GroupingVariable) | is.null(metadata)) {
-    
-    plot_list <- list()
-    
-    for (signature in names(ResultsList)) {
-      
-      df <- ResultsList[[signature]]
-      # Wrap the signature name using the helper function
-      wrapped_title <- wrap_title(signature, width = widthTitle)
-      
-      ColorValues <- if (is.null(ColorValues)) "#ECBD78" else ColorValues
-      
-      p <- ggplot2::ggplot(df, ggplot2::aes(x = score)) +
-        ggplot2::geom_density(fill = ColorValues, alpha = 0.5) +
-        ggplot2::labs(title = "Density Plot of Score", x = xlab, y = "Density") +
-        # add points below density
-        ggplot2::geom_rug(ggplot2::aes(x = score), color=ColorValues, sides = "b",  alpha = 0.8, size = .5, length = grid::unit(0.035, "npc"))
-      
-      # Customize the plot appearance.
-      p <- p + ggplot2::theme_classic() +
-        ggplot2::labs(title = wrapped_title, color = "", x = "", y = "") +
-        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize - .5),
-                       axis.text.y = ggplot2::element_text(  size = labsize - .5),
-                       plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize-1),
-                       plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic"),
-                       legend.position="none")
-      
-      # If limits is specified, crop the plot without adjusting the data (violins).
-      if (!is.null(limits)) {
-        p <- p + ggplot2::coord_cartesian(xlim = limits)
-      }
-      
-      plot_list[[signature]] <- p
-      
-    }
-    
-    n <- length(plot_list)
-    
-    # Determine grid layout
-    if (is.null(ncol) && is.null(nrow)) {
-      ncol <- ceiling(sqrt(n))
-      nrow <- ceiling(n / ncol)
-    } else if (is.null(ncol)) {
-      ncol <- ceiling(n / nrow)
-    } else if (is.null(nrow)) {
-      nrow <- ceiling(n / ncol)
-    }
-    
-    # create label for y axis
-    if (method == "ssGSEA") {
-      xlab <- "ssGSEA Enrichment Score"
-    } else if (method == "logmedian") {
-      xlab <- "Normalised Signature Score"
-    } else if (method == "ranking") {
-      xlab <- "Signature Genes' Ranking"
-    }
-    
-    combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow,  align = "h")
-    combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                             left = grid::textGrob("Density",
-                                                                   rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    return(combined_plot)
-  }
-  
-  if (!(GroupingVariable %in% colnames(metadata)))
-    stop(paste0(GroupingVariable, " not in metadata columns. Please check metadata."))
-  
-  # Initialize an empty list to store individual ggplot objects.
-  plot_list <- list()
-  
-  # Loop over each gene signature in the ResultsList.
-  for (signature in names(ResultsList)) {
-    # Extract the data frame for the current signature.
-    df <- ResultsList[[signature]]
-    
-    # Using factors so we can retrieve the first condition for Cohen's d if none is specified.
-    df[, GroupingVariable] <- factor(df[, GroupingVariable],
-                                     levels = sort(unique(as.character(df[, GroupingVariable]))))
-    
-    # Wrap the signature name using the helper function.
-    wrapped_title <- wrap_title(signature, width = widthTitle)
-    
-    # Create a base ggplot object with the specified grouping on the x-axis and score on the y-axis.
-    p <- ggplot2::ggplot(df, ggplot2::aes_string(x = GroupingVariable, y = "score"))
-    
-    # Add jittered points, optionally colored by ColorVariable.
-    if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::geom_jitter(ggplot2::aes_string(color = ColorVariable), size = pointSize, alpha = 0.5)
-    } else {
-      p <- p + ggplot2::geom_jitter(size = pointSize, alpha = 0.5) +
-        ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-    
-    # Overlay violin plots.
-    p <- p + ggplot2::geom_violin(alpha = 0.5, scale = "width")
-    
-    # Add median summary crossbar.
-    p <- p + ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
-                                   geom = "crossbar", width = 0.25,
-                                   position = ggplot2::position_dodge(width = 0.13))
-    
-    # Add stats: Compute Cohen's d (and optionally p‑value)
-    if(compute_cohen){
-      if (!is.null(cond_cohend)){
-        # can be of the following form:
-        # cond_cohend <- list(A=c("Senescent"),
-        #                     B=c("Proliferative","Quiescent"))
-        
-        if (sum(unlist(cond_cohend) %in% unique(df[, GroupingVariable])) != length(unique(df[, GroupingVariable])))
-          warning("Warning: Not all conditions of GroupingVariable were specified for Cohen's d calculation")
-        
-        x <- df[df[[GroupingVariable]] %in% cond_cohend[[1]], "score", drop = TRUE]
-        y <- df[df[[GroupingVariable]]  %in% cond_cohend[[2]], "score", drop = TRUE]
-        
-        cohen_d_results <- cohen_d(x, y)
-        
-        # df$cohen <- ifelse(df[, GroupingVariable] %in% cond_cohend[[1]], names(cond_cohend)[1], names(cond_cohend)[2])
-        # cohen_d_results <- rstatix::cohens_d(df, formula = score ~ cohen)
-        
-        if (pvalcalc) {
-          df$cohen <- ifelse(df[, GroupingVariable] %in% cond_cohend[[1]], names(cond_cohend)[1], names(cond_cohend)[2])
-          ttest_results <- rstatix::t_test(df, formula = score ~ cohen)
-          p_val <- ttest_results$p[1]
-          line1 <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results, 3)), width = widthTitle)
-          line2 <- wrap_title(paste0("p = ", round(p_val, 3)), width = widthTitle)
-          subtitle <- paste(line1, line2, sep = "\n")
-        } else {
-          subtitle <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results, 3)), width = widthTitle)
-        }
-        
-        
-      } else {
-        
-        if(length(unique(df[, GroupingVariable])) < 2){
-          
-          warning("Not enough conditions available to report Cohen's d.")
-          
-        } else if(length(unique(df[, GroupingVariable])) == 2) {
-          
-          # Calculate Cohen's d based on ordering of the x axis
-          group1 <- levels(df[, GroupingVariable])[1]
-          group2 <- levels(df[, GroupingVariable])[2]
-          
-          x <- df[df[[GroupingVariable]] == group1, "score", drop = TRUE]
-          y <- df[df[[GroupingVariable]] == group2, "score", drop = TRUE]
-          
-          cohen_d_results <- cohen_d(x, y)
-          
-          if (pvalcalc) {
-            ttest_results <- rstatix::t_test(df, formula = score ~ GroupingVariable)
-            p_val <- ttest_results$p[1]
-            line1 <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results$effsize, 3)), width = widthTitle)
-            line2 <- wrap_title(paste0("p = ", round(p_val, 3)), width = widthTitle)
-            subtitle <- paste(line1, line2, sep = "\n")
-          } else {
-            subtitle <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results$effsize, 3)), width = widthTitle)
-          }
-          
-          
-          
-        } else if(length(unique(df[, GroupingVariable])) > 2){
-          
-          # Calculate Cohen's f
-          type <- identify_variable_type(df, GroupingVariable)[GroupingVariable]
-          #Without scaling, the coefficient represents the change in score per unit increase in the variable (if numeric, the unit of the variable. Makes sense to not scale...)
-          model <- lm(score ~ get(GroupingVariable), data = df)
-          results_var <- compute_cohens_f_pval(model, type)
-          
-          
-          if (pvalcalc) {
-            line1 <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-            line2 <- wrap_title(paste0("p = ", round(results_var["P_Value"], 3)), width = widthTitle)
-            subtitle <- paste(line1, line2, sep = "\n")
-          } else {
-            subtitle <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-          }
-          
-        }
-        
-        
-      }
-      
-    } else {
-      
-      subtitle <- NULL
-      
-    }
-    # If ConnectGroups is TRUE, add a line connecting medians across groups.
-    if (ConnectGroups && !is.null(ColorVariable)) {
-      p <- p + ggplot2::stat_summary(ggplot2::aes_string(group = ColorVariable, color = ColorVariable),
-                                     fun.y = median, geom = "line", size = 1.5, alpha = 0.75,
-                                     show.legend = FALSE)
-    }
-    
-    # Customize the plot appearance.
-    p <- p + ggplot2::theme_bw() +
-      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize),
-                     axis.text.y = ggplot2::element_text(  size = labsize),
-                     plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize-1),
-                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic")) +
-      ggplot2::labs(title = wrapped_title, subtitle = subtitle, color = "", x = "", y = "")
-    
-    # If ColorValues is provided, use a manual color scale; otherwise, if ColorVariable is provided,
-    # use a default brewer palette.
-    if (!is.null(ColorValues)) {
-      p <- p + ggplot2::scale_color_manual(values = ColorValues)
-    } else if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-    
-    # If limits is specified, crop the plot without adjusting the data (violins).
-    if (!is.null(limits)) {
-      p <- p + ggplot2::coord_cartesian(ylim = limits)
-    }
-    
-    # Adjust legend rows if legend_nrow is specified.
-    if (!is.null(legend_nrow)) {
-      p <- p + ggplot2::guides(color = ggplot2::guide_legend(nrow = legend_nrow))
-    }
-    
-    # Store the plot in the list.
-    plot_list[[signature]] <- p + theme(legend.position = "none")
-  }
-  
-  n <- length(plot_list)
-  
-  # Determine grid layout.
-  if (is.null(ncol) && is.null(nrow)) {
-    ncol <- ceiling(sqrt(n))
-    nrow <- ceiling(n / ncol)
-  } else if (is.null(ncol)) {
-    ncol <- ceiling(n / nrow)
-  } else if (is.null(nrow)) {
-    nrow <- ceiling(n / ncol)
-  }
-  
-  # Combine plots.
-  combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow , align = "h") #, common.legend = TRUE
-  
-  # Annotate with axis labels.
-  if (is.null(xlab)) {
-    xlab <- GroupingVariable
-  }
-  
-  if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
-  
-  # Create label for y axis based on method.
-  if (method == "ssGSEA") {
-    ylab <- "ssGSEA Enrichment Score"
-  } else if (method == "logmedian") {
-    ylab <- "Normalised Signature Score"
-  } else if (method == "ranking") {
-    ylab <- "Signature Genes' Ranking"
-  }
-  
-  combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
-                                                                 rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize)))
-  return(combined_plot)
-}
-```
-
-```{r}
-# adapted to not have the color legend
-
-PlotScores_adapted <- function(data, metadata, gene_sets,
-                               method = c("ssGSEA", "logmedian", "ranking", "all"),
-                               ColorVariable = NULL, Variable = NULL,
-                               ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                               widthTitle = 20, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 4,
-                               xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                               widthlegend=22, sig_threshold=0.05, cohen_threshold=0.5, colorPalette="Set3", cor=c("pearson","spearman","kendall")) {
-  
-  method <- match.arg(method)
-  
-  type <- identify_variable_type(metadata, Variable)#[Variable]
-  
-  if (method == "all") { # returns heatmap
-    
-    if (type =="Numeric"){
-      
-      cohenlist <- CohenF_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable )
-      
-    } else {
-      
-      cohenlist <- CohenD_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable, mode = mode)
-      
-    }
-    
-    # if user wants "all" methods, a heatmap of Cohen's d's is returned, for all combination of variables in GroupingVariable
-    Heatmap_Final <- Heatmap_Cohen(cohenlist = cohenlist,
-                                   nrow = nrow,
-                                   ncol = ncol,
-                                   limits = limits,
-                                   widthTitle = widthTitle,
-                                   titlesize = titlesize,
-                                   ColorValues = ColorValues,
-                                   title = title )
-    
-    Volcano_Cohen <- Volcano_Cohen(cohenlist = cohenlist,
-                                   titlesize = 12,
-                                   ColorValues = ColorValues,
-                                   title = title,
-                                   widthlegend = widthlegend,
-                                   pointSize = pointSize,
-                                   sig_threshold = sig_threshold,
-                                   cohen_threshold = cohen_threshold,
-                                   colorPalette =colorPalette,
-                                   ncol = ncol,
-                                   nrow = nrow)
-    
-    return(list(heatmap=Heatmap_Final$plt,
-                volcano=Volcano_Cohen$plt))
-    
-  } else {
-    
-    
-    
-    if (type!="Numeric"){
-      
-      return(
-        
-        PlotScores_Categorical_adapted(data=data, metadata=metadata, gene_sets=gene_sets,
-                                       method = method,
-                                       ColorVariable = ColorVariable, GroupingVariable = Variable,
-                                       ColorValues = ColorValues, ConnectGroups = ConnectGroups, ncol = ncol, nrow = nrow, title = title,
-                                       widthTitle = widthTitle, titlesize = titlesize, limits = limits, legend_nrow = legend_nrow, pointSize = pointSize,
-                                       xlab = xlab, labsize = labsize, compute_cohen=compute_cohen, cond_cohend = cond_cohend, pvalcalc = pvalcalc, mode = mode,
-                                       widthlegend=widthlegend, cohen_threshold=cohen_threshold, colorPalette=colorPalette)
-        
-      )
-      
-    } else {
-      
-      return(
-        
-        PlotScores_Numeric(data=data,
-                           metadata=metadata,
-                           gene_sets=gene_sets,
-                           method = method,
-                           Variable = Variable,
-                           ColorValues = ColorValues,
-                           ncol = ncol,
-                           nrow = nrow,
-                           title = title,
-                           widthTitle = widthTitle,
-                           titlesize = titlesize,
-                           limits = limits,
-                           pointSize = pointSize,
-                           xlab = xlab,
-                           labsize = labsize,
-                           compute_cohen = compute_cohen,
-                           pvalcalc = pvalcalc,
-                           colorPalette = colorPalette,
-                           cor=cor)
-        
-      )
-      
-    }
-    
-    
-  }
-  
-  
-}
-```
-
-```{r}
-# Plot gene-set scores, one panel per signature, against one metadata variable.
-#
-# Scores are computed with CalculateScores(). Two modes:
-#   * `Variable` or `metadata` is NULL: a density plot of the scores for each
-#     signature.
-#   * otherwise: a scatter plot of score vs. `Variable` for each signature with
-#     2D density contours, a linear fit and the correlation printed by
-#     ggpubr::stat_cor(). When `compute_cohen` is TRUE the panel title shows
-#     Cohen's f (and the p-value when `pvalcalc` is TRUE).
-#
-# Arguments:
-#   data, metadata, gene_sets  passed unchanged to CalculateScores().
-#   method         scoring method: "ssGSEA", "logmedian" or "ranking".
-#   Variable       metadata column plotted on the x axis (NULL = density mode).
-#   ColorValues    density fill (density mode) or point colour (scatter mode,
-#                  where only the first element is used).
-#   ncol, nrow     panel grid; whichever is NULL is derived from the panel count.
-#   title          overall figure title (wrapped in scatter mode only).
-#   widthTitle     width handed to wrap_title().
-#   titlesize, labsize  base font sizes for titles and axis text.
-#   limits         range passed to coord_cartesian(): x axis in density mode,
-#                  y axis in scatter mode.
-#   pointSize      point size in scatter mode.
-#   xlab           bottom label in scatter mode (defaults to `Variable`);
-#                  density mode always uses a method-specific label.
-#   compute_cohen, pvalcalc  control the per-panel statistics title.
-#   colorPalette   accepted for interface compatibility; not used here.
-#   cor            correlation method for ggpubr::stat_cor().
-#
-# Returns the arranged figure produced by ggpubr::annotate_figure().
-PlotScores_Numeric_adapted <- function(data,
-                                       metadata,
-                                       gene_sets,
-                                       method = c("ssGSEA", "logmedian", "ranking"),
-                                       Variable = NULL,
-                                       ColorValues = NULL,
-                                       ncol = NULL,
-                                       nrow = NULL,
-                                       title = NULL,
-                                       widthTitle = 10,
-                                       titlesize = 12,
-                                       limits = NULL,
-                                       pointSize = 2,
-                                       xlab = NULL,
-                                       labsize = 10,
-                                       compute_cohen = TRUE,
-                                       pvalcalc = FALSE,
-                                       colorPalette = "Set3",
-                                       cor = c("pearson","spearman","kendall")) {
-
-  method <- match.arg(method)
-  # Reduce the default choice vector to a single value: stat_cor() hands
-  # `method` to cor.test(), which rejects a vector of length > 1.
-  cor <- match.arg(cor)
-
-  ResultsList <- CalculateScores(data = data,
-                                 metadata = metadata,
-                                 gene_sets = gene_sets,
-                                 method = method)
-
-  # Fill in whichever of ncol / nrow the caller left NULL for `n` panels.
-  resolve_grid <- function(n) {
-    if (is.null(ncol) && is.null(nrow)) {
-      ncol <- ceiling(sqrt(n))
-      nrow <- ceiling(n / ncol)
-    } else if (is.null(ncol)) {
-      ncol <- ceiling(n / nrow)
-    } else if (is.null(nrow)) {
-      nrow <- ceiling(n / ncol)
-    }
-    list(ncol = ncol, nrow = nrow)
-  }
-
-  # Without a grouping variable (or metadata) show the score distribution only.
-  if (is.null(Variable) || is.null(metadata)) {
-
-    fill_colour <- if (is.null(ColorValues)) "#ECBD78" else ColorValues
-    plot_list <- list()
-
-    for (signature in names(ResultsList)) {
-
-      p <- ggplot2::ggplot(ResultsList[[signature]], ggplot2::aes(x = score)) +
-        ggplot2::geom_density(fill = fill_colour, alpha = 0.5) +
-        ggplot2::theme_classic() +
-        # Per-panel axis titles are blanked; shared labels are added below.
-        ggplot2::labs(title = "Density Plot of Score", color = "", x = "", y = "") +
-        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize - .5),
-                       axis.text.y = ggplot2::element_text(size = labsize - .5),
-                       plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                       plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic"))
-
-      # Zoom the x axis without dropping data from the density estimate.
-      if (!is.null(limits)) {
-        p <- p + ggplot2::coord_cartesian(xlim = limits)
-      }
-
-      plot_list[[signature]] <- p
-    }
-
-    grid_dims <- resolve_grid(length(plot_list))
-
-    # Shared x-axis label, chosen by scoring method.
-    xlab <- switch(method,
-                   ssGSEA = "ssGSEA Enrichment Score",
-                   logmedian = "Normalised Signature Score",
-                   ranking = "Signature Genes' Ranking")
-
-    combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = grid_dims$ncol, nrow = grid_dims$nrow,
-                                       common.legend = TRUE, align = "h")
-    combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                             left = grid::textGrob("Density",
-                                                                   rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    return(combined_plot)
-  }
-
-  if (!(Variable %in% colnames(metadata)))
-    stop(paste0(Variable, " not in metadata columns. Please check metadata."))
-
-  # Single point colour for every panel; only the first supplied value is used.
-  point_colour <- if (is.null(ColorValues)) "#5264B6" else ColorValues[1]
-
-  plot_list <- list()
-
-  for (signature in names(ResultsList)) {
-    df <- ResultsList[[signature]]
-
-    # `.data[[...]]` replaces the deprecated aes_string() and also copes with
-    # column names that are not syntactic R names.
-    p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[[Variable]], y = .data[["score"]])) +
-      ggplot2::geom_point(size = pointSize, alpha = 0.5, color = point_colour) +
-      # density contours drawn over the points
-      ggplot2::geom_density2d(colour = "white") +
-      # linear trend plus the correlation coefficient for the chosen method
-      ggplot2::geom_smooth(method = "lm", col = "black", se = FALSE, size = 2) +
-      ggpubr::stat_cor(method = cor)
-
-    # Panel title: Cohen's f (and optionally the p-value) of score ~ Variable.
-    if (compute_cohen) {
-
-      type <- identify_variable_type(df, Variable)[Variable]
-      # The predictor is deliberately not scaled, so the coefficient is the
-      # change in score per unit of the variable.
-      model <- lm(score ~ get(Variable), data = df)
-      results_var <- compute_cohens_f_pval(model, type)
-
-      if (pvalcalc) {
-        line1 <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-        line2 <- wrap_title(paste0("p = ", round(results_var["P_Value"], 3)), width = widthTitle)
-        subtitle <- paste(line1, line2, sep = "; ")
-      } else {
-        subtitle <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-      }
-
-    } else {
-      subtitle <- NULL
-    }
-
-    p <- p + ggplot2::theme_bw() +
-      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize),
-                     axis.text.y = ggplot2::element_text(size = labsize),
-                     plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic")) +
-      ggplot2::labs(title = subtitle, color = "", x = "", y = "")
-
-    # Zoom the y axis without dropping points from the fit.
-    if (!is.null(limits)) {
-      p <- p + ggplot2::coord_cartesian(ylim = limits)
-    }
-
-    plot_list[[signature]] <- p
-  }
-
-  grid_dims <- resolve_grid(length(plot_list))
-
-  combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = grid_dims$ncol, nrow = grid_dims$nrow,
-                                     common.legend = TRUE, align = "h")
-
-  # Shared axis labels and figure title.
-  if (is.null(xlab)) {
-    xlab <- Variable
-  }
-
-  if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
-
-  # Every scoring method uses the same generic y-axis label in this figure.
-  ylab <- "Score"
-
-  combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
-                                                                 rot = 90, vjust = 2, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize), vjust = -2),
-                                           top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize)))
-  return(combined_plot)
-
-}
-```
-
-
-
-### Load relevant data
-
-```{r}
-# Inputs for the figures below, read from serialized R objects (.rds).
-# Paths are relative, so they depend on where the document is rendered from.
-metadata <- readRDS("../data/metadata.rds")
-filtered_counts <- readRDS("../data/filtered_counts.rds")
-normalised_counts <- readRDS("../data/normalised_counts.rds")
-corrcounts <- readRDS("../data/corrcounts.rds")
-signatures <- readRDS("../data/SenescenceSignatures.rds") 
-# NOTE(review): "Signtures" in the file name below looks like a typo for
-# "Signatures" - check the file on disk before changing the path.
-signatures_bidirectional <- readRDS("../data/SenescenceSigntures_Bidirectional.rds") # Divided by direction
-
-# GTEx objects live outside the repository tree (four directory levels up).
-GTEx_alltissues <- readRDS("../../../../data/GTExV8_voyagercorrected.rds") # https://github.com/DiseaseTranscriptomicsLab/voyAGEr/tree/main/Corrected_Counts
-metadata_GTEx_alltissues <- readRDS("../../../../data/GTExV8_metadata.rds") # Restricted access 
-```
-
-
-### Colors
-
-```{r}
-# Named colour vectors shared by the figures; names are the category labels
-# that the plots map onto `fill`.
-
-# Experimental condition
-Condition_colors <- c(
-  Senescent     = "#F4A261",  # Soft orange (not goldenrod)
-  Proliferative = "#A1C298",  # Desaturated sage green
-  Quiescent     = "#9DB4C0"   # Dusty teal-gray
-)
-
-# Cell type, primary palette
-CellTypecols <- c(
-  Fibroblast   = "#E69F00",  # Orange
-  Keratinocyte = "#56B4E9",  # Sky Blue
-  Melanocyte   = "#009E73",  # Bluish Green
-  Endothelial  = "#F0E442",  # Yellow
-  Neuronal     = "#0072B2",  # Blue
-  Mesenchymal  = "#D55E00"   # Vermilion
-)
-
-# Cell type, alternative palette
-CellTypecols_alt <- c(
-  Fibroblast   = "#1b9e77",  # Teal green
-  Keratinocyte = "#d95f02",  # Warm orange-brown
-  Melanocyte   = "#7570b3",  # Muted purple
-  Endothelial  = "#e7298a",  # Dark pink
-  Neuronal     = "#66a61e",  # Olive green
-  Mesenchymal  = "#e6ab02"   # Mustard
-)
-
-# Source dataset
-dataset_colors <- c(
-  HernandezSegura = "#8DD3C7",
-  Mangelinck      = "#FFFFB3",
-  Chan            = "#BEBADA",
-  Purcell         = "#FB8072",
-  Marthandan2016  = "#80B1D3",
-  Marthandan2015  = "#FDB462",
-  Wang            = "#B3DE69",
-  Suda            = "#FCCDE5",
-  McHugh          = "#D9D9D9",
-  Savic           = "#BC80BD",
-  Skea            = "#CCEBC5",
-  Laurent         = "#FFED6F",
-  Tasdemir        = "#D0B7E1",
-  Lenain          = "#FFB3BA",
-  Aarts           = "#C2C2F0",
-  Casella         = "#FFDAC1",
-  Numa            = "#C6E2FF",
-  Admasu          = "#F5CBA7",
-  Urata           = "#C7CEEA",
-  Wang2023        = "#E6B0AA",
-  Fu              = "#A9DFBF",
-  Tanke           = "#F9E79F",
-  Bawens          = "#AED6F1"
-)
-
-
-```
-
-```{r}
-# Visual check of the palettes defined above.
-# show_col() is not defined in this document (presumably scales::show_col - confirm).
-show_col(Condition_colors, ncol=3)
-show_col(CellTypecols)
-show_col(dataset_colors)
-```
-
-## Figure 2 - Overview of Senescence Dataset and Gene Sets
-
-### Senescence Signatures
-
-To maximise the impact of this study, we aimed to use a set of senescence signatures that reflects the needs and preferences of the scientific community, taking into account the resources available as of 13 March 2024 (Supplementary Figure 3). To this end, we collected five signatures from open-source, highly cited studies, namely SenMayo (Saul et al., 2022), CSGene (Zhao et al., 2016), CellAge (Avelar et al., 2020), SeneQuest (Gorgoulis et al., 2019) and HernandezSegura (Hernandez-Segura et al., 2017). We also collected four gene signatures from the MSigDB database (Subramanian et al., 2005), which is widely used in computational analyses: GOBP_CELLULAR_SENESCENCE, GOBP_NEGATIVE_REGULATION_OF_CELLULAR_SENESCENCE, GOBP_POSITIVE_REGULATION_OF_CELLULAR_SENESCENCE and REACTOME_CELLULAR_SENESCENCE.
-
-
-
-```{r}
-
-
-# Number of unique genes in each signature. A signature is either a data
-# frame (genes in its first column) or a plain vector of genes; any other
-# type yields NA.
-gene_counts <- sapply(signatures_bidirectional, function(sig) {
-  if (is.data.frame(sig)) {
-    genes <- sig[[1]]
-  } else if (is.vector(sig)) {
-    genes <- sig
-  } else {
-    return(NA)
-  }
-  length(unique(genes))
-})
-
-# Fall back to generic names when the list is unnamed
-if (is.null(names(gene_counts))) {
-  names(gene_counts) <- paste0("Signature_", seq_along(gene_counts))
-}
-
-# One row per signature, smallest first; the factor levels freeze that order
-# so plots list the signatures by size
-gene_count_df <- data.frame(Signature = names(gene_counts),
-                            NumGenes = as.integer(gene_counts),
-                            stringsAsFactors = FALSE)
-gene_count_df <- gene_count_df[order(gene_count_df$NumGenes), ]
-gene_count_df$Signature <- factor(gene_count_df$Signature, levels = gene_count_df$Signature)
-
-order_signatures <- gene_count_df$Signature
-
-# Same ordering, with the labels passed through wrap_title(x, 20)
-gene_count_df$Signature <- sapply(X = as.character(gene_count_df$Signature),
-                                  function(x) wrap_title(x, 20))
-gene_count_df <- gene_count_df[order(gene_count_df$NumGenes), ]
-gene_count_df$Signature <- factor(gene_count_df$Signature, levels = gene_count_df$Signature)
-
-order_signatures_split <- gene_count_df$Signature
-
-# Horizontal bars with the gene count printed just past the end of each bar
-plt_nb_genes <- ggplot(gene_count_df, aes(x = NumGenes, y = Signature)) +
-  geom_col(fill = "#3D5D56") +
-  geom_text(aes(label = NumGenes), hjust = -0.2) +
-  labs(x = "Number of genes", y = "") +
-  theme_classic() +
-  xlim(c(0, 1500))
-
-# Mirrored variant (bars growing leftwards), kept for reference:
-# plt_nb_genes <- ggplot(gene_count_df, aes(x = NumGenes, y = Signature)) +
-#   geom_col(fill = "#3D5D56") +
-#   geom_text(aes(label = NumGenes), hjust = 1.2) +  # Outside the bar (leftward in reversed scale)
-#   xlab("Number of genes") +
-#   ylab("") +
-#   scale_x_reverse(limits = c(1500, 0)) +  # Add space for text
-#   scale_y_discrete(position = "right") +
-#   theme_minimal() 
-
-
-plt_nb_genes
-```
-
-
-
-
-```{r fig.width=7, fig.height=7}
-
-# Per-signature gene counts split by direction of change.
-# Data-frame signatures carry the direction in their second column
-# (1 = enriched, -1 = depleted); plain vectors have no direction information.
-summary_list <- list()
-
-for (name in names(signatures_bidirectional)) {
-  sig <- signatures_bidirectional[[name]]
-
-  if (is.data.frame(sig)) {
-    direction <- sig[[2]]
-    # na.rm keeps a single missing direction from turning the count into NA.
-    up_count <- sum(direction == 1, na.rm = TRUE)
-    down_count <- sum(direction == -1, na.rm = TRUE)
-    # Rows that are neither 1 nor -1 (including NA) are reported as
-    # "Unknown" rather than silently dropped from the total.
-    no_direction <- length(direction) - up_count - down_count
-  } else if (is.vector(sig)) {
-    up_count <- 0
-    down_count <- 0
-    no_direction <- length(sig)
-  } else {
-    up_count <- down_count <- no_direction <- 0
-  }
-
-  summary_list[[name]] <- data.frame(
-    signature_plt = name,
-    enrichment = c("Enriched", "Depleted", "Unknown"),
-    count = c(up_count, down_count, no_direction),
-    stringsAsFactors = FALSE
-  )
-}
-
-# Combine all into a single data frame
-summary_df <- do.call(rbind, summary_list)
-
-# Wrap the labels the same way as order_signatures_split so they match its levels
-summary_df$signature_plt <- sapply(X=as.character(summary_df$signature_plt), function(x) wrap_title(x,20))
-summary_df$signature_plt <- factor(summary_df$signature_plt, levels = order_signatures_split)
-
-# Total gene count per signature, used for the label at the end of each bar
-totals_df <- aggregate(count ~ signature_plt, data = summary_df, sum)
-
-# Stacked horizontal bars with a broken x axis
-plt_nb_genes2 <- ggplot(summary_df, aes(x = count, y = signature_plt, fill = enrichment)) +
-  geom_col() +
-  geom_text(data = totals_df,
-            aes(x = count, y = signature_plt, label = count),
-            inherit.aes = FALSE,
-            hjust = -0.2, size = 4) +
-  xlab("Number of genes") +
-  ylab("") +
-  theme_classic() +
-  theme(legend.position = "top") +
-  scale_fill_manual(values = c(
-    "Enriched" = "#1BAA7A",
-    "Depleted" = "#C83C3C",
-    "Unknown" = "#C9C9C9"
-  )) +
-  #labs(fill = "") +
-  # scale_x_break() is not defined in this document (presumably ggbreak - confirm)
-  scale_x_break(c(600, 1100), space = 0.2, expand = c(0, 0), ticklabels = c(1200, 1500)) +
-  scale_x_continuous(
-    breaks = seq(0, 1500, 500),  # Breaks every 500
-    limits = c(0, 1500)          # x-axis limits from 0 to 1500
-  ) +
-  guides(fill= guide_legend(title = element_blank(), title.position="top", title.hjust = 0.5))
-
-plt_nb_genes2
-```
-
-
-```{r fig.width=8, fig.height=6}
-# Pairwise overlap between signatures: for every ordered pair (i, j) the
-# number of shared genes and that number as a percentage of signature i.
-
-# 1. Unique genes per signature (first column of a data frame, or the vector itself)
-gene_sets <- lapply(signatures_bidirectional, function(x) {
-  if (is.data.frame(x)) unique(x[[1]])
-  else if (is.vector(x)) unique(x)
-  else NULL
-})
-
-# 2. Assign names if missing
-if (is.null(names(gene_sets))) {
-  names(gene_sets) <- paste0("Signature_", seq_along(gene_sets))
-}
-
-signature_names <- names(gene_sets)
-n <- length(gene_sets)
-
-# 3. All ordered pairs, built up front instead of growing four vectors inside
-#    a double loop. `j` varies fastest, i.e. the row order of
-#    for (i) { for (j) { ... } }.
-pairs <- expand.grid(j = seq_len(n), i = seq_len(n))
-
-dataset_i <- as.character(signature_names[pairs$i])
-dataset_j <- as.character(signature_names[pairs$j])
-
-# 4. Overlap size for each pair
-nb_common_genes <- vapply(
-  seq_len(nrow(pairs)),
-  function(k) length(intersect(gene_sets[[pairs$i[k]]], gene_sets[[pairs$j[k]]])),
-  integer(1)
-)
-
-# Percentage relative to signature i; an empty signature gives 0, not NaN
-size_i <- lengths(gene_sets, use.names = FALSE)[pairs$i]
-percentage <- ifelse(size_i > 0, 100 * nb_common_genes / size_i, 0)
-
-# 5. Create data frame
-heatmap_data <- data.frame(
-  dataset_i = dataset_i,
-  dataset_j = dataset_j,
-  percentage = percentage,
-  nb_common_genes = nb_common_genes,
-  stringsAsFactors = FALSE
-)
-
-
-# heatmap_data$dataset_i <- factor(heatmap_data$dataset_i,levels=rev(order_signatures))
-# heatmap_data$dataset_j <- factor(heatmap_data$dataset_j,levels=(order_signatures))  
-# Wrap the labels and order both axes like order_signatures_split
-heatmap_data$dataset_i <- sapply(X=as.character(heatmap_data$dataset_i), function(x) wrap_title(x,20))
-heatmap_data$dataset_j <- sapply(X=as.character(heatmap_data$dataset_j), function(x) wrap_title(x,20))
-heatmap_data$dataset_i <- factor(heatmap_data$dataset_i,levels=(order_signatures_split))
-heatmap_data$dataset_j <- factor(heatmap_data$dataset_j,levels=(order_signatures_split))
-
-# 6. Plot: diagonal cells are greyed out and unlabelled; every other cell
-#    shows the percentage and, in brackets, the number of shared genes
-plt_heatmap <- ggplot(heatmap_data, aes(x = dataset_j, y = dataset_i, fill = percentage)) +
-  geom_tile(aes(fill = ifelse(dataset_i == dataset_j, NA, percentage)), color = "black") +
-  geom_tile(data = subset(heatmap_data, dataset_i == dataset_j), fill = "#F1F1F1", color = "black") +
-  geom_text(data = subset(heatmap_data, dataset_i != dataset_j),
-            aes(label = paste0(sprintf("%.1f", percentage), "%\n(", nb_common_genes, ")")), size = 3) +
-  scale_fill_gradient(low = "white", high = "#699B91", name = "Overlap (%)", na.value = "#F1F1F1") +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
-  xlab("") + ylab("")
-
-plt_heatmap
-```
-
-```{r}
-
-# Hand-curated table of which directions each signature covers:
-# TRUE = direction present, FALSE = absent, NA = direction not specified.
-DF_genesets_poster <- data.frame(signature=c("SAUL_SEN_MAYO","GOBP_NEGATIVE_REGULATION_\nOF_CELLULAR_SENESCENCE","GOBP_POSITIVE_REGULATION_\nOF_CELLULAR_SENESCENCE", # unidirectional
-                                             "CellAge","SeneQuest","HernandezSegura", # bidirectional
-                                             "CSgene","REACTOME_\nCELLULAR_SENESCENCE","GOBP_\nCELLULAR_SENESCENCE"), 
-                                 Depleted=c(FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, NA, NA, NA),# not specified
-                                 Enriched=c(TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, NA, NA, NA))
-
-# Long format: one row per (signature, direction) cell
-melted_df_poster <- reshape2::melt(DF_genesets_poster, id.vars = "signature")
-# Row order is fixed by hand; rev() puts the first listed signature at the top
-melted_df_poster$signature <- factor( melted_df_poster$signature, levels = rev(c( "CellAge", "CSgene", "REACTOME_\nCELLULAR_SENESCENCE",  "SAUL_SEN_MAYO",  "SeneQuest", "GOBP_\nCELLULAR_SENESCENCE", "HernandezSegura", "GOBP_NEGATIVE_REGULATION_\nOF_CELLULAR_SENESCENCE", "GOBP_POSITIVE_REGULATION_\nOF_CELLULAR_SENESCENCE" )))
-
-# Grid of white cells: a green check mark for "Enriched", a red one for
-# "Depleted" and "?" where the direction is not specified.
-# The former layer filtered on variable == "Statistic" was dropped: the melted
-# data only ever contains "Depleted" and "Enriched", so it never drew anything.
-plt_table_genesets_summary <- ggplot(melted_df_poster, aes(x = variable, y = signature)) +
-  geom_tile(fill = "white", color="black", size=1) +
-  geom_text(data = subset(melted_df_poster, variable == "Enriched" & value == TRUE), 
-            aes(label = "✓"), size = 8, vjust = 0.5, fontface="bold", color="#69C68F") +  # green check mark
-  geom_text(data = subset(melted_df_poster, variable == "Depleted" & value == TRUE), 
-            aes(label = "✓"),   size = 8, vjust = 0.5, fontface="bold", color="#D34343") +  # red check mark
-  geom_text(data = subset(melted_df_poster, variable %in% c("Enriched", "Depleted") & is.na(value)), 
-            aes(label = "?"), size = 4, vjust = 0.5, fontface="bold") +  # direction not specified
-  labs(x = NULL, y = NULL) +  # Remove axis labels
-  theme_minimal() +
-  theme(axis.text.x = element_text( hjust = 1, size=10),
-        axis.text.y = element_text( size=10)) +
-  scale_x_discrete(
-    expand = expansion(mult = c(0,0)), guide = guide_axis(angle = 60),
-    position = "bottom"
-  ) #+
-#scale_y_discrete(position = "right")
-plt_table_genesets_summary 
-```
-
-```{r fig.width=12, fig.height=7}
-# One row of three panels: direction table, overlap heatmap, gene-count bars.
-# The heatmap's y-axis text is removed and the bar plot is given theme_void().
-# NOTE(review): the panels then appear to rely on the table's row labels; the
-# table's row order is hard-coded while the other two follow
-# order_signatures_split - confirm that the orders agree.
-plt_genesets <- ggarrange(plt_table_genesets_summary,
-                          plt_heatmap + theme(legend.position = "top", axis.text.y=element_blank()),
-                          plt_nb_genes + theme_void() + theme(legend.position = "top") , nrow=1, align = "h",
-                          widths=c(0.25,0.4,0.2))
-plt_genesets
-```
-
-
-
-
-
-### Senescence Datasets
-
-
-```{r}
-
-
-# PCA of the log2(corrcounts + 1) values, computed separately per cell type.
-# Points are filled by Condition using Condition_colors (defined in the
-# "Colors" section).
-
-# Scatter plot of two principal components for one cell type.
-#   pca_df       PC coordinates bound to the sample metadata
-#   axes         aes() mapping naming the two PCs, e.g. aes(PC1, PC2)
-#   x_label, y_label  axis titles (PC name plus % variance)
-#   panel_title  plot title (the cell type)
-plot_celltype_pca <- function(pca_df, axes, x_label, y_label, panel_title) {
-  ggplot(pca_df, axes) +
-    geom_point(aes(fill = Condition), size = 4, alpha = 0.6, shape = 21) +
-    ggtitle(label = panel_title) +
-    xlab(x_label) +
-    ylab(y_label) +
-    theme_bw() +
-    theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(), legend.title = element_blank()) +
-    geom_vline(xintercept = 0, linetype = "dotted") +
-    geom_hline(yintercept = 0, linetype = "dotted") +
-    theme(plot.title = element_text(hjust = 0.5),
-          plot.subtitle = element_text(hjust = 0.5)) +
-    scale_fill_manual(values = Condition_colors)
-}
-
-# Plots collected per cell type
-pca_list_PC12 <- list()
-pca_list_PC23 <- list()
-
-for (ct in unique(metadata$CellType)) {
-
-  # Samples of this cell type. as.character() makes the column selection go
-  # by sample name even if sampleID is stored as a factor.
-  meta_sub <- metadata[metadata$CellType == ct, ]
-  counts_sub <- corrcounts[, as.character(meta_sub$sampleID)]
-
-  # DGEList keeps the log-transformed counts together with the sample metadata
-  y <- DGEList(log2(counts_sub + 1), samples = meta_sub)
-
-  # PCA on samples (rows after transposing), centred but not scaled
-  PCA <- prcomp(t(y$counts), scale = FALSE, center = TRUE)
-  PCACounts <- as.data.frame(PCA$x)
-
-  # Three components are needed for the PC1/PC2 and PC2/PC3 panels
-  if (ncol(PCACounts) < 3) {
-    warning("Skipping cell type '", ct, "': fewer than 3 principal components available.")
-    next
-  }
-
-  # Percentage of variance explained by the first three components
-  ev <- PCA$sdev^2
-  pc1 <- round(100 * ev[1] / sum(ev), 2)
-  pc2 <- round(100 * ev[2] / sum(ev), 2)
-  pc3 <- round(100 * ev[3] / sum(ev), 2)
-
-  # Combine with metadata
-  PCAData <- cbind(PCACounts[, 1:3], meta_sub)
-
-  plt_12 <- plot_celltype_pca(PCAData, aes(PC1, PC2),
-                              paste0("PC1: ", pc1, "% variance"),
-                              paste0("PC2: ", pc2, "% variance"),
-                              ct)
-
-  plt_23 <- plot_celltype_pca(PCAData, aes(PC2, PC3),
-                              paste0("PC2: ", pc2, "% variance"),
-                              paste0("PC3: ", pc3, "% variance"),
-                              ct)
-
-  pca_list_PC12[[ct]] <- plt_12
-  pca_list_PC23[[ct]] <- plt_23
-
-}
-
-# 3 x 2 grids with one shared legend at the bottom
-pca_PC12_divided <- ggarrange(plotlist = pca_list_PC12, ncol=3, nrow=2, common.legend = T, legend="bottom")
-pca_PC12_divided
-
-
-pca_PC23_divided <- ggarrange(plotlist = pca_list_PC23, ncol=3, nrow=2, common.legend = T, legend="bottom")
-pca_PC23_divided
-
-```
-
-
-```{r fig.width=5, fig.height=4}
-# PCA of log2(corrcounts + 1) across all samples, drawn four ways:
-# PC1/PC2 and PC2/PC3, each filled by cell type and by condition.
-# Palettes (CellTypecols_alt, Condition_colors) come from the "Colors" section.
-
-# Scatter plot of two principal components for the full dataset.
-#   pca_df   PC coordinates bound to the sample metadata
-#   axes     aes() mapping naming the two PCs, e.g. aes(PC1, PC2)
-#   fill     aes() mapping for the point fill, e.g. aes(fill = CellType)
-#   x_label, y_label  axis titles (PC name plus % variance)
-#   palette  named colour vector for scale_fill_manual()
-plot_overall_pca <- function(pca_df, axes, fill, x_label, y_label, palette) {
-  ggplot(pca_df, axes) +
-    geom_point(fill, size = 5, alpha = 0.8, shape = 21) +
-    xlab(x_label) +
-    ylab(y_label) +
-    theme_bw() +
-    theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin()) +
-    geom_vline(xintercept = 0, linetype = "dotted") +
-    geom_hline(yintercept = 0, linetype = "dotted") +
-    theme(plot.title = element_text(hjust = 0.5),
-          plot.subtitle = element_text(hjust = 0.5),
-          legend.title = element_blank()) +
-    scale_fill_manual(values = palette)
-}
-
-y <- DGEList(log2(corrcounts+1), samples= metadata)
-# PCA on samples (rows after transposing), centred but not scaled
-PCA <- prcomp(t(y$counts), scale=FALSE, center=TRUE)
-PCACounts <- PCA$x
-PCACounts <- as.data.frame(PCACounts)
-# Keep up to the first 10 components; capped so small datasets do not fail
-PCAData <- cbind(PCACounts[seq_len(min(10, ncol(PCACounts)))], y$samples)
-
-# Percentage of variance explained by the first three components
-ev <- PCA$sdev^2
-pc1 <- round(100*ev[1]/sum(ev),2)
-pc2 <- round(100*ev[2]/sum(ev),2)
-pc3 <- round(100*ev[3]/sum(ev),2)
-
-lab_pc1 <- paste0("PC1: ", pc1, "% variance")
-lab_pc2 <- paste0("PC2: ", pc2, "% variance")
-lab_pc3 <- paste0("PC3: ", pc3, "% variance")
-
-# The outer parentheses print each plot as it is assigned
-(PCA_celltype_afterbatch_CellType <- plot_overall_pca(PCAData, aes(PC1, PC2), aes(fill = CellType),
-                                                      lab_pc1, lab_pc2, CellTypecols_alt))
-
-(PCA_celltype_afterbatch_Condition <- plot_overall_pca(PCAData, aes(PC1, PC2), aes(fill = Condition),
-                                                       lab_pc1, lab_pc2, Condition_colors))
-
-(PCA23_celltype_afterbatch_CellType <- plot_overall_pca(PCAData, aes(PC2, PC3), aes(fill = CellType),
-                                                        lab_pc2, lab_pc3, CellTypecols_alt))
-
-(PCA23_celltype_afterbatch_Condition <- plot_overall_pca(PCAData, aes(PC2, PC3), aes(fill = Condition),
-                                                         lab_pc2, lab_pc3, Condition_colors))
-
-
-```
-
-
-
-```{r fig.width=12, fig.height=6}
-
-# PC1/PC2: overall PCAs (by cell type, by condition) stacked in a narrow left
-# column, per-cell-type grid on the right; a 0.5 cm margin is added to both
-# and they are labelled B and C.
-plt1 <- ggarrange(PCA_celltype_afterbatch_CellType,PCA_celltype_afterbatch_Condition, ncol=1)
-(plt_PC12 <- ggarrange(plt1 +
-                         theme(plot.margin=unit(c(.5,.5,.5,.5), 'cm')) , pca_PC12_divided+
-                         theme(plot.margin=unit(c(.5,.5,.5,.5), 'cm')), widths=c(0.3,0.7), labels=c("B","C")))
-
-# Same layout for PC2/PC3, without margins or panel labels.
-plt2 <- ggarrange(PCA23_celltype_afterbatch_CellType,PCA23_celltype_afterbatch_Condition, ncol=1)
-(plt_PC23 <- ggarrange(plt2, pca_PC23_divided, widths=c(0.3,0.7)))
-```
-
-
-
-```{r  fig.width=12, fig.height=12}
-# Gene-set panels (labelled A) above the PC1/PC2 panels, which already carry
-# their own B and C labels, hence the empty second label.
-(plt_fig1 <- ggarrange(plt_genesets,plt_PC12, ncol=1, heights=c(0.55,0.45), labels=c("A","") ))
-```
-```{r}
-# Write the assembled figure to disk on a white background.
-# NOTE(review): a later chunk saves plt_fig2_alt to this same path, so this
-# file is overwritten when the whole document is run.
-ggplot2::ggsave("../Figures/Figs/Figure2_GeneSets_Datasets.png",
-                plt_fig1,
-                width = 13, height=12, bg = 'white')
-```
-
-
-Alternative
-
-```{r fig.width=12, fig.height=6}
- 
-# Overall PC1/PC2 plots (by cell type, by condition) stacked in one column;
-# the per-cell-type grid is printed again for comparison.
-(plt1_pcas <- ggarrange(PCA_celltype_afterbatch_CellType,PCA_celltype_afterbatch_Condition, ncol=1))
-pca_PC12_divided
- 
-```
-
-```{r fig.width=12, fig.height=7}
-# Overlap heatmap next to the stacked gene-count bars (y labels removed from the bars).
-# NOTE(review): plt_nb_genes2 already has a scale_x_continuous(); adding a
-# second x scale here normally replaces the first (limits become 0-1650 and the
-# custom breaks are lost) - confirm how this interacts with scale_x_break().
-plt_genesets2 <- ggarrange( plt_heatmap + theme(legend.position = "top"),
-                            plt_nb_genes2 + theme(legend.position = "top",axis.text.y=element_blank())  +scale_x_continuous(position = "top", limits = c(0,1650)) , nrow=1, align = "h",
-                            widths=c(0.7,0.3))
-plt_genesets2
-```
-
-```{r  fig.width=14, fig.height=7}
-# Gene-set panels (70% of the width) beside the stacked overall PCAs (30%).
-(plt_1 <- ggarrange(plt_genesets2,plt1_pcas, widths=c(0.7,0.3)))
-```
-
-```{r fig.width=6, fig.height=6}
-# Pie and donut charts of the senescence inducers (metadata$SenescentType).
-# Packages are attached before first use of `%>%`, count() and ggplot().
-library(ggplot2)
-library(dplyr)
-
-# Samples whose SenescentType is "none" are left out
-metadata_aux <- metadata[metadata$SenescentType != "none", ]
-
-# Number of samples per inducer; count() names the count column `n`
-sen_type_counts <- metadata_aux %>%
-  count(SenescentType)
-
-# 13 unnamed colours, assigned to the inducers in level order.
-# NOTE(review): scale_fill_manual() needs at least as many colours as there
-# are SenescentType values - extend the palette if more inducers are added.
-my_colors_custom_pastel <- c(
-  "#7ba6d8",  # dusty cornflower blue
-  "#f5a763",  # warm honey orange
-  "#89bf8a",  # soft sage green
-  "#ea7b7a",  # muted coral red
-  "#b398ca",  # lavender purple
-  "#b18372",  # cinnamon brown
-  "#f3a8c4",  # bubblegum pink
-  "#9d9d9d",  # medium grey
-  "#d3d566",  # chartreuse pastel
-  "#4eb9bb",  # bright teal
-  "#adc3e5",  # pale steel blue
-  "#f6ce7e",  # golden yellow pastel
-  "#9dcc9b"   # fresh mint green
-)
-
-# Plain pie chart: one stacked bar wrapped onto polar coordinates
-ggplot(sen_type_counts, aes(x = "", y = n, fill = SenescentType)) +
-  geom_bar(stat = "identity", width = 1) +
-  coord_polar(theta = "y") +
-  theme_void() +
-  scale_fill_manual(values = my_colors_custom_pastel, guide = guide_legend(ncol =3)) +
-  theme(legend.position = "bottom", title = element_blank())
-
-# Donut chart: the bar sits at x = 2 and xlim() leaves an empty centre,
-# which holds the caption
-(sentypes_plt <- ggplot(sen_type_counts, aes(x = 2, y = n, fill = SenescentType)) +
-  geom_bar(stat = "identity", width = 1, color = "white") +
-  coord_polar(theta = "y") +
-  xlim(0.5, 2.5) +
-  theme_void() +
-  scale_fill_manual(values = my_colors_custom_pastel, guide = guide_legend(ncol = 3)) +
-  theme(legend.position = "bottom", legend.title = element_blank()) +
-  annotate("text", x = 1, y = 0, label = "\n\n\nSenescence\nInducers", size = 5,  hjust = 0.5, vjust = 0.5))
-
-
-```
-
-```{r  fig.width=14, fig.height=12}
-# Alternative Figure 2 layout.
-# Row 1: gene-set panels (A) and overall PCAs (B).
-plt_row1 <- ggarrange(plt_genesets2,plt1_pcas, widths=c(0.7,0.3), labels=c("A","B"))
-# Row 2: per-cell-type PCAs (C) and the senescence-inducer donut (D).
-plt_row2 <- ggarrange(pca_PC12_divided,sentypes_plt, widths=c(0.6,0.4), labels=c("C","D"))
-
-# Stack the two rows (60% / 40% of the height) and print the result.
-(plt_fig2_alt <- ggarrange(plt_row1,plt_row2, ncol=1, heights=c(0.6,0.4)))
-```
-
-
-```{r}
-# Save the alternative layout on a white background.
-# NOTE(review): this is the same path the earlier chunk used for plt_fig1, so
-# that file is replaced by this one.
-ggplot2::ggsave("../Figures/Figs/Figure2_GeneSets_Datasets.png",
-                plt_fig2_alt,
-                width = 14, height=12, bg = 'white')
-```
-
-
-## Figure 3 - Scores
-
-
-<!-- ```{r fig.width=20, fig.height=4} -->
-
-<!-- (plot_logmedian <- PlotScores(data = corrcounts,  -->
-<!--                               metadata = metadata,  -->
-<!--                               method = "logmedian",  -->
-<!--                               gene_sets = signatures_bidirectional,   -->
-<!--                               ColorVariable = "CellType",  -->
-<!--                               Variable="Condition",  -->
-<!--                               ColorValues = CellTypecols_alt,  -->
-<!--                               ConnectGroups=TRUE,   -->
-<!--                               nrow = 1,  -->
-<!--                               widthTitle=20,  -->
-<!--                               limits = NULL,  -->
-<!--                               legend_nrow = 1,  -->
-<!--                               compute_cohen=F, -->
-<!--                               pointSize=2, -->
-<!--                               titlesize=10, -->
-<!--                               xlab = "", -->
-<!--                               labsize = 12)) -->
-
-<!-- (plot_ranking <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                     metadata = metadata,  -->
-<!--                                     method = "ranking",  -->
-<!--                                     gene_sets = signatures_bidirectional,   -->
-<!--                                     ColorVariable = "CellType",  -->
-<!--                                     Variable="Condition",  -->
-<!--                                     ColorValues = CellTypecols_alt,  -->
-<!--                                     ConnectGroups=TRUE,   -->
-<!--                                     nrow = 1,  -->
-<!--                                     widthTitle=20,  -->
-<!--                                     limits = NULL,  -->
-<!--                                     legend_nrow = 1,  -->
-<!--                                     compute_cohen=F, -->
-<!--                                     pointSize=2, -->
-<!--                                     titlesize=10, -->
-<!--                                     xlab = "", -->
-<!--                                     labsize = 12)) -->
-
-<!-- (plot_ssGSEA <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                    metadata = metadata,  -->
-<!--                                    method = "ssGSEA",  -->
-<!--                                    gene_sets = signatures_bidirectional,   -->
-<!--                                    ColorVariable = "CellType",  -->
-<!--                                    Variable="Condition",  -->
-<!--                                    ColorValues = CellTypecols_alt,  -->
-<!--                                    ConnectGroups=TRUE,   -->
-<!--                                    nrow = 1,  -->
-<!--                                    widthTitle=20,  -->
-<!--                                    limits = NULL,  -->
-<!--                                    legend_nrow = 1,  -->
-<!--                                    compute_cohen=F, -->
-<!--                                    pointSize=2, -->
-<!--                                    titlesize=10, -->
-<!--                                    xlab = "", -->
-<!--                                    labsize = 12)) -->
-
-<!-- ``` -->
-
-
-
-<!-- ```{r fig.width=20, fig.height=10} -->
-<!-- (fig3_scores_dist <- ggarrange(plot_logmedian , -->
-<!--           plot_ssGSEA , -->
-<!--           plot_ranking ,  -->
-<!--           common.legend=T, ncol=1, heights=c(0.35,0.3,0.3), labels = c("A","B","C"))) -->
-<!-- ``` -->
-
-
-<!-- ```{r fig.width=18, fig.height=4} -->
-
-
-<!-- Scores_Overview_All <- PlotScores(data = corrcounts,  -->
-<!--                                   metadata = metadata,   -->
-<!--                                   gene_sets=signatures_bidirectional,  -->
-<!--                                   Variable="Condition",   -->
-<!--                                   method ="all",    -->
-<!--                                   ncol = NULL,  -->
-<!--                                   nrow = 1,  -->
-<!--                                   widthTitle=20,  -->
-<!--                                   limits = c(0,2),    -->
-<!--                                   title=NULL,  -->
-<!--                                   titlesize = 10, -->
-<!--                                   #ColorValues = list(heatmap=c("#F9F4AE", "#B44141") ), -->
-<!--                                   mode="simple"#, -->
-<!--                                   #widthlegend=30,  -->
-<!--                                   #sig_threshold=0.05,  -->
-<!--                                   #cohen_threshold=0.6, -->
-<!--                                   #pointSize=6, -->
-<!--                                   #colorPalette="Paired" -->
-<!-- ) -->
-
-<!-- plt_cohenheatmap <- Scores_Overview_All$heatmap -->
-<!-- plt_cohenheatmap -->
-<!-- ``` -->
-
-<!-- ```{r fig.width=8, fig.height=8} -->
-<!-- plt_auc_heatmap <- AUC_Scores(data = corrcounts,  -->
-<!--                               metadata = metadata,  -->
-<!--                               gene_sets=signatures_bidirectional,  -->
-<!--                               method = "all",  -->
-<!--                               mode = "simple",  -->
-<!--                               variable="Condition",  -->
-<!--                               nrow = 3,  -->
-<!--                               ncol = NULL,  -->
-<!--                               limits = c(0.5,1),  -->
-<!--                               widthTitle = 20,  -->
-<!--                               titlesize = 10,  -->
-<!--                               ColorValues = c("#F9F4AE", "#B44141") )  -->
-<!-- plt_auc_heatmap -->
-
-<!-- ``` -->
- 
-<!-- ```{r fig.width=18, fig.height=4} -->
-
-<!-- (plot_logmedian_nodir <- PlotScores(data = corrcounts,  -->
-<!--                                     metadata = metadata,  -->
-<!--                                     method = "logmedian",  -->
-<!--                                     gene_sets = signatures_nodir,   -->
-<!--                                     ColorVariable = "CellType",  -->
-<!--                                     Variable="Condition",  -->
-<!--                                     ColorValues = CellTypecols_alt,  -->
-<!--                                     ConnectGroups=TRUE,   -->
-<!--                                     nrow = 1,  -->
-<!--                                     widthTitle=20,  -->
-<!--                                     limits = NULL,  -->
-<!--                                     legend_nrow = 2,  -->
-<!--                                     compute_cohen=F, -->
-<!--                                     pointSize=2, -->
-<!--                                     titlesize=9)) -->
-
-<!-- (plot_ranking_nodir <- PlotScores(data = corrcounts,  -->
-<!--                                   metadata = metadata,  -->
-<!--                                   method = "ranking",  -->
-<!--                                   gene_sets = signatures_nodir,   -->
-<!--                                   ColorVariable = "CellType",  -->
-<!--                                   Variable="Condition",  -->
-<!--                                   ColorValues = CellTypecols_alt,  -->
-<!--                                   ConnectGroups=TRUE,   -->
-<!--                                   nrow = 1,  -->
-<!--                                   widthTitle=20,  -->
-<!--                                   limits = NULL,  -->
-<!--                                   legend_nrow = 2,  -->
-<!--                                   compute_cohen=F, -->
-<!--                                   pointSize=2, -->
-<!--                                   titlesize=9)) -->
-
-<!-- (plot_ssGSEA_nodir <- PlotScores(data = corrcounts,  -->
-<!--                                  metadata = metadata,  -->
-<!--                                  method = "ssGSEA",  -->
-<!--                                  gene_sets = signatures_nodir,   -->
-<!--                                  ColorVariable = "CellType",  -->
-<!--                                  Variable="Condition",  -->
-<!--                                  ColorValues = CellTypecols_alt,  -->
-<!--                                  ConnectGroups=TRUE,   -->
-<!--                                  nrow = 1,  -->
-<!--                                  widthTitle=20,  -->
-<!--                                  limits = NULL,  -->
-<!--                                  legend_nrow = 2,  -->
-<!--                                  compute_cohen=F, -->
-<!--                                  pointSize=2, -->
-<!--                                  titlesize=9)) -->
-
-<!-- ``` -->
-
-
-
-<!-- CellAge interpretation changes drastically... -->
-
-
-<!-- ```{r} -->
-
-<!-- (plot_ranking_direct_cellage <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                                    metadata = metadata,  -->
-<!--                                                    method = "ranking",  -->
-<!--                                                    gene_sets = list(CellAge=signatures_bidirectional[["CellAge"]]),   -->
-<!--                                                    ColorVariable = "CellType",  -->
-<!--                                                    Variable="Condition",  -->
-<!--                                                    ColorValues = CellTypecols_alt,  -->
-<!--                                                    ConnectGroups=TRUE,   -->
-<!--                                                    nrow = 1,  -->
-<!--                                                    widthTitle=20,  -->
-<!--                                                    limits = NULL,  -->
-<!--                                                    legend_nrow = 2,  -->
-<!--                                                    compute_cohen=F, -->
-<!--                                                    pointSize=4, -->
-<!--                                                    titlesize=14, -->
-<!--                                                    labsize = 12, -->
-<!--                                                   xlab = "")) -->
-
-<!-- (plot_roc_direct_cellage <-  ROC_Scores(data = corrcounts,  -->
-<!--                                         metadata = metadata,  -->
-<!--                                         gene_sets=list(CellAge=signatures_bidirectional[["CellAge"]]),  -->
-<!--                                         method = "all",  -->
-<!--                                         variable ="Condition", -->
-<!--                                         colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"),  -->
-<!--                                         grid = TRUE,  -->
-<!--                                         spacing_annotation=0.3,  -->
-<!--                                         ncol=NULL,  -->
-<!--                                         nrow=1, -->
-<!--                                         mode = "simple", -->
-<!--                                         widthTitle = 28, -->
-<!--                                         titlesize = 12 ) ) -->
-
-<!-- (plot_ranking_nodir_cellage <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                                   metadata = metadata,  -->
-<!--                                                   method = "ranking",  -->
-<!--                                                   gene_sets = list(CellAge=signatures_nodir[["CellAge"]]),   -->
-<!--                                                   ColorVariable = "CellType",  -->
-<!--                                                   Variable="Condition",  -->
-<!--                                                   ColorValues = CellTypecols_alt,  -->
-<!--                                                   ConnectGroups=TRUE,   -->
-<!--                                                   nrow = 1,  -->
-<!--                                                   widthTitle=20,  -->
-<!--                                                   limits = NULL,  -->
-<!--                                                   legend_nrow = 2,  -->
-<!--                                                   compute_cohen=F, -->
-<!--                                                   pointSize=4, -->
-<!--                                                   titlesize=14, -->
-<!--                                                    labsize = 12, -->
-<!--                                                   xlab = "")) -->
-
-
-<!-- (plot_roc_nodir_cellage <-  ROC_Scores(data = corrcounts,  -->
-<!--                                        metadata = metadata,  -->
-<!--                                        gene_sets=list(CellAge=signatures_nodir[["CellAge"]]),  -->
-<!--                                        method = "all",  -->
-<!--                                        variable ="Condition", -->
-<!--                                        colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"),  -->
-<!--                                        grid = TRUE,  -->
-<!--                                        spacing_annotation=0.3,  -->
-<!--                                        ncol=NULL,  -->
-<!--                                        nrow=1, -->
-<!--                                        mode = "simple", -->
-<!--                                        widthTitle = 28, -->
-<!--                                        titlesize = 12 ) ) -->
-
-<!-- ``` -->
-
-<!-- ```{r fig.width=9, fig.height=3} -->
-<!-- fig_cellage_direct <- ggarrange(plot_ranking_direct_cellage,plot_roc_direct_cellage, widths=c(0.3,0.6)) -->
-<!-- fig_cellage_direct -->
-<!-- ``` -->
-
-<!-- ```{r fig.width=9, fig.height=3} -->
-<!-- fig_cellage_nodir <- ggarrange(plot_ranking_nodir_cellage,plot_roc_nodir_cellage, widths=c(0.3,0.6) ) -->
-<!-- fig_cellage_nodir -->
-<!-- ``` -->
-<!-- ```{r fig.width=10, fig.height=6} -->
-<!-- fig_cellage <- ggarrange(fig_cellage_direct,fig_cellage_nodir, labels=c("F","G"), ncol=1,font.label = list(size = 20) ) -->
-<!-- fig_cellage -->
-<!-- ``` -->
-
-<!-- ```{r fig.width=18, fig.height=6} -->
-<!-- fig_EFG <- ggarrange(plt_auc_heatmap,fig_cellage, -->
-<!--                      nrow=1, -->
-<!--                      labels=c("E","",""), widths=c(0.4,0.6),font.label = list(size = 20)) -->
-<!-- fig_EFG -->
-<!-- ``` -->
-
-
-
-<!-- ```{r fig.width=20, fig.height=21} -->
-<!-- (plt_fig2 <- ggarrange(plot_logmedian, -->
-<!--                        plot_ranking, -->
-<!--                        plot_ssGSEA, -->
-<!--                        plt_cohenheatmap, -->
-<!--                        fig_EFG, labels=c("A","B","C","D",""), ncol=1, heights=c(0.18,0.15,0.15,0.15,0.3),font.label = list(size = 20))) -->
-<!-- ``` -->
-
-<!-- ```{r} -->
-<!-- ggplot2::ggsave("../Figures/Figs/Figure3_Scores.png", -->
-<!--                 plt_fig2, -->
-<!--                 width = 20, height=22, bg = 'white') -->
-<!-- ``` -->
-
-<!-- ### Alternative -->
-
-```{r}
-# Score the bidirectional signatures with each of the three scoring methods.
-# Every result is assigned first and then echoed on its own line so that it is
-# still printed in the rendered document.
-scores_logmedian <- CalculateScores(
-  data = corrcounts,
-  metadata = metadata,
-  method = "logmedian",
-  gene_sets = signatures_bidirectional
-)
-scores_logmedian
-
-scores_ssGSEA <- CalculateScores(
-  data = corrcounts,
-  metadata = metadata,
-  method = "ssGSEA",
-  gene_sets = signatures_bidirectional
-)
-scores_ssGSEA
-
-scores_ranking <- CalculateScores(
-  data = corrcounts,
-  metadata = metadata,
-  method = "ranking",
-  gene_sets = signatures_bidirectional
-)
-scores_ranking
-
-# Collect the per-method results in a single list keyed by method name.
-scoresList <- list(
-  logmedian = scores_logmedian,
-  ssGSEA = scores_ssGSEA,
-  ranking = scores_ranking
-)
-```
-
-
-```{r}
-# Stack the nested scoresList into one long data frame. The outer level is
-# iterated by method name and the inner level by signature name; each inner
-# data frame gets `Method` and `Signature` columns recording where it came from.
-flat_df_scores <- imap_dfr(scoresList, function(per_method, method_name) {
-  imap_dfr(per_method, function(scores_tbl, signature_name) {
-    mutate(scores_tbl, Method = method_name, Signature = signature_name)
-  })
-})
-
-flat_df_scores
-```
-```{r fig.width=16, fig.height=12}
-# One panel per signature: scores per Condition, faceted by scoring method,
-# with points and median trend lines coloured by CellType.
-signatures <- unique(flat_df_scores$Signature)
-
-# Plots are stored under their signature name.
-plot_list <- list()
-
-for (sig in signatures) {
-  df_sig <- flat_df_scores[flat_df_scores$Signature == sig, ]
-
-  p <- ggplot(df_sig, aes(x = Condition, y = score)) +
-    geom_jitter(size = 2, aes(color = CellType), alpha = 0.7) +
-    geom_violin(trim = FALSE, alpha = 0.6, scale = "width") +
-    # Line joining the per-CellType medians across conditions. `fun` replaces
-    # the deprecated `fun.y` argument, matching the crossbar layer below.
-    stat_summary(aes(group = CellType, color = CellType),
-                 fun = median, geom = "line", size = 1.5, alpha = 0.75,
-                 show.legend = FALSE) +
-    # Crossbar collapsed onto the overall median of each condition.
-    stat_summary(fun = median, fun.min = median, fun.max = median,
-                 geom = "crossbar", width = 0.25,
-                 position = position_dodge(width = 0.13)) +
-    facet_wrap(~ Method, scales = "free_y") +
-    scale_color_manual(values = CellTypecols_alt) +
-    theme_minimal(base_size = 16) +
-    theme(
-      axis.text.x = element_text(angle = 45, hjust = 1),
-      strip.text = element_text(face = "bold"),
-      legend.position = "right",
-      plot.title = ggplot2::element_text(hjust = 0.5)
-    ) +
-    labs(
-      title = wrap_title(sig, width = 28),
-      x = "",
-      y = "Score",
-      color = ""
-    ) +
-    guides(color = guide_legend(nrow = 1, override.aes = list(size = 4)))
-
-  plot_list[[sig]] <- p
-}
-
-# Arrange all panels in a 3 x 3 grid with one shared legend on top.
-plt_scores_2 <- ggarrange(plotlist = plot_list, ncol = 3, nrow = 3,
-                          common.legend = TRUE, legend = "top", align = "h")
-plt_scores_2
-
-```
-
-CellAge interpretation changes
-
-```{r}
-
-# Illustrating directionality
-
-# Build a direction-free copy of the signatures: data-frame entries are reduced
-# to their first column as a character vector, anything else is passed through.
-signatures_nodir <- lapply(signatures_bidirectional, function(sig) {
-  if (!is.data.frame(sig)) {
-    return(sig)
-  }
-  as.character(sig[[1]])
-})
-
-```
-
-```{r}
-
-# CellAge scored with the "ranking" method and evaluated with ROC curves, once
-# from the bidirectional signature and once from its direction-free version.
-# Logical flags are spelled out as TRUE/FALSE because `T` and `F` are ordinary
-# variables that can be reassigned.
-
-# Bidirectional CellAge: ranking scores.
-(plot_ranking_direct_cellage_2 <- PlotScores_adapted(
-  data = corrcounts,
-  metadata = metadata,
-  method = "ranking",
-  gene_sets = list(CellAge = signatures_bidirectional[["CellAge"]]),
-  ColorVariable = "CellType",
-  Variable = "Condition",
-  ColorValues = CellTypecols_alt,
-  ConnectGroups = TRUE,
-  nrow = 1,
-  widthTitle = 20,
-  limits = NULL,
-  legend_nrow = 2,
-  compute_cohen = FALSE,
-  pointSize = 2,
-  titlesize = 14,
-  labsize = 12,
-  xlab = ""
-))
-
-# Bidirectional CellAge: ROC curves for all scoring methods.
-(plot_roc_direct_cellage_2 <- ROC_Scores(
-  data = corrcounts,
-  metadata = metadata,
-  gene_sets = list(CellAge = signatures_bidirectional[["CellAge"]]),
-  method = "all",
-  variable = "Condition",
-  colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"),
-  grid = TRUE,
-  spacing_annotation = 0.3,
-  ncol = NULL,
-  nrow = 1,
-  mode = "simple",
-  widthTitle = 28,
-  titlesize = 12
-))
-
-# Direction-free CellAge: ranking scores.
-(plot_ranking_nodir_cellage_2 <- PlotScores_adapted(
-  data = corrcounts,
-  metadata = metadata,
-  method = "ranking",
-  gene_sets = list(CellAge = signatures_nodir[["CellAge"]]),
-  ColorVariable = "CellType",
-  Variable = "Condition",
-  ColorValues = CellTypecols_alt,
-  ConnectGroups = TRUE,
-  nrow = 1,
-  widthTitle = 20,
-  limits = NULL,
-  legend_nrow = 2,
-  compute_cohen = FALSE,
-  pointSize = 2,
-  titlesize = 14,
-  labsize = 12,
-  xlab = ""
-))
-
-# Direction-free CellAge: ROC curves for all scoring methods.
-(plot_roc_nodir_cellage_2 <- ROC_Scores(
-  data = corrcounts,
-  metadata = metadata,
-  gene_sets = list(CellAge = signatures_nodir[["CellAge"]]),
-  method = "all",
-  variable = "Condition",
-  colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"),
-  grid = TRUE,
-  spacing_annotation = 0.3,
-  ncol = NULL,
-  nrow = 1,
-  mode = "simple",
-  widthTitle = 28,
-  titlesize = 12
-))
-
-```
-
-```{r fig.width=16, fig.height=3}
-# Pair each CellAge ranking plot with its ROC panel; the ROC panel gets the
-# larger share of the width.
-fig_cellage_direct <- ggarrange(
-  plot_ranking_direct_cellage_2, plot_roc_direct_cellage_2,
-  widths = c(0.2, 0.7)
-)
-
-fig_cellage_nodir <- ggarrange(
-  plot_ranking_nodir_cellage_2, plot_roc_nodir_cellage_2,
-  widths = c(0.2, 0.7)
-)
-
-# Bidirectional (label "B") and direction-free (label "C") figures in one row.
-fig_cellage_2 <- ggarrange(
-  fig_cellage_direct, fig_cellage_nodir,
-  nrow = 1,
-  labels = c("B", "C"),
-  font.label = list(size = 20)
-)
-fig_cellage_2
-```
-```{r fig.width=16, fig.height=16}
-# Stack the per-signature score grid (labelled "A") above the CellAge row,
-# which already carries its own "B"/"C" labels.
-plt_fig3_alt <- ggarrange(
-  plt_scores_2, fig_cellage_2,
-  heights = c(0.78, 0.22),
-  ncol = 1,
-  labels = c("A", ""),
-  font.label = list(size = 20)
-)
-plt_fig3_alt
-```
-```{r}
-# Write the assembled alternative Figure 3 to disk on a white background.
-ggplot2::ggsave(
-  filename = "../Figures/Figs/Figure3_Scores_alt.png",
-  plot = plt_fig3_alt,
-  width = 18,
-  height = 16,
-  bg = 'white'
-)
-```
-
-## Figure 4 - Enrichment-based methods
-
-```{r}
-
-# Differential expression on the Condition variable for the two contrasts
-# involving senescent samples.
-degenes <- calculateDE(
-  data = corrcounts,
-  metadata = metadata,
-  variables = "Condition",
-  modelmat = NULL,
-  contrasts = c(
-    "Senescent - Proliferative",
-    "Senescent - Quiescent"
-    # "Proliferative - Quiescent" is currently disabled
-  )
-)
-
-degenes
-```
-
-```{r fig.width=21, fig.height=4}
-
-# Volcano plots of the differential-expression results with the genes of every
-# bidirectional signature highlighted. `invert` is written as TRUE because `T`
-# is an ordinary variable that can be reassigned.
-(plotvolcano <- plotVolcano(
-  DEResultsList = degenes,
-  genes = signatures_bidirectional,
-  N = NULL,
-  x = "logFC",
-  y = "-log10(adj.P.Val)",
-  pointSize = 2,
-  color = "pink",
-  highlightcolor = "darkblue",
-  highlightcolor_upreg = "#038C65",
-  highlightcolor_downreg = "#8C0303",
-  nointerestcolor = "grey",
-  threshold_y = NULL,
-  threshold_x = NULL,
-  xlab = NULL,
-  ylab = NULL,
-  ncol = NULL,
-  nrow = NULL,
-  title = NULL,
-  labsize = 7,
-  widthlabs = 28,
-  invert = TRUE
-))
-
-```
-
-
-
-```{r}
-# Run GSEA for the bidirectional signatures on the differential-expression
-# results, leaving `stat` at NULL.
-GSEAresults <- runGSEA(
-  degenes,
-  signatures_bidirectional,
-  stat = NULL
-)
-GSEAresults
-```
-
-
-```{r fig.width=25, fig.height=6}
-# GSEA enrichment plots laid out on a 2 x 9 grid.
-# NOTE(review): the result is stored under the same name as the function that
-# creates it. The name is kept because later code may refer to it, but a
-# distinct object name would be clearer.
-(plotGSEAenrichment <- plotGSEAenrichment(
-  GSEA_results = GSEAresults,
-  DEGList = degenes,
-  gene_sets = signatures_bidirectional,
-  widthTitle = 32,
-  grid = TRUE,
-  titlesize = 10,
-  nrow = 2,
-  ncol = 9
-))
-```
-
-```{r fig.width=11, fig.height=4.5}
-
-# NES lollipop plots for the GSEA results, arranged in a single row.
-# NOTE(review): the result is stored under the same name as the function that
-# creates it; the name is kept because other chunks reference this object.
-(plotNESlollipop <- plotNESlollipop(
-  GSEA_results = GSEAresults,
-  sig_threshold = 0.05,
-  saturation_value = 0.00001,
-  nonsignif_color = "white",
-  signif_color = "#934873",
-  grid = TRUE,
-  nrow = 1,
-  ncol = NULL,
-  widthlabels = 20,
-  title = NULL,
-  titlesize = 14
-))
-
-```
-
-```{r fig.width=8, fig.height=4}
-# Combined GSEA summary plot across the contrasts in GSEAresults.
-plotsummaryvolcano <- plotCombinedGSEA(
-  GSEAresults,
-  sig_threshold = 0.05,
-  PointSize = 9,
-  widthlegend = 26
-)
-plotsummaryvolcano
-```
-
-```{r fig.width=20, fig.height=10}
-# Bottom row: lollipop plot (label "B") beside the combined GSEA plot ("C").
-plt_bottom <- ggarrange(
-  plotNESlollipop, plotsummaryvolcano,
-  labels = c("B", "C"),
-  widths = c(0.53, 0.47),
-  font.label = list(size = 18)
-)
-
-# Volcano plots on top, bottom row underneath; the labels are left empty here
-# because panel "A" is added by hand below.
-fig <- ggarrange(
-  plotvolcano, plt_bottom,
-  heights = c(0.5, 0.5),
-  ncol = 1,
-  labels = c("", ""),
-  align = "v"
-)
-
-# Add the "A" label as a top annotation so it sits above the volcano row.
-label_a <- text_grob("A", x = 0, hjust = -0.1, vjust = -0.0,
-                     face = "bold", size = 18)
-fig3 <- annotate_figure(fig, top = label_a)
-
-fig3
-```
-
-
-```{r}
-# Save the assembled enrichment figure (`fig3`) as a PNG on a white background.
-# NOTE(review): this chunk sits under the "Figure 4" heading, yet the file is
-# named "Figure3_..." and is written to "Figs/" rather than the
-# "../Figures/Figs/" directory used by the other ggsave() calls in this file.
-# Confirm that the path and figure number are intended.
-ggplot2::ggsave("Figs/Figure3_Enrichment.png",
-                fig3,
-                width = 20, height=10, bg = 'white')
-```
-
-### Alternative 1
-
-```{r}
-# Pull out the GSEA table of each contrast and tag it with a short label.
-GSEAresults_SP <- GSEAresults$`Senescent-Proliferative`
-GSEAresults_SQ <- GSEAresults$`Senescent-Quiescent`
-GSEAresults_SP$contrast <- "Sen_vs_Prol"
-GSEAresults_SQ$contrast <- "Sen_vs_Quiesc"
-
-# Stack both tables, Senescent-vs-Quiescent rows first.
-GSEAresults_merge <- rbind(GSEAresults_SQ, GSEAresults_SP)
-GSEAresults_merge
-```
-
-```{r fig.width=4, fig.height=4}
-# Put the NES and adjusted p-value of both contrasts side by side, one row per
-# pathway.
-df_ggplot <- GSEAresults_merge[, c("pathway", "contrast", "NES", "padj")]
-df_ggplot <- merge(subset(df_ggplot, contrast == "Sen_vs_Prol"),
-                   subset(df_ggplot, contrast == "Sen_vs_Quiesc"),
-                   by = "pathway")
-# After the merge the ".x" columns come from Sen_vs_Prol and the ".y" columns
-# from Sen_vs_Quiesc; rename them to say so.
-colnames(df_ggplot) <- c("pathway",
-                         "contrast.x", "NES_Sen_vs_Prol", "padj_Sen_vs_Prol",
-                         "contrast.y", "NES_Sen_vs_Quiesc", "padj_Sen_vs_Quiesc")
-# A pathway counts as significant only if it passes 0.05 in both contrasts.
-df_ggplot$significance <- df_ggplot$padj_Sen_vs_Prol <= 0.05 &
-  df_ggplot$padj_Sen_vs_Quiesc <= 0.05
-
-# NES of one contrast against the other. The fill colours are named so that
-# TRUE is always purple and FALSE always white; unnamed values are assigned in
-# level order, which would colour TRUE white whenever no FALSE row is present.
-(plt_scatter_gsea <- ggplot(df_ggplot, aes(x = NES_Sen_vs_Prol, y = NES_Sen_vs_Quiesc)) +
-    geom_point(alpha = 0.6, size = 8, shape = 21, aes(fill = significance)) +
-    theme_bw() +
-    geom_label_repel(aes(label = pathway), size = 3, force_pull = 1, fill = NA,
-                     max.overlaps = 10000, min.segment.length = 0, force = 5,
-                     box.padding = .5) +
-    geom_vline(xintercept = 0, size = 0.6, linetype = "dashed") +
-    geom_hline(yintercept = 0, size = 0.6, linetype = "dashed") +
-    xlab("NES Senescence vs Proliferation") +
-    ylab("NES Senescence vs Quiescence") +
-    scale_fill_manual(values = c("FALSE" = "white", "TRUE" = "#934873"),
-                      name = "Adj. p-value <= 0.05    .") +
-    theme(legend.position = "top") +
-    theme(plot.title = element_text(hjust = 0.5)))
-```
-
-
-```{r fig.width=8, fig.height=12}
-
-# Draw the volcano plots for a subset of the bidirectional signatures.
-# All three panels below share every setting and differ only in
-# `signature_idx`, the positions of the signatures to highlight.
-volcano_for_signatures <- function(signature_idx) {
-  plotVolcano(
-    DEResultsList = degenes,
-    genes = signatures_bidirectional[signature_idx],
-    N = NULL,
-    x = "logFC",
-    y = "-log10(adj.P.Val)",
-    pointSize = 2,
-    color = "pink",
-    highlightcolor = "darkblue",
-    highlightcolor_upreg = "#038C65",
-    highlightcolor_downreg = "#8C0303",
-    nointerestcolor = "grey",
-    threshold_y = NULL,
-    threshold_x = NULL,
-    xlab = NULL,
-    ylab = NULL,
-    ncol = NULL,
-    nrow = 2,
-    title = NULL,
-    labsize = 7,
-    widthlabs = 28,
-    invert = TRUE
-  )
-}
-
-# Three signatures per panel.
-plotvolcano_1 <- volcano_for_signatures(1:3)
-plotvolcano_2 <- volcano_for_signatures(4:6)
-plotvolcano_3 <- volcano_for_signatures(7:9)
-
-# Stack the three panels vertically.
-(plt_volcano_new <- ggarrange(plotvolcano_1, plotvolcano_2, plotvolcano_3, ncol = 1))
-```
-
-```{r fig.width=5, fig.height=10}
-
-# One lollipop plot per contrast, each built from a single-element list so the
-# two can be stacked separately afterwards. Logical flags are written as TRUE
-# because `T` is an ordinary variable that can be reassigned.
-(plotNESlollipop_1 <- plotNESlollipop(
-  GSEA_results = list("Senescent-Proliferative" = GSEAresults$`Senescent-Proliferative`),
-  sig_threshold = 0.05,
-  saturation_value = 0.00001,
-  nonsignif_color = "white",
-  signif_color = "#934873",
-  grid = TRUE,
-  nrow = 1,
-  ncol = NULL,
-  widthlabels = 20,
-  title = NULL,
-  titlesize = 14
-))
-
-(plotNESlollipop_2 <- plotNESlollipop(
-  GSEA_results = list("Senescent-Quiescent" = GSEAresults$`Senescent-Quiescent`),
-  sig_threshold = 0.05,
-  saturation_value = 0.00001,
-  nonsignif_color = "white",
-  signif_color = "#934873",
-  grid = TRUE,
-  nrow = 1,
-  ncol = NULL,
-  widthlabels = 20,
-  title = NULL,
-  titlesize = 14
-))
-
-# Stack the two contrast plots in one column with a shared legend.
-(plotNESlollipop_new <- ggarrange(
-  plotNESlollipop_1$`Senescent-Proliferative`,
-  plotNESlollipop_2$`Senescent-Quiescent`,
-  ncol = 1,
-  common.legend = TRUE
-))
-```
-
-
-```{r fig.height=14, fig.width=12}
-# Right column: lollipop plots (label "B") above the NES scatter ("C").
-plt_right <- ggarrange(
-  plotNESlollipop_new, plt_scatter_gsea,
-  ncol = 1,
-  labels = c("B", "C"),
-  heights = c(0.7, 0.3)
-)
-
-# Final layout: volcano panels (label "A") on the left, right column beside.
-plt_final <- ggarrange(
-  plt_volcano_new, plt_right,
-  labels = c("A", ""),
-  widths = c(0.6, 0.4)
-)
-plt_final
-```
-
-```{r}
-# Write the alternative enrichment figure to disk on a white background.
-ggplot2::ggsave(
-  filename = "../Figures/Figs/Figure4_Enrichment_alternative.png",
-  plot = plt_final,
-  width = 12,
-  height = 14,
-  bg = 'white'
-)
-```
-
-## Figure 5 - Comparative Performance of Senescence Signatures
-
-```{r fig.width=12, fig.height=4}
-
-# Per-gene t-statistics of both contrasts, joined on gene name.
-degenes_SP <- degenes$`Senescent-Proliferative`
-degenes_SP$gene <- row.names(degenes_SP)
-degenes_SQ <- degenes$`Senescent-Quiescent`
-degenes_SQ$gene <- row.names(degenes_SQ)
-
-degenes_SP_SQ <- merge(degenes_SP, degenes_SQ, by = "gene")
-# After the merge, "t.x" comes from Senescent-Proliferative and "t.y" from
-# Senescent-Quiescent.
-degenes_SP_SQ <- degenes_SP_SQ[, c("gene", "t.x", "t.y")]
-colnames(degenes_SP_SQ) <- c("gene", "t_SP", "t_SQ")
-
-degenes_SP_SQ
-
-# Scatter of the two t-statistics for all genes (grey) with the genes of one
-# signature drawn on top (green).
-#   signature_genes: gene names to highlight
-#   panel_title:     plot title
-# The three panels below previously repeated this definition verbatim.
-tstat_scatter <- function(signature_genes, panel_title) {
-  in_signature <- degenes_SP_SQ$gene %in% signature_genes
-
-  ggplot(degenes_SP_SQ, aes(x = t_SP, y = t_SQ)) +
-    geom_point(size = 3, alpha = 0.6, color = "#C2C2C2") +
-    theme_bw() +
-    xlab("t-statistic Senescent vs Proliferative") +
-    ylab("t-statistic Senescent \nvs Quiescent") +
-    geom_density_2d(color = "white") +
-    theme(plot.title = element_text(hjust = 0.5, size = 12),
-          plot.subtitle = element_text(hjust = 0.5),
-          text = element_text(size = 12)) +
-    geom_point(data = degenes_SP_SQ[in_signature, ],
-               fill = "#96C29B", size = 3, alpha = 0.8, shape = 21) +
-    ggtitle(panel_title) +
-    geom_vline(xintercept = 0, color = "black", size = 0.8, linetype = "dashed") +
-    geom_hline(yintercept = 0, color = "black", size = 0.8, linetype = "dashed")
-}
-
-plt_scatter_SeneQuest <- tstat_scatter(
-  signatures_bidirectional$SeneQuest$gene, "SeneQuest"
-)
-
-plt_scatter_HernandezSegura <- tstat_scatter(
-  signatures_bidirectional$HernandezSegura$gene, "HernandezSegura"
-)
-
-# Disabled panel, kept for reference:
-# plt_scatter_SAUL_SEN_MAYO <- tstat_scatter(
-#   signatures_bidirectional$SAUL_SEN_MAYO$gene, "SAUL_SEN_MAYO"
-# )
-
-# This entry is used directly as the gene vector (no `$gene` column).
-plt_scatter_REACTOME <- tstat_scatter(
-  signatures_bidirectional$REACTOME_CELLULAR_SENESCENCE,
-  "REACTOME_CELLULAR_SENESCENCE"
-)
-
-(plt_tstats <- ggarrange(plt_scatter_SeneQuest, plt_scatter_HernandezSegura,
-                         plt_scatter_REACTOME, nrow = 1))
-
-
-```
-
-
-
-
-```{r}
-
-(plot_logmedian_SeneQuest <- PlotScores_adapted(data = corrcounts, 
-                                                metadata = metadata, 
-                                                method = "logmedian", 
-                                                gene_sets = list(SeneQuest =  signatures_bidirectional$SeneQuest ,
-                                                                 UP_SeneQuest= subset(signatures_bidirectional$SeneQuest, enrichment==1),
-                                                                 DOWN_SeneQuest = subset(signatures_bidirectional$SeneQuest, enrichment==-1)),   
-                                                Variable="Condition",  
-                                                ConnectGroups=F,  
-                                                nrow = 1, 
-                                                widthTitle=20, 
-                                                limits = NULL, 
-                                                legend_nrow = 2, 
-                                                compute_cohen=F,
-                                                pointSize=2,
-                                                titlesize=12,
-                                                cond_cohend = list(A="Senescent",B="Quiescent"),
-                                                pvalcalc = F, ColorValues = "pink"))
-
-```
-
-
-
-```{r fig.width=12, fig.height=6}
-(plt_fig5_senequest <- ggarrange(plt_tstats,plot_logmedian_SeneQuest, ncol=1, heights=c(0.45,0.55), labels=c("A","B")))
-```
-
-
-
-```{r}
-# ggplot2::ggsave("Figs/Figure5_SeneQuest.png",
-#                 plt_fig5_senequest,
-#                 width = 12, height=6, bg = 'white')
-```
-
-
-
-```{r}
-
-(plot_logmedian_HernandezSegura <- PlotScores_adapted(data = corrcounts, 
-                                                metadata = metadata, 
-                                                method = "logmedian", 
-                                                gene_sets = list(HernandezSegura =  signatures_bidirectional$HernandezSegura ,
-                                                                 UP_HernandezSegura= subset(signatures_bidirectional$HernandezSegura, enrichment==1),
-                                                                 DOWN_HernandezSegura = subset(signatures_bidirectional$HernandezSegura, enrichment==-1)),   
-                                                Variable="Condition",  
-                                                ConnectGroups=F,  
-                                                nrow = 1, 
-                                                widthTitle=20, 
-                                                limits = NULL, 
-                                                legend_nrow = 2, 
-                                                compute_cohen=F,
-                                                pointSize=2,
-                                                titlesize=12,
-                                                cond_cohend = list(A="Senescent",B="Quiescent"),
-                                                pvalcalc = F, ColorValues = "pink"))
-
-```
-
-
-what to expect from scores and GSEA
-
-
-```{r}
-
-set.seed(123)
-
-# Sample size per condition
-n <- 300
-
-# Two BAD signatures:
-# - Bad A: all conditions similar
-# - Bad B: Senescent and Quiescent similar; Proliferative very different
-mock_scores <- data.frame(
-  sampleID = paste0("Sample_", 1:(n * 3)),
-  Condition = rep(c("Senescent", "Proliferative", "Quiescent"), each = n),
-  
-  # GOOD signatures
-  Sig_Good1 = c(rnorm(n, 8, 1), rnorm(n, 3.5, 1), rnorm(n, 3, 1)),
-  Sig_Good2 = c(rnorm(n, 2, 1), rnorm(n, 6, 1), rnorm(n, 6.5, 1)),
-  
-  # BAD signatures
-  Sig_Bad1 = c(rnorm(n, 5, 1), rnorm(n, 5.1, 1), rnorm(n, 5.2, 1)),  # All similar
-  Sig_Bad2 = c(rnorm(n, 5, 1), rnorm(n, 8, 1), rnorm(n, 5.1, 1))    # Sen ≈ Quiesc, Prol different
-)
-
-# Reshape
-mock_long <- mock_scores %>%
-  pivot_longer(cols = starts_with("Sig_"), names_to = "Signature", values_to = "Score") %>%
-  mutate(Signature = recode(Signature,
-                            Sig_Good1 = "Good Signature A",
-                            Sig_Good2 = "Good Signature B",
-                            Sig_Bad1  = "Bad Signature A",
-                            Sig_Bad2  = "Bad Signature B"))
-
-```
-
-
-```{r fig.width=6, fig.height=3}
-(plt_expectation_scores <- ggplot(mock_long, aes(x = Condition, y = Score, fill = Condition)) +
-   #geom_jitter(width = 0.3, size = 2, alpha = 0.5, color = "grey") +
-   geom_violin(trim = FALSE, alpha = 0.4, size = 1) + 
-   facet_wrap(
-     ~Signature, nrow=1,
-     labeller = labeller(Signature = c(
-       "Good Signature A" = "Good Signature",
-       "Good Signature B" = "Good Signature",
-       "Bad Signature A" = "Bad Signature",
-       "Bad Signature B" = "Bad Signature"
-     ))
-   ) +
-   theme_classic(base_size = 13) +
-   scale_fill_manual(values = c(
-     "Senescent"     = "#F4A261",
-     "Proliferative" = "#A1C298",
-     "Quiescent"     = "#9DB4C0"
-   )) +
-   labs(x = NULL, y = "Signature Score") +
-   theme(
-     legend.position = "none",
-     axis.text.y = element_blank(),
-     axis.text.x = element_text(angle = 45, hjust = 1),
-     axis.ticks.y = element_blank(),
-     strip.text = element_text(face = "bold")
-   ) +
-   stat_summary(
-     fun.y = median, fun.ymin = median, fun.ymax = median,
-     geom = "crossbar", width = 0.25,
-     position = position_dodge(width = .13)
-   ))
-
-
-```
-
-
-```{r fig.width=4, fig.height=4}
-
-# Create mock NES values manually for clarity
-df_signatures <- data.frame(
-  Signature = paste0("Sig", 1:10),
-  NES_Sen_vs_Prol = c( 2.5, -3.0, 0.2, -0.1, 2.8, -2.7,  0.3,  0.1,  2.0, -2.0),
-  NES_Sen_vs_Quiesc = c(2.6, -3.2, 2.9, -0.2, -0.1,  0.3,  0.2, -0.3, -2.1, 2.1)
-)
-
-# Classify quality: close to axis = bad, strong in both directions = good
-df_signatures$Quality <- ifelse(
-  abs(df_signatures$NES_Sen_vs_Prol) > 1.5 & abs(df_signatures$NES_Sen_vs_Quiesc) > 1.5,
-  "Good", "Bad"
-)
-
-(plt_expectation_gsea <- ggplot(df_signatures, aes(x = NES_Sen_vs_Prol, y = NES_Sen_vs_Quiesc)) +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "gray70") +
-    geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
-    geom_point(aes(fill = Quality), shape = 21, size = 7, alpha = 0.85, color = "black") +
-    scale_fill_manual(values = c("Good" = "#3E7CB1", "Bad" = "#D1495B")) +
-    labs(
-      x = "NES (Senescent vs Proliferative)\n\n",
-      y = "NES (Senescent vs Quiescent)",
-      fill = "Signature Quality  "
-    ) +
-    coord_fixed() +
-    theme_classic(base_size = 12) +
-    theme(legend.position = "top"))
-
-```
-
-
-
-```{r fig.width=12, fig.height=4}
-(plt_expectations <- ggarrange(plt_expectation_scores,plt_expectation_gsea, labels=c("A","B"), widths=c(0.65,0.35)))
-```
-
-
-
-```{r fig.width=12, fig.height=6}
-
-
-# Local job: plt_fdrsim_subset_v2
-# 
-# plt_fdrsim_subset_test <- FPR_Simulation(data = corrcounts,
-#                               metadata = metadata,
-#                               original_signatures = list(CellAge=signatures_bidirectional$CellAge,
-#                                                          HernandezSegura=signatures_bidirectional$HernandezSegura,
-#                                                          SAUL_SEN_MAYO=signatures_bidirectional$SAUL_SEN_MAYO,
-#                                                          SeneQuest=signatures_bidirectional$SeneQuest
-#                               ),
-#                               gene_list = row.names(corrcounts),
-#                               number_of_sims = 5,
-#                               widthTitle = 30,
-#                               Variable = "Condition",
-#                               titlesize = 12,
-#                               pointSize = 3,
-#                               labsize = 10,
-#                               mode = "simple",
-#                               ColorValues=NULL,
-#                               ncol=NULL,
-#                               nrow=1 )
-
-plt_fdrsim_subset_v2 <- readRDS("../data/plt_fdrsim_subset_v2.rds")
-plt_fdrsim_subset_v2
-```
-
-<!-- # ```{r} -->
-<!-- # signatures_bidirectional_updown <- signatures_bidirectional -->
-<!-- # signatures_bidirectional_updown$UP_HernandezSegura <- subset(signatures_bidirectional_updown$HernandezSegura, enrichment==1) -->
-<!-- # signatures_bidirectional_updown$DOWN_HernandezSegura <- subset(signatures_bidirectional_updown$HernandezSegura, enrichment==-1) -->
-<!-- # signatures_bidirectional_updown$UP_SeneQuest <- subset(signatures_bidirectional_updown$SeneQuest, enrichment==1) -->
-<!-- # signatures_bidirectional_updown$DOWN_SeneQuest <- subset(signatures_bidirectional_updown$SeneQuest, enrichment==-1) -->
-<!-- # signatures_bidirectional_updown$UP_CellAge <- subset(signatures_bidirectional_updown$CellAge, enrichment==1) -->
-<!-- # signatures_bidirectional_updown$DOWN_CellAge <- subset(signatures_bidirectional_updown$CellAge, enrichment==-1) -->
-<!-- #  -->
-<!-- # signatures_bidirectional_updown$HernandezSegura <- NULL -->
-<!-- # signatures_bidirectional_updown$SeneQuest <- NULL -->
-<!-- # signatures_bidirectional_updown$CellAge <- NULL -->
-<!-- #  -->
-<!-- #  -->
-<!-- #  -->
-<!-- # ``` -->
-<!-- #  -->
-<!-- #  -->
-<!-- # ```{r} -->
-<!-- # GSEAresults_separate_UPDOWN <- runGSEA(degenes, signatures_bidirectional_updown, stat = NULL) -->
-<!-- # GSEAresults_separate_UPDOWN -->
-<!-- # ``` -->
-<!-- # ```{r fig.width=4, fig.height=4} -->
-<!-- #  -->
-<!-- # GSEAresults_SQ_updown <- GSEAresults_separate_UPDOWN$`Senescent-Quiescent` -->
-<!-- # GSEAresults_SQ_updown$contrast <- "Sen_vs_Quiesc" -->
-<!-- # GSEAresults_SP_updown <- GSEAresults_separate_UPDOWN$`Senescent-Proliferative` -->
-<!-- # GSEAresults_SP_updown$contrast <- "Sen_vs_Prol" -->
-<!-- #  -->
-<!-- # GSEAresults_merge_updown <- rbind(GSEAresults_SQ_updown,GSEAresults_SP_updown) -->
-<!-- # GSEAresults_merge_updown -->
-<!-- #  -->
-<!-- #  -->
-<!-- # df_ggplot  <- GSEAresults_merge_updown[,c("pathway","contrast","NES","padj")] -->
-<!-- # df_ggplot <- merge(subset(df_ggplot, contrast=="Sen_vs_Prol"),subset(df_ggplot, contrast=="Sen_vs_Quiesc"), by="pathway") -->
-<!-- # colnames(df_ggplot) <- c("pathway","contrast.x","NES_Sen_vs_Prol","padj_Sen_vs_Prol","contrast.y","NES_Sen_vs_Quiesc","padj_Sen_vs_Quiesc") -->
-<!-- # df_ggplot$significance <- df_ggplot$padj_Sen_vs_Prol <= 0.05 & df_ggplot$padj_Sen_vs_Quiesc <= 0.05 -->
-<!-- #  -->
-<!-- # # remove every row without UP or DOWN as prefix -->
-<!-- # df_ggplot <- df_ggplot[grepl("^UP_|^DOWN_", df_ggplot$pathway),] -->
-<!-- #  -->
-<!-- #  -->
-<!-- # (plt_scatter_gsea_updown <- ggplot(df_ggplot, aes(x=NES_Sen_vs_Prol, y=NES_Sen_vs_Quiesc)) + -->
-<!-- #     geom_point(alpha=0.6, size=8, shape=21, aes(fill=significance)) + -->
-<!-- #     theme_bw() + -->
-<!-- #     geom_label_repel(aes(label=pathway), size=3, force_pull = 1, fill=NA, max.overlaps = 10000, min.segment.length = 0, force = 5, box.padding = .5) + -->
-<!-- #     geom_vline(xintercept = 0, size=0.6, linetype="dashed")+ -->
-<!-- #     geom_hline(yintercept = 0, size=0.6, linetype="dashed") + -->
-<!-- #     xlab("NES Senescence vs Proliferation")+ -->
-<!-- #     ylab("NES Senescence vs Quiescence")+  -->
-<!-- #     scale_fill_manual(values = c("#BA3B46","#53A2BE"), name = "Adj. p-value <= 0.05")  + -->
-<!-- #     theme(legend.position = "top") +  -->
-<!-- #     theme( plot.title = element_text(hjust = 0.5 ) ) ) -->
-<!-- # ``` -->
-
-
-
-
-```{r fig.width=15, fig.height=16}
-
-plt_scores <- ggarrange(plot_logmedian_SeneQuest, plot_logmedian_HernandezSegura, nrow=1, labels=c("C","D"))
-
-plt_senequest <- ggarrange(plt_scores,plt_tstats, ncol=1, heights=c(0.6,0.45), labels=c("","E"))
-
-(plt_fig5_alternative <- ggarrange(plt_expectations,
-                                   plt_senequest,
-                                   plt_fdrsim_subset_v2,
-                                   labels=c("","","F"),
-                                   ncol=1,
-                                   heights=c(0.2,0.35,0.35)))
-```
-
-
-```{r}
-ggplot2::ggsave("../Figures/Figs/Figure5_ComparisonSigs.png",
-                plt_fig5_alternative,
-                width = 15, height=16, bg = 'white')
- 
-```
-
-
-
-## Figure 6 - Tradeoffs between number of genes and number of samples
-
-### Significance
-
-(Run in local job, takes ~24h)
-
-```{r}
-# Expression data: genes (rows) × samples (columns)
-df_expr <- corrcounts
-
-# Class vector: named vector or factor
-group_vec <-  metadata$Condition
-names(group_vec) <- metadata$sampleID
-
-# Signature gene sets
-gene_sets <- list(
-  CellAge = signatures_bidirectional$CellAge,
-  HernandezSegura = signatures_bidirectional$HernandezSegura
-)
-
-# Percentages to evaluate
-gene_pcts <- seq(10, 100, by = 10)
-sample_pcts <- seq(10, 100, by = 10)
-
-
-# Result container
-results <- data.frame()
-
-# Loop through gene sets
-for (sig_name in names(gene_sets)) {
-  
-  siggenes_original <- gene_sets[[sig_name]]
-  
-  if (is.data.frame(siggenes_original)){
-    full_genes <- siggenes_original[siggenes_original[,1] %in% row.names(df_expr),]
-  } else {
-    full_genes <- intersect(siggenes_original, rownames(df_expr))
-  }
-  
-  for (g_pct in gene_pcts) {
-    
-    
-    if (is.data.frame(siggenes_original)){
-      ngenes_to_sample <-  max(1, round(nrow(full_genes) * g_pct / 100))
-      set.seed("12345")
-      sel_genes <- full_genes[sample(1:nrow(full_genes), ngenes_to_sample),]
-    } else {
-      ngenes_to_sample <-  max(1, round(length(full_genes) * g_pct / 100))
-      set.seed("12345")
-      sel_genes <- full_genes[sample(1:length(full_genes), ngenes_to_sample)]
-    }
-    sigs_list <- list(sel_genes)
-    names(sigs_list) <- sig_name
-    
-    for (s_pct in sample_pcts) {
-      
-      print(paste0("Percentage Samples: ", s_pct,"% | Percentage Genes: ", g_pct, "%"))
-      
-      # Subset samples
-      group1 <- names(group_vec[group_vec == "Senescent"])
-      group2 <- names(group_vec[group_vec == "Proliferative"])
-      
-      set.seed("12345")
-      sel_group1 <- sample(group1, max(2, round(length(group1) * s_pct / 100)))
-      set.seed("12345")
-      sel_group2 <- sample(group2, max(2, round(length(group2) * s_pct / 100)))
-      
-      sel_samples <- c(sel_group1, sel_group2)
-      #sel_expr <- expr_mat[sel_genes, sel_samples]
-      
-      df_subset <- df_expr[,sel_samples]
-      metadata_subset <- metadata[metadata$sampleID %in% sel_samples,c("sampleID","Condition")]
-      
-      # SCORE approach: mean expression score per sample
-      df_Scores <- CalculateScores(data = df_subset[,metadata_subset$sampleID],
-                                   metadata = metadata_subset,
-                                   method = "logmedian",
-                                   gene_sets =  sigs_list)
-      df_Scores <- as.data.frame(df_Scores[[sig_name]])
-      cohen_d_score <- compute_cohens_d(df_Scores[df_Scores$Condition == "Senescent","score"], df_Scores[df_Scores$Condition == "Proliferative","score"])
-      
-      # Score p-value (t-test)
-      p_score <- tryCatch({
-        t.test(df_Scores$score ~ df_Scores$Condition)$p.value
-      }, error = function(e) NA)
-      
-      # ENRICHMENT approach: gene ranking for fgsea
-      DEGs <- calculateDE(data = df_subset[,metadata_subset$sampleID],
-                          metadata = metadata_subset,
-                          variables = "Condition",
-                          contrasts = c("Senescent - Proliferative"))
-      #DEGs <- DEGs$`Senescent-Proliferative`
-      
-      res <- runGSEA(DEGList = DEGs,
-                     gene_sets = sigs_list )
-      res <- res$`Senescent-Proliferative`
-      
-      NES <-  res$NES
-      p_enrich <-res$pval
-      
-      # Save
-      results <- rbind(results, data.frame(
-        signature = sig_name,
-        gene_pct = g_pct,
-        sample_pct = s_pct,
-        cohend = cohen_d_score,
-        p_score = p_score,
-        NES = NES,
-        p_enrich = p_enrich
-      ))
-    }
-  }
-}
-
-# results$p_score <- p.adjust(results$p_score, method = "BH")
-# results$p_enrich <- p.adjust(results$p_enrich, method = "BH")
-
-
-```
-
-```{r}
-#(results <- readRDS("data/Tradeoffs_v1.rds"))
-(results <- readRDS("../data/Tradeoffs_v3.rds"))
-results$p_score <- p.adjust(results$p_score, method = "BH")
-results$p_enrich <- p.adjust(results$p_enrich, method = "BH")
-```
-
-
-```{r fig.width=10, fig.height=4} 
-# Transform p-values for plotting
-results$log10_p_score <- -log10(results$p_score)
-results$log10_p_enrich <- -log10(results$p_enrich)
-
-# Cap for better visualization (optional)
-results$log10_p_score <- pmin(results$log10_p_score, 10)
-results$log10_p_enrich <- pmin(results$log10_p_enrich, 10)
-
-
-plt_scoresig <- ggplot(results, aes(x = factor(gene_pct), y = factor(sample_pct), fill = log10_p_score)) +
-  geom_tile( ) +
-  scale_fill_gradient(low = "white", high = "blue", name = "-log10 p (Score)") +
-  facet_wrap(~signature) +
-  xlab("Gene %") + ylab("Sample %") +
-  ggtitle("Score method significance") +
-  theme_minimal() +
-  scale_x_discrete(breaks = seq(0, 100, by = 10))+
-  scale_y_discrete(breaks = seq(0, 100, by = 10))+
-  theme(legend.position="bottom") +
-  theme(plot.title = element_text(hjust = 0.5))
-
-
-
-plt_enrichmentsig <- ggplot(results, aes(x = factor(gene_pct), y = factor(sample_pct), fill = log10_p_enrich)) +
-  geom_tile( ) +
-  scale_fill_gradient(low = "white", high = "red", name = "-log10 p (Enrichment)") +
-  facet_wrap(~signature) +
-  xlab("Gene %") + ylab("Sample %") +
-  ggtitle("Enrichment method significance") +
-  theme_minimal() +
-  scale_x_discrete(breaks = seq(0, 100, by = 10))+
-  scale_y_discrete(breaks = seq(0, 100, by = 10)) +
-  theme(legend.position="bottom") +
-  theme(plot.title = element_text(hjust = 0.5))
-
-ggarrange(plt_scoresig,plt_enrichmentsig, nrow=1)
-
-
-```
-
-
-```{r fig.width=12, fig.height=4}
-
-# Clean & reshape
-results_long <- results %>%
-  mutate(
-    logp_score = -log10(p_score),
-    logp_enrich = -log10(p_enrich)
-  ) %>%
-  pivot_longer(
-    cols = c(logp_score, logp_enrich),
-    names_to = "method", values_to = "logp"
-  ) %>%
-  mutate(method = recode(method,
-                         logp_score = "Score",
-                         logp_enrich = "Enrichment"))
-
-Signature_colors <- c(
-  "CellAge" = "#872341",  # Bordeaux / wine red
-  "HernandezSegura" = "#6CA0DC"  # Light cornflower blue
-)
-
-
-plt_samplepct <- ggplot(results_long %>% filter(gene_pct == 100),
-                        aes(x = sample_pct, y = logp, color = signature)) +
-  geom_line(size = 1) +
-  geom_point(size = 2) +
-  facet_wrap(~method, scales = "free") +
-  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray40") +
-  scale_color_manual(values = Signature_colors) +
-  labs(
-    # title = "Effect of Sample Size on Significance",
-    x = "Sample Subset (% of Total Samples)",
-    y = expression("-log"[10]*"(p-value)"),
-    color = "Signature"
-  ) +
-  theme_classic(base_size = 13) +
-  theme(plot.title = element_text(hjust = 0.5))
-
-plt_genepct <- ggplot(results_long %>% filter(sample_pct == 100),
-                      aes(x = gene_pct, y = logp, color = signature)) +
-  geom_line(size = 1) +
-  geom_point(size = 2) +
-  facet_wrap(~method, scales = "free") +
-  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray40") +
-  scale_color_manual(values = Signature_colors) +
-  labs(
-    #title = "Effect of Gene Set Size on Significance",
-    x = "Gene Subset (% of Signature Genes)",
-    y = expression("-log"[10]*"(p-value)"),
-    color = "Signature"
-  ) +
-  theme_classic(base_size = 13) +
-  theme(plot.title = element_text(hjust = 0.5))
-
-
-(plt_tradeoffs <- ggarrange(plt_samplepct,plt_genepct, common.legend = T, nrow=1, legend="top", labels="AUTO"))
-```
-
-
-
-
-
-
-
-### Scores: Not coordinated acrivation of gene set
-
-A senescence signature with both upregulated and downregulated genes — and a sample where only the upregulated genes are expressed highly.
-
-What happens:
-- The sample receives a high per-sample score, because the average expression is driven by the active upregulated genes.
-- However, the downregulated genes are not suppressed, breaking the expected pattern.
-- This could be a false positive in interpretation — the score is high, but the coordinated directionality is missing.
-
-A good example is the SeneQuest
-
-```{r}
-signatures_SeneQuest <- list(UP_SeneQuest = subset(signatures_bidirectional$SeneQuest,enrichment == 1 ),
-                             DOWN_SeneQuest = subset(signatures_bidirectional$SeneQuest,enrichment == -1 ),
-                             SeneQuest = signatures_bidirectional$SeneQuest)
-
-```
-
-```{r}
-(plot_logmedian <- PlotScores_adapted(data = corrcounts, 
-                                      metadata = metadata, 
-                                      method = "ranking", 
-                                      gene_sets = signatures_SeneQuest,  
-                                      ColorVariable = "Condition", 
-                                      Variable="Condition", 
-                                      ColorValues = Condition_colors, 
-                                      ConnectGroups=TRUE,  
-                                      nrow = 1, 
-                                      widthTitle=20, 
-                                      legend_nrow = 1, 
-                                      pointSize=2,
-                                      titlesize=10, compute_cohen=T,
-                                      cond_cohend = list(A=c("Senescent"),B="Proliferative"),
-                                      limits=c(-20,50)))
-```
-
-Second idea: use the genes of hernandez segura with the highest power to discriminate senescence from proliferative, and do plots of those genes vs all of the genes; to illustrate how easy it would be to reach significance and a similar result if we only used a set of genes, which is not good if we want to have an idea of the overall activity of the pathway; whereas in GSEA this wouldn't happen
-
-```{r fig.width=20, fig.height=20}
-
-IndividualGenes_Violins(data = corrcounts, 
-                        metadata = metadata, 
-                        genes = signatures_bidirectional$HernandezSegura$gene, 
-                        GroupingVariable = "Condition", 
-                        plot=T, 
-                        ncol=NULL, 
-                        nrow=10, 
-                        divide=NULL, 
-                        invert_divide=FALSE,
-                        ColorValues=Condition_colors, 
-                        pointSize=2, 
-                        ColorVariable="Condition", 
-                        title="Senescence Genes", 
-                        widthTitle=16,
-                        y_limits = NULL,
-                        legend_nrow=1, 
-                        xlab="Condition",
-                        colorlab="") 
-```
-
-
-```{r fig.width=6, fig.height=12}
-
-CohenD_IndividualGenes(corrcounts[,subset(metadata, Condition!="Proliferative")$sampleID], 
-                       subset(metadata, Condition!="Proliferative"), 
-                       genes=signatures_bidirectional$HernandezSegura$gene,
-                       condition_var = "Condition", 
-                       class = "Senescent", 
-                       group_var = NULL )
-
-# Senescent vs Others
-
-bottomgenes_1 <- c("SMO")
-topgenes_1 <- c("DDA1")
-bottomgenes_5 <- c("SPATA6","GBE1","SLC16A3","GSTM4","NFIA")
-topgenes_5 <- c("DDA1","RHNO1","SUSD6","CHMP5","FAM214B")
-bottomgenes_10 <- c("SPATA6","GBE1","SLC16A3","GSTM4","NFIA","DGKA","ZBTB7A","ASCC1","GDNF","ICE1")
-topgenes_10 <- c("DDA1","RHNO1","SUSD6","CHMP5","FAM214B","SPIN4","CCND1","PCIF1","CNTLN","PLK3") 
-
-# Senescent vs Quiescent 
-topgenes_1 <- c("CDCD5") 
-topgenes_5 <- c("CDCD5","PCIF1","PDLIM4","FAM214B","CREBBP" ) 
-topgenes_10 <- c("CDCD5","PCIF1","PDLIM4","FAM214B","CREBBP","CCND1","PATZ1","CNTLN","TRDMT1","DDA1") 
-
-```
-
-<!-- ```{r fig.width=10, fig.height=10} -->
-
-<!-- signatures_HS <- list(HernandezSegura=signatures_bidirectional$HernandezSegura, -->
-<!--                       HernandezSegura_top1 = subset(signatures_bidirectional$HernandezSegura, gene %in% topgenes_1), -->
-<!--                       HernandezSegura_top5 = subset(signatures_bidirectional$HernandezSegura, gene %in% topgenes_5), -->
-<!--                       HernandezSegura_top10 = subset(signatures_bidirectional$HernandezSegura, gene %in% topgenes_10)) -->
-
-<!-- (plot_ranking <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                     metadata = metadata,  -->
-<!--                                     method = "ranking",  -->
-<!--                                     gene_sets = signatures_HS,   -->
-<!--                                     ColorVariable = "Condition",  -->
-<!--                                     Variable="Condition",  -->
-<!--                                     ColorValues = Condition_colors,  -->
-<!--                                     ConnectGroups=TRUE,   -->
-<!--                                     nrow = 1,  -->
-<!--                                     widthTitle=24,  -->
-<!--                                     legend_nrow = 1,  -->
-<!--                                     pointSize=2, -->
-<!--                                     titlesize=10, compute_cohen=T, -->
-<!--                                     cond_cohend = list(A=c("Senescent"),B="Quiescent"))) -->
-
-<!-- (plot_logmedian <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                       metadata = metadata,  -->
-<!--                                       method = "logmedian",  -->
-<!--                                       gene_sets = signatures_HS,   -->
-<!--                                       ColorVariable = "Condition",  -->
-<!--                                       Variable="Condition",  -->
-<!--                                       ColorValues = Condition_colors,  -->
-<!--                                       ConnectGroups=TRUE,   -->
-<!--                                       nrow = 1,  -->
-<!--                                       widthTitle=24,  -->
-<!--                                       legend_nrow = 1,  -->
-<!--                                       pointSize=2, -->
-<!--                                       titlesize=10, compute_cohen=T, -->
-<!--                                       cond_cohend = list(A=c("Senescent"),B="Quiescent"))) -->
-
-<!-- (plot_ssGSEA <- PlotScores_adapted(data = corrcounts,  -->
-<!--                                    metadata = metadata,  -->
-<!--                                    method = "ssGSEA",  -->
-<!--                                    gene_sets = signatures_HS,   -->
-<!--                                    ColorVariable = "Condition",  -->
-<!--                                    Variable="Condition",  -->
-<!--                                    ColorValues = Condition_colors,  -->
-<!--                                    ConnectGroups=TRUE,   -->
-<!--                                    nrow = 1,  -->
-<!--                                    widthTitle=24,  -->
-<!--                                    legend_nrow = 1,  -->
-<!--                                    pointSize=2, -->
-<!--                                    titlesize=10, compute_cohen=T, -->
-<!--                                    cond_cohend = list(A=c("Senescent"),B="Quiescent"))) -->
-
-<!-- ggarrange(plot_logmedian,plot_ranking,plot_ssGSEA,ncol=1) -->
-<!-- ``` -->
-
-<!-- ```{r fig.width=10, fig.height=4} -->
-<!-- plot_ranking -->
-<!-- ``` -->
-
-
-<!-- ```{r} -->
-<!-- options(error=recover) -->
-<!-- GSEAresults_HStests <- runGSEA(list(`Senescent-Quiescent`=degenes$`Senescent-Quiescent`), list(HernandezSegura=signatures_bidirectional$HernandezSegura,  -->
-<!--                                                                                                HernandezSegura_top5 = subset(signatures_bidirectional$HernandezSegura, gene %in% topgenes_5), -->
-<!--                                                                                                HernandezSegura_top10 = subset(signatures_bidirectional$HernandezSegura, gene %in% topgenes_10)), stat = NULL) -->
-<!-- (plotNESlollipop_HStests <- plotNESlollipop(GSEA_results=GSEAresults_HStests, sig_threshold = 0.05,saturation_value=0.00001, nonsignif_color = "white", signif_color = "#934873", -->
-<!--                                             grid = T, nrow = 1, ncol = NULL, widthlabels=18, title=NULL, titlesize=14)) -->
-<!-- ``` -->
-<!-- these genes were selected as the ones with the highest cohen's d separating senescent from quiescent samples. and then i created signatures with those genes. it is interesting that the score doesn't give much impact on the number of genes in the signature, but GSEA penalises the NES... if the gene signature is expected to recapitulate as a whole the phenotype, this is not very good if we use scores... -->
-
-<!-- ```{r fig.width=14, fig.height=4} -->
-<!-- (plt_impact_score <- ggarrange(plot_logmedian, plotNESlollipop_HStests$`Senescent-Quiescent`, widths=c(0.65,0.35))) -->
-<!-- ``` -->
-
-
-
-
-
-Let's try the same for CellAge
-
-```{r}
-separatinggenes <- CohenD_IndividualGenes(corrcounts[,subset(metadata, Condition!="Proliferative")$sampleID], 
-                                          subset(metadata, Condition!="Proliferative"), 
-                                          genes=signatures_bidirectional$CellAge$gene,
-                                          condition_var = "Condition", 
-                                          class = "Senescent", 
-                                          group_var = NULL )
-
-cellage_top2 <- separatinggenes$data[order(separatinggenes$data$CohensD, decreasing = T),][1:2,"Gene"]
-cellage_top5 <- separatinggenes$data[order(separatinggenes$data$CohensD, decreasing = T),][1:5,"Gene"]
-cellage_top10 <- separatinggenes$data[order(separatinggenes$data$CohensD, decreasing = T),][1:10,"Gene"]
-cellage_top20 <- separatinggenes$data[order(separatinggenes$data$CohensD, decreasing = T),][1:20,"Gene"]
-cellage_top100 <- separatinggenes$data[order(separatinggenes$data$CohensD, decreasing = T),][1:100,"Gene"]
-
-```
-
-
-
-
-```{r fig.width=10, fig.height=3.5}
-
-signatures_CellAge <- list(CellAge=signatures_bidirectional$CellAge,
-                           CellAge_top2 = subset(signatures_bidirectional$CellAge, gene %in% cellage_top2),
-                           CellAge_top5 = subset(signatures_bidirectional$CellAge, gene %in% cellage_top5),
-                           CellAge_top10 = subset(signatures_bidirectional$CellAge, gene %in% cellage_top10),
-                           CellAge_top20 = subset(signatures_bidirectional$CellAge, gene %in% cellage_top20),
-                           CellAge_top100 = subset(signatures_bidirectional$CellAge, gene %in% cellage_top100))
-
-(plot_ranking_cellage <- PlotScores_adapted(data = corrcounts, 
-                                            metadata = metadata, 
-                                            method = "ranking", 
-                                            gene_sets = signatures_CellAge,  
-                                            ColorVariable = "Condition", 
-                                            Variable="Condition", 
-                                            ColorValues = Condition_colors, 
-                                            ConnectGroups=TRUE,  
-                                            nrow = 1, 
-                                            widthTitle=24, 
-                                            legend_nrow = 1, 
-                                            pointSize=2,
-                                            titlesize=10, compute_cohen=T,
-                                            cond_cohend = list(A=c("Senescent"),B="Quiescent"))) 
-
-```
-
-
-
-
-```{r}
-
-GSEAresults_CellAgetests <- runGSEA(list(`Senescent-Quiescent`=degenes$`Senescent-Quiescent`),signatures_CellAge, stat = NULL)
-GSEAresults_CellAgetests
-(plotNESlollipop_CellAgetests <- plotNESlollipop(GSEA_results=GSEAresults_CellAgetests, sig_threshold = 0.05,saturation_value=0.00001, nonsignif_color = "white", signif_color = "#934873",
-                                                 grid = T, nrow = 1, ncol = NULL, widthlabels=18, title=NULL, titlesize=14))
-```
-
-```{r fig.width=18, fig.height=4}
-(plt_impact_score_cellage <- ggarrange(plot_ranking_cellage, plotNESlollipop_CellAgetests$`Senescent-Quiescent`, widths=c(0.65,0.35)))
-```
-
-
-
-### Enrichment: Enrichment of only a subset of the samples
-
-To check if there is any signature that is not significant with all data, but becomes significant when looking at one stressor in specific. proliferative and quiescent are the baseline.
-
-```{r}
-metadata_factor <- metadata
-metadata_factor$SenescentType <- factor(metadata$SenescentType, levels=unique(metadata$SenescentType)) # none appears first
-modelmat <- model.matrix(~SenescentType, metadata_factor)
-colnames(modelmat) <- gsub("SenescentType","",colnames(modelmat))
-
-degenes_sentype  <- calculateDE(data=corrcounts, 
-                                metadata=metadata,  
-                                modelmat = modelmat ) 
-
-degenes_sentype 
-
-
-GSEAresults_sentype  <- runGSEA(degenes_sentype,signatures_bidirectional, stat = NULL, nPermSimple = 10000)
-GSEAresults_sentype
-
-```
-
-```{r fig.width=16, fig.height=18}
-
-(plotNESlollipop_sentype <- plotNESlollipop(GSEA_results=GSEAresults_sentype, sig_threshold = 0.05,saturation_value=0.00001, nonsignif_color = "white", signif_color = "#934873",
-                                            grid = T, nrow = 4, ncol = NULL, widthlabels=20, title=NULL, titlesize=14))
-
-```
-
-
-
-
-
-
-
-
-```{r}
-
-# Purcell has DNA demethylation, Proliferative and Quiescent
-metadata_radiation <- metadata[metadata$SenescentType %in% c("Radiation","none"),]
-corrcounts_radiation <- corrcounts[,metadata_radiation$sampleID]
-
-table(metadata_radiation$Condition)
-
-degenes_subset  <- calculateDE(data=corrcounts_radiation, 
-                               metadata=metadata_radiation, 
-                               variables="Condition",   
-                               modelmat = NULL, 
-                               contrasts = c("Senescent - Proliferative",
-                                             "Senescent - Quiescent" 
-                               )) 
-
-degenes_subset 
-```
-
-
-
-
-
-```{r}
-GSEAresults_radiation<- runGSEA(degenes_subset, signatures_bidirectional, stat = NULL, nPermSimple = 10000)
-GSEAresults_radiation
-```
-
-
-```{r fig.width=12, fig.height=4.5}
-
-(plt_impact_enrichment <- plotNESlollipop(GSEA_results=GSEAresults_radiation, sig_threshold = 0.05,saturation_value=0.00001, nonsignif_color = "white", signif_color = "#934873",
-                                          grid = T, nrow = 1, ncol = NULL, widthlabels=20, title=NULL, titlesize=14))
-
-```
-
-### All
-
-```{r fig.width=14, fig.height=10}
-(fig5_tradeoffs <- ggarrange(plt_tradeoffs,plt_impact_score_cellage, plt_impact_enrichment, ncol=1, heights=c(0.3,0.3,0.45), labels=c("","C","D")))
-```
-```{r}
-ggplot2::ggsave("../Figures/Figs/Figure6_tradeoffs.png",
-                fig5_tradeoffs,
-                width = 14, height=10, bg = 'white')
-```
-
-
-
-
-## Figure 7 - Translation potential to human data
-
-
-```{r}
-df_Scores_alltissues <- CalculateScores(data = GTEx_alltissues,
-                                        metadata = metadata_GTEx_alltissues, # sample id has to be first column
-                                        method = "logmedian",
-                                        gene_sets =  signatures_bidirectional)
-```
-
-
-```{r fig.width=12, fig.height=5}
-(plt_gtex_score_alltissues <- ggplot(df_Scores_alltissues$HernandezSegura, aes(x = SMTSD, y = score)) +
-   geom_jitter(color="#DDDDDD") +
-   # add dashed horizontal line for median across all tissues
-   geom_hline(yintercept = median(df_Scores_alltissues$HernandezSegura$score), linetype = "dashed", color = "#888888") +
-   geom_violin(alpha=0.4, fill="#4E9B62")+ 
-   labs( x = "",
-         y = "Normalised \nSignature \nScore") +
-   theme_minimal() +
-   # x axis text 45 degrees
-   theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
-   # add median with line
-   stat_summary(fun = median, geom = "crossbar", width = 0.1, color = "black")   +
-   theme(text = element_text(size=14)))
-
-
-```
-
-```{r}
-df_Scores_hs <- CalculateScores(data = corrcounts,
-                                metadata = metadata,
-                                method = "logmedian",
-                                gene_sets =  list(HernandezSegura=signatures_bidirectional$HernandezSegura))
-
-df_Scores_hs <- df_Scores_hs$HernandezSegura
-
-df_Scores_hs
-```
-
-
-```{r fig.width=4, fig.height=4}
-(plt_densities_celltype_HS <- ggplot(df_Scores_hs, aes(x = score, color = CellType)) +
-   geom_density(size = 1) +
-   facet_wrap(~Condition, scales = "free_y", ncol=1) +
-   theme_classic() +
-   labs(x = "HernandezSegura Score", y = "Density", color = "Cell Type") +
-   theme(strip.text = element_text(face = "bold"), legend.position = "top") + scale_color_manual(values=CellTypecols) +
-   labs(color="") +
-  guides(color=guide_legend(nrow=3, byrow=TRUE)) )
-```
- 
-```{r}
-# local job: ~5 days
-# 
-# 
-# methods <- c("logmedian","ranking","ssGSEA")
-# gene_set <- list(HernandezSegura=signatures_bidirectional$HernandezSegura,
-#                  SAUL_SEN_MAYO=signatures_bidirectional$SAUL_SEN_MAYO)
-# tissues <- unique(metadata_GTEx_alltissues$SMTSD)
-# 
-# results_df_score <- data.frame(NULL)
-# results_df_gsea <- data.frame(NULL)
-# 
-# # Initialize progress bar
-# pb <- txtProgressBar(min = 0, max = length(tissues), style = 3)
-# 
-# for (i in seq_along(tissues)) {
-# 
-#   tissue <- tissues[i]
-# 
-#   subset_metadata <- metadata_GTEx_alltissues[metadata_GTEx_alltissues$SMTSD == tissue,]
-#   subset_data <- GTEx_alltissues[,subset_metadata$SAMPID]
-# 
-# 
-#   # Update progress bar
-#   setTxtProgressBar(pb, i)
-# 
-#   for (sig in names(gene_set)){
-# 
-#     signature <- list(gene_set[[sig]])
-#     names(signature) <- sig
-# 
-#     data_varassoc_gsea <- suppressWarnings(suppressMessages(GSEA_VariableAssociation(data=subset_data,
-#                                                                                      metadata=subset_metadata,
-#                                                                                      cols=c("AGE"),
-#                                                                                      mode="simple",
-#                                                                                      gene_set=signature)$data))
-# 
-#     data_varassoc_gsea <- data_varassoc_gsea[,c("NES","pval","Contrast")]
-#     data_varassoc_gsea$signature <- sig
-#     data_varassoc_gsea$method <- "GSEA"
-#     data_varassoc_gsea$tissue <- tissue
-# 
-#     results_df_gsea <- rbind(results_df_gsea, data_varassoc_gsea)
-# 
-#     for (method in methods){
-# 
-#       data_varassoc_score <- suppressWarnings(suppressMessages(Score_VariableAssociation(data=subset_data,
-#                                                                                          metadata=subset_metadata,
-#                                                                                          cols=c("AGE"), # SMRIN was a variable for batch correction
-#                                                                                          method=method,
-#                                                                                          gene_set = signature,
-#                                                                                          mode="simple", printplt = F)$Overall))
-# 
-# 
-#       df_permutations <- data.frame(NULL)
-#       for (j in 1:nperm){
-#         set.seed(j)
-#         metadata_subset_shuffleAGE <- subset_metadata
-#         metadata_subset_shuffleAGE$AGE <- sample(metadata_subset_shuffleAGE$AGE)
-# 
-#         cohend_shuffle <- suppressWarnings(suppressMessages(Score_VariableAssociation(data=subset_data[,metadata_subset_shuffleAGE$SAMPID],
-#                                                                                            metadata=metadata_subset_shuffleAGE,
-#                                                                                            cols=c("AGE"), # SMRIN was a variable for batch correction
-#                                                                                            method=method,
-#                                                                                            gene_set = signature,
-#                                                                                            mode="simple", printplt = F)$Overall))
-# 
-# 
-#         cohend_shuffle$signature <- sig
-#         cohend_shuffle$method <- method
-#         cohend_shuffle$tissue <- tissue
-#         df_permutations <- rbind(df_permutations, cohend_shuffle)
-# 
-#       }
-# 
-#       # calculateFPR
-# 
-#       df_permutations$Cohen_f
-#       fpr <- sum(df_permutations$Cohen_f > data_varassoc_score$Cohen_f)/length(df_permutations$Cohen_f)
-# 
-# 
-#       data_varassoc_score$signature <- sig
-#       data_varassoc_score$method <- method
-#       data_varassoc_score$tissue <- tissue
-#       data_varassoc_score$fpr <- fpr
-#       results_df_score <- rbind(results_df_score, data_varassoc_score)
-# 
-# 
-# 
-# 
-#     }
-# 
-#   }
-# 
-# }
-# 
-# # Close the progress bar
-# close(pb)
-# 
-# 
-# saveRDS( results_df_gsea,"results_df_gsea_GTEx_FPR.rds")
-# saveRDS( results_df_score,"results_df_score_GTEx_FPR.rds") # same results for t-test's p value and 1000 perms
-```
-
-
-```{r}
-results_df_score <- readRDS("../data/results_df_score_GTEx_FPR.rds")
-results_df_gsea <- readRDS("../data/results_df_gsea_GTEx_FPR.rds")
-
-
-results_df_gsea <- results_df_gsea %>%
-  group_by(signature) %>%
-  mutate(padj = p.adjust(pval, method = "BH")) %>%
-  ungroup()
-
-results_df_score <- results_df_score %>%
-  group_by(signature, method) %>%
-  mutate(P_adj = p.adjust(fpr, method = "BH")) %>%
-  ungroup()
-
-
-# Alphabetically order tissue factor
-tissue_levels <- sort(unique(results_df_score$tissue), decreasing = T)
-results_df_score$tissue <- factor(results_df_score$tissue, levels = tissue_levels)
-results_df_gsea$tissue  <- factor(results_df_gsea$tissue, levels = tissue_levels)
-
-results_df_gsea
-results_df_score
-```
-
-
-
-```{r fig.width=6, fig.height=10}
-
-
-# Ensure correct factor levels for method and tissue
-results_df_score$method <- factor(results_df_score$method, levels = c("logmedian", "ranking", "ssGSEA"))
-results_df_gsea$method <- "GSEA"  # single method name
-
-# Add asterisk for significance
-results_df_score <- results_df_score %>%
-  mutate(signif = ifelse(P_adj < 0.05, "*", ""))
-
-results_df_gsea <- results_df_gsea %>%
-  mutate(signif = ifelse(padj < 0.05, "*", ""))
-
-# Loop through each signature
-sigs <- unique(results_df_score$signature)
-
-list_plts_sigs <- list()
-for (sig in sigs) {
-  # Subset for this signature
-  score_data <- results_df_score %>% filter(signature == sig)
-  gsea_data  <- results_df_gsea %>% filter(signature == sig)
-  
-  # Create score plot
-  p_score <- ggplot(score_data, aes(x = method, y = tissue, fill = Cohen_f)) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#49B0AB", name = "Cohen's f", limits=c(0,0.4)) +
-    theme_minimal(base_size = 12) +
-    #labs(title = "Score") +
-    theme(axis.title = element_blank(),
-          axis.text.x = element_text(angle = 45, hjust = 1, size=12),
-          axis.text.y = element_text(size=12)) +
-    # center main title of the plot
-    theme( plot.title = element_text(hjust = 0.5))
-  
-  # Create GSEA plot
-  p_gsea <- ggplot(gsea_data, aes(x = method, y = tissue, fill = abs(NES))) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#B04975", name = "|NES|", limits=c(0,2.5)) +
-    theme_minimal(base_size = 12) +
-    #labs(title = "Enrichment") +
-    theme(axis.title = element_blank(),
-          axis.text.y = element_blank(),
-          axis.text.x = element_text(angle = 45, hjust = 1, size=12),
-          axis.ticks.y = element_blank(), 
-          plot.title = element_text(hjust = 0.5 ))
-  
-  # Combine the two plots side-by-side
-  combined_plot <- p_score + p_gsea + plot_layout(ncol = 2, guides = "collect", widths = c(0.75,0.25)) & theme(legend.position = "right") 
-  
-  # include title with signature name
-  combined_plot <- combined_plot #+  
-  #plot_annotation(title = paste("                     ", sig), theme = theme(plot.title = element_text(hjust = 0.5, size = 16)))
-  
-  list_plts_sigs[[sig]] <- combined_plot
-  # Print the plot 
-}
-
-
-plt_gtex_scores_HS <- list_plts_sigs[["HernandezSegura"]]
-plt_gtex_scores_HS
- 
-```
-
-
-```{r}
-tissues_signif_HS <- c("Artery - Aorta","Breast - Mammary Tissue", "Cells - Cultured fibroblasts","Thyroid")
-tissues_signif_SenMayo <- c("Artery - Tibial", "Brain - Anterior cingulate cortex (BA24)", "Brain - Hippocampus", "Colon - Sigmoid", "Minor Salivary Gland", "Muscle - Skeletal", "Nerve - Tibial", "Prostate")
-
-```
-
-
-```{r fig.width=6, fig.height=8}
-
-methods <- c("logmedian","ranking","ssGSEA")
-pltlist <- list()
-
-for (tissue in tissues_signif_HS){
-  
-  subset_metadata <- metadata_GTEx_alltissues[metadata_GTEx_alltissues$SMTSD == tissue,]
-  subset_data <- GTEx_alltissues[,subset_metadata$SAMPID]
-  
-  pltlist_aux <- list()
-  
-  for (method in methods){ 
-    
-    scores_df <- CalculateScores(data = subset_data,
-                                 metadata = subset_metadata,
-                                 gene_sets = list(HernandezSegura=signatures_bidirectional$HernandezSegura), method = method)
-     
-    
-    pltlist_aux[[method]] <- ggplot(scores_df$HernandezSegura, aes(x=AGE, y=score)) +
-      geom_jitter(size=2, color="#8F95B1")+ # to preserve the donor's age; correlation was calculated with "real" data
-      geom_density2d( colour="white", size=0.3) +
-      ggplot2::geom_smooth(method = "lm", col = "black", se = FALSE, size=1.3)+ ggpubr::stat_cor(aes(label = ..r.label..),  
-                                                                                                 label.x = 20, size=4) +  
-    xlab("Age (years)") + ylab("Score") +
-      theme_bw() +
-      ggtitle(method) +
-      #center title 
-      theme(plot.title = element_text(hjust = 0.5 ),
-            base_size = 16,
-            axis.text.x = element_text( size=12),
-            axis.text.y = element_text(size=12),
-            axis.title.x = element_text(size=14),
-            axis.title.y = element_text(size=14)) 
-  }
-  
-  pltlist[[tissue]] <- ggarrange(plotlist = pltlist_aux, ncol = 3, nrow = 1, common.legend = TRUE) +
-    ggtitle( tissue)  + theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))
-  
-  
-}
-
-plt_scoredistribution_HS_GTEx <- ggarrange(plotlist = pltlist, ncol = 1, common.legend = TRUE, legend = "bottom")  
-plt_scoredistribution_HS_GTEx
-```
-
- 
-
-
-
-
-```{r fig.width=12, fig.height=13}
-plt_row1 <- ggarrange(plt_gtex_score_alltissues, plt_densities_celltype_HS, widths=c(0.75,0.25), ncol=2, labels=c("A","B"), vjust = 1 )
-plt_row2 <- ggarrange(plt_gtex_scores_HS,plt_scoredistribution_HS_GTEx, widths=c(0.5,0.5), ncol=2, labels=c("C","D"))  
-(fig7 <- ggarrange(plt_row1,plt_row2, heights=c(0.3,0.63), ncol=1))
-
-```
-
-```{r}
- 
-ggplot2::ggsave("../Figures/Figs/Figure7_GTEx.png",
-                fig7,
-                width = 13, height=14, bg = 'white')
-```
-
-
-
-
-
-
-
-
- 
-
diff --git a/inst/Paper/Figures/Figures_Paper_Supplementary.Rmd b/inst/Paper/Figures/Figures_Paper_Supplementary.Rmd
deleted file mode 100644
index 4332c48..0000000
--- a/inst/Paper/Figures/Figures_Paper_Supplementary.Rmd
+++ /dev/null
@@ -1,2612 +0,0 @@
----
-title: "Supplementary Paper Figures"
-author: "Rita Martins-Silva"
-date: "05/05/2025"
-output: html_document
----
-
-#  {.tabset .tabset-pills}
-
-## Set up {.tabset}
- 
-
-### Libraries
-
-```{r}
-library("ggplot2")
-library("colorspace")
-library("scales")
-library("scater") 
-library("reshape2")
-library("data.table")
-library("edgeR")
-```
-
-
-### Functions
-
-```{r}
-wrap_title <- function(title, width = 30) {
-  if (nchar(title) <= width) {
-    return(title)  # No need to wrap if it fits
-  }
-
-  wrapped_title <- ""
-  while (nchar(title) > width) {
-    # Find positions of capital letters and symbols near the wrap point
-    capital_pos <- gregexpr("[A-Z]", title)[[1]]
-    symbol_pos <- gregexpr("(_|-|:|\\+|\\\\|/|\\*|\\.|,|;|\\?|!)", title)[[1]]
-
-    # Check for symbol breaks within the last few characters (width - 5 to width)
-    valid_symbol_breaks <- symbol_pos[symbol_pos >= (width - 5) & symbol_pos <= width]
-
-    if (length(valid_symbol_breaks) > 0) {
-      # If a suitable symbol is found, break at the first valid symbol
-      break_at <- valid_symbol_breaks[1]
-    } else {
-      # If no suitable symbol, look for capital letters within the same range
-      valid_capital_breaks <- capital_pos[capital_pos >= (width - 5) & capital_pos <= width]
-
-      if (length(valid_capital_breaks) > 0) {
-        # If a capital letter is found, break just before the capital letter
-        break_at <- valid_capital_breaks[1] - 1
-      } else {
-        # If no suitable symbol or capital letter, break at width
-        break_at <- width
-      }
-    }
-
-    # Append the wrapped line
-    wrapped_title <- paste0(wrapped_title, substr(title, 1, break_at), "\n")
-
-    # Update title with the remaining text after the break
-    title <- substr(title, break_at + 1, nchar(title))
-  }
-
-  # Add the remaining part of the title
-  wrapped_title <- paste0(wrapped_title, title)
-
-  return(wrapped_title)
-}
-```
-
-
-```{r}
-PlotScores_Categorical_adapted <- function(data, metadata, gene_sets,
-                                           method = c("ssGSEA", "logmedian", "ranking"),
-                                           ColorVariable = NULL, GroupingVariable = NULL,
-                                           ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                                           widthTitle = 10, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 2,
-                                           xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                                           widthlegend=22, cohen_threshold=0.6, colorPalette="Set3") {
-  
-  method <- match.arg(method)
-  
-  ResultsList <- CalculateScores(data = data,
-                                 metadata = metadata,
-                                 gene_sets = gene_sets,
-                                 method = method)
-  
-  # if grouping variable is NULL, then the function displays a density / distribution of scores
-  if (is.null(GroupingVariable) | is.null(metadata)) {
-    
-    plot_list <- list()
-    
-    for (signature in names(ResultsList)) {
-      
-      df <- ResultsList[[signature]]
-      # Wrap the signature name using the helper function
-      wrapped_title <- wrap_title(signature, width = widthTitle)
-      
-      ColorValues <- if (is.null(ColorValues)) "#ECBD78" else ColorValues
-      
-      p <- ggplot2::ggplot(df, ggplot2::aes(x = score)) +
-        ggplot2::geom_density(fill = ColorValues, alpha = 0.5) +
-        ggplot2::labs(title = "Density Plot of Score", x = xlab, y = "Density") +
-        # add points below density
-        ggplot2::geom_rug(ggplot2::aes(x = score), color=ColorValues, sides = "b",  alpha = 0.8, size = .5, length = grid::unit(0.035, "npc"))
-      
-      # Customize the plot appearance.
-      p <- p + ggplot2::theme_classic() +
-        ggplot2::labs(title = wrapped_title, color = "", x = "", y = "") +
-        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize - .5),
-                       axis.text.y = ggplot2::element_text(  size = labsize - .5),
-                       plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize-1),
-                       plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic"),
-                       legend.position="none")
-      
-      # If limits is specified, crop the plot without adjusting the data (violins).
-      if (!is.null(limits)) {
-        p <- p + ggplot2::coord_cartesian(xlim = limits)
-      }
-      
-      plot_list[[signature]] <- p
-      
-    }
-    
-    n <- length(plot_list)
-    
-    # Determine grid layout
-    if (is.null(ncol) && is.null(nrow)) {
-      ncol <- ceiling(sqrt(n))
-      nrow <- ceiling(n / ncol)
-    } else if (is.null(ncol)) {
-      ncol <- ceiling(n / nrow)
-    } else if (is.null(nrow)) {
-      nrow <- ceiling(n / ncol)
-    }
-    
-    # create label for y axis
-    if (method == "ssGSEA") {
-      xlab <- "ssGSEA Enrichment Score"
-    } else if (method == "logmedian") {
-      xlab <- "Normalized Signature Score"
-    } else if (method == "ranking") {
-      xlab <- "Signature Genes' Ranking"
-    }
-    
-    combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow,  align = "h")
-    combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                             left = grid::textGrob("Density",
-                                                                   rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    return(combined_plot)
-  }
-  
-  if (!(GroupingVariable %in% colnames(metadata)))
-    stop(paste0(GroupingVariable, " not in metadata columns. Please check metadata."))
-  
-  # Initialize an empty list to store individual ggplot objects.
-  plot_list <- list()
-  
-  # Loop over each gene signature in the ResultsList.
-  for (signature in names(ResultsList)) {
-    # Extract the data frame for the current signature.
-    df <- ResultsList[[signature]]
-    
-    # Using factors so we can retrieve the first condition for Cohen's d if none is specified.
-    df[, GroupingVariable] <- factor(df[, GroupingVariable],
-                                     levels = sort(unique(as.character(df[, GroupingVariable]))))
-    
-    # Wrap the signature name using the helper function.
-    wrapped_title <- wrap_title(signature, width = widthTitle)
-    
-    # Create a base ggplot object with the specified grouping on the x-axis and score on the y-axis.
-    p <- ggplot2::ggplot(df, ggplot2::aes_string(x = GroupingVariable, y = "score"))
-    
-    # Add jittered points, optionally colored by ColorVariable.
-    if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::geom_jitter(ggplot2::aes_string(color = ColorVariable), size = pointSize, alpha = 0.5)
-    } else {
-      p <- p + ggplot2::geom_jitter(size = pointSize, alpha = 0.5) +
-        ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-    
-    # Overlay violin plots.
-    p <- p + ggplot2::geom_violin(alpha = 0.5, scale = "width")
-    
-    # Add median summary crossbar.
-    p <- p + ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
-                                   geom = "crossbar", width = 0.25,
-                                   position = ggplot2::position_dodge(width = 0.13))
-    
-    # Add stats: Compute Cohen's d (and optionally p‑value)
-    if(compute_cohen){
-      if (!is.null(cond_cohend)){
-        # can be of the following form:
-        # cond_cohend <- list(A=c("Senescent"),
-        #                     B=c("Proliferative","Quiescent"))
-        
-        if (sum(unlist(cond_cohend) %in% unique(df[, GroupingVariable])) != length(unique(df[, GroupingVariable])))
-          warning("Warning: Not all conditions of GroupingVariable were specified for Cohen's d calculation")
-        
-        x <- df[df[[GroupingVariable]] %in% cond_cohend[[1]], "score", drop = TRUE]
-        y <- df[df[[GroupingVariable]]  %in% cond_cohend[[2]], "score", drop = TRUE]
-        
-        cohen_d_results <- cohen_d(x, y)
-        
-        # df$cohen <- ifelse(df[, GroupingVariable] %in% cond_cohend[[1]], names(cond_cohend)[1], names(cond_cohend)[2])
-        # cohen_d_results <- rstatix::cohens_d(df, formula = score ~ cohen)
-        
-        if (pvalcalc) {
-          df$cohen <- ifelse(df[, GroupingVariable] %in% cond_cohend[[1]], names(cond_cohend)[1], names(cond_cohend)[2])
-          ttest_results <- rstatix::t_test(df, formula = score ~ cohen)
-          p_val <- ttest_results$p[1]
-          line1 <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results, 3)), width = widthTitle)
-          line2 <- wrap_title(paste0("p = ", round(p_val, 3)), width = widthTitle)
-          subtitle <- paste(line1, line2, sep = "\n")
-        } else {
-          subtitle <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results, 3)), width = widthTitle)
-        }
-        
-        
-      } else {
-        
-        if(length(unique(df[, GroupingVariable])) < 2){
-          
-          warning("Not enough conditions available to report Cohen's d.")
-          
-        } else if(length(unique(df[, GroupingVariable])) == 2) {
-          
-          # Calculate Cohen's d based on ordering of the x axis
-          group1 <- levels(df[, GroupingVariable])[1]
-          group2 <- levels(df[, GroupingVariable])[2]
-          
-          x <- df[df[[GroupingVariable]] == group1, "score", drop = TRUE]
-          y <- df[df[[GroupingVariable]] == group2, "score", drop = TRUE]
-          
-          cohen_d_results <- cohen_d(x, y)
-          
-          if (pvalcalc) {
-            ttest_results <- rstatix::t_test(df, formula = score ~ GroupingVariable)
-            p_val <- ttest_results$p[1]
-            line1 <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results$effsize, 3)), width = widthTitle)
-            line2 <- wrap_title(paste0("p = ", round(p_val, 3)), width = widthTitle)
-            subtitle <- paste(line1, line2, sep = "\n")
-          } else {
-            subtitle <- wrap_title(paste0("Cohen's d = ", round(cohen_d_results$effsize, 3)), width = widthTitle)
-          }
-          
-          
-          
-        } else if(length(unique(df[, GroupingVariable])) > 2){
-          
-          # Calculate Cohen's f
-          type <- identify_variable_type(df, GroupingVariable)[GroupingVariable]
-          #Without scaling, the coefficient represents the change in score per unit increase in the variable (if numeric, the unit of the variable. Makes sense to not scale...)
-          model <- lm(score ~ get(GroupingVariable), data = df)
-          results_var <- compute_cohens_f_pval(model, type)
-          
-          
-          if (pvalcalc) {
-            line1 <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-            line2 <- wrap_title(paste0("p = ", round(results_var["P_Value"], 3)), width = widthTitle)
-            subtitle <- paste(line1, line2, sep = "\n")
-          } else {
-            subtitle <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-          }
-          
-        }
-        
-        
-      }
-      
-    } else {
-      
-      subtitle <- NULL
-      
-    }
-    # If ConnectGroups is TRUE, add a line connecting medians across groups.
-    if (ConnectGroups && !is.null(ColorVariable)) {
-      p <- p + ggplot2::stat_summary(ggplot2::aes_string(group = ColorVariable, color = ColorVariable),
-                                     fun.y = median, geom = "line", size = 1.5, alpha = 0.75,
-                                     show.legend = FALSE)
-    }
-    
-    # Customize the plot appearance.
-    p <- p + ggplot2::theme_bw() +
-      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize),
-                     axis.text.y = ggplot2::element_text(  size = labsize),
-                     plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize-1),
-                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic")) +
-      ggplot2::labs(title = wrapped_title, subtitle = subtitle, color = "", x = "", y = "")
-    
-    # If ColorValues is provided, use a manual color scale; otherwise, if ColorVariable is provided,
-    # use a default brewer palette.
-    if (!is.null(ColorValues)) {
-      p <- p + ggplot2::scale_color_manual(values = ColorValues)
-    } else if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-    
-    # If limits is specified, crop the plot without adjusting the data (violins).
-    if (!is.null(limits)) {
-      p <- p + ggplot2::coord_cartesian(ylim = limits)
-    }
-    
-    # Adjust legend rows if legend_nrow is specified.
-    if (!is.null(legend_nrow)) {
-      p <- p + ggplot2::guides(color = ggplot2::guide_legend(nrow = legend_nrow))
-    }
-    
-    # Store the plot in the list.
-    plot_list[[signature]] <- p + theme(legend.position = "none")
-  }
-  
-  n <- length(plot_list)
-  
-  # Determine grid layout.
-  if (is.null(ncol) && is.null(nrow)) {
-    ncol <- ceiling(sqrt(n))
-    nrow <- ceiling(n / ncol)
-  } else if (is.null(ncol)) {
-    ncol <- ceiling(n / nrow)
-  } else if (is.null(nrow)) {
-    nrow <- ceiling(n / ncol)
-  }
-  
-  # Combine plots.
-  combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow , align = "h") #, common.legend = TRUE
-  
-  # Annotate with axis labels.
-  if (is.null(xlab)) {
-    xlab <- GroupingVariable
-  }
-  
-  if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
-  
-  # Create label for y axis based on method.
-  if (method == "ssGSEA") {
-    ylab <- "ssGSEA Enrichment Score"
-  } else if (method == "logmedian") {
-    ylab <- "Normalized Signature Score"
-  } else if (method == "ranking") {
-    ylab <- "Signature Genes' Ranking"
-  }
-  
-  combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
-                                                                 rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize)))
-  return(combined_plot)
-}
-```
-
-```{r}
-# adapted to not have the color legend
-
-PlotScores_adapted <- function(data, metadata, gene_sets,
-                               method = c("ssGSEA", "logmedian", "ranking", "all"),
-                               ColorVariable = NULL, Variable = NULL,
-                               ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                               widthTitle = 20, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 4,
-                               xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                               widthlegend=22, sig_threshold=0.05, cohen_threshold=0.5, colorPalette="Set3", cor=c("pearson","spearman","kendall")) {
-  # Dispatcher: picks the plotting routine from the scoring `method` and from the
-  # type that identify_variable_type() reports for `Variable` in `metadata`.
-  #   * method == "all"  -> list(heatmap, volcano) built from the effect sizes
-  #   * any other method -> whatever the type-specific plotting helper returns
-  method <- match.arg(method)
-  variable_kind <- identify_variable_type(metadata, Variable)
-
-  # Single scoring method: hand everything over to the type-specific helper.
-  if (method != "all") {
-
-    if (variable_kind == "Numeric") {
-      return(PlotScores_Numeric(data = data,
-                                metadata = metadata,
-                                gene_sets = gene_sets,
-                                method = method,
-                                Variable = Variable,
-                                ColorValues = ColorValues,
-                                ncol = ncol,
-                                nrow = nrow,
-                                title = title,
-                                widthTitle = widthTitle,
-                                titlesize = titlesize,
-                                limits = limits,
-                                pointSize = pointSize,
-                                xlab = xlab,
-                                labsize = labsize,
-                                compute_cohen = compute_cohen,
-                                pvalcalc = pvalcalc,
-                                colorPalette = colorPalette,
-                                cor = cor))
-    }
-
-    return(PlotScores_Categorical_adapted(data = data, metadata = metadata, gene_sets = gene_sets,
-                                          method = method,
-                                          ColorVariable = ColorVariable, GroupingVariable = Variable,
-                                          ColorValues = ColorValues, ConnectGroups = ConnectGroups,
-                                          ncol = ncol, nrow = nrow, title = title,
-                                          widthTitle = widthTitle, titlesize = titlesize, limits = limits,
-                                          legend_nrow = legend_nrow, pointSize = pointSize,
-                                          xlab = xlab, labsize = labsize, compute_cohen = compute_cohen,
-                                          cond_cohend = cond_cohend, pvalcalc = pvalcalc, mode = mode,
-                                          widthlegend = widthlegend, cohen_threshold = cohen_threshold,
-                                          colorPalette = colorPalette))
-  }
-
-  # method == "all": effect sizes for every scoring method, summarised as a
-  # heatmap and a volcano plot.
-  effect_sizes <- if (variable_kind == "Numeric") {
-    CohenF_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable)
-  } else {
-    CohenD_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable, mode = mode)
-  }
-
-  heatmap_res <- Heatmap_Cohen(cohenlist = effect_sizes,
-                               nrow = nrow,
-                               ncol = ncol,
-                               limits = limits,
-                               widthTitle = widthTitle,
-                               titlesize = titlesize,
-                               ColorValues = ColorValues,
-                               title = title)
-
-  # The volcano title size is fixed at 12, independent of `titlesize`.
-  volcano_res <- Volcano_Cohen(cohenlist = effect_sizes,
-                               titlesize = 12,
-                               ColorValues = ColorValues,
-                               title = title,
-                               widthlegend = widthlegend,
-                               pointSize = pointSize,
-                               sig_threshold = sig_threshold,
-                               cohen_threshold = cohen_threshold,
-                               colorPalette = colorPalette,
-                               ncol = ncol,
-                               nrow = nrow)
-
-  list(heatmap = heatmap_res$plt,
-       volcano = volcano_res$plt)
-}
-```
-
-```{r}
-# Plot signature scores against a numeric metadata variable, one panel per gene set.
-#
-# Scores are obtained from CalculateScores(). When `Variable` or `metadata` is NULL
-# a grid of score density plots is returned. Otherwise each panel is a scatter plot
-# of score vs `Variable` with 2D density contours, a linear fit, a correlation label
-# (ggpubr::stat_cor) and, if `compute_cohen` is TRUE, Cohen's f (plus the p-value
-# when `pvalcalc` is TRUE) shown as the panel title.
-#
-# Arguments:
-#   data, metadata, gene_sets  forwarded unchanged to CalculateScores().
-#   method        scoring method: "ssGSEA", "logmedian" or "ranking".
-#   Variable      name of the metadata column for the x axis; NULL -> densities.
-#   ColorValues   density fill / point colour (only the first element is used
-#                 for points); defaults are set inside the function.
-#   ncol, nrow    grid layout; missing values are derived from the panel count.
-#   title         overall figure title (wrapped to `widthTitle` in scatter mode).
-#   widthTitle    wrap width handed to wrap_title().
-#   titlesize, labsize, pointSize  text and point sizes.
-#   limits        axis limits applied through coord_cartesian() (zoom, no data drop).
-#   xlab          x-axis label of the scatter grid; defaults to `Variable`.
-#                 In density mode it is replaced by a method-specific label.
-#   compute_cohen, pvalcalc  toggle the effect size / p-value annotation.
-#   colorPalette  accepted for signature compatibility; not used in this function.
-#   cor           correlation method for ggpubr::stat_cor().
-#
-# Returns: the object produced by ggpubr::annotate_figure() for the panel grid.
-# Stops with an error when `Variable` is not a column of `metadata`.
-PlotScores_Numeric_adapted <- function(data,
-                                       metadata,
-                                       gene_sets,
-                                       method = c("ssGSEA", "logmedian", "ranking"),
-                                       Variable = NULL,
-                                       ColorValues = NULL,
-                                       ncol = NULL,
-                                       nrow = NULL,
-                                       title = NULL,
-                                       widthTitle = 10,
-                                       titlesize = 12,
-                                       limits = NULL,
-                                       pointSize = 2,
-                                       xlab = NULL,
-                                       labsize = 10,
-                                       compute_cohen = TRUE,
-                                       pvalcalc = FALSE,
-                                       colorPalette = "Set3",
-                                       cor = c("pearson","spearman","kendall")) {
-
-  method <- match.arg(method)
-  # Collapse the default choice vector to a single method. Passing the whole
-  # vector on to stat_cor() reaches stats::cor.test(), whose own match.arg()
-  # rejects a length-3 value that is not identical to its choice list, so the
-  # correlation layer would fail with the defaults.
-  cor <- match.arg(cor)
-
-  # Fill in whichever of ncol / nrow was left NULL from the number of panels.
-  grid_dims <- function(n_panels, ncol, nrow) {
-    if (is.null(ncol) && is.null(nrow)) {
-      ncol <- ceiling(sqrt(n_panels))
-      nrow <- ceiling(n_panels / ncol)
-    } else if (is.null(ncol)) {
-      ncol <- ceiling(n_panels / nrow)
-    } else if (is.null(nrow)) {
-      nrow <- ceiling(n_panels / ncol)
-    }
-    list(ncol = ncol, nrow = nrow)
-  }
-
-  ResultsList <- CalculateScores(data = data,
-                                 metadata = metadata,
-                                 gene_sets = gene_sets,
-                                 method = method)
-
-  # ---- No variable / no metadata: density of the scores per signature ----
-  # `||` because both operands are scalar conditions.
-  if (is.null(Variable) || is.null(metadata)) {
-
-    if (is.null(ColorValues)) ColorValues <- "#ECBD78"
-
-    plot_list <- list()
-
-    for (signature in names(ResultsList)) {
-
-      df <- ResultsList[[signature]]
-
-      p <- ggplot2::ggplot(df, ggplot2::aes(x = score)) +
-        ggplot2::geom_density(fill = ColorValues, alpha = 0.5) +
-        ggplot2::theme_classic() +
-        # Axis titles are blanked per panel; shared labels are added to the grid below.
-        ggplot2::labs(title = "Density Plot of Score", color = "", x = "", y = "") +
-        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize - .5),
-                       axis.text.y = ggplot2::element_text(size = labsize - .5),
-                       plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                       plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic"))
-
-      # Zoom on the score axis without dropping data from the density estimate.
-      if (!is.null(limits)) {
-        p <- p + ggplot2::coord_cartesian(xlim = limits)
-      }
-
-      plot_list[[signature]] <- p
-    }
-
-    dims <- grid_dims(length(plot_list), ncol, nrow)
-
-    # Shared x-axis label describing the score of the chosen method.
-    xlab <- switch(method,
-                   ssGSEA    = "ssGSEA Enrichment Score",
-                   logmedian = "Normalized Signature Score",
-                   ranking   = "Signature Genes' Ranking")
-
-    combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = dims$ncol, nrow = dims$nrow,
-                                       common.legend = TRUE, align = "h")
-    combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                             left = grid::textGrob("Density",
-                                                                   rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    return(combined_plot)
-  }
-
-  # ---- Scatter plots of score vs Variable ----
-  if (!(Variable %in% colnames(metadata)))
-    stop(paste0(Variable, " not in metadata columns. Please check metadata."))
-
-  # Default point colour; only the first supplied colour is used.
-  if (is.null(ColorValues)) ColorValues <- "#5264B6"
-
-  plot_list <- list()
-
-  for (signature in names(ResultsList)) {
-
-    df <- ResultsList[[signature]]
-
-    p <- ggplot2::ggplot(df, ggplot2::aes_string(x = Variable, y = "score")) +
-      ggplot2::geom_point(size = pointSize, alpha = 0.5, color = ColorValues[1]) +
-      # Namespaced like every other layer so the function does not depend on
-      # ggplot2 being attached.
-      ggplot2::geom_density_2d(colour = "white") +
-      ggplot2::geom_smooth(method = "lm", col = "black", se = FALSE, size = 2) +
-      ggpubr::stat_cor(method = cor)
-
-    # Effect size annotation: Cohen's f of score ~ Variable, optionally with p-value.
-    subtitle <- NULL
-    if (compute_cohen) {
-
-      type <- identify_variable_type(df, Variable)[Variable]
-      # The variable is deliberately left unscaled: the coefficient is the change
-      # in score per unit of the variable.
-      model <- lm(score ~ get(Variable), data = df)
-      results_var <- compute_cohens_f_pval(model, type)
-
-      subtitle <- wrap_title(paste0("Cohen's f = ", round(results_var["Cohen_f"], 3)), width = widthTitle)
-      if (pvalcalc) {
-        p_line <- wrap_title(paste0("p = ", round(results_var["P_Value"], 3)), width = widthTitle)
-        subtitle <- paste(subtitle, p_line, sep = "; ")
-      }
-    }
-
-    # The statistics line is shown as the panel title in this adapted variant.
-    p <- p + ggplot2::theme_bw() +
-      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = labsize),
-                     axis.text.y = ggplot2::element_text(size = labsize),
-                     plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic")) +
-      ggplot2::labs(title = subtitle, color = "", x = "", y = "")
-
-    # Zoom on the score axis without dropping points.
-    if (!is.null(limits)) {
-      p <- p + ggplot2::coord_cartesian(ylim = limits)
-    }
-
-    plot_list[[signature]] <- p
-  }
-
-  dims <- grid_dims(length(plot_list), ncol, nrow)
-
-  combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = dims$ncol, nrow = dims$nrow,
-                                     common.legend = TRUE, align = "h")
-
-  if (is.null(xlab)) {
-    xlab <- Variable
-  }
-
-  if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
-
-  # Every scoring method uses the same generic y-axis label here.
-  ylab <- "Score"
-
-  combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(ylab,
-                                                                 rot = 90, vjust = 2, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           bottom = grid::textGrob(xlab, gp = grid::gpar(cex = 1.3, fontsize = labsize), vjust = -2),
-                                           top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize)))
-  return(combined_plot)
-
-}
-```
-
-
-
-
-```{r}
-# Violin/jitter plots of signature scores per group, one panel per gene set, with
-# the x-axis tick labels hidden and the colour legend placed on top.
-#
-# Scores are obtained from CalculateScores(). When `GroupingVariable` or `metadata`
-# is NULL a grid of score densities (with a rug) is returned instead.
-#
-# Arguments:
-#   data, metadata, gene_sets  forwarded unchanged to CalculateScores().
-#   method            scoring method: "ssGSEA", "logmedian" or "ranking".
-#   ColorVariable     optional column used to colour the points.
-#   GroupingVariable  column defining the groups on the x axis; NULL -> densities.
-#   ColorValues       manual colours (density fill, or values of the colour scale).
-#   ConnectGroups     if TRUE and ColorVariable is given, join group medians by a line.
-#   ncol, nrow        grid layout; missing values are derived from the panel count.
-#   title             overall figure title (wrapped to `widthTitle` in violin mode).
-#   widthTitle        wrap width handed to wrap_title().
-#   titlesize, labsize, pointSize  text and point sizes.
-#   limits            axis limits applied through coord_cartesian() (zoom only).
-#   legend_nrow       number of rows of the colour legend.
-#   xlab              only affects nothing visible here: no bottom label is drawn
-#                     in violin mode and density mode uses a method-specific label.
-#   compute_cohen     add an effect size as panel subtitle: Cohen's d for two groups
-#                     (or for the two sets in `cond_cohend`), Cohen's f for more.
-#   cond_cohend       list of two character vectors of group labels to contrast.
-#   pvalcalc          also report a p-value next to the effect size.
-#   mode, widthlegend, cohen_threshold  accepted for signature compatibility; not
-#                     used in this function.
-#   colorPalette      brewer palette used when no manual colours are given.
-#
-# Returns: the object produced by ggpubr::annotate_figure() for the panel grid.
-# Stops with an error when `GroupingVariable` is not a column of `metadata`.
-PlotScores_Categorical_adapted_noX <- function(data, metadata, gene_sets,
-                                           method = c("ssGSEA", "logmedian", "ranking"),
-                                           ColorVariable = NULL, GroupingVariable = NULL,
-                                           ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                                           widthTitle = 10, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 2,
-                                           xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                                           widthlegend=22, cohen_threshold=0.6, colorPalette="Set3") {
-
-  method <- match.arg(method)
-
-  # Fill in whichever of ncol / nrow was left NULL from the number of panels.
-  grid_dims <- function(n_panels, ncol, nrow) {
-    if (is.null(ncol) && is.null(nrow)) {
-      ncol <- ceiling(sqrt(n_panels))
-      nrow <- ceiling(n_panels / ncol)
-    } else if (is.null(ncol)) {
-      ncol <- ceiling(n_panels / nrow)
-    } else if (is.null(nrow)) {
-      nrow <- ceiling(n_panels / ncol)
-    }
-    list(ncol = ncol, nrow = nrow)
-  }
-
-  # Build the "<label> = <effect>" subtitle, optionally followed by a p-value line.
-  stat_subtitle <- function(label, effect, p_value = NULL) {
-    effect_line <- wrap_title(paste0(label, " = ", round(effect, 3)), width = widthTitle)
-    if (is.null(p_value)) return(effect_line)
-    p_line <- wrap_title(paste0("p = ", round(p_value, 3)), width = widthTitle)
-    paste(effect_line, p_line, sep = "\n")
-  }
-
-  # cohen_d() is defined outside this chunk. The previous code read its result as
-  # a bare number in one branch and through `$effsize` in another, so one of the
-  # two had to fail; accept either shape here.
-  effect_value <- function(res) if (is.list(res)) res$effsize else res
-
-  # Shared axis label describing the score of the chosen method.
-  score_label <- switch(method,
-                        ssGSEA    = "ssGSEA Enrichment Score",
-                        logmedian = "Normalised Signature Score",
-                        ranking   = "Signature Genes' Ranking")
-
-  ResultsList <- CalculateScores(data = data,
-                                 metadata = metadata,
-                                 gene_sets = gene_sets,
-                                 method = method)
-
-  # ---- No grouping / no metadata: density of the scores per signature ----
-  # `||` because both operands are scalar conditions.
-  if (is.null(GroupingVariable) || is.null(metadata)) {
-
-    if (is.null(ColorValues)) ColorValues <- "#ECBD78"
-
-    plot_list <- list()
-
-    for (signature in names(ResultsList)) {
-
-      df <- ResultsList[[signature]]
-      wrapped_title <- wrap_title(signature, width = widthTitle)
-
-      p <- ggplot2::ggplot(df, ggplot2::aes(x = score)) +
-        ggplot2::geom_density(fill = ColorValues, alpha = 0.5) +
-        # Individual observations as a rug below the density.
-        ggplot2::geom_rug(ggplot2::aes(x = score), color = ColorValues, sides = "b", alpha = 0.8, size = .5,
-                          length = grid::unit(0.035, "npc")) +
-        ggplot2::theme_classic() +
-        ggplot2::labs(title = wrapped_title, color = "", x = "", y = "") +
-        ggplot2::theme(axis.text.x = ggplot2::element_blank(),
-                       axis.text.y = ggplot2::element_text(size = labsize - .5),
-                       plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                       plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic"),
-                       legend.position = "top") +
-        ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 6)))
-
-      # Zoom on the score axis without dropping data from the density estimate.
-      if (!is.null(limits)) {
-        p <- p + ggplot2::coord_cartesian(xlim = limits)
-      }
-
-      plot_list[[signature]] <- p
-    }
-
-    dims <- grid_dims(length(plot_list), ncol, nrow)
-
-    combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = dims$ncol, nrow = dims$nrow,
-                                       align = "h", common.legend = TRUE)
-    combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                             left = grid::textGrob("Density",
-                                                                   rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             bottom = grid::textGrob(score_label, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                             top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize + 2)))
-    return(combined_plot)
-  }
-
-  # ---- Violin / jitter plots per group ----
-  if (!(GroupingVariable %in% colnames(metadata)))
-    stop(paste0(GroupingVariable, " not in metadata columns. Please check metadata."))
-
-  plot_list <- list()
-
-  for (signature in names(ResultsList)) {
-
-    df <- ResultsList[[signature]]
-
-    # Alphabetically ordered factor, so the first level is the reference group
-    # for Cohen's d when no contrast is specified.
-    df[, GroupingVariable] <- factor(df[, GroupingVariable],
-                                     levels = sort(unique(as.character(df[, GroupingVariable]))))
-
-    wrapped_title <- wrap_title(signature, width = widthTitle)
-
-    p <- ggplot2::ggplot(df, ggplot2::aes_string(x = GroupingVariable, y = "score"))
-
-    # Jittered points, optionally coloured by ColorVariable.
-    if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::geom_jitter(ggplot2::aes_string(color = ColorVariable), size = pointSize, alpha = 0.5)
-    } else {
-      p <- p + ggplot2::geom_jitter(size = pointSize, alpha = 0.5) +
-        ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-
-    # Violins plus a crossbar at the median of each group.
-    p <- p + ggplot2::geom_violin(alpha = 0.5, scale = "width") +
-      ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
-                            geom = "crossbar", width = 0.25,
-                            position = ggplot2::position_dodge(width = 0.13))
-
-    # Effect size subtitle. Reset for every signature so that a panel without a
-    # computable statistic gets no subtitle instead of an "object not found"
-    # error or the stale value of the previous panel.
-    subtitle <- NULL
-
-    if (compute_cohen) {
-
-      n_groups <- length(unique(df[, GroupingVariable]))
-
-      if (!is.null(cond_cohend)) {
-        # Expected form:
-        # cond_cohend <- list(A = c("Senescent"),
-        #                     B = c("Proliferative", "Quiescent"))
-
-        if (sum(unlist(cond_cohend) %in% unique(df[, GroupingVariable])) != n_groups)
-          warning("Warning: Not all conditions of GroupingVariable were specified for Cohen's d calculation")
-
-        x <- df[df[[GroupingVariable]] %in% cond_cohend[[1]], "score", drop = TRUE]
-        y <- df[df[[GroupingVariable]] %in% cond_cohend[[2]], "score", drop = TRUE]
-
-        p_val <- NULL
-        if (pvalcalc) {
-          # Two-level helper column so the t-test contrasts set A against set B.
-          df$cohen <- ifelse(df[, GroupingVariable] %in% cond_cohend[[1]], names(cond_cohend)[1], names(cond_cohend)[2])
-          p_val <- rstatix::t_test(df, formula = score ~ cohen)$p[1]
-        }
-        subtitle <- stat_subtitle("Cohen's d", effect_value(cohen_d(x, y)), p_val)
-
-      } else if (n_groups < 2) {
-
-        warning("Not enough conditions available to report Cohen's d.")
-
-      } else if (n_groups == 2) {
-
-        # Cohen's d following the order of the groups on the x axis.
-        group1 <- levels(df[, GroupingVariable])[1]
-        group2 <- levels(df[, GroupingVariable])[2]
-
-        x <- df[df[[GroupingVariable]] == group1, "score", drop = TRUE]
-        y <- df[df[[GroupingVariable]] == group2, "score", drop = TRUE]
-
-        p_val <- NULL
-        if (pvalcalc) {
-          # Build the formula from the column *named by* GroupingVariable; the
-          # literal `score ~ GroupingVariable` refers to a non-existent column.
-          group_formula <- stats::as.formula(paste("score ~", GroupingVariable))
-          p_val <- rstatix::t_test(df, formula = group_formula)$p[1]
-        }
-        subtitle <- stat_subtitle("Cohen's d", effect_value(cohen_d(x, y)), p_val)
-
-      } else {
-
-        # More than two groups: Cohen's f from a linear model of score on group.
-        type <- identify_variable_type(df, GroupingVariable)[GroupingVariable]
-        model <- lm(score ~ get(GroupingVariable), data = df)
-        results_var <- compute_cohens_f_pval(model, type)
-
-        subtitle <- stat_subtitle("Cohen's f", results_var["Cohen_f"],
-                                  if (pvalcalc) results_var["P_Value"] else NULL)
-      }
-    }
-
-    # Optional line joining the group medians within each colour group.
-    if (ConnectGroups && !is.null(ColorVariable)) {
-      # `fun` replaces the deprecated `fun.y`, matching the crossbar layer above.
-      p <- p + ggplot2::stat_summary(ggplot2::aes_string(group = ColorVariable, color = ColorVariable),
-                                     fun = median, geom = "line", size = 1.5, alpha = 0.75,
-                                     show.legend = FALSE)
-    }
-
-    # Panel appearance; x-axis tick labels are hidden in this variant.
-    p <- p + ggplot2::theme_bw() +
-      ggplot2::theme(axis.text.x = ggplot2::element_blank(),
-                     axis.text.y = ggplot2::element_text(size = labsize),
-                     plot.title = ggplot2::element_text(hjust = 0.5, size = titlesize - 1),
-                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = titlesize - 1.5, face = "italic")) +
-      ggplot2::labs(title = wrapped_title, subtitle = subtitle, color = "", x = "", y = "")
-
-    # Manual colours take precedence; otherwise a brewer palette when colouring by a variable.
-    if (!is.null(ColorValues)) {
-      p <- p + ggplot2::scale_color_manual(values = ColorValues)
-    } else if (!is.null(ColorVariable)) {
-      p <- p + ggplot2::scale_color_brewer(palette = colorPalette)
-    }
-
-    # Zoom on the score axis without altering the violins.
-    if (!is.null(limits)) {
-      p <- p + ggplot2::coord_cartesian(ylim = limits)
-    }
-
-    # Legend layout, with enlarged legend keys.
-    if (!is.null(legend_nrow)) {
-      p <- p + ggplot2::guides(color = ggplot2::guide_legend(nrow = legend_nrow,
-                                                             override.aes = list(size = pointSize + 2)))
-    }
-
-    plot_list[[signature]] <- p + ggplot2::theme(legend.position = "top")
-  }
-
-  dims <- grid_dims(length(plot_list), ncol, nrow)
-
-  combined_plot <- ggpubr::ggarrange(plotlist = plot_list, ncol = dims$ncol, nrow = dims$nrow,
-                                     align = "h", common.legend = TRUE)
-
-  if (!is.null(title)) title <- wrap_title(title, width = widthTitle)
-
-  # No bottom (x-axis) annotation in this variant: only the score label and the title.
-  combined_plot <- ggpubr::annotate_figure(combined_plot,
-                                           left = grid::textGrob(score_label,
-                                                                 rot = 90, vjust = 1, gp = grid::gpar(cex = 1.3, fontsize = labsize)),
-                                           top = grid::textGrob(title, gp = grid::gpar(cex = 1.3, fontsize = titlesize)))
-  return(combined_plot)
-}
-```
-
-```{r}
-# dispatcher for the "_noX" variant: categorical plots hide the x-axis text and keep the colour legend on top
-
-PlotScores_adapted_noX <- function(data, metadata, gene_sets,
-                               method = c("ssGSEA", "logmedian", "ranking", "all"),
-                               ColorVariable = NULL, Variable = NULL,
-                               ColorValues = NULL, ConnectGroups = FALSE, ncol = NULL, nrow = NULL, title = NULL,
-                               widthTitle = 20, titlesize = 12, limits = NULL, legend_nrow = NULL, pointSize = 4,
-                               xlab = NULL, labsize = 10, compute_cohen=TRUE, cond_cohend = NULL, pvalcalc = FALSE, mode = c("simple","medium","extensive"),
-                               widthlegend=22, sig_threshold=0.05, cohen_threshold=0.5, colorPalette="Set3", cor=c("pearson","spearman","kendall")) {
-  # Dispatcher for the "_noX" plots: picks the plotting routine from the scoring
-  # `method` and from the type that identify_variable_type() reports for
-  # `Variable` in `metadata`.
-  #   * method == "all"  -> list(heatmap, volcano) built from the effect sizes
-  #   * any other method -> whatever the type-specific plotting helper returns
-  method <- match.arg(method)
-  variable_kind <- identify_variable_type(metadata, Variable)
-
-  # Single scoring method: hand everything over to the type-specific helper.
-  if (method != "all") {
-
-    if (variable_kind == "Numeric") {
-      return(PlotScores_Numeric(data = data,
-                                metadata = metadata,
-                                gene_sets = gene_sets,
-                                method = method,
-                                Variable = Variable,
-                                ColorValues = ColorValues,
-                                ncol = ncol,
-                                nrow = nrow,
-                                title = title,
-                                widthTitle = widthTitle,
-                                titlesize = titlesize,
-                                limits = limits,
-                                pointSize = pointSize,
-                                xlab = xlab,
-                                labsize = labsize,
-                                compute_cohen = compute_cohen,
-                                pvalcalc = pvalcalc,
-                                colorPalette = colorPalette,
-                                cor = cor))
-    }
-
-    return(PlotScores_Categorical_adapted_noX(data = data, metadata = metadata, gene_sets = gene_sets,
-                                              method = method,
-                                              ColorVariable = ColorVariable, GroupingVariable = Variable,
-                                              ColorValues = ColorValues, ConnectGroups = ConnectGroups,
-                                              ncol = ncol, nrow = nrow, title = title,
-                                              widthTitle = widthTitle, titlesize = titlesize, limits = limits,
-                                              legend_nrow = legend_nrow, pointSize = pointSize,
-                                              xlab = xlab, labsize = labsize, compute_cohen = compute_cohen,
-                                              cond_cohend = cond_cohend, pvalcalc = pvalcalc, mode = mode,
-                                              widthlegend = widthlegend, cohen_threshold = cohen_threshold,
-                                              colorPalette = colorPalette))
-  }
-
-  # method == "all": effect sizes for every scoring method, summarised as a
-  # heatmap and a volcano plot.
-  effect_sizes <- if (variable_kind == "Numeric") {
-    CohenF_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable)
-  } else {
-    CohenD_allConditions(data = data, metadata = metadata, gene_sets = gene_sets, variable = Variable, mode = mode)
-  }
-
-  heatmap_res <- Heatmap_Cohen(cohenlist = effect_sizes,
-                               nrow = nrow,
-                               ncol = ncol,
-                               limits = limits,
-                               widthTitle = widthTitle,
-                               titlesize = titlesize,
-                               ColorValues = ColorValues,
-                               title = title)
-
-  # The volcano title size is fixed at 12, independent of `titlesize`.
-  volcano_res <- Volcano_Cohen(cohenlist = effect_sizes,
-                               titlesize = 12,
-                               ColorValues = ColorValues,
-                               title = title,
-                               widthlegend = widthlegend,
-                               pointSize = pointSize,
-                               sig_threshold = sig_threshold,
-                               cohen_threshold = cohen_threshold,
-                               colorPalette = colorPalette,
-                               ncol = ncol,
-                               nrow = nrow)
-
-  list(heatmap = heatmap_res$plt,
-       volcano = volcano_res$plt)
-}
-```
-
-
-
- 
-### Load relevant data
-
-```{r}
-metadata <- readRDS("../data/metadata.rds")
-counts <- readRDS("../data/counts.rds")
-filtered_counts <- readRDS("../data/filtered_counts.rds")
-normalised_counts <- readRDS("../data/normalised_counts.rds")
-corrcounts <- readRDS("../data/corrcounts.rds")
-signatures_bidirectional <- readRDS("../data/SenescenceSigntures_Bidirectional.rds")
-
-
-GTEx_alltissues <- readRDS("../data/GTExV8_voyagercorrected.rds") # https://github.com/DiseaseTranscriptomicsLab/voyAGEr/tree/main/Corrected_Counts
-metadata_GTEx_alltissues <- readRDS("../data/GTExV8_metadata.rds") # Restricted access
-``` 
-
-### Colors
-
-```{r}
-Condition_colors <- c(
-  "Senescent"     = "#F4A261",  # Soft orange (not goldenrod)
-  "Proliferative" = "#A1C298",  # Desaturated sage green
-  "Quiescent"     = "#9DB4C0"   # Dusty teal-gray
-)
-
-CellTypecols <- c(
-  "Fibroblast"   = "#E69F00",  # Orange
-  "Keratinocyte" = "#56B4E9",  # Sky Blue
-  "Melanocyte"   = "#009E73",  # Bluish Green
-  "Endothelial"  = "#F0E442",  # Yellow
-  "Neuronal"     = "#0072B2",  # Blue
-  "Mesenchymal"  = "#D55E00"   # Vermilion
-)
-
-CellTypecols_alt <- c(
-  "Fibroblast"   = "#1b9e77",  # Teal green
-  "Keratinocyte" = "#d95f02",  # Warm orange-brown
-  "Melanocyte"   = "#7570b3",  # Muted purple
-  "Endothelial"  = "#e7298a",  # Dark pink
-  "Neuronal"     = "#66a61e",  # Olive green
-  "Mesenchymal"  = "#e6ab02"   # Mustard
-)
-
-
-
-dataset_colors <- c(
-  "HernandezSegura" = "#8DD3C7",
-  "Mangelinck"      = "#FFFFB3",
-  "Chan"            = "#BEBADA",
-  "Purcell"         = "#FB8072",
-  "Marthandan2016"  = "#80B1D3",
-  "Marthandan2015"  = "#FDB462",
-  "Wang"            = "#B3DE69",
-  "Suda"            = "#FCCDE5",
-  "McHugh"          = "#D9D9D9",
-  "Savic"           = "#BC80BD",
-  "Skea"            = "#CCEBC5",
-  "Laurent"         = "#FFED6F",
-  "Tasdemir"        = "#D0B7E1",
-  "Lenain"          = "#FFB3BA",
-  "Aarts"           = "#C2C2F0",
-  "Casella"         = "#FFDAC1",
-  "Numa"            = "#C6E2FF",
-  "Admasu"          = "#F5CBA7",
-  "Urata"           = "#C7CEEA",
-  "Wang2023"        = "#E6B0AA",
-  "Fu"              = "#A9DFBF",
-  "Tanke"           = "#F9E79F",
-  "Bawens"          = "#AED6F1"
-)
-
-
-```
-
-```{r}
-show_col(Condition_colors, ncol=3)
-show_col(CellTypecols_alt)
-show_col(dataset_colors)
-```
- 
-## Supplementary Figure 1 - Senescence Datasets Processing
-
-Filtering lowly expressed genes
-
- 
-```{r fig.width=4, fig.height=4}
-# Average expression per gene per condition
-avg_expr_quiesc <- log10(rowMeans(counts[, metadata$Condition == "Quiescent"] + 1))
-avg_expr_prol   <- log10(rowMeans(counts[, metadata$Condition == "Proliferative"] + 1))
-avg_expr_sen    <- log10(rowMeans(counts[, metadata$Condition == "Senescent"] + 1))
-
-# Combine into dataframe
-df_avg_expr <- data.frame(expression = c(avg_expr_quiesc, avg_expr_prol, avg_expr_sen),
-                          Condition = rep(c("Quiescent", "Proliferative", "Senescent"),
-                                          each = nrow(counts)))
-
-# Plot
-(plt_beforefilt <- ggplot(df_avg_expr, aes(x = expression, fill = Condition)) +
-  geom_density(alpha = 0.7) +
-  theme_bw(base_size = 14) +
-  xlab("Mean log10(count + 1) per gene") +
-  ylab("Density") +
-  geom_vline(xintercept = log10(70), linetype = "dotted", color = "black", size = 1) +
-  scale_fill_manual(values = Condition_colors) +
-  theme(legend.position="bottom", legend.title = element_blank()))
- 
-# Average expression per gene per condition
-avg_expr_quiesc <- log10(rowMeans(filtered_counts[, metadata$Condition == "Quiescent"] + 1))
-avg_expr_prol   <- log10(rowMeans(filtered_counts[, metadata$Condition == "Proliferative"] + 1))
-avg_expr_sen    <- log10(rowMeans(filtered_counts[, metadata$Condition == "Senescent"] + 1))
-
-# Combine into dataframe
-df_avg_expr <- data.frame(expression = c(avg_expr_quiesc, avg_expr_prol, avg_expr_sen),
-                          Condition = rep(c("Quiescent", "Proliferative", "Senescent"),
-                                          each = nrow(filtered_counts)))
-
-# Plot
-(plt_afterfilt <- ggplot(df_avg_expr, aes(x = expression, fill = Condition)) +
-  geom_density(alpha = 0.7) +
-  theme_bw(base_size = 14) +
-  xlab("Mean log10(count + 1) per gene") +
-  ylab("Density") +
-  geom_vline(xintercept = log10(70), linetype = "dotted", color = "black", size = 1) +
-  scale_fill_manual(values = Condition_colors) +
-  theme(legend.position="bottom", legend.title = element_blank() ))
-
-```
-
-
-```{r fig.width=4, fig.height=6}
-# (filtplot <- ggarrange(plt_beforefilt,plt_afterfilt, ncol=1, common.legend = T))
-```
-
-Normalisation
- 
-```{r fig.width=10, fig.height=4}
-samples_ordered <- colnames(filtered_counts)[order(colSums(filtered_counts))]
-N <- 30
-
-filtcounts_ggplot <- stack(log2(filtered_counts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(filtcounts_ggplot) <- c("expression", "sampleID")
-filtcounts_ggplot <- merge(filtcounts_ggplot,metadata, by="sampleID")
-  
-
-(plt_beforenorm <- ggplot(filtcounts_ggplot, aes(sampleID,expression)) + 
-    geom_boxplot(aes(fill=DatasetID))+ 
-    xlab("Samples") + 
-    ylab("Gene Expression \n(log2(CPM))")    +  
-    theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw(base_size = 14)+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank(), legend.title = element_blank())  + 
-    scale_fill_manual(values = dataset_colors))
- 
-
-normcounts_ggplot <- stack(log2(normalised_counts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(normcounts_ggplot) <- c("expression", "sampleID")
-normcounts_ggplot <- merge(normcounts_ggplot,metadata, by="sampleID")
-  
-
-(plt_afternorm <- ggplot(normcounts_ggplot, aes(sampleID,expression)) + 
-    geom_boxplot(aes(fill=DatasetID))+ 
-    xlab("Samples") + 
-    ylab("Gene Expression \n(log2(CPM))")   +  
-    theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw(base_size = 14)+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank(), legend.title = element_blank()) + 
-    scale_fill_manual(values = dataset_colors))
-
-```
-
-
-```{r fig.width=8, fig.height=5}
-#(normplot <- ggarrange(plt_beforenorm,plt_afternorm,ncol=1, common.legend = T, legend="top"))
-```
-
-
-Batch effect correction
-
-(before)
-
-
-```{r fig.width=6, fig.height=5}
-y <- DGEList(log2(normalised_counts+1), samples= metadata)
-PCA_beforeBC  <- prcomp(t(y$counts), scale=FALSE, center=TRUE)
-PCACounts_beforeBC  <- PCA_beforeBC$x
-PCACounts_beforeBC  <- as.data.frame(PCACounts_beforeBC )
-PCAData_beforeBC <-  cbind(PCACounts_beforeBC[1:10],y$samples) 
-
-ev = PCA_beforeBC$sdev^2 
-pc1= round(100*ev[1]/sum(ev),2) 
-pc2 = round(100*ev[2]/sum(ev),2)  
-
-(PCA_beforeBEC_Dataset <- ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(fill=DatasetID), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = dataset_colors) +
-  theme( legend.title = element_blank())+ guides(fill=guide_legend(ncol=2,byrow=TRUE)))
-
-(PCA_beforeBEC_Conditon <- ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(fill=Condition), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = Condition_colors) +
-  theme( legend.title = element_blank()))
-
-(PCA_beforeBEC_CellType <- ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(fill=CellType), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = CellTypecols_alt) +
-  theme( legend.title = element_blank()))
- 
- 
-```
-
-Batch effect correction
-
-(after)
-
-
-
-```{r fig.width=6, fig.height=5}
-y <- DGEList(log2(corrcounts + 1), samples= metadata)
-PCA_afterBC  <- prcomp(t(y$counts), scale=FALSE, center=TRUE)
-PCACounts_afterBC  <- PCA_afterBC$x
-PCACounts_afterBC  <- as.data.frame(PCACounts_afterBC )
-PCAData_afterBC <-  cbind(PCACounts_afterBC[1:10],y$samples) 
-
-ev = PCA_afterBC$sdev^2 
-pc1= round(100*ev[1]/sum(ev),2) 
-pc2 = round(100*ev[2]/sum(ev),2)  
-
-(PCA_afterBEC_Dataset <- ggplot(PCAData_afterBC, aes(PC1,PC2))+
-  geom_point(aes(fill=DatasetID), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = dataset_colors) +
-  theme( legend.title = element_blank())+ guides(fill=guide_legend(ncol=3,byrow=TRUE)))
-
-(PCA_afterBEC_Conditon <- ggplot(PCAData_afterBC, aes(PC1,PC2))+
-  geom_point(aes(fill=Condition), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = Condition_colors) +
-  theme( legend.title = element_blank()))
-
-(PCA_afterBEC_CellType <- ggplot(PCAData_afterBC, aes(PC1,PC2))+
-  geom_point(aes(fill=CellType), size=5, alpha=0.6, shape=21)+ 
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw(base_size = 14)+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted") + 
-    scale_fill_manual(values = CellTypecols_alt) +
-  theme( legend.title = element_blank()))
- 
- 
-```
-
-
-```{r}
-
-corrcounts_ggplot <- stack(log2(corrcounts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(corrcounts_ggplot) <- c("expression", "sampleID")
-corrcounts_ggplot <- merge(corrcounts_ggplot,metadata, by="sampleID")
-  
-
-(plt_afterBEC <- ggplot(corrcounts_ggplot, aes(sampleID,expression)) + 
-    geom_boxplot(aes(fill=DatasetID))+ 
-    xlab("Samples") + 
-    ylab("Gene Expression \n(log2(CPM))")   +  
-    theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw(base_size = 14)+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank(), legend.title = element_blank(),
-        legend.position = "bottom") + 
-    scale_fill_manual(values = dataset_colors))
-```
-
-
-
-```{r fig.width=4, fig.height=4}
-
-df_variance_corrcounts <- readRDS("/mnt/scratch/home/rsilva/VersionControl/markeR/markeR/inst/Paper/data/df_variance_corrcounts.rds")
-df_variance_normcounts <- readRDS("/mnt/scratch/home/rsilva/VersionControl/markeR/markeR/inst/Paper/data/df_variance_normcounts.rds")
-vars <- c("DatasetID","Condition","CellType")
-
-(var_beforeBEC <- ggplot(df_variance_normcounts, aes(x = reorder(Variable, -VarianceExplained), y = VarianceExplained, fill = Variable)) +
-    geom_bar(stat = "identity") +
-    ylab("Mean Variance Explained (%)") +
-    xlab("") +
-    theme_minimal(base_size = 14) +
-    theme(plot.title = element_text(hjust = 0.5),
-          legend.position="bottom",
-          legend.title=element_blank()) +
-  scale_fill_manual(values = c("#F4C430", "#E07A5F", "#3D84A8") )+
-    guides(fill = guide_legend(title = "Variable"))+
-    ylim(c(0,100)) + 
-    theme(legend.position = "none"))
-
-
-(var_afterBEC <- ggplot(df_variance_corrcounts, aes(x = reorder(Variable, -VarianceExplained), y = VarianceExplained, fill = Variable)) +
-    geom_bar(stat = "identity") + 
-    ylab("Mean Variance Explained (%)") +
-    xlab("") +
-    theme_minimal(base_size = 14) +
-    theme(plot.title = element_text(hjust = 0.5),
-          legend.position="bottom",
-          legend.title=element_blank()) +
-    scale_fill_manual(values = c("#F4C430", "#E07A5F", "#3D84A8")) +
-    guides(fill = guide_legend(title = "Variable"))+
-    ylim(c(0,100))+ 
-    theme(legend.position = "none"))
-
-
-
-```
-
-
-
-
-```{r fig.width=16, fig.height=16}
-# filtering
-(panelA <- ggarrange(plt_beforefilt,plt_afterfilt, ncol=1, common.legend = T, legend="bottom"))
-# normalisation
-(panelB <- ggarrange(plt_beforenorm,plt_afternorm,ncol=1, common.legend = T, legend="bottom"))
-(panelAB <- ggarrange(panelA,panelB,nrow=1, widths=c(0.3,0.7), labels = c("A","B"),
-                      font.label = list(size = 18, face = "bold")))
-# before BEC
-(panelC_left <- ggarrange(PCA_beforeBEC_Dataset+
-  theme(plot.margin = margin(t = 5, r = 5, b = 5, l = 30)),PCA_beforeBEC_Conditon, ncol = 1, heights=c(0.7,0.3) ))
-(panelC_right <- ggarrange(PCA_beforeBEC_CellType,var_beforeBEC, ncol=1, heights=c(0.45,0.55) ))
-(panelC <- ggarrange(panelC_left,panelC_right, ncol=2  ))
-
-# after BEC
-(panelD_bottom <- ggarrange(PCA_afterBEC_Dataset,var_afterBEC, nrow=1, widths=c(0.6,0.4)))
-(panelD <- ggarrange(plt_afterBEC,panelD_bottom,ncol=1, heights = c(0.4,0.6)))
-(panelCD <- ggarrange(
-  panelC, panelD,
-  labels = c("C", "D"),
-  font.label = list(size = 18, face = "bold")
-))  
-(plt_sup1 <- ggarrange(panelAB,panelCD, ncol=1, heights=c(0.45,0.65)))
-```
-
-```{r}
-ggplot2::ggsave("../Figures/Figs/Sup1_Preprocessing.png",
-                plt_sup1,
-                width = 19, height=16, bg = 'white')
-```
-
-
-## Supplementary Figure 2 - Function of Gene Sets
-
-```{r}
-# transform list into a list of vectors
-gene_sets <- lapply(signatures_bidirectional, function(x) {
-  if (is.data.frame(x)) unique(x[[1]])
-  else if (is.vector(x)) unique(x)
-  else NULL
-})
-
- 
-```
-
-
-Crossing information from BioMart (gene characterisation) and COMPARTMENTS (protein localisation)
-
-COMPARTMENTS: https://academic.oup.com/database/article/doi/10.1093/database/bau012/2633793#47519723
-Human Protein Atlas: https://www.proteinatlas.org/about/assays+annotation#ih_reliability
-
-```{r}
-gene_annotations <- as.data.frame(fread("../../../../data/BioMart_Gene_Characterisation_March2024.txt"))
-gene_annotations <- subset(gene_annotations, gene_annotations$`Protein stable ID`!="")
-
-
-protein_localisation <- as.data.frame(fread("../../../../data/human_compartment_knowledge_full.tsv"))
-protein_localisation <- subset(protein_localisation, V5 == "UniProtKB")
-protein_localisation <- subset(protein_localisation, V6 %in% c("CURATED")) #c("CURATED", "IDA", "TAS", "NAS")  
-
-protein_localisation_HumanAtlas_all <- as.data.frame(fread("../../../../data/HumanAtlas_subcellular_location.tsv"))
-protein_localisation_HumanAtlas <- subset(protein_localisation_HumanAtlas_all, Reliability != "Uncertain")
-```
-
-COMPARTMENTS + BioMart
-
-```{r}
-gene_annotation_and_localisation <- merge(gene_annotations,protein_localisation, by.x="Protein stable ID", by.y="V1", all.x = TRUE)
-colnames(gene_annotation_and_localisation) <- c("ProteinID","GeneName","GeneDescription","GOTermName","GOTermAccession","GOTermDefinition","GODomain","GeneName2","GOTermAccession_Localisation","GOTermName_Localisation","DataBase_Localisation","Evidence","Evidence_Score")
-gene_annotation_and_localisation <- subset(gene_annotation_and_localisation, !is.na(GeneName2))
-```
-
-
-```{r}
-gene_annotation_and_localisation
-protein_localisation_HumanAtlas
-```
-
-```{r fig.width=18, fig.height=12}
- 
-# Assume:
-# - `gene_sets`: named list of gene vectors
-# - `protein_localisation_HumanAtlas_all`: your dataframe
- 
-
-# # Rename columns just in case
-# gene_data <- protein_localisation_HumanAtlas_all[, c("Gene name", "Main location", "Extracellular location")]
-# colnames(gene_data) <- c("Gene", "Location", "Secretion")
-# 
-# # Assign extracellular label as either "Predicted to be secreted" or "not reported"
-# gene_data$SecretionStatus <- ifelse(grepl("Predicted to be secreted", gene_data$Secretion),
-#                                     "Predicted to be secreted", "not reported")
-#  
-plot_list <- list()
-
-for (sig_name in names(gene_sets)) {
-  genes <- gene_sets[[sig_name]]
-  
-  # Subset the Human Atlas data based on the gene set
-  sub_data <- protein_localisation_HumanAtlas_all[protein_localisation_HumanAtlas_all$`Gene name` %in% genes, ]
-  
-  # Remove duplicates based on the 'Gene name' and 'Main location' columns
-  sub_data <- sub_data[!duplicated(sub_data[c("Gene name", "Main location")]), ]
-
-  # Assign secretion status
-  sub_data$SecretionStatus <- ifelse(grepl("Predicted to be secreted", sub_data$`Extracellular location`),
-                                     "Predicted to be secreted", "Not reported")
-
-  # Initialize empty result data frame
-  counts <- data.frame(Location = character(),
-                       SecretionStatus = character(),
-                       Count = numeric(),
-                       stringsAsFactors = FALSE)
-
-  # Count occurrences
-  for (i in seq_len(nrow(sub_data))) {
-    locs <- unlist(strsplit(sub_data$`Main location`[i], ";"))
-    locs <- trimws(locs)
-    locs <- locs[locs != ""]
-    if (length(locs) == 0) locs <- "Unspecified"
-    secretion <- sub_data$SecretionStatus[i]
-
-    for (loc in locs) {
-      match_row <- which(counts$Location == loc & counts$SecretionStatus == secretion)
-      if (length(match_row) == 0) {
-        counts <- rbind(counts, data.frame(Location = loc, SecretionStatus = secretion, Count = 1))
-      } else {
-        counts$Count[match_row] <- counts$Count[match_row] + 1
-      }
-    }
-  }
-
-  # Reorder Location by total gene count
-  total_counts <- tapply(counts$Count, counts$Location, sum)
-  counts$Location <- factor(counts$Location, levels = names(sort(total_counts)))
-
-  # Order secretion status so "Predicted" appears on top
-  counts$SecretionStatus <- factor(counts$SecretionStatus, levels = c("Predicted to be secreted","Not reported"))
-
-  sig_name <- wrap_title(sig_name,28)
-  
-  # Plot
-  p <- ggplot(counts, aes(x = Count, y = Location)) +
-    geom_bar(stat = "identity", aes(fill = SecretionStatus)) +
-    labs(title = sig_name, x = "Frequency", y = "Cellular localisation\n") +
-    scale_fill_manual(values = c("Predicted to be secreted" = "#3D5D56", 
-                                 "Not reported" = "#899C98"),
-                      labels = c("Predicted to be secreted", "not reported")) +
-    theme_classic() +
-    theme(plot.title = element_text(hjust = 0.5),
-          axis.text.y = element_text(size = 9),
-          legend.title = element_blank())
-
-  plot_list[[sig_name]] <- p
-}
-
-# Combine all plots in a grid, with a common legend
-final_plot_localisation <- ggarrange(plotlist = plot_list, common.legend = TRUE, heights = c(0.4, 0.3, 0.3))
-final_plot_localisation
-```
-
-
-```{r fig.width=18, fig.height=12}
-# Load libraries
-library(msigdbr)
-library(ggplot2)
-library(reshape2)
-library(ggpubr)
-
-# 1. Load MSigDB Hallmark pathways
-pathways_H <- msigdbr(species = "Homo sapiens", category = "H")
-pathwaysHset <- split(toupper(pathways_H$gene_symbol), pathways_H$gs_name)
-
-# 2. Create the universe of genes from the Human Protein Atlas
-universe_genes <- unique(toupper(protein_localisation_HumanAtlas_all$`Gene name`))
-
-# 3. Initialize result matrices
-df_OR <- matrix(nrow = length(pathwaysHset), ncol = length(gene_sets))
-df_pval <- matrix(nrow = length(pathwaysHset), ncol = length(gene_sets))
-
-# 4. Fisher's exact test for each pathway × signature
-for (i in seq_along(pathwaysHset)) {
-  pathway_genes <- pathwaysHset[[i]]
-  pathway_name <- names(pathwaysHset)[i]
-
-  for (j in seq_along(gene_sets)) {
-    signature_genes <- toupper(gene_sets[[j]])
-    signature_name <- names(gene_sets)[j]
-
-    # Define contingency table
-    a <- length(intersect(signature_genes, pathway_genes))                           # In both
-    b <- length(setdiff(signature_genes, pathway_genes))                             # In signature only
-    c <- length(setdiff(pathway_genes, signature_genes))                             # In pathway only
-    d <- length(setdiff(universe_genes, union(signature_genes, pathway_genes)))      # In neither
-
-    contingency_table <- matrix(c(a, b, c, d), nrow = 2)
-
-    # Perform Fisher's exact test
-    fisher_result <- fisher.test(contingency_table)
-    df_OR[i, j] <- fisher_result$estimate
-    df_pval[i, j] <- fisher_result$p.value
-  }
-}
-
-# 5. Set row and column names
-rownames(df_pval) <- names(pathwaysHset)
-colnames(df_pval) <- names(gene_sets)
-rownames(df_OR) <- names(pathwaysHset)
-colnames(df_OR) <- names(gene_sets)
-
-# 6. Convert to long format
-df_pval_melt <- melt(df_pval)
-colnames(df_pval_melt) <- c("Pathway", "Signature", "pval")
-
-df_OR_melt <- melt(df_OR)
-colnames(df_OR_melt) <- c("Pathway", "Signature", "OR")
-
-# 7. Adjust p-values
-df_pval_melt$padj <- p.adjust(df_pval_melt$pval, method = "BH")
-
-# 8. Merge OR and adjusted p-values
-enrichment_results <- merge(df_OR_melt,
-                            df_pval_melt[, c("Pathway", "Signature", "padj")],
-                            by = c("Pathway", "Signature"))
-
-# 9. Filter significant results (padj ≤ 0.05)
-enrichment_results <- enrichment_results[!is.na(enrichment_results$padj) & enrichment_results$padj <= 0.05, ]
-
-# 10. Plot per signature
-plot_list <- list()
-for (sig in unique(enrichment_results$Signature)) {
-  data_sig <- enrichment_results[enrichment_results$Signature == sig, ]
-  
-    sig <- wrap_title(sig,28)
-    
-  p <- ggplot(data_sig, aes(x = log10(OR), y = reorder(Pathway, log10(OR)))) +
-    geom_bar(stat = "identity", fill = "#3D5D56") +
-    labs(title = sig, x = "log10(Fisher's OR)", y = "Enriched Hallmark \nPathways") +
-    theme_bw() +
-    geom_vline(xintercept = 0, linetype = "dashed") +
-    theme(plot.title = element_text(hjust = 0.5, size = 12),
-          axis.text.y = element_text(size = 9))
-  
-  plot_list[[sig]] <- p
-}
-
-# 11. Arrange plots in a 3x3 grid with common legend
-final_plot_function <- ggarrange(plotlist = plot_list,
-                        ncol = 3, nrow = 3,
-                        common.legend = TRUE, legend = "bottom", 
-                        heights=c(0.45,0.35,0.2))
-
-# 12. Display final plot
-print(final_plot_function)
-
-# 13. Optionally save it
-# ggsave("GeneSets_HallmarkPathways_Enrichment.png", plot = final_plot,
-#        width = 15, height = 18, bg = 'white')
-
-
-```
-
-
-```{r fig.width=18, fig.height=24}
-(supfig2 <- ggarrange(final_plot_localisation,final_plot_function, labels="AUTO", ncol=1,font.label = list(size = 22)))
-```
-
-```{r}
-ggplot2::ggsave("Figs/Sup2_GeneSets_Function_Localisation.png",
-                supfig2,
-                width = 20, height=24, bg = 'white')
-```
-
-
-
-### Alternative - using Signature Similarity
-
-```{r}
-gene_sets <- lapply(signatures_bidirectional, function(x) {
-  if (is.data.frame(x)) unique(x[[1]])
-  else if (is.vector(x)) unique(x)
-  else NULL
-})
-
-
-```
- 
-
-
-```{r fig.width=8, fig.height=16}
- 
-(plt_logodds <- signature_similarity(
-  signatures = gene_sets,
-  other_user_signatures = NULL,
-  collection = "H",
-  subcollection = NULL,
-  metric = "odds_ratio",
-  unique(toupper(protein_localisation_HumanAtlas_all$`Gene name`)), # universe of protein coding geenes, as they are probably the ones important in this
-  or_threshold = 1,
-  pval_threshold = 0.05,
-  limits = NULL,
-  title_size = 12,
-  color_values = c("#F9F4AE", "#B44141"),
-  title = "",
-  num_sigs_toplot = NULL,
-  jaccard_threshold = 0.1,
-  msig_subset = NULL,
-  width_text = 28
-))
-```
- 
-
-```{r fig.width=10, fig.height=16}
- library(ggplot2)
-library(ggpubr)
-
-plot_list <- list()
-plot_heights <- numeric()
-
-for (sig_name in names(gene_sets)) {
-  genes <- gene_sets[[sig_name]]
-  sub_data <- protein_localisation_HumanAtlas_all[protein_localisation_HumanAtlas_all$`Gene name` %in% genes, ]
-  sub_data <- sub_data[!duplicated(sub_data[c("Gene name", "Main location")]), ]
-
-  sub_data$SecretionStatus <- ifelse(grepl("Predicted to be secreted", sub_data$`Extracellular location`),
-                                     "Predicted to be secreted", "Not reported")
-
-  counts <- data.frame(Location = character(),
-                       SecretionStatus = character(),
-                       Count = numeric(),
-                       stringsAsFactors = FALSE)
-
-  for (i in seq_len(nrow(sub_data))) {
-    locs <- unlist(strsplit(sub_data$`Main location`[i], ";"))
-    locs <- trimws(locs)
-    locs <- locs[locs != ""]
-    if (length(locs) == 0) locs <- "Unspecified"
-    secretion <- sub_data$SecretionStatus[i]
-
-    for (loc in locs) {
-      match_row <- which(counts$Location == loc & counts$SecretionStatus == secretion)
-      if (length(match_row) == 0) {
-        counts <- rbind(counts, data.frame(Location = loc, SecretionStatus = secretion, Count = 1))
-      } else {
-        counts$Count[match_row] <- counts$Count[match_row] + 1
-      }
-    }
-  }
-
-  total_counts <- tapply(counts$Count, counts$Location, sum)
-  counts$Location <- factor(counts$Location, levels = names(sort(total_counts)))
-  counts$SecretionStatus <- factor(counts$SecretionStatus, levels = c("Predicted to be secreted","Not reported"))
-
-  sig_name_wrapped <- wrap_title(sig_name, 28)
-
-  p <- ggplot(counts, aes(x = Count, y = Location)) +
-    geom_bar(stat = "identity", aes(fill = SecretionStatus)) +
-    labs(title = sig_name_wrapped, x = "Frequency", y = "Cellular localisation\n") +
-    scale_fill_manual(values = c("Predicted to be secreted" = "#3D5D56", 
-                                 "Not reported" = "#899C98"),
-                      labels = c("Predicted to be secreted", "not reported")) +
-    theme_classic() +
-    theme(plot.title = element_text(hjust = 0.5, size=10),
-          axis.text.y = element_text(size = 10),
-          
-          axis.text.x = element_text(size = 10),
-          legend.title = element_blank(), legend.position = "none")
-
-  plot_list[[sig_name]] <- p
-  
-  # 🔑 Add number of y-axis rows as a proxy for height
-  n_locs <- length(levels(counts$Location))
-  plot_heights <- c(plot_heights, n_locs)
-}
-
-# Normalize heights so that they sum to 1
-plot_heights <- plot_heights / sum(plot_heights)
-names(plot_heights) <- names(plot_list)
-
-plot_heights["GOBP_POSITIVE_REGULATION_OF_CELLULAR_SENESCENCE"] <- 0.06
-plot_heights["GOBP_NEGATIVE_REGULATION_OF_CELLULAR_SENESCENCE"] <- 0.06
-
-# Combine plots in one column, adjusting heights automatically
-final_plot_localisation2_1 <- ggarrange(plotlist = plot_list[c("CellAge","CSgene","REACTOME_CELLULAR_SENESCENCE","SAUL_SEN_MAYO")],
-                                      ncol = 1,
-                                      heights = plot_heights[c("CellAge","CSgene","REACTOME_CELLULAR_SENESCENCE","SAUL_SEN_MAYO")] )
-
-final_plot_localisation2_2 <- ggarrange(plotlist = plot_list[!names(plot_list) %in% c("CellAge","CSgene","REACTOME_CELLULAR_SENESCENCE","SAUL_SEN_MAYO")],
-                                      ncol = 1,
-                                      heights = plot_heights[!names(plot_heights) %in% c("CellAge","CSgene","REACTOME_CELLULAR_SENESCENCE","SAUL_SEN_MAYO")],
-                                      common.legend = TRUE,
-                                      legend = "top")
-
-
-(final_plot_localisation2 <- ggarrange(final_plot_localisation2_2,final_plot_localisation2_1,
-                                      nrow = 1,
-                                      heights = plot_heights))
-```
-
-```{r fig.width=15, fig.height=14}
-(plt_sup2_alternative <- ggarrange(final_plot_localisation2,plt_logodds, nrow=1, labels = "AUTO", font.label = list(size = 20), widths = c(0.5,0.5)))
-```
-
-
-```{r}
-ggplot2::ggsave("Figs/SupFigure2_GeneSets_Function_Localisation_alternative.png",
-                plt_sup2_alternative,
-                width = 15, height=14, bg = 'white')
-```
-
-
-## Supplementary Figure 3 - Score Distribution on Different Senescence-Inducing Factors
-
-
-
-```{r fig.width=12, fig.height=8}
-plt_logmedian_stressors_1 <-  PlotScores_adapted_noX(data = corrcounts, 
-           metadata = metadata, 
-           method = "logmedian", 
-           gene_sets = signatures_bidirectional[1:6],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-plt_logmedian_stressors_2 <-  PlotScores_adapted(data = corrcounts, 
-           metadata = metadata, 
-           method = "logmedian", 
-           gene_sets = signatures_bidirectional[7:9],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-(plt_logmedian_stressors <- ggarrange(plt_logmedian_stressors_1,plt_logmedian_stressors_2, ncol=1, heights=c(0.55,0.45)))
-
-plt_ssGSEA_stressors_1 <-  PlotScores_adapted_noX(data = corrcounts, 
-           metadata = metadata, 
-           method = "ssGSEA", 
-           gene_sets = signatures_bidirectional[1:6],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-plt_ssGSEA_stressors_2 <-  PlotScores_adapted(data = corrcounts, 
-           metadata = metadata, 
-           method = "ssGSEA", 
-           gene_sets = signatures_bidirectional[7:9],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-(plt_ssGSEA_stressors <- ggarrange(plt_ssGSEA_stressors_1,plt_ssGSEA_stressors_2, ncol=1, heights=c(0.55,0.45)))
-
-plt_ranking_stressors_1 <-  PlotScores_adapted_noX(data = corrcounts, 
-           metadata = metadata, 
-           method = "ranking", 
-           gene_sets = signatures_bidirectional[1:6],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-plt_ranking_stressors_2 <-  PlotScores_adapted(data = corrcounts, 
-           metadata = metadata, 
-           method = "ranking", 
-           gene_sets = signatures_bidirectional[7:9],  
-           ColorVariable = "CellType", 
-           Variable="SenescentType", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=F,  
-           ncol = 3, 
-           widthTitle=30, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=1,
-           titlesize=14, xlab = "Senescence Inducer")
-
-(plt_ranking_stressors <- ggarrange(plt_ranking_stressors_1,plt_ranking_stressors_2, ncol=1, heights=c(0.55,0.45)))
-```
-
-```{r fig.width=12, fig.height=20}
-(supfigure_3 <- ggarrange(plt_logmedian_stressors,
-          plt_ssGSEA_stressors,
-          plt_ranking_stressors, 
-          common.legend=T, ncol=1, heights=c(0.3,0.3,0.3), labels = "AUTO",font.label = list(size = 20)))
-```
-
-```{r}
-ggplot2::ggsave("../Figures/Figs/SupFigure3_Scores_SenescenceStressors.png",
-                supfigure_3,
-                width = 12, height=20, bg = 'white')
-```
-
-
-## Supplementary Figure 4 - Cohen's d and AUC for all signatures (scores)
-
-
-```{r  fig.width=8, fig.height=8}
-
-
-Scores_Overview_All <- PlotScores(data = corrcounts, 
-                                  metadata = metadata,  
-                                  gene_sets=signatures_bidirectional, 
-                                  Variable="Condition",  
-                                  method ="all",   
-                                  ncol = 3, 
-                                  nrow = 3, 
-                                  widthTitle=28, 
-                                  limits = c(0,2),   
-                                  title=NULL, 
-                                  titlesize = 10,
-                                  #ColorValues = list(heatmap=c("#F9F4AE", "#B44141") ),
-                                  mode="simple"#,
-                                  #widthlegend=30, 
-                                  #sig_threshold=0.05, 
-                                  #cohen_threshold=0.6,
-                                  #pointSize=6,
-                                  #colorPalette="Paired"
-)
-
-plt_cohenheatmap <- Scores_Overview_All$heatmap
-plt_cohenheatmap
-```
-
-```{r fig.width=8, fig.height=8}
-plt_auc_heatmap <- AUC_Scores(data = corrcounts, 
-                              metadata = metadata, 
-                              gene_sets=signatures_bidirectional, 
-                              method = "all", 
-                              mode = "simple", 
-                              variable="Condition", 
-                              nrow = 3, 
-                              ncol = 3, 
-                              limits = c(0.5,1), 
-                              widthTitle = 28, 
-                              titlesize = 10, 
-                              ColorValues = c("#F9F4AE", "#B44141") ) 
-plt_auc_heatmap
-
-```
-```{r fig.width=11, fig.height=12}
-(supfig4 <- ggarrange(plt_cohenheatmap,plt_auc_heatmap,ncol=1, labels="AUTO", heights=c(0.55,0.45) ))
-```
-
-```{r}
-ggplot2::ggsave("../Figures/Figs/SupFigure4_Scores_AUCCohen.png",
-                supfig4,
-                width = 11, height=12, bg = 'white')
-```
-
-## Supplementary Figure 5 - FPR for all signatures
-
- 
-```{r fig.width=12, fig.height=18}
- 
-# Local job
-# plt_fdrsim_allsignatures_v2 <- FPR_Simulation(data = corrcounts,
-#                               metadata = metadata,
-#                               original_signatures = signatures_bidirectional,
-#                               gene_list = row.names(corrcounts),
-#                               number_of_sims = 100,
-#                               widthTitle = 30,
-#                               Variable = "Condition",
-#                               titlesize = 12,
-#                               pointSize = 4,
-#                               labsize = 10,
-#                               mode = "simple",
-#                               ColorValues=NULL,
-#                               ncol=NULL,
-#                               nrow=3 )
-
-
-plt_fdrsim_allsignatures_v2
-```
-
-```{r}
-ggplot2::ggsave("Figs/SupFigure5_FPRsimulations.png",
-                plt_fdrsim_allsignatures_v2,
-                width = 12, height=18, bg = 'white')
-```
-
-
-## Supplementary Figure 6 - SenMayo for GTEx
- 
- 
-
-
-```{r}
-# local job: ~5 days
-# 
-# 
-# methods <- c("logmedian","ranking","ssGSEA")
-# gene_set <- list(HernandezSegura=signatures_bidirectional$HernandezSegura,
-#                  SAUL_SEN_MAYO=signatures_bidirectional$SAUL_SEN_MAYO)
-# tissues <- unique(metadata_GTEx_alltissues$SMTSD)
-# 
-# results_df_score <- data.frame(NULL)
-# results_df_gsea <- data.frame(NULL)
-# 
-# # Initialize progress bar
-# pb <- txtProgressBar(min = 0, max = length(tissues), style = 3)
-# 
-# for (i in seq_along(tissues)) {
-# 
-#   tissue <- tissues[i]
-# 
-#   subset_metadata <- metadata_GTEx_alltissues[metadata_GTEx_alltissues$SMTSD == tissue,]
-#   subset_data <- GTEx_alltissues[,subset_metadata$SAMPID]
-# 
-# 
-#   # Update progress bar
-#   setTxtProgressBar(pb, i)
-# 
-#   for (sig in names(gene_set)){
-# 
-#     signature <- list(gene_set[[sig]])
-#     names(signature) <- sig
-# 
-#     data_varassoc_gsea <- suppressWarnings(suppressMessages(GSEA_VariableAssociation(data=subset_data,
-#                                                                                      metadata=subset_metadata,
-#                                                                                      cols=c("AGE"),
-#                                                                                      mode="simple",
-#                                                                                      gene_set=signature)$data))
-# 
-#     data_varassoc_gsea <- data_varassoc_gsea[,c("NES","pval","Contrast")]
-#     data_varassoc_gsea$signature <- sig
-#     data_varassoc_gsea$method <- "GSEA"
-#     data_varassoc_gsea$tissue <- tissue
-# 
-#     results_df_gsea <- rbind(results_df_gsea, data_varassoc_gsea)
-# 
-#     for (method in methods){
-# 
-#       data_varassoc_score <- suppressWarnings(suppressMessages(Score_VariableAssociation(data=subset_data,
-#                                                                                          metadata=subset_metadata,
-#                                                                                          cols=c("AGE"), # SMRIN was a variable for batch correction
-#                                                                                          method=method,
-#                                                                                          gene_set = signature,
-#                                                                                          mode="simple", printplt = F)$Overall))
-# 
-# 
-#       df_permutations <- data.frame(NULL)
-#       for (j in 1:nperm){
-#         set.seed(j)
-#         metadata_subset_shuffleAGE <- subset_metadata
-#         metadata_subset_shuffleAGE$AGE <- sample(metadata_subset_shuffleAGE$AGE)
-# 
-#         cohend_shuffle <- suppressWarnings(suppressMessages(Score_VariableAssociation(data=subset_data[,metadata_subset_shuffleAGE$SAMPID],
-#                                                                                            metadata=metadata_subset_shuffleAGE,
-#                                                                                            cols=c("AGE"), # SMRIN was a variable for batch correction
-#                                                                                            method=method,
-#                                                                                            gene_set = signature,
-#                                                                                            mode="simple", printplt = F)$Overall))
-# 
-# 
-#         cohend_shuffle$signature <- sig
-#         cohend_shuffle$method <- method
-#         cohend_shuffle$tissue <- tissue
-#         df_permutations <- rbind(df_permutations, cohend_shuffle)
-# 
-#       }
-# 
-#       # calculateFPR
-# 
-#       df_permutations$Cohen_f
-#       fpr <- sum(df_permutations$Cohen_f > data_varassoc_score$Cohen_f)/length(df_permutations$Cohen_f)
-# 
-# 
-#       data_varassoc_score$signature <- sig
-#       data_varassoc_score$method <- method
-#       data_varassoc_score$tissue <- tissue
-#       data_varassoc_score$fpr <- fpr
-#       results_df_score <- rbind(results_df_score, data_varassoc_score)
-# 
-# 
-# 
-# 
-#     }
-# 
-#   }
-# 
-# }
-# 
-# # Close the progress bar
-# close(pb)
-# 
-# 
-# saveRDS( results_df_gsea,"results_df_gsea_GTEx_FPR.rds")
-# saveRDS( results_df_score,"results_df_score_GTEx_FPR.rds") # same results for t-test's p value and 1000 perms
-```
-
-
-```{r}
-results_df_score <- readRDS("../data/results_df_score_GTEx_FPR.rds")
-results_df_gsea <- readRDS("../data/results_df_gsea_GTEx_FPR.rds")
-
-
-results_df_gsea <- results_df_gsea %>%
-  group_by(signature) %>%
-  mutate(padj = p.adjust(pval, method = "BH")) %>%
-  ungroup()
-
-results_df_score <- results_df_score %>%
-  group_by(signature, method) %>%
-  mutate(P_adj = p.adjust(fpr, method = "BH")) %>%
-  ungroup()
-
-
-# Alphabetically order tissue factor
-tissue_levels <- sort(unique(results_df_score$tissue), decreasing = T)
-results_df_score$tissue <- factor(results_df_score$tissue, levels = tissue_levels)
-results_df_gsea$tissue  <- factor(results_df_gsea$tissue, levels = tissue_levels)
-
-results_df_gsea
-results_df_score
-```
-
- 
-
-```{r fig.width=10, fig.height=5.5}
-library(ggplot2)
-library(dplyr)
-library(patchwork)
-
-# Ensure correct factor levels for method and tissue
-results_df_score$method <- factor(results_df_score$method, levels = c("logmedian", "ranking", "ssGSEA"))
-results_df_gsea$method <- "GSEA"  # single method name
-
-# Add asterisk for significance
-results_df_score <- results_df_score %>%
-  mutate(signif = ifelse(P_adj < 0.05, "*", ""))
-
-results_df_gsea <- results_df_gsea %>%
-  mutate(signif = ifelse(padj < 0.05, "*", ""))
-
-# Loop through each signature
-sigs <- unique(results_df_score$signature)
-
-list_plts_sigs <- list()
-for (sig in sigs) {
-  # Subset for this signature
-  score_data <- results_df_score %>% filter(signature == sig)
-  gsea_data  <- results_df_gsea %>% filter(signature == sig)
-
-  # Score plot
-  p_score <- ggplot(score_data, aes(x = tissue, y = method, fill = Cohen_f)) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#49B0AB", name = "Cohen's f", limits=c(0,0.4) ) +
-    theme_minimal(base_size = 12) +
-    theme(
-      axis.title = element_blank(),
-      axis.text.x = element_text(angle = 60, hjust = 1, size = 12),
-      axis.text.y = element_text(size = 12),
-      legend.position = "top"
-    )
-
-  # GSEA plot
-  p_gsea <- ggplot(gsea_data, aes(x = tissue, y = method, fill = abs(NES))) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#B04975", name = "|NES|", limits=c(0,2.5),na.value = "#B04975"  ) + 
-    theme_minimal(base_size = 12) +
-    theme(
-      axis.title = element_blank(), 
-      axis.text.x = element_blank(),
-      axis.ticks.x = element_blank(),
-      legend.position = "top"
-    )
-
-  # Combine score and GSEA plots
-  combined_plot <- p_gsea + p_score  + 
-    plot_layout(ncol = 1, guides = "collect", heights = c(0.25, 0.75)) & 
-    theme(legend.position = "top")
-
-  list_plts_sigs[[sig]] <- combined_plot
-}
-
- 
-plt_gtex_scores_SenMayo <- list_plts_sigs[["SAUL_SEN_MAYO"]]
-plt_gtex_scores_SenMayo
-
-```
-
-```{r}
-tissues_signif_HS <- c("Artery - Aorta","Breast - Mammary Tissue", "Cells - Cultured fibroblasts","Thyroid")
-tissues_signif_SenMayo <- c("Artery - Tibial", "Brain - Anterior cingulate cortex (BA24)", "Brain - Hippocampus", "Colon - Sigmoid", "Minor Salivary Gland", "Muscle - Skeletal", "Nerve - Tibial", "Prostate")
-
-```
-
-```{r fig.width=12, fig.height=8}
-
-# Score-vs-age scatter plots for the SenMayo signature, one row per tissue
-# (tissues listed in tissues_signif_SenMayo) and one panel per scoring method.
-methods <- c("logmedian","ranking","ssGSEA")
-pltlist <- list()
-
-for (tissue in tissues_signif_SenMayo){
-  
-  # Restrict metadata and expression matrix to the samples of this tissue
-  subset_metadata <- metadata_GTEx_alltissues[metadata_GTEx_alltissues$SMTSD == tissue,]
-  subset_data <- GTEx_alltissues[,subset_metadata$SAMPID]
-  
-  pltlist_aux <- list()
-  
-  for (method in methods){ 
-    
-    scores_df <- CalculateScores(data = subset_data,
-                                 metadata = subset_metadata,
-                                 gene_sets = list(SAUL_SEN_MAYO=signatures_bidirectional$SAUL_SEN_MAYO), method = method)
-     
-    
-    pltlist_aux[[method]] <- ggplot(scores_df$SAUL_SEN_MAYO, aes(x=AGE, y=score)) +
-      geom_jitter(size=2, color="#8F95B1")+ # to preserve the donor's age; correlation was calculated with "real" data
-      geom_density2d( colour="white", size=0.3) +
-      ggplot2::geom_smooth(method = "lm", col = "black", se = FALSE, size=1.3)+ ggpubr::stat_cor(aes(label = ..r.label..),  
-                                                                                                 label.x = 20, size=4) +  
-      xlab("Age (years)") + ylab("Score") +
-      # base_size is an argument of the complete theme, not a theme() element
-      theme_bw(base_size = 16) +
-      ggtitle(method) +
-      #center title 
-      theme(plot.title = element_text(hjust = 0.5 ),
-            axis.text.x = element_text( size=12),
-            axis.text.y = element_text(size=12),
-            axis.title.x = element_text(size=14),
-            axis.title.y = element_text(size=14)) 
-  }
-  
-  # One row of three method panels, titled with the tissue name
-  pltlist[[tissue]] <- ggarrange(plotlist = pltlist_aux, ncol = 3, nrow = 1, common.legend = TRUE) +
-    ggtitle( tissue)  + theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))
-  
-  
-}
-
-plt_scoredistribution_SenMayo_GTEx <- ggarrange(plotlist = pltlist, ncol = 2, nrow=4, common.legend = TRUE, legend = "bottom")  
-plt_scoredistribution_SenMayo_GTEx
-```
-
-
-```{r fig.width=13, fig.height=15}
- # sup figure?
-
-(supfig6 <-  ggarrange(plt_gtex_scores_SenMayo,plt_scoredistribution_SenMayo_GTEx, heights=c(0.35,0.65), labels = c("A","B"), nrow=2))
-ggplot2::ggsave("../Figures/Figs/SupFigure6_GTEx_SenMayo.png",
-                supfig6,
-                width = 13, height=15, bg = 'white')
-
-```
-
-
-
-### SenMayo with and without inflammation genes
-
-
-```{r}
-# Retrieve all hallmark gene sets (category H); reused below for the gene lookup
-hallmark_sets <- msigdbr(species = "Homo sapiens", category = "H")
-
-# View unique hallmark gene set names
-unique(hallmark_sets$gs_name)
-
-# Hallmark gene sets treated as inflammation-related
-inflammation_sets <- c(
-  "HALLMARK_INFLAMMATORY_RESPONSE",
-  "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
-  "HALLMARK_INTERFERON_GAMMA_RESPONSE",
-  "HALLMARK_INTERFERON_ALPHA_RESPONSE",
-  #"HALLMARK_COMPLEMENT",
-  "HALLMARK_IL6_JAK_STAT3_SIGNALING"
-)
-
-# Unique gene symbols across the selected hallmark sets
-inflammation_genes <- hallmark_sets |>
-  dplyr::filter(gs_name %in% inflammation_sets) |>
-  dplyr::pull(gene_symbol) |>
-  unique()
-
-inflammation_genes
-
-# SenMayo genes that also belong to one of the inflammation hallmark sets
-overlap <- intersect(signatures_bidirectional$SAUL_SEN_MAYO$gene, inflammation_genes)
-
-overlap
-
-# "woInflammation" drops the overlapping genes; "InflammationOnly" keeps only them
-SenMayo_woInflammation <- signatures_bidirectional$SAUL_SEN_MAYO[!signatures_bidirectional$SAUL_SEN_MAYO$gene %in% overlap,]
-SenMayo_InflammationOnly <- signatures_bidirectional$SAUL_SEN_MAYO[signatures_bidirectional$SAUL_SEN_MAYO$gene %in% overlap,]
-
-SenMayo_Signatures <- list(SAUL_SEN_MAYO=signatures_bidirectional$SAUL_SEN_MAYO,
-                           SAUL_SEN_MAYO_WO_INFLAMMATION=SenMayo_woInflammation,
-                           SAUL_SEN_MAYO_INFLAMMATION=SenMayo_InflammationOnly
-)
-
-```
-
-
-```{r fig.width=8, fig.height=4}
-(plt_senmayo_scores <- PlotScores(data = corrcounts, 
-           metadata = metadata, 
-           method = "ranking", 
-           gene_sets = SenMayo_Signatures,  
-           ColorVariable = "CellType", 
-           Variable="Condition", 
-           ColorValues = CellTypecols_alt, 
-           ConnectGroups=T,  
-           ncol = 3, 
-           widthTitle=20, 
-           limits = NULL, 
-           legend_nrow = 1, 
-           compute_cohen=F,
-           pointSize=3,
-           titlesize=14, xlab = ""))
-
-```
-
- 
-```{r}
-
-degenes_senmayo <- calculateDE(data=corrcounts, 
-                       metadata=metadata, 
-                       variables="Condition",   
-                       modelmat = NULL, 
-                       contrasts = c("Senescent - Proliferative",
-                                     "Senescent - Quiescent"#,
-                                     #"Proliferative - Quiescent"
-                       )) 
-
-degenes_senmayo
-```
-
-
- 
-
-
-
-```{r}
-GSEAresults_senmayo <- runGSEA(degenes_senmayo, SenMayo_Signatures, stat = NULL)
-GSEAresults_senmayo
-```
-
-
- 
-
-```{r fig.width=11, fig.height=3}
-
-(plotNESlollipop_senmayo <- plotNESlollipop(GSEA_results=GSEAresults_senmayo, sig_threshold = 0.05,saturation_value=0.00001, nonsignif_color = "white", signif_color = "#934873",
-                                    grid = T, nrow = 1, ncol = NULL, widthlabels=20, title=NULL, titlesize=14))
-
-```
-
-
-
-```{r} 
-
-methods <- c("logmedian","ranking","ssGSEA")
-gene_set <- SenMayo_Signatures[2:3] # only the two custom subsets (elements 2 and 3); the original SenMayo signature was already scored
-tissues <- unique(metadata_GTEx_alltissues$SMTSD) 
-
-results_df_score_SenMayoCustom <- data.frame(NULL)
-results_df_gsea_SenMayoCustom <- data.frame(NULL)
-
-# Initialize progress bar
-pb <- txtProgressBar(min = 0, max = length(tissues), style = 3)
-
-for (i in seq_along(tissues)) {
-  
-  tissue <- tissues[i]
-  
-  subset_metadata <- metadata_GTEx_alltissues[metadata_GTEx_alltissues$SMTSD == tissue,]
-  subset_data <- GTEx_alltissues[,subset_metadata$SAMPID]
-  
-  
-  # Update progress bar
-  setTxtProgressBar(pb, i)
-  
-  for (sig in names(gene_set)){
-    
-    signature <- list(gene_set[[sig]])
-    names(signature) <- sig
-    
-    data_varassoc_gsea <- suppressWarnings(suppressMessages(GSEA_VariableAssociation(data=subset_data, 
-                                                                                     metadata=subset_metadata, 
-                                                                                     cols=c("AGE"),  
-                                                                                     mode="simple", 
-                                                                                     gene_set=signature)$data))
-    
-    data_varassoc_gsea <- data_varassoc_gsea[,c("NES","pval","Contrast")]
-    data_varassoc_gsea$signature <- sig
-    data_varassoc_gsea$method <- "GSEA"
-    data_varassoc_gsea$tissue <- tissue
-    
-    results_df_gsea_SenMayoCustom <- rbind(results_df_gsea_SenMayoCustom, data_varassoc_gsea)
-    
-    for (method in methods){
-      
-      data_varassoc_score <- suppressWarnings(suppressMessages(Score_VariableAssociation(data=subset_data, 
-                                                                                         metadata=subset_metadata, 
-                                                                                         cols=c("AGE"), # SMRIN was a variable for batch correction
-                                                                                         method=method, 
-                                                                                         gene_set = signature, 
-                                                                                         mode="simple", printplt = F)$Overall))
-      data_varassoc_score$signature <- sig
-      data_varassoc_score$method <- method
-      data_varassoc_score$tissue <- tissue
-      results_df_score_SenMayoCustom <- rbind(results_df_score_SenMayoCustom, data_varassoc_score)
-      
-      
-      
-    }
-    
-  }
-  
-}
-
-# Close the progress bar
-close(pb)
-
-
-results_df_gsea_SenMayoCustom
-results_df_score_SenMayoCustom
-```
- 
-```{r}
-
-results_df_gsea_SenMayoCustom  <- results_df_gsea_SenMayoCustom  %>%
-  group_by(signature) %>%
-  mutate(padj = p.adjust(pval, method = "BH")) %>%
-  ungroup()
-
-results_df_score_SenMayoCustom <- results_df_score_SenMayoCustom  %>%
-  group_by(signature, method) %>%
-  mutate(P_adj = p.adjust(P_Value, method = "BH")) %>%
-  ungroup()
-
-
-# Alphabetically order tissue factor
-tissue_levels <- sort(unique(results_df_score_SenMayoCustom$tissue), decreasing = T)
-results_df_score_SenMayoCustom$tissue <- factor(results_df_score_SenMayoCustom$tissue, levels = tissue_levels)
-results_df_gsea_SenMayoCustom$tissue  <- factor(results_df_gsea_SenMayoCustom$tissue, levels = tissue_levels)
-
-results_df_gsea_SenMayoCustom
-results_df_score_SenMayoCustom
-```
-
-
-```{r fig.width=12, fig.height=5}
-
-# Ensure correct factor levels for method and tissue
-results_df_score_SenMayoCustom$method <- factor(results_df_score_SenMayoCustom$method, levels = c("logmedian", "ranking", "ssGSEA"))
-results_df_gsea_SenMayoCustom$method <- "GSEA"  # single method name
-
-# Add asterisk for significance
-results_df_score_SenMayoCustom <- results_df_score_SenMayoCustom %>%
-  mutate(signif = ifelse(P_adj < 0.05, "*", ""))
-
-results_df_gsea_SenMayoCustom <- results_df_gsea_SenMayoCustom %>%
-  mutate(signif = ifelse(padj < 0.05, "*", ""))
-
-# Loop through each signature
-sigs <- unique(results_df_score_SenMayoCustom$signature)
-
-list_plts_sigs <- list()
-for (sig in sigs) {
-  # Subset for this signature
-  score_data <- results_df_score_SenMayoCustom %>% filter(signature == sig)
-  gsea_data  <- results_df_gsea_SenMayoCustom %>% filter(signature == sig)
-
-  # Score plot
-  p_score <- ggplot(score_data, aes(x = tissue, y = method, fill = Cohen_f)) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#49B0AB", name = "Cohen's f", limits=c(0,0.4) ) +
-    theme_minimal(base_size = 12) +
-    theme(
-      axis.title = element_blank(),
-      axis.text.x = element_text(angle = 60, hjust = 1, size = 12),
-      axis.text.y = element_text(size = 12),
-      legend.position = "none"
-    )
-
-  # GSEA plot
-  p_gsea <- ggplot(gsea_data, aes(x = tissue, y = method, fill = abs(NES))) +
-    geom_tile(color = "gray90") +
-    geom_text(aes(label = signif), size = 6, vjust = 0.8, hjust = 0.5) +
-    scale_fill_gradient(low = "white", high = "#B04975", name = "|NES|", limits=c(0,2.5) ) + 
-    theme_minimal(base_size = 12) +
-    theme(
-      axis.title = element_blank(), 
-      axis.text.x = element_blank(),
-      axis.ticks.x = element_blank(),
-      legend.position = "none"
-    )
-
-  # Combine score and GSEA plots
-  combined_plot <- p_gsea + p_score  + 
-    plot_layout(ncol = 1, guides = "collect", heights = c(0.25, 0.75))  
-
-  list_plts_sigs[[sig]] <- combined_plot
-}
-
-plt_senmayo_original <- plt_gtex_scores_SenMayo + plot_annotation(
-  title = "SAUL_SEN_MAYO",
-  theme = theme(
-    plot.title = element_text(hjust = 0.5, size = 16 )
-  )
-)
-plt_senmayo_woinfl <- list_plts_sigs$SAUL_SEN_MAYO_WO_INFLAMMATION +
-  plot_annotation(
-  title = "SAUL_SEN_MAYO_WO_INFLAMMATION",
-  theme = theme(
-    plot.title = element_text(hjust = 0.5, size = 16 )
-  )
-)
-plt_senmayo_infl <- list_plts_sigs$SAUL_SEN_MAYO_INFLAMMATION +
-  plot_annotation(
-  title = "SAUL_SEN_MAYO_INFLAMMATION",
-  theme = theme(
-    plot.title = element_text(hjust = 0.5, size = 16 )
-  )
-)
-
-```
-```{r fig.width=12, fig.height=10}
- 
-
-# Adjust plot 1: keep legend, hide x-axis
-plt_senmayo_original <- plt_senmayo_original + 
-  theme(axis.title.x = element_blank(),
-        axis.text.x = element_blank(),
-        axis.ticks.x = element_blank())
-
-# Adjust plot 2: hide x-axis (its panels were already built with legend.position = "none")
-plt_senmayo_woinfl <- plt_senmayo_woinfl + 
-  theme(axis.title.x = element_blank(),
-        axis.text.x = element_blank(),
-        axis.ticks.x = element_blank() )
- 
-
-# Combine plots vertically
-(plt_senmayo_gtex_custom <- ggarrange(
-  plt_senmayo_original,
-  plt_senmayo_woinfl,
-  plt_senmayo_infl,
-  ncol = 1,
-  align = "v",
-  heights=c(0.3,0.2,0.5)
-))
- 
-```
-
-```{r fig.width=12, fig.height=6}
-(plt_senmayo_invitro <- ggarrange(plt_senmayo_scores,plotNESlollipop_senmayo, nrow=2, heights=c(0.65,0.35), labels=c("A","B")))
-```
-
-```{r fig.width=12, fig.height= 16}
-(plt_senmayo_custom <- ggarrange(plt_senmayo_invitro,plt_senmayo_gtex_custom, labels=c("","C"), ncol=1, heights=c(0.4,0.6)))
-```
-```{r}
-ggplot2::ggsave("../Figures/Figs/SupFigure7_GTEx_SenMayo_Custom.png",
-                plt_senmayo_custom,
-                width = 12, height=16, bg = 'white')
-```
-
diff --git a/inst/Paper/preprocessing/Download_Alignment_QC/Metadata.csv b/inst/Paper/preprocessing/Download_Alignment_QC/Metadata.csv
deleted file mode 100644
index d8b259f..0000000
--- a/inst/Paper/preprocessing/Download_Alignment_QC/Metadata.csv
+++ /dev/null
@@ -1,558 +0,0 @@
-DatasetID;DatasetAccessionID;SampleAccessionID;CellType;DetailedCellType;Condition;SenescentType;Treatment;timepoints;Immortalised
-Laurent;GSE247831;SRR26840995;Fibroblast;BJ-5ta;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247832;SRR26840996;Fibroblast;BJ;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247833;SRR26840997;Fibroblast;BJ;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247834;SRR26840998;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Laurent;GSE247835;SRR26840999;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Laurent;GSE247836;SRR26841000;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Laurent;GSE247837;SRR26841001;Fibroblast;BJ;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247838;SRR26841002;Fibroblast;BJ;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247839;SRR26841003;Fibroblast;BJ;Quiescent;none;Leo1 KO;none;yes
-Laurent;GSE247840;SRR26841004;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Laurent;GSE247841;SRR26841005;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Laurent;GSE247842;SRR26841006;Fibroblast;BJ;Proliferative;none;WT;none;yes
-Skea;GSE235768;SRR25032574;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032575;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032576;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032577;Fibroblast;HFL1;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032578;Fibroblast;HFL1;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032579;Fibroblast;HFL1;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032580;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032581;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032582;Fibroblast;HFL1;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032583;Fibroblast;HFL1;Senescent;Telomere shortening;bortezomib;none;no
-Skea;GSE235768;SRR25032584;Fibroblast;HFL1;Senescent;Telomere shortening;bortezomib;none;no
-Skea;GSE235768;SRR25032585;Fibroblast;HFL1;Senescent;Telomere shortening;bortezomib;none;no
-Skea;GSE235768;SRR25032586;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032587;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032588;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032589;Fibroblast;BJ;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032590;Fibroblast;BJ;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032591;Fibroblast;BJ;Senescent;Oxidative stress;hydrogen peroxide;none;no
-Skea;GSE235768;SRR25032592;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032593;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032594;Fibroblast;BJ;Proliferative;none;Proliferation;none;no
-Skea;GSE235768;SRR25032595;Fibroblast;BJ;Senescent;Telomere shortening;bortezomib;none;no
-Skea;GSE235768;SRR25032596;Fibroblast;BJ;Senescent;Telomere shortening;bortezomib;none;no
-Skea;GSE235768;SRR25032597;Fibroblast;BJ;Senescent;Telomere shortening;bortezomib;none;no
-Wang;GSE214410;SRR21743822;Fibroblast;IMR90;Senescent;Oncogene;Hras + vector;none;no
-Wang;GSE214410;SRR21743823;Fibroblast;IMR90;Proliferative;none;Proliferation + WSTF OE;none;no
-Wang;GSE214410;SRR21743824;Fibroblast;IMR90;Proliferative;none;Proliferation + WSTF OE;none;no
-Wang;GSE214410;SRR21743817;Fibroblast;IMR90;Senescent;Oncogene;Hras + WSTF OE;none;no
-Wang;GSE214410;SRR21743818;Fibroblast;IMR90;Senescent;Oncogene;Hras + WSTF OE;none;no
-Wang;GSE214410;SRR21743819;Fibroblast;IMR90;Senescent;Oncogene;Hras + WSTF OE;none;no
-Wang;GSE214410;SRR21743820;Fibroblast;IMR90;Senescent;Oncogene;Hras + vector;none;no
-Wang;GSE214410;SRR21743821;Fibroblast;IMR90;Senescent;Oncogene;Hras + vector;none;no
-Wang;GSE214410;SRR21743825;Fibroblast;IMR90;Proliferative;none;Proliferation + WSTF OE;none;no
-Wang;GSE214410;SRR21743826;Fibroblast;IMR90;Proliferative;none;Proliferation + vector;none;no
-Wang;GSE214410;SRR21743827;Fibroblast;IMR90;Proliferative;none;Proliferation + vector;none;no
-Wang;GSE214410;SRR21743828;Fibroblast;IMR90;Proliferative;none;Proliferation + vector;none;no
-Lenain;GSE75643;SRR2970873;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;4d;yes
-Lenain;GSE75643;SRR2970874;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);4d;yes
-Lenain;GSE75643;SRR2970876;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;10d;yes
-Lenain;GSE75643;SRR2970877;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);10d;yes
-Lenain;GSE75643;SRR2970879;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;4d;yes
-Lenain;GSE75643;SRR2970880;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);4d;yes
-Lenain;GSE75643;SRR2970882;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;10d;yes
-Lenain;GSE75643;SRR2970883;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);10d;yes
-Lenain;GSE75643;SRR2970885;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;4d;yes
-Lenain;GSE75643;SRR2970886;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);4d;yes
-Lenain;GSE75643;SRR2970888;Fibroblast;Tig3;Proliferative;none;Proliferation + vector;10d;yes
-Lenain;GSE75643;SRR2970889;Fibroblast;Tig3;Senescent;Oncogene;pBabe-puro-BRAFE600 (OIS);10d;yes
-Lenain;GSE75643;SRR2970891;Fibroblast;Tig3;Proliferative;none;Proliferation;10d;yes
-Lenain;GSE75643;SRR2970892;Fibroblast;Tig3;Quiescent;none;0% FBS DMEM;10d;yes
-Lenain;GSE75643;SRR2970893;Fibroblast;Tig3;Proliferative;none;Proliferation;10d;yes
-Lenain;GSE75643;SRR2970894;Fibroblast;Tig3;Quiescent;none;0% FBS DMEM;10d;yes
-Lenain;GSE75643;SRR2970895;Fibroblast;Tig3;Proliferative;none;Proliferation;10d;yes
-Lenain;GSE75643;SRR2970896;Fibroblast;Tig3;Quiescent;none;0% FBS DMEM;10d;yes
-Purcell;GSE60340;SRR1544480;Fibroblast;MDAH041 LFS ;Proliferative;none;Immortal;none;yes
-Purcell;GSE60340;SRR1544481;Fibroblast;MDAH041 LFS ;Proliferative;none;Immortal;none;yes
-Purcell;GSE60340;SRR1544482;Fibroblast;MDAH041 LFS ;Proliferative;none;Immortal;none;yes
-Purcell;GSE60340;SRR1544483;Fibroblast;MDAH041 LFS ;Proliferative;none;low low passage (young);none;no
-Purcell;GSE60340;SRR1544484;Fibroblast;MDAH041 LFS ;Proliferative;none;low low passage (young);none;no
-Purcell;GSE60340;SRR1544485;Fibroblast;MDAH041 LFS ;Proliferative;none;low low passage (young);none;no
-Purcell;GSE60340;SRR1544486;Fibroblast;MDAH041 LFS ;Proliferative;none;low passage (old);none;no
-Purcell;GSE60340;SRR1544487;Fibroblast;MDAH041 LFS ;Proliferative;none;low passage (old);none;no
-Purcell;GSE60340;SRR1544488;Fibroblast;MDAH041 LFS ;Proliferative;none;low passage (old);none;no
-Purcell;GSE60340;SRR1544489;Fibroblast;MDAH041 LFS ;Senescent;Telomere shortening;growth arrest;none;no
-Purcell;GSE60340;SRR1544490;Fibroblast;MDAH041 LFS ;Senescent;Telomere shortening;growth arrest;none;no
-Purcell;GSE60340;SRR1544491;Fibroblast;MDAH041 LFS ;Senescent;Telomere shortening;growth arrest;none;no
-Purcell;GSE60340;SRR1544492;Fibroblast;MDAH041 LFS ;Senescent;DNA damage;Adriamycin;none;no
-Purcell;GSE60340;SRR1544493;Fibroblast;MDAH041 LFS ;Senescent;DNA damage;Adriamycin;none;no
-Purcell;GSE60340;SRR1544494;Fibroblast;MDAH041 LFS ;Senescent;DNA damage;Adriamycin;none;no
-Purcell;GSE60340;SRR1544495;Fibroblast;MDAH041 LFS ;Senescent;�DNA demethylation;5-aza;none;no
-Purcell;GSE60340;SRR1544496;Fibroblast;MDAH041 LFS ;Senescent;�DNA demethylation;5-aza;none;no
-Purcell;GSE60340;SRR1544497;Fibroblast;MDAH041 LFS ;Senescent;�DNA demethylation;5-aza;none;no
-Purcell;GSE60340;SRR1544498;Fibroblast;MDAH041 LFS ;Senescent;Oxidative stress;H2O2;none;no
-Purcell;GSE60340;SRR1544499;Fibroblast;MDAH041 LFS ;Senescent;Oxidative stress;H2O2;none;no
-Purcell;GSE60340;SRR1544500;Fibroblast;MDAH041 LFS ;Senescent;Oxidative stress;H2O2;none;no
-Purcell;GSE60340;SRR1544501;Fibroblast;MDAH041 LFS ;Quiescent;none;Serum-starved;none;no
-Purcell;GSE60340;SRR1544502;Fibroblast;MDAH041 LFS ;Quiescent;none;Serum-starved;none;no
-Purcell;GSE60340;SRR1544503;Fibroblast;MDAH041 LFS ;Quiescent;none;Serum-starved;none;no
-Aarts;GSE94928;SRR5259584;Fibroblast;IMR90;Proliferative;none;Proliferation;20d;no
-Aarts;GSE94928;SRR5259585;Fibroblast;IMR90;Proliferative;none;Proliferation;20d;no
-Aarts;GSE94928;SRR5259586;Fibroblast;IMR90;Proliferative;none;Proliferation;20d;no
-Aarts;GSE94928;SRR5259587;Fibroblast;IMR90;Proliferative;none;Proliferation;14d;no
-Aarts;GSE94928;SRR5259588;Fibroblast;IMR90;Proliferative;none;Proliferation;14d;no
-Aarts;GSE94928;SRR5259589;Fibroblast;IMR90;Proliferative;none;Proliferation;14d;no
-Aarts;GSE94928;SRR5259590;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;20d;no
-Aarts;GSE94928;SRR5259591;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;20d;no
-Aarts;GSE94928;SRR5259592;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;20d;no
-Aarts;GSE94928;SRR5259593;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;14d;no
-Aarts;GSE94928;SRR5259594;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;14d;no
-Aarts;GSE94928;SRR5259595;Fibroblast;IMR90;Senescent;OSKM factors;OSKM factors;14d;no
-McHugh;GSE224070;SRR23272464;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin;none;no
-McHugh;GSE224070;SRR23272467;Fibroblast;IMR90;Proliferative;none;Proliferation;none;no
-McHugh;GSE224070;SRR23272470;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin;none;no
-McHugh;GSE224070;SRR23272473;Fibroblast;IMR90;Proliferative;none;Proliferation;none;no
-McHugh;GSE224070;SRR23272476;Fibroblast;IMR90;Senescent;Oncogene;ER:RAS + tamoxyfen (4OHT);none;no
-McHugh;GSE224070;SRR23272479;Fibroblast;IMR90;Proliferative;none;ER:RAS + DMSO;none;no
-McHugh;GSE224070;SRR23272482;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin;none;no
-McHugh;GSE224070;SRR23272485;Fibroblast;IMR90;Proliferative;none;Proliferation;none;no
-McHugh;GSE224070;SRR23272488;Fibroblast;IMR90;Senescent;Oncogene;ER:RAS + tamoxyfen (4OHT);none;no
-McHugh;GSE224070;SRR23272491;Fibroblast;IMR90;Proliferative;none;ER:RAS + DMSO;none;no
-McHugh;GSE224070;SRR23272494;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin;none;no
-McHugh;GSE224070;SRR23272497;Fibroblast;IMR90;Proliferative;none;Proliferation;none;no
-McHugh;GSE224070;SRR23272500;Fibroblast;IMR90;Senescent;Oncogene;ER:RAS + tamoxyfen (4OHT);none;no
-McHugh;GSE224070;SRR23272503;Fibroblast;IMR90;Proliferative;none;ER:RAS + DMSO;none;no
-Marthandan2016;GSE63577;SRR1660534;Fibroblast;MCR5;Senescent;Telomere shortening;PD72 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660535;Fibroblast;MCR5;Senescent;Telomere shortening;PD72 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660536;Fibroblast;MCR5;Senescent;Telomere shortening;PD72 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660537;Fibroblast;BJ;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660538;Fibroblast;BJ;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660539;Fibroblast;BJ;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660540;Fibroblast;BJ;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660541;Fibroblast;BJ;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660542;Fibroblast;BJ;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660543;Fibroblast;HFF;Proliferative;none;PD16;none;no
-Marthandan2016;GSE63577;SRR1660544;Fibroblast;HFF;Proliferative;none;PD16;none;no
-Marthandan2016;GSE63577;SRR1660545;Fibroblast;HFF;Proliferative;none;PD16;none;no
-Marthandan2016;GSE63577;SRR1660546;Fibroblast;HFF;Senescent;Telomere shortening;PD74 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660547;Fibroblast;HFF;Senescent;Telomere shortening;PD74 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660548;Fibroblast;HFF;Senescent;Telomere shortening;PD74 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660549;Fibroblast;IMR90;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660550;Fibroblast;IMR90;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660551;Fibroblast;IMR90;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660552;Fibroblast;IMR90;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660553;Fibroblast;IMR90;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660554;Fibroblast;IMR90;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660555;Fibroblast;WI38;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660556;Fibroblast;WI38;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660557;Fibroblast;WI38;Proliferative;none;young;none;no
-Marthandan2016;GSE63577;SRR1660558;Fibroblast;WI38;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660559;Fibroblast;WI38;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR1660560;Fibroblast;WI38;Senescent;Telomere shortening;old (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR2751110;Fibroblast;HFF;Proliferative;none;PD26;none;no
-Marthandan2016;GSE63577;SRR2751111;Fibroblast;HFF;Proliferative;none;PD26;none;no
-Marthandan2016;GSE63577;SRR2751112;Fibroblast;HFF;Proliferative;none;PD26;none;no
-Marthandan2016;GSE63577;SRR2751116;Fibroblast;HFF;Senescent;Telomere shortening;PD64 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR2751117;Fibroblast;HFF;Senescent;Telomere shortening;PD64 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR2751118;Fibroblast;HFF;Senescent;Telomere shortening;PD64 (Replicative senescence);none;no
-Marthandan2016;GSE63577;SRR2751119;Fibroblast;MCR5;Proliferative;none;PD42;none;no
-Marthandan2016;GSE63577;SRR2751120;Fibroblast;MCR5;Proliferative;none;PD42;none;no
-Marthandan2016;GSE63577;SRR2751121;Fibroblast;MCR5;Proliferative;none;PD42;none;no
-Marthandan2016;GSE63577;SRR2751122;Fibroblast;MCR5;Proliferative;none;PD52;none;no
-Marthandan2016;GSE63577;SRR2751123;Fibroblast;MCR5;Proliferative;none;PD52;none;no
-Marthandan2016;GSE63577;SRR2751124;Fibroblast;MCR5;Proliferative;none;PD52;none;no
-Suda;GSE222400;SRR23018053;Fibroblast;WI38;Proliferative;none;DXR-treated;1d;no
-Suda;GSE222400;SRR23018054;Fibroblast;WI38;Proliferative;none;DXR-treated;1d;no
-Suda;GSE222400;SRR23018055;Fibroblast;WI38;Proliferative;none;DXR-treated;0d;no
-Suda;GSE222400;SRR23018056;Fibroblast;WI38;Proliferative;none;DXR-treated;0d;no
-Suda;GSE222400;SRR23018057;Fibroblast;WI38;Senescent;Calcium influx;KCl-treated;16d;no
-Suda;GSE222400;SRR23018058;Fibroblast;WI38;Senescent;Calcium influx;KCl-treated;16d;
-Suda;GSE222400;SRR23018069;Fibroblast;WI38;Proliferative;none;KCl-treated;1d;no
-Suda;GSE222400;SRR23018070;Fibroblast;WI38;Proliferative;none;KCl-treated;1d;no
-Suda;GSE222400;SRR23018071;Fibroblast;WI38;Proliferative;none;KCl-treated;0d;no
-Suda;GSE222400;SRR23018072;Fibroblast;WI38;Proliferative;none;KCl-treated;0d;no
-Suda;GSE222400;SRR23018073;Fibroblast;WI38;Senescent;DNA damage;DXR-treated;16d;no
-Suda;GSE222400;SRR23018074;Fibroblast;WI38;Senescent;DNA damage;DXR-treated;16d;no
-Suda;GSE222400;SRR23018085;Fibroblast;WI38;Senescent;Plasma membrane dysruption;SDS-treated;16d;no
-Suda;GSE222400;SRR23018086;Fibroblast;WI38;Senescent;Plasma membrane dysruption;SDS-treated;16d;no
-Suda;GSE222400;SRR23018097;Fibroblast;WI38;Proliferative;Plasma membrane dysruption;SDS-treated;1d;no
-Suda;GSE222400;SRR23018098;Fibroblast;WI38;Proliferative;Plasma membrane dysruption;SDS-treated;1d;no
-Suda;GSE222400;SRR23018099;Fibroblast;WI38;Proliferative;Plasma membrane dysruption;SDS-treated;0d;no
-Suda;GSE222400;SRR23018100;Fibroblast;WI38;Proliferative;Plasma membrane dysruption;SDS-treated;0d;no
-Suda;GSE222400;SRR23018101;Fibroblast;WI38;Senescent;Telomere shortening;replicative senescence;none;no
-Suda;GSE222400;SRR23018102;Fibroblast;WI38;Senescent;Telomere shortening;replicative senescence;none;no
-Suda;GSE222400;SRR23018103;Fibroblast;WI38;Proliferative;none;Proliferating;none;no
-Suda;GSE222400;SRR23018104;Fibroblast;WI38;Proliferative;none;Proliferating;none;no
-Tasdemir;GSE74324;SRR2932856;Fibroblast;IMR90;Proliferative;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932857;Fibroblast;IMR90;Proliferative;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932858;Fibroblast;IMR90;Proliferative;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932859;Fibroblast;IMR90;Quiescent;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932860;Fibroblast;IMR90;Quiescent;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932861;Fibroblast;IMR90;Quiescent;none;cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932862;Fibroblast;IMR90;Senescent;Oncogene;cotransduced with pWZL HRASV12 and shRen;none;no
-Tasdemir;GSE74324;SRR2932863;Fibroblast;IMR90;Senescent;Oncogene;cotransduced with pWZL HRASV12 and shRen;none;no
-Tasdemir;GSE74324;SRR2932864;Fibroblast;IMR90;Senescent;Oncogene;cotransduced with pWZL HRASV12 and shRen;none;no
-Tasdemir;GSE74324;SRR2932910;Fibroblast;IMR90;Proliferative;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932911;Fibroblast;IMR90;Proliferative;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932912;Fibroblast;IMR90;Proliferative;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932913;Fibroblast;IMR90;Quiescent;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932914;Fibroblast;IMR90;Quiescent;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932915;Fibroblast;IMR90;Quiescent;none;DMSO 48h (vector for JQ1) + cotransduced with pWZL-Hygro and shRen;none;no
-Tasdemir;GSE74324;SRR2932916;Fibroblast;IMR90;Senescent;Oncogene;DMSO 48h (vector for JQ1) + cotransduced with pWZL HRASV12 and shRen;none;no
-Tasdemir;GSE74324;SRR2932917;Fibroblast;IMR90;Senescent;Oncogene;DMSO 48h (vector for JQ1) + cotransduced with pWZL HRASV12 and shRen;none;no
-Tasdemir;GSE74324;SRR2932918;Fibroblast;IMR90;Senescent;Oncogene;DMSO 48h (vector for JQ1) + cotransduced with pWZL HRASV12 and shRen;none;no
-Savic;GSE230181;SRR24952348;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952349;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952350;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952351;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952352;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952353;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952354;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952355;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952356;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952357;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952358;Fibroblast;IMR90;Senescent;Oxidative stress;Oligomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952359;Fibroblast;IMR90;Senescent;Oxidative stress;Oligomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952360;Fibroblast;IMR90;Senescent;Oxidative stress;Oligomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952361;Fibroblast;IMR90;Senescent;Oxidative stress;Oligomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952362;Fibroblast;IMR90;Senescent;Oxidative stress;Oligomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952363;Fibroblast;IMR90;Senescent;Oxidative stress;Antimycin A for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952364;Fibroblast;IMR90;Senescent;Oxidative stress;Antimycin A for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952365;Fibroblast;IMR90;Senescent;Oxidative stress;Antimycin A for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952366;Fibroblast;IMR90;Senescent;Oxidative stress;Antimycin A for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952367;Fibroblast;IMR90;Senescent;Oxidative stress;Antimycin A for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952368;Fibroblast;IMR90;Senescent;Oxidative stress;Rotenone for 14 days;none;no
-Savic;GSE230181;SRR24952369;Fibroblast;IMR90;Senescent;Oxidative stress;Rotenone for 14 days;none;no
-Savic;GSE230181;SRR24952370;Fibroblast;IMR90;Senescent;Oxidative stress;Rotenone for 14 days;none;no
-Savic;GSE230181;SRR24952371;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952372;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952373;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952374;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952375;Fibroblast;IMR90;Senescent;DNA damage;Bleomycin for 6 days on + 8 days off;none;no
-Savic;GSE230181;SRR24952376;Fibroblast;IMR90;Senescent;oncogene;stably transduced with inducible RasV12 + DOX for 12 days;none;yes
-Savic;GSE230181;SRR24952377;Fibroblast;IMR90;Senescent;oncogene;stably transduced with inducible RasV12 + DOX for 12 days;none;yes
-Savic;GSE230181;SRR24952378;Fibroblast;IMR90;Senescent;oncogene;stably transduced with inducible RasV12 + DOX for 12 days;none;yes
-Savic;GSE230181;SRR24952379;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952395;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952396;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952397;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952398;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952399;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952400;Fibroblast;IMR90;Proliferative;none;Proliferative (control for RasV12);none;yes
-Savic;GSE230181;SRR24952401;Fibroblast;IMR90;Proliferative;none;Proliferative (control for RasV12);none;yes
-Savic;GSE230181;SRR24952402;Fibroblast;IMR90;Proliferative;none;Proliferative (control for RasV12);none;yes
-Savic;GSE230181;SRR24952403;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952404;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952405;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952406;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952407;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952408;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952409;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952410;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952411;Fibroblast;IMR90;Proliferative;none;Proliferative;none;no
-Savic;GSE230181;SRR24952412;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952413;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952414;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952415;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952416;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952417;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952418;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952419;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952420;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952421;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952422;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952423;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952424;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952425;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952426;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952427;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952428;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952429;Fibroblast;IMR90;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Chan;GSE175533;SRR14646263;Fibroblast;WI38;Proliferative;none;hTERT_TP1_A;none;yes
-Chan;GSE175533;SRR14646264;Fibroblast;WI38;Proliferative;none;hTERT_TP1_B;none;yes
-Chan;GSE175533;SRR14646265;Fibroblast;WI38;Proliferative;none;hTERT_TP1_C;none;yes
-Chan;GSE175533;SRR14646266;Fibroblast;WI38;Proliferative;none;hTERT_TP2_A;none;yes
-Chan;GSE175533;SRR14646267;Fibroblast;WI38;Proliferative;none;hTERT_TP2_B;none;yes
-Chan;GSE175533;SRR14646268;Fibroblast;WI38;Proliferative;none;hTERT_TP2_C;none;yes
-Chan;GSE175533;SRR14646269;Fibroblast;WI38;Proliferative;none;hTERT_TP4_A;none;yes
-Chan;GSE175533;SRR14646270;Fibroblast;WI38;Proliferative;none;hTERT_TP4_B;none;yes
-Chan;GSE175533;SRR14646271;Fibroblast;WI38;Proliferative;none;hTERT_TP4_C;none;yes
-Chan;GSE175533;SRR14646272;Fibroblast;WI38;Proliferative;none;hTERT_TP5_A;none;yes
-Chan;GSE175533;SRR14646273;Fibroblast;WI38;Proliferative;none;hTERT_TP5_B;none;yes
-Chan;GSE175533;SRR14646274;Fibroblast;WI38;Proliferative;none;hTERT_TP5_C;none;yes
-Chan;GSE175533;SRR14646275;Fibroblast;WI38;Proliferative;none;hTERT_TP6_A;none;yes
-Chan;GSE175533;SRR14646276;Fibroblast;WI38;Proliferative;none;hTERT_TP6_B;none;yes
-Chan;GSE175533;SRR14646277;Fibroblast;WI38;Proliferative;none;hTERT_TP6_C;none;yes
-Chan;GSE175533;SRR14646278;Fibroblast;WI38;Proliferative;none;hTERT_TP7_A;none;yes
-Chan;GSE175533;SRR14646279;Fibroblast;WI38;Proliferative;none;hTERT_TP7_B;none;yes
-Chan;GSE175533;SRR14646292;Fibroblast;WI38;Proliferative;none;hTERT_TP7_C;none;yes
-Chan;GSE175533;SRR14646293;Fibroblast;WI38;Proliferative;none;RS_PDL20_TP1_A;PDL20;no
-Chan;GSE175533;SRR14646294;Fibroblast;WI38;Proliferative;none;RS_PDL20_TP1_B;PDL20;no
-Chan;GSE175533;SRR14646295;Fibroblast;WI38;Proliferative;none;RS_PDL20_TP1_C;PDL20;no
-Chan;GSE175533;SRR14646296;Fibroblast;WI38;Proliferative;none;RS_PDL25_TP2_A;PDL25;no
-Chan;GSE175533;SRR14646297;Fibroblast;WI38;Proliferative;none;RS_PDL25_TP2_B;PDL25;no
-Chan;GSE175533;SRR14646298;Fibroblast;WI38;Proliferative;none;RS_PDL25_TP2_C;PDL25;no
-Chan;GSE175533;SRR14646317;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL52_TP9_A;PDL52;no
-Chan;GSE175533;SRR14646318;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL52_TP9_B;PDL52;no
-Chan;GSE175533;SRR14646319;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL52_TP9_C;PDL52;no
-Chan;GSE175533;SRR14646320;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL53_TP10_A;PDL53;no
-Chan;GSE175533;SRR14646321;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL53_TP10_B;PDL53;no
-Chan;GSE175533;SRR14646322;Fibroblast;WI38;Senescent;Telomere shortening;RS_PDL53_TP10_C;PDL53;no
-Chan;GSE175533;SRR14646353;Fibroblast;WI38;Proliferative;none;RIS_d2_no_xray_A;d2;no
-Chan;GSE175533;SRR14646354;Fibroblast;WI38;Proliferative;none;RIS_d2_no_xray_B;d2;no
-Chan;GSE175533;SRR14646355;Fibroblast;WI38;Proliferative;none;RIS_d2_no_xray_C;d2;no
-Chan;GSE175533;SRR14646368;Fibroblast;WI38;Senescent;Radiation;RIS_d9_xray_A;d9;no
-Chan;GSE175533;SRR14646369;Fibroblast;WI38;Senescent;Radiation;RIS_d9_xray_B;d9;no
-Chan;GSE175533;SRR14646370;Fibroblast;WI38;Senescent;Radiation;RIS_d9_xray_C;d9;no
-HernandezSegura;E-MTAB-5403;ERR1805188;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805189;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805190;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805191;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805196;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805199;Fibroblast;HCA2;Senescent;Radiation;10Gy;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805200;Fibroblast;HCA2;Senescent;Radiation;Proliferation;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805201;Fibroblast;HCA2;Senescent;Radiation;Proliferation;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805202;Fibroblast;HCA2;Senescent;Radiation;Proliferation;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805203;Fibroblast;HCA2;Senescent;Radiation;Proliferation;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805204;Fibroblast;HCA2;Senescent;Radiation;Proliferation;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805205;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805206;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805207;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805208;Fibroblast;HCA2;Quiescent;none;48 hr in DMEM supplemented with 0.2% FBS.;none;no
-HernandezSegura;E-MTAB-5403;ERR1805209;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805210;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805211;Fibroblast;HCA2;Proliferative;none;Proliferation;none;no
-Marthandan2015;GSE64553;SRR1736333;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD22;no
-Marthandan2015;GSE64553;SRR1736334;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD22;no
-Marthandan2015;GSE64554;SRR1736335;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD22;no
-Marthandan2015;GSE64555;SRR1736336;Fibroblast;HFF;Proliferative;none;none;PD22;no
-Marthandan2015;GSE64556;SRR1736337;Fibroblast;HFF;Proliferative;none;none;PD22;no
-Marthandan2015;GSE64557;SRR1736338;Fibroblast;HFF;Proliferative;none;none;PD22;no
-Marthandan2015;GSE64558;SRR1736339;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD26;no
-Marthandan2015;GSE64559;SRR1736340;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD26;no
-Marthandan2015;GSE64560;SRR1736341;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD26;no
-Marthandan2015;GSE64561;SRR1736342;Fibroblast;HFF;Proliferative;none;none;PD26;no
-Marthandan2015;GSE64562;SRR1736343;Fibroblast;HFF;Proliferative;none;none;PD26;no
-Marthandan2015;GSE64563;SRR1736344;Fibroblast;HFF;Proliferative;none;none;PD26;no
-Marthandan2015;GSE64564;SRR1736345;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD30;no
-Marthandan2015;GSE64565;SRR1736346;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD30;no
-Marthandan2015;GSE64566;SRR1736347;Fibroblast;HFF;Proliferative;none;100 nM rotenone;PD30;no
-Marthandan2015;GSE64567;SRR1736348;Fibroblast;HFF;Proliferative;none;none;PD30;no
-Marthandan2015;GSE64568;SRR1736349;Fibroblast;HFF;Proliferative;none;none;PD30;no
-Marthandan2015;GSE64569;SRR1736350;Fibroblast;HFF;Proliferative;none;none;PD30;no
-Marthandan2015;GSE64570;SRR1736357;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD58;no
-Marthandan2015;GSE64571;SRR1736358;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD58;no
-Marthandan2015;GSE64572;SRR1736359;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD58;no
-Marthandan2015;GSE64573;SRR1736360;Fibroblast;HFF;Senescent;Telomere shortening;none;PD58;no
-Marthandan2015;GSE64574;SRR1736361;Fibroblast;HFF;Senescent;Telomere shortening;none;PD58;no
-Marthandan2015;GSE64575;SRR1736362;Fibroblast;HFF;Senescent;Telomere shortening;none;PD58;no
-Marthandan2015;GSE64576;SRR1736363;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD74;no
-Marthandan2015;GSE64577;SRR1736364;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD74;no
-Marthandan2015;GSE64578;SRR1736365;Fibroblast;HFF;Senescent;Telomere shortening;100 nM rotenone;PD74;no
-Marthandan2015;GSE64579;SRR1736366;Fibroblast;HFF;Senescent;Telomere shortening;none;PD74;no
-Marthandan2015;GSE64580;SRR1736367;Fibroblast;HFF;Senescent;Telomere shortening;none;PD74;no
-Marthandan2015;GSE64581;SRR1736368;Fibroblast;HFF;Senescent;Telomere shortening;none;PD74;no
-Casella;GSE130727;SRR9016157;Fibroblast;IMR90;Proliferative;none;Proliferation;PD15;no
-Casella;GSE130727;SRR9016158;Fibroblast;IMR90;Proliferative;none;Proliferation;PD15;no
-Casella;GSE130727;SRR9016159;Fibroblast;IMR90;Senescent;Radiation;10 Gy of ionizing radiation exposure;PD15;no
-Casella;GSE130727;SRR9016160;Fibroblast;IMR90;Senescent;Radiation;10 Gy of ionizing radiation exposure;PD15;no
-Casella;GSE130727;SRR9016161;Fibroblast;IMR90;Senescent;Telomere shortening;Replicative exhaustion;PD52;no
-Casella;GSE130727;SRR9016162;Fibroblast;IMR90;Senescent;Telomere shortening;Replicative exhaustion;PD52;no
-Casella;GSE130727;SRR9016163;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016164;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016165;Fibroblast;WI38;Senescent;DNA damage;Treated with 2 �g/mL Dox (Sigma) for 24 h;PD25;no
-Casella;GSE130727;SRR9016166;Fibroblast;WI38;Senescent;DNA damage;Treated with 2 �g/mL Dox (Sigma) for 24 h;PD25;no
-Casella;GSE130727;SRR9016167;Fibroblast;WI38;Proliferative;none;Treatment with empty vector at 10 MOI\, and \, and puromycin (1 �g/mL);PD25;no
-Casella;GSE130727;SRR9016168;Fibroblast;WI38;Proliferative;none;Treatment with empty vector at 10 MOI\, and \, and puromycin (1 �g/mL);PD25;no
-Casella;GSE130727;SRR9016169;Fibroblast;WI38;Senescent;Oncogene;Treatment with lentivirus expressing HRASG12V at 10 MOI\, and puromycin (1 �g/mL);PD25;no
-Casella;GSE130727;SRR9016170;Fibroblast;WI38;Senescent;Oncogene;Treatment with lentivirus expressing HRASG12V at 10 MOI\, and puromycin (1 �g/mL);PD25;no
-Casella;GSE130727;SRR9016171;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016172;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016173;Fibroblast;WI38;Senescent;DNA damage;Treated with 2 �g/mL Dox (Sigma) for 24 h;PD25;no
-Casella;GSE130727;SRR9016174;Fibroblast;WI38;Senescent;DNA damage;Treated with 2 �g/mL Dox (Sigma) for 24 h;PD25;no
-Casella;GSE130727;SRR9016175;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016176;Fibroblast;WI38;Proliferative;none;Control;PD25;no
-Casella;GSE130727;SRR9016177;Fibroblast;WI38;Senescent;Radiation;10 Gy of ionizing radiation exposure;PD25;no
-Casella;GSE130727;SRR9016178;Fibroblast;WI38;Senescent;Radiation;10 Gy of ionizing radiation exposure;PD25;no
-Casella;GSE130727;SRR9016179;Fibroblast;WI38;Senescent;Telomere shortening;Replicative exhaustion;PD53-55;no
-Casella;GSE130727;SRR9016180;Fibroblast;WI38;Senescent;Telomere shortening;Replicative exhaustion;PD53-55;no
-Casella;GSE130727;SRR9016181;Fibroblast;WI38;Proliferative;none;Proliferation;PD25;no
-Casella;GSE130727;SRR9016182;Fibroblast;WI38;Proliferative;none;Proliferation;PD25;no
-Mangelinck;E-MTAB-9714;ERR4781442;Fibroblast;WI38;Senescent;DNA damage;sh3H2AF + etoposide;none;yes
-Mangelinck;E-MTAB-9714;ERR4781443;Fibroblast;WI38;Senescent;DNA damage;sh3H2AF + etoposide;none;yes
-Mangelinck;E-MTAB-9714;ERR4781444;Fibroblast;WI38;Senescent;DNA damage;sh3H2AF + etoposide;none;yes
-Mangelinck;E-MTAB-9714;ERR4781445;Fibroblast;WI38;Proliferative;none;sh3H2AFJ ;none;yes
-Mangelinck;E-MTAB-9714;ERR4781446;Fibroblast;WI38;Proliferative;none;sh3H2AFJ ;none;yes
-Mangelinck;E-MTAB-9714;ERR4781447;Fibroblast;WI38;Proliferative;none;sh3H2AFJ ;none;yes
-Mangelinck;E-MTAB-9714;ERR4781448;Fibroblast;WI38;Proliferative;none;shNT;none;yes
-Mangelinck;E-MTAB-9714;ERR4781449;Fibroblast;WI38;Proliferative;none;shNT;none;yes
-Mangelinck;E-MTAB-9714;ERR4781450;Fibroblast;WI38;Proliferative;none;shNT;none;yes
-Mangelinck;E-MTAB-9714;ERR4781451;Fibroblast;WI38;Senescent;DNA damage;shNT + etoposide;none;yes
-Mangelinck;E-MTAB-9714;ERR4781452;Fibroblast;WI38;Senescent;DNA damage;shNT + etoposide;none;yes
-Mangelinck;E-MTAB-9714;ERR4781453;Fibroblast;WI38;Senescent;DNA damage;shNT + etoposide;none;yes
-Numa;200250224;SRR27215555;Endothelial;Corneal Endothelial Cell;Proliferative;none;none;none;no
-Numa;200250224;SRR27215556;Endothelial;Corneal Endothelial Cell;Proliferative;none;none;none;no
-Numa;200250224;SRR27215557;Endothelial;Corneal Endothelial Cell;Proliferative;none;none;none;no
-Numa;200250224;SRR27215558;Endothelial;Corneal Endothelial Cell;Proliferative;none;none;none;no
-Numa;200250224;SRR27215547;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;UVA exposure;none;no
-Numa;200250224;SRR27215548;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;UVA exposure;none;no
-Numa;200250224;SRR27215549;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;UVA exposure;none;no
-Numa;200250224;SRR27215550;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;UVA exposure;none;no
-Numa;200250224;SRR27215551;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;irradiation;none;no
-Numa;200250224;SRR27215552;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;irradiation;none;no
-Numa;200250224;SRR27215553;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;irradiation;none;no
-Numa;200250224;SRR27215554;Endothelial;Corneal Endothelial Cell;Senescent;Radiation;irradiation;none;no
-Heidari;200262117;SRR28389055;Epithelial;Prostate Epithelial-like;Senescent;Hormone;DEC1-KD-SAL-1;none;no
-Heidari;200262117;SRR28389056;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;DEC1-KD-DMSO-3;none;no
-Heidari;200262117;SRR28389057;Epithelial;Prostate Epithelial-like;Senescent;Hormone;Control-SAL-2;none;no
-Heidari;200262117;SRR28389058;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;DEC1-KD-DMSO-1;none;no
-Heidari;200262117;SRR28389059;Epithelial;Prostate Epithelial-like;Senescent;Hormone;Control-SAL-3;none;no
-Heidari;200262117;SRR28389060;Epithelial;Prostate Epithelial-like;Senescent;Hormone;Control-SAL-2;none;no
-Heidari;200262117;SRR28389061;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;Control-SAL-1;none;no
-Heidari;200262117;SRR28389062;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;Control-DMSO-3;none;no
-Heidari;200262117;SRR28389063;Epithelial;Prostate Epithelial-like;Senescent;Hormone;DEC1-KD-SAL-3;none;no
-Heidari;200262117;SRR28389064;Epithelial;Prostate Epithelial-like;Senescent;Hormone;DEC1-KD-SAL-2;none;no
-Heidari;200262117;SRR28389065;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;Control-DMSO-2;none;no
-Heidari;200262117;SRR28389066;Epithelial;Prostate Epithelial-like;Proliferative;Hormone;Control-DMSO-1;none;no
-Admasu;200196724;SRR18022193;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022194;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022196;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022198;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022199;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022200;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022201;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022202;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022203;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022205;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022206;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022207;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022208;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022209;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022212;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022213;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022215;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022216;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022217;Endothelial;Artery Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022218;Endothelial;Artery Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022220;Endothelial;Artery Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022221;Endothelial;Artery Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022224;Endothelial;Artery Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022225;Endothelial;Artery Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022226;Endothelial;Artery Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022227;Endothelial;Artery Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022228;Endothelial;Artery Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022195;Endothelial;Vein Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022197;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022204;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022210;Endothelial;Vein Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022211;Endothelial;Vein Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Admasu;200196724;SRR18022214;Endothelial;Vein Endothelial cells;Proliferative;none;no treatment;none;no
-Admasu;200196724;SRR18022219;Endothelial;Artery Endothelial cells;Senescent;Conditioned Medium;Senescent conditioned medium;none;no
-Admasu;200196724;SRR18022222;Endothelial;Artery Endothelial cells;Proliferative;none;Non senescent conditioned medium;none;no
-Admasu;200196724;SRR18022223;Endothelial;Artery Endothelial cells;Senescent;DNA damage;Doxorubicin;none;no
-Urata;200206677;SRR19783652;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;Control;none;no
-Urata;200206677;SRR19783654;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;Control;none;no
-Urata;200206677;SRR19783655;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;Control;none;no
-Urata;200206677;SRR19783656;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;Control;none;no
-Urata;200206677;SRR19783657;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;Control;none;no
-Urata;200206677;SRR19783659;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;Control;none;no
-Urata;200206677;SRR19783660;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;SARS-CoV-2 infection;none;no
-Urata;200206677;SRR19783661;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;SARS-CoV-2 infection;none;no
-Urata;200206677;SRR19783662;Endothelial;Umbilical Vein Endothelial cells;Senescent;Telomere shortening;SARS-CoV-2 infection;none;no
-Urata;200206677;SRR19783663;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;SARS-CoV-2 infection;none;no
-Urata;200206677;SRR19783665;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;SARS-CoV-2 infection;none;no
-Urata;200206677;SRR19783666;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;SARS-CoV-2 infection;none;no
-Wang2023;200217718;SRR22254572;Neuronal;Neuronal Stem Cell;Proliferative;none;NSCs from SVZ;none;no
-Wang2023;200217718;SRR22254573;Neuronal;Neuronal Stem Cell;Proliferative;none;NSCs from SVZ;none;no
-Wang2023;200217718;SRR22254574;Neuronal;Neuronal Stem Cell;Proliferative;none;NSCs from SVZ;none;no
-Wang2023;200217718;SRR22254575;Neuronal;Neuronal Stem Cell;Proliferative;none;NSCs from SVZ;none;no
-Wang2023;200217718;SRR22254576;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254577;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254578;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254579;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254580;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254581;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Wang2023;200217718;SRR22254582;Neuronal;Neuronal Stem Cell;Senescent;Lipid Accumulation;htNSCs from 3VF adjacent to ACP;none;no
-Fu;200110268;SRR6680309;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;WT;none;no
-Fu;200110268;SRR6680310;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;WT;none;no
-Fu;200110268;SRR6680311;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;WT;none;no
-Fu;200110268;SRR6680312;Mesenchymal;Mesenchymal Stem Cell;Senescent;YAP KO;YAP KO;none;no
-Fu;200110268;SRR6680313;Mesenchymal;Mesenchymal Stem Cell;Senescent;YAP KO;YAP KO;none;no
-Fu;200110268;SRR6680314;Mesenchymal;Mesenchymal Stem Cell;Senescent;YAP KO;YAP KO;none;no
-Fu;200110268;SRR6680315;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;TAZ KO;none;no
-Fu;200110268;SRR6680316;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;TAZ KO;none;no
-Fu;200110268;SRR6680317;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;TAZ KO;none;no
-Fu;200110268;SRR6680321;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;non-targeting control;none;no
-Fu;200110268;SRR6680322;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;non-targeting control;none;no
-Fu;200110268;SRR6680323;Mesenchymal;Mesenchymal Stem Cell;Proliferative;none;non-targeting control;none;no
-Tanke;200213323;SRR21563935;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;high density;none;no
-Tanke;200213323;SRR21563936;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;high density;none;no
-Tanke;200213323;SRR21563937;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;high density;none;no
-Tanke;200213323;SRR21563938;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;low density;none;no
-Tanke;200213323;SRR21563939;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;low density;none;no
-Tanke;200213323;SRR21563940;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;low density;none;no
-Tanke;200213323;SRR21563944;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;serum starve;none;no
-Tanke;200213323;SRR21563945;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;serum starve;none;no
-Tanke;200213323;SRR21563946;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;serum starve;none;no
-Tanke;200213323;SRR21563950;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;normal serum;none;no
-Tanke;200213323;SRR21563951;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;normal serum;none;no
-Tanke;200213323;SRR21563952;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;normal serum;none;no
-Tanke;200213323;SRR21563959;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;72h shear stress at 15 dyn/cm2;none;no
-Tanke;200213323;SRR21563960;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;72h shear stress at 15 dyn/cm2;none;no
-Tanke;200213323;SRR21563961;Endothelial;Umbilical Vein Endothelial cells;Quiescent;none;72h shear stress at 15 dyn/cm2;none;no
-Tanke;200213323;SRR21563962;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;static condition;none;no
-Tanke;200213323;SRR21563963;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;static condition;none;no
-Tanke;200213323;SRR21563964;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;static condition;none;no
-Bawens;E-MTAB-10969;ERR6804206;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;3d;no
-Bawens;E-MTAB-10969;ERR6804207;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;5d;no
-Bawens;E-MTAB-10969;ERR6804208;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;3d;no
-Bawens;E-MTAB-10969;ERR6804209;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;5d;no
-Bawens;E-MTAB-10969;ERR6804210;Keratinocyte;skin of prepuce of penis;Senescent;Telomere shortening;passage 12 to 14;2m;no
-Bawens;E-MTAB-10969;ERR6804211;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;0;no
-Bawens;E-MTAB-10969;ERR6804212;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;3d;no
-Bawens;E-MTAB-10969;ERR6804213;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;5d;no
-Bawens;E-MTAB-10969;ERR6804214;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;3d;no
-Bawens;E-MTAB-10969;ERR6804215;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;5d;no
-Bawens;E-MTAB-10969;ERR6804216;Keratinocyte;skin of prepuce of penis;Senescent;Telomere shortening;passage 12 to 14;2m;no
-Bawens;E-MTAB-10969;ERR6804217;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;0;no
-Bawens;E-MTAB-10969;ERR6804218;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;3d;no
-Bawens;E-MTAB-10969;ERR6804219;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;5d;no
-Bawens;E-MTAB-10969;ERR6804220;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;3d;no
-Bawens;E-MTAB-10969;ERR6804221;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;5d;no
-Bawens;E-MTAB-10969;ERR6804222;Keratinocyte;skin of prepuce of penis;Senescent;Telomere shortening;passage 12 to 14;2m;no
-Bawens;E-MTAB-10969;ERR6804223;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;0;no
-Bawens;E-MTAB-10969;ERR6804224;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;3d;no
-Bawens;E-MTAB-10969;ERR6804225;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;5d;no
-Bawens;E-MTAB-10969;ERR6804226;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;3d;no
-Bawens;E-MTAB-10969;ERR6804227;Keratinocyte;skin of prepuce of penis;Senescent;Radiation;UVB at 311 nm;5d;no
-Bawens;E-MTAB-10969;ERR6804228;Keratinocyte;skin of prepuce of penis;Senescent;Telomere shortening;passage 12 to 14;2m;no
-Bawens;E-MTAB-10969;ERR6804229;Keratinocyte;skin of prepuce of penis;Proliferative;none;none;0;no
-Savic;GSE230181;SRR24952339;Endothelial;Lung Microvascular Endothelial Cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952340;Endothelial;Lung Microvascular Endothelial Cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952341;Endothelial;Lung Microvascular Endothelial Cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952342;Endothelial;Lung Microvascular Endothelial Cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952343;Endothelial;Lung Microvascular Endothelial Cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952344;Endothelial;Lung Microvascular Endothelial Cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952345;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952346;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952347;Endothelial;Umbilical Vein Endothelial cells;Proliferative;none;none;none;no
-Savic;GSE230181;SRR24952380;Endothelial;Umbilical Vein Endothelial cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952381;Endothelial;Umbilical Vein Endothelial cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-Savic;GSE230181;SRR24952382;Endothelial;Umbilical Vein Endothelial cells;Senescent;Radiation;10Gy ionising radiation 14 days prior collection;none;no
-HernandezSegura;E-MTAB-5403;ERR1805218;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805226;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805232;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805233;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805234;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805235;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805236;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805237;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805238;Keratinocyte;Primary epidermal keratinocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805239;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805240;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805241;Keratinocyte;Primary epidermal keratinocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805242;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805250;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805256;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805257;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805258;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805259;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805260;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805261;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805262;Melanocyte;Primary epidermal melanocytes;Senescent;Radiation;Irradiation, day 20;d20;no
-HernandezSegura;E-MTAB-5403;ERR1805263;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805264;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
-HernandezSegura;E-MTAB-5403;ERR1805265;Melanocyte;Primary epidermal melanocytes;Proliferative;none;Proliferation;none;no
\ No newline at end of file
diff --git a/inst/Paper/preprocessing/Download_Alignment_QC/pipeline_download_processing.sh b/inst/Paper/preprocessing/Download_Alignment_QC/pipeline_download_processing.sh
deleted file mode 100755
index 7aebe53..0000000
--- a/inst/Paper/preprocessing/Download_Alignment_QC/pipeline_download_processing.sh
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/bash
-# to launch pipeline:
-#     screen -S benchmarking
-#     conda activate benchmarking
-#     ./pipeline_benchmarking.sh 2>&1 | tee logPipeline.txt 
-# to kill pipeline: 
-#     screen -XS <id_screen> quit 
-
-# CONDA ENVIRONMENT ---------------------------------------------------------------------------------- 
-# python: v3.11.5
-# kallisto: conda install bioconda::kallisto #(v0.44.0)
-# fasterq-dump: conda install bioconda::sra-tools #(v2.11.0)
-# fastqc: conda install bioconda::fastqc  #(v0.12.1)
-# multiqc: conda install multiqc -c conda-forge  #(v1.14)
-
-# ----------------------------------------- DEFINE VARIABLES -----------------------------------------
-
-# User-defined variables (ONLY CHANGE THE SCRIPT HERE!!!!!) ------------------------------------------ 
-
-working_directory="path/to/working/directory"
-
-dataset_table_aux="path/to/text_file/sampleIDs.txt"
-dataset_table="path/where/formatted/text_file/sampleIDs/will/be/stored.txt"
-dataset_description="NameOfRun"
-
-N=12 # Number of samples to process in parallel
-threads=20
-
-
-# TRANSCRIPTOME  
-# FASTA file - RefSeq Release 109 (19/11/2021), downloaded in April 6th, 2022, from:
-# https://www.ncbi.nlm.nih.gov/projects/genome/guide/human/index.shtml
-# Correspondence isoform <-> gene retreived from UCSC's genome browser for RefSeq release 109
- 
-transcriptome="path/to/transcriptome"
-
-#########################
-#########################
-#########################
-
-
-# Auxiliary variables ---------------------------------------------------------------------------------- 
-
-tr -d '\r' <${dataset_table_aux} >${dataset_table} #format to unix 
-kallisto_index="kallisto_index"
-
-# ----------------------------------------- CREATE DIRECTORIES -----------------------------------------
-
-mkdir ${working_directory}/FASTQ
-
-mkdir ${working_directory}/fasterqdump_logs
-
-mkdir ${working_directory}/kallisto
-
-mkdir ${working_directory}/kallisto_index_log
-
-mkdir ${working_directory}/FASTQC
-
-mkdir ${working_directory}/kallisto_logs_samples
-
-mkdir ${working_directory}/MULTIQC
-
-mkdir ${working_directory}/temp
-
-######################################################### PIPELINE ########################################################################
- 
-
- echo "####################### STARTING PIPELINE... ####################### "
-
- echo "--> Starting time: " $(date +%F_%T)
-
-# ----------------------------------------- Kallisto index -----------------------------------------
-
-echo "Creating Kallisto index..."
-
-kallisto index -i ${working_directory}/${kallisto_index} ${transcriptome} > ${working_directory}/kallisto_index_log/kallisto_index_log.txt 2>&1
- 
-
-# ----------------------------------------- Pipeline for each sample -----------------------------------------
-
-for sample in $(awk '{print $1}' ${dataset_table})
-do 
-
-  (
-
-  # ----------------------------------------- DOWNLOAD SAMPLE -----------------------------------------
-
-  # --split-files splits the FASTQ reads into two files, when its paired-end
-  # https://rnnh.github.io/bioinfo-notebook/docs/fasterq-dump.html
-
-  echo "Downloading sample " ${sample} "..." 
-  fasterq-dump ${sample} --temp ${working_directory}/temp --split-files --threads ${threads} --outdir ${working_directory}/FASTQ/ > ${working_directory}/fasterqdump_logs/log_fasterqdump_${sample}.txt 2>&1
-  
-  nb_files=$(ls ${working_directory}/FASTQ/${sample}* | wc -l)
-
-  # if download failed, there's no point in doing the rest of the pipeline
-  if [ $nb_files -eq 0 ]; then
-    
-    echo "Failed download for sample " ${sample}
-
-    echo ${sample} >> ${working_directory}/failedSamples_${dataset_description}.txt
-
-  else
-
-
-      echo "Compressing samples " ${sample} "..."
-      gzip -f ${working_directory}/FASTQ/${sample}*
-
-        # ----------------------------------------- CHECK IF SINGLE OR PAIRED-END -----------------------------------------
-
-         
-      echo "  Number of files: " ${nb_files}
-
-      if [ $nb_files -ge 2 ]; then
-         paired_end=true
-         # Remove unmated reads, so that paired-end has only two samples
-         rm -f ${working_directory}/FASTQ/${sample}.*
-
-         echo "  info: Sample " ${sample} " is paired-end"
-      else
-        paired_end=false
-        echo "  info: Sample " ${sample} " is single-end"
-      fi
-
-
-      # ------------------------------------------------ PERFORM FASTQC ------------------------------------------------
-
-    echo "Performing FASTQC for sample " ${sample} "..."
-
-     # Check the value of paired_end and execute different code accordingly
-      if [ "$paired_end" = true ]; then
-
-        fastqc --quiet ${working_directory}/FASTQ/${sample}_1* -o ${working_directory}/FASTQC
-        fastqc --quiet ${working_directory}/FASTQ/${sample}_2* -o ${working_directory}/FASTQC
-
-      else
-        
-        fastqc --quiet ${working_directory}/FASTQ/${sample}* -o ${working_directory}/FASTQC
-
-      fi
-
-
-      # ------------------------------------------------ ALIGN WITH KALLISTO ------------------------------------------------
-
-      echo "Kallisto alignment for sample " ${sample} "..."
-
-       # Check the value of paired_end and execute different code accordingly
-      if [ "$paired_end" = true ]; then
-
-        kallisto quant -i ${working_directory}/${kallisto_index} -o ${working_directory}/kallisto/${sample} ${working_directory}/FASTQ/${sample}_1.* ${working_directory}/FASTQ/${sample}_2.* -t ${threads} --bias --single-overhang > ${working_directory}/kallisto_logs_samples/log_kallisto_quant_${sample}.txt 2>&1
-
-      else
-        
-        kallisto quant -i ${working_directory}/${kallisto_index} -o ${working_directory}/kallisto/${sample} --single -l 200 -s 20 ${working_directory}/FASTQ/${sample}.* -t ${threads} --bias --single-overhang > ${working_directory}/kallisto_logs_samples/log_kallisto_quant_${sample}.txt 2>&1
-
-      fi
-
-      # ------------------------------------------------ REMOVE SAMPLE ------------------------------------------------
-
-      echo "Removing sample " ${sample} "..."
-
-      rm ${working_directory}/FASTQ/${sample}* 
-     
-
-  fi
-  
-  ) & # to parallelise 
- 
-     # allow to execute up to $N jobs in parallel
-    if [ $(jobs -r -p | wc -l) -ge $N ]; then
-        # now there are $N jobs already running, so wait here for any job
-        # to be finished so there is a place to start next one.
-        wait -n
-    fi
- 
-
-done
-
-
-# no more jobs to be started but wait for pending jobs
-# (all need to be finished)
-wait
-
-
-
-# ------------------------------------------------ PERFORM MULTIQC BY DATASET ------------------------------------------------
-
-# perform multiqc for each dataset in separate, including kallisto logs
-
-# Iterate over each dataset
-
-for dataset in $(awk '{print $2}' ${dataset_table} | sort -u); do
-
-    (
-
-    echo "Performing multiQC for dataset " ${dataset} "..."
-
-    # Extract sample IDs for the current dataset
-
-    samples_in_batch=$(awk -v dataset="$dataset" '$2 == dataset {print $1}' "$dataset_table" )
-    #echo ${samples_in_batch} 
-
-    full_paths_fastqc=()
-    full_paths_kallisto=()
-        # Iterate over each sample ID
-    for sample in $samples_in_batch; do
-        full_paths_fastqc+=" $(ls ${working_directory}/FASTQC/${sample}*fastqc.zip )"
-        full_paths_kallisto+=" $(ls ${working_directory}/kallisto_logs_samples/log_kallisto_quant_${sample}*.txt )"
-    done
-
-    # Run MultiQC on the samples in the current batch
-    multiqc -q  --interactive --title ${dataset} --module fastqc ${full_paths_fastqc} --module kallisto ${full_paths_kallisto} -o ${working_directory}/MULTIQC/$dataset
-
-    ) &
-
-    # allow to execute up to $N jobs in parallel
-    if [ $(jobs -r -p | wc -l) -ge $N ]; then
-        # now there are $N jobs already running, so wait here for any job
-        # to be finished so there is a place to start next one.
-        wait -n
-    fi
- 
-
-done
-
-# no more jobs to be started but wait for pending jobs
-# (all need to be finished)
-wait
-
- 
-echo "####################### DONE! ####################### "
-echo "--> Ending time: " $(date +%F_%T)
diff --git a/inst/Paper/preprocessing/Download_Alignment_QC/sampleIDs.txt b/inst/Paper/preprocessing/Download_Alignment_QC/sampleIDs.txt
deleted file mode 100644
index e33fb28..0000000
--- a/inst/Paper/preprocessing/Download_Alignment_QC/sampleIDs.txt
+++ /dev/null
@@ -1,557 +0,0 @@
-SRR26840995	Laurent
-SRR26840996	Laurent
-SRR26840997	Laurent
-SRR26840998	Laurent
-SRR26840999	Laurent
-SRR26841000	Laurent
-SRR26841001	Laurent
-SRR26841002	Laurent
-SRR26841003	Laurent
-SRR26841004	Laurent
-SRR26841005	Laurent
-SRR26841006	Laurent
-SRR25032574	Skea
-SRR25032575	Skea
-SRR25032576	Skea
-SRR25032577	Skea
-SRR25032578	Skea
-SRR25032579	Skea
-SRR25032580	Skea
-SRR25032581	Skea
-SRR25032582	Skea
-SRR25032583	Skea
-SRR25032584	Skea
-SRR25032585	Skea
-SRR25032586	Skea
-SRR25032587	Skea
-SRR25032588	Skea
-SRR25032589	Skea
-SRR25032590	Skea
-SRR25032591	Skea
-SRR25032592	Skea
-SRR25032593	Skea
-SRR25032594	Skea
-SRR25032595	Skea
-SRR25032596	Skea
-SRR25032597	Skea
-SRR21743822	Wang
-SRR21743823	Wang
-SRR21743824	Wang
-SRR21743817	Wang
-SRR21743818	Wang
-SRR21743819	Wang
-SRR21743820	Wang
-SRR21743821	Wang
-SRR21743825	Wang
-SRR21743826	Wang
-SRR21743827	Wang
-SRR21743828	Wang
-SRR2970873	Lenain
-SRR2970874	Lenain
-SRR2970876	Lenain
-SRR2970877	Lenain
-SRR2970879	Lenain
-SRR2970880	Lenain
-SRR2970882	Lenain
-SRR2970883	Lenain
-SRR2970885	Lenain
-SRR2970886	Lenain
-SRR2970888	Lenain
-SRR2970889	Lenain
-SRR2970891	Lenain
-SRR2970892	Lenain
-SRR2970893	Lenain
-SRR2970894	Lenain
-SRR2970895	Lenain
-SRR2970896	Lenain
-SRR1544480	Purcell
-SRR1544481	Purcell
-SRR1544482	Purcell
-SRR1544483	Purcell
-SRR1544484	Purcell
-SRR1544485	Purcell
-SRR1544486	Purcell
-SRR1544487	Purcell
-SRR1544488	Purcell
-SRR1544489	Purcell
-SRR1544490	Purcell
-SRR1544491	Purcell
-SRR1544492	Purcell
-SRR1544493	Purcell
-SRR1544494	Purcell
-SRR1544495	Purcell
-SRR1544496	Purcell
-SRR1544497	Purcell
-SRR1544498	Purcell
-SRR1544499	Purcell
-SRR1544500	Purcell
-SRR1544501	Purcell
-SRR1544502	Purcell
-SRR1544503	Purcell
-SRR5259584	Aarts
-SRR5259585	Aarts
-SRR5259586	Aarts
-SRR5259587	Aarts
-SRR5259588	Aarts
-SRR5259589	Aarts
-SRR5259590	Aarts
-SRR5259591	Aarts
-SRR5259592	Aarts
-SRR5259593	Aarts
-SRR5259594	Aarts
-SRR5259595	Aarts
-SRR23272464	McHugh
-SRR23272467	McHugh
-SRR23272470	McHugh
-SRR23272473	McHugh
-SRR23272476	McHugh
-SRR23272479	McHugh
-SRR23272482	McHugh
-SRR23272485	McHugh
-SRR23272488	McHugh
-SRR23272491	McHugh
-SRR23272494	McHugh
-SRR23272497	McHugh
-SRR23272500	McHugh
-SRR23272503	McHugh
-SRR1660534	Marthandan2016
-SRR1660535	Marthandan2016
-SRR1660536	Marthandan2016
-SRR1660537	Marthandan2016
-SRR1660538	Marthandan2016
-SRR1660539	Marthandan2016
-SRR1660540	Marthandan2016
-SRR1660541	Marthandan2016
-SRR1660542	Marthandan2016
-SRR1660543	Marthandan2016
-SRR1660544	Marthandan2016
-SRR1660545	Marthandan2016
-SRR1660546	Marthandan2016
-SRR1660547	Marthandan2016
-SRR1660548	Marthandan2016
-SRR1660549	Marthandan2016
-SRR1660550	Marthandan2016
-SRR1660551	Marthandan2016
-SRR1660552	Marthandan2016
-SRR1660553	Marthandan2016
-SRR1660554	Marthandan2016
-SRR1660555	Marthandan2016
-SRR1660556	Marthandan2016
-SRR1660557	Marthandan2016
-SRR1660558	Marthandan2016
-SRR1660559	Marthandan2016
-SRR1660560	Marthandan2016
-SRR2751110	Marthandan2016
-SRR2751111	Marthandan2016
-SRR2751112	Marthandan2016
-SRR2751116	Marthandan2016
-SRR2751117	Marthandan2016
-SRR2751118	Marthandan2016
-SRR2751119	Marthandan2016
-SRR2751120	Marthandan2016
-SRR2751121	Marthandan2016
-SRR2751122	Marthandan2016
-SRR2751123	Marthandan2016
-SRR2751124	Marthandan2016
-SRR23018053	Suda
-SRR23018054	Suda
-SRR23018055	Suda
-SRR23018056	Suda
-SRR23018057	Suda
-SRR23018058	Suda
-SRR23018069	Suda
-SRR23018070	Suda
-SRR23018071	Suda
-SRR23018072	Suda
-SRR23018073	Suda
-SRR23018074	Suda
-SRR23018085	Suda
-SRR23018086	Suda
-SRR23018097	Suda
-SRR23018098	Suda
-SRR23018099	Suda
-SRR23018100	Suda
-SRR23018101	Suda
-SRR23018102	Suda
-SRR23018103	Suda
-SRR23018104	Suda
-SRR2932856	Tasdemir
-SRR2932857	Tasdemir
-SRR2932858	Tasdemir
-SRR2932859	Tasdemir
-SRR2932860	Tasdemir
-SRR2932861	Tasdemir
-SRR2932862	Tasdemir
-SRR2932863	Tasdemir
-SRR2932864	Tasdemir
-SRR2932910	Tasdemir
-SRR2932911	Tasdemir
-SRR2932912	Tasdemir
-SRR2932913	Tasdemir
-SRR2932914	Tasdemir
-SRR2932915	Tasdemir
-SRR2932916	Tasdemir
-SRR2932917	Tasdemir
-SRR2932918	Tasdemir
-SRR24952348	Savic
-SRR24952349	Savic
-SRR24952350	Savic
-SRR24952351	Savic
-SRR24952352	Savic
-SRR24952353	Savic
-SRR24952354	Savic
-SRR24952355	Savic
-SRR24952356	Savic
-SRR24952357	Savic
-SRR24952358	Savic
-SRR24952359	Savic
-SRR24952360	Savic
-SRR24952361	Savic
-SRR24952362	Savic
-SRR24952363	Savic
-SRR24952364	Savic
-SRR24952365	Savic
-SRR24952366	Savic
-SRR24952367	Savic
-SRR24952368	Savic
-SRR24952369	Savic
-SRR24952370	Savic
-SRR24952371	Savic
-SRR24952372	Savic
-SRR24952373	Savic
-SRR24952374	Savic
-SRR24952375	Savic
-SRR24952376	Savic
-SRR24952377	Savic
-SRR24952378	Savic
-SRR24952379	Savic
-SRR24952395	Savic
-SRR24952396	Savic
-SRR24952397	Savic
-SRR24952398	Savic
-SRR24952399	Savic
-SRR24952400	Savic
-SRR24952401	Savic
-SRR24952402	Savic
-SRR24952403	Savic
-SRR24952404	Savic
-SRR24952405	Savic
-SRR24952406	Savic
-SRR24952407	Savic
-SRR24952408	Savic
-SRR24952409	Savic
-SRR24952410	Savic
-SRR24952411	Savic
-SRR24952412	Savic
-SRR24952413	Savic
-SRR24952414	Savic
-SRR24952415	Savic
-SRR24952416	Savic
-SRR24952417	Savic
-SRR24952418	Savic
-SRR24952419	Savic
-SRR24952420	Savic
-SRR24952421	Savic
-SRR24952422	Savic
-SRR24952423	Savic
-SRR24952424	Savic
-SRR24952425	Savic
-SRR24952426	Savic
-SRR24952427	Savic
-SRR24952428	Savic
-SRR24952429	Savic
-SRR14646263	Chan
-SRR14646264	Chan
-SRR14646265	Chan
-SRR14646266	Chan
-SRR14646267	Chan
-SRR14646268	Chan
-SRR14646269	Chan
-SRR14646270	Chan
-SRR14646271	Chan
-SRR14646272	Chan
-SRR14646273	Chan
-SRR14646274	Chan
-SRR14646275	Chan
-SRR14646276	Chan
-SRR14646277	Chan
-SRR14646278	Chan
-SRR14646279	Chan
-SRR14646292	Chan
-SRR14646293	Chan
-SRR14646294	Chan
-SRR14646295	Chan
-SRR14646296	Chan
-SRR14646297	Chan
-SRR14646298	Chan
-SRR14646317	Chan
-SRR14646318	Chan
-SRR14646319	Chan
-SRR14646320	Chan
-SRR14646321	Chan
-SRR14646322	Chan
-SRR14646353	Chan
-SRR14646354	Chan
-SRR14646355	Chan
-SRR14646368	Chan
-SRR14646369	Chan
-SRR14646370	Chan
-ERR1805188	HernandezSegura
-ERR1805189	HernandezSegura
-ERR1805190	HernandezSegura
-ERR1805191	HernandezSegura
-ERR1805196	HernandezSegura
-ERR1805199	HernandezSegura
-ERR1805200	HernandezSegura
-ERR1805201	HernandezSegura
-ERR1805202	HernandezSegura
-ERR1805203	HernandezSegura
-ERR1805204	HernandezSegura
-ERR1805205	HernandezSegura
-ERR1805206	HernandezSegura
-ERR1805207	HernandezSegura
-ERR1805208	HernandezSegura
-ERR1805209	HernandezSegura
-ERR1805210	HernandezSegura
-ERR1805211	HernandezSegura
-SRR1736333	Marthandan2015
-SRR1736334	Marthandan2015
-SRR1736335	Marthandan2015
-SRR1736336	Marthandan2015
-SRR1736337	Marthandan2015
-SRR1736338	Marthandan2015
-SRR1736339	Marthandan2015
-SRR1736340	Marthandan2015
-SRR1736341	Marthandan2015
-SRR1736342	Marthandan2015
-SRR1736343	Marthandan2015
-SRR1736344	Marthandan2015
-SRR1736345	Marthandan2015
-SRR1736346	Marthandan2015
-SRR1736347	Marthandan2015
-SRR1736348	Marthandan2015
-SRR1736349	Marthandan2015
-SRR1736350	Marthandan2015
-SRR1736357	Marthandan2015
-SRR1736358	Marthandan2015
-SRR1736359	Marthandan2015
-SRR1736360	Marthandan2015
-SRR1736361	Marthandan2015
-SRR1736362	Marthandan2015
-SRR1736363	Marthandan2015
-SRR1736364	Marthandan2015
-SRR1736365	Marthandan2015
-SRR1736366	Marthandan2015
-SRR1736367	Marthandan2015
-SRR1736368	Marthandan2015
-SRR9016157	Casella
-SRR9016158	Casella
-SRR9016159	Casella
-SRR9016160	Casella
-SRR9016161	Casella
-SRR9016162	Casella
-SRR9016163	Casella
-SRR9016164	Casella
-SRR9016165	Casella
-SRR9016166	Casella
-SRR9016167	Casella
-SRR9016168	Casella
-SRR9016169	Casella
-SRR9016170	Casella
-SRR9016171	Casella
-SRR9016172	Casella
-SRR9016173	Casella
-SRR9016174	Casella
-SRR9016175	Casella
-SRR9016176	Casella
-SRR9016177	Casella
-SRR9016178	Casella
-SRR9016179	Casella
-SRR9016180	Casella
-SRR9016181	Casella
-SRR9016182	Casella
-ERR4781442	Mangelinck
-ERR4781443	Mangelinck
-ERR4781444	Mangelinck
-ERR4781445	Mangelinck
-ERR4781446	Mangelinck
-ERR4781447	Mangelinck
-ERR4781448	Mangelinck
-ERR4781449	Mangelinck
-ERR4781450	Mangelinck
-ERR4781451	Mangelinck
-ERR4781452	Mangelinck
-ERR4781453	Mangelinck
-SRR27215555	Numa
-SRR27215556	Numa
-SRR27215557	Numa
-SRR27215558	Numa
-SRR27215547	Numa
-SRR27215548	Numa
-SRR27215549	Numa
-SRR27215550	Numa
-SRR27215551	Numa
-SRR27215552	Numa
-SRR27215553	Numa
-SRR27215554	Numa
-SRR28389055	Heidari
-SRR28389056	Heidari
-SRR28389057	Heidari
-SRR28389058	Heidari
-SRR28389059	Heidari
-SRR28389060	Heidari
-SRR28389061	Heidari
-SRR28389062	Heidari
-SRR28389063	Heidari
-SRR28389064	Heidari
-SRR28389065	Heidari
-SRR28389066	Heidari
-SRR18022193	Admasu
-SRR18022194	Admasu
-SRR18022196	Admasu
-SRR18022198	Admasu
-SRR18022199	Admasu
-SRR18022200	Admasu
-SRR18022201	Admasu
-SRR18022202	Admasu
-SRR18022203	Admasu
-SRR18022205	Admasu
-SRR18022206	Admasu
-SRR18022207	Admasu
-SRR18022208	Admasu
-SRR18022209	Admasu
-SRR18022212	Admasu
-SRR18022213	Admasu
-SRR18022215	Admasu
-SRR18022216	Admasu
-SRR18022217	Admasu
-SRR18022218	Admasu
-SRR18022220	Admasu
-SRR18022221	Admasu
-SRR18022224	Admasu
-SRR18022225	Admasu
-SRR18022226	Admasu
-SRR18022227	Admasu
-SRR18022228	Admasu
-SRR18022195	Admasu
-SRR18022197	Admasu
-SRR18022204	Admasu
-SRR18022210	Admasu
-SRR18022211	Admasu
-SRR18022214	Admasu
-SRR18022219	Admasu
-SRR18022222	Admasu
-SRR18022223	Admasu
-SRR19783652	Urata
-SRR19783654	Urata
-SRR19783655	Urata
-SRR19783656	Urata
-SRR19783657	Urata
-SRR19783659	Urata
-SRR19783660	Urata
-SRR19783661	Urata
-SRR19783662	Urata
-SRR19783663	Urata
-SRR19783665	Urata
-SRR19783666	Urata
-SRR22254572	Wang2023
-SRR22254573	Wang2023
-SRR22254574	Wang2023
-SRR22254575	Wang2023
-SRR22254576	Wang2023
-SRR22254577	Wang2023
-SRR22254578	Wang2023
-SRR22254579	Wang2023
-SRR22254580	Wang2023
-SRR22254581	Wang2023
-SRR22254582	Wang2023
-SRR6680309	Fu
-SRR6680310	Fu
-SRR6680311	Fu
-SRR6680312	Fu
-SRR6680313	Fu
-SRR6680314	Fu
-SRR6680315	Fu
-SRR6680316	Fu
-SRR6680317	Fu
-SRR6680321	Fu
-SRR6680322	Fu
-SRR6680323	Fu
-SRR21563935	Tanke
-SRR21563936	Tanke
-SRR21563937	Tanke
-SRR21563938	Tanke
-SRR21563939	Tanke
-SRR21563940	Tanke
-SRR21563944	Tanke
-SRR21563945	Tanke
-SRR21563946	Tanke
-SRR21563950	Tanke
-SRR21563951	Tanke
-SRR21563952	Tanke
-SRR21563959	Tanke
-SRR21563960	Tanke
-SRR21563961	Tanke
-SRR21563962	Tanke
-SRR21563963	Tanke
-SRR21563964	Tanke
-ERR6804206	Bawens
-ERR6804207	Bawens
-ERR6804208	Bawens
-ERR6804209	Bawens
-ERR6804210	Bawens
-ERR6804211	Bawens
-ERR6804212	Bawens
-ERR6804213	Bawens
-ERR6804214	Bawens
-ERR6804215	Bawens
-ERR6804216	Bawens
-ERR6804217	Bawens
-ERR6804218	Bawens
-ERR6804219	Bawens
-ERR6804220	Bawens
-ERR6804221	Bawens
-ERR6804222	Bawens
-ERR6804223	Bawens
-ERR6804224	Bawens
-ERR6804225	Bawens
-ERR6804226	Bawens
-ERR6804227	Bawens
-ERR6804228	Bawens
-ERR6804229	Bawens
-SRR24952339	Savic
-SRR24952340	Savic
-SRR24952341	Savic
-SRR24952342	Savic
-SRR24952343	Savic
-SRR24952344	Savic
-SRR24952345	Savic
-SRR24952346	Savic
-SRR24952347	Savic
-SRR24952380	Savic
-SRR24952381	Savic
-SRR24952382	Savic
-ERR1805218	HernandezSegura
-ERR1805226	HernandezSegura
-ERR1805232	HernandezSegura
-ERR1805233	HernandezSegura
-ERR1805234	HernandezSegura
-ERR1805235	HernandezSegura
-ERR1805236	HernandezSegura
-ERR1805237	HernandezSegura
-ERR1805238	HernandezSegura
-ERR1805239	HernandezSegura
-ERR1805240	HernandezSegura
-ERR1805241	HernandezSegura
-ERR1805242	HernandezSegura
-ERR1805250	HernandezSegura
-ERR1805256	HernandezSegura
-ERR1805257	HernandezSegura
-ERR1805258	HernandezSegura
-ERR1805259	HernandezSegura
-ERR1805260	HernandezSegura
-ERR1805261	HernandezSegura
-ERR1805262	HernandezSegura
-ERR1805263	HernandezSegura
-ERR1805264	HernandezSegura
-ERR1805265	HernandezSegura
\ No newline at end of file
diff --git a/inst/Paper/preprocessing/Filtering_Normalisation_Batch/Preprocessing_SenescenceDatasets.Rmd b/inst/Paper/preprocessing/Filtering_Normalisation_Batch/Preprocessing_SenescenceDatasets.Rmd
deleted file mode 100644
index 5afb904..0000000
--- a/inst/Paper/preprocessing/Filtering_Normalisation_Batch/Preprocessing_SenescenceDatasets.Rmd
+++ /dev/null
@@ -1,479 +0,0 @@
----
-title: "Pre-processing Senescence Datasets"
-author: "Rita Martins-Silva"
-date: "05/05/2025"
-output: html_document
-editor_options: 
-  markdown: 
-    wrap: 72
----
-
-#  {.tabset .tabset-pills}
-
-## Set up {.tabset}
-
-### Libraries
-
-```{r , include=FALSE}
-knitr::opts_chunk$set(echo = TRUE,  max.height='100px')
-knitr::opts_knit$set(root.dir = '/inst/Paper/')
-setwd("/inst/Paper/")
-```
-
-```{r}
-library("data.table")
-library("tximport")
-library("ggplot2")
-library("edgeR")
-library("ggpubr")
-```
-
-### Functions
-
-```{r}
-plot_variance_explained <- function(mat, vars, df_metadata){
-  df_metadata <- df_metadata[,vars]
-  #mat <- log2(mat+1)
-  print("calculating variance explained by each variable")
-  varMat <- getVarianceExplained(mat, variables = df_metadata)
-  print("calculation over")
-  p <- plotExplanatoryVariables(
-    varMat,
-    variables = variables)  +
-    ggtitle("Variance Explained") +
-    theme(text = element_text(size=20))+
-    theme(plot.title = element_text(hjust = 0.5))+ 
-    guides(color=guide_legend(title="Variable"))
-  return(p)
-}
-```
-
-## Import Data
-
-To construct a robust reference compendium for evaluating
-senescence-associated gene signatures, we curated publicly available
-human RNA-seq datasets from GEO, ArrayExpress, and prior literature. The
-curation process followed a two-step strategy:
-
-***Step 1: Fibroblast Datasets***
-
-We began by focusing exclusively on fibroblast-derived datasets, as
-fibroblasts represent the most widely used and experimentally tractable
-model for studying cellular senescence. Only RNA-seq studies of Homo
-sapiens with a minimum of 10 samples were considered. Senescence
-validation methods (e.g., SA-β-gal staining, cell cycle arrest markers)
-were manually verified from the original publications. Data available as
-of February 14, 2024 for GEO and Literature Review, and February 29,
-2024 for ArrayExpress.
-
--   GEO: ("senescence" OR "quiescence") AND "fibroblasts" AND "RNA-seq"
-    AND "Homo sapiens"[porgn] AND ≥10 samples
-
-    Initial: 59 datasets \| Final: 12 datasets \| Total: 298 samples
-
--   ArrayExpress: (senescence OR quiescence) AND fibroblasts AND
-    (RNA-seq OR RNA sequencing) AND (Homo sapiens OR Human), with ≥10
-    samples and raw data availability
-
-    Initial: 38 datasets \| Final: 1 dataset \| Total: 12 samples
-
--   Literature Review: Previously validated RNA-seq datasets from
-    senescence-related studies
-
-    3 datasets included \| Total: 74 samples
-
-***Step 2: Other Cell Types***
-
-To expand the diversity and generalizability of our analyses, we next
-incorporated datasets from non-fibroblast cell types. Additional filters
-were applied to ensure relevance and data quality, including exclusion
-of scRNA-seq, ChIP-seq, and cancer-specific studies. Data available as
-of July 12, 2024.
-
--   GEO: ("senescence" OR "quiescence") NOT "fibroblasts" AND "RNA-seq"
-    NOT "scRNA-seq" NOT "ChIP-seq" AND ≥10 samples AND "Homo
-    sapiens"[porgn]
-
-    Initial: 133 datasets \| Final: 7 datasets \| Total: 113 samples
-
--   ArrayExpress: (senescence OR quiescence) NOT fibroblasts AND
-    (RNA-seq OR RNA sequencing) AND (Homo sapiens OR Human) AND NOT
-    (single OR ChIP-seq OR ATAC-seq OR Affymetrix) AND NOT (cancer OR
-    tumour)
-
-    Initial: 7 datasets \| Final: 1 dataset \| Total: 24 samples
-
--   Literature Review: Additional well-characterized datasets from
-    previous analyses
-
-    2 of 6 candidate datasets retained \| Total: 36 samples
-
-***Final Dataset Composition***
-
-In total, 25 datasets comprising 545 RNA-seq
-samples were curated for downstream benchmarking and discovery analyses.
-These include 16 fibroblast-based datasets (384 samples) and 9 datasets
-from other human cell types (161 samples). The fibroblast datasets
-include 192 proliferative, 24 quiescent, and 168 senescent samples. The
-non-fibroblast datasets comprise 80 proliferative, 9 quiescent, and 72
-senescent samples.
-
-### Read Counts
-
-```{r}
-counts <- readRDS("data/counts.rds")
-counts[1:5,1:5]
-```
-
-### Metadata
-
-```{r}
-metadata <- readRDS("data/metadata.rds")
-head(metadata)
-```
-
-## Filtering
- 
-Lowly expressed genes were filtered out by retaining only those with a mean expression greater than 70 counts in at least one experimental condition (Quiescent, Proliferative, or Senescent). This ensures that genes with minimal or no expression across all groups do not bias downstream analyses.
-
- 
-```{r}
-# Average expression per gene per condition
-avg_expr_quiesc <- log10(rowMeans(counts[, metadata$Condition == "Quiescent"] + 1))
-avg_expr_prol   <- log10(rowMeans(counts[, metadata$Condition == "Proliferative"] + 1))
-avg_expr_sen    <- log10(rowMeans(counts[, metadata$Condition == "Senescent"] + 1))
-
-# Combine into dataframe
-df_avg_expr <- data.frame(expression = c(avg_expr_quiesc, avg_expr_prol, avg_expr_sen),
-                          Condition = rep(c("Quiescent", "Proliferative", "Senescent"),
-                                          each = nrow(counts)))
-
-# Plot
-ggplot(df_avg_expr, aes(x = expression, fill = Condition)) +
-  geom_density(alpha = 0.3) +
-  theme_bw() +
-  ggtitle("Gene Expression by Condition", subtitle = "Mean log10(count + 1) per gene") +
-  xlab("Mean log10(count + 1) per gene") +
-  ylab("Density") +
-  geom_vline(xintercept = log10(70), linetype = "dotted", color = "black", size = 1)
-
-```
-
-
-```{r}
-# Set threshold (e.g., mean count > 70 in at least one condition)
-min_mean_count <- 70
-
-# Get sample indices per condition
-samples_quiesc <- metadata$sampleID[metadata$Condition == "Quiescent"]
-samples_prol   <- metadata$sampleID[metadata$Condition == "Proliferative"]
-samples_sen    <- metadata$sampleID[metadata$Condition == "Senescent"]
-
-# Calculate mean counts per gene within each condition
-mean_quiesc <- rowMeans(counts[, samples_quiesc])
-mean_prol   <- rowMeans(counts[, samples_prol])
-mean_sen    <- rowMeans(counts[, samples_sen])
-
-# Keep genes where mean expression is above threshold in at least one condition
-keep_genes <- (mean_quiesc > min_mean_count) |
-              (mean_prol > min_mean_count) |
-              (mean_sen > min_mean_count)
-
-# Apply filter to counts
-filtered_counts <- counts[keep_genes, ]
-
-
-cat("Number of genes before filtering:", nrow(counts), "\n")
-cat("Number of genes after filtering:", nrow(filtered_counts), "\n")
-
-```
-```{r}
-# Average expression per gene per condition
-avg_expr_quiesc <- log10(rowMeans(filtered_counts[, metadata$Condition == "Quiescent"] + 1))
-avg_expr_prol   <- log10(rowMeans(filtered_counts[, metadata$Condition == "Proliferative"] + 1))
-avg_expr_sen    <- log10(rowMeans(filtered_counts[, metadata$Condition == "Senescent"] + 1))
-
-# Combine into dataframe
-df_avg_expr <- data.frame(expression = c(avg_expr_quiesc, avg_expr_prol, avg_expr_sen),
-                          Condition = rep(c("Quiescent", "Proliferative", "Senescent"),
-                                          each = nrow(filtered_counts)))
-
-# Plot
-ggplot(df_avg_expr, aes(x = expression, fill = Condition)) +
-  geom_density(alpha = 0.3) +
-  theme_bw() +
-  ggtitle("Gene Expression by Condition (Filtered)", subtitle = "Mean log10(count + 1) per gene") +
-  xlab("Mean log10(count + 1) per gene") +
-  ylab("Density") 
-
-```
-  
-```{r include=FALSE, eval=FALSE}
-#saveRDS(filtered_counts, "data/filtered_counts.rds")
-```
-  
-## Normalisation
-
- 
-
- 
-```{r fig.width=10, fig.height=4}
-
-samples_ordered <- colnames(filtered_counts)[order(colSums(filtered_counts))]
-N <- 30
-
-filtcounts_ggplot <- stack(log2(filtered_counts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(filtcounts_ggplot) <- c("expression", "sampleID")
-filtcounts_ggplot <- merge(filtcounts_ggplot,metadata, by="sampleID")
-  
-
-ggplot(filtcounts_ggplot, aes(sampleID,expression)) + geom_boxplot(aes(fill=DatasetID), alpha=0.4)+ xlab("Samples") + ylab("log2(counts)")   +  theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw()+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank()) +
-  ggtitle("Senescence datasets", subtitle="Before Normalisation - 30 lowest and highest coverage") 
-
-```
-
-
-```{r}
-dgelist_normcounts <- DGEList(filtered_counts) # the matrix with gene names as row names and counts in columns
-dgelist_normcounts <- calcNormFactors(dgelist_normcounts, method = "TMM") 
-normalised_counts <- as.data.frame(voom(dgelist_normcounts,plot=F)$E) 
-#normalised_counts <- as.data.frame(edgeR::cpm(dgelist_normcounts, log = FALSE)) 
-normalised_counts <- 2^normalised_counts
-lognormalised_counts <- log2(normalised_counts+1)
-```
-
-
-
-```{r fig.width=10, fig.height=4}
-
-# samples_ordered <- colnames(filtered_counts)[order(colSums(normalised_counts))]
-# N <- 30
-
-normcounts_ggplot <- stack(log2(normalised_counts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(normcounts_ggplot) <- c("expression", "sampleID")
-normcounts_ggplot <- merge(normcounts_ggplot,metadata, by="sampleID")
-  
-
-ggplot(normcounts_ggplot, aes(sampleID,expression)) + geom_boxplot(aes(fill=DatasetID), alpha=0.4)+ xlab("Samples") + ylab("log2(counts)")   +  theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw()+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank()) +
-  ggtitle("Senescence datasets", subtitle="After Normalisation - 30 lowest and highest coverage") 
-
-```
-
-```{r include=FALSE, eval=FALSE}
-#saveRDS(normalised_counts, "data/normalised_counts.rds")
-```
-
-## Batch correction
- 
-```{r fig.width=8, fig.height=6}
-y <- DGEList(lognormalised_counts, samples= metadata)
-PCA_beforeBC  <- prcomp(t(y$counts), scale=FALSE, center=TRUE)
-PCACounts_beforeBC  <- PCA_beforeBC$x
-PCACounts_beforeBC  <- as.data.frame(PCACounts_beforeBC )
-PCAData_beforeBC <-  cbind(PCACounts_beforeBC[1:10],y$samples) 
-
-ev = PCA_beforeBC$sdev^2 
-pc1= round(100*ev[1]/sum(ev),2) 
-pc2 = round(100*ev[2]/sum(ev),2) 
-pc3 = round(100*ev[3]/sum(ev),2) 
-pc4 = round(100*ev[4]/sum(ev),2) 
-pc5 = round(100*ev[5]/sum(ev),2) 
-pc6 = round(100*ev[6]/sum(ev),2)  
-
-
-ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(color=DatasetID), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(color=Condition), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(color=CellType), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(color=SenescentType), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData_beforeBC, aes(PC1,PC2))+
-  geom_point(aes(color=lib.size), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
- 
-```
- 
-
-See https://github.com/DiseaseTranscriptomicsLab/voyAGEr_scripts/blob/main/Markdowns/voyAGEr.Rmd 
-
-  
-```{r}
-
-# Assume that the cell type will be a biological "batch", but the individual cell lines is not relevant for our problem;
-# DatasetID is for sure a batch
-batch_cols <- c("DatasetID") 
-
-metadata_cols <- c("Condition", "CellType")
-
-mmbatch <- model.matrix(reformulate(paste0("~0 + ",paste0( batch_cols,collapse = " + "))), data=metadata) 
-mmkeep <- model.matrix(reformulate(paste0("~0 + ",paste0( metadata_cols,collapse = " + "))), data=metadata)
-mm <- cbind(mmkeep,mmbatch)
-
-D0 <- DGEList(normalised_counts)  
-D0 <- calcNormFactors(D0) 
-y <- voom(D0, mm, plot = F) 
-fit <- lmFit(y, mm)  
-beta <- fit$coefficients[,-(1:ncol(mmkeep)),drop=FALSE]
-#beta <- fit$coefficients
-beta[is.na(beta)] <- 0
-corrcounts <- as.matrix(y$E) - beta %*% t(mmbatch)
-
-offset <- apply(corrcounts,1,min) - apply(log2(normalised_counts+1),1,min)
-corrcounts <- as.data.frame(corrcounts-offset)
-corrcounts <- 2^corrcounts
-
-```
-
-```{r include=FALSE, eval=FALSE}
-#saveRDS(corrcounts, "data/corrcounts.rds")
-```
-
-
-
-```{r fig.width=8, fig.height=6}
-y <- DGEList(log2(corrcounts+1), samples= metadata)
-PCA <- prcomp(t(y$counts), scale=FALSE, center=TRUE)
-PCACounts <- PCA$x
-PCACounts <- as.data.frame(PCACounts)
-PCAData <-  cbind(PCACounts[1:10],y$samples) 
-
-ev = PCA$sdev^2 
-pc1= round(100*ev[1]/sum(ev),2) 
-pc2 = round(100*ev[2]/sum(ev),2) 
-pc3 = round(100*ev[3]/sum(ev),2) 
-pc4 = round(100*ev[4]/sum(ev),2) 
-pc5 = round(100*ev[5]/sum(ev),2) 
-pc6 = round(100*ev[6]/sum(ev),2)  
-
-
-ggplot(PCAData, aes(PC1,PC2))+
-  geom_point(aes(color=DatasetID), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData, aes(PC1,PC2))+
-  geom_point(aes(color=Condition), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData, aes(PC1,PC2))+
-  geom_point(aes(color=CellType), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData, aes(PC1,PC2))+
-  geom_point(aes(color=SenescentType), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
-
-ggplot(PCAData, aes(PC1,PC2))+
-  geom_point(aes(color=lib.size), size=5, alpha=0.6)+
-  ggtitle(label ='Senescence datasets')+
-  xlab(paste0("PC1: ",pc1,"% variance")) +
-  ylab(paste0("PC2: ",pc2,"% variance")) + 
-  theme_bw()+
-  theme(legend.position="bottom", legend.box="vertical", legend.margin=margin())  + 
-  geom_vline(xintercept=0, linetype="dotted") + 
-  geom_hline(yintercept=0, linetype="dotted")
- 
-
-```
-
-
-
-```{r fig.width=10, fig.height=4}
-
-# samples_ordered <- colnames(filtered_counts)[order(colSums(normalised_counts))]
-# N <- 30
-
-corrcounts_ggplot <- stack(log2(corrcounts[,c(samples_ordered[1:N], samples_ordered[(length(samples_ordered)-N +1):length(samples_ordered)])]))
-colnames(corrcounts_ggplot) <- c("expression", "sampleID")
-corrcounts_ggplot <- merge(corrcounts_ggplot,metadata, by="sampleID")
-  
-
-ggplot(corrcounts_ggplot, aes(sampleID,expression)) + geom_boxplot(aes(fill=DatasetID), alpha=0.4)+ xlab("Samples") + ylab("log2(counts)")   +  theme(axis.text.x = element_text(angle = 45, hjust=1))+
-  theme_bw()+ 
-  theme(plot.title = element_text(hjust = 0.5),
-        plot.subtitle = element_text(hjust = 0.5),
-        axis.text.x=element_blank(),
-        axis.ticks.x=element_blank()) +
-  ggtitle("Senescence datasets", subtitle="After BEC - 30 lowest and highest coverage") 
-
-```
-
-
-
- 
-
-
-
diff --git a/man/CohenD_allConditions.Rd b/man/CohenD_allConditions.Rd
index 5c0ea77..5587129 100644
--- a/man/CohenD_allConditions.Rd
+++ b/man/CohenD_allConditions.Rd
@@ -9,7 +9,8 @@ CohenD_allConditions(
   metadata,
   gene_sets,
   variable,
-  mode = c("simple", "medium", "extensive")
+  mode = c("simple", "medium", "extensive"),
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -37,6 +38,12 @@ groups.
 \item \code{"extensive"}: All possible groupwise contrasts, ensuring balance in the
 number of terms on each side.
 }}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A named list where each element corresponds to a gene signature. Each
diff --git a/man/CohenF_allConditions.Rd b/man/CohenF_allConditions.Rd
index 8a5f4b4..df25f8c 100644
--- a/man/CohenF_allConditions.Rd
+++ b/man/CohenF_allConditions.Rd
@@ -4,7 +4,13 @@
 \alias{CohenF_allConditions}
 \title{Compute Cohen's f for All Gene Signatures Across a Categorical Variable}
 \usage{
-CohenF_allConditions(data, metadata, gene_sets, variable)
+CohenF_allConditions(
+  data,
+  metadata,
+  gene_sets,
+  variable,
+  p.adjust.method = "BH"
+)
 }
 \arguments{
 \item{data}{A data frame of gene expression data, with genes as rows and
@@ -21,6 +27,12 @@ downregulated).}
 
 \item{variable}{A string specifying the categorical variable in
 \code{metadata} used to model the gene signature scores.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A named list where each element corresponds to a gene signature. Each
diff --git a/man/FPR_Simulation.Rd b/man/FPR_Simulation.Rd
index ece45bd..fb94bf7 100644
--- a/man/FPR_Simulation.Rd
+++ b/man/FPR_Simulation.Rd
@@ -10,7 +10,7 @@ FPR_Simulation(
   original_signatures,
   Variable,
   gene_list = NULL,
-  number_of_sims = 10,
+  number_of_sims = 100,
   title = NULL,
   widthTitle = 30,
   titlesize = 12,
@@ -19,7 +19,8 @@ FPR_Simulation(
   mode = c("none", "simple", "medium", "extensive"),
   ColorValues = NULL,
   ncol = NULL,
-  nrow = NULL
+  nrow = NULL,
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -77,6 +78,12 @@ grid layout. If \code{NULL}, layout is auto-calculated.}
 
 \item{nrow}{Integer. Number of rows for arranging signature plots in a grid
 layout. If \code{NULL}, layout is auto-calculated.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 Invisibly returns a list containing:
diff --git a/man/GSEA_VariableAssociation.Rd b/man/GSEA_VariableAssociation.Rd
index afe244b..915560d 100644
--- a/man/GSEA_VariableAssociation.Rd
+++ b/man/GSEA_VariableAssociation.Rd
@@ -20,7 +20,8 @@ GSEA_VariableAssociation(
   titlesize = 14,
   pointSize = 5,
   ignore_NAs = FALSE,
-  printplt = TRUE
+  printplt = TRUE,
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -96,6 +97,12 @@ removed before analysis, leading to a loss of data to be fitted in the
 model.}
 
 \item{printplt}{Boolean specifying if plot is to be printed. Default: \code{TRUE}.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A list with two elements:
@@ -111,48 +118,5 @@ molecular score) and metadata variables using differential expression (DE)
 analysis and Gene Set Enrichment Analysis (GSEA). It generates all possible
 contrasts for categorical variables and uses linear modeling for continuous
 variables.
-}
-\examples{
-# Simulate gene expression data (genes as rows, samples as columns)
-set.seed(42)
-expr <- as.data.frame(matrix(rnorm(500), nrow = 50, ncol = 10))
-rownames(expr) <- paste0("Gene", 1:50)
-colnames(expr) <- paste0("Sample", 1:10)
-
-# Simulate metadata (categorical and continuous)
-metadata <- data.frame(
-  sampleID = paste0("Sample", 1:10),
-  Group = rep(c("A", "B"), each = 5),
-  Age = sample(20:60, 10),
-  row.names = colnames(expr)
-)
-
-# Define a toy gene set: one gene set only for discovery mode!
-gene_set <- list(
-  Signature1 = paste0("Gene", 1:10)
-)
-
-# Score-based association (e.g., logmedian)
-res_score <- VariableAssociation(
-  method = "logmedian",
-  data = expr,
-  metadata = metadata,
-  cols = c("Group", "Age"),
-  gene_set = gene_set
-)
-print(res_score$Overall)
-print(res_score$plot)
-
-# GSEA-based association (if GSEA_VariableAssociation is available)
-# res_gsea <- VariableAssociation(
-#   method = "GSEA",
-#   data = expr,
-#   metadata = metadata,
-#   cols = "Group",
-#   gene_set = gene_set
-# )
-# print(res_gsea$data)
-print(res_score$plot)
-
 }
 \keyword{internal}
diff --git a/man/PlotScores.Rd b/man/PlotScores.Rd
index 8d17eb6..a685ddf 100644
--- a/man/PlotScores.Rd
+++ b/man/PlotScores.Rd
@@ -31,7 +31,8 @@ PlotScores(
   sig_threshold = 0.05,
   cohen_threshold = 0.5,
   colorPalette = "Set3",
-  cor = c("pearson", "spearman", "kendall")
+  cor = c("pearson", "spearman", "kendall"),
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -169,6 +170,12 @@ is manually specified.}
 \item{cor}{Correlation method for numeric variables. One of \code{"pearson"}
 (default), \code{"spearman"}, or \code{"kendall"}. Only applies when the variable is
 numeric and \code{method != "all"}.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}. Only used when \code{method == "all"}.}
 }
 \value{
 Depending on \code{method}:
diff --git a/man/Score_VariableAssociation.Rd b/man/Score_VariableAssociation.Rd
index 6c7c43b..4e6a50d 100644
--- a/man/Score_VariableAssociation.Rd
+++ b/man/Score_VariableAssociation.Rd
@@ -23,7 +23,8 @@ Score_VariableAssociation(
   discrete_colors = NULL,
   continuous_color = "#8C6D03",
   color_palette = "Set2",
-  printplt = TRUE
+  printplt = TRUE,
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -85,6 +86,12 @@ variables. Default: \code{"#8C6D03"}.}
 variables. Default: \code{"Set2"}.}
 
 \item{printplt}{Boolean specifying if plot is to be printed. Default: \code{TRUE}.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A list with:
diff --git a/man/VariableAssociation.Rd b/man/VariableAssociation.Rd
index 4c2907c..8e055d5 100644
--- a/man/VariableAssociation.Rd
+++ b/man/VariableAssociation.Rd
@@ -24,7 +24,8 @@ VariableAssociation(
   discrete_colors = NULL,
   continuous_color = "#8C6D03",
   color_palette = "Set2",
-  printplt = TRUE
+  printplt = TRUE,
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -88,6 +89,12 @@ points (default: \code{"#8C6D03"}).}
 categorical variables (default: \code{"Set2"}).}
 
 \item{printplt}{Logical. If \code{TRUE}, plots are printed. Default: \code{TRUE}.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A list with method-specific results and ggplot2-based visualizations:
@@ -125,7 +132,47 @@ GSEA-based association.
 The function returns statistical results and visualizations summarizing
 effect sizes and significance.
 }
-\section{Shared Arguments (All Methods)}{
+\examples{
+# Simulate gene expression data (genes as rows, samples as columns)
+set.seed(42)
+expr <- as.data.frame(matrix(rnorm(500), nrow = 50, ncol = 10))
+rownames(expr) <- paste0("Gene", 1:50)
+colnames(expr) <- paste0("Sample", 1:10)
+
+# Simulate metadata (categorical and continuous)
+metadata <- data.frame(
+  sampleID = paste0("Sample", 1:10),
+  Group = rep(c("A", "B"), each = 5),
+  Age = sample(20:60, 10),
+  row.names = colnames(expr)
+)
 
-}
+# Define a toy gene set: one gene set only for discovery mode!
+gene_set <- list(
+  Signature1 = paste0("Gene", 1:10)
+)
 
+# Score-based association (e.g., logmedian)
+res_score <- VariableAssociation(
+  method = "logmedian",
+  data = expr,
+  metadata = metadata,
+  cols = c("Group", "Age"),
+  gene_set = gene_set
+)
+print(res_score$Overall)
+print(res_score$plot)
+
+# GSEA-based association (if GSEA_VariableAssociation is available)
+# res_gsea <- VariableAssociation(
+#   method = "GSEA",
+#   data = expr,
+#   metadata = metadata,
+#   cols = "Group",
+#   gene_set = gene_set
+# )
+# print(res_gsea$data)
+print(res_score$plot)
+
+
+}
diff --git a/man/compute_stat_tests.Rd b/man/compute_stat_tests.Rd
index 9e2df31..8b290b5 100644
--- a/man/compute_stat_tests.Rd
+++ b/man/compute_stat_tests.Rd
@@ -10,7 +10,8 @@ compute_stat_tests(
   cols = NULL,
   numeric = "pearson",
   categorical_bin = "t.test",
-  categorical_multi = "anova"
+  categorical_multi = "anova",
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -30,6 +31,12 @@ Options: \code{"t.test"} (default) or \code{"wilcoxon"}.}
 \item{categorical_multi}{The statistical test for multi-level categorical
 variables.
 Options: \code{"anova"} (default) or \code{"kruskal-wallis"}.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A named list (one entry per variable being analysed) where each
diff --git a/man/counts_example.Rd b/man/counts_example.Rd
index 25658d2..4998e47 100644
--- a/man/counts_example.Rd
+++ b/man/counts_example.Rd
@@ -12,11 +12,12 @@ samples (sample IDs).
 \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE63577}
 }
 \usage{
-counts_example
+data(counts_example)
 }
 \description{
-A numeric matrix containing filtered and normalized gene expression data from
-the Marthandan et al. (2016) study (GEO accession GSE63577).
+A numeric matrix containing filtered and normalized (non log-transformed)
+gene expression data from the Marthandan et al. (2016) study (GEO accession
+GSE63577).
 }
 \details{
 Raw FASTQ files were downloaded using \code{fasterq-dump} (v2.11.0) and processed
@@ -25,8 +26,7 @@ conducted using FastQC (v0.12.1) and summarised with MultiQC (v1.14).
 Pseudo-alignment to the RefSeq transcriptome (NCBI release 109) was performed
 using kallisto (v0.44.0). Genes with low expression (mean count < 70 in all
 conditions) were filtered out. Count normalization factors were calculated
-with \code{edgeR::calcNormFactors}, and log2-transformed values were obtained via
-\code{limma::voom}.
+with \code{edgeR::calcNormFactors}.
 
 Intermediate time points for HFF and MRC5 cell lines were excluded, resulting
 in a final dataset with 45 high-quality samples across proliferative,
diff --git a/man/figures/Workflow.png b/man/figures/Workflow.png
index 7543603..66dbc7e 100644
Binary files a/man/figures/Workflow.png and b/man/figures/Workflow.png differ
diff --git a/man/geneset_similarity.Rd b/man/geneset_similarity.Rd
index 35dd72f..c4c5991 100644
--- a/man/geneset_similarity.Rd
+++ b/man/geneset_similarity.Rd
@@ -15,7 +15,9 @@ geneset_similarity(
   pval_threshold = 0.05,
   limits = NULL,
   title_size = 12,
-  color_values = c("#F9F4AE", "#B44141"),
+  color = "#B44141",
+  neutral_color = "white",
+  cold_color = "#4173B4",
   title = NULL,
   jaccard_threshold = 0,
   msig_subset = NULL,
@@ -48,15 +50,42 @@ Odds Ratio required for a gene set to be included in the plot. Default is
 1.}
 
 \item{pval_threshold}{(only if method == "odds_ratio" only) Numeric. Maximum
-adjusted p-value to show a label. Default is 0.05.}
+adjusted p-value required for a gene set to be included in the plot.
+Default is 0.05.}
 
-\item{limits}{Numeric vector of length 2. Limits for color scale.}
+\item{limits}{Numeric vector of length 2. Limits for color scale. If \code{NULL},
+it is automatically set to \code{c(0, 1)} for Jaccard or the range of OR for odds
+ratio.}
 
 \item{title_size}{Integer specifying the font size for the plot title.
 Default is \code{12}.}
 
-\item{color_values}{Character vector of colors used for the fill gradient.
-Default is \code{c("#F9F4AE", "#B44141")}.}
+\item{color}{Character. The color for the maximum of the scale. Default is
+\code{"#B44141"} (red).
+\itemize{
+\item If \code{method = "jaccard"}, the scale goes from \code{neutral_color} to \code{color}.
+\item If \code{method = "odds_ratio"} and any OR >= 1, the scale ends at \code{color}.
+\item If \code{method = "odds_ratio"} and all OR <= 1, \code{color} is not used; instead, the scale
+runs from \code{cold_color} (minimum) to \code{neutral_color} (OR = 1, if present;
+otherwise \code{neutral_color} is the maximum).
+}}
+
+\item{neutral_color}{Character. The neutral reference color. Default is
+\code{white}.
+\itemize{
+\item If \code{method = "jaccard"}, this is the minimum of the scale.
+\item If \code{method = "odds_ratio"} and any OR >= 1, this corresponds to OR = 1 if such values exist; otherwise it is the minimum of the scale.
+\item If \code{method = "odds_ratio"} and all OR <= 1, this corresponds to OR = 1 if such values exist; otherwise it is the maximum of the scale (with \code{cold_color} as the minimum).
+}}
+
+\item{cold_color}{Character. The color for values below OR = 1 (only used
+when \code{method = "odds_ratio"}). Default is \code{"#4173B4"} (blue).
+\itemize{
+\item If \code{method = "odds_ratio"} and any OR < 1, the scale runs from \code{cold_color}
+(minimum) to \code{neutral_color} (OR = 1 if present; otherwise \code{neutral_color}
+is the maximum).
+\item Ignored if \code{method = "jaccard"} or if all OR >= 1.
+}}
 
 \item{title}{Optional. Custom title for the plot. If \code{NULL}, the title
 defaults to \code{"Signature Overlap"}.}
@@ -81,7 +110,7 @@ Invisibly returns a list containing:
 \describe{
 \item{\code{plot}}{The \pkg{ggplot2} object of the similarity heatmap.}
 \item{\code{data}}{The data frame object containing the similarity
-scores aper pair of gene sets.}
+scores per pair of gene sets.}
 }
 }
 \description{
diff --git a/man/genesets_example.Rd b/man/genesets_example.Rd
index a63a58d..274f199 100644
--- a/man/genesets_example.Rd
+++ b/man/genesets_example.Rd
@@ -11,13 +11,13 @@ A named list of length 3:
 curated gene set of commonly reported senescence markers,
 with directionality (+1 or -1).}
 \item{REACTOME_Senescence}{Character vector of gene symbols. The
-REACTOME_CELLULAR_SENESCENCE from MSigDB pathway. No directionality.}
+REACTOME_CELLULAR_SENESCENCE from MSigDB database. No directionality.}
 \item{HernandezSegura}{A data frame with columns \code{gene} and \code{direction}.
 A gene set from Hernandez-Segura et al. (2017), with directionality (+1 or -1).}
 }
 }
 \usage{
-genesets_example
+data(genesets_example)
 }
 \description{
 Example Gene Sets for Cellular Senescence
diff --git a/man/metadata_example.Rd b/man/metadata_example.Rd
index 05b3ca1..f30d8d2 100644
--- a/man/metadata_example.Rd
+++ b/man/metadata_example.Rd
@@ -21,7 +21,7 @@ for senescent samples, "young" for proliferative).}
 \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE63577}
 }
 \usage{
-metadata_example
+data(metadata_example)
 }
 \description{
 A data frame containing metadata for samples from the Marthandan et al.
diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd
index c1df8ef..a73336a 100644
--- a/man/runGSEA.Rd
+++ b/man/runGSEA.Rd
@@ -9,7 +9,8 @@ runGSEA(
   gene_sets,
   stat = NULL,
   ContrastCorrection = FALSE,
-  nPermSimple = 10000
+  nPermSimple = 10000,
+  p.adjust.method = "BH"
 )
 }
 \arguments{
@@ -48,6 +49,12 @@ the function only corrects for the number of gene sets.}
 
 \item{nPermSimple}{Number of permutations in the simple fgsea implementation
 for preliminary estimation of P-values. Parameter from fgsea.}
+
+\item{p.adjust.method}{Character string specifying the method to use for
+multiple testing correction. Must be one of \code{"BH"} (Benjamini-Hochberg,
+default), \code{"holm"}, \code{"hommel"}, \code{"bonferroni"},
+\code{"BY"} (Benjamini-Yekutieli), \code{"fdr"}, or \code{"none"}.
+Passed to \code{\link[stats]{p.adjust}}.}
 }
 \value{
 A named list where each element corresponds to a contrast. Each
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..a2f7b7b
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,151 @@
+# markeR Python Bridge
+
+This workspace provides two simple Python helpers for using the
+Bioconductor R package **markeR** via `rpy2`.
+
+- `markeR_to_python.py` provides a minimal example workflow 
+  demonstrating how to load the example data and call a specific 
+  function, serving as a template for users who wish to structure 
+  their own analysis script in Python.
+- `run_marker_function.py` is a generic command‑line wrapper that can
+  invoke *any* function exported by the markeR package.
+
+## Prerequisites
+
+* R (>=4.5) installed and on your `PATH`.
+* A Python virtual environment.  A `requirements.txt` file is provided in
+  this folder listing the needed packages (`rpy2`, `pandas`, `numpy` plus
+  optional `ipython`/`jupyter` for notebook usage).
+  To set up the environment:
+
+  ```bash
+  python -m venv .venv
+  source .venv/bin/activate
+  pip install -r requirements.txt
+  ```
+
+  After activation you can run the helper scripts using `python` from the
+  same environment.
+
+## Quick start
+
+### 1. Run the tutorial workflow
+
+```bash
+python markeR_to_python.py --tutorial --output tutorial.png
+open tutorial.png  # macOS; on Linux use: xdg-open tutorial.png
+```
+
+This executes an example from the markeR vignette; consult the
+original R tutorial (linked under References) for a detailed description
+of each analysis step.
+
+### 2. Call any markeR function
+
+```bash
+python run_marker_function.py PlotScores \
+  --data counts_example \
+  --metadata metadata_example \
+  --gene_sets genesets_example \
+  --Variable "Condition" \
+  --method logmedian \
+  --nrow 1 \
+  --output my_plot.png
+```
+
+All parameters are passed as `--name value` pairs.  Use `--help-function` or
+refer to the online reference manual for argument names.  The two scripts
+are described briefly below.
+
+---
+
+## Tutorial script (`markeR_to_python.py`)
+
+A minimal Python example illustrating how to call markeR from within a Python 
+environment. It loads the example datasets, runs a selected function, and 
+generates a corresponding plot. The script is intended as a template for users 
+who wish to structure and extend their own analysis workflows in Python.
+
+```bash
+python markeR_to_python.py --tutorial [--output file.png]
+```
+
+---
+
+## Flexible CLI caller (`run_marker_function.py`)
+
+This wrapper constructs and executes R code on the fly, so you can run any
+markeR function without _writing_ R.  It handles type conversion, data
+loading and optional PNG output.
+
+Basic syntax:
+
+```bash
+python run_marker_function.py FUNCTION_NAME [OPTIONS]
+```
+
+Useful options:
+
+* `--param value` – named argument for the R function
+* `--output filename.png` – capture plot output
+* `--width` / `--height` – PNG dimensions (pixels; default 800×600)
+* `--help-function` – show documentation link/usage hint
+* `--verbose` – display generated R code prior to execution
+
+Example (scores only, no plot):
+
+```bash
+python run_marker_function.py CalculateScores \
+  --data counts_example \
+  --metadata metadata_example \
+  --gene_sets genesets_example \
+  --method logmedian
+```
+
+Example saving a plot to PNG (with custom dimensions):
+
+```bash
+python run_marker_function.py PlotScores \
+  --data counts_example \
+  --metadata metadata_example \
+  --gene_sets genesets_example \
+  --Variable "Condition" \
+  --method logmedian \
+  --nrow 1 \
+  --width 800 \
+  --height 400 \
+  --output my_plot.png
+```
+---
+
+## Built‑in example data
+
+`counts_example`, `metadata_example` and `genesets_example` are loaded
+automatically and mirror the objects used in the tutorial.  They let you
+try commands without supplying your own datasets.
+
+---
+
+## Tips & troubleshooting
+
+* Add `--verbose` to see the exact R code being run – handy when a
+  parameter doesn’t behave as expected.
+* If `--output` fails, check write permissions and ensure the directory
+  exists.
+* Installation problems usually indicate R is missing; make sure `R` is
+  on your `PATH` before installing Python dependencies.
+
+---
+
+## References
+
+* [markeR on Bioconductor](https://bioconductor.org/packages/markeR)
+* [Official R tutorial](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html)
+* [Reference manual](https://diseasetranscriptomicslab.github.io/markeR/reference/)
+* Paper: https://www.biorxiv.org/content/10.64898/2025.12.05.692517
+
+---
+
+*Notes:* scripts install markeR via BiocManager if it’s not already present.
+Warnings from `ggplot2` (e.g. about `aes_string()`) are harmless and come
+from the package itself.
diff --git a/python/markeR_to_python.py b/python/markeR_to_python.py
new file mode 100644
index 0000000..a2e3b09
--- /dev/null
+++ b/python/markeR_to_python.py
@@ -0,0 +1,321 @@
+"""Simple workflow for using the Bioconductor package `markeR` from Python via rpy2.
+
+source .venv/bin/activate
+pip install -r requirements.txt
+python markeR_to_python.py --tutorial   # runs the example workflow
+
+This script shows how to:
+
+1. Configure rpy2 and ensure an R environment is available.
+2. Install Bioconductor and markeR if not already installed.
+3. Load R functions into Python.
+4. Demonstrate a markeR analysis using example data.
+
+Notes:
+- You must have R (>=4.5) installed on your system.
+- Install the Python package `rpy2` in the same environment where this script runs:
+    pip install rpy2
+
+For more details on markeR see https://bioconductor.org/packages/markeR
+
+This module also includes utilities (`plot_r_expression`, `plot_r_function`)
+that open an R graphics device and capture plots as PNG files.  When run inside a
+Jupyter notebook the plots are automatically displayed inline; otherwise the
+images are saved to a temporary file whose path is printed.
+"""
+
+from __future__ import annotations
+
+import sys
+
+# check that required Python libraries are installed before proceeding
+_missing = []
+try:
+    import numpy  # used by rpy2 and examples
+except ImportError:  # pragma: no cover - dependency check
+    _missing.append("numpy")
+try:
+    import pandas  # examples use it for conversions
+except ImportError:  # pragma: no cover
+    _missing.append("pandas")
+try:
+    import rpy2  # primary bridge to R
+except ImportError:  # pragma: no cover
+    _missing.append("rpy2")
+
+if _missing:
+    sys.exit(
+        "The following Python packages are required but not installed: %s.\n"
+        "Please install them (e.g. `pip install -r requirements.txt`)." %
+        ", ".join(_missing)
+    )
+
+# rpy2 imports
+import rpy2.robjects as ro
+from rpy2.robjects import pandas2ri
+from rpy2.robjects import conversion
+from rpy2.robjects.packages import importr, isinstalled
+
+# note: pandas2ri.activate() is deprecated; we use conversion contexts when
+# converting.  helpers below wrap the recommended API.
+
+
+def _to_py(obj):
+    """Convert an R object to a pandas/numpy equivalent."""
+    with conversion.localconverter(ro.default_converter + pandas2ri.converter):
+        return conversion.rpy2py(obj)
+
+# utilities for inline plotting (e.g. in Jupyter notebooks)
+import os
+import tempfile
+try:
+    from IPython.display import Image, display
+    _HAS_IPYTHON = True
+except ImportError:  # not running in notebook
+    _HAS_IPYTHON = False
+
+
+def _r_open_png(width=800, height=600, filename=None):
+    """Start an R PNG device, returning the filename used."""
+    if filename is None:
+        filename = tempfile.mktemp(suffix=".png")
+    else:
+        # Convert to absolute path to ensure R saves to the intended location
+        filename = os.path.abspath(filename)
+    # ensure the path exists
+    dirname = os.path.dirname(filename)
+    if dirname and not os.path.isdir(dirname):
+        os.makedirs(dirname, exist_ok=True)
+    ro.r(f'png("{filename}", width={width}, height={height})')
+    return filename
+
+
+def _r_close_device():
+    """Close the active R graphics device."""
+    ro.r('dev.off()')
+
+
+def plot_r_expression(expr: str, width=800, height=600, filename=None, display_plot=True):
+    """Evaluate an R expression that produces a plot and optionally display it.
+
+    Parameters
+    ----------
+    expr : str
+        R code string that generates a plot when evaluated.
+    width, height : int
+        Dimensions for the PNG device in pixels.
+    filename : str or None
+        Path to which the image should be saved. If None a temporary file
+        will be created.
+    display_plot : bool
+        If True and running under IPython, display the resulting PNG
+        inline. Otherwise the path is printed.
+
+    Returns
+    -------
+    str
+        The path to the saved PNG file.
+    """
+    fname = _r_open_png(width=width, height=height, filename=filename)
+    ro.r(expr)
+    _r_close_device()
+    if display_plot and _HAS_IPYTHON:
+        display(Image(filename=fname))
+    else:
+        print(f"plot written to {fname}")
+    return fname
+
+
+def plot_r_function(func_name: str, *args, width=800, height=600, filename=None,
+                    display_plot=True, **kwargs):
+    """Call an R plotting function by name and save/display result.
+
+    Any positional and keyword arguments are converted to their R
+    equivalents by rpy2.
+
+    Example::
+
+        plot_r_function('PlotScores', data=counts_example, metadata=metadata_example,
+                        gene_sets=genesets_example)
+    """
+    fname = _r_open_png(width=width, height=height, filename=filename)
+    rfunc = ro.r[func_name]
+    # convert kwargs into ro objects (rpy2 handles this automatically)
+    rfunc(*args, **kwargs)
+    _r_close_device()
+    
+    # Try to display if requested; if in actual IPython/Jupyter context, display inline;
+    # otherwise just print the path.
+    if display_plot and _HAS_IPYTHON:
+        try:
+            # Check if we're actually in an interactive IPython shell (not just that it's installed)
+            from IPython import get_ipython
+            ipython = get_ipython()
+            if ipython is not None:
+                display(Image(filename=fname))
+            else:
+                print(f"plot written to {fname}")
+        except Exception:
+            # If anything goes wrong (file not ready, not in IPython context, etc.)
+            print(f"plot written to {fname}")
+    else:
+        print(f"plot written to {fname}")
+    return fname
+
+
+# ---------------------------------------------------------------------------
+# Utility functions
+# ---------------------------------------------------------------------------
+
+def ensure_bioc_installed() -> None:
+    """Install Bioconductor's package manager if it is not already present."""
+    biocinstaller = "BiocManager"
+    if not isinstalled(biocinstaller):
+        ro.r('install.packages("{0}")'.format(biocinstaller))
+    ro.r('suppressMessages(require({0}))'.format(biocinstaller))
+
+
+def install_markeR() -> None:
+    """Install the markeR package from Bioconductor if not already installed.
+
+    Uses BiocManager to perform the installation.  After running this function
+    the package should be loadable via `importr("markeR")`.
+    """
+    ensure_bioc_installed()
+    if not isinstalled("markeR"):
+        ro.r('BiocManager::install("markeR", ask=FALSE, update=FALSE)')
+    ro.r('library(markeR)')
+
+
+def get_markeR_functions() -> ro.Environment:
+    """Return the markeR namespace so that functions can be accessed conveniently.
+
+    Example:
+        mark = get_markeR_functions()
+        scores = mark.CalculateScores(data=counts, metadata=metadata, gene_sets=genesets, method="logmedian")
+    """
+    install_markeR()
+    # importing via importr is more reliable than accessing `ro.r['markeR']`.
+    try:
+        return importr("markeR")
+    except Exception as e:
+        raise RuntimeError("Unable to load markeR package: %s" % e)
+
+
+
+
+
+# ---------------------------------------------------------------------------
+# Tutorial helpers using markeR example data
+# ---------------------------------------------------------------------------
+
+def load_benchmark_examples():
+    """Load the built-in example data and gene sets from the markeR package.
+
+    Returns a tuple `(counts, metadata, genesets)` where each element is an R
+    object.  You can convert them to pandas objects if desired.
+    """
+    # ensure package is installed and loaded
+    install_markeR()
+    # load the three example datasets provided by the vignette
+    ro.r('data("genesets_example", package="markeR")')
+    ro.r('data("counts_example", package="markeR")')
+    ro.r('data("metadata_example", package="markeR")')
+    genesets = ro.r('genesets_example')
+    counts = ro.r('counts_example')
+    metadata = ro.r('metadata_example')
+    return counts, metadata, genesets
+
+
+def tutorial_benchmark(output_file=None):
+    """Demonstrate a small benchmarking mode example from the markeR vignette.
+    
+    Parameters
+    ----------
+    output_file : str or None
+        If provided, saves the display output to a file using the R graphics device.
+        Note: The markeR::PlotScores function outputs to the active graphics device.
+    """
+    print("-- loading example data from markeR")
+    counts, metadata, genesets = load_benchmark_examples()
+
+    # show dimensions of the data
+    print("counts matrix dimensions:", ro.r('dim')(counts))
+    print("metadata dimensions:", ro.r('dim')(metadata))
+    print("available gene sets:", list(genesets.names))
+
+    # run CalculateScores (logmedian method) as in the tutorial
+    calculate = ro.r['CalculateScores']
+    print("-- calculating scores using logmedian method")
+    df_scores = calculate(data=counts,
+                          metadata=metadata,
+                          method="logmedian",
+                          gene_sets=genesets)
+
+    # df_scores is an R list with one element per gene set; convert first one
+    # to pandas for display
+    first_name = list(df_scores.names)[0]
+    r_first = df_scores.rx2(first_name)
+    try:
+        import pandas as pd
+        pd_first = _to_py(r_first)
+        print(f"first gene set ({first_name}) scores (head):\n", pd_first.head())
+    except ImportError:
+        print("pandas not available; skipping conversion of results to DataFrame")
+
+    # Generate plot using PlotScores
+    # Note: markeR's PlotScores function creates an interactive plot or writes to the current device
+    print("-- generating a simple score plot")
+    
+    if output_file:
+        # Set up PNG device
+        output_file = os.path.abspath(output_file)
+        dirname = os.path.dirname(output_file)
+        if dirname and not os.path.isdir(dirname):
+            os.makedirs(dirname, exist_ok=True)
+        ro.r(f'png("{output_file}", width=800, height=400)')
+        print(f"   saving to: {output_file}")
+    
+    # Call the plotting function and force evaluation of the returned plot
+    ro.r('''
+    p <- PlotScores(
+        data = counts_example,
+        metadata = metadata_example,
+        gene_sets = genesets_example,
+        Variable = "Condition",
+        method = "logmedian",
+        nrow=1
+    )
+    print(p)
+    ''')
+    
+    if output_file:
+        # Close device
+        ro.r('dev.off()')
+        print(f"plot saved to {output_file}")
+
+# ---------------------------------------------------------------------------
+# Command-line interface
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    output_file = None
+    
+    # Parse command line arguments
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--tutorial":
+            # Check if --output flag is present
+            if len(sys.argv) > 2 and sys.argv[2].startswith("--output"):
+                if sys.argv[2] == "--output" and len(sys.argv) > 3:
+                    output_file = sys.argv[3]
+                elif "=" in sys.argv[2]:
+                    output_file = sys.argv[2].split("=", 1)[1]
+            tutorial_benchmark(output_file=output_file)
+        else:
+            print("usage: python markeR_to_python.py --tutorial [--output FILENAME]")
+            print("  --tutorial              : load markeR example data and compute logmedian scores")
+            print("  --output FILENAME       : save plot to specified PNG file (optional)")
+            print("                            example: python markeR_to_python.py --tutorial --output my_plot.png")
+    else:
+        print("usage: python markeR_to_python.py --tutorial [--output FILENAME]")
+        print("See the module docstring for more details.")
\ No newline at end of file
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000..fdb0e44
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,9 @@
+# install packages for markeR_to_python script:
+#   pip install -r requirements.txt
+
+rpy2>=3.6
+pandas
+numpy
+# optional (for notebook inline display)
+ipython
+jupyter
diff --git a/python/run_marker_function.py b/python/run_marker_function.py
new file mode 100644
index 0000000..5ca3337
--- /dev/null
+++ b/python/run_marker_function.py
@@ -0,0 +1,366 @@
+"""Flexible wrapper to call any markeR R function from Python.
+
+This script allows you to call markeR functions directly without writing Python code.
+
+Usage examples:
+    python run_marker_function.py PlotScores --help-function
+    python run_marker_function.py CalculateScores \\
+        --data counts_example --metadata metadata_example \\
+        --gene_sets genesets_example --method logmedian --verbose
+    
+    python run_marker_function.py PlotScores \\
+        --data counts_example --metadata metadata_example \\
+        --gene_sets genesets_example --Variable "Condition" \\
+        --method logmedian --nrow 1 --output plot.png
+
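+Options:
+    --help-function      Show R documentation for the function
+    --verbose            Print the generated R code before executing
+    --output FILE        Save plot to PNG file
+    --width PIXELS       PNG width in pixels (default: 800)
+    --height PIXELS      PNG height in pixels (default: 600)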
+
+For built-in example data, use the names: counts_example, metadata_example, genesets_example
+"""
+
+import sys
+import argparse
+import json
+import os
+import re
+
+# Check dependencies
+# rpy2 provides the embedded R session: all R interaction in this script goes
+# through `ro.r(...)` and `isinstalled(...)`. The import is guarded so that a
+# missing package produces an actionable message instead of a traceback.
+_missing = []
+try:
+    import rpy2.robjects as ro
+    from rpy2.robjects.packages import importr, isinstalled
+except ImportError:
+    _missing.append("rpy2")
+
+# Abort early: sys.exit() with a string prints it to stderr and exits with status 1.
+if _missing:
+    sys.exit(
+        "The following Python packages are required but not installed: %s.\n"
+        "Please install them (e.g. `pip install rpy2`)." % ", ".join(_missing)
+    )
+
+def ensure_bioc_installed() -> None:
+    """Install Bioconductor's package manager (BiocManager) if it is missing.
+
+    The CRAN mirror is passed explicitly. Without a configured mirror,
+    `install.packages()` either prompts for one or fails, neither of which
+    works for a script driving an embedded R session.
+    """
+    biocinstaller = "BiocManager"
+    if not isinstalled(biocinstaller):
+        ro.r(
+            'install.packages("{0}", repos="https://cloud.r-project.org")'.format(biocinstaller)
+        )
+    # Attach quietly; later calls use the BiocManager:: prefix, so a failed
+    # attach surfaces there with a clear R error.
+    ro.r('suppressMessages(require({0}))'.format(biocinstaller))
+
+
+def install_markeR() -> None:
+    """Make sure markeR is installed in the R library, then attach it.
+
+    BiocManager is ensured first because it performs the installation; the
+    install step is skipped when markeR is already present.
+    """
+    ensure_bioc_installed()
+    already_present = isinstalled("markeR")
+    if not already_present:
+        ro.r('BiocManager::install("markeR", ask=FALSE, update=FALSE)')
+    # Attach the package so its functions can be called unqualified.
+    ro.r('library(markeR)')
+
+
+def load_example_data():
+    """Load the example datasets shipped with markeR into the R session.
+
+    Ensures markeR is installed and attached, then calls R's `data()` for
+    each dataset so it can be referenced by name in generated R code.
+    """
+    install_markeR()
+    # Same load order as before: gene sets, counts, metadata.
+    for dataset in ("genesets_example", "counts_example", "metadata_example"):
+        ro.r('data("%s", package="markeR")' % dataset)
+    print("Loaded markeR example datasets: counts_example, metadata_example, genesets_example")
+
+
+def _r_string(text: str) -> str:
+    """Quote `text` as an R double-quoted string literal."""
+    # Backslashes are doubled first; otherwise the backslash inserted in front
+    # of each quote would be doubled as well and the quote would end the string.
+    return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+
+def _json_to_r(obj) -> str:
+    """Render a decoded JSON value (scalar, list or dict) as R source code."""
+    if obj is None:
+        return "NULL"
+    if isinstance(obj, bool):  # checked before numbers: bool is an int subclass
+        return "TRUE" if obj else "FALSE"
+    if isinstance(obj, (int, float)):
+        if obj != obj:  # NaN is the only value not equal to itself
+            return "NaN"
+        if obj in (float("inf"), float("-inf")):
+            return "Inf" if obj > 0 else "-Inf"
+        return repr(obj)
+    if isinstance(obj, str):
+        return _r_string(obj)
+    if isinstance(obj, dict):
+        # JSON object -> named list; names are quoted so any key is accepted.
+        return "list(%s)" % ", ".join(
+            "%s = %s" % (_r_string(str(name)), _json_to_r(item)) for name, item in obj.items()
+        )
+    # JSON array: a homogeneous array of scalars becomes an atomic vector,
+    # anything else (mixed types, nulls, nesting) becomes a list.
+    kinds = set()
+    for item in obj:
+        if isinstance(item, bool):
+            kinds.add(bool)
+        elif isinstance(item, (int, float)):
+            kinds.add(float)
+        else:
+            kinds.add(type(item))
+    atomic = len(kinds) <= 1 and kinds <= {bool, float, str}
+    rendered = ", ".join(_json_to_r(item) for item in obj)
+    return ("c(%s)" if atomic else "list(%s)") % rendered
+
+
+def parse_parameter(value: str):
+    """
+    Convert a command-line value into the R source text that represents it.
+
+    - "true"/"false"/"null" (any case) become TRUE / FALSE / NULL
+    - Plain decimal numbers (optional sign, fraction, exponent) are passed through
+    - Names of the built-in example objects (e.g., counts_example) are kept as-is
+    - JSON arrays become c(...) or list(...); JSON objects become named list(...)
+    - Anything else becomes a quoted R string with backslashes and quotes escaped
+
+    Parameters
+    ----------
+    value : str
+        Raw value as typed on the command line.
+
+    Returns
+    -------
+    str
+        R code for the value, ready to be placed in a function call.
+    """
+    literals = {"true": "TRUE", "false": "FALSE", "null": "NULL"}
+    lowered = value.lower()
+    if lowered in literals:
+        return literals[lowered]
+
+    # Only spellings R itself parses as numbers; int()/float() would also
+    # accept forms such as "1_000" that are not valid R.
+    if re.fullmatch(r'[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?', value):
+        return value
+
+    # Check if it's a known R object name (example data)
+    if value in ("counts_example", "metadata_example", "genesets_example"):
+        return value
+
+    # JSON input is translated to R syntax, since raw JSON is not valid R.
+    try:
+        decoded = json.loads(value)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return _r_string(value)
+    return _json_to_r(decoded)
+
+
+def build_r_call(function_name: str, params: dict, output_file: str = None, width: int = 800, height: int = 600) -> str:
+    """
+    Build the R code that calls a function and prints (or plots) its result.
+
+    Parameters
+    ----------
+    function_name : str
+        Name of the R function to call. It is inserted verbatim, so the
+        caller is responsible for validating it.
+    params : dict
+        Mapping of argument names to raw string values; each value is
+        converted with `parse_parameter`. An "output_file" key is ignored.
+    output_file : str
+        If provided, the call is wrapped in a PNG device writing to this
+        path. The parent directory is created if it does not exist.
+    width : int
+        PNG width in pixels (default: 800)
+    height : int
+        PNG height in pixels (default: 600)
+
+    Returns
+    -------
+    str
+        Complete R code to execute
+    """
+    def quote_for_r(text: str) -> str:
+        # Double backslashes before escaping quotes so neither can end the literal.
+        return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+    def arg_name(name: str) -> str:
+        # Ordinary identifiers are used as-is; anything else is passed as a
+        # quoted name so it cannot alter the structure of the call.
+        if re.fullmatch(r'[A-Za-z][A-Za-z0-9._]*', name):
+            return name
+        return quote_for_r(name)
+
+    # Remove output_file from params if present
+    params = {k: v for k, v in params.items() if k != "output_file"}
+
+    param_str = ", ".join(
+        f"{arg_name(key)} = {parse_parameter(value)}" for key, value in params.items()
+    )
+    call_line = f"result <- {function_name}({param_str})\n"
+
+    if not output_file:
+        # For screen output, try to print the result
+        return call_line + "tryCatch(print(result), error=function(e) { cat('Function executed.\\n') })\n"
+
+    output_file = os.path.abspath(output_file)
+    dirname = os.path.dirname(output_file)
+    if dirname and not os.path.isdir(dirname):
+        os.makedirs(dirname, exist_ok=True)
+
+    # The path is escaped for R (Windows paths contain backslashes), and the
+    # device is closed in `finally` so a failing call does not leave it open.
+    pieces = [
+        f"png({quote_for_r(output_file)}, width={width}, height={height})\n",
+        "tryCatch({\n",
+        call_line,
+        "tryCatch(print(result), error=function(e) { invisible(NULL) })\n",
+        "}, finally = dev.off())\n",
+    ]
+    return "".join(pieces)
+
+
+def _show_function_help(func_name: str) -> None:
+    """Print a help banner and the online documentation link for a markeR function.
+
+    `func_name` is interpolated into R code, so it must be validated by the caller.
+    """
+    banner = "=" * 70
+    print(f"\n{banner}")
+    print(f"Help for markeR::{func_name}")
+    print(f"{banner}\n")
+    install_markeR()
+    try:
+        ro.r(f'''
+library(markeR)
+cat("Function: {func_name}\\n\\n")
+# Try to get help
+tryCatch({{
+  help_file <- help("{func_name}", package="markeR")
+  # Get description from help
+}}, error = function(e) {{
+  cat("Help available at: https://diseasetranscriptomicslab.github.io/markeR/reference/{func_name}.html\\n")
+}})
+''')
+    except Exception as e:
+        # Best effort: the documentation link printed below is still useful.
+        print(f"(could not query R help: {e})", file=sys.stderr)
+
+    print("\nDocumentation:")
+    print(f"  https://diseasetranscriptomicslab.github.io/markeR/reference/{func_name}.html")
+    print("\nTo use this function:")
+    print(f"  python run_marker_function.py {func_name} --param1 value1 --param2 value2 [--output output.png]")
+    print("\nTip: Use --verbose flag to see generated R code")
+    print(f"  python run_marker_function.py {func_name} --verbose --param1 value1 ...\n")
+    print(f"{banner}\n")
+
+
+def main():
+    """Command-line entry point: call one markeR function with `--name value` arguments.
+
+    The first argument is the R function name. `--output`, `--width`,
+    `--height`, `--verbose` and `--help-function` are handled here; every
+    other `--name value` pair is forwarded to the R function, and a `--name`
+    with no value is forwarded as TRUE. Exits with status 1 on invalid
+    arguments or when the R call fails.
+    """
+    # The parser is only used to render the help text. Arguments are parsed by
+    # hand below because the forwarded R parameters are not known in advance.
+    parser = argparse.ArgumentParser(
+        description="Call any markeR R function from Python",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python run_marker_function.py CalculateScores \\
+    --data counts_example --metadata metadata_example \\
+    --gene_sets genesets_example --method logmedian
+    
+  python run_marker_function.py PlotScores \\
+    --data counts_example --metadata metadata_example \\
+    --gene_sets genesets_example --Variable "Condition" \\
+    --method logmedian --output my_plot.png
+
+Built-in example data: counts_example, metadata_example, genesets_example
+        """
+    )
+
+    parser.add_argument(
+        "function_name",
+        help="Name of the markeR function to call (e.g., CalculateScores, PlotScores)"
+    )
+
+    parser.add_argument(
+        "--help-function",
+        action="store_true",
+        help="Show help for the R function (instead of calling it)"
+    )
+
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print the generated R code before executing"
+    )
+
+    parser.add_argument(
+        "--output",
+        help="Save plot output to a PNG file"
+    )
+
+    parser.add_argument(
+        "--width",
+        type=int,
+        default=800,
+        help="PNG width in pixels (default: 800)"
+    )
+
+    parser.add_argument(
+        "--height",
+        type=int,
+        default=600,
+        help="PNG height in pixels (default: 600)"
+    )
+
+    # Allow arbitrary parameters
+    parser.add_argument(
+        "params",
+        nargs="*",
+        help="Parameters as --name value pairs (e.g., --data counts_example --method logmedian)"
+    )
+
+    argv = sys.argv[1:]
+
+    # No arguments, or an explicit request for help: show usage and stop.
+    if not argv or argv[0] in ("-h", "--help"):
+        parser.print_help()
+        return
+
+    func_name = argv[0]
+
+    # Validate the function name before it is interpolated into any R code
+    # (both the help path and the call path do so) to prevent code injection.
+    if not re.match(r'^[A-Za-z][A-Za-z0-9_.]*$', func_name):
+        sys.exit(f"Error: invalid function name '{func_name}'. "
+                 "Function names must start with a letter and contain only letters, digits, dots or underscores.")
+
+    if "--help-function" in argv:
+        _show_function_help(func_name)
+        return
+
+    verbose = "--verbose" in argv
+    output_file = None
+    width = 800
+    height = 600
+
+    # Parse remaining arguments as key-value pairs
+    params = {}
+    i = 1
+    while i < len(argv):
+        arg = argv[i]
+        has_next = i + 1 < len(argv)
+        if arg in ("--output", "--width", "--height"):
+            # These options belong to this script; without a value they must
+            # not fall through and reach R as an argument set to TRUE.
+            if not has_next:
+                print(f"Error: {arg} requires a value")
+                sys.exit(1)
+            raw = argv[i + 1]
+            if arg == "--output":
+                output_file = raw
+            else:
+                try:
+                    number = int(raw)
+                except ValueError:
+                    print(f"Error: {arg} must be a number, got '{raw}'")
+                    sys.exit(1)
+                if arg == "--width":
+                    width = number
+                else:
+                    height = number
+            i += 2
+        elif arg in ("--verbose", "--help-function"):
+            # Skip flags that are not parameters
+            i += 1
+        elif arg.startswith("--"):
+            key = arg[2:]  # Remove --
+            if has_next and not argv[i + 1].startswith("--"):
+                params[key] = argv[i + 1]
+                i += 2
+            else:
+                # Boolean flag
+                params[key] = "TRUE"
+                i += 1
+        else:
+            # A value with no preceding --name cannot be mapped to a parameter.
+            print(f"Warning: ignoring unexpected argument '{arg}'", file=sys.stderr)
+            i += 1
+
+    # Ensure markeR is installed; load_example_data() installs and attaches
+    # markeR itself before loading the datasets.
+    print("Installing markeR if needed...")
+    load_example_data()
+
+    # Build and execute the R call
+    print(f"\nCalling {func_name} with parameters:")
+    for key, value in params.items():
+        print(f"  {key} = {value}")
+
+    if output_file:
+        print(f"  Saving plot to: {output_file}")
+        print(f"  PNG dimensions: {width}x{height} pixels")
+
+    r_code = build_r_call(func_name, params, output_file, width, height)
+
+    # Show R code if verbose mode
+    if verbose:
+        print(f"\n{'='*70}")
+        print("Generated R code:")
+        print(f"{'='*70}")
+        print(r_code)
+        print(f"{'='*70}\n")
+
+    print("Executing R code...\n")
+    print("=" * 60)
+
+    try:
+        ro.r(r_code)
+        print("=" * 60)
+        if output_file:
+            print(f"\n✓ Plot saved to: {output_file}")
+        else:
+            print("\n✓ Function executed successfully")
+    except Exception as e:
+        print("=" * 60)
+        print(f"\n✗ Error executing function: {e}")
+        if not verbose:
+            print("\nTip: Use --verbose flag to see the generated R code")
+            print(f"  python run_marker_function.py {func_name} --verbose [other options]")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf
new file mode 100644
index 0000000..49c490f
Binary files /dev/null and b/tests/testthat/Rplots.pdf differ
diff --git a/tests/testthat/test-calculateDE.R b/tests/testthat/test-calculateDE.R
index 00b45f5..b212192 100644
--- a/tests/testthat/test-calculateDE.R
+++ b/tests/testthat/test-calculateDE.R
@@ -62,7 +62,7 @@ test_that("calculateDE returns all coefficients when no contrast is given", {
   expect_true("A" %in% names(res))
   expect_true("B" %in% names(res))
 })
-
+ 
 test_that("calculateDE errors with mismatched design matrix", {
   set.seed(1004)
   expr <- matrix(rpois(100, lambda = 20), nrow = 10, ncol = 10)
@@ -74,6 +74,7 @@ test_that("calculateDE errors with mismatched design matrix", {
     stringsAsFactors = FALSE
   )
   design_bad <- matrix(1, nrow = 9, ncol = 2)
+  
   expect_error(
     calculateDE(
       data = expr,
@@ -81,10 +82,11 @@ test_that("calculateDE errors with mismatched design matrix", {
       modelmat = design_bad,
       contrasts = "A-B"
     ),
-    "Rows in 'modelmat' must match the number of samples"
+    regexp = "must be a matrix with rows equal to the number of samples"
   )
 })
 
+
 test_that("calculateDE handles NAs with ignore_NAs", {
   set.seed(1005)
   expr <- matrix(rpois(100, lambda = 20), nrow = 10, ncol = 10)
diff --git a/tests/testthat/test-geneset_similarity.R b/tests/testthat/test-geneset_similarity.R
index 62c311b..71739ec 100644
--- a/tests/testthat/test-geneset_similarity.R
+++ b/tests/testthat/test-geneset_similarity.R
@@ -16,39 +16,6 @@ test_that("geneset_similarity returns jaccard 1 for identical sets", {
   expect_equal(sim, 1)
 })
 
-test_that("geneset_similarity pval_threshold filters labels as expected for odds_ratio", {
-  # Fake simple signatures to ensure overlap
-  sig1 <- c("GENE1", "GENE2", "GENE3", "GENE4", "GENE5")
-  sig2 <- c("GENE1", "GENE2", "GENE6", "GENE7", "GENE8")
-  signatures <- list(A = sig1)
-  others <- list(B = sig2)
-  # Large universe to ensure non-perfect overlap
-  universe <- toupper(paste0("GENE", 1:50))
-
-  # Run with a permissive pval_threshold (should label if pval < threshold)
-  res <- geneset_similarity(
-    signatures = signatures,
-    other_user_signatures = others,
-    metric = "odds_ratio",
-    universe = universe,
-    pval_threshold = 1 # allow all p-values
-  )
-  d <- res$data
-  expect_true(any(d$Label != "")) # Some label should be shown
-
-  # Run with a restrictive pval_threshold (should blank out most labels)
-  res2 <- geneset_similarity(
-    signatures = signatures,
-    other_user_signatures = others,
-    metric = "odds_ratio",
-    universe = universe,
-    pval_threshold = 1e-10 # extremely small, almost nothing will label
-  )
-  d2 <- res2$data
-  expect_true(all(d2$Label == "")) # All labels should be blank
-})
-
-
 test_that("geneset_similarity returns expected odds ratio values", {
   # Simple signatures with known overlap
   sig1 <- c("GENE1", "GENE2", "GENE3", "GENE4")
@@ -67,19 +34,19 @@ test_that("geneset_similarity returns expected odds ratio values", {
   cont_tbl <- matrix(c(2, 2, 2, 4), nrow = 2)
   fisher_res <- fisher.test(cont_tbl)
   expected_or <- as.numeric(fisher_res$estimate)
-  expected_log10or <- log10(expected_or)
-
+ 
   # Run the function
   res <- geneset_similarity(
     signatures = signatures,
     other_user_signatures = others,
     metric = "odds_ratio",
-    universe = universe
+    universe = universe,
+    pval_threshold=1
   )
   d <- res$data
 
   # Check the actual odds ratio (on log10 scale) is close to expected
-  expect_equal(d$Score, expected_log10or, tolerance = 1e-6)
+  expect_equal(d$Score, expected_or, tolerance = 1e-6)
 })
 
 test_that("geneset_similarity returns expected Jaccard index values with H collection", {
@@ -104,7 +71,8 @@ test_that("geneset_similarity returns expected Jaccard index values with H colle
     signatures = signatures,
     metric = "jaccard",
     collection = "H",
-    msig_subset = "HALLMARK_MYC_TARGETS_V1"
+    msig_subset = "HALLMARK_MYC_TARGETS_V1",
+    pval_threshold=1
   )
   d <- res$data
   # Find the row for this comparison
diff --git a/tests/testthat/test-plotNESlollipop.R b/tests/testthat/test-plotNESlollipop.R
index 9288ff0..7789f95 100644
--- a/tests/testthat/test-plotNESlollipop.R
+++ b/tests/testthat/test-plotNESlollipop.R
@@ -9,7 +9,7 @@ test_that("plotNESlollipop returns list of ggplot objects for single contrast",
   )
   result <- plotNESlollipop(gsea_mock)
   expect_type(result, "list")
-  expect_true(all(sapply(result, function(x) inherits(x, "gg"))))
+  expect_true(all(vapply(result, function(x) inherits(x, "gg"), logical(1))))
 })
 
 test_that("plotNESlollipop does not fail when all padj > sig_threshold", {
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/articles/.gitignore b/vignettes/articles/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/articles/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/Tutorial_BenchmarkingMode.Rmd b/vignettes/articles/Article_BenchmarkingMode.Rmd
similarity index 77%
rename from vignettes/Tutorial_BenchmarkingMode.Rmd
rename to vignettes/articles/Article_BenchmarkingMode.Rmd
index cd4f8f9..ff87934 100644
--- a/vignettes/Tutorial_BenchmarkingMode.Rmd
+++ b/vignettes/articles/Article_BenchmarkingMode.Rmd
@@ -1,48 +1,35 @@
 ---
 title: "Benchmarking Mode Tutorial"
-output: rmarkdown::html_document
-vignette: >
-  %\VignetteIndexEntry{Benchmarking Mode Tutorial}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
+author: "Rita M. Silva"
+date: "`r Sys.Date()`"
 ---
-  
-```{r setup, include=FALSE}
+
+```{r, include = FALSE}
 knitr::opts_chunk$set(
-  dev = "png",
-  dpi = 72, 
-  fig.retina = 1
+  collapse = TRUE,
+  comment = "#>"
 )
 ```
 
-This vignette provides a comprehensive introduction to the **markeR** package, focusing on its **Benchmarking Mode**. This mode is designed to evaluate the performance of gene signatures in quantifying specific biological states or phenotypes, such as disease states or cellular conditions. It allows users to assess the robustness and reliability of gene signatures across various conditions, providing a standardized framework for benchmarking.
- 
-# Installation 
 
+This vignette provides a comprehensive introduction to the **markeR** package, focusing on its **Benchmarking Mode**. This mode is designed to evaluate gene sets' performance in marking a metadata variable, *i.e.*, a phenotype such as disease or cellular condition, returning comparative visualisations across scoring and enrichment methods. It allows users to assess the robustness and reliability of gene sets across various conditions, providing a standardized framework for benchmarking.
  
-```{r, include = FALSE }
-library(markeR)
-```
-
-``` r
-# install.packages("devtools")
-devtools::install_github("DiseaseTranscriptomicsLab/markeR")
-```
-
 # Case-study: Senescence
 
-We will be using an already pre-processed gene expression dataset, derived from the Marthandan et al. (2016) study (GSE63577), that includes human fibroblast samples cultured under two different conditions: replicative senescence and proliferative control. The dataset has already been filtered and normalized using the `edgeR` package. For more information about the dataset structure, see the help pages for `?counts_example` and `?metadata_example`.
+In this vignette, we use a pre‑processed RNA-seq dataset from Marthandan et al. (2016, GSE63577), with normalised read counts for human fibroblasts under **replicative senescence** and **proliferative control**. See `?counts_example` for preprocessing details and structure. `markeR` requires as input a filtered and normalised, non log-transformed, gene expression matrix (genes × samples). Row names must be gene identifiers; column names must match sample IDs in the metadata. 
+
+We use the accompanying metadata from the Marthandan et al. (2016) study (see `?metadata_example`).
 
-This dataset serves as a working example to demonstrate the main functionalities of the markeR package. In particular, it will be used to showcase the two primary modules designed for benchmarking gene signatures:
+This dataset serves as a working example to demonstrate the main functionalities of the `markeR` package. In particular, it will be used to showcase the two primary modules of `markeR` for quantifying phenotypes using gene sets:
 
--   **Score**: calculates expression-based signature scores for each sample, and
--   **Enrichment**: evaluates the over-representation of gene signatures within ranked gene lists.
+- **Score-based methods**:  log2-median expression, ranking approaches, and single-sample gene set enrichment analysis (ssGSEA) to quantify coordinated expression within a gene set.
+- **Enrichment-based methods**: GSEA using moderated t- or B-statistics.
 
 To illustrate the usage of `markeR`, we use three gene sets commonly associated with cellular senescence:
 
--   **LiteratureMarkers**: A small, curated set of well-established senescence-associated genes repeatedly reported in the literature. This concise gene set includes key markers often used for validating senescence phenotypes. This set includes information on the directionality of gene regulation (i.e., whether genes are typically up- or down-regulated in senescence).
+-   **LiteratureMarkers**: A small, curated set of well-established senescence-associated genes repeatedly reported in the literature. This concise gene set includes key markers often used for validating senescence phenotypes. This set includes information on the expected direction of change in expression of genes upon phenotype (i.e., whether genes are expected to be up- or down-regulated in senescence).
 -   **REACTOME_CELLULAR_SENESCENCE**: A comprehensive gene set from the MSigDB REACTOME collection, representing known molecular pathways involved in cellular senescence and commonly used in enrichment-based analyses. This is also treated as an undirected gene set without information on the up- or down-regulation of individual genes.
--   **HernandezSegura**: A transcriptomic gene set identified by Hernandez-Segura et al. (2017) as consistently altered across multiple senescence models. This set includes information on the directionality of gene regulation. It has shown strong performance in classification and enrichment analyses, including in the original paper of markeR.
+-   **HernandezSegura**: A transcriptomic gene set identified by Hernandez-Segura et al. (2017) as consistently altered across multiple senescence models. This set includes information on the expected direction of change in expression of genes upon phenotype. 
 
 ```{r example}
 library(markeR)
@@ -60,7 +47,12 @@ data(counts_example)
 counts_example[1:5,1:5]
 ```
 
-For illustration purposes of different variable types, let's imagine we also had two additional variables: one indicating the number of days between sample preparation and sequencing (`DaysToSequencing`), and another identifying the person who processed each sample (`researcher`). These variables are hypothetical and not part of the original study design.
+For illustration purposes, two synthetic variables were added to the data:
+
+* `DaysToSequencing`, the number of days between sample preparation and sequencing;
+* `Researcher`, identifying the person who processed each sample.
+
+This enables exploration of associations between gene set activity and both categorical and continuous variables.
  
 
 ```{r load_metadata}
@@ -76,28 +68,40 @@ head(metadata_example)
 
 # Calculate Senescence Scores 
 
-The `CalculateScores` function computes the signature scores for each sample based on predefined gene sets, such as a senescence gene set. It returns a named list where each entry corresponds to a specific gene set and includes the calculated scores, along with metadata (if available). When setting `method = "all"`, the function returns a list, where each element corresponds to a scoring method and contains the respective data frame of scores, allowing comparison between methods. The function allows users to select from three different scoring methods:
+The `CalculateScores` function computes the gene set scores for each sample based on predefined gene sets, such as a senescence gene set. It returns a named list where each entry corresponds to a specific gene set and includes the calculated scores, along with metadata (if available). When setting `method = "all"`, the function returns a list, where each element corresponds to a scoring method and contains the respective data frame of scores, allowing comparison between methods. The function allows users to select from three different scoring methods:
 
--   **ssGSEA**: Computes an enrichment score for each gene set in each sample.
--   **logmedian**: Calculates the score as the sum of the normalized (log2-median-centered) expression values of the genes in the gene set, divided by the number of genes.
--   **ranking**: Determines the score by ranking the expression of the genes in the gene set and normalizing the result.
+* **logmedian**: mean of the across-sample normalised log2 median-centred expression levels of the genes in the set; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
 
-These methods are very similar and, when applied to a robust gene set, will yield similar results across all three methods. Empirically, a good gene set will be one that shows consistent results, both in the calculated scores and in Cohen's d or F statistics, across different methods. If the gene set is not robust, or if there is considerable noise, the results across methods may differ significantly. Consistent scores across methods typically indicate a more reliable and meaningful gene set. These methods are explained in more detail below, allowing the user to select the most appropriate one for their analysis.
+* **ranking**: mean expression rank of gene set members in each sample; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset, and is normalised by the number of genes in the set.
 
-The `PlotScores` function can be used to compute and visualize the scores in various ways, depending on the method and variable chosen.
+* **ssGSEA**: single-sample gene set enrichment score using ssGSEA; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
 
--   If `method = "all"` and the variable is categorical, it will return a heatmap of Cohen's d or F statistics and a volcano plot showing contrasts between all groups of that variable.
--   If `method = "all"` and the variable is numeric, a heatmap of Cohen's F and a volcano plot will be produced.
--   If `method != "all"` and the variable is categorical, it will generate a violin plot for each gene set.
--   If `method != "all"` and the variable is `NULL`, a density plot of the score distribution will be displayed.
--   If `method != "all"` and the variable is numeric, a scatter plot will be created to show the relationship between the scores and the numeric variable.
+These methods are very similar and, when applied to a robust gene set, are expected to yield similar results across all three methods. Empirically, a good gene set will be one that shows consistent results, both in the calculated scores and in Cohen's d or f statistics, across different methods. If the gene set is not robust, or if there is considerable noise, the results across methods may differ significantly.
+
+The `PlotScores` function computes and visualizes gene set scores according to the chosen method and variable type:
+
+- `method = "all"`  
+
+    - Categorical `Variable`: produces a heatmap of Cohen's *d* and a volcano plot of all pairwise group contrasts.  
+    - Numeric `Variable`: produces a heatmap of Cohen's *f* statistics and a volcano plot of associations.  
+
+- `method != "all"`  
+
+    - Categorical `Variable`: generates violin plots of scores per gene set.  
+    - Numeric `Variable`: generates scatter plots of scores versus the numeric variable.  
+    - `Variable` is `NULL`: displays a density plot of the score distribution.  
+
+This structure clearly links each combination of method and variable type to the resulting visualization, avoiding ambiguity.
 
 
 ## logmedian method
- 
-The following example uses the **`logmedian`** method to calculate a gene signature score. This method first applies a log2 transformation to the expression values, and then centers them by subtracting the median expression (across all samples) for each genes. The score for each sample is then computed by summing the normalised expression values of the genes in the gene set, and dividing by the number of genes in the gene set. This normalization makes each gene’s expression relative to its typical behavior across the dataset, allowing for meaningful comparisons between genes with different expression scales. By using log2 median-centering, the method ensures that both highly and lowly expressed genes contribute comparably to the score, as long as their variances are similar. This normalization emphasizes relative changes in expression rather than absolute values, allowing the score to reflect the coordinated behavior of the genes in a gene set. Users can calculate the gene signature score for each sample based on one or more predefined gene sets (signatures).  
 
-Here’s an example where we calculate the signature score using the "logmedian" method:
+The **logmedian** method computes a gene signature score per sample as follows. First, gene expression values are log2-transformed and each gene is median-centered across all samples. For a given sample, the score is the mean of the median-centered expression values of the genes in the set (De Almeida et al., 2019). Each gene’s contribution is thus evaluated relative to its baseline expression, and the resulting score quantifies the coordinated activity of the gene set.
+
+For bidirectional gene sets, where genes are annotated by expected direction of regulation, the score is calculated as the difference between the mean of the upregulated genes and the mean of the downregulated genes. Users can compute scores for individual samples using one or more predefined gene sets.
+
+The following example demonstrates calculation of a gene signature score using the logmedian method:
+
  
 ```{r}
 df_Scores <- CalculateScores(data = counts_example,
@@ -111,9 +115,11 @@ head(df_Scores$HernandezSegura)
 head(df_Scores$LiteratureMarkers)
 ```
 
-The user can also chose to directly plot the scores. 
+Users can directly visualize gene set scores with the plotting functions.
+
+Effect sizes are computed via the `compute_cohen` parameter (default = `TRUE`). For a categorical metadata variable with two levels, Cohen’s d is calculated; for more than two levels, Cohen’s f is used unless a specific pairwise comparison is specified via `cond_cohend`, in which case Cohen’s d is reported for that comparison.
 
-Effect sizes can be computed using the `compute_cohen` parameter (default = `T`): when the grouping variable has only two levels, Cohen’s d is calculated by default. If there are more than two levels, Cohen’s f is used unless a specific pairwise comparison is defined via `cond_cohend`, in which case Cohen’s d is reported for that comparison. If `pvalcalc = TRUE` (default = `FALSE`), an associated p-value (not corrected for multiple testing) is also reported. The p-value is derived from a two-sample t-test for two-group comparisons or numeric variables, or from an ANOVA for multi-group comparisons.  
+If `pvalcalc = TRUE` (default = `FALSE`), an associated p-value is reported (uncorrected for multiple testing). p-values are derived from a two-sample t-test for two-group or numeric-variable comparisons, and from ANOVA for multi-group comparisons.
 
 ```{r exampleScore, fig.width=8, fig.height=4, out.width="80%", warning=FALSE, message=FALSE}
 
@@ -148,8 +154,7 @@ PlotScores(data = counts_example,
 ```
 
 
-Interestingly, when we provide directionality for a signature—such as the *Literature_Senescence* set—the interpretation of the results can change substantially. For example, without specifying direction, senescent samples may appear to have lower scores than proliferative ones. But once directionality is accounted for, the scores shift in a way that aligns better with biological expectations. Therefore, it is **strongly advised** that, whenever possible (i.e., if known), the user states the putative regulation “sign” of the genes in the gene set This helps ensure more accurate and meaningful interpretations of the data.
-
+Providing gene directionality can substantially affect score interpretation. For example, in the *Literature_Senescence* signature, omitting the direction of change in expression of genes upon phenotype may lead to senescent samples appearing to have lower scores than proliferative ones. Incorporating directionality aligns the scores with biological expectations. Therefore, it is **strongly recommended** to specify the expected direction of gene expression changes in a set whenever this information is available, ensuring more accurate and meaningful interpretation of the results.
 
 ```{r exampleScore_bidirectional, fig.width=6, fig.height=4, out.width="60%", warning=FALSE, message=FALSE}
  
@@ -180,9 +185,11 @@ PlotScores(data = counts_example,
 ```
 
 
-To use the function for numeric variables, the user should specify the relevant parameters, including the numeric variable to be analysed. The function will generate a scatter plot for the numeric variable, optionally calculating Cohen's f as the effect size. The user can choose a correlation method (e.g., Pearson, Spearman, or Kendall) to assess the relationship between the variable and the signature scores. The plot will also include optional p-value calculations for comparisons.
+When analyzing a numeric variable, the function generates a scatter plot of the variable against gene set scores and can optionally compute an effect size using Cohen’s f. The user may select a correlation method (Pearson, Spearman, or Kendall) to quantify the association between the numeric variable and the scores. Optional p-value calculations for the association can also be included.
+
+The following example illustrates how to configure the function for a numeric variable:
+
 
-Here is an example of how to configure the function for numeric variables:
 
 ```{r examplenumeric, fig.width=8, fig.height=3.5, out.width="80%", warning=FALSE, message=FALSE}
 PlotScores(data = counts_example,
@@ -203,8 +210,9 @@ PlotScores(data = counts_example,
            widthTitle = 26,
            cor = "pearson")
 ```
+ 
+To visualize the overall distribution of scores across gene sets, the `PlotScores` function can be used without specifying the `GroupingVariable` parameter, i.e., without grouping scores by any metadata variable. In this case, it generates a grid of density plots, with each plot representing the score distribution for a specific gene set.
 
-For users interested in viewing the overall distribution of scores across gene signatures, the `PlotScores` function can be used without specifying the `GroupingVariable` parameter, i.e, without grouping scores by any metadata variable. In this case, the function will automatically generate a grid of density plots, with each plot representing the distribution of scores for a specific gene set  
 
 ```{r plotscores_density, fig.width=8, fig.height=3, out.width="80%", warning=FALSE, message=FALSE}
  
@@ -225,7 +233,8 @@ PlotScores(data = counts_example,
 
 ## ssGSEA method
 
-The same approach can be applied for **ssGSEA** (single-sample Gene Set Enrichment Analysis; Barbie et al. (2009)) for score calculation and visualization, both for unidirectional and bidirectional signatures. ssGSEA computes an enrichment score for each gene signature in each sample using an adaptation of the `gsva()` function from the `GSVA` package. This method is useful for evaluating gene set enrichment in individual samples rather than groups, as described in the sections below.
+Single-sample Gene Set Enrichment Analysis (**ssGSEA**) was implemented using a modified version of the `GSVA` package’s `gsva()` function (Barbie et al., 2009), based on the original Gene Set Enrichment Analysis (GSEA) method (Subramanian et al., 2005). ssGSEA ranks all genes by their expression within each sample and computes a running-sum statistic over the ranked list. For unidirectional gene sets (i.e., with no information on the expected direction of each gene’s regulation upon phenotype), the ssGSEA sample score reflects overall coordinated expression of the genes in the set. For bidirectional gene sets, the score is calculated as the ssGSEA score computed for the upregulated subset of genes minus the score computed for the downregulated subset.
+
  
 
 ```{r examplessGSEA, fig.width=8, fig.height=4, out.width="80%", warning=FALSE, message=FALSE}
@@ -262,9 +271,9 @@ PlotScores(data = counts_example,
 
 ## Ranking method
 
-The **ranking** method computes gene signature scores for each sample by ranking the expression of signature genes in the dataset and normalizing the score based on the total number of genes. 
+The **ranking** method calculates gene signature scores using a non-parametric approach based on the relative expression of genes within a set. For each sample, all genes are ranked by expression. The score is then calculated as the sum of the ranks of the genes in the gene set, multiplied by +1 (for upregulated genes or those with unspecified direction of regulation change upon phenotype) or -1 (for downregulated genes), and normalised by the number of genes in the set. 
 
-The following example demonstrates the use of the "ranking" method for both unidirectional and bidirectional signatures:
+The following example demonstrates the use of the "ranking" method for both unidirectional and bidirectional gene sets:
 
 ```{r ranking, fig.width=8, fig.height=4, out.width="80%", warning=FALSE, message=FALSE}
  
@@ -308,6 +317,7 @@ The `mode` parameter controls how contrasts are generated for categorical variab
 -   **"medium"**: Includes comparisons between one group and the union of other groups (e.g., A - (B + C + D); B - (A + C + D)), allowing for broader contrasts beyond simple pairwise comparisons. 
 -   **"extensive"**: Allows for all possible algebraic combinations of group levels (e.g., (A + B) - (C + D)). 
 
+In this example, HernandezSegura and LiteratureMarkers consistently discriminate Senescent from Proliferative samples, while REACTOME_CELLULAR_SENESCENCE shows weaker and less consistent separation.
 
 ```{r Overall_Scores, fig.width=6, fig.height=3, out.width="80%", warning=FALSE, message=FALSE}
 
@@ -389,6 +399,10 @@ AUC_Scores(data = counts_example,
            title="Marthandan et al. 2016") 
 ```
 
+
+ HernandezSegura and LiteratureMarkers exhibit consistently high AUCs across scoring methods, while REACTOME_CELLULAR_SENESCENCE shows more heterogeneous performance.
+
+
 ## False Positive Rate (FPR) Calculations
 
 The user can assess the significance of gene set scores by comparing observed effect sizes against a distribution of those originated by random gene sets with the same number of genes and matched directionality. For each original gene set, the function calculates the observed Cohen's d (and p‑value) using (`GroupingVariable`). It then generates a number of simulated gene sets (`number_of_sims`) by randomly sampling the same number of genes from a user provided gene list (`gene_list`) and computes their Cohen's d values. The simulation results are visualised as violin plots of the distribution of Cohen's d values for each method, overlaid with the observed values of the original gene sets, and a 95th percentile threshold. Significance is indicated by distinct point shapes based on the associated p‑value.
@@ -399,7 +413,7 @@ FPR_Simulation(data = counts_example,
                metadata = metadata_example,
                original_signatures = genesets_example,
                gene_list = row.names(counts_example),
-               number_of_sims = 10,
+               number_of_sims = 100,
                title = "Marthandan et al. 2016",
                widthTitle = 30,
                Variable = "Condition",
@@ -424,7 +438,7 @@ The `calculateDE` function in the `markeR` package leverages the `limma` framewo
 
 Below are the corresponding code snippets demonstrating each scenario, by answering the same question: **What are the genes differentially expressed between senescence and proliferative cells?**
 
-```{r DEGs, out.width="80%", warning=FALSE, message=FALSE}
+```{r DEGs, out.width="80%", warning=FALSE}
 # Example 1: Build design matrix from variables (Condition) and apply a contrast.
 # In this case, the design matrix is constructed automatically using the variable "Condition".
 DEGs <- calculateDE(data = counts_example,
@@ -445,7 +459,7 @@ DEGs2 <- calculateDE(data = counts_example,
 DEGs2$`Senescent-Proliferative`[1:5,]
 ```
 
-After running differential expression analysis (for example, using the `calculateDE` function), the user can visualize their results with the `plotVolcano` function. This function provides a flexible interface for exploring their data by allowing the user to:
+After running differential expression analysis (using the `calculateDE` function), the user can visualize their results with the `plotVolcano` function. This function provides a flexible interface for exploring their data by allowing the user to:
 
 -   **Plot Differential Gene Expression Statistics:**  
     Display a volcano plot with chosen statistics (e.g., log fold-change on the x-axis and –log₁₀ adjusted p-value on the y-axis).
@@ -453,7 +467,7 @@ After running differential expression analysis (for example, using the `calculat
     Highlight genes that pass user-specified thresholds by adjusting `threshold_y` and `threshold_x`.
 -   **Annotate Top and Bottom N Genes:**  
     Optionally, label the top (and bottom) N genes based on the chosen statistic to quickly identify the most significant genes.
--   **Highlight Gene Signatures:** If the user provides a list of gene signatures using the `genes` argument, the function can highlight these genes in the plot. The user can also specify distinct colors for putativelyupregulated and downregulated if their direction is known, or a color for genes that do not have a putative direction.
+-   **Highlight Gene Signatures:** If the user provides a list of gene signatures using the `genes` argument, the function can highlight these genes in the plot. The user can also specify distinct colors for putatively upregulated and downregulated genes if their direction is known, or a color for genes that do not have a putative direction.
 
 Below is an example usage of `plotVolcano` that visualizes differential expression results from a `DEResultsList`. The first plot shows the default behavior, generating a basic volcano plot without thresholds or gene highlights. Subsequent examples demonstrate how to customize the plot:
 
@@ -461,8 +475,7 @@ Below is an example usage of `plotVolcano` that visualizes differential expressi
 -   Annotating the top and bottom N genes by effect size,
 -   And using gene signatures to color genes across multiple plots arranged by contrast and signature.
 
-These examples illustrate how users can customise the output plot to highlight biologically meaningful patterns or focus on specific gene sets.
-
+These examples illustrate how users can customise the output plot to highlight biologically meaningful patterns or focus on specific gene sets. 
 
 ```{r volcanos_DEGs, fig.width=4, fig.height=3, out.width="40%", warning=FALSE, message=FALSE} 
 
@@ -484,6 +497,12 @@ plotVolcano(DEGs, genes = NULL, N = 5,
 
 ```
 
+In this example:
+
+* Genes in the HernandezSegura set annotated as upregulated (green) display positive log2 fold changes, whereas those annotated as downregulated (red) show negative log2 fold changes. This pattern is consistent with the annotation of the set, although these genes are not necessarily those exhibiting the largest absolute fold changes.
+* In the LiteratureMarkers set, *LMNB1* and *MKI67* exhibit strongly negative log2 fold change, consistent with their roles as proliferation markers absent in senescent cells.
+* Genes from the REACTOME_CELLULAR_SENESCENCE set are undirected and appear across the full range of log2 fold change values, diluting discriminatory power.
+
 ```{r volcanos_DEGs3, fig.width=10, fig.height=3, out.width="90%", warning=FALSE, message=FALSE}
 
 # Change order: signatures in columns, contrast in rows
@@ -639,10 +658,13 @@ Based on these very simple analyses, the REACTOME_CELLULAR_SENESCENCE showed con
 
 These findings highlight important trade-offs: while scoring methods offer per-sample resolution and are less sensitive (in terms of statistical significance) to gene set size, making them useful for classification tasks, they may be overly influenced by a small subset of genes, which could limit biological interpretability. While more robust to sample heterogeneity and better at capturing coordinated expression changes, enrichment-based methods are sensitive to gene set composition and size. Caution is warranted when interpreting results, especially from score-based approaches, as strong signals may not always reflect the intended biological process, but rather a handful of dominant genes. 
 
-## Visualise Individual Gene Behaviour
+
+## Visualise Individual Gene Behaviour
 
 As highlighted in the previous section, score-based approaches can be disproportionately influenced by a small subset of genes. To address this, `markeR` includes dedicated functions for exploring individual gene behaviour, enabling users to assess if and which genes may be driving the overall signal. We demonstrate this functionality using the `LiteratureMarkers` gene set. 
  
+`markeR` provides the wrapper function `VisualiseIndividualGenes` for plotting individual genes. In this tutorial, we illustrate each visualization function separately for clarity. However, the same outputs can be generated through the wrapper by setting the `type` argument to the desired visualization strategy (i.e., `"expression"`, `"correlation"`, `"violin"`, `"roc"`, `"auc"`, `"rocauc"`, `"cohend"`, `"pca"`), and the wrapper automatically dispatches to the correct function with the appropriate parameters. 
+ 
 The `ExpressionHeatmap` function generates a heatmap to display the expression levels of selected senescence genes across samples. Samples are annotated by a chosen condition, and expression values are color-scaled for easy visual comparison. Clustering options and customizable color palettes allow for flexible and informative visualization.
 
 ```{r example_exprheatmap, fig.width=8, fig.height=3, out.width="70%", warning=FALSE, message=FALSE}
@@ -668,10 +690,6 @@ ExpressionHeatmap(data=counts_example,
                   scale_position="right")
 ```
 
-To standardize the visualisation of individual genes across multiple analyses, we created a wrapper function called `VisualiseIndividualGenes`. This function consolidates several internal plotting functions, including `ExpressionHeatmap` and other listed below, into a single, user-friendly interface.
-
-The output remains consistent with the individual functions, but users can specify the desired type (i.e., "expression", "correlation","violin", "roc", "auc", "rocauc", "cohend", "pca"), and the wrapper automatically dispatches to the correct function with the appropriate parameters. This design simplifies and unifies the workflow for exploring gene-level patterns across various analysis types. 
-
 ```{r example_exprheatmap2, fig.width=8, fig.height=3, out.width="70%", warning=FALSE, message=FALSE}
 VisualiseIndividualGenes(type="expression",
                   data=counts_example, 
@@ -689,7 +707,6 @@ VisualiseIndividualGenes(type="expression",
 ```
 
 
-
 The `IndividualGenes_Violins` function creates violin plots to visualize the expression distributions of selected senescence genes across conditions. Jittered points represent individual samples, and grouping (x axis, `GroupingVariable`) and color variables (`ColorVariable` and `ColorValues`) from the metadata allow for additional stratification and insight. Customization options include layout, point size, colors, and axis labeling.
 
 ```{r exampleviolins, fig.width=10, fig.height=3, out.width="100%", warning=FALSE, message=FALSE}
@@ -805,7 +822,7 @@ plotPCA(data = counts_example,
 
 ## Comment 
 
-As demonstrated by the behaviour of individual genes in the `LiteratureMarkers` gene set, LMNB1, MKI67, and GLB1 appear to drive the overall signal. These genes consistently show higher performance metrics (e.g., Cohen’s d, AUC), strong expression changes between conditions, and LMNB1 and MKI67 specifically have correlated expression patterns. This underscores the importance of examining gene-level behaviour, as a strong overall signature score may reflect the influence of only a few informative genes, rather than coordinated activity across the entire set. In this case, the strong performance of the `LiteratureMarkers` set in scoring approaches is likely driven by these genes. However, relying heavily on a few markers can be a caveat: for example, MKI67 and LMNB1 (proliferation-related genes) may also change in other biological contexts like quiescence or differentiation, potentially limiting their specificity for senescence. Thus, the choice of gene set and analysis strategy should be guided by the research question, and complemented with both score distributions, enrichment analyses, and individual gene behaviour. Notably, scoring with just these three genes yielded results similar (or, even, slightly better) to the full `LiteratureMarkers` set.
+As demonstrated by the behaviour of individual genes in the `LiteratureMarkers` gene set, *LMNB1*, *MKI67*, and *GLB1* appear to drive the overall signal. These genes consistently show higher performance metrics (e.g., Cohen’s d, AUC), strong expression changes between conditions, and *LMNB1* and *MKI67* specifically have correlated expression patterns. This underscores the importance of examining gene-level behaviour, as a strong overall signature score may reflect the influence of only a few informative genes, rather than coordinated activity across the entire set. In this case, the strong performance of the `LiteratureMarkers` set in scoring approaches is likely driven by these genes. However, relying heavily on a few markers can be a caveat: for example, *MKI67* and *LMNB1* (proliferation-related genes) may also change in other biological contexts like quiescence or differentiation, potentially limiting their specificity for senescence. Thus, the choice of gene set and analysis strategy should be guided by the research question, and complemented with both score distributions, enrichment analyses, and individual gene behaviour. Notably, scoring with just these three genes yielded results similar to the full `LiteratureMarkers` set.
 
 ```{r example_genesdrivingresults, fig.width=6, fig.height=4, out.width="60%", warning=FALSE, message=FALSE}
  
diff --git a/vignettes/Tutorial_DiscoveryMode.Rmd b/vignettes/articles/Article_DiscoveryMode.Rmd
similarity index 55%
rename from vignettes/Tutorial_DiscoveryMode.Rmd
rename to vignettes/articles/Article_DiscoveryMode.Rmd
index e1045f7..c51e45c 100644
--- a/vignettes/Tutorial_DiscoveryMode.Rmd
+++ b/vignettes/articles/Article_DiscoveryMode.Rmd
@@ -1,47 +1,31 @@
 ---
 title: "Discovery Mode Tutorial"
-output: rmarkdown::html_document
-vignette: >
-  %\VignetteIndexEntry{Discovery Mode Tutorial}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
+author: "Rita M. Silva"
+date: "`r Sys.Date()`"
 ---
 
-```{r setup, include=FALSE}
+
+```{r, include = FALSE}
 knitr::opts_chunk$set(
-  dev = "png",
-  dpi = 72, 
-  fig.retina = 1
+  collapse = TRUE,
+  comment = "#>"
 )
 ```
 
-This vignette provides a comprehensive introduction to the **markeR** package, focusing on its **Discovery Mode**. The discovery mode was designed for users who are interested in quantifying a known, robust gene set in a given dataset to explore associations with other phenotypic or clinical variables. This approach is particularly suited to hypothesis generation where the phenotype marked by the gene set is of known biological or clinical relevance.
- 
-# Installation 
-
-The user can install the development version of markeR from [GitHub](https://github.com/) with:
-   
- 
-```{r, include = FALSE }
-library(markeR)
-```
-
-``` r
-# install.packages("devtools")
-devtools::install_github("DiseaseTranscriptomicsLab/markeR")
-```
+This vignette provides a comprehensive introduction to the **`markeR`** package, focusing on its **Discovery Mode**. The discovery mode was designed for users who are interested in examining the relationship between a gene set and one or more metadata variables of interest, being suitable for exploratory or hypothesis-generating analyses.   
 
- 
 # Case-study: Senescence
 
-We will be using an already pre-processed gene expression dataset, derived from the Marthandan et al. (2016) study (GSE63577), that includes human fibroblast samples cultured under two different conditions: replicative senescence and proliferative control. The dataset has already been filtered and normalized using the `edgeR` package. For more information about the dataset structure, see the help pages for `?counts_example` and `?metadata_example`.
+In this vignette, we use a pre‑processed RNA-seq dataset from Marthandan et al. (2016, GSE63577), with normalised read counts for human fibroblasts under **replicative senescence** and **proliferative control**. See `?counts_example` for preprocessing details and structure. `markeR` requires as input a filtered and normalised, non log-transformed, gene expression matrix (genes × samples). Row names must be gene identifiers; column names must match sample IDs in the metadata. 
+
+We use the accompanying metadata from the Marthandan et al. (2016) study (see `?metadata_example`).
 
-This dataset serves as a working example to demonstrate the main functionalities of the markeR package. In particular, it will be used to showcase the two primary modules designed for benchmarking gene signatures:
+This dataset serves as a working example to demonstrate the main functionalities of the `markeR` package. In particular, it will be used to showcase the two primary modules of `markeR` for quantifying phenotypes using gene sets:
 
--   **Score**: calculates expression-based signature scores for each sample, and
--   **Enrichment**: evaluates the over-representation of gene signatures within ranked gene lists.
+- **Score-based methods**:  log2-median expression, ranking approaches, and single-sample gene set enrichment analysis (ssGSEA) to quantify coordinated expression within a gene set.
+- **Enrichment-based methods**: GSEA using moderated t- or B-statistics.
 
-To illustrate the usage of `markeR`, we use the **HernandezSegura** gene set: A transcriptomic gene set identified by Hernandez-Segura et al. (2017) as consistently altered across multiple senescence models. This set includes information on the directionality of gene regulation. It has shown strong performance in classification and enrichment analyses, including in the original paper of markeR, and also in the Tutorial on the *Benchmarking Mode* of `markeR`.
+To illustrate the usage of `markeR`, we use the **HernandezSegura** gene set: A transcriptomic gene set identified by Hernandez-Segura et al. (2017) as consistently altered across multiple senescence models. This set includes information on the direction of change in expression of genes upon phenotype.
 
 ```{r example}
 library(markeR)
@@ -60,8 +44,12 @@ data(counts_example)
 counts_example[1:5,1:5]
 ```
 
-For illustration purposes of different variable types, let's imagine we also had two additional variables: one indicating the number of days between sample preparation and sequencing (`DaysToSequencing`), and another identifying the person who processed each sample (`researcher`). These variables are hypothetical and not part of the original study design.
- 
+For illustration purposes, two synthetic variables were added to the data:
+
+* `DaysToSequencing`, the number of days between sample preparation and sequencing;
+* `Researcher`, identifying the person who processed each sample. 
+
+This enables exploration of associations between gene set activity and both categorical and continuous variables.
 
 ```{r load_metadata}
 data(metadata_example)
@@ -76,24 +64,16 @@ head(metadata_example)
 
 # Score-Based approaches 
   
-The **Score** module in `markeR` quantifies the association between a gene signature and phenotypic variables by calculating a score for each sample based on the expression of genes in the signature. This score can then be correlated with other variables, such as clinical or experimental conditions:
-
-- **Quantifies associations** between phenotype variables and a gene signature score using *Cohen's effect sizes* and *p-values*.
-- **Visualizes** results through lollipop plots, contrast plots, and distribution plots.
-
-This is useful for identifying:
-
-- **Biological relationships** (e.g., phenotype-score associations)
-- **Technical confounders** (e.g., batch effects)
-
+A score summarising the collective expression of a gene set is assigned **to each sample**. Scores can be visualised using built-in functions, or used directly in downstream analyses (*e.g.*, comparisons between phenotypic groups of samples, correlations with numerical phenotypes). 
+ 
 ## Outputs
 
-The main function returns a structured list with:
+If `method = "logmedian"` (or `"ssGSEA"`, `"ranking"`), the main function `VariableAssociation()` returns a structured list with:
 
 - **`overall`**: Effect sizes (*Cohen’s f*) and p-values for each variable.
 - **`contrasts`**: For categorical variables, pairwise or grouped comparisons using *Cohen’s d* with BH-adjusted p-values.
 - **`plot`**: A combined visualization showing:
-    1. Lollipop plot of effect sizes (*Cohen’s f*)
+    1. Lollipop plot of effect sizes (i.e., *Cohen’s f* per variable)
     2. Distribution plots of the score by variable (density or scatter)
     3. Lollipop plots of contrasts (*Cohen’s d*) for categorical variables, if applicable
 - **`plot_overall`**, **`plot_contrasts`**, **`plot_distributions`**: Individual components of the combined plot.
@@ -114,13 +94,10 @@ For this example, we use:
 - The **`logmedian`** scoring method
 - **`mode = "extensive"`** for thorough contrast analysis
 
-We also include two synthetic phenotypic variables:
+Though artificial, `DaysToSequencing` and `Researcher` mimic potential **technical covariates**. Strong associations between these and the score could indicate **batch effects**, where technical variation may confound biological interpretation.
 
-- **`Researcher`** — a categorical variable representing who processed each sample
-- **`DaysToSequencing`** — a numeric variable indicating time between preparation and sequencing
-
-Though artificial, these mimic potential **technical covariates**. Strong associations between these and the score could indicate **batch effects**, where technical variation may confound biological interpretation.
 
+In this example, the `Condition` variable shows a large effect size (Cohen’s f and Cohen's d), confirming strong discrimination between Senescent and Proliferative samples. The remaining variables don't show significant associations, suggesting no major batch effects that might be reflected in the computed scores.
 
 ```{r variableassoc_score_sen, fig.width=7, fig.height=7, out.width="100%", warning=FALSE, message=FALSE} 
 results_scoreassoc_bidirect <- VariableAssociation(data = counts_example, 
@@ -136,20 +113,24 @@ results_scoreassoc_bidirect <- VariableAssociation(data = counts_example,
 results_scoreassoc_bidirect$Overall
 results_scoreassoc_bidirect$Contrasts
 ```
+
+
+
  
 # Enrichment-based approaches
 
-The `GSEA_VariableAssociation()` function evaluates how phenotypic variables are associated with **gene set activity**, using enrichment scores derived from gene expression statistics (B- or t-statistics). This allows users to understand whether a gene set is enriched or depleted in relation to different sample attributes.
+Enrichment-based methods implement **Gene Set Enrichment Analysis (GSEA)**. Genes are ranked according to differential expression statistics, and a Normalised Enrichment Score (NES) per variable of interest is computed, accompanied by a p-value adjusted for multiple hypothesis testing.
+
 
 ## Outputs
 
-The `GSEA_VariableAssociation()` function returns a list with two elements:
+If `method = "GSEA"`, the main function `VariableAssociation()` returns a structured list with:
 
 - **`data`**: A tidy data frame of GSEA results. For each variable contrast, this includes:
     - **Contrast**: The comparison performed (e.g., A - B, A - (B+C))
     - **Statistic**: The metric used for gene ranking (either *t* or *B*)
     - **NES**: Normalized Enrichment Score
-    - **Adjusted p-value**: Multiple-testing corrected p-value (e.g., Benjamini–Hochberg)
+    - **Adjusted p-value**: Multiple-testing corrected p-value (Benjamini–Hochberg)
     - **Gene Set Name**: The gene set being tested
 
 - **`plot`**: A `ggplot2` object showing the NES and significance of each contrast as a **lollipop plot**:
@@ -174,10 +155,15 @@ Depending on the statistic used (`t` or `B`), the interpretation of results vari
 
 ## Example: Exploring Gene Set Enrichment by Variable
 
-The following code evaluates how three phenotypic variables — `Condition`, `Researcher`, and `DaysToSequencing` — are associated with the **HernandezSegura** gene set:
+The following code evaluates how three phenotypic variables (`Condition`, `Researcher`, and `DaysToSequencing`) are associated with the **HernandezSegura** gene set. 
 
+ 
+ In this example, the HernandezSegura gene set shows significant enrichment in samples processed by Francisca relative to those processed by Ana or John, which would suggest a differentiated researcher-associated technical impact on the samples' biological phenotype. This gene set also shows a strong depletion in proliferative samples, which is expected given its annotation as senescence-associated and is consistent with the results from the score-based approach.  
+ 
+ 
+ 
 ```{r GSEA_varassoc, fig.width=6, fig.height=6, out.width="60%", warning=FALSE, message=FALSE}
-VariableAssociation(
+varassoc_gsea <- VariableAssociation(
   data = counts_example,
   metadata = metadata_example,
   method = "GSEA",
@@ -193,9 +179,12 @@ VariableAssociation(
   titlesize = 14,
   pointSize = 5
 )
- 
+
+varassoc_gsea$data
 ```
  
+
+ 
 # Session Information
 
 ```{r}
diff --git a/vignettes/Tutorial_GeneSetSimilarity.Rmd b/vignettes/articles/Article_GeneSetSimilarity.Rmd
similarity index 61%
rename from vignettes/Tutorial_GeneSetSimilarity.Rmd
rename to vignettes/articles/Article_GeneSetSimilarity.Rmd
index b40f97f..d950653 100644
--- a/vignettes/Tutorial_GeneSetSimilarity.Rmd
+++ b/vignettes/articles/Article_GeneSetSimilarity.Rmd
@@ -1,49 +1,32 @@
 ---
 title: "Gene Set Similarity Tutorial"
-output: rmarkdown::html_document
-vignette: >
-  %\VignetteIndexEntry{Gene Set Similarity Tutorial}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
+author: "Rita M. Silva"
+date: "`r Sys.Date()`"
 ---
 
-```{r setup, include=FALSE}
+```{r, include = FALSE}
 knitr::opts_chunk$set(
-  dev = "png",
-  dpi = 72, 
-  fig.retina = 1 
+  collapse = TRUE,
+  comment = "#>"
 )
 ```
-# Installation 
-
-The user can install the development version of markeR from [GitHub](https://github.com/) with:
- 
-```{r, include = FALSE }
-library(markeR)
-```
-
-``` r
-# install.packages("devtools")
-devtools::install_github("DiseaseTranscriptomicsLab/markeR")
-```
 
+Even if a user-defined gene signature demonstrates strong discriminatory power between conditions, it may reflect known biological pathways rather than novel mechanisms. To address this, the `geneset_similarity()` function implements two complementary similarity metrics:
 
-# Signature Similarity 
-
-Even if a user-defined gene signature demonstrates strong discriminatory power between conditions, it may reflect known biological pathways rather than novel mechanisms. To address this, the `geneset_similarity()` function computes pairwise **Jaccard indices** or **log odds ratios (logOR)** between user-provided gene signatures and a reference set, quantifying their overlap as a percentage or a statistical enrichment.
+* **Jaccard Index**:
+the ratio of the number of genes in common over the total number of genes in the two sets.
 
+* **Log Odds Ratio (logOR)** from Fisher’s exact test of association between gene sets, given a specified gene universe.
+ 
 Users can compare their signatures to:
 
-* **Custom gene sets**, defined manually, or  
+* **Custom gene sets**, defined manually;  
 * **MSigDB collections**, via the [`msigdbr`](https://cran.r-project.org/package=msigdbr) package.
 
 The function provides options to:
 
-* Filter by **Jaccard index threshold**, using `jaccard_threshold`  
-* Filter by **odds ratio and p-value**, using `or_threshold` and `pval_threshold`  
-* Limit the number of top-matching reference signatures shown, using `num_sigs_toplot`  
-
- 
+* Filter by **Jaccard index threshold**, using `jaccard_threshold`;
+* Filter by **odds ratio and p-value**, using `or_threshold` and `pval_threshold`, respectively.   
 
 # Similarity via Jaccard Index
 
@@ -54,9 +37,12 @@ The **Jaccard index** measures raw set overlap:
 \]
 
 ## Example 1: Compare against user-defined and MSigDB gene sets
-  
 
-```{r, fig.width=6, fig.height=6, out.width="60%", warning=FALSE, message=FALSE}
+```{r setup}
+library(markeR)
+```
+
+```{r, fig.width=6, fig.height=4, out.width="60%", warning=FALSE, message=FALSE}
 # Example data
 signature1 <- c("TP53", "BRCA1", "MYC", "EGFR", "CDK2")
 signature2 <- c("ATXN2", "FUS", "MTOR", "CASP3")
@@ -76,14 +62,15 @@ geneset_similarity(
   jaccard_threshold = 0.05,
   msig_subset = NULL, 
   metric = "jaccard"
-)
+)$plot
+ 
 ```
 
 
 
 ## Example 2: Restrict comparison to a custom subset of MSigDB
 
-```{r, fig.width=6, fig.height=6, out.width="60%"}
+```{r, fig.width=6, fig.height=4, out.width="60%"}
 
 geneset_similarity(
   signatures = list(Sig1 = signature1, Sig2 = signature2),
@@ -92,36 +79,30 @@ geneset_similarity(
   subcollection = "CP:KEGG_LEGACY", 
   jaccard_threshold = 0,
   msig_subset = c("KEGG_MTOR_SIGNALING_PATHWAY", "KEGG_APOPTOSIS", "NON_EXISTENT_PATHWAY"), 
-  metric = "jaccard"
-)
+  metric = "jaccard",
+  limits=c(0,0.1)
+)$plot
+ 
 ```
 
 
 
 # Similarity via Log Odds Ratio
 
-The log odds ratio (logOR) provides a statistically grounded alternative for assessing gene set similarity. It measures **enrichment of one set within another**, relative to a defined background or **gene universe**, using a 2×2 contingency table and a one-sided **Fisher’s exact test**.
+The log odds ratio (logOR) provides a statistically grounded alternative for assessing gene set similarity. It measures **enrichment of one set within another**, relative to a defined background or **gene universe**, using a 2×2 contingency table.
 
 - **Log odds ratio (logOR)**:  
   Derived from contingency tables using:
   - Genes in both sets
   - Genes in one but not the other
   - Gene universe as background  
-  Log-transformed odds ratios are visualized; statistical significance is assessed via the adjusted p-value.
 
 > **Note**: When using `metric = "odds_ratio"`, the `universe` parameter **must** be supplied.
 
 
-
 ## Example 3:  Compare against user-defined and MSigDB gene sets
 
-```{r, fig.width=6, fig.height=14, out.width="60%"}
-# Define gene universe (e.g., genes from HPA or your dataset)
-gene_universe <- unique(c(
-  signature1, signature2,
-  unlist(signature_list),
-  msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol
-))
+```{r, fig.width=6, fig.height=8, out.width="60%"}
 
 geneset_similarity(
   signatures = list(Sig1 = signature1, Sig2 = signature2),
@@ -129,14 +110,21 @@ geneset_similarity(
   collection = "C2",
   subcollection = "CP:KEGG_LEGACY",
   metric = "odds_ratio",
-  universe = gene_universe,
-  or_threshold = 1,
-  pval_threshold = 0.05, 
-  width_text=50
-)
+  # Define gene universe (e.g., genes from HPA or your dataset)
+  universe = unique(c(
+    signature1, signature2,
+    unlist(signature_list),
+    msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol
+  )),
+  or_threshold = 100, #log10OR = 2
+  width_text=50, 
+  pval_threshold = 0.05 
+)$plot
+
 ```
 
 
+ 
 # Session Information
 
 ```{r}
diff --git a/vignettes/markeR.Rmd b/vignettes/markeR.Rmd
new file mode 100644
index 0000000..41e98c1
--- /dev/null
+++ b/vignettes/markeR.Rmd
@@ -0,0 +1,530 @@
+---
+title: "Introduction to markeR"
+author: "Rita M. Silva"
+date: "`r Sys.Date()`"
+output:
+  BiocStyle::html_document:
+    toc: true
+    toc_depth: 3
+vignette: >
+  %\VignetteIndexEntry{Introduction to markeR}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+echo = TRUE,
+message = FALSE,
+warning = FALSE,
+results = 'markup',
+dev = 'png',     # raster images to reduce HTML size
+dpi = 72,
+fig.retina = 1
+)
+```
+ 
+# Introduction
+
+**`markeR`** is an R package that provides a modular and extensible framework for the systematic evaluation of gene sets as phenotypic markers using transcriptomic data. The package is designed to support both quantitative analyses and visual exploration of gene set behaviour across experimental and clinical phenotypes.
+
+In this vignette, we demonstrate the core functionalities of `markeR` using a pre-processed gene expression dataset and associated metadata from Marthandan et al. (2016) (GSE63577). This dataset comprises human fibroblast samples cultured under two experimental conditions: replicative senescence and proliferative control.
+
+## Primary Objectives
+
+The primary objectives of **`markeR`** are to:
+
+- Establish a reproducible and standardised framework for quantifying gene sets as phenotypic markers across transcriptomic datasets.
+- Provide a unified interface for complementary analytical strategies, including score-based and enrichment-based methods.
+- Enable systematic comparison of user-defined gene sets with curated reference collections (e.g., MSigDB) to enhance biological interpretability.
+- Deliver modular tools for visualisation and evaluation, allowing exploration of both gene set behaviour and individual gene-level contributions. 
+
+## Features and Capabilities
+
+The package integrates multiple analytical strategies for quantifying phenotypes using gene sets, supporting both unidirectional and bidirectional (*i.e.*, respectively without and with information on the expected direction of change in expression of genes upon phenotype) gene set definitions:
+
+- **Score-based methods**:  log2-median expression, ranking approaches, and single-sample gene set enrichment analysis (ssGSEA) to quantify coordinated expression within a gene set.
+- **Enrichment-based methods**: GSEA using moderated t- or B-statistics.
+- **Gene-level exploration**: expression heatmaps, violin plots, ROC curve analysis, area under the curve (AUC) calculations, effect size estimation, and principal component analysis (PCA) to characterise the contribution of individual genes.
+- **Gene set similarity assessment**: Jaccard index and log odds ratio (logOR) calculations to compare user-defined gene sets with reference or user-defined collections.
+
+The package is implemented with flexibility and extensibility as core design principles. Visualisation leverages `ggplot2`, `ComplexHeatmap`, `ggpubr`, `cowplot`, and `grid`, enabling a wide range of customisation options. The modular structure ensures compatibility with additional methods in future versions of the package.
+
+# Installation
+
+Install the latest release from Bioconductor:
+
+```{r, eval=FALSE}
+# Install from Bioconductor
+if (!requireNamespace("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+BiocManager::install("markeR")
+```
+
+Or install the latest development release of `markeR` from [GitHub](https://github.com/) with:
+
+```r
+devtools::install_github("DiseaseTranscriptomicsLab/markeR@*release")
+```
+
+```{r, echo=FALSE} 
+library(markeR)
+```
+# Common Workflow
+
+## Input Requirements
+ 
+### Gene Sets
+
+A named list where each element is a gene set.
+
+- Use a **character vector** when putative direction of change in expression of genes upon phenotype is unknown (unidirectional).
+- Use a **data frame** with columns `gene` and `direction` (values `+1` for up-, `-1` for down-regulation) when direction is known (bidirectional).
+
+```{r example-gene-sets-vector, echo=FALSE}
+# Gene set without direction
+gene_set1 <- c("GeneA", "GeneB", "GeneC", "GeneD")
+
+# Gene set with direction
+gene_set2 <- data.frame(
+  gene = c("GeneX", "GeneY", "GeneZ"),
+  direction = c(1, -1, 1),
+  stringsAsFactors = FALSE
+)
+
+# Combine both into a named list
+gene_sets <- list(
+  Set1 = gene_set1,
+  Set2 = gene_set2
+)
+```
+
+```{r show-gene-set, echo=TRUE}
+# Example
+gene_sets
+```
+
+For this vignette, we will use three senescence-related gene sets that ship with the package (see `?genesets_example` for more information):
+
+* **LiteratureMarkers** (bidirectional; gene set of commonly reported senescence markers in the literature)
+* **REACTOME_CELLULAR_SENESCENCE** (unidirectional; from the MSigDB database)
+* **HernandezSegura** (bidirectional; from [Hernandez-Segura et al. (2017)](https://pubmed.ncbi.nlm.nih.gov/28844647/))
+
+```{r loadsig}
+# Load example gene sets
+data(genesets_example)
+```
+
+### Expression Data Frame
+
+A filtered and normalised, non log-transformed, gene expression matrix (genes × samples). Row names must be gene identifiers; column names must match sample IDs in the metadata.
+
+**Warning:** If you are using microarray data or outputs from common RNA-seq pipelines (*e.g.*, edgeR), note that the expression values may already be log2-normalised. The input to `markeR` must necessarily be **non-log-transformed**. If your data are log2-transformed, you can revert them by applying `2^data`.
+
+
+In this vignette, we use a pre‑processed RNA-seq dataset from Marthandan et al. (2016, GSE63577), with normalised read counts for human fibroblasts under **replicative senescence** and **proliferative control**. See `?counts_example` for structure.
+
+```{r loaddata}
+# Load example expression data
+data(counts_example)
+counts_example[1:5, 1:5]
+```
+
+### Sample Metadata
+
+A data frame with samples as rows and annotations as columns. The first column should contain sample IDs matching the expression matrix column names.
+
+We use the accompanying metadata from the Marthandan et al. (2016) study. See `?metadata_example`.
+
+```{r load_metadata}
+# Load example metadata
+data(metadata_example)
+head(metadata_example)
+```
+
+## Select Mode of Analysis
+
+`markeR` provides two modes of operation:
+
+* **Benchmarking**:
+evaluates gene sets' performance in marking a metadata variable, *i.e.*, a phenotype, returning comparative visualisations across scoring and enrichment methods.
+
+* **Discovery**:
+examines the relationship between a gene set and one or more variables of interest, suitable for exploratory or hypothesis-generating analyses. 
+ 
+## Choose a Quantification Approach
+
+Two complementary strategies are implemented for quantifying associations between gene sets and phenotypes:
+
+### Score-Based Approach
+
+A score summarising the collective expression of a gene set therein is assigned **to each sample**. Scores can be visualised using built-in functions, or used directly in downstream analyses (*e.g.*, comparisons between phenotypic groups of samples, correlations with numerical phenotypes). 
+
+Available methods:
+
+* **Log2-median**: mean of the across-sample normalised log2 median-centred expression levels of the genes in the set; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
+
+* **Ranking**: mean expression rank of gene set members in each sample; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset, and normalised by the number of genes in the set. 
+
+* **ssGSEA**: single-sample gene set enrichment score using ssGSEA; for bidirectional gene sets, the sample score is the partial score for the subset of putatively upregulated genes minus that of the downregulated subset.
+
+Gene sets that are robust phenotypic markers are expected to yield consistently high scores across methods.
+
+### Enrichment-Based Approach
+
+Enrichment-based methods implement **Gene Set Enrichment Analysis (GSEA)**. Genes are ranked according to differential expression statistics, and a Normalised Enrichment Score (NES) per variable of interest is computed, accompanied by a p-value adjusted for multiple hypothesis testing.
+
+## Visualisation and Evaluation
+
+### Benchmarking Mode
+
+`markeR` offers a range of visual summaries for Benchmarking:
+
+* Violin plots of score distributions by categorical phenotype;
+* Scatter plots of association between scores and numerical phenotypes;
+* Volcano plots and heatmaps of scores or differential gene set expression based on effect sizes (Cohen’s *d* or *f*);
+* ROC curves and respective AUC values of gene sets' phenotypic classification performance;
+* Violin plots of effect size distributions (Cohen’s *d*) for pairwise group differences in scores, for original and simulated gene sets;
+* Plots summarising NES alongside adjusted p-values (*e.g.*, lollipop plots); 
+* GSEA plots showing running enrichment scores across ranked gene lists.
+
+Example of common workflows in Benchmarking Mode (full tutorial [here][tutorial-benchmarking]):
+
+#### Score-based approaches
+
+Score-based methods facilitate direct comparisons of gene set activity between phenotypic groups (*e.g.*, being senescent) and can be used in downstream analyses, including, for example, correlation with other metadata variables of interest (*e.g.*, day of sequencing).
+We first compute log2-median scores. In this dataset, the HernandezSegura and LiteratureMarkers gene sets yield large effect sizes (|Cohen’s d|), whereas REACTOME_CELLULAR_SENESCENCE shows more modest separation between senescent and proliferative fibroblasts. 
+
+```{r fig.width=8, fig.height=4, warning=FALSE, message=FALSE}
+# Quantification approach: scores
+# Method: log2-median 
+PlotScores(data = counts_example, 
+           metadata = metadata_example, 
+           gene_sets = genesets_example, 
+           Variable="Condition",  
+           method ="logmedian",    
+           nrow = 1,    
+           pointSize=4,  
+           title="Marthandan et al. 2016", 
+           widthTitle = 24,
+           labsize=12, 
+           titlesize = 12)  
+
+```
+
+
+Next, scores are calculated using multiple methods (log2-median, ranking, ssGSEA) to assess consistency across strategies. Outputs include heatmaps and volcano plots of effect sizes (|Cohen’s d|). HernandezSegura and LiteratureMarkers consistently discriminate Senescent from Proliferative samples, while REACTOME_CELLULAR_SENESCENCE shows weaker and less consistent separation.
+
+
+```{r warning=FALSE, message=FALSE}
+Overall_Scores <- PlotScores(data = counts_example, 
+                             metadata = metadata_example,  
+                             gene_sets=genesets_example, 
+                             Variable="Condition",  
+                             method ="all",   
+                             ncol = NULL, 
+                             nrow = 1, 
+                             widthTitle=30, 
+                             limits = c(0,3.5),   
+                             title="Marthandan et al. 2016", 
+                             titlesize = 10,
+                             ColorValues = list(heatmap=c("#F9F4AE", "#B44141"),
+                                                volcano=signature_colors <- c(
+                                                  HernandezSegura = "#A07395",               
+                                                  REACTOME_CELLULAR_SENESCENCE = "#6B8E9E",  
+                                                  LiteratureMarkers = "#CA7E45"             
+                                                )
+                             ),
+                             mode="simple",
+                             widthlegend=30, 
+                             sig_threshold=0.05, 
+                             cohen_threshold=0.6,
+                             pointSize=6,
+                             colorPalette="Paired")  
+```
+
+```{r Overall_Scores_heatmap, fig.width=10, fig.height=2, warning=FALSE, message=FALSE}
+Overall_Scores$heatmap
+```
+
+```{r Overall_Scores_volcano, fig.width=6, fig.height=3, warning=FALSE, message=FALSE}
+Overall_Scores$volcano
+```
+
+The discriminatory capacity of gene set scores can also be assessed using ROC curves and corresponding AUC values. HernandezSegura and LiteratureMarkers exhibit consistently high AUCs across scoring methods, while REACTOME_CELLULAR_SENESCENCE shows more heterogeneous performance.
+
+```{r roc_scores, fig.width=10, fig.height=3, warning=FALSE, message=FALSE}
+ROC_Scores(data = counts_example, 
+           metadata = metadata_example, 
+           gene_sets=genesets_example, 
+           method = "all", 
+           variable ="Condition",
+           colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"), 
+           grid = TRUE, 
+           spacing_annotation=0.3, 
+           ncol=NULL, 
+           nrow=1,
+           mode = "simple",
+           widthTitle = 28,
+           titlesize = 10,  
+           title="Marthandan et al. 2016") 
+
+```
+
+Finally, false positive rates for effect sizes are estimated by simulating random gene sets of equal size. This step provides a null distribution against which observed scores can be compared. In this example, the LiteratureMarkers signature demonstrates the strongest performance. Increasing the number of simulations would yield finer resolution at the cost of additional computational time.
+
+```{r FDRSim, fig.width=12, fig.height=3,warning=FALSE, message=FALSE}
+set.seed("123456")
+FPR_Simulation(data = counts_example,
+               metadata = metadata_example,
+               original_signatures = genesets_example,
+               gene_list = row.names(counts_example),
+               number_of_sims = 100,
+               title = "Marthandan et al. 2016",
+               widthTitle = 30,
+               Variable = "Condition",
+               titlesize = 12,
+               pointSize = 5,
+               labsize = 10,
+               mode = "simple",
+               ColorValues=NULL,
+               ncol=NULL, 
+               nrow=1 ) 
+
+```
+
+
+#### Enrichment-based approaches
+ 
+The first step is the quantification of differential expression. Here, a design matrix (as defined by the `limma` package) is automatically constructed from the `Condition` variable in the metadata, defining the contrast `Senescent - Proliferative`. Internally, this fits a linear model without an intercept, enabling quick comparison between levels of a categorical variable. 
+
+```{r DEGs, fig.width=10, fig.height=3, warning=FALSE, message=FALSE}
+# Build design matrix from variables (Condition) and apply a contrast.
+# The design matrix is constructed automatically using the variable "Condition".
+DEGs <- calculateDE(data = counts_example,
+                    metadata = metadata_example,
+                    variables = "Condition",
+                    contrasts = c("Senescent - Proliferative"))
+DEGs$`Senescent-Proliferative`[1:5,]
+```
+
+Differential expression results can be visualised with volcano plots. These highlight the magnitude of expression changes (log2 fold change) against statistical significance (adjusted p-value for the moderated t-statistic), with genes from predefined sets emphasised (upregulated in green and downregulated in red). In this example:
+
+* Genes in the HernandezSegura set annotated as upregulated (green) display positive log2 fold changes, whereas those annotated as downregulated (red) show negative log2 fold changes. This pattern is consistent with the annotation of the set, although these genes are not necessarily those exhibiting the largest absolute fold changes.
+* In the LiteratureMarkers set, *LMNB1* and *MKI67* exhibit strongly negative log2 fold change, consistent with their roles as proliferation markers absent in senescent cells.
+* Genes from the REACTOME_CELLULAR_SENESCENCE set are undirected and appear across the full range of log2 fold change values, diluting discriminatory power.
+
+
+```{r DEGsvolcano, fig.width=10, fig.height=3,warning=FALSE, message=FALSE} 
+# Change order: gene sets in columns, contrast in rows
+plotVolcano(DEGs, genes = genesets_example, 
+            N = NULL,
+            x = "logFC", y = "-log10(adj.P.Val)", pointSize = 2,
+            color = "#6489B4", highlightcolor = "#05254A", highlightcolor_upreg = "#038C65", highlightcolor_downreg = "#8C0303",nointerestcolor = "#B7B7B7",
+            threshold_y = NULL, threshold_x = NULL,
+            xlab = NULL, ylab = NULL, ncol = NULL, nrow = NULL, title = "Marthandan et al. 2016",
+            labsize = 10, widthlabs = 24, invert = TRUE)
+```
+
+We next apply GSEA to the differential expression results. This evaluates whether genes from each set are non-randomly concentrated at the top or bottom of the ranked list. Results are reported as NES with multiple-testing adjusted p-values. Gene ranking can be based on alternative statistics (*e.g.*, moderated t or B-statistic), which influence whether results are interpreted as enrichment/depletion or as altered directionality. For a full example and explanation, see the online tutorial [here][tutorial-benchmarking].
+
+
+```{r GSEA, warning=FALSE, message=FALSE}
+GSEAresults <- runGSEA(DEGList = DEGs, 
+                       gene_sets = genesets_example,
+                       stat = NULL,
+                       ContrastCorrection = FALSE)
+
+GSEAresults
+```
+ 
+Enrichment curves from `fgsea` show the running enrichment score across the ranked list, indicating the distribution of set members therein.
+
+
+```{r GSEA_plotenrichment, fig.width=10, fig.height=3, warning=FALSE, message=FALSE}
+plotGSEAenrichment(GSEA_results=GSEAresults, 
+                   DEGList=DEGs, 
+                   gene_sets=genesets_example, 
+                   widthTitle=40, grid = TRUE, titlesize = 10, nrow=1, ncol=3) 
+```
+
+Enrichment results can also be summarised with a lollipop plot, which compactly shows the NES and highlights statistically significant gene sets. Here, we can see that the HernandezSegura gene set clearly exhibits the strongest enrichment signal. When many gene sets and contrasts are evaluated (which is not the case in this vignette), NES and adjusted p-values can be summarised in a scatter (volcano-style) plot for a simpler visualisation, using `plotCombinedGSEA`.
+ 
+```{r GSEA_lollypop, fig.width=5, fig.height=4, out.width="60%", warning=FALSE, message=FALSE}
+plotNESlollipop(GSEA_results=GSEAresults, 
+                saturation_value=NULL, 
+                nonsignif_color = "#F4F4F4", 
+                signif_color = "red",
+                sig_threshold = 0.05, 
+                grid = FALSE, 
+                nrow = NULL, ncol = NULL, 
+                widthlabels=13, 
+                title=NULL, titlesize=12) 
+```
+  
+
+From this exercise in Benchmarking Mode, we can see that two gene sets clearly perform best at discriminating between Senescent and Proliferative conditions: HernandezSegura and LiteratureMarkers. The REACTOME_CELLULAR_SENESCENCE gene set does not show a strong signal; the absence of information on putative gene up- or downregulation upon phenotype potentially dilutes the signal, highlighting the importance of providing directionality when available.
+
+Scoring and enrichment approaches provide complementary insights. Score-based methods offer sample-level resolution, capturing strong contributions from individual genes, while enrichment-based methods evaluate coordinated behaviour across the set. 
+
+
+### Discovery Mode
+
+In **Discovery** (full tutorial [here][tutorial-discovery]), analyses focus on a single gene set of interest, providing a targeted view of its behaviour across experimental variables (*i.e.*, phenotypes). The output includes:
+
+* Score distributions stratified by variable;
+* Effect sizes for pairwise and multiple-group differences (Cohen's *d* and *f*, respectively);
+* Cross-variable summaries of NES and adjusted p-values (*e.g.*, lollipop plots).
+
+We illustrate this using the HernandezSegura gene set from our example collection:
+
+```{r}
+HernandezSegura_GeneSet <- list(HernandezSegura=genesets_example$HernandezSegura) 
+```
+
+For illustration purposes, two synthetic variables were added to the data:
+
+* `DaysToSequencing`, the number of days between sample preparation and sequencing;
+* `Researcher`, identifying the person who processed each sample.
+
+This enables exploration of associations between gene set activity and both categorical and continuous variables.
+
+```{r load_metadata_discovery}
+data(metadata_example)
+set.seed("123456")
+metadata_example$Researcher <- sample(c("John","Ana","Francisca"),39, replace = TRUE)
+metadata_example$DaysToSequencing <- sample(c(1:20),39, replace = TRUE)
+head(metadata_example)
+```
+
+We can then examine how the gene set associates with these variables using enrichment-based approaches in Discovery Mode. The resulting plot highlights significant associations across variables and visually summarises the direction and strength of the effects. The “simple” mode provides comparison of effect sizes across pairwise contrasts between only two levels of the variable, but can be changed to more levels of comparison (see `?VariableAssociation`). 
+
+In this example, the HernandezSegura gene set shows significant enrichment in samples sequenced by Francisca relative to those processed by Ana or John, which would suggest a differentiated researcher-associated technical impact on the samples' biological phenotype. This gene set also shows a strong depletion in proliferative samples, which is expected given its annotation as senescence-associated and the results from the Benchmarking Mode.  
+
+```{r GSEA_varassoc, fig.width=6, fig.height=6,warning=FALSE, message=FALSE , out.width="70%"}
+VariableAssociation(
+  data = counts_example,
+  metadata = metadata_example,
+  method = "GSEA",
+  cols = c("Condition","Researcher","DaysToSequencing"),
+  mode = "simple",
+  gene_set = HernandezSegura_GeneSet,
+  saturation_value = NULL,
+  nonsignif_color = "white",
+  signif_color = "red",
+  sig_threshold = 0.05,
+  widthlabels = 30,
+  labsize = 10,
+  titlesize = 14,
+  pointSize = 5
+) $plot
+
+```
+ 
+Alternatively, **score-based methods** provide a per-sample metric of gene set activity, which can then be summarised across variables. Here, we compute log2-median scores. The `Condition` variable shows a large effect size (Cohen’s f), confirming strong discrimination between Senescent and Proliferative samples. `Researcher`, however, does not show a detectable association, unlike in the enrichment-based results. This divergence illustrates the value of applying both enrichment- and score-based approaches in a complementary manner.
+
+
+```{r variableassoc_score_sen, fig.width=7, fig.height=7,warning=FALSE, message=FALSE , out.width="70%" } 
+VariableAssociation(data = counts_example, 
+                    metadata = metadata_example, 
+                    method = "logmedian",
+                    cols = c("Condition","Researcher","DaysToSequencing"),  
+                    gene_set = HernandezSegura_GeneSet,
+                    mode="simple",
+                    nonsignif_color = "white", signif_color = "red", saturation_value=NULL,sig_threshold = 0.05,
+                    widthlabels=30, labsize=10, titlesize=14, pointSize=5, discrete_colors=NULL,
+                    continuous_color = "#8C6D03", color_palette = "Set2")$Overall 
+
+```
+
+
+The Benchmarking Mode offers the most comprehensive set of features. Users can seamlessly move from Discovery to Benchmarking once a variable of interest has been identified and further testing is required. Benchmarking is designed to evaluate multiple gene sets simultaneously, whereas Discovery focuses on the performance of a single gene set.
+ 
+
+## Individual Gene Exploration  
+
+To better understand the contribution of individual genes within a gene set, and identify whether specific genes drive the set's collective signal, `markeR` provides `VisualiseIndividualGenes`. Available options include:
+
+* Expression heatmaps of genes across samples or groups of samples;
+* Violin plots showing cross-sample expression distributions of individual genes; 
+* Heatmaps of pairwise cross-sample expression correlation between genes in the set;
+* ROC curves and AUC values to evaluate single genes' performance as phenotypic markers;
+* Effect size estimation (Cohen’s *d*) of expression differences between groups of samples;
+* Principal Component Analysis (PCA) of expression of genes in the set, to evaluate which genes dominate collective variance and how samples separate according to the gene set's expression.
+
+For a complete overview, see `?VisualiseIndividualGenes` and the extended online tutorial [here](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html#visualise-individual-gene-behaviour).
+ 
+## Compare with Reference Gene Sets  
+
+`markeR` also supports comparison of user-defined gene sets against reference collections (e.g., MSigDB). Two complementary similarity metrics are implemented:
+
+* **Jaccard Index**:
+the ratio of the number of genes in common over the total number of genes in the two sets.
+
+* **Log Odds Ratio (logOR)** from Fisher’s exact test of association between gene sets, given a specified gene universe.
+
+Filters can be applied based on similarity thresholds (e.g., minimum Jaccard, OR, or Fisher's test p-value).
+
+Example of Gene Set Similarity (full tutorial [here][tutorial-signaturesimilarity]): We compare two user-defined gene sets against other user-defined gene sets and the MSigDB C2:CP:KEGG_LEGACY collection. Similarity is summarised in a heatmap of log odds ratios, highlighting associations above a defined threshold (e.g., OR > 100 for at least one of the signatures being compared). The underlying data for the heatmap can be accessed via `$data` for further analysis.
+
+
+```{r, fig.width=6, fig.height=8}
+
+# Example data
+signature1 <- c("TP53", "BRCA1", "MYC", "EGFR", "CDK2")
+signature2 <- c("ATXN2", "FUS", "MTOR", "CASP3")
+
+signature_list <- list(
+  "User_Apoptosis" = c("TP53", "CASP3", "BAX"),
+  "User_CellCycle" = c("CDK2", "CDK4", "CCNB1", "MYC"),
+  "User_DNARepair" = c("BRCA1", "RAD51", "ATM"),
+  "User_MTOR" = c("MTOR", "AKT1", "RPS6KB1")
+)
+
+geneset_similarity(
+  signatures = list(Sig1 = signature1, Sig2 = signature2),
+  other_user_signatures = signature_list,
+  collection = "C2",
+  subcollection = "CP:KEGG_LEGACY",
+  metric = "odds_ratio",
+  # Define gene universe (e.g., genes from HPA or your dataset)
+  universe = unique(c(
+    signature1, signature2,
+    unlist(signature_list),
+    msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol
+  )),
+  or_threshold = 100, 
+  width_text=50, 
+  pval_threshold = 0.05
+)$plot
+```
+ 
+
+# Further Reading
+
+Full tutorials with extended examples:
+
+- [Benchmarking Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html)  
+- [Discovery Mode](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_DiscoveryMode.html)  
+- [Signature Similarity](https://diseasetranscriptomicslab.github.io/markeR/articles/Article_GeneSetSimilarity.html)
+
+# Contact
+
+📩 For questions, contact:
+
+**Rita Martins-Silva**  
+Email: [rita.silva@medicina.ulisboa.pt](mailto:rita.silva@medicina.ulisboa.pt)
+
+
+
+[tutorial-benchmarking]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_BenchmarkingMode.html
+[tutorial-discovery]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_DiscoveryMode.html
+[tutorial-signaturesimilarity]: https://diseasetranscriptomicslab.github.io/markeR/articles/Article_GeneSetSimilarity.html
+
+
+# Session Information
+
+```{r}
+sessionInfo()
+```
\ No newline at end of file