Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
327 changes: 327 additions & 0 deletions R/Trajectory_CellTypes_v12.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
#' Trajectory analysis with TSCAN
#'
#' Perform the CCBR single-cell RNA-seq trajectory workflow using TSCAN.
#'
#' @param Seurat_Object A Seurat object or path to an RDS file containing one.
#' @param MetaData A data.frame with per-cell metadata; must contain the clustering column.
#' @param Clusters_to_Use Character scalar naming the metadata column to use for clusters.
#' @param Custom_Gene_List Optional character vector or delimited string of genes to highlight.
#'
#' @return Invisibly returns `NULL`. Generates trajectory and pseudotime plots as side effects.
#' @examples
#' \donttest{
#' if (requireNamespace("Seurat", quietly = TRUE)) {
#' seurat_example <- Seurat::CreateSeuratObject(
#' counts = matrix(rpois(200, lambda = 5), nrow = 20,
#' dimnames = list(paste0("g", 1:20), paste0("c", 1:10)))
#' )
#' seurat_example <- Seurat::AddMetaData(
#' object = seurat_example,
#' metadata = data.frame(cluster = sample(letters[1:2], 10, replace = TRUE),
#' row.names = colnames(seurat_example))
#' )
#' Trajectory_CellTypes(
#' Seurat_Object = seurat_example,
#' MetaData = Seurat::FetchData(seurat_example, vars = c("cluster")),
#' Clusters_to_Use = "cluster"
#' )
#' }
#' }
#' @export
Trajectory_CellTypes <- function(Seurat_Object,
MetaData,
Clusters_to_Use,
Custom_Gene_List = NULL) {
Comment on lines +30 to +34
required_packages <- c(
"Seurat", "scater", "scran", "scuttle", "TrajectoryUtils", "TSCAN",
"ggplot2", "gridExtra", "mclust",
"SingleCellExperiment", "SummarizedExperiment"
)
ensure_namespace(required_packages)
Comment on lines +35 to +40

seurat_object <- load_seurat_object(Seurat_Object)

if (!is.data.frame(MetaData)) {
stop("MetaData must be a data.frame.", call. = FALSE)
}
if (!is.character(Clusters_to_Use) || length(Clusters_to_Use) != 1L ||
is.na(Clusters_to_Use)) {
stop("Clusters_to_Use must be a non-missing character scalar.", call. = FALSE)
}
if (!Clusters_to_Use %in% colnames(MetaData)) {
stop(sprintf("Column '%s' not found in MetaData.", Clusters_to_Use), call. = FALSE)
}

cache_original <- Sys.getenv("R_USER_CACHE_DIR", unset = NA_character_)
on.exit({
if (is.na(cache_original)) {
Sys.unsetenv("R_USER_CACHE_DIR")
} else {
Sys.setenv(R_USER_CACHE_DIR = cache_original)
}
}, add = TRUE)
cache_dir <- tools::R_user_dir("Trajectory_CellTypes", which = "cache")
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
Sys.setenv(R_USER_CACHE_DIR = cache_dir)

if (!is.null(rownames(MetaData)) && all(colnames(seurat_object) %in% rownames(MetaData))) {
MetaData <- MetaData[colnames(seurat_object), , drop = FALSE]
}

user_clusters <- MetaData[[Clusters_to_Use]]
if (length(user_clusters) != ncol(seurat_object)) {
warning("MetaData rows do not match Seurat object cells; attempting to align by cell names.")
shared_cells <- intersect(colnames(seurat_object), rownames(MetaData))
if (length(shared_cells) == 0) {
stop("Unable to align MetaData with Seurat object cells.", call. = FALSE)
}
MetaData <- MetaData[shared_cells, , drop = FALSE]
MetaData <- MetaData[colnames(seurat_object), , drop = FALSE]
user_clusters <- MetaData[[Clusters_to_Use]]
}
if (length(user_clusters) != ncol(seurat_object)) {
stop("MetaData does not align with the Seurat object cells after attempted matching.", call. = FALSE)
}
Comment thread
lobanovav marked this conversation as resolved.

label_column <- resolve_cluster_column(seurat_object, MetaData, Clusters_to_Use)
if (!label_column %in% colnames(seurat_object@meta.data)) {
seurat_object[[label_column]] <- user_clusters
}
cluster_labels <- seurat_object[[label_column, drop = TRUE]]
Seurat::Idents(seurat_object) <- cluster_labels

custom_gene_list <- normalise_gene_list(Custom_Gene_List)

message("The following genes were used:")
gene_universe <- rownames(seurat_object)
genes_present <- intersect(custom_gene_list, gene_universe)
if (length(genes_present) == 0) {
message("None")
} else {
message(paste(genes_present, collapse = ", "))
}

message("The following genes were not found:")
missing_genes <- setdiff(custom_gene_list, gene_universe)
if (length(missing_genes) == 0) {
message("None")
} else {
message(paste(missing_genes, collapse = ", "))
}

custom_gene_list <- genes_present

sce <- Seurat::as.SingleCellExperiment(seurat_object)
SummarizedExperiment::colData(sce)$label <- cluster_labels

aggregated <- scuttle::aggregateAcrossCells(sce, ids = cluster_labels)
centroids <- SingleCellExperiment::reducedDim(aggregated, "PCA")
mst <- TSCAN::createClusterMST(centroids, clusters = NULL)

line_data_tsne <- TSCAN::reportEdges(aggregated, mst = mst, clusters = NULL, use.dimred = "TSNE")
line_data_tsne <- harmonise_edge_axes(line_data_tsne, "tSNE")
plot_tsne1 <- scater::plotTSNE(sce, colour_by = "label") +
ggplot2::geom_line(data = line_data_tsne, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge))

line_data_umap <- TSCAN::reportEdges(aggregated, mst = mst, clusters = NULL, use.dimred = "UMAP")
line_data_umap <- harmonise_edge_axes(line_data_umap, "UMAP")
plot_umap1 <- scater::plotUMAP(sce, colour_by = "label") +
ggplot2::geom_line(data = line_data_umap, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge))

mapping <- TSCAN::mapCellsToEdges(sce, mst = mst, use.dimred = "PCA")
ordered <- TSCAN::orderCells(mapping, mst)
common_pseudo <- TrajectoryUtils::averagePseudotime(ordered)

plot_tsne2 <- scater::plotTSNE(sce, colour_by = I(common_pseudo), text_by = "label", text_colour = "red") +
ggplot2::geom_line(data = line_data_tsne, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge)) +
ggplot2::ggtitle("TSCAN-based MST")

plot_umap2 <- scater::plotUMAP(sce, colour_by = I(common_pseudo), text_by = "label", text_colour = "red") +
ggplot2::geom_line(data = line_data_umap, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge)) +
ggplot2::ggtitle("TSCAN-based MST")

pseudo_all <- TSCAN::quickPseudotime(sce, use.dimred = "PCA")
pseudo_mnn <- TSCAN::quickPseudotime(sce, use.dimred = "PCA", dist.method = "mnn")
if (!is.null(pseudo_mnn$connected$TSNE)) {
pseudo_mnn$connected$TSNE <- harmonise_edge_axes(pseudo_mnn$connected$TSNE, "tSNE")
}
if (!is.null(pseudo_mnn$connected$UMAP)) {
pseudo_mnn$connected$UMAP <- harmonise_edge_axes(pseudo_mnn$connected$UMAP, "UMAP")
}
mnn_pseudo <- TrajectoryUtils::averagePseudotime(pseudo_mnn$ordering)

plot_tsne3 <- scater::plotTSNE(sce, colour_by = I(mnn_pseudo), text_by = "label", text_colour = "red") +
ggplot2::geom_line(data = pseudo_mnn$connected$TSNE, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge)) +
ggplot2::ggtitle("TSCAN with MNN distances-based MST")

plot_umap3 <- scater::plotUMAP(sce, colour_by = I(mnn_pseudo), text_by = "label", text_colour = "red") +
ggplot2::geom_line(data = pseudo_mnn$connected$UMAP, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge)) +
ggplot2::ggtitle("TSCAN with MNN distances-based MST")

sce$Pseudotime <- TrajectoryUtils::pathStat(ordered)[, 1]
pseudo_tests <- TSCAN::testPseudotime(sce, pseudotime = ordered[, 1])[[1]]
sorted_tests <- pseudo_tests[order(pseudo_tests$p.value), ]

downregulated <- sorted_tests[sorted_tests$logFC < 0, , drop = FALSE]
downregulated$SYMBOL <- rownames(downregulated)
top_down <- head(downregulated$SYMBOL, 10)
plot_decrease <- scater::plotExpression(
sce,
features = top_down,
x = "Pseudotime",
colour_by = "label",
ncol = 5
) + ggplot2::ggtitle("Expression of the top 10 genes that decrease with pseudotime")

upregulated <- sorted_tests[sorted_tests$logFC > 0, , drop = FALSE]
upregulated$SYMBOL <- rownames(upregulated)
top_up <- head(upregulated$SYMBOL, 10)
plot_increase <- scater::plotExpression(
sce,
features = top_up,
x = "Pseudotime",
colour_by = "label",
ncol = 5
) + ggplot2::ggtitle("Expression of the top 10 genes that increase with pseudotime")

on_first_path <- !is.na(sce$Pseudotime)

print(Seurat::DimPlot(seurat_object))
set.seed(1)
plot(pseudo_all$mst)
print(plot_tsne1)
gridExtra::grid.arrange(plot_tsne2, plot_tsne3, ncol = 2)
print(plot_umap1)
gridExtra::grid.arrange(plot_umap2, plot_umap3, ncol = 2)
plot(plot_decrease)
plot(plot_increase)
scater::plotHeatmap(
sce[, on_first_path],
order_columns_by = "Pseudotime",
colour_columns_by = "label",
features = head(top_up, 50),
center = TRUE,
main = "Expression of the top 50 genes that increase with pseudotime"
)
scater::plotHeatmap(
sce[, on_first_path],
order_columns_by = "Pseudotime",
colour_columns_by = "label",
features = head(top_down, 50),
center = TRUE,
main = "Expression of the top 50 genes that decrease with pseudotime"
Comment on lines +198 to +212
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leaving this "AS IS" for compatibility purposes, but worth looking into

)

if (length(custom_gene_list) > 0) {
custom_scatter <- scater::plotExpression(
sce,
features = custom_gene_list,
x = "Pseudotime",
colour_by = "label",
ncol = 5
) + ggplot2::ggtitle("Expression of custom genes across pseudotime")
plot(custom_scatter)

scater::plotHeatmap(
sce[, on_first_path],
order_columns_by = "Pseudotime",
colour_columns_by = "label",
features = custom_gene_list,
center = TRUE,
main = "Expression of custom genes across pseudotime"
)
}

invisible(NULL)
}

utils::globalVariables(c(
"label", "edge", "Pseudotime", "SYMBOL", "UMAP_1", "UMAP_2",
"tSNE_1", "tSNE_2", "mst"
))

ensure_namespace <- function(pkgs) {
missing <- vapply(pkgs, function(pkg) !requireNamespace(pkg, quietly = TRUE), logical(1))
if (any(missing)) {
stop(
sprintf(
"Missing required packages: %s",
paste(pkgs[missing], collapse = ", ")
),
call. = FALSE
)
}
invisible(TRUE)
}

load_seurat_object <- function(input) {
if (inherits(input, "Seurat")) {
return(input)
}
if (is.character(input) && length(input) == 1L && file.exists(input)) {
return(readRDS(input))
}
if (requireNamespace("nidapFunctions", quietly = TRUE)) {
path <- tryCatch(
nidapFunctions::nidapGetPath(input, "seurat_object.rds"),
error = function(e) NULL
)
if (!is.null(path) && file.exists(path)) {
return(readRDS(path))
}
}
stop(
"Provide a Seurat object or a path to an RDS file containing one.",
call. = FALSE
)
}

resolve_cluster_column <- function(seurat_object, metadata, selected) {
seurat_columns <- colnames(seurat_object@meta.data)
if (selected %in% seurat_columns) {
return(selected)
}
idx <- match(selected, colnames(metadata))
if (!is.na(idx) && idx <= length(seurat_columns)) {
candidate <- seurat_columns[idx]
if (!is.na(candidate) && nzchar(candidate)) {
return(candidate)
}
}
Comment thread
lobanovav marked this conversation as resolved.
normalised_selected <- make.names(selected)
matches <- seurat_columns[make.names(seurat_columns) == normalised_selected]
if (length(matches) == 1) {
return(matches)
}
selected
}

normalise_gene_list <- function(genes) {
if (is.null(genes) || length(genes) == 0) {
return(character())
}
if (length(genes) == 1L) {
genes <- unlist(strsplit(gsub(",", " ", genes), " "))
}
genes <- genes[genes != ""]
unique(trimws(genes))
}

harmonise_edge_axes <- function(edge_df, prefix) {
if (is.null(edge_df) || nrow(edge_df) == 0) {
return(edge_df)
}
targets <- c(sprintf("%s_1", prefix), sprintf("%s_2", prefix))
lower_targets <- tolower(targets)
current_names <- names(edge_df)
for (i in seq_along(targets)) {
required <- targets[i]
if (!required %in% current_names) {
candidates <- current_names[tolower(current_names) == lower_targets[i]]
if (length(candidates) == 1L) {
names(edge_df)[names(edge_df) == candidates] <- required
}
}
}
edge_df
}
Loading
Loading