clustifyr is designed for general use and flexibility across various analysis pipelines. Its core code operates on matrices and data frames, and hence has minimal package dependencies; wrapper functions are provided for the user's further convenience.

Seurat objects (v2 and v3) are handled directly: the required data is extracted, and the called cell types are inserted directly into the objects.

library(clustifyr)

# clustify() can even return a new object with `type` added to the metadata
# (requested here with seurat_out = TRUE).
res <- clustify(
  input = s_small,
  cluster_col = "res.1",
  ref_mat = cbmc_ref,
  seurat_out = TRUE
)
#> [1] "use"

# or return a correlation matrix instead, if seurat_out is set to FALSE
res2 <- clustify_lists(
  input = s_small,
  marker = pbmc_markers,
  marker_inmatrix = FALSE,
  cluster_col = "res.1",
  seurat_out = FALSE
)

Other scRNA-seq object formats are easily supported by adding defaults to the lookup table object_loc_lookup, which currently includes SCE, URD, CDS, and FSCE.

# print the lookup table: one column per supported object type, with rows
# (expr, meta, var, col) giving the location of each piece of data in that type
object_loc_lookup
#>              SingleCellExperiment               URD
#> expr  input@assays$data$logcounts input@logupx.data
#> meta as.data.frame(input@colData)        input@meta
#> var                          NULL   input@var.genes
#> col                    cell_type1           cluster
#>                         FunctionalSingleCellExperiment
#> expr input@ExperimentList$rnaseq@assays$data$logcounts
#> meta               input@ExperimentList$rnaseq@colData
#> var                                               NULL
#> col                                     leiden_cluster
#>                             Seurat
#> expr         input@assays$RNA@data
#> meta               input@meta.data
#> var  input@assays$RNA@var.features
#> col                  RNA_snn_res.1
#>                                                                                                                CellDataSet
#> expr do.call(function(x) {row.names(x) <- input@featureData@data$gene_short_name; return(x)}, list(input@assayData$exprs))
#> meta                                                                                   as.data.frame(input@phenoData@data)
#> var                     as.character(input@featureData@data$gene_short_name[input@featureData@data$use_for_ordering == T])
#> col                                                                                                           Main_Cluster
#>                                                                                                                                                                                                                                                                                              H5File
#> expr                                                                                                                {mat <- input[["layers/norm_data"]][,];\ncolnames(mat) <- input[["row_attrs/gene_names"]][];\nrownames(mat) <- input[["col_attrs/cell_names"]][];\nmat <- t(mat);\nreturn(mat)}
#> meta {l <- input$ls(recursive=TRUE);\ns <- l[,1][stringr::str_detect(l[,1], "col_attrs/")];\ns2 <- sapply(s, FUN=function(x) {input[[x]][]});\ncolnames(s2) <- stringr::str_remove(colnames(s2),"col_attrs/");\nmeta <- tibble::column_to_rownames(as.data.frame(s2), "cell_names");\nreturn(meta)}
#> var                                                                                                                                                                                                                                                                                            NULL
#> col                                                                                                                                                                                                                                                                                      orig.ident

# add the Seurat v3 locations as a column of the lookup table,
# one entry per row (expr, meta, var, col)
object_loc_lookup$Seurat <- c(
  expr = "input@assays$RNA@data",
  meta = "input@meta.data",
  var = "input@assays$RNA@var.features",
  col = "RNA_snn_res.1"
)

For example, working with a scrunchy object is as easy as:

# pass the object directly, together with a reference matrix, a marker set,
# and the genes to query; `cluster_col` names the cluster column to use
res <- clustify_nudge(
  input = fsce_small,
  ref_mat = cbmc_ref,
  marker = cbmc_m,
  query_genes = pbmc_vargenes,
  cluster_col = "k_cluster"
)

Making new references from objects

Wrappers are also provided to help turn scRNA-seq objects into references.

# works for both Seurat v2 and v3 objects
seurat_ref <- object_ref(
  input = s_small3,
  cluster_col = "RNA_snn_res.1"
)

# and other object types, via the lookup table
# (the result gets its own name so that it does not mask the object_ref()
# function in the workspace)
sce_ref <- object_ref(
  input = sce_small,
  cluster_col = "cell_type1"
)