Combined function to compare scRNA-seq data to bulk RNA-seq data and marker list

Usage

clustify_nudge(input, ...)

# Default S3 method
clustify_nudge(
  input,
  ref_mat,
  marker,
  metadata = NULL,
  cluster_col = NULL,
  query_genes = NULL,
  compute_method = "spearman",
  weight = 1,
  threshold = -Inf,
  dr = "umap",
  norm = "diff",
  call = TRUE,
  marker_inmatrix = TRUE,
  mode = "rank",
  obj_out = FALSE,
  seurat_out = obj_out,
  rename_prefix = NULL,
  lookuptable = NULL,
  ...
)

# S3 method for class 'Seurat'
clustify_nudge(
  input,
  ref_mat,
  marker,
  cluster_col = NULL,
  query_genes = NULL,
  compute_method = "spearman",
  weight = 1,
  obj_out = TRUE,
  seurat_out = obj_out,
  threshold = -Inf,
  dr = "umap",
  norm = "diff",
  marker_inmatrix = TRUE,
  mode = "rank",
  rename_prefix = NULL,
  ...
)

Arguments

input: expression matrix or object
...: passed to matrixize_markers
ref_mat: reference expression matrix
marker: matrix of markers
metadata: cell cluster assignments, supplied as a vector or data.frame. If data.frame is supplied then cluster_col needs to be set.
cluster_col: column in metadata that contains cluster ids per cell. Will default to first column of metadata if not supplied. Not required if running correlation per cell.
query_genes: A vector of genes of interest to compare. If NULL, then the genes common to the input expression matrix and ref_mat will be used for comparison.
compute_method: method(s) for computing similarity scores
weight: relative weight for the gene list scores, when added to correlation score
threshold: minimum score threshold for identity calling, only used when obj_out = TRUE
dr: stored dimension reduction
norm: whether and how the results are normalized
call: make call or just return score matrix
marker_inmatrix: whether marker genes are already in preprocessed matrix form
mode: whether to nudge using marker expression percentage ("pct") or ranked correlation score ("rank")
obj_out: whether to output object instead of cor matrix
seurat_out: output cor matrix or called seurat object (deprecated, use obj_out)
rename_prefix: prefix to add to type and r column names
lookuptable: if not supplied, will look in built-in table for object parsing

Value

A single-cell object, or a matrix of numeric values with clusters from input as row names and cell types from ref_mat as column names.

Examples


# Seurat
so <- so_pbmc()
clustify_nudge(
    input = so,
    ref_mat = cbmc_ref,
    marker = cbmc_m,
    cluster_col = "seurat_clusters",
    threshold = 0.8,
    obj_out = FALSE,
    mode = "pct",
    dr = "umap"
)
#> object data retrieval complete, moving to similarity computation
#> using # of genes: 356
#> similarity computation completed, matrix of 9 x 13, preparing output
#>            B CD14+ Mono CD16+ Mono     CD34+       CD4 T      CD8 T         DC
#> 0 -0.2302862 -0.1769367  0.2470036 0.6617410  0.78460428  0.7026442  0.3581463
#> 1 -0.2332382 -0.1303857  0.2576950 0.6365666  0.84244517  0.6374601  0.3526247
#> 2 -0.3500226  0.9319558  0.5265214 0.4937256  0.21503487  0.2415592  0.5348927
#> 3  0.8984684 -0.2649611  0.1970959 0.6186764  0.36216197  0.3957086  0.3467832
#> 4 -0.3181218 -0.2663265  0.1915529 0.5803217  0.63560967  0.5758856  0.2978260
#> 5 -0.3192465  0.6531223  0.9321784 0.5267102  0.23730786  0.3556856  0.5079047
#> 6 -0.4031240 -0.3243590  0.1449669 0.5519016  0.47571428  0.7001787  0.2509777
#> 7 -0.2131799  0.3995399  0.4037666 0.5326527  0.28589569  0.2471923  0.8598491
#> 8 -0.7094324 -0.4408541 -0.1458630 0.1911739 -0.09794103 -0.1014701 -0.1217075
#>       Eryth Memory CD4 T         Mk         NK Naive CD4 T       pDCs
#> 0 0.5676881    0.7205700 -0.6595967 -0.1761298   0.6792568 0.37054943
#> 1 0.5546148    0.7089640 -0.6869042 -0.1324900   0.6672364 0.35537999
#> 2 0.4613372    0.4766346 -0.5237981 -0.2573996   0.4879290 0.32134624
#> 3 0.4591180    0.4185166 -0.7490495 -0.3938896   0.5124924 0.45590634
#> 4 0.5214031    0.6270097 -0.6282614  0.5471485   0.5891127 0.33848708
#> 5 0.4607773    0.4931425 -0.5462895 -0.1977233   0.4907296 0.33568025
#> 6 0.4760044    0.5801648 -0.6771373  0.8816119   0.5435461 0.27642297
#> 7 0.4244292    0.4499408 -0.6223638 -0.1810176   0.4546397 0.66571228
#> 8 0.2690003    0.1155586  0.7281144 -0.7998201   0.1053592 0.09577586

# Matrix
clustify_nudge(
    input = pbmc_matrix_small,
    ref_mat = cbmc_ref,
    metadata = pbmc_meta,
    marker = as.matrix(cbmc_m),
    query_genes = pbmc_vargenes,
    cluster_col = "classified",
    threshold = 0.8,
    call = FALSE,
    marker_inmatrix = FALSE,
    mode = "pct"
)
#> using # of genes: 599
#> similarity computation completed, matrix of 9 x 13, preparing output
#>                       B CD14+ Mono  CD16+ Mono     CD34+       CD4 T
#> B             0.9093577 -0.2452939  0.22341215 0.6248639  0.39898892
#> CD14+ Mono   -0.3184633  0.9145428  0.52579175 0.5103056  0.20913853
#> CD8 T        -0.2866333 -0.2920874  0.17944732 0.5663773  0.64950686
#> DC           -0.1927865  0.3971091  0.41292804 0.5694074  0.31768980
#> FCGR3A+ Mono -0.2924419  0.6387074  0.92949138 0.5331927  0.22686950
#> Memory CD4 T -0.2094668 -0.1734003  0.22245854 0.6265186  0.86108263
#> NK           -0.3523479 -0.3353237  0.15173936 0.5350808  0.47524944
#> Naive CD4 T  -0.2115884 -0.2355533  0.19645924 0.6480900  0.80013677
#> Platelet     -0.6782452 -0.3759687 -0.08362827 0.2455947 -0.05758481
#>                    CD8 T          DC     Eryth Memory CD4 T         Mk
#> B             0.43921857  0.35367232 0.4817829    0.4803367 -0.6748463
#> CD14+ Mono    0.25084659  0.51955311 0.4559350    0.4553454 -0.4660850
#> CD8 T         0.59418188  0.26878200 0.5007841    0.6323214 -0.5987391
#> DC            0.28543102  0.84933995 0.4470039    0.4764929 -0.5578229
#> FCGR3A+ Mono  0.35105547  0.48594006 0.4604308    0.4614474 -0.4760099
#> Memory CD4 T  0.65776299  0.30625770 0.5395170    0.7128879 -0.6336165
#> NK            0.69267415  0.23172856 0.4578726    0.5665250 -0.6199834
#> Naive CD4 T   0.72313292  0.29335212 0.5528441    0.7277404 -0.6269043
#> Platelet     -0.07272109 -0.06019224 0.3068445    0.1457308  0.7319575
#>                      NK Naive CD4 T      pDCs
#> B            -0.3536418   0.5407149 0.4801131
#> CD14+ Mono   -0.2621333   0.4564707 0.3710598
#> CD8 T         0.5582283   0.6036230 0.3519523
#> DC           -0.1659100   0.4475783 0.6825388
#> FCGR3A+ Mono -0.1921798   0.4574654 0.3755701
#> Memory CD4 T -0.1208282   0.6871755 0.3668161
#> NK            0.8940904   0.5505877 0.3186506
#> Naive CD4 T  -0.1890644   0.6967073 0.3594580
#> Platelet     -0.7349565   0.1609963 0.1110947