Skip to contents

Calculate repertoire diversity

Usage

calc_diversity(
  input,
  data_col,
  cluster_col = NULL,
  method = abdiv::simpson,
  downsample = FALSE,
  n_boots = 0,
  chain = NULL,
  chain_col = global$chain_col,
  prefix = paste0(data_col, "_"),
  return_df = FALSE,
  sep = global$sep
)

Arguments

input

Single cell object or data.frame containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.

data_col

meta.data column containing values to use for calculating diversity, e.g. 'clonotype_id'

cluster_col

meta.data column containing cluster IDs to use for grouping cells when calculating diversity. If cluster_col is omitted, the diversity index will be calculated using all cells.

method

Method to use for calculating diversity, provided as a function, e.g. abdiv::simpson. To use multiple methods, pass a named list; the list names are used to name the output columns.

downsample

Downsample clusters to the same size when calculating diversity metrics

n_boots

Number of bootstrap replicates to use for calculating standard error. If n_boots is 0, this step is skipped.

chain

Chain to use for calculating diversity. To calculate diversity for a single chain, the column passed to the data_col argument must contain per-chain data such as CDR3 sequences. Set to NULL to include all chains.

chain_col

meta.data column containing chains for each cell

prefix

Prefix to add to new columns

return_df

Return results as a data.frame. If FALSE, results will be added to the input object.

sep

Separator used for storing per-chain V(D)J data for each cell

Value

Single cell object or data.frame with diversity metrics

See also

Examples

# Calculate diversity for each cell cluster
res <- calc_diversity(
  vdj_sce,
  data_col    = "clonotype_id",
  cluster_col = "orig.ident",
  method      = abdiv::simpson
)
#> Loading required package: SingleCellExperiment
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: ‘MatrixGenerics’
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins,
#>     colOrderStats, colProds, colQuantiles, colRanges, colRanks,
#>     colSdDiffs, colSds, colSums2, colTabulates, colVarDiffs,
#>     colVars, colWeightedMads, colWeightedMeans,
#>     colWeightedMedians, colWeightedSds, colWeightedVars, rowAlls,
#>     rowAnyNAs, rowAnys, rowAvgsPerColSet, rowCollapse, rowCounts,
#>     rowCummaxs, rowCummins, rowCumprods, rowCumsums, rowDiffs,
#>     rowIQRDiffs, rowIQRs, rowLogSumExps, rowMadDiffs, rowMads,
#>     rowMaxs, rowMeans2, rowMedians, rowMins, rowOrderStats,
#>     rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs,
#>     rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm,
#>     append, as.data.frame, basename, cbind, colnames, dirname,
#>     do.call, duplicated, eval, evalq, get, grep, grepl,
#>     intersect, is.unsorted, lapply, mapply, match, mget, order,
#>     paste, pmax, pmax.int, pmin, pmin.int, rank, rbind, rownames,
#>     sapply, setdiff, sort, table, tapply, union, unique, unsplit,
#>     which.max, which.min
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages
#>     'citation("pkgname")'.
#> 
#> Attaching package: ‘Biobase’
#> The following object is masked from ‘package:MatrixGenerics’:
#> 
#>     rowMedians
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     anyMissing, rowMedians

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 50 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq
#>                      <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA
#>                      clonotype_id_simpson_diversity
#>                                           <numeric>
#> 1_AAGCCGCAGCTTATCG-1                             NA