Skip to contents

Calculate repertoire similarity

Usage

calc_similarity(
  input,
  data_col,
  cluster_col,
  method = abdiv::jaccard,
  chain = NULL,
  chain_col = global$chain_col,
  prefix = NULL,
  return_mat = FALSE,
  sep = global$sep
)

Arguments

input

Object containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.

data_col

meta.data column containing values to use for calculating pairwise similarity between clusters, e.g. 'clonotype_id'

cluster_col

meta.data column containing cluster IDs to use for calculating repertoire overlap

method

Method to use for comparing clusters. Possible values include:

  • 'count', to count the number of clonotypes shared between each pair of clusters

  • A function that takes two numeric vectors containing counts for each unique value in data_col, e.g. abdiv::jaccard()

chain

Chain to use for comparing clusters. To perform calculations using a single chain, the column passed to the data_col argument must contain per-chain data such as CDR3 sequences. Set to NULL to include all chains.

chain_col

meta.data column containing chains for each cell

prefix

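Prefix to add to new columns. If set to NULL, the prefix appears to be derived from the method name (e.g. 'jaccard_' in the first example below).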

return_mat

If set to TRUE, return a matrix with similarity values. If set to FALSE, results will be added to the input object.

sep

Separator used for storing per-chain V(D)J data for each cell

Value

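Single cell object or data.frame with similarity values added as new columns. If return_mat is set to TRUE, a matrix with similarity values is returned instead.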

Examples

# Calculate repertoire overlap
res <- calc_similarity(
  vdj_sce,
  data_col    = "clonotype_id",
  cluster_col = "orig.ident",
  method      = abdiv::jaccard
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 51 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq jaccard_avid_1
#>                      <character>  <character>      <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA              0
#>                      jaccard_avid_2
#>                           <numeric>
#> 1_AAGCCGCAGCTTATCG-1       0.941176

# Add a prefix to the new columns
# this is useful if multiple calculations are stored in the meta.data
res <- calc_similarity(
  vdj_sce,
  data_col    = "clonotype_id",
  cluster_col = "orig.ident",
  prefix      = "bcr_"
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 51 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq bcr_avid_1 bcr_avid_2
#>                      <character>  <character>  <numeric>  <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA          0   0.941176

# Return a matrix instead of adding the results to the input object
calc_similarity(
  vdj_sce,
  data_col    = "clonotype_id",
  cluster_col = "orig.ident",
  return_mat  = TRUE
)
#>           avid_1    avid_2
#> avid_1 0.0000000 0.9411765
#> avid_2 0.9411765 0.0000000