Calculate repertoire similarity
Usage
calc_similarity(
input,
data_col,
cluster_col,
method = abdiv::jaccard,
chain = NULL,
chain_col = global$chain_col,
prefix = NULL,
return_mat = FALSE,
sep = global$sep
)
Arguments
- input
Object containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.
- data_col
meta.data column containing values to use for calculating pairwise similarity between clusters, e.g. 'clonotype_id'
- cluster_col
meta.data column containing cluster IDs to use for calculating repertoire overlap
- method
Method to use for comparing clusters. Possible values include:
'count', to count the number of clonotypes shared between each pair of clusters
A function that takes two numeric vectors containing counts for each unique value in data_col, e.g. abdiv::jaccard()
- chain
Chain to use for comparing clusters. To perform calculations using a single chain, the column passed to the data_col argument must contain per-chain data such as CDR3 sequences. Set to NULL to include all chains.
- chain_col
meta.data column containing chains for each cell
- prefix
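Prefix to add to new columns. If NULL, the new columns appear to be named after the method, e.g. 'jaccard_avid_1' in the first example below.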
- return_mat
If TRUE, return a matrix of pairwise similarity values between clusters. If FALSE, the results are added to the input object as new columns.
- sep
Separator used for storing per-chain V(D)J data for each cell
Examples
# Calculate repertoire overlap
res <- calc_similarity(
vdj_sce,
data_col = "clonotype_id",
cluster_col = "orig.ident",
method = abdiv::jaccard
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 51 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq jaccard_avid_1
#> <character> <character> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 NA NA 0
#> jaccard_avid_2
#> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 0.941176
# Add a prefix to the new columns
# this is useful if multiple calculations are stored in the meta.data
res <- calc_similarity(
vdj_sce,
data_col = "clonotype_id",
cluster_col = "orig.ident",
prefix = "bcr_"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 51 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq bcr_avid_1 bcr_avid_2
#> <character> <character> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 NA NA 0 0.941176
# Return a matrix instead of adding the results to the input object
calc_similarity(
vdj_sce,
data_col = "clonotype_id",
cluster_col = "orig.ident",
return_mat = TRUE
)
#> avid_1 avid_2
#> avid_1 0.0000000 0.9411765
#> avid_2 0.9411765 0.0000000