Calculate the frequency of each cell label present in the provided meta.data column. This is useful for comparing the proportion of cells belonging to different samples, cell types, clonotypes, isotypes, etc.
Usage
calc_frequency(
input,
data_col,
cluster_col = NULL,
prefix = paste0(data_col, "_"),
return_df = FALSE,
per_chain = FALSE,
chain = NULL,
chain_col = global$chain_col,
sep = global$sep
)
Arguments
- input
Single cell object or data.frame containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.
- data_col
meta.data column containing cell labels to use for calculating frequency. To calculate clonotype frequencies, provide the column containing clonotype IDs; to calculate isotype frequencies, provide the column containing cell isotypes. By default the clonotype_id is used for calculations.
- cluster_col
meta.data column containing cluster IDs to use for grouping cells when calculating frequency
- prefix
Prefix to add to new columns
- return_df
Return results as a data.frame. If set to FALSE, results will be added to the input object.
- per_chain
If TRUE, the frequency of each per-chain value will be calculated. If FALSE, per-chain data will not be parsed and the values present in data_col will be used as is.
- chain
Chain(s) to use for calculating frequency. Set to NULL to include all chains.
- chain_col
meta.data column(s) containing chains for each cell
- sep
Separator used for storing per-chain V(D)J data for each cell
Examples
# Calculate clonotype abundance using all cells
res <- calc_frequency(
vdj_sce,
data_col = "clonotype_id"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 53 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq n_cells clonotype_id_freq
#> <character> <character> <integer> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> clonotype_id_pct clonotype_id_grp
#> <numeric> <factor>
#> 1_AAGCCGCAGCTTATCG-1 NA NA
# Group cells based on meta.data column before calculating abundance
res <- calc_frequency(
vdj_sce,
data_col = "clonotype_id",
cluster_col = "orig.ident"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 54 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq n_cells clonotype_id_freq
#> <character> <character> <numeric> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> clonotype_id_pct clonotype_id_shared
#> <numeric> <logical>
#> 1_AAGCCGCAGCTTATCG-1 NA NA
#> clonotype_id_grp
#> <factor>
#> 1_AAGCCGCAGCTTATCG-1 NA
# Add a prefix to the new columns
# this is useful if multiple abundance calculations are stored in the
# meta.data
res <- calc_frequency(
vdj_sce,
data_col = "clonotype_id",
prefix = "bcr_"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 53 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq n_cells bcr_freq
#> <character> <character> <integer> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> bcr_pct bcr_grp
#> <numeric> <factor>
#> 1_AAGCCGCAGCTTATCG-1 NA NA
# Return a data.frame instead of adding the results to the input object
res <- calc_frequency(
vdj_sce,
data_col = "clonotype_id",
return_df = TRUE
)
head(res, 1)
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> 1_AAGCCGCAGCTTATCG-1 0 -5.977054 -2.418108 <NA>
#> exact_subclonotype_id chains n_chains cdr3 cdr3_nt
#> 1_AAGCCGCAGCTTATCG-1 NA <NA> NA <NA> <NA>
#> cdr3_length cdr3_nt_length v_gene d_gene j_gene
#> 1_AAGCCGCAGCTTATCG-1 <NA> <NA> <NA> <NA> <NA>
#> c_gene isotype reads umis productive full_length
#> 1_AAGCCGCAGCTTATCG-1 <NA> <NA> <NA> <NA> <NA> <NA>
#> paired v_ins v_del v_mis d_ins d_del d_mis j_ins
#> 1_AAGCCGCAGCTTATCG-1 NA <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> j_del j_mis c_ins c_del c_mis all_ins all_del
#> 1_AAGCCGCAGCTTATCG-1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> all_mis vd_ins vd_del dj_ins dj_del v_mis_freq
#> 1_AAGCCGCAGCTTATCG-1 <NA> <NA> <NA> <NA> <NA> <NA>
#> d_mis_freq j_mis_freq c_mis_freq all_mis_freq
#> 1_AAGCCGCAGCTTATCG-1 <NA> <NA> <NA> <NA>
#> n_cells clonotype_id_freq clonotype_id_pct
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> clonotype_id_grp
#> 1_AAGCCGCAGCTTATCG-1 <NA>