Modify per-chain V(D)J data for each cell. This function offers greater
flexibility than summarize_vdj(), but is less user-friendly.
Usage
mutate_vdj(
input,
...,
clonotype_col = global$clonotype_col,
data_cols = NULL,
return_df = FALSE,
sep = global$sep
)
Arguments
- input
Single cell object or data.frame containing V(D)J data. If a data.frame is provided, cell barcodes should be stored as row names.
- ...
Name-value pairs to use for creating or modifying per-chain V(D)J meta.data, e.g. mean_umis = mean(umis).
To allow for modification of per-chain V(D)J data, the data for each cell is converted into a vector, e.g. 'IGH;IGK' is equivalent to c('IGH', 'IGK'). This allows R vector operations to be performed on the per-chain values. Any operation that produces a result with length greater than 1 must be wrapped in list(). For example, new_col = umis + 1 returns a new value for each chain, so to prevent an error it must be written as new_col = list(umis + 1).
- clonotype_col
meta.data column containing clonotype IDs. This is used to identify columns containing V(D)J data.
- data_cols
meta.data columns containing V(D)J data to modify. If NULL, data are automatically selected by identifying columns that have NAs in the same rows as clonotype_col.
- return_df
Return results as a data.frame. If FALSE, results will be added to the input object.
- sep
Separator used for storing per-cell V(D)J data. If NULL, columns containing V(D)J data will not be converted to vectors for modification.
Examples
# Calculate mean reads and UMIs per cell
res <- mutate_vdj(
vdj_sce,
mean_umis = mean(umis),
mean_reads = mean(reads)
)
head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 51 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> 1_AATCCAGCATTACGAC-1 avid_1 6 4 0
#> 1_ACAGCTAGTCTGGTCG-1 avid_1 15 4 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.977054 -2.418108 NA
#> 1_AATCCAGCATTACGAC-1 0 1.282983 -0.700069 NA
#> 1_ACAGCTAGTCTGGTCG-1 0 -0.537163 0.133260 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq mean_umis mean_reads
#> <character> <character> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
# Calculate the total number of insertions + deletions for each chain.
# We have to wrap our expression in list() since a value is returned for
# each chain.
res <- mutate_vdj(
vdj_sce,
indels = list(all_ins + all_del)
)
head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 50 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> 1_AATCCAGCATTACGAC-1 avid_1 6 4 0
#> 1_ACAGCTAGTCTGGTCG-1 avid_1 15 4 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.977054 -2.418108 NA
#> 1_AATCCAGCATTACGAC-1 0 1.282983 -0.700069 NA
#> 1_ACAGCTAGTCTGGTCG-1 0 -0.537163 0.133260 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq indels
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
# Create a new column showing the unique chains for each cell
res <- mutate_vdj(
vdj_sce,
unique_chains = stringr::str_c(unique(chains), collapse = "_")
)
head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 50 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> 1_AATCCAGCATTACGAC-1 avid_1 6 4 0
#> 1_ACAGCTAGTCTGGTCG-1 avid_1 15 4 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.977054 -2.418108 NA
#> 1_AATCCAGCATTACGAC-1 0 1.282983 -0.700069 NA
#> 1_ACAGCTAGTCTGGTCG-1 0 -0.537163 0.133260 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq unique_chains
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
# Determine which cells have both an IGK and IGL chain
res <- mutate_vdj(
vdj_sce,
both_light = all(c("IGK", "IGL") %in% chains)
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 50 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq both_light
#> <character> <character> <logical>
#> 1_AAGCCGCAGCTTATCG-1 NA NA FALSE
# Determine which cells have multiple light chains
res <- mutate_vdj(
vdj_sce,
multi_light = sum(chains %in% c("IGK", "IGL")) > 1
)
head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 50 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> 1_AATCCAGCATTACGAC-1 avid_1 6 4 0
#> 1_ACAGCTAGTCTGGTCG-1 avid_1 15 4 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.977054 -2.418108 NA
#> 1_AATCCAGCATTACGAC-1 0 1.282983 -0.700069 NA
#> 1_ACAGCTAGTCTGGTCG-1 0 -0.537163 0.133260 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> 1_AATCCAGCATTACGAC-1 NA NA NA NA
#> 1_ACAGCTAGTCTGGTCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq multi_light
#> <character> <character> <logical>
#> 1_AAGCCGCAGCTTATCG-1 NA NA FALSE
#> 1_AATCCAGCATTACGAC-1 NA NA FALSE
#> 1_ACAGCTAGTCTGGTCG-1 NA NA FALSE