Assigns new clonotype IDs based on the combination of values present in the provided columns.
Usage
define_clonotypes(
input,
data_cols,
clonotype_col = "clonotype_id",
filter_chains = c("productive", "full_length"),
sep = global$sep
)
Arguments
- input
Single cell object or data.frame containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.
- data_cols
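Name(s) of the meta.data column(s) containing V(D)J data to use for defining clonotypes. When multiple columns are provided, clonotypes are defined by the combination of their values.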
- clonotype_col
Name of column to use for storing clonotype IDs
- filter_chains
Column(s) to use for filtering chains prior to defining clonotypes (e.g. productive, full_length). The column(s) must contain TRUE or FALSE for each chain. If NULL, all chains are used when defining clonotypes.
- sep
Separator used for storing per-cell V(D)J data
Examples
# Define clonotypes using the CDR3 nucleotide sequence
res <- define_clonotypes(
vdj_sce,
data_cols = "cdr3_nt"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2
#> <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq clonotype_id
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
# Define clonotypes based on the combination of the CDR3 nucleotide sequence
# and the V and J genes
res <- define_clonotypes(
vdj_sce,
data_cols = c("cdr3_nt", "v_gene", "j_gene")
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2
#> <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq clonotype_id
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
# Modify the name of the column used to store clonotype IDs
res <- define_clonotypes(
vdj_sce,
data_cols = "cdr3_nt",
clonotype_col = "NEW_clonotype_id"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 50 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2 clonotype_id
#> <factor> <numeric> <numeric> <character>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811 NA
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq NEW_clonotype_id
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
# When defining clonotypes, only use chains that are productive
res <- define_clonotypes(
vdj_sce,
data_cols = "cdr3_nt",
filter_chains = "productive"
)
head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> <character> <numeric> <integer> <factor>
#> 1_AAGCCGCAGCTTATCG-1 avid_1 0 0 0
#> seurat_clusters UMAP_1 UMAP_2
#> <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1 0 -5.97705 -2.41811
#> exact_subclonotype_id chains n_chains
#> <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA
#> cdr3 cdr3_nt cdr3_length cdr3_nt_length
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_gene d_gene j_gene c_gene
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> isotype reads umis productive
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> full_length paired v_ins v_del
#> <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> v_mis d_ins d_del d_mis
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> j_ins j_del j_mis c_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_del c_mis all_ins all_del
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> all_mis vd_ins vd_del dj_ins
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> dj_del v_mis_freq d_mis_freq j_mis_freq
#> <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA NA
#> c_mis_freq all_mis_freq clonotype_id
#> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1 NA NA NA