Skip to contents

This will assign new clonotype IDs based on the combination of values present in the columns provided to data_cols.

Usage

define_clonotypes(
  input,
  data_cols,
  clonotype_col = "clonotype_id",
  filter_chains = c("productive", "full_length"),
  sep = global$sep
)

Arguments

input

Single cell object or data.frame containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.

data_cols

meta.data column(s) containing V(D)J data to use for defining clonotypes (e.g. cdr3_nt, v_gene, j_gene)

clonotype_col

Name of column to use for storing clonotype IDs

filter_chains

Column(s) to use for filtering chains prior to defining clonotypes (e.g. productive, full_length). The column(s) must contain TRUE or FALSE for each chain. If NULL, all chains are used when defining clonotypes.

sep

Separator used for storing per-cell V(D)J data

Value

Single cell object or data.frame with added clonotype IDs

Examples

# Define clonotypes using the CDR3 nucleotide sequence
res <- define_clonotypes(
  vdj_sce,
  data_cols = "cdr3_nt"
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2
#>                             <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq clonotype_id
#>                      <character>  <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA           NA

# Define clonotypes based on the combination of the CDR3 nucleotide sequence
# and the V and J genes
res <- define_clonotypes(
  vdj_sce,
  data_cols = c("cdr3_nt", "v_gene", "j_gene")
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2
#>                             <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq clonotype_id
#>                      <character>  <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA           NA

# Modify the name of the column used to store clonotype IDs
res <- define_clonotypes(
  vdj_sce,
  data_cols = "cdr3_nt",
  clonotype_col = "NEW_clonotype_id"
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 50 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq NEW_clonotype_id
#>                      <character>  <character>      <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA               NA

# When defining clonotypes, only use chains that are productive
res <- define_clonotypes(
  vdj_sce,
  data_cols = "cdr3_nt",
  filter_chains = "productive"
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2
#>                             <factor> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq clonotype_id
#>                      <character>  <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA           NA