Skip to contents

Summarize per-chain values for each cell using a function or purrr-style lambda. This is useful for plotting or filtering cells based on the V(D)J meta.data.

Usage

summarize_vdj(
  input,
  data_cols,
  fn = NULL,
  ...,
  chain = NULL,
  chain_col = global$chain_col,
  col_names = "{.col}",
  return_df = FALSE,
  sep = global$sep
)

Arguments

input

Single cell object or data.frame containing V(D)J data. If a data.frame is provided, the cell barcodes should be stored as row names.

data_cols

meta.data column(s) containing V(D)J data to summarize for each cell

fn

Function to apply to each selected column. This can be either a function, e.g. mean, or a purrr-style lambda, e.g. ~ mean(.x, na.rm = TRUE). If NULL, the mean will be calculated for numeric values, and non-numeric values will be combined into a single string.

...

Additional arguments to pass to fn

chain

Chain to use for summarizing V(D)J data

chain_col

meta.data column(s) containing chains for each cell

col_names

A glue specification that describes how to name the output columns; use {.col} to refer to the original column name. If col_names is NULL, the original column names will be used.

return_df

If TRUE, results will be returned as a data.frame. If FALSE, results will be added to the input object.

sep

Separator used for storing per cell V(D)J data

Value

Object containing V(D)J data summarized for each cell

Examples

# Summarize numeric columns
# by default the mean will be calculated for numeric columns
res <- summarize_vdj(
  vdj_sce,
  data_cols = c("all_del", "all_ins")
)

head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#> 1_AATCCAGCATTACGAC-1      avid_1          6            4               0
#> 1_ACAGCTAGTCTGGTCG-1      avid_1         15            4               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108           NA
#> 1_AATCCAGCATTACGAC-1               0  1.282983 -0.700069           NA
#> 1_ACAGCTAGTCTGGTCG-1               0 -0.537163  0.133260           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#> 1_AATCCAGCATTACGAC-1                    NA          NA        NA
#> 1_ACAGCTAGTCTGGTCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA             NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA        NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            c_del       c_mis   all_ins   all_del
#>                      <character> <character> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA        NA        NA
#> 1_AATCCAGCATTACGAC-1          NA          NA        NA        NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA        NA        NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq
#>                      <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA
#> 1_AATCCAGCATTACGAC-1          NA           NA
#> 1_ACAGCTAGTCTGGTCG-1          NA           NA

# Specifying a different summary function
# this calculates the median number of insertions and deletions for each
# cell
res <- summarize_vdj(
  vdj_sce,
  data_cols = c("all_del", "all_ins"),
  fn = stats::median
)

head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#> 1_AATCCAGCATTACGAC-1      avid_1          6            4               0
#> 1_ACAGCTAGTCTGGTCG-1      avid_1         15            4               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108           NA
#> 1_AATCCAGCATTACGAC-1               0  1.282983 -0.700069           NA
#> 1_ACAGCTAGTCTGGTCG-1               0 -0.537163  0.133260           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#> 1_AATCCAGCATTACGAC-1                    NA          NA        NA
#> 1_ACAGCTAGTCTGGTCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA             NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA        NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            c_del       c_mis   all_ins   all_del
#>                      <character> <character> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA        NA        NA
#> 1_AATCCAGCATTACGAC-1          NA          NA        NA        NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA        NA        NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq
#>                      <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA
#> 1_AATCCAGCATTACGAC-1          NA           NA
#> 1_ACAGCTAGTCTGGTCG-1          NA           NA

# Summarize values for a specific chain
res <- summarize_vdj(
  vdj_sce,
  data_cols = c("all_del", "all_ins"),
  chain = "IGK"
)

head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 49 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#> 1_AATCCAGCATTACGAC-1      avid_1          6            4               0
#> 1_ACAGCTAGTCTGGTCG-1      avid_1         15            4               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108           NA
#> 1_AATCCAGCATTACGAC-1               0  1.282983 -0.700069           NA
#> 1_ACAGCTAGTCTGGTCG-1               0 -0.537163  0.133260           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#> 1_AATCCAGCATTACGAC-1                    NA          NA        NA
#> 1_ACAGCTAGTCTGGTCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA             NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA        NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            c_del       c_mis   all_ins   all_del
#>                      <character> <character> <numeric> <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA        NA        NA
#> 1_AATCCAGCATTACGAC-1          NA          NA        NA        NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA        NA        NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq
#>                      <character>  <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA
#> 1_AATCCAGCATTACGAC-1          NA           NA
#> 1_ACAGCTAGTCTGGTCG-1          NA           NA

# Specifying new names for summarized columns
# use {.col} to refer to the original column name
res <- summarize_vdj(
  vdj_sce,
  data_cols = c("all_del", "all_ins"),
  fn = stats::median,
  col_names = "median_{.col}"
)

head(slot(res, "colData"), 1)
#> DataFrame with 1 row and 51 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0  -5.97705  -2.41811           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq median_all_del
#>                      <character>  <character>      <numeric>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA             NA
#>                      median_all_ins
#>                           <numeric>
#> 1_AAGCCGCAGCTTATCG-1             NA

# Return a data.frame instead of adding the results to the input object
res <- summarize_vdj(
  vdj_sce,
  data_cols = c("all_del", "all_ins"),
  return_df = TRUE
)

head(res, 1)
#>                      orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#> 1_AAGCCGCAGCTTATCG-1     avid_1          0            0               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108         <NA>
#>                      exact_subclonotype_id chains n_chains cdr3 cdr3_nt
#> 1_AAGCCGCAGCTTATCG-1                    NA   <NA>       NA <NA>    <NA>
#>                      cdr3_length cdr3_nt_length v_gene d_gene j_gene
#> 1_AAGCCGCAGCTTATCG-1        <NA>           <NA>   <NA>   <NA>   <NA>
#>                      c_gene isotype reads umis productive full_length
#> 1_AAGCCGCAGCTTATCG-1   <NA>    <NA>  <NA> <NA>       <NA>        <NA>
#>                      paired v_ins v_del v_mis d_ins d_del d_mis j_ins
#> 1_AAGCCGCAGCTTATCG-1     NA  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
#>                      j_del j_mis c_ins c_del c_mis all_ins all_del
#> 1_AAGCCGCAGCTTATCG-1  <NA>  <NA>  <NA>  <NA>  <NA>      NA      NA
#>                      all_mis vd_ins vd_del dj_ins dj_del v_mis_freq
#> 1_AAGCCGCAGCTTATCG-1    <NA>   <NA>   <NA>   <NA>   <NA>       <NA>
#>                      d_mis_freq j_mis_freq c_mis_freq all_mis_freq
#> 1_AAGCCGCAGCTTATCG-1       <NA>       <NA>       <NA>         <NA>

# Using a lambda function to summarize values
# use '.x' to refer to values in the column
# this creates a new column showing the unique chains for each cell
res <- summarize_vdj(
  vdj_sce,
  data_cols = "chains",
  fn = ~ paste0(unique(.x), collapse = "_"),
  col_names = "unique_chains"
)

head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 50 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#> 1_AATCCAGCATTACGAC-1      avid_1          6            4               0
#> 1_ACAGCTAGTCTGGTCG-1      avid_1         15            4               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108           NA
#> 1_AATCCAGCATTACGAC-1               0  1.282983 -0.700069           NA
#> 1_ACAGCTAGTCTGGTCG-1               0 -0.537163  0.133260           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#> 1_AATCCAGCATTACGAC-1                    NA          NA        NA
#> 1_ACAGCTAGTCTGGTCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA             NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA        NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq unique_chains
#>                      <character>  <character>   <character>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA            NA
#> 1_AATCCAGCATTACGAC-1          NA           NA            NA
#> 1_ACAGCTAGTCTGGTCG-1          NA           NA            NA

# Creating an index column to use for filtering/plotting
# this creates a column indicating which cells have no insertions
# the V(D)J data can be filtered based on this new column
res <- summarize_vdj(
  vdj_sce,
  data_cols = "all_ins",
  fn = ~ all(.x == 0),
  col_names = "no_insertions"
)

res <- filter_vdj(
  res,
  filt = no_insertions
)

head(slot(res, "colData"), 3)
#> DataFrame with 3 rows and 50 columns
#>                       orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.3
#>                      <character>  <numeric>    <integer>        <factor>
#> 1_AAGCCGCAGCTTATCG-1      avid_1          0            0               0
#> 1_AATCCAGCATTACGAC-1      avid_1          6            4               0
#> 1_ACAGCTAGTCTGGTCG-1      avid_1         15            4               0
#>                      seurat_clusters    UMAP_1    UMAP_2 clonotype_id
#>                             <factor> <numeric> <numeric>  <character>
#> 1_AAGCCGCAGCTTATCG-1               0 -5.977054 -2.418108           NA
#> 1_AATCCAGCATTACGAC-1               0  1.282983 -0.700069           NA
#> 1_ACAGCTAGTCTGGTCG-1               0 -0.537163  0.133260           NA
#>                      exact_subclonotype_id      chains  n_chains
#>                                  <numeric> <character> <integer>
#> 1_AAGCCGCAGCTTATCG-1                    NA          NA        NA
#> 1_AATCCAGCATTACGAC-1                    NA          NA        NA
#> 1_ACAGCTAGTCTGGTCG-1                    NA          NA        NA
#>                             cdr3     cdr3_nt cdr3_length cdr3_nt_length
#>                      <character> <character> <character>    <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA             NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA             NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA             NA
#>                           v_gene      d_gene      j_gene      c_gene
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          isotype       reads        umis  productive
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                      full_length    paired       v_ins       v_del
#>                      <character> <logical> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA        NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA        NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA        NA          NA          NA
#>                            v_mis       d_ins       d_del       d_mis
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            j_ins       j_del       j_mis       c_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                            c_del       c_mis     all_ins     all_del
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                          all_mis      vd_ins      vd_del      dj_ins
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                           dj_del  v_mis_freq  d_mis_freq  j_mis_freq
#>                      <character> <character> <character> <character>
#> 1_AAGCCGCAGCTTATCG-1          NA          NA          NA          NA
#> 1_AATCCAGCATTACGAC-1          NA          NA          NA          NA
#> 1_ACAGCTAGTCTGGTCG-1          NA          NA          NA          NA
#>                       c_mis_freq all_mis_freq no_insertions
#>                      <character>  <character>     <logical>
#> 1_AAGCCGCAGCTTATCG-1          NA           NA            NA
#> 1_AATCCAGCATTACGAC-1          NA           NA            NA
#> 1_ACAGCTAGTCTGGTCG-1          NA           NA            NA