clustifyrdatahub
clustifyrdatahub provides external reference data sets for cell-type assignment with clustifyr.
Installation
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("clustifyrdatahub")
Available references include
knitr::kable(dplyr::select(
read.csv(system.file("extdata", "metadata.csv", package = "clustifyrdatahub")),
c(1, 9, 2:7)))
Title | Species | Description | RDataPath | BiocVersion | Genome | SourceType | SourceUrl |
---|---|---|---|---|---|---|---|
ref_MCA | Mus musculus | Mouse Cell Atlas | clustifyrdatahub/ref_MCA.rda | 3.12 | mm10 | Zip | https://ndownloader.figshare.com/files/10756795 |
ref_tabula_muris_drop | Mus musculus | Tabula Muris (10X) | clustifyrdatahub/ref_tabula_muris_drop.rda | 3.12 | mm10 | Zip | https://ndownloader.figshare.com/articles/5821263 |
ref_tabula_muris_facs | Mus musculus | Tabula Muris (SmartSeq2) | clustifyrdatahub/ref_tabula_muris_facs.rda | 3.12 | mm10 | Zip | https://ndownloader.figshare.com/articles/5821263 |
ref_mouse.rnaseq | Mus musculus | Mouse RNA-seq from 28 cell types | clustifyrdatahub/ref_mouse.rnaseq.rda | 3.12 | mm10 | RDA | https://github.com/dviraran/SingleR/tree/master/data |
ref_moca_main | Mus musculus | Mouse Organogenesis Cell Atlas (main cell types) | clustifyrdatahub/ref_moca_main.rda | 3.12 | mm10 | RDA | https://oncoscape.v3.sttrcancer.org/atlas.gs.washington.edu.mouse.rna/downloads |
ref_immgen | Mus musculus | Mouse sorted immune cells | clustifyrdatahub/ref_immgen.rda | 3.12 | mm10 | RDA | https://github.com/dviraran/SingleR/tree/master/data |
ref_hema_microarray | Homo sapiens | Human hematopoietic cell microarray | clustifyrdatahub/ref_hema_microarray.rda | 3.12 | hg38 | TXT | https://ftp.ncbi.nlm.nih.gov/geo/series/GSE24nnn/GSE24759/matrix/GSE24759_series_matrix.txt.gz |
ref_cortex_dev | Homo sapiens | Human cortex development scRNA-seq | clustifyrdatahub/ref_cortex_dev.rda | 3.12 | hg38 | TSV | https://cells.ucsc.edu/cortex-dev/exprMatrix.tsv.gz |
ref_pan_indrop | Homo sapiens | Human pancreatic cell scRNA-seq (inDrop) | clustifyrdatahub/ref_pan_indrop.rda | 3.12 | hg38 | RDA | https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/baron-human.rds |
ref_pan_smartseq2 | Homo sapiens | Human pancreatic cell scRNA-seq (SmartSeq2) | clustifyrdatahub/ref_pan_smartseq2.rda | 3.12 | hg38 | RDA | https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/segerstolpe.rds |
ref_mouse_atlas | Mus musculus | Mouse Atlas scRNA-seq from 321 cell types | clustifyrdatahub/ref_mouse_atlas.rda | 3.12 | mm10 | RDA | https://github.com/rnabioco/scRNA-seq-Cell-Ref-Matrix/blob/master/atlas/musMusculus/MouseAtlas.rda |
To use clustifyrdatahub
library(ExperimentHub)
eh <- ExperimentHub()
## query
refs <- query(eh, "clustifyrdatahub")
refs
#> ExperimentHub with 11 records
#> # snapshotDate(): 2022-04-26
#> # $dataprovider: figshare, S3, GitHub, GEO, washington.edu, UCSC
#> # $species: Mus musculus, Homo sapiens
#> # $rdataclass: data.frame
#> # additional mcols(): taxonomyid, genome, description,
#> # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
#> # tags, rdatapath, sourceurl, sourcetype
#> # retrieve records with, e.g., 'object[["EH3444"]]'
#>
#> title
#> EH3444 | ref_MCA
#> EH3445 | ref_tabula_muris_drop
#> EH3446 | ref_tabula_muris_facs
#> EH3447 | ref_mouse.rnaseq
#> EH3448 | ref_moca_main
#> ... ...
#> EH3450 | ref_hema_microarray
#> EH3451 | ref_cortex_dev
#> EH3452 | ref_pan_indrop
#> EH3453 | ref_pan_smartseq2
#> EH3779 | ref_mouse_atlas
## either by index or id
ref_hema_microarray <- refs[[7]] ## load by position: the 7th resource in the list
ref_hema_microarray <- refs[["EH3450"]] ## load by EH id
## or list and load
refs <- listResources(eh, "clustifyrdatahub")
ref_hema_microarray <- loadResources(
eh,
"clustifyrdatahub",
"ref_hema_microarray"
)[[1]]
## use for classification of cell types
res <- clustifyr::clustify(
input = clustifyr::pbmc_matrix_small,
metadata = clustifyr::pbmc_meta$classified,
ref_mat = ref_hema_microarray,
query_genes = clustifyr::pbmc_vargenes
)
## or load refs by function name (after loading the clustifyrdatahub package)
library(clustifyrdatahub)
ref_hema_microarray()[1:5, 1:5] ## data are loaded
#> Basophils CD4+ Central Memory CD4+ Effector Memory
#> DDR1 6.084244 5.967502 5.933039
#> RFC2 6.280044 6.028615 6.047005
#> HSPA6 6.535444 5.811475 5.746326
#> PAX8 6.669153 5.896401 6.118577
#> GUCA1A 5.239230 5.232116 5.206960
#> CD8+ Central Memory CD8+ Effector Memory
#> DDR1 6.005278 5.895926
#> RFC2 5.992979 5.942426
#> HSPA6 5.928349 5.942670
#> PAX8 6.270870 6.323922
#> GUCA1A 5.227415 5.090882
ref_hema_microarray(metadata = TRUE) ## only metadata
#> ExperimentHub with 1 record
#> # snapshotDate(): 2022-04-26
#> # names(): EH3450
#> # package(): clustifyrdatahub
#> # $dataprovider: GEO
#> # $species: Homo sapiens
#> # $rdataclass: data.frame
#> # $rdatadateadded: 2020-05-14
#> # $title: ref_hema_microarray
#> # $description: Human hematopoietic cell microarray
#> # $taxonomyid: 9606
#> # $genome: hg38
#> # $sourcetype: TXT
#> # $sourceurl: https://ftp.ncbi.nlm.nih.gov/geo/series/GSE24nnn/GSE2475...
#> # $sourcesize: NA
#> # $tags: c("SingleCellData", "SequencingData", "MicroarrayData",
#> # "ExperimentHub")
#> # retrieve record with 'object[["EH3450"]]'
Session info
sessionInfo()
#> R version 4.2.2 (2022-10-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.5 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] clustifyrdatahub_0.99.4 ExperimentHub_2.4.0
#> [3] AnnotationHub_3.4.0 BiocFileCache_2.4.0
#> [5] dbplyr_2.2.1 BiocGenerics_0.42.0
#> [7] BiocStyle_2.24.0
#>
#> loaded via a namespace (and not attached):
#> [1] matrixStats_0.62.0 bitops_1.0-7
#> [3] fs_1.5.2 bit64_4.0.5
#> [5] filelock_1.0.2 httr_1.4.4
#> [7] rprojroot_2.0.3 GenomeInfoDb_1.32.4
#> [9] tools_4.2.2 bslib_0.4.1
#> [11] utf8_1.2.2 R6_2.5.1
#> [13] DBI_1.1.3 colorspace_2.0-3
#> [15] withr_2.5.0 gridExtra_2.3
#> [17] tidyselect_1.2.0 bit_4.0.4
#> [19] curl_4.3.3 compiler_4.2.2
#> [21] textshaping_0.3.6 cli_3.4.1
#> [23] Biobase_2.56.0 DelayedArray_0.22.0
#> [25] desc_1.4.2 entropy_1.3.1
#> [27] bookdown_0.29 sass_0.4.2
#> [29] scales_1.2.1 rappdirs_0.3.3
#> [31] pkgdown_2.0.6 systemfonts_1.0.4
#> [33] stringr_1.4.1 digest_0.6.30
#> [35] rmarkdown_2.17 XVector_0.36.0
#> [37] pkgconfig_2.0.3 htmltools_0.5.3
#> [39] MatrixGenerics_1.8.1 fastmap_1.1.0
#> [41] highr_0.9 rlang_1.0.6
#> [43] RSQLite_2.2.18 shiny_1.7.3
#> [45] jquerylib_0.1.4 generics_0.1.3
#> [47] jsonlite_1.8.3 BiocParallel_1.30.4
#> [49] dplyr_1.0.10 clustifyr_1.8.0
#> [51] RCurl_1.98-1.9 magrittr_2.0.3
#> [53] GenomeInfoDbData_1.2.8 Matrix_1.5-1
#> [55] Rcpp_1.0.9 munsell_0.5.0
#> [57] S4Vectors_0.34.0 fansi_1.0.3
#> [59] lifecycle_1.0.3 stringi_1.7.8
#> [61] yaml_2.3.6 SummarizedExperiment_1.26.1
#> [63] zlibbioc_1.42.0 grid_4.2.2
#> [65] blob_1.2.3 parallel_4.2.2
#> [67] promises_1.2.0.1 crayon_1.5.2
#> [69] lattice_0.20-45 cowplot_1.1.1
#> [71] Biostrings_2.64.1 KEGGREST_1.36.3
#> [73] knitr_1.40 pillar_1.8.1
#> [75] fgsea_1.22.0 GenomicRanges_1.48.0
#> [77] codetools_0.2-18 stats4_4.2.2
#> [79] fastmatch_1.1-3 glue_1.6.2
#> [81] BiocVersion_3.15.2 evaluate_0.17
#> [83] data.table_1.14.4 BiocManager_1.30.19
#> [85] png_0.1-7 vctrs_0.5.0
#> [87] httpuv_1.6.6 tidyr_1.2.1
#> [89] gtable_0.3.1 purrr_0.3.5
#> [91] assertthat_0.2.1 cachem_1.0.6
#> [93] ggplot2_3.3.6 xfun_0.34
#> [95] mime_0.12 xtable_1.8-4
#> [97] later_1.3.0 ragg_1.2.4
#> [99] SingleCellExperiment_1.18.1 tibble_3.1.8
#> [101] AnnotationDbi_1.58.0 memoise_2.0.1
#> [103] IRanges_2.30.1 ellipsis_0.3.2
#> [105] interactiveDisplayBase_1.34.0