---
title: "R Notebook"
output: html_notebook
---
```{r}
#human datast (Fig. 4)
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
setwd("/YOURENVIRONMENT/homo_sapiens_oetjen")
options(timeout=100000000000)
url="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE120221&format=file"
download.file(url, "GSE120221.tar")
gunzip("GSE120221.tar", remove=FALSE)
# There should be have files for each fastq, containing one
# "barcodes.tsv.gz", one "features.tsv.gz" and one "matrix.mtx.gz". The folders should be named "bmA", "bmB", "bmC1", ...
for (file in c("bmA", "bmb", "bmC1", "bmCk", "bmC2", "bmE", "bmF", "bmG", "bmH", "bmJ", "bmK", "bmL", "bmM", "bmN", "bmO", "bmP", "bmQ", "bmR", "bmS1", "bmSk1", "bmS2", "bmSk2", "bmT", "bmU", "bmW")){
seurat_data <- Read10X(data.dir = paste0("data/", file))
seurat_obj <- CreateSeuratObject(counts = seurat_data,
min.features = 100,
project = file)
assign(file, seurat_obj)
}
merged_seurat <- merge(x = bmA,
y = c(bmB, bmC1, bmCk, bmC2, bmE, bmF, bmG, bmH, bmJ, bmK, bmL, bmM, bmN, bmO, bmP, bmQ, bmR, bmS1, bmSk1, bmS2, bmSk2, bmT, bmU, bmW),
add.cell.id = c("bmA", "bmb", "bmC1", "bmCk", "bmC2", "bmE", "bmF", "bmG", "bmH", "bmJ", "bmK", "bmL", "bmM", "bmN", "bmO", "bmP", "bmQ", "bmR", "bmS1", "bmSk1", "bmS2", "bmSk2", "bmT", "bmU", "bmW"))
merged_seurat$log10GenesPerUMI <- log10(merged_seurat$nFeature_RNA) / log10(merged_seurat$nCount_RNA)
merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^MT-")
merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100
metadata <- merged_seurat@meta.data
metadata$cells <- rownames(metadata)
metadata <- metadata %>%
dplyr::rename(seq_folder = orig.ident,
nUMI = nCount_RNA,
nGene = nFeature_RNA)
metadata$sample <- NA
metadata$sample[which(str_detect(metadata$cells, "^bm"))] <- "bone_marrow"
merged_seurat@meta.data <- metadata
save(merged_seurat, file="data/merged_filtered_seurat.RData")
# filtering:
filtered_seurat <- subset(x = merged_seurat,
subset= (nUMI >= 500) &
(nGene >= 250) &
(log10GenesPerUMI > 0.80) &
(mitoRatio < 0.20))
# and Gene-level filtering
counts <- GetAssayData(object = filtered_seurat, slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 10
filtered_counts <- counts[keep_genes, ]
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)
save(filtered_seurat, file="data/seurat_filtered.RData")
# normalization
# need cell cycle markers (this one for human) https://www.dropbox.com/s/hus4mrkueh1tfpr/cycle.rda?dl=1
# Saving in "data" folder
seurat_phase <- NormalizeData(filtered_seurat)
load("data/cycle.rda")
seurat_phase <- CellCycleScoring(seurat_phase,
g2m.features = g2m_genes,
s.features = s_genes)
seurat_phase <- FindVariableFeatures(seurat_phase,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
# scaling
seurat_phase <- ScaleData(seurat_phase)
seurat_phase <- RunPCA(seurat_phase)
# just if you ran locally
options(future.globals.maxSize = 4000 * 1024^2)
split_seurat <- SplitObject(filtered_seurat, split.by = "sample")
split_seurat <- split_seurat[c("bone_marrow")]
for (i in 1:length(split_seurat)) {
split_seurat[[i]] <- NormalizeData(split_seurat[[i]], verbose = TRUE)
split_seurat[[i]] <- CellCycleScoring(split_seurat[[i]], g2m.features=g2m_genes, s.features=s_genes)
split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio"))
}
# integrating
integ_features <- SelectIntegrationFeatures(object.list = split_seurat,
nfeatures = 3000)
split_seurat <- PrepSCTIntegration(object.list = split_seurat,
anchor.features = integ_features)
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat,
normalization.method = "SCT",
anchor.features = integ_features)
seurat_integrated <- IntegrateData(anchorset = integ_anchors,
normalization.method = "SCT")
seurat_integrated <- RunPCA(object = seurat_integrated)
seurat_integrated <- RunUMAP(seurat_integrated,
dims = 1:40,
reduction = "pca")
seurat_integrated <- FindNeighbors(object = seurat_integrated,
dims = 1:40)
seurat_integrated <- FindClusters(object = seurat_integrated,
resolution = c(0.4, 0.6, 0.8, 1.0, 1.4))
Idents(object = seurat_integrated) <- "integrated_snn_res.1.4"
saveRDS(seurat_integrated, "results/seurat_labelled_human.rds")
```
```{r}
#murine datast (Fig. 5)
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
setwd("/YOURENVIRONMENT/homo_sapiens_oetjen")
options(timeout=100000000000)
url="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE128423&format=file"
download.file(url, "GSE128423_RAW.tar")
gunzip("GSE128423_RAW.tar", remove=FALSE)
# There should be have files for each fastq, containing one
# "barcodes.tsv.gz", one "features.tsv.gz" and one "matrix.mtx.gz". The folders should be named "bmA", "bmB", "bmC1", ...
for (file in c("b1", "b2", "b3", "b4", "bm1", "bm2", "bm3", "bm4")){
seurat_data <- Read10X(data.dir = paste0("data/", file))
seurat_obj <- CreateSeuratObject(counts = seurat_data,
min.features = 100,
project = file)
assign(file, seurat_obj)
}
merged_seurat <- merge(x = b1,
y = c(b2, b3, b4, bm1, bm2, bm3, bm4),
add.cell.id = c("b1", "b2", "b3", "b4", "bm1", "bm2", "bm3", "bm4"))
merged_seurat$log10GenesPerUMI <- log10(merged_seurat$nFeature_RNA) / log10(merged_seurat$nCount_RNA)
merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^mt-")
merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100
metadata <- merged_seurat@meta.data
metadata$cells <- rownames(metadata)
metadata <- metadata %>%
dplyr::rename(seq_folder = orig.ident,
nUMI = nCount_RNA,
nGene = nFeature_RNA)
metadata$sample <- NA
metadata$sample[which(str_detect(metadata$cells, "^b"))] <- "bone"
metadata$sample[which(str_detect(metadata$cells, "^bm"))] <- "bone_marrow"
merged_seurat@meta.data <- metadata
save(merged_seurat, file="data/merged_filtered_seurat.RData")
# filtering:
filtered_seurat <- subset(x = merged_seurat,
subset= (nUMI >= 500) &
(nGene >= 250) &
(log10GenesPerUMI > 0.80) &
(mitoRatio < 0.20))
# and Gene-level filtering
counts <- GetAssayData(object = filtered_seurat, slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 10
filtered_counts <- counts[keep_genes, ]
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)
save(filtered_seurat, file="data/seurat_filtered.RData")
# get the cell cycle genes for mus musculus
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
install.packages(pkgs = "RCurl")
library(RCurl)
library(AnnotationHub)
cc_file <- getURL("https://raw.githubusercontent.com/hbc/tinyatlas/master/cell_cycle/Mus_musculus.csv")
cell_cycle_genes <- read.csv(text = cc_file)
ah <- AnnotationHub()
ahDb <- query(ah,
pattern = c("Mus musculus", "EnsDb"),
ignore.case = TRUE)
id <- ahDb %>%
mcols() %>%
rownames() %>%
tail(n = 1)
edb <- ah[[id]]
annotations <- genes(edb,
return.type = "data.frame")
cell_cycle_markers <- dplyr::left_join(cell_cycle_genes, annotations, by = c("geneID" = "gene_id"))
s_genes <- cell_cycle_markers %>%
dplyr::filter(phase == "S") %>%
pull("gene_name")
g2m_genes <- cell_cycle_markers %>%
filter(phase == "G2/M") %>%
pull("geneID")
# normalization
# need cell cycle markers (this one for mus musculus) https://www.dropbox.com/s/hus4mrkueh1tfpr/cycle.rda?dl=1
# Saving in "data" folder
seurat_phase <- NormalizeData(filtered_seurat)
load("data/cycle.rda")
seurat_phase <- CellCycleScoring(seurat_phase,
g2m.features = g2m_genes,
s.features = s_genes)
seurat_phase <- FindVariableFeatures(seurat_phase,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
# scaling
seurat_phase <- ScaleData(seurat_phase)
seurat_phase <- RunPCA(seurat_phase)
# just if you ran locally
options(future.globals.maxSize = 4000 * 1024^2)
split_seurat <- SplitObject(filtered_seurat, split.by = "sample")
split_seurat <- split_seurat[c("bone", "bone_marrow")]
for (i in 1:length(split_seurat)) {
split_seurat[[i]] <- NormalizeData(split_seurat[[i]], verbose = TRUE)
split_seurat[[i]] <- CellCycleScoring(split_seurat[[i]], g2m.features=g2m_genes, s.features=s_genes)
split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio"))
}
# integrating
integ_features <- SelectIntegrationFeatures(object.list = split_seurat,
nfeatures = 3000)
split_seurat <- PrepSCTIntegration(object.list = split_seurat,
anchor.features = integ_features)
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat,
normalization.method = "SCT",
anchor.features = integ_features)
seurat_integrated <- IntegrateData(anchorset = integ_anchors,
normalization.method = "SCT")
seurat_integrated <- RunPCA(object = seurat_integrated)
seurat_integrated <- RunUMAP(seurat_integrated,
dims = 1:40,
reduction = "pca")
seurat_integrated <- FindNeighbors(object = seurat_integrated,
dims = 1:40)
seurat_integrated <- FindClusters(object = seurat_integrated,
resolution = c(0.4, 0.6, 0.8, 1.0, 1.4))
Idents(object = seurat_integrated) <- "integrated_snn_res.1.4"
saveRDS(seurat_integrated, "results/seurat_labelled_murine.rds")
```