SENIC的使用

软件介绍

SENIC是一种同时重建基因调控网络并从单细胞RNA-seq数据中鉴定stable cell states的工具。基于共表达和DNA模基序 (motif)分析推断基因调控网络 ,然后在每个细胞中分析网络活性以鉴定细胞状态

https://www.nature.com/articles/nmeth.4463
参考帮助文档:https://rawcdn.githack.com/aertslab/SCENIC/0a4c96ed8d930edd8868f07428090f9dae264705/inst/doc/SCENIC_Running.html

输入:SCENIC需要输入的是单细胞RNA-seq表达矩阵—— 每列对应于样品(细胞),每行对应一个基因。基因ID应该是gene-symbol并存储为rownames (尤其是基因名字部分是为了与RcisTarget数据库兼容);表达数据是Gene的reads count。根据作者的测试,提供原始的或Normalized UMI count,无论是否log转换,或使用TPM值,结果相差不大。

软件的安装

if !requireNamespace"BiocManager", quietly = TRUE)) install.packages"BiocManager")

BiocManager::installc"AUCell", "RcisTarget"))
BiocManager::installc"GENIE3"))
BiocManager::installc"zoo", "mixtools", "rbokeh"))

BiocManager::installc"DT", "NMF", "pheatmap", "R2HTML", "Rtsne"))

BiocManager::installc"doMC", "doRNG"))
BiocManager::installc"SingleCellExperiment"))

if !requireNamespace"devtools", quietly = TRUE)) install.packages"devtools")
devtools::install_github"aertslab/SCopeLoomR", build_vignettes = TRUE)

if !requireNamespace"devtools", quietly = TRUE)) install.packages"devtools")
devtools::install_github"aertslab/SCENIC")
packageVersion"SCENIC")

下载评分数据库

需要下载RcisTarget的物种特定数据库(https://resources.aertslab.org/cistarget/

For Human,Mouse,Fly

mm_url="https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-500bp-upstream-7species.mc9nr.feather"
mm_url2="https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-tss-centered-10kb-7species.mc9nr.feather"
hg_url="https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-500bp-upstream-7species.mc9nr.feather"
hg_url2="https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-tss-centered-10kb-7species.mc9nr.feather"
fly_url="https://resources.aertslab.org/cistarget/databases/drosophila_melanogaster/dm6/flybase_r6.02/mc8nr/gene_based/dm6-5kb-upstream-full-tx-11species.mc8nr.feather"

wget -c $mm_url
wget -c $mm_url2
wget -c $hg_url
wget -c $hg_url2
wget -c $fly_url

不同数据格式的读入

对于loom文件

download.file"http://loom.linnarssonlab.org/clone/Previously%20Published/Cortex.loom", "Cortex.loom")
loomPath <- "Cortex.loom"

10x的输出文件

singleCellMatrix <- Seurat::Read10Xdata.dir="data/pbmc3k/filtered_gene_bc_matrices/hg19/")
cellInfo <- data.frameseuratCluster=IdentsseuratObject))

R objects e.g. Seurat, SingleCellExperiment)

sce <- load_as_scedataPath) # any SingleCellExperiment object
exprMat <- countssce)
cellInfo <- colDatasce)

简单的SENIC工作流程

setwd"/media/sdb/project/20200223/SCENIC_MouseBrain")

loomPath <- system.filepackage="SCENIC", "examples/mouseBrain_toy.loom")
librarySCopeLoomR)
loom <- open_loomloomPath)
exprMat <- get_dgemloom)
cellInfo <- get_cellAnnotationloom)
close_loomloom)

#查看矩阵大小
#dimexprMat)

librarySCENIC)
#scenicOptions <- initializeScenicorg="mgi", dbDir="cisTarget_databases", nCores=10)
scenicOptions <- initializeScenicorg="mgi", dbDir="/media/sdb/project/20200223/data", nCores=10)

saveRDSscenicOptions, file="int/scenicOptions.Rds") 

### Co-expression network
genesKept <- geneFilteringexprMat, scenicOptions)
exprMat_filtered <- exprMat[genesKept, ]
runCorrelationexprMat_filtered, scenicOptions)
exprMat_filtered_log <- log2exprMat_filtered+1) 
runGenie3exprMat_filtered_log, scenicOptions)

### Build and score the GRN
exprMat_log <- log2exprMat+1)
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # Toy run settings
runSCENIC_1_coexNetwork2modulesscenicOptions)
runSCENIC_2_createRegulonsscenicOptions, coexMethod=c"top5perTarget")) # Toy run settings
runSCENIC_3_scoreCellsscenicOptions, exprMat_log)

# Export: 运行这个时可能报错
#export2scopescenicOptions, exprMat)

# Binarize activity?
# aucellApp <- plotTsne_AUCellAppscenicOptions, exprMat_log)
# savedSelections <- shiny::runAppaucellApp)
# newThresholds <- savedSelections$thresholds
# scenicOptions@fileNames$int["aucell_thresholds",1] <- "int/newThresholds.Rds"
# saveRDSnewThresholds, file=getIntNamescenicOptions, "aucell_thresholds"))
# saveRDSscenicOptions, file="int/scenicOptions.Rds")
runSCENIC_4_aucell_binarizescenicOptions)

### Exploring output 
# Check files in folder 'output'
# .loom file @ http://scope.aertslab.org

# output/Step2_MotifEnrichment_preview.html in detail/subset:
motifEnrichment_selfMotifs_wGenes <- loadIntscenicOptions, "motifEnrichment_selfMotifs_wGenes")
tableSubset <- motifEnrichment_selfMotifs_wGenes[highlightedTFs=="Sox8"]
viewMotifstableSubset) 

# output/Step2_regulonTargetsInfo.tsv in detail: 
regulonTargetsInfo <- loadIntscenicOptions, "regulonTargetsInfo")
tableSubset <- regulonTargetsInfo[TF=="Stat6" & highConfAnnot==TRUE]
viewMotifstableSubset)

运行SENIC

建立基因调控网络Gene Regulation Network,GRN):

基于共表达识别每个转录因子TF的潜在靶标。过滤表达矩阵并运行GENIE3或者GRNBoost,它们是利用表达矩阵推断基因调控网络的一种算法,能得到转录因子和潜在靶标的相关性网络;将目标从GENIE3或者GRNBoost格式转为共表达模块。
根据DNA模序分析(motif)选择潜在的直接结合靶标(调节因子)(利用RcisTarget包:TF基序分析)

确定细胞状态及其调节因子
3. 分析每个细胞中的网络活性(AUCell) 在细胞中评分调节子(计算AUC)

SCENIC完整流程

导入数据

loomPath <- system.filepackage="SCENIC", "examples/mouseBrain_toy.loom")
librarySCopeLoomR)
loom <- open_loomloomPath) #mode='r' 如果报错可以加上
exprMat <- get_dgemloom)
cellInfo <- get_cellAnnotationloom)
close_loomloom)

Initialize settings 初始设置,导入评分数据库

librarySCENIC)
#先下载数据库,org用来选择物种,这里选择的是小鼠
scenicOptions <- initializeScenicorg="mgi", dbDir="cisTarget_databases", nCores=10)
# scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds"
saveRDSscenicOptions, file="int/scenicOptions.Rds")

共表达网络

根据已有的表达矩阵推断潜在的转录因子靶标,使用GENIE3或GRNBoost。首先需要进行基因的过滤。

genesKept <- geneFilteringexprMat, scenicOptions=scenicOptions,
                           minCountsPerGene=3*.01*ncolexprMat),
                           minSamples=ncolexprMat)*.01)

过滤表达矩阵,保留只有过滤后的基因

exprMat_filtered <- exprMat[genesKept, ]

计算相关性,这一步时间会比较长

runCorrelationexprMat_filtered, scenicOptions)
exprMat_filtered_log <- log2exprMat_filtered+1)
runGenie3exprMat_filtered_log, scenicOptions)

Build and score the GRN 构建并给基因调控网络(GRN)打分

exprMat_log <- log2exprMat+1)
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # Toy run settings
runSCENIC_1_coexNetwork2modulesscenicOptions)
runSCENIC_2_createRegulonsscenicOptions, coexMethod=c"top5perTarget")) # Toy run settings
runSCENIC_3_scoreCellsscenicOptions, exprMat_log)

输入表达矩阵

在本教程中,我们提供了一个示例,样本是小鼠大脑的200个细胞和862个基因:

loomPath <- system.filepackage="SCENIC", "examples/mouseBrain_toy.loom")

打开loom文件并加载表达矩阵;

librarySCopeLoomR)
loom <- open_loomloomPath, mode="r")
exprMat <- get_dgemloom)
cellInfo <- get_cellAnnotationloom)
close_loomloom)
#dimexprMat)

细胞信息/表型

# cellInfo$nGene <- colSumsexprMat>0)
headcellInfo)

cellInfo <- data.framecellInfo)
cellTypeColumn <- "Class"
colnamescellInfo)[whichcolnamescellInfo)==cellTypeColumn)] <- "CellType"
cbindtablecellInfo$CellType))

saveRDScellInfo, file="int/cellInfo.Rds")
# Color to assign to the variables same format as for NMF::aheatmap)
colVars <- listCellType=c"microglia"="forestgreen",
                            "endothelial-mural"="darkorange",
                            "astrocytes_ependymal"="magenta4", 
                            "oligodendrocytes"="hotpink", 
                            "interneurons"="red3",
                            "pyramidal CA1"="skyblue",
                            "pyramida SS"="darkblue"
                            ))
colVars$CellType <- colVars$CellType[intersectnamescolVars$CellType), cellInfo$CellType)]
saveRDScolVars, file="int/colVars.Rds")
plot.new)
legend0,1, fill=colVars$CellType, legend=namescolVars$CellType))

初始化SCENIC设置

为了在SCENIC的多个步骤中保持设置一致,SCENIC包中的大多数函数使用一个公共对象,该对象存储当前运行的选项并代替大多数函数的“参数”。比如下面的orgdbDir等,可以在开始就将物种rog(mgi—— mouse, hgnc —— human, dmel —— fly)和RcisTarge数据库位置分别读给对象orgdbDir,之后统一用函数initializeScenic得到对象scenicOptions。具体参数设置可以用?initializeScenichelp一下。

librarySCENIC)
org="mgi" # or hgnc, or 
dmeldbDir="cisTarget_databases" # RcisTarget databases location
myDatasetTitle="SCENIC example on Mouse brain" # choose a name for your analysis
datadefaultDbNames)
dbs <- defaultDbNames[[org]]
scenicOptions <- initializeScenicorg=org, dbDir=dbDir, dbs=dbs, datasetTitle=myDatasetTitle, nCores=10)

# 如果有需要就修改这个地方
scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds
"scenicOptions@inputDatasetInfo$colVars <- "int/colVars.Rds"
# Databases:
# scenicOptions@settings$dbs <- c"mm9-5kb-mc8nr"="mm9-tss-centered-5kb-10species.mc8nr.feather")
# scenicOptions@settings$db_mcVersion <- "v8"
# Save to use at a later time...
saveRDSscenicOptions, file="int/scenicOptions.Rds")

共表达网络

SCENIC工作流程的第一步是根据表达数据推断潜在的转录因子靶标。为此,我们使用GENIE3或GRNBoost,输入文件是表达矩阵(过滤后的)和转录因子列表。GENIE3/GRBBoost的输出结果和相关矩阵将用于创建共表达模块(runSCENIC_1_coexNetwork2modules())。

基因过滤/选择

按每个基因的reads总数进行过滤。该filter旨在去除最可能是噪音的基因。默认情况下,它(minCountsPerGene)保留所有样品中至少带有6个UMI reads的基因(例如,如果在1%的细胞中以3的值表达,则基因将具有的总数)。
通过基因的细胞数来实现过滤(例如 UMI > 0 ,或log 2(TPM)> 1 )。默认情况下minSamples),保留下来的基因能在至少1%的细胞中检测得到。
最后,只保留RcisTarget数据库中可用的基因。

# Adjust minimum values according to your dataset)
genesKept <- geneFilteringexprMat, scenicOptions=scenicOptions, 
                            minCountsPerGene=3*.01*ncolexprMat),
                            minSamples=ncolexprMat)*.01)

在进行网络推断之前,检查是否有任何已知的相关基因被过滤掉(如果缺少任何相关基因,请仔细检查filter设置是否合适):

interestingGenes <- c"Sox9", "Sox10", "Dlx5")
interestingGenes[which!interestingGenes %in% genesKept)]

相关性

GENIE33或者GRNBoost可以检测正负关联。为了区分潜在的激活和抑制,我们将目标分为正相关和负相关目标(比如TF与潜在目标之间的Spearman相关性)。

runCorrelationexprMat_filtered, scenicOptions)

运行GENIE3得到潜在转录因子TF

## If launched in a new session, you will need to reload...
# setwd"...")
# loomPath <- "..."
# loom <- open_loomloomPath, mode="r")
# exprMat <- get_dgemloom)
# close_loomloom)
# genesKept <- loadIntscenicOptions, "genesKept")
# exprMat_filtered <- exprMat[genesKept,]
# librarySCENIC)
# scenicOptions <- readRDS"int/scenicOptions.Rds")
# Optional: add log if it is not logged/normalized already)
exprMat_filtered <- log2exprMat_filtered+1)
# Run GENIE3
runGenie3exprMat_filtered, scenicOptions)

构建并评分GRN(runSCENIC_ …)

必要时重新加载表达式矩阵:

loom <- open_loomloomPath, mode="r")
exprMat <- get_dgemloom)
close_loomloom)
# Optional: log expression for TF expression plot, it does not affect any other calculation)
logMat <- log2exprMat+1)
dimexprMat)

使用wrapper函数运行其余步骤:

librarySCENIC)
scenicOptions <- readRDS"int/scenicOptions.Rds")
scenicOptions@settings$verbose <- TRUE
scenicOptions@settings$nCores <- 10
scenicOptions@settings$seed <- 123
# For a very quick run:
# coexMethod=c"top5perTarget")
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # For toy run
# save...
runSCENIC_1_coexNetwork2modulesscenicOptions)
runSCENIC_2_createRegulonsscenicOptions, coexMethod=c"top5perTarget")) #** Only for toy run!! 只用于测试数据
runSCENIC_3_scoreCellsscenicOptions, logMat)

可选步骤

将network activity转换成ON/OFF(二进制)格式

nPcs <- c5) # For toy dataset
# nPcs <- c5,15,50)
scenicOptions@settings$seed <- 123 # same seed for all of them
#使用不同的参数运行t-SNE
fileNames <- tsneAUCscenicOptions, aucType="AUC", nPcs=nPcs, perpl=c5,15,50))
fileNames <- tsneAUCscenicOptions, aucType="AUC", nPcs=nPcs, perpl=c5,15,50), onlyHighConf=TRUE, filePrefix="int/tSNE_oHC")
# 画图 individual files in int/):
fileNames <- paste0"int/",grep".Rds", grep"tSNE_", list.files"int"), value=T), value=T))
parmfrow=clengthnPcs), 3))
fileNames <- paste0"int/",grep".Rds", grep"tSNE_AUC", list.files"int"), value=T, perl = T), value=T))
plotTsne_compareSettingsfileNames, scenicOptions, showLegend=FALSE, varName="CellType", cex=.5)

# Using only "high-confidence" regulons normally similar)
parmfrow=c3,3))
fileNames <- paste0"int/",grep".Rds", grep"tSNE_oHC_AUC", list.files"int"), value=T, perl = T), value=T))
plotTsne_compareSettingsfileNames, scenicOptions, showLegend=FALSE, varName="CellType", cex=.5)

输出到 loom/SCope

SCENIC生成的结果既能在http://scope.aertslab.org查看,还能用函数export2scope)(需要SCopeLoomR包)保存成.loom文件。

# DGEM Digital gene expression matrix)
# non-normalized counts)
# exprMat <- get_dgemopen_loomloomPath))
# dgem <- exprMat
# headcolnamesdgem))  #should contain the Cell ID/name
# Export:
scenicOptions@fileNames$output["loomFile",] <- "output/mouseBrain_SCENIC.loom"
export2scopescenicOptions, exprMat)

加载.loom文件中的结果

SCopeLoomR中也有函数可以导入.loom文件中的内容,比如调节因子,AUC和封装内容(比如regulon activityt-SNE和UMAP结果)。


librarySCopeLoomR)
scenicLoomPath <- getOutNamescenicOptions, "loomFile")
loom <- open_loomscenicLoomPath)
# Read information from loom file:
regulons_incidMat <- get_regulonsloom)
regulons <- regulonsToGeneListsregulons_incidMat)
regulonsAUC <- get_regulonsAucloom)
regulonsAucThresholds <- get_regulonThresholdsloom)
embeddings <- get_embeddingsloom)

解读结果

1. 细胞状态

AUCell提供跨细胞的调节子的活性,AUCell使用“Area under Curve 曲线下面积”(AUC)来计算输入基因集的关键子集是否在每个细胞的表达基因中富集。通过该调节子活性(连续或二进制AUC矩阵)来聚类细胞,我们可以看出是否存在倾向于具有相同调节子活性的细胞群,并揭示在多个细胞中反复发生的网络状态。这些状态等同于网络的吸引子状态。将这些聚类与不同的可视化方法相结合,我们可以探索细胞状态与特定调节子的关联。

将AUC和TF表达投射到t-SNE上

logMat <- exprMat # Better if it is logged/normalized
aucellApp <- plotTsne_AUCellAppscenicOptions, logMat) # default t-SNE
savedSelections <- shiny::runAppaucellApp)
printtsneFileNamescenicOptions))

tSNE_scenic <- readRDStsneFileNamescenicOptions))
aucell_regulonAUC <- loadIntscenicOptions, "aucell_regulonAUC")
# Show TF expression:
parmfrow=c2,3))
AUCell::AUCell_plotTSNEtSNE_scenic$Y, exprMat, aucell_regulonAUC[onlyNonDuplicatedExtendedrownamesaucell_regulonAUC))[c"Dlx5", "Sox10", "Sox9","Irf1", "Stat6")],], plots="Expression")

# 保存AUC图片:
Cairo::CairoPDF"output/Step4_BinaryRegulonActivity_tSNE_colByAUC.pdf", width=20, height=15)
parmfrow=c4,6))
AUCell::AUCell_plotTSNEtSNE_scenic$Y, cellsAUC=aucell_regulonAUC, plots="AUC")
dev.off)
libraryKernSmooth)
libraryRColorBrewer)
dens2d <- bkde2DtSNE_scenic$Y, 1)$fhat
imagedens2d, col=brewer.pal9, "YlOrBr"), axes=FALSE)
contourdens2d, add=TRUE, nlevels=5, drawlabels=FALSE)

#parbg = "black")
parmfrow=c1,2))
regulonNames <- c "Dlx5","Sox10")
cellCol <- plotTsne_rgbscenicOptions, regulonNames, aucType="AUC", aucMaxContrast=0.6)
text0, 10, attrcellCol,"red"), col="red", cex=.7, pos=4)
text-20,-10, attrcellCol,"green"), col="green3", cex=.7, pos=4)
regulonNames <- listred=c"Sox10", "Sox8"),
                     green=c"Irf1"),
                     blue=c "Tef"))
cellCol <- plotTsne_rgbscenicOptions, regulonNames, aucType="Binary")
text5, 15, attrcellCol,"red"), col="red", cex=.7, pos=4)
text5, 15-4, attrcellCol,"green"), col="green3", cex=.7, pos=4)
text5, 15-8, attrcellCol,"blue"), col="blue", cex=.7, pos=4)

GRN:Regulon靶标和模序

regulons <- loadIntscenicOptions, "regulons")
regulons[c"Dlx5", "Irf1")]

regulons <- loadIntscenicOptions, "aucell_regulons")
headcbindonlyNonDuplicatedExtendednamesregulons))))

regulonTargetsInfo <- loadIntscenicOptions, "regulonTargetsInfo")
tableSubset <- regulonTargetsInfo[TF=="Stat6" & highConfAnnot==TRUE]
viewMotifstableSubset)

2. 细胞群的调控因子

regulonAUC <- loadIntscenicOptions, "aucell_regulonAUC")
regulonAUC <- regulonAUC[onlyNonDuplicatedExtendedrownamesregulonAUC)),]
regulonActivity_byCellType <- sapplysplitrownamescellInfo), cellInfo$CellType),functioncells) rowMeansgetAUCregulonAUC)[,cells]))
regulonActivity_byCellType_Scaled <- tscaletregulonActivity_byCellType), center = T, scale=T))
pheatmap::pheatmapregulonActivity_byCellType_Scaled, color=colorRampPalettec"blue","white","red"))100), breaks=seq-3, 3, length.out = 100),treeheight_row=10, treeheight_col=10, border_color=NA)

# filename="regulonActivity_byCellType.pdf", width=10, height=20)
topRegulators <- reshape2::meltregulonActivity_byCellType_Scaled)
colnamestopRegulators) <- c"Regulon", "CellType", "RelativeActivity")
topRegulators <- topRegulators[whichtopRegulators$RelativeActivity>0),]
viewTabletopRegulators)
minPerc <- .7
binaryRegulonActivity <- loadIntscenicOptions, "aucell_binary_nonDupl")
cellInfo_binarizedCells <- cellInfo[whichrownamescellInfo)%in% colnamesbinaryRegulonActivity)),, drop=FALSE]
regulonActivity_byCellType_Binarized <- sapplysplitrownamescellInfo_binarizedCells), cellInfo_binarizedCells$CellType),functioncells) rowMeansbinaryRegulonActivity[,cells, drop=FALSE]))
binaryActPerc_subset <- regulonActivity_byCellType_Binarized[whichrowSumsregulonActivity_byCellType_Binarized>minPerc)>0),]
pheatmap::pheatmapbinaryActPerc_subset,color = colorRampPalettec"white","pink","red"))100), breaks=seq0, 1, length.out = 100),treeheight_row=10, treeheight_col=10, border_color=NA)

Published by

风君子

独自遨游何稽首 揭天掀地慰生平

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注