用BioNet这个bioconductor包来找 maximal-scoring subgraph

## 此包是为了解决一个难题： maximal-scoring subgraph (MSS) problem ，在一个巨大的复杂网络里面找到significantly differentially expressed subnetworks，就是说，得到了几百个差异基因，去PPI数据库做网络图的时候，发现还是巨大无比，所以需要用这个包来精简我们的网络图。

heuristically的中文意思：启发性地

## 而这个R包可以整合多种数据结果来给一个网络打分，

包的主页是：https://www.bioconductor.org/packages/release/bioc/html/BioNet.html

paper：BioNet: an R-Package for the Functional Analysis of ... - Bioinformatics

它整合了PPI网络分析和寻找功能模块的需求。

脚本：https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.R

教程：https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.pdf

重点就是根据一个"igraph" or "graphNEL"对象和打分来找最大的MSS

subnet <- subNetwork(dataLym$label, interactome)

module <- runFastHeinz(subnet, scores)

plotModule(module, scores=scores, diff.expr=logFC) #这个就是精简后的我们的网络图。

其实另外一个函数也有类似的功能，dNetFind https://rdrr.io/cran/dnet/man/dNetFind.html

## 里面用到的网络，都是基于igraph的包： A graph object, either in graphNEL or igraph format.

## 首先加载一系列的包和内置数据

library(BioNet)

library(DLBCL)

data(dataLym)

data(interactome)

## dataLym 里面是3个样本,t,s,o 分别对应着的每个基因的p值

## interactome是一个内置的PPI网络对象，可以根据指定的基因list来提取里面的信息

pvals <- cbind(t=dataLym$t.pval, s=dataLym$s.pval)

rownames(pvals) <- dataLym$label

pval <- aggrPvals(pvals, order=2, plot=FALSE)

## 提取t,s样本的p值，然后用aggrPvals整合成一个p值

subnet <- subNetwork(dataLym$label, interactome)

subnet <- rmSelfLoops(subnet)

subnet

## 根据指定的dataLym$label基因信息来提取网络，但是这个基因信息有点奇怪,比如TP53(7157) ，看起来是symbol跟entrez ID的合体。

## 函数rmSelfLoops是标配，只要是网络，都需要处理一下，去除自循环信息

## 因为指定的dataLym$label基因是有限的，一般不会太多，提取的网络一般也就上千个nodes，万把个edges的

fb <- fitBumModel(pval, plot=FALSE)

## 对我们整合好的基因对应的P值进行Beta-Uniform-Mixture (BUM) model模型处理。

scores <- scoreNodes(subnet, fb, fdr=0.001)

module <- runFastHeinz(subnet, scores)

## Here we use a fast heuristic approach to calculate an approximation to the optimal scoring subnetwork.

logFC <- dataLym$diff

names(logFC) <- dataLym$label

plotModule(module, scores=scores, diff.expr=logFC)

## diff.expr是用来给nodes调色的

## scores是用来给nodes赋予性状的

## 这个函数本身是基于graphNEL or igraph format的定制版，其实可以直接用igraph包来绘图。

## 也可以把这个network导出成Cytoscape format，这样可以用cytoscape来绘图

## 一般来说，红色是上调基因，绿色是下调基因，圆形是得分为正，菱形是得分为负

## 下面是一个实际的例子，如何使用BioNet包来做网络分析

library(BioNet)

library(DLBCL)

data(exprLym)

data(interactome)

exprLym ## 内置对象，所以它的gene的laber是符合interactome的要求的

interactome

network <- subNetwork(featureNames(exprLym), interactome)

network

network <- largestComp(network)

## The function extracts the largest component of a network

network

library(genefilter)

library(impute)

expressions <- impute.knn(exprs(exprLym))$data

## exprs得到的不再是纯粹的表达矩阵，需要用来 impute missing expression data

## 这里选择genefilter包的rowttests函数来做差异分析

t.test <- rowttests(expressions, fac=exprLym$Subgroup)

t.test[1:10, ]

data(dataLym)

ttest.pval <- t.test[, "p.value"]

surv.pval <- dataLym$s.pval

names(surv.pval) <- dataLym$label

pvals <- cbind(ttest.pval, surv.pval)

pval <- aggrPvals(pvals, order=2, plot=FALSE)

fb <- fitBumModel(pval, plot=FALSE)

## 用图来展示这个fitBumModel函数到底做了什么

dev.new(width=13, height=7)

par(mfrow=c(1,2))

hist(fb)

plot(fb)

dev.off()

## 下面这个图可以看到 Beta-Uniform-Mixture (BUM) 模型的两个参数是如何体现的

plotLLSurface(pval, fb)

scores <- scoreNodes(network=network, fb=fb, fdr=0.001)

## 根据p值来对每个edge打分

network <- rmSelfLoops(network)

## 下面是把网络数据写到txt文档，就可以导入到cytoscape啦！

writeHeinzEdges(network=network, file="lymphoma_edges_001", use.score=FALSE)

writeHeinzNodes(network=network, file="lymphoma_nodes_001", node.scores = scores)

datadir <- file.path(path.package("BioNet"), "extdata")

dir(datadir)

## 本次算法变了：the heinz algorithm is used to calculate the maximum-scoring subnetwork

## 下面的文件需要借助heinz.py脚本生成，这里实例用的是包自带的数据

## 脚本代码是：heinz.py -e lymphoma_edges_001.txt -n lymphoma_nodes_001.txt -N True -E False

module <- readHeinzGraph(node.file=file.path(datadir, "lymphoma_nodes_001.txt.0.hnz"), network=network)

diff <- t.test[, "dm"]

names(diff) <- rownames(t.test)

plotModule(module, diff.expr=diff, scores=scores)

sum(scores[nodes(module)])

sum(scores[nodes(module)]>0)

sum(scores[nodes(module)]<0)

###################################################

### code chunk number 27: Tutorial.Rnw:375-380

###################################################

library(BioNet)

library(DLBCL)

library(ALL)

data(ALL)

data(interactome)

## 这个ALL是另外一个包的数据，基因ID现在还没有，是探针ID，需要转换成BioNet识别的！

mapped.eset <- mapByVar(ALL, network=interactome, attr="geneID")

mapped.eset[1:5,1:5]

length(intersect(rownames(mapped.eset), nodes(interactome)))

network <- subNetwork(rownames(mapped.eset), interactome)

network

network <- largestComp(network)

network <- rmSelfLoops(network)

network

## 这里用limma来做差异分析

library(limma)

design <- model.matrix(~ -1+ factor(c(substr(unlist(ALL$BT), 0, 1))))

colnames(design)<- c("B", "T")

contrast.matrix <- makeContrasts(B-T, levels=design)

contrast.matrix

fit <- lmFit(mapped.eset, design)

fit2 <- contrasts.fit(fit, contrast.matrix)

fit2 <- eBayes(fit2)

pval <- fit2$p.value[,1]

fb <- fitBumModel(pval, plot=FALSE)

dev.new(width=13, height=7)

par(mfrow=c(1,2))

hist(fb)

plot(fb)

scores <- scoreNodes(network=network, fb=fb, fdr=1e-14)

## 还是把网络数据写到本地，供cytoscape导入

writeHeinzEdges(network=network, file="ALL_edges_001", use.score=FALSE)

writeHeinzNodes(network=network, file="ALL_nodes_001", node.scores = scores)

## 还是使用 heinz algorithm is used to calculate the maximum-scoring subnetwork

## A new implementation Heinz v2.0 is also available at https://software.cwi.nl/software/heinz ,

datadir <- file.path(path.package("BioNet"), "extdata")

module <- readHeinzGraph(node.file=file.path(datadir, "ALL_nodes_001.txt.0.hnz"), network=network)

nodeDataDefaults(module, attr="diff") <- ""

nodeData(module, n=nodes(module), attr="diff") <- fit2$coefficients[nodes(module),1]

nodeDataDefaults(module, attr="score") <- ""

nodeData(module, n=nodes(module), attr="score") <- scores[nodes(module)]

nodeData(module)[1]

## 保存为XGMML file，供cytoscape使用

saveNetwork(module, file="ALL_module", type="XGMML")

## 一般来说，红色是上调基因，绿色是下调基因，圆形是得分为正，菱形是得分为负

一	二	三	四	五	六	日
« 九
						1
2	3	4	5	6	7	8
9	10	11	12	13	14	15
16	17	18	19	20	21	22
23	24	25	26	27	28	29
30	31

生信菜鸟团

欢迎去论坛biotrainee.com留言参与讨论，或者关注同名微信公众号biotrainee

用BioNet这个bioconductor包来找 maximal-scoring subgraph