# 院士课题组的WGCNA数据挖掘文章能复现吗

### 首先是差异分析

#### 首先是解析表达量矩阵

``````
# 魔幻操作，一键清空
# Start from an empty workspace: remove every object in the current environment.
rm(list = ls())
# Set the global stringsAsFactors option to FALSE.
options(stringsAsFactors = F)
library(data.table)
# NOTE(review): the next line is only the tail of a call. Its beginning
# (presumably `a1 = fread(<expression file>,`) is missing from this document,
# so `a1` is never created here. Restore the full call before running.
data.table = F)
# Inspect the loaded table: dimensions, the top-left corner, and the first
# four columns of the last six rows.
dim(a1)
a1[1:4,1:4]
a1[(nrow(a1)-5):nrow(a1),1:4]
dim(a1)
# The input values are log2(x + 1) transformed; they are converted back to
# counts below before any filtering.

# length(unique(a1$AccID))
# length(unique(a1$GeneName))

# Keep the value columns only (everything after the first column).
mat <- a1[, 2:ncol(a1)]
mat[1:4, 1:4]
# Drop the last four rows of the table.
# NOTE(review): presumably these are summary rows rather than genes; confirm
# against the tail of `a1` before relying on the fixed offset.
mat <- mat[seq_len(nrow(a1) - 4), ]
# Undo log2(x + 1). round() is used rather than ceiling() because 2^x - 1 can
# land a hair above the true integer, which ceiling() would bump up by one.
mat <- round(2^mat - 1)
mat[1:4, 1:4]

# Row names are the Ensembl_ID values with every ".<digits>" part removed.
rownames(mat) <- gsub("[.][0-9]+", "", a1$Ensembl_ID[seq_len(nrow(a1) - 4)])
# Keep genes whose value is above 1 in more than one sample.
keep_feature <- rowSums(mat > 1) > 1
# Per-sample totals, in millions.
colSums(mat) / 1000000
table(keep_feature)
mat <- mat[keep_feature, ]
mat[1:4, 1:4]
# Keep samples whose total exceeds 10 million; drop = FALSE keeps the result
# two-dimensional even if only one sample passes.
mat <- mat[, colSums(mat) / 1000000 > 10, drop = FALSE]
dim(mat)
colnames(mat)
ensembl_matrix <- mat
colnames(ensembl_matrix)
ensembl_matrix[1:4, 1:4]
``````

#### 简单的id转换

``````
library(AnnoProbe)
# Look up annotation for the Ensembl IDs used as row names of the matrix.
ids <- annoGene(rownames(ensembl_matrix), "ENSEMBL", "human")
# Keep the first row per SYMBOL, then the first row per ENSEMBL value, so that
# neither column contains duplicates.
ids <- ids[!duplicated(ids$SYMBOL), ]
ids <- ids[!duplicated(ids$ENSEMBL), ]
# Pick the expression rows that correspond to the remaining annotations, in
# the same order as `ids`.
symbol_matrix <- ensembl_matrix[match(ids$ENSEMBL, rownames(ensembl_matrix)), ]

# Relabel the rows with gene symbols.
rownames(symbol_matrix) <- ids$SYMBOL
# symbol_matrix = ensembl_matrix
symbol_matrix[1:4, 1:4]
``````

#### 然后确定样品的分组后差异分析

``````
library(stringr)
# Preview the symbol-level matrix and list its column names.
symbol_matrix[seq_len(4), seq_len(4)]
colnames(symbol_matrix)
# Label each sample from characters 14-15 of its column name: "11" is labelled
# "control", anything else "case".
# (An earlier, disabled variant keyed on "PLVX" appearing in the column name.)
group_list <- ifelse(
  substr(colnames(symbol_matrix), 14, 15) == "11",
  "control",
  "case"
)
table(group_list)
# Convert to a factor with "control" as the first level.
group_list <- factor(group_list, levels = c("control", "case"))
group_list
colnames(symbol_matrix)

# Write the matrix and the grouping factor to symbol_matrix.Rdata.
save(symbol_matrix, group_list, file = "symbol_matrix.Rdata")
``````

``````
# Run the remaining analysis scripts one after another, in this fixed order.
for (script_path in c(
  "scripts/step2-qc-counts.R",
  "scripts/step3-deg-deseq2.R",
  "scripts/step3-deg-edgeR.R",
  "scripts/step3-deg-limma-voom.R",
  "scripts/step4-qc-for-deg.R",
  "scripts/step5-anno-by-GSEA.R",
  "scripts/step5-anno-by-ORA.R"
)) {
  source(script_path)
}
``````

• https://cowtransfer.com/s/e1817982ce974c 点击链接查看 [ 2023-浙江大学李兰娟院士-WGCNA数据挖掘-step1-deg.zip ] ，或访问奶牛快传 cowtransfer.com 输入传输口令 uwiucz 查看；