<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>生信菜鸟团 &#187; 基础软件</title>
	<atom:link href="http://www.bio-info-trainee.com/category/basic-bio-infomatics/basic-software/feed" rel="self" type="application/rss+xml" />
	<link>http://www.bio-info-trainee.com</link>
	<description>欢迎去论坛biotrainee.com留言参与讨论，或者关注同名微信公众号biotrainee</description>
	<lastBuildDate>Sat, 28 Jun 2025 14:30:13 +0000</lastBuildDate>
	<language>zh-CN</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>https://wordpress.org/?v=4.1.33</generator>
	<item>
		<title>snpEff软件提供的vcf报告</title>
		<link>http://www.bio-info-trainee.com/2933.html</link>
		<comments>http://www.bio-info-trainee.com/2933.html#comments</comments>
		<pubDate>Tue, 02 Jan 2018 07:03:13 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2933</guid>
		<description><![CDATA[还是作为图床使用，大家不需要看：]]></description>
				<content:encoded><![CDATA[<p>还是作为图床使用，大家不需要看：</p>
<p><span id="more-2933"></span></p>
<p><a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/1-variant-Summary.png"><img class="alignnone size-full wp-image-2934" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/1-variant-Summary.png" alt="1-variant-summary" width="793" height="452" /></a></p>
<p><a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/2-Variants-rate-details.png"><img class="alignnone size-full wp-image-2935" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/2-Variants-rate-details.png" alt="2-variants-rate-details" width="421" height="585" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/3-Number-variants-by-type.png"><img class="alignnone size-full wp-image-2936" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/3-Number-variants-by-type.png" alt="3-number-variants-by-type" width="215" height="310" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/4-Number-of-effects-by-impact.png"><img class="alignnone size-full wp-image-2937" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/4-Number-of-effects-by-impact.png" alt="4-number-of-effects-by-impact" width="400" height="401" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/5-Number-of-effects-by-type-and-region.png"><img class="alignnone size-full wp-image-2938" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/5-Number-of-effects-by-type-and-region.png" alt="5-number-of-effects-by-type-and-region" width="959" height="612" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/6-Number-of-effects-by-type-and-region.png"><img class="alignnone size-full wp-image-2939" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/6-Number-of-effects-by-type-and-region.png" alt="6-number-of-effects-by-type-and-region" width="935" height="357" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/7-variant-quality-hist.png"><img class="alignnone size-full wp-image-2940" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/7-variant-quality-hist.png" alt="7-variant-quality-hist" width="1081" height="557" /></a> <a 
href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/8-ts-tv-ratio.png"><img class="alignnone size-full wp-image-2941" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/8-ts-tv-ratio.png" alt="8-ts-tv-ratio" width="719" height="616" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/9-snp-96-changes.png"><img class="alignnone size-full wp-image-2942" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/9-snp-96-changes.png" alt="9-snp-96-changes" width="1131" height="749" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/10-Amino-acid-changes.png"><img class="alignnone size-full wp-image-2943" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/10-Amino-acid-changes.png" alt="10-amino-acid-changes" width="1140" height="751" /></a> <a href="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/11-Variants-by-chromosome-histogram.png"><img class="alignnone size-full wp-image-2944" src="http://www.bio-info-trainee.com/wp-content/uploads/2018/01/11-Variants-by-chromosome-histogram.png" alt="11-variants-by-chromosome-histogram" width="1144" height="817" /></a></p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2933.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>PRINSEQ软件使用说明</title>
		<link>http://www.bio-info-trainee.com/2520.html</link>
		<comments>http://www.bio-info-trainee.com/2520.html#comments</comments>
		<pubDate>Sat, 02 Sep 2017 12:14:44 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2520</guid>
		<description><![CDATA[PRINSEQ软件使用说明 PRINSEQ全称是PReprocessing an &#8230; <a href="http://www.bio-info-trainee.com/2520.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<h1 class="md-end-block md-heading"><span class="">PRINSEQ软件使用说明</span></h1>
<p><span class="md-line md-end-block">PRINSEQ全称是PReprocessing and INformation of SEQuences，下面是关于这个软件工具的一些链接：</span></p>
<p><span class="md-line md-end-block"><span class=""><a spellcheck="false" href="http://prinseq.sourceforge.net/index.html">主页</a></span> —&gt; <span class=""><a spellcheck="false" href="http://edwards.sdsu.edu/prinseq_beta/">在线</a></span>—&gt;<span class=""><a spellcheck="false" href="http://sourceforge.net/projects/prinseq/files/">下载</a></span>—&gt;<span class=""><a spellcheck="false" href="http://prinseq.sourceforge.net/faq.html">FAQ</a></span> —&gt;<span class=""><a spellcheck="false" href="http://prinseq.sourceforge.net/manual.html">手册</a></span></span><span id="more-2520"></span></p>
<p><span class="md-line md-end-block"><span class="">它是这样介绍自己的：</span></span></p>
<p><span class="md-line md-end-block">PRINSEQ can be used to filter, reformat, or trim your genomic and metagenomic sequence data. It generates summary statistics of your sequences in graphical and tabular format. It is easily configurable and provides a user-friendly interface.</span></p>
<p><span class="md-line md-end-block md-focus"><span class="md-expand">是用perl语言写的一些脚本集合，</span><span class=""><strong>不依赖于其它perl模块</strong></span>，所以安装非常方便，功能大概就像是fastqc和fastx-toolkit的合集，是2011年发表的工具了，但是不知道为什么它不是很出名。</span></p>
<p><span class="md-line md-end-block">首先安装该软件：</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">cd ~/biosoft
mkdir PRINSEQ &amp;&amp;  cd PRINSEQ
wget  https://sourceforge.net/projects/prinseq/files/standalone/prinseq-lite-0.20.4.tar.gz
tar zxvf  prinseq-lite-0.20.4.tar.gz</pre>
<p><span class="md-line md-end-block">用法很简单：</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">perl prinseq-lite.pl -verbose -fastq test.fq -graph_data test.gd -out_good null -out_bad null
perl prinseq-graphs.pl -i test.gd -png_all -o test
perl prinseq-graphs.pl -i test.gd -html_all -o test</pre>
<p><span class="md-line md-end-block">我也简单测试了一下自己的数据</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">perl ~/biosoft/PRINSEQ/prinseq-lite-0.20.4/prinseq-lite.pl -verbose -phred64 -graph_data test.gd -out_good null -out_bad null  -fastq &lt;( zcat NPC10F-N_1.fastq.gz ) -fastq2  &lt;( zcat NPC10F-N_2.fastq.gz ) 
perl ~/biosoft/PRINSEQ/prinseq-lite-0.20.4/prinseq-graphs.pl -i test.gd -png_all -o test
perl ~/biosoft/PRINSEQ/prinseq-lite-0.20.4/prinseq-graphs.pl -i test.gd -html_all -o test</pre>
<p><span class="md-line md-end-block">我想它之所以不流行，就是因为它对fastq文件的支持性太差了，还需要用户自己解压开gz格式的文件，这对很多人来说是一个挑战。</span></p>
<p><span class="md-line md-end-block">分析的确不需要perl模块，但是绘图是需要一些特殊模块的，包括：</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">   Getopt::Long
   Pod::Usage
   File::Temp qw(tempfile)
   Fcntl qw(:flock SEEK_END)
   Cwd
   JSON
   Cairo
   Statistics::PCA
   MIME::Base64</pre>
<p><span class="md-line md-end-block">出网页报告也需要一些perl模块，如下：</span></p>
<pre class="md-fences md-end-block" lang="shell" contenteditable="false">   CGI
   File::Path
   IO::Uncompress::AnyUncompress
   LWP::Simple
   File::Copy
   File::Basename</pre>
<p><span class="md-line md-end-block">这就是大家为什么不喜欢用这个软件的原因了吧，还有一些特殊要求我都懒得讲解了，<span spellcheck="false"><a href="https://sourceforge.net/projects/prinseq/files/">https://sourceforge.net/projects/prinseq/files/</a></span><span class=""> 自行阅读哈。</span></span></p>
<p><span class="md-line md-end-block">但是它有两个值得一提的功能！</span></p>
<h2 class="md-end-block md-heading"><span class="">测序污染序列PCA分析</span></h2>
<p><span class="md-line md-end-block">很多情况下提取的DNA会混杂有其它物种，对下游分析不利，这个时候就需要仔细检查了，PRINSEQ软件正好派上用场。</span></p>
<p><span class="md-line md-end-block">不过我没看懂那个图!需要去看一篇文章 Willner D, Thurber RV, Rohwer F: <span class=""><strong>Metagenomic signatures of 86 microbial and viral metagenomes</strong></span><span class="">. Environ. Microbiol 2009.</span></span></p>
<p><span class="md-line md-end-block"><span class="md-image md-img-loaded" contenteditable="false" data-src="http://prinseq.sourceforge.net/images/contam_1.png"><img src="http://prinseq.sourceforge.net/images/contam_1.png" alt="" /></span></span></p>
<h2 class="md-end-block md-heading">组装基因组的N50等分析</h2>
<p><span class="md-line md-end-block">有参数 <span class=""><strong>-stats_assembly</strong></span> 可以选择!</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">perl ~/biosoft/PRINSEQ/prinseq-lite-0.20.4/prinseq-lite.pl -verbose -fasta output_prefix.contigs.fa  -stats_assembly</pre>
<p><span class="md-line md-end-block">就是给出一些指标，如下：</span></p>
<pre class="md-fences md-end-block" lang="" contenteditable="false">
stats_assembly  N50 176
stats_assembly  N75 113
stats_assembly  N90 78
stats_assembly  N95 70</pre>
<p>&nbsp;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2520.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>我的github极简指南</title>
		<link>http://www.bio-info-trainee.com/2477.html</link>
		<comments>http://www.bio-info-trainee.com/2477.html#comments</comments>
		<pubDate>Tue, 11 Jul 2017 01:16:25 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>
		<category><![CDATA[未分类]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2477</guid>
		<description><![CDATA[github极简指南 入生信的坑已经3年多了，但是开始github的旅程才一年多 &#8230; <a href="http://www.bio-info-trainee.com/2477.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<div class="markdown-here-wrapper" data-md-url="http://www.bio-info-trainee.com/wp-admin/post.php?post=2477&amp;action=edit">
<p style="margin: 0px 0px 1.2em !important;">github极简指南</p>
<blockquote style="margin: 1.2em 0px; border-left: 4px solid #dddddd; padding: 0px 1em; color: #777777; quotes: none;">
<p style="margin: 0px 0px 1.2em !important;">入生信的坑已经3年多了，但是开始github的旅程才一年多，起初主要是为了建立bioconductor中文社区而学习的，现在也在自己的github上面分享了不少代码，有一些心得体会，欢迎大家前往github <a href="https://github.com/jmzeng1314/NGS-pipeline">star我的项目</a></p>
</blockquote>
<p style="margin: 0px 0px 1.2em !important;"><span id="more-2477"></span></p>
<p style="margin: 0px 0px 1.2em !important;">当初想了解github的时候看到过不少教程，始终觉得不够透彻，还是分享一下自己的心得吧。<br />
首先要明白为什么要用github，一般就4类需求啦：</p>
<ul style="margin: 1.2em 0px; padding-left: 2em;">
<li style="margin: 0.5em 0px;">仅仅是为了查看拷贝别人的代码，那么其实没必要用github，下载代码即可。</li>
<li style="margin: 0.5em 0px;">需要分享代码，那么创建一个账户把代码上传即可。</li>
<li style="margin: 0.5em 0px;">一个长期的编程项目，就略微有点麻烦，会涉及到代码备份，回滚，撤销等各种git指令，其实大部分人不需要学这东西。我就从来没有用过回滚操作。</li>
<li style="margin: 0.5em 0px;">如果是团队合作，那么更加复杂了，需要买一本git操作书籍来学习，并且经常团队会议，学习几十个小时才行。</li>
</ul>
<p style="margin: 0px 0px 1.2em !important;">大部分人只需要学会把自己的电脑跟自己的github账户关联，然后新建一个git项目跟github里面的仓库关联即可。</p>
<h2 id="-git-clone-git-" style="margin: 1.3em 0px 1em; padding: 0px; font-weight: bold; font-size: 1.4em; border-bottom: 1px solid #eeeeee;">用<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">git clone</code>复制一个 Git 仓库</h2>
<p style="margin: 0px 0px 1.2em !important;">大多数人的需求仅止于此啦，就是想复制一个项目，看看代码，你就可以克隆那个项目。 在终端执行 git clone [url]，[url] 为你想要复制的项目，就可以了，比如下面：<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github5.png" alt="" /><br />
上述操作将复制该项目的全部记录，让你本地拥有这些。并且该操作将拷贝该项目的主分支， 使你能够查看代码，或编辑、修改。</p>
<p style="margin: 0px 0px 1.2em !important;">默认情况下，Git 会按照你提供的 URL 所指示的项目的名称创建你的本地项目目录。 通常就是该 URL 最后一个 / 之后的任何东西。</p>
<p style="margin: 0px 0px 1.2em !important;">这个时候跟github本身没有关系，因为你只是下载了文件而已，并没有涉及到提交代码。<strong>如果要提交代码，需要把自己的电脑跟github关联，并且关联指定的仓库。</strong></p>
<h2 id="-git-github-" style="margin: 1.3em 0px 1em; padding: 0px; font-weight: bold; font-size: 1.4em; border-bottom: 1px solid #eeeeee;">安装git，然后把自己的电脑关联自己的github账户：</h2>
<blockquote style="margin: 1.2em 0px; border-left: 4px solid #dddddd; padding: 0px 1em; color: #777777; quotes: none;">
<p style="margin: 0px 0px 1.2em !important;">去<a href="https://github.com/">github官网</a>创建账号的教程我就不写了，有了账号密码就需要跟自己的电脑关联起来，这样github网页才会认可你，允许你上传你的代码。</p>
</blockquote>
<p style="margin: 0px 0px 1.2em !important;">如果是windows电脑，那么需要下载git软件才可以,软件下载地址<a href="http://git-scm.com/">http://git-scm.com/</a>,安装好git就可以打开终端了。</p>
<p style="margin: 0px 0px 1.2em !important;">如果是mac或者linux，那么打开终端即可，一般都默认安装了git软件。</p>
<p style="margin: 0px 0px 1.2em !important;">终端，就是下面这样的黑白命令行,打开之后在终端里面运行命令： <code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">ssh-keygen -t rsa -C "jmzeng1314@163.com"</code>(替换成自己的github注册邮箱)</p>
<p style="margin: 0px 0px 1.2em !important;">可以看到<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">home目录</code>下面多了一个.ssh文件夹<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github1.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">用notepad++等高级文本编辑器打开那个public key文件，把里面的内容复制到自己的<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">github网页</code>里面的ssh keys里面<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github2.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">然后就成功啦，可以用<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">ssh -T git@github.com</code>测试一下，如下：<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github3.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">配置本地用户和邮箱</p>
<p style="margin: 0px 0px 1.2em !important;">用户名邮箱作用 : 我们需要设置一个<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">用户名</code>和<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">邮箱</code>, 这是用来上传本地仓库到GitHub中, 在GitHub中显示代码上传者;</p>
<p style="margin: 0px 0px 1.2em !important;">使用命令 :</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">git config --global user.name "jmzeng1314" //设置用户名 
git config --global user.email "jmzeng1314@163.com" //设置邮箱
</code></pre>
<p style="margin: 0px 0px 1.2em !important;"><img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github4.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">到此Git客户端已安装及GitHub配置完成，现在可以给GitHub传输代码了。</p>
<h2 id="-github-" style="margin: 1.3em 0px 1em; padding: 0px; font-weight: bold; font-size: 1.4em; border-bottom: 1px solid #eeeeee;">客户端把本地文件夹和github仓库关联</h2>
<p style="margin: 0px 0px 1.2em !important;">github的客户端非常之多，很多人喜欢github desktop，不过我比较熟悉的是Rstudio，因为我喜欢R语言。</p>
<p style="margin: 0px 0px 1.2em !important;">Rstudio客户端的global菜单里面有设置github账号关联的方法,因为我们电脑本来就已经关联了，这个就略过哈。</p>
<p style="margin: 0px 0px 1.2em !important;">首先在自己的github网页里面新建同样的空的project，然后去自己刚才在本机用Rstudio新建的文件夹里面：<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github6.png" alt="" /><br />
因为是空白仓库，所以直接进入终端，然后进入项目文件夹里面把本地文件上传到指定的仓库即可。<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github7.png" alt="" /></p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$git init //初始化
$git add . //把所有文件加入到索引（不想把所有文件加入，可以用gitignore或add 具体文件，见下文)
$git commit //提交到本地仓库，然后会填写更新日志($git commit -m "my first version of ...")
$git remote add origin https://github.com/jmzeng1314/test.git //增加到remote
$git push origin master //push到github上
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">记住要在github网站里面新建的是空白的仓库哦。</p>
<p style="margin: 0px 0px 1.2em !important;">这样就把网页版github和本地的文件夹联系起来了，以后要是修改了这个程序，只需要点击commit+push即可，如果是网页版的程序被修改了，就先pull一下。<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github8.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">当然，其实对大部分人来说，意义不大，因为大家喜欢命令行的，不是很喜欢这个鼠标点击来进行同步。命令行就需要继续看下面的教程啦。</p>
<h2 id="-github-" style="margin: 1.3em 0px 1em; padding: 0px; font-weight: bold; font-size: 1.4em; border-bottom: 1px solid #eeeeee;">命令行把本地文件夹和github仓库关联</h2>
<p style="margin: 0px 0px 1.2em !important;">首先在自己的github网页里面新建一个空的仓库，然后运行下面的代码即可！</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">cd ~/test ##到test目录,本地目录名与repository的名字不一定相同
git init ##初始化
git add . ##把所有文件加入到索引（不想把所有文件加入，可以用gitignore或add 具体文件，见下文)
git commit -m 'aha' ##提交到本地仓库，然后会填写更新日志($git commit -m "my first version of ...")
git remote add origin https://github.com/jmzeng1314/test.git ##增加到remote
git push origin master ##push到github上
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">记住一定要是空白的仓库哦，如果万一你新建仓库的同时建立了readme文件，就直接clone好了，走下面的流程。</p>
<h2 id="github-" style="margin: 1.3em 0px 1em; padding: 0px; font-weight: bold; font-size: 1.4em; border-bottom: 1px solid #eeeeee;">github进阶操作。</h2>
<blockquote style="margin: 1.2em 0px; border-left: 4px solid #dddddd; padding: 0px 1em; color: #777777; quotes: none;">
<p style="margin: 0px 0px 1.2em !important;">其实就是本地的代码有所修改，需要同步到github而已，又或者github网页里面的代码被修改了，需要同步到本地。如果是多个人合作，那么别人会修改你的代码，所以每次你上传代码之前，都需要先把github网页里面的代码先拉下来，再合并后上传自己的。</p>
</blockquote>
<p style="margin: 0px 0px 1.2em !important;">1.更新项目（新加了文件），<code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0px 0.3em; white-space: pre-wrap; border: 1px solid #eaeaea; background-color: #f8f8f8; border-radius: 3px; display: inline;">这个是最高频需求</code></p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$cd ~/hello-world
$git add . //这样可以自动判断新加了哪些文件，或者手动加入文件名字
$git commit //提交到本地仓库，不加参数会提示，注意:^=Ctrl，按照提示来就好了～～～
$git push origin master //不是新创建的，不用再add 到remote上了
</code></pre>
<p style="margin: 0px 0px 1.2em !important;"><img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github9.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">2.更新项目（没新加文件，只有删除或者修改文件）：<br />
$cd ~/hello-world<br />
$git commit -a //记录删除或修改了哪些文件<br />
$git push origin master //提交到github</p>
<p style="margin: 0px 0px 1.2em !important;">3.忽略一些文件，比如*.o等:</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$cd ~/hello-world
$vim .gitignore //把文件类型加入到.gitignore中，保存
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">然后就可以git add . 能自动过滤这种文件</p>
<p style="margin: 0px 0px 1.2em !important;">4.clone代码到本地：</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$git clone git@github.com:WadeLeng/hello-world.git
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">5.假如本地已经存在了代码，而仓库里有更新，把更改的合并到本地的项目：</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$git fetch origin //获取远程更新
$git merge origin/master //把更新的内容合并到本地分支
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">这个是最高频需求<br />
<img src="http://www.bio-info-trainee.com/wp-content/uploads/2017/07/github10.png" alt="" /></p>
<p style="margin: 0px 0px 1.2em !important;">6.撤销</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$git reset
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">7.删除</p>
<pre style="font-size: 1em; font-family: Consolas, Inconsolata, Courier, monospace; line-height: 1.2em; margin: 1.2em 0px;"><code style="font-size: 0.85em; font-family: Consolas, Inconsolata, Courier, monospace; margin: 0px 0.15em; padding: 0.5em 0.7em; white-space: pre; border: 1px solid #cccccc; background-color: #f8f8f8; border-radius: 3px; display: block !important; overflow: auto;">$git rm * // 不是用rm
</code></pre>
<p style="margin: 0px 0px 1.2em !important;">//———————————————常见错误—————————————————-<br />
1.$ git remote add origin git@github.com:WadeLeng/hello-world.git<br />
错误提示：fatal: remote origin already exists.<br />
解决办法：$ git remote rm origin<br />
然后在执行：$ git remote add origin git@github.com:WadeLeng/hello-world.git 就不会报错误了</p>
<ol start="2" style="margin: 1.2em 0px; padding-left: 2em;">
<li style="margin: 0.5em 0px;">$ git push origin master<br />
错误提示：error: failed to push some refs to<br />
解决办法：$ git pull origin master //先把远程服务器github上面的文件拉下来，再push 上去。<br />
//———————————————————————————————————————</li>
</ol>
<p style="margin: 0px 0px 1.2em !important;">装逼请看这个：<a href="http://www.oschina.net/question/1397765_166368">http://www.oschina.net/question/1397765_166368</a><br />
安装git工具看这个：<a href="http://www.ihref.com/read-16377.html">http://www.ihref.com/read-16377.html</a><br />
上传自己的代码看这个：<a href="http://blog.csdn.net/hanhailong726188/article/details/46738929">http://blog.csdn.net/hanhailong726188/article/details/46738929</a><br />
一些开发过程的注意事项：<a href="http://blog.csdn.net/u011068702/article/details/49531167">http://blog.csdn.net/u011068702/article/details/49531167</a><br />
简介完整教程：<a href="http://caibaojian.com/use-github.html">http://caibaojian.com/use-github.html</a></p>
<p style="margin: 0px 0px 1.2em !important;"><a href="https://www.r-bloggers.com/rstudio-and-github/">https://www.r-bloggers.com/rstudio-and-github/</a><br />
<a href="http://r-bio.github.io/intro-git-rstudio/">http://r-bio.github.io/intro-git-rstudio/</a></p>
<div style="height: 0; width: 0; max-height: 0; max-width: 0; overflow: hidden; font-size: 0em; padding: 0; margin: 0;" title="MDH:PHA+Z2l0aHVi5p6B566A5oyH5Y2XPGJyPjwvcD48cD4mZ3Q7IOWFpeeUn+S/oeeahOWdkeW3sue7
jzPlubTlpJrkuobvvIzkvYbmmK/lvIDlp4tnaXRodWLnmoTml4XnqIvmiY3kuIDlubTlpJrvvIzo
tbfliJ3kuLvopoHmmK/kuLrkuoblu7rnq4tiaW9jb25kdWN0b3LkuK3mlofnpL7ljLrogIzlrabk
uaDnmoTvvIznjrDlnKjkuZ/lnKjoh6rlt7HnmoRnaXRodWLkuIrpnaLliIbkuqvkuobkuI3lsJHk
u6PnoIHvvIzmnInkuIDkupvlv4PlvpfkvZPkvJrvvIzmrKLov47lpKflrrbliY3lvoBnaXRodWIg
W3N0YXLmiJHnmoTpobnnm65dKGh0dHBzOi8vZ2l0aHViLmNvbS9qbXplbmcxMzE0L05HUy1waXBl
bGluZSk8L3A+PHA+5b2T5Yid5oOz5LqG6KejZ2l0aHVi55qE5pe25YCZ55yL5Yiw6L+H5LiN5bCR
5pWZ56iL77yM5aeL57uI6KeJ5b6X5LiN5aSf6YCP5b2777yM6L+Y5piv5YiG5Lqr5LiA5LiL6Ieq
5bex55qE5b+D5b6X5ZCn44CCPGJyPummluWFiOimgeaYjueZveS4uuS7gOS5iOimgeeUqGdpdGh1
Yu+8jOS4gOiIrOWwsTTnsbvpnIDmsYLllabvvJo8L3A+PHA+KiDku4Xku4XmmK/kuLrkuobmn6Xn
nIvmi7fotJ3liKvkurrnmoTku6PnoIHvvIzpgqPkuYjlhbblrp7msqHlv4XopoHnlKhnaXRodWLv
vIzkuIvovb3ku6PnoIHljbPlj6/jgII8YnI+KiDpnIDopoHliIbkuqvku6PnoIHvvIzpgqPkuYjl
iJvlu7rkuIDkuKrotKbmiLfmiorku6PnoIHkuIrkvKDljbPlj6/jgII8YnI+KiDkuIDkuKrplb/m
nJ/nmoTnvJbnqIvpobnnm67vvIzlsLHnlaXlvq7mnInngrnpurvng6bvvIzkvJrmtonlj4rliLDk
u6PnoIHlpIfku73vvIzlm57mu5rvvIzmkqTplIDnrYnlkITnp41naXTmjIfku6TvvIzlhbblrp7l
pKfpg6jliIbkurrkuI3pnIDopoHlrabov5nkuJzopb/jgILmiJHlsLHku47mnaXmsqHmnInnlKjo
v4flm57mu5rmk43kvZzjgII8YnI+KiDlpoLmnpzmmK/lm6LpmJ/lkIjkvZzvvIzpgqPkuYjmm7Tl
iqDlpI3mnYLkuobvvIzpnIDopoHkubDkuIDmnKxnaXTmk43kvZzkuabnsY3lvIDlrabkuaDvvIzl
ubbkuJTnu4/luLjlm6LpmJ/kvJrorq7vvIzlrabkuaDlh6DljYHkuKrlsI/ml7bmiY3ooYzjgII8
L3A+PHA+5aSn6YOo5YiG5Lq65Y+q6ZyA6KaB5a2m5Lya5oqK6Ieq5bex55qE55S16ISR6Lef6Ieq
5bex55qEZ2l0aHVi6LSm5oi35YWz6IGU77yM54S25ZCO5paw5bu65LiA5LiqZ2l06aG555uu6Lef
Z2l0aHVi6YeM6Z2i55qE5LuT5bqT5YWz6IGU5Y2z5Y+v44CCPC9wPjxwPiMjIOeUqGBgZ2l0IGNs
b25lYGDlpI3liLbkuIDkuKogR2l0IOS7k+W6kzwvcD48cD7lpKflpJrmlbDkurrnmoTpnIDmsYLk
u4XmraLkuo7mraTllabvvIzlsLHmmK/mg7PlpI3liLbkuIDkuKrpobnnm67vvIznnIvnnIvku6Pn
oIHvvIzkvaDlsLHlj6/ku6XlhYvpmobpgqPkuKrpobnnm67jgIIg5Zyo57uI56uv5omn6KGMIGdp
dCBjbG9uZSBbdXJsXe+8jFt1cmxdIOS4uuS9oOaDs+imgeWkjeWItueahOmhueebru+8jOWwseWP
r+S7peS6hu+8jOavlOWmguS4i+mdou+8mjxicj4hW10oaHR0cDovL3d3dy5iaW8taW5mby10cmFp
bmVlLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxNy8wNy9naXRodWI1LnBuZyk8YnI+5LiK6L+w
5pON5L2c5bCG5aSN5Yi26K+l6aG555uu55qE5YWo6YOo6K6w5b2V77yM6K6p5L2g5pys5Zyw5oul
5pyJ6L+Z5Lqb44CC5bm25LiU6K+l5pON5L2c5bCG5ou36LSd6K+l6aG555uu55qE5Li75YiG5pSv
77yMIOS9v+S9oOiDveWkn+afpeeci+S7o+egge+8jOaIlue8lui+keOAgeS/ruaUueOAgjwvcD48
cD7pu5jorqTmg4XlhrXkuIvvvIxHaXQg5Lya5oyJ54Wn5L2g5o+Q5L6b55qEIFVSTCDmiYDmjIfn
pLrnmoTpobnnm67nmoTlkI3np7DliJvlu7rkvaDnmoTmnKzlnLDpobnnm67nm67lvZXjgIIg6YCa
5bi45bCx5piv6K+lIFVSTCDmnIDlkI7kuIDkuKogLyDkuYvlkI7nmoTku7vkvZXkuJzopb/jgII8
L3A+PHA+6L+Z5Liq5pe25YCZ6LefZ2l0aHVi5pys6Lqr5rKh5pyJ5YWz57O777yM5Zug5Li65L2g
5Y+q5piv5LiL6L295LqG5paH5Lu26ICM5bey77yM5bm25rKh5pyJ5raJ5Y+K5Yiw5o+Q5Lqk5Luj
56CB44CCKirlpoLmnpzopoHmj5DkuqTku6PnoIHvvIzpnIDopoHmioroh6rlt7HnmoTnlLXohJHo
t59naXRodWLlhbPogZTvvIzlubbkuJTlhbPogZTmjIflrprnmoTku5PlupPjgIIqKjwvcD48cD4j
IyDlronoo4VnaXTvvIznhLblkI7mioroh6rlt7HnmoTnlLXohJHlhbPogZToh6rlt7HnmoRnaXRo
dWLotKbmiLfvvJo8L3A+PHA+Jmd0OyDljrtbZ2l0aHVi5a6Y572RXShodHRwczovL2dpdGh1Yi5j
b20vKeWIm+W7uui0puWPt+eahOaVmeeoi+aIkeWwseS4jeWGmeS6hu+8jOacieS6hui0puWPt+Wv
hueggeWwsemcgOimgei3n+iHquW3seeahOeUteiEkeWFs+iBlOi1t+adpe+8jOi/meagt2dpdGh1
Yue9kemhteaJjeS8muiupOWPr+S9oO+8jOWFgeiuuOS9oOS4iuS8oOS9oOeahOS7o+eggeOAgjwv
cD48cD7lpoLmnpzmmK93aW5kb3dz55S16ISR77yM6YKj5LmI6ZyA6KaB5LiL6L29Z2l06L2v5Lu2
5omN5Y+v5LulLOi9r+S7tuS4i+i9veWcsOWdgFtodHRwOi8vZ2l0LXNjbS5jb20vXShodHRwOi8v
Z2l0LXNjbS5jb20vKSzlronoo4Xlpb1naXTlsLHlj6/ku6XmiZPlvIDnu4jnq6/kuobjgII8L3A+
PHA+5aaC5p6c5pivbWFj5oiW6ICFbGludXjvvIzpgqPkuYjmiZPlvIDnu4jnq6/ljbPlj6/vvIzk
uIDoiKzpg73pu5jorqTlronoo4XkuoZnaXTova/ku7bjgII8L3A+PHA+57uI56uv77yM5bCx5piv
5LiL6Z2i6L+Z5qC355qE6buR55m95ZG95Luk6KGMLOaJk+W8gOS5i+WQjuWcqOe7iOerr+mHjOmd
oui/kOihjOWRveS7pO+8miBgYHNzaC1rZXlnZW4gLXQgcnNhIC1DICJqbXplbmcxMzE0QDE2My5j
b20iIGBgKOabv+aNouaIkOiHquW3seeahGdpdGh1YuazqOWGjOmCrueusSk8L3A+PHA+5Y+v5Lul
55yL5YiwYGBob21l55uu5b2VYGDkuIvpnaLlpJrkuobkuIDkuKouc3No5paH5Lu25aS5PGJyPiFb
XShodHRwOi8vd3d3LmJpby1pbmZvLXRyYWluZWUuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDE3
LzA3L2dpdGh1YjEucG5nKTwvcD48cD48YnI+55Sobm90ZXBhZCsr562J6auY57qn5paH5pys57yW
6L6R5Zmo5omT5byA6YKj5LiqcHVibGljIGtleeaWh+S7tu+8jOaKiumHjOmdoueahOWGheWuueWk
jeWItuWIsOiHquW3seeahGBgZ2l0aHVi572R6aG1YGDph4zpnaLnmoRzc2gga2V5c+mHjOmdojxi
cj4hW10oaHR0cDovL3d3dy5iaW8taW5mby10cmFpbmVlLmNvbS93cC1jb250ZW50L3VwbG9hZHMv
MjAxNy8wNy9naXRodWIyLnBuZyk8L3A+PHA+54S25ZCO5bCx5oiQ5Yqf5ZWmLOWmguS4i2Bgc3No
IC1UIGdpdEBnaXRodWIuY29tYGDvvIzlj6/ku6XnlKjmtYvor5XkuIDkuIs8YnI+IVtdKGh0dHA6
Ly93d3cuYmlvLWluZm8tdHJhaW5lZS5jb20vd3AtY29udGVudC91cGxvYWRzLzIwMTcvMDcvZ2l0
aHViMy5wbmcpPC9wPjxwPumFjee9ruacrOWcsOeUqOaIt+WSjOmCrueusTwvcD48cD7nlKjmiLfl
kI3pgq7nrrHkvZznlKggOiDmiJHku6zpnIDopoHorr7nva7kuIDkuKpgYOeUqOaIt+WQjWBg5ZKM
YGDpgq7nrrFgYCwg6L+Z5piv55So5p2l5LiK5Lyg5pys5Zyw5LuT5bqT5YiwR2l0SHVi5LitLCDl
nKhHaXRIdWLkuK3mmL7npLrku6PnoIHkuIrkvKDogIU7PC9wPjxwPuS9v+eUqOWRveS7pCA6PGJy
PmBgYDxicj5naXQgY29uZmlnIC0tZ2xvYmFsIHVzZXIubmFtZSAiam16ZW5nMTMxNCIgLy/orr7n
va7nlKjmiLflkI0gPGJyPmdpdCBjb25maWcgLS1nbG9iYWwgdXNlci5lbWFpbCAiam16ZW5nMTMx
NEAxNjMuY29tIiAvL+iuvue9rumCrueusTxicj5gYGA8YnI+IVtdKGh0dHA6Ly93d3cuYmlvLWlu
Zm8tdHJhaW5lZS5jb20vd3AtY29udGVudC91cGxvYWRzLzIwMTcvMDcvZ2l0aHViNC5wbmcpPC9w
PjxwPuWIsOatpEdpdOWuouaIt+err+W3suWuieijheWPikdpdEh1YumFjee9ruWujOaIkO+8jOeO
sOWcqOWPr+S7pee7mUdpdEh1YuS8oOi+k+S7o+eggeS6huOAgjwvcD48cD4jIyDlrqLmiLfnq6/m
iormnKzlnLDmlofku7blpLnlkoxnaXRodWLku5PlupPlhbPogZQ8YnI+Z2l0aHVi55qE5a6i5oi3
56uv6Z2e5bi45LmL5aSa77yM5b6I5aSa5Lq65Zac5qyiZ2l0aHViIGRlc2t0b3DvvIzkuI3ov4fm
iJHmr5TovoPnhp/mgonnmoRSc3R1ZGlv77yM5Zug5Li65oiR5Zac5qyiUuivreiogOOAgjwvcD48
cD5Sc3R1ZGlv5a6i5oi356uv55qEZ2xvYmFs6I+c5Y2V6YeM6Z2i5pyJ6K6+572uZ2l0aHVi6LSm
5Y+35YWz6IGU55qE5pa55rOVLOWboOS4uuaIkeS7rOeUteiEkeacrOadpeWwseW3sue7j+WFs+iB
lOS6hu+8jOi/meS4quWwseeVpei/h+WTiOOAgjwvcD48cD7pppblhYjlnKjoh6rlt7HnmoRnaXRo
dWLnvZHpobXph4zpnaLmlrDlu7rlkIzmoLfnmoTnqbrnmoRwcm9qZWN077yM54S25ZCO5Y676Ieq
5bex5Yia5omN5Zyo5pys5py655SoUnN0dWRpb+aWsOW7uueahOaWh+S7tuWkuemHjOmdou+8mjxi
cj4hW10oaHR0cDovL3d3dy5iaW8taW5mby10cmFpbmVlLmNvbS93cC1jb250ZW50L3VwbG9hZHMv
MjAxNy8wNy9naXRodWI2LnBuZyk8YnI+5Zug5Li65piv56m655m95LuT5bqT77yM5omA5Lul55u0
5o6l6L+b5YWl57uI56uv77yM54S25ZCO6L+b5YWl6aG555uu5paH5Lu25aS56YeM6Z2i5oqK5pys
5Zyw5paH5Lu25LiK5Lyg5Yiw5oyH5a6a55qE5Y2z5Y+v44CCPGJyPiFbXShodHRwOi8vd3d3LmJp
by1pbmZvLXRyYWluZWUuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDE3LzA3L2dpdGh1YjcucG5n
KTxicj5gYGA8YnI+JGdpdCBpbml0IC8v5Yid5aeL5YyWPGJyPiRnaXQgYWRkIC4gLy/miormiYDm
nInmlofku7bliqDlhaXliLDntKLlvJXvvIjkuI3mg7PmiormiYDmnInmlofku7bliqDlhaXvvIzl
j6/ku6XnlKhnaXRpZ25vcmXmiJZhZGQg5YW35L2T5paH5Lu277yM6KeB5LiL5paHKTxicj4kZ2l0
IGNvbW1pdCAvL+aPkOS6pOWIsOacrOWcsOS7k+W6k++8jOeEtuWQjuS8muWhq+WGmeabtOaWsOaX
peW/lygkZ2l0IGNvbW1pdCAtbSDigJxteSBmaXJzdCB2ZXNpb24gb2YgLi4u4oCdKTxicj4kZ2l0
IHJlbW90ZSBhZGQgb3JpZ2luIGh0dHBzOi8vZ2l0aHViLmNvbS9qbXplbmcxMzE0L3Rlc3QuZ2l0
IC8v5aKe5Yqg5YiwcmVtb3RlPGJyPiRnaXQgcHVzaCBvcmlnaW4gbWFzdGVyIC8vcHVzaOWIsGdp
dGh1YuS4ijxicj5gYGA8YnI+6K6w5L2P6KaB5ZyoZ2l0aHVi572R56uZ6YeM6Z2i5paw5bu655qE
5piv56m655m955qE5LuT5bqT5ZOm44CCPC9wPjxwPui/meagt+WwseaKiue9kemhteeJiGdpdGh1
YuWSjOacrOWcsOeahOaWh+S7tuWkueiBlOezu+i1t+adpeS6hu+8jOS7peWQjuimgeS/ruaUueS6
hui/meS4queoi+W6j++8jOWPqumcgOimgeeCueWHu2NvbW1pdCtwdXNo5Y2z5Y+v77yM5aaC5p6c
5piv572R6aG154mI55qE56iL5bqP6KKr5L+u5pS55LqG77yM5bCx5YWIcHVsbOS4gOS4i+OAgjxi
cj4hW10oaHR0cDovL3d3dy5iaW8taW5mby10cmFpbmVlLmNvbS93cC1jb250ZW50L3VwbG9hZHMv
MjAxNy8wNy9naXRodWI4LnBuZyk8L3A+PHA+5b2T54S277yM5YW25a6e5a+55aSn6YOo5YiG5Lq6
5p2l6K+077yM5oSP5LmJ5LiN5aSn77yM5Zug5Li65aSn5a625Zac5qyi5ZG95Luk6KGM55qE77yM
5LiN5piv5b6I5Zac5qyi6L+Z5Liq6byg5qCH54K55Ye75p2l6L+b6KGM5ZCM5q2l44CC5ZG95Luk
6KGM5bCx6ZyA6KaB55yL5LiL6Z2i55qE5pWZ56iL5ZWm44CCPGJyPltodHRwczovL3d3dy5yLWJs
b2dnZXJzLmNvbS9yc3R1ZGlvLWFuZC1naXRodWIvXShodHRwczovL3d3dy5yLWJsb2dnZXJzLmNv
bS9yc3R1ZGlvLWFuZC1naXRodWIvKTxicj5baHR0cDovL3ItYmlvLmdpdGh1Yi5pby9pbnRyby1n
aXQtcnN0dWRpby9dKGh0dHA6Ly9yLWJpby5naXRodWIuaW8vaW50cm8tZ2l0LXJzdHVkaW8vKTwv
cD48cD4jIyDlkb3ku6TooYzmiormnKzlnLDmlofku7blpLnlkoxnaXRodWLku5PlupPlhbPogZQ8
L3A+PHA+6aaW5YWI5Zyo6Ieq5bex55qEZ2l0aHVi572R6aG16YeM6Z2i5paw5bu65LiA5Liq56m6
55qE5LuT5bqT77yM54S25ZCO6L+Q6KGM5LiL6Z2i55qE5Luj56CB5Y2z5Y+v77yBPGJyPmBgYDxi
cj5jZCB+L3Rlc3QgLy/liLB0ZXN055uu5b2VLOacrOWcsOebruW9leWQjeS4jnJlcG9zaXRvcnnn
moTlkI3lrZfkuI3kuIDlrprnm7jlkIw8YnI+Z2l0IGluaXQgIyPliJ3lp4vljJY8YnI+Z2l0IGFk
ZCAuICMj5oqK5omA5pyJ5paH5Lu25Yqg5YWl5Yiw57Si5byV77yI5LiN5oOz5oqK5omA5pyJ5paH
5Lu25Yqg5YWl77yM5Y+v5Lul55SoZ2l0aWdub3Jl5oiWYWRkIOWFt+S9k+aWh+S7tu+8jOingeS4
i+aWhyk8YnI+Z2l0IGNvbW1pdCAtbSAnYWhhJyAjI+aPkOS6pOWIsOacrOWcsOS7k+W6k++8jOeE
tuWQjuS8muWhq+WGmeabtOaWsOaXpeW/lygkZ2l0IGNvbW1pdCAtbSDigJxteSBmaXJzdCB2ZXNp
b24gb2YgLi4u4oCdKTxicj5naXQgcmVtb3RlIGFkZCBvcmlnaW4gaHR0cHM6Ly9naXRodWIuY29t
L2ptemVuZzEzMTQvdGVzdC5naXQgIyPlop7liqDliLByZW1vdGU8YnI+Z2l0IHB1c2ggb3JpZ2lu
IG1hc3RlciAjI3B1c2jliLBnaXRodWLkuIo8YnI+YGBgPGJyPuiusOS9j+S4gOWumuimgeaYr+ep
uueZveeahOS7k+W6k+WTpu+8jOWmguaenOS4h+S4gOS9oOaWsOW7uuS7k+W6k+eahOWQjOaXtuW7
uueri+S6hnJlYWRtZeaWh+S7tu+8jOWwseebtOaOpWNsb25l5aW95LqG77yM6LWw5ZCO6Zeo55qE
5pu05paw5LiL6Z2i55qE5rWB56iL44CCPC9wPjxwPiMjIGdpdGh1Yui/m+mYtuaTjeS9nOOAgjwv
cD48cD4mZ3Q7IOWFtuWunuWwseaYr+acrOWcsOeahOS7o+eggeacieaJgOS/ruaUue+8jOmcgOim
geWQjOatpeWIsGdpdGh1YuiAjOW3su+8jOWPiOaIluiAhWdpdGh1Yue9kemhtemHjOmdoueahOS7
o+eggeiiq+S/ruaUueS6hu+8jOmcgOimgeWQjOatpeWIsOacrOWcsOOAguWmguaenOaYr+WkmuS4
quS6uuWQiOS9nO+8jOmCo+S5iOWIq+S6uuS8muS/ruaUueS9oOeahOS7o+egge+8jOaJgOS7peav
j+asoeS9oOS4iuS8oOS7o+eggeS5i+WJje+8jOmDvemcgOimgeWFiOaKimdpdGh1Yue9kemhtemH
jOmdoueahOS7o+eggeWFiOaLieS4i+S6hu+8jOWGjeWQiOW5tuWQjuS4iuS8oOiHquW3seeahOOA
gjwvcD48cD4xLuabtOaWsOmhueebru+8iOaWsOWKoOS6huaWh+S7tu+8ie+8jGDov5nkuKrmmK/m
nIDpq5jpopHpnIDmsYJgPGJyPmBgYDxicj4kY2Qgfi9oZWxsby13b3JsZDxicj4kZ2l0IGFkZCAu
IC8v6L+Z5qC35Y+v5Lul6Ieq5Yqo5Yik5pat5paw5Yqg5LqG5ZOq5Lqb5paH5Lu277yM5oiW6ICF
5omL5Yqo5Yqg5YWl5paH5Lu25ZCN5a2XPGJyPiRnaXQgY29tbWl0IC8v5o+Q5Lqk5Yiw5pys5Zyw
5LuT5bqT77yM5LiN5Yqg5Y+C5pWw5Lya5o+Q56S677yM5rOo5oSPOl49Q3RybO+8jOaMieeFp+aP
kOekuuadpeWwseWlveS6hu+9nu+9nu+9njxicj4kZ2l0IHB1c2ggb3JpZ2luIG1hc3RlciAvL+S4
jeaYr+aWsOWIm+W7uueahO+8jOS4jeeUqOWGjWFkZCDliLByZW1vdGXkuIrkuoY8YnI+YGBgPGJy
PiFbXShodHRwOi8vd3d3LmJpby1pbmZvLXRyYWluZWUuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8y
MDE3LzA3L2dpdGh1YjkucG5nKTwvcD48cD4yLuabtOaWsOmhueebru+8iOayoeaWsOWKoOaWh+S7
tu+8jOWPquacieWIoOmZpOaIluiAheS/ruaUueaWh+S7tu+8ie+8mjxicj4kY2Qgfi9oZWxsby13
b3JsZDxicj4kZ2l0IGNvbW1pdCAtYSAvL+iusOW9leWIoOmZpOaIluS/ruaUueS6huWTquS6m+aW
h+S7tjxicj4kZ2l0IHB1c2ggb3JpZ2luIG1hc3RlciAvL+aPkOS6pOWIsGdpdGh1YjwvcD48cD4z
LuW/veeVpeS4gOS6m+aWh+S7tu+8jOavlOWmgioub+etiTo8YnI+YGBgPGJyPiRjZCB+L2hlbGxv
LXdvcmxkPGJyPiR2aW0gLmdpdGlnbm9yZSAvL+aKiuaWh+S7tuexu+Wei+WKoOWFpeWIsC5naXRp
Z25vcmXkuK3vvIzkv53lrZg8YnI+YGBgPGJyPueEtuWQjuWwseWPr+S7pWdpdCBhZGQgLiDog73o
h6rliqjov4fmu6Tov5nnp43mlofku7Y8L3A+PHA+NC5jbG9uZeS7o+eggeWIsOacrOWcsO+8mjxi
cj5gYGA8YnI+JGdpdCBjbG9uZSBnaXRAZ2l0aHViLmNvbTpXYWRlTGVuZy9oZWxsby13b3JsZC5n
aXQ8YnI+YGBgPGJyPjUu5YGH5aaC5pys5Zyw5bey57uP5a2Y5Zyo5LqG5Luj56CB77yM6ICM5LuT
5bqT6YeM5pyJ5pu05paw77yM5oqK5pu05pS555qE5ZCI5bm25Yiw5pys5Zyw55qE6aG555uu77ya
PGJyPmBgYDxicj4kZ2l0IGZldGNoIG9yaWdpbiAvL+iOt+WPlui/nOeoi+abtOaWsDxicj4kZ2l0
IG1lcmdlIG9yaWdpbi9tYXN0ZXIgLy/miormm7TmlrDnmoTlhoXlrrnlkIjlubbliLDmnKzlnLDl
iIbmlK88YnI+YGBgPGJyPui/meS4quaYr+acgOmrmOmikemcgOaxgjxicj4hW10oaHR0cDovL3d3
dy5iaW8taW5mby10cmFpbmVlLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxNy8wNy9naXRodWIx
MC5wbmcpPC9wPjxwPjYu5pKk6ZSAPGJyPmBgYDxicj4kZ2l0IHJlc2V0PGJyPmBgYDwvcD48cD43
LuWIoOmZpDxicj5gYGA8YnI+JGdpdCBybSAqIC8vIOS4jeaYr+eUqHJtPGJyPmBgYDwvcD48cD4v
Ly0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLeW4uOingemUmeivry0tLS0tLS0tLS0tLS0t
LS0tLS0tLS0tLS0tLS0tLS0tLS0tPGJyPjEuJCBnaXQgcmVtb3RlIGFkZCBvcmlnaW4gZ2l0QGdp
dGh1Yi5jb206V2FkZUxlbmcvaGVsbG8td29ybGQuZ2l0PGJyPumUmeivr+aPkOekuu+8mmZhdGFs
OiByZW1vdGUgb3JpZ2luIGFscmVhZHkgZXhpc3RzLjxicj7op6PlhrPlip7ms5XvvJokIGdpdCBy
ZW1vdGUgcm0gb3JpZ2luPGJyPueEtuWQjuWcqOaJp+ihjO+8miQgZ2l0IHJlbW90ZSBhZGQgb3Jp
Z2luIGdpdEBnaXRodWIuY29tOldhZGVMZW5nL2hlbGxvLXdvcmxkLmdpdCDlsLHkuI3kvJrmiqXp
lJnor6/kuoY8YnI+Mi4gJCBnaXQgcHVzaCBvcmlnaW4gbWFzdGVyPGJyPumUmeivr+aPkOekuu+8
mmVycm9yOmZhaWxlZCB0byBwdXNoIHNvbSByZWZzIHRvPGJyPuino+WGs+WKnuazle+8miQgZ2l0
IHB1bGwgb3JpZ2luIG1hc3RlciAvL+WFiOaKiui/nOeoi+acjeWKoeWZqGdpdGh1YuS4iumdouea
hOaWh+S7tuaLieWFiOadpe+8jOWGjXB1c2gg5LiK5Y6744CCPGJyPi8vLS0tLS0tLS0tLS0tLS0t
LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t
LS0tLS0tPC9wPjxwPuijhemAvOivt+eci+i/meS4qu+8mmh0dHA6Ly93d3cub3NjaGluYS5uZXQv
cXVlc3Rpb24vMTM5Nzc2NV8xNjYzNjg8YnI+5a6J6KOFZ2l05bel5YW355yL6L+Z5Liq77yaaHR0
cDovL3d3dy5paHJlZi5jb20vcmVhZC0xNjM3Ny5odG1sPGJyPuS4iuS8oOiHquW3seeahOS7o+eg
geeci+i/meS4qu+8mmh0dHA6Ly9ibG9nLmNzZG4ubmV0L2hhbmhhaWxvbmc3MjYxODgvYXJ0aWNs
ZS9kZXRhaWxzLzQ2NzM4OTI5PGJyPuS4gOS6m+W8gOWPkei/h+eoi+eahOazqOaEj+S6i+mhue+8
mmh0dHA6Ly9ibG9nLmNzZG4ubmV0L3UwMTEwNjg3MDIvYXJ0aWNsZS9kZXRhaWxzLzQ5NTMxMTY3
PGJyPueugOS7i+WujOaVtOaVmeeoi++8mmh0dHA6Ly9jYWliYW9qaWFuLmNvbS91c2UtZ2l0aHVi
Lmh0bWw8L3A+">​</div>
</div>
<p>&nbsp;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2477.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>用DEXSeq分析可变剪切，外显子差异表达</title>
		<link>http://www.bio-info-trainee.com/2470.html</link>
		<comments>http://www.bio-info-trainee.com/2470.html#comments</comments>
		<pubDate>Wed, 05 Jul 2017 03:09:08 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2470</guid>
		<description><![CDATA[以后只用Rmarkdown写博客啦！ 直接点击链接阅读，省掉了图文排版时间，赞~ &#8230; <a href="http://www.bio-info-trainee.com/2470.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<p>以后只用Rmarkdown写博客啦！</p>
<p>直接点击<a href="http://www.bio-info-trainee.com/bioconductor_China/software/DEXSeq.html" target="_blank">链接阅读</a>，省掉了图文排版时间，赞~</p>
<p><a href="http://www.bio-info-trainee.com/bioconductor_China/software/DEXSeq.html" target="_blank">http://www.bio-info-trainee.com/bioconductor_China/software/DEXSeq.html</a></p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2470.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>scalpel软件找indel</title>
		<link>http://www.bio-info-trainee.com/2341.html</link>
		<comments>http://www.bio-info-trainee.com/2341.html#comments</comments>
		<pubDate>Mon, 06 Feb 2017 02:37:58 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>
		<category><![CDATA[indel]]></category>
		<category><![CDATA[scalpel]]></category>
		<category><![CDATA[WGS]]></category>
		<category><![CDATA[全基因组]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2341</guid>
		<description><![CDATA[Scalpel is available here: http://scalpe &#8230; <a href="http://www.bio-info-trainee.com/2341.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<p>Scalpel is available here: <a class="gj_safe_a" href="http://scalpel.sourceforge.net/" target="_blank">http://scalpel.sourceforge.net/</a><br />
文章是： <a class="gj_safe_a" href="http://www.nature.com/nmeth/journal/v11/n10/full/nmeth.3069.html" target="_blank">http://www.nature.com/nmeth/journal/v11/n10/full/nmeth.3069.html</a><br />
很赞的工具！<br />
软件说明书写的也比较详细：<a class="gj_safe_a" href="http://scalpel.sourceforge.net/manual.html" target="_blank">http://scalpel.sourceforge.net/manual.html</a><br />
他提供了3种情况的找INDELs变异，我目前需要用的就是对我的全基因组测序数据来找，所以用single模式：<br />
<strong><span style="color: #ff0000;">为了节省对计算资源的消耗，作者建议我单独对每条染色体分别处理。</span></strong><span id="more-2341"></span></p>
<p>软件安装是：</p>
<div>
<div></div>
<div>
<div id="highlighter_669247" class="syntaxhighlighter notranslate applescript">
<table border="0" cellspacing="0" cellpadding="0">
<tbody>
<tr>
<td class="gutter">
<div class="line number1 index0 alt2">1</div>
<div class="line number2 index1 alt1">2</div>
<div class="line number3 index2 alt2">3</div>
<div class="line number4 index3 alt1">4</div>
<div class="line number5 index4 alt2">5</div>
<div class="line number6 index5 alt1">6</div>
<div class="line number7 index6 alt2">7</div>
<div class="line number8 index7 alt1">8</div>
<div class="line number9 index8 alt2">9</div>
</td>
<td class="code">
<div class="container">
<div class="line number1 index0 alt2"><code class="applescript comments">## Download and install Scalpel</code></div>
<div class="line number2 index1 alt1"><code class="applescript plain">cd ~</code><code class="applescript color2">/</code><code class="applescript plain">biosoft</code></div>
<div class="line number3 index2 alt2"><code class="applescript plain">mkdir Scalpel </code><code class="applescript color2">&amp;</code><code class="applescript color2">&amp;</code>  <code class="applescript plain">cd Scalpel</code></div>
<div class="line number4 index3 alt1"><code class="applescript plain">wget https</code><code class="applescript color1">:</code><code class="applescript color2">/</code><code class="applescript color2">/</code><code class="applescript plain">downloads.sourceforge.net</code><code class="applescript color2">/</code><code class="applescript plain">project</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color1">-0.5</code><code class="applescript plain">.</code><code class="applescript color1">3.</code><code class="applescript plain">tar.gz</code></div>
<div class="line number5 index4 alt2"><code class="applescript plain">tar zxvf scalpel</code><code class="applescript color1">-0.5</code><code class="applescript plain">.</code><code class="applescript color1">3.</code><code class="applescript plain">tar.gz</code></div>
<div class="line number6 index5 alt1"><code class="applescript plain">cd scalpel</code><code class="applescript color1">-0.5</code><code class="applescript plain">.</code><code class="applescript color1">3</code></div>
<div class="line number7 index6 alt2"><code class="applescript color3">make</code></div>
<div class="line number8 index7 alt1"><code class="applescript plain">~</code><code class="applescript color2">/</code><code class="applescript plain">biosoft</code><code class="applescript color2">/</code><code class="applescript plain">Scalpel</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color1">-0.5</code><code class="applescript plain">.</code><code class="applescript color1">3</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color2">-</code><code class="applescript plain">discovery  </code><code class="applescript comments">--help</code></div>
<div class="line number9 index8 alt2"><code class="applescript plain">~</code><code class="applescript color2">/</code><code class="applescript plain">biosoft</code><code class="applescript color2">/</code><code class="applescript plain">Scalpel</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color1">-0.5</code><code class="applescript plain">.</code><code class="applescript color1">3</code><code class="applescript color2">/</code><code class="applescript plain">scalpel</code><code class="applescript color2">-</code><code class="applescript plain">export  </code><code class="applescript comments">--help</code></div>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<p>它需要自己指定--bed参数来选择染色体运行，而且不是给一个chr1就可以了，需要指定染色体及其起始终止坐标：single region in format chr:start-end (example: 1:31656613-31656883)<br />
所以就考验shell编程技巧啦！<br />
制作 ~/reference/genome/hg19/hg19.chr.bed  这个文件，我就不多说了，前面我们已经讲过了！</p>
<div>
<div></div>
<div>
<div id="highlighter_593827" class="syntaxhighlighter notranslate applescript">
<table border="0" cellspacing="0" cellpadding="0">
<tbody>
<tr>
<td class="gutter">
<div class="line number1 index0 alt2">01</div>
<div class="line number2 index1 alt1">02</div>
<div class="line number3 index2 alt2">03</div>
<div class="line number4 index3 alt1">04</div>
<div class="line number5 index4 alt2">05</div>
<div class="line number6 index5 alt1">06</div>
<div class="line number7 index6 alt2">07</div>
<div class="line number8 index7 alt1">08</div>
<div class="line number9 index8 alt2">09</div>
<div class="line number10 index9 alt1">10</div>
<div class="line number11 index10 alt2">11</div>
<div class="line number12 index11 alt1">12</div>
<div class="line number13 index12 alt2">13</div>
<div class="line number14 index13 alt1">14</div>
<div class="line number15 index14 alt2">15</div>
<div class="line number16 index15 alt1">16</div>
<div class="line number17 index16 alt2">17</div>
<div class="line number18 index17 alt1">18</div>
<div class="line number19 index18 alt2">19</div>
<div class="line number20 index19 alt1">20</div>
<div class="line number21 index20 alt2">21</div>
<div class="line number22 index21 alt1">22</div>
</td>
<td class="code">
<div class="container">
<div class="line number1 index0 alt2"><code class="applescript plain">chr</code><code class="applescript color1">10</code>   <code class="applescript color1">1</code>   <code class="applescript color1">135534747</code></div>
<div class="line number2 index1 alt1"><code class="applescript plain">chr</code><code class="applescript color1">11</code>   <code class="applescript color1">1</code>   <code class="applescript color1">135006516</code></div>
<div class="line number3 index2 alt2"><code class="applescript plain">chr</code><code class="applescript color1">12</code>   <code class="applescript color1">1</code>   <code class="applescript color1">133851895</code></div>
<div class="line number4 index3 alt1"><code class="applescript plain">chr</code><code class="applescript color1">13</code>   <code class="applescript color1">1</code>   <code class="applescript color1">115169878</code></div>
<div class="line number5 index4 alt2"><code class="applescript plain">chr</code><code class="applescript color1">14</code>   <code class="applescript color1">1</code>   <code class="applescript color1">107349540</code></div>
<div class="line number6 index5 alt1"><code class="applescript plain">chr</code><code class="applescript color1">15</code>   <code class="applescript color1">1</code>   <code class="applescript color1">102531392</code></div>
<div class="line number7 index6 alt2"><code class="applescript plain">chr</code><code class="applescript color1">16</code>   <code class="applescript color1">1</code>   <code class="applescript color1">90354753</code></div>
<div class="line number8 index7 alt1"><code class="applescript plain">chr</code><code class="applescript color1">17</code>   <code class="applescript color1">1</code>   <code class="applescript color1">81195210</code></div>
<div class="line number9 index8 alt2"><code class="applescript plain">chr</code><code class="applescript color1">18</code>   <code class="applescript color1">1</code>   <code class="applescript color1">78077248</code></div>
<div class="line number10 index9 alt1"><code class="applescript plain">chr</code><code class="applescript color1">19</code>   <code class="applescript color1">1</code>   <code class="applescript color1">59128983</code></div>
<div class="line number11 index10 alt2"><code class="applescript plain">chr</code><code class="applescript color1">1</code>    <code class="applescript color1">1</code>   <code class="applescript color1">249250621</code></div>
<div class="line number12 index11 alt1"><code class="applescript plain">chr</code><code class="applescript color1">20</code>   <code class="applescript color1">1</code>   <code class="applescript color1">63025520</code></div>
<div class="line number13 index12 alt2"><code class="applescript plain">chr</code><code class="applescript color1">21</code>   <code class="applescript color1">1</code>   <code class="applescript color1">48129895</code></div>
<div class="line number14 index13 alt1"><code class="applescript plain">chr</code><code class="applescript color1">22</code>   <code class="applescript color1">1</code>   <code class="applescript color1">51304566</code></div>
<div class="line number15 index14 alt2"><code class="applescript plain">chr</code><code class="applescript color1">2</code>    <code class="applescript color1">1</code>   <code class="applescript color1">243199373</code></div>
<div class="line number16 index15 alt1"><code class="applescript plain">chr</code><code class="applescript color1">3</code>    <code class="applescript color1">1</code>   <code class="applescript color1">198022430</code></div>
<div class="line number17 index16 alt2"><code class="applescript plain">chr</code><code class="applescript color1">4</code>    <code class="applescript color1">1</code>   <code class="applescript color1">191154276</code></div>
<div class="line number18 index17 alt1"><code class="applescript plain">chr</code><code class="applescript color1">5</code>    <code class="applescript color1">1</code>   <code class="applescript color1">180915260</code></div>
<div class="line number19 index18 alt2"><code class="applescript plain">chr</code><code class="applescript color1">6</code>    <code class="applescript color1">1</code>   <code class="applescript color1">171115067</code></div>
<div class="line number20 index19 alt1"><code class="applescript plain">chr</code><code class="applescript color1">7</code>    <code class="applescript color1">1</code>   <code class="applescript color1">159138663</code></div>
<div class="line number21 index20 alt2"><code class="applescript plain">chr</code><code class="applescript color1">8</code>    <code class="applescript color1">1</code>   <code class="applescript color1">146364022</code></div>
<div class="line number22 index21 alt1"><code class="applescript plain">chr</code><code class="applescript color1">9</code>    <code class="applescript color1">1</code>   <code class="applescript color1">141213431</code></div>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<p>区分染色体分别运行scalpel软件代码如下：</p>
<div>
<div></div>
<div>
<div id="highlighter_78787" class="syntaxhighlighter notranslate shell">
<table border="0" cellspacing="0" cellpadding="0">
<tbody>
<tr>
<td class="gutter">
<div class="line number1 index0 alt2">01</div>
<div class="line number2 index1 alt1">02</div>
<div class="line number3 index2 alt2">03</div>
<div class="line number4 index3 alt1">04</div>
<div class="line number5 index4 alt2">05</div>
<div class="line number6 index5 alt1">06</div>
<div class="line number7 index6 alt2">07</div>
<div class="line number8 index7 alt1">08</div>
<div class="line number9 index8 alt2">09</div>
<div class="line number10 index9 alt1">10</div>
<div class="line number11 index10 alt2">11</div>
<div class="line number12 index11 alt1">12</div>
<div class="line number13 index12 alt2">13</div>
<div class="line number14 index13 alt1">14</div>
<div class="line number15 index14 alt2">15</div>
<div class="line number16 index15 alt1">16</div>
<div class="line number17 index16 alt2">17</div>
<div class="line number18 index17 alt1">18</div>
<div class="line number19 index18 alt2">19</div>
<div class="line number20 index19 alt1">20</div>
<div class="line number21 index20 alt2">21</div>
<div class="line number22 index21 alt1">22</div>
<div class="line number23 index22 alt2">23</div>
<div class="line number24 index23 alt1">24</div>
<div class="line number25 index24 alt2">25</div>
<div class="line number26 index25 alt1">26</div>
<div class="line number27 index26 alt2">27</div>
<div class="line number28 index27 alt1">28</div>
<div class="line number29 index28 alt2">29</div>
<div class="line number30 index29 alt1">30</div>
</td>
<td class="code">
<div class="container">
<div class="line number1 index0 alt2"><code class="shell functions">cat</code> <code class="shell plain">~</code><code class="shell plain">/reference/genome/hg19/hg19</code><code class="shell plain">.chr.bed |</code><code class="shell keyword">while</code> <code class="shell functions">read</code> <code class="shell functions">id</code></div>
<div class="line number2 index1 alt1"><code class="shell keyword">do</code></div>
<div class="line number3 index2 alt2"><code class="shell plain">arr=($</code><code class="shell functions">id</code><code class="shell plain">) </code></div>
<div class="line number4 index3 alt1"></div>
<div class="line number5 index4 alt2"><code class="shell comments"># arr=($a) splits the string $a into the array arr: ${arr[0]}, ${arr[1]}, ... are its elements, and ${arr[@]} is the whole array.</code></div>
<div class="line number6 index5 alt1"><code class="shell comments"># OLD_IFS="$IFS" </code></div>
<div class="line number7 index6 alt2"><code class="shell comments"># IFS="," </code></div>
<div class="line number8 index7 alt1"><code class="shell comments"># arr=($a) </code></div>
<div class="line number9 index8 alt2"><code class="shell comments"># IFS="$OLD_IFS" </code></div>
<div class="line number10 index9 alt1"></div>
<div class="line number11 index10 alt2"><code class="shell comments">#arr=($a)用于将字符串$a分割到数组$arr ${arr[0]} ${arr[1]} ... 分别存储分割后的数组第1 2 ... 项 ，${arr[@]}存储整个数组。</code></div>
<div class="line number12 index11 alt1"><code class="shell comments">#变量$IFS存储着分隔符，这里我们将其设为逗号 "," OLD_IFS用于备份默认的分隔符，使用完后将之恢复默认。</code></div>
<div class="line number13 index12 alt2"></div>
<div class="line number14 index13 alt1"><code class="shell functions">echo</code> <code class="shell plain">${arr[0]}:${arr[1]}-${arr[2]}</code></div>
<div class="line number15 index14 alt2"><code class="shell spaces"> </code></div>
<div class="line number16 index15 alt1"></div>
<div class="line number17 index16 alt2"><code class="shell functions">date</code></div>
<div class="line number18 index17 alt1"><code class="shell plain">start=`</code><code class="shell functions">date</code> <code class="shell plain">+%s`</code></div>
<div class="line number19 index18 alt2"></div>
<div class="line number20 index19 alt1"><code class="shell plain">~</code><code class="shell plain">/biosoft/Scalpel/scalpel-0</code><code class="shell plain">.5.3</code><code class="shell plain">/scalpel-discovery</code> <code class="shell plain">--single \</code></div>
<div class="line number21 index20 alt2"><code class="shell plain">--bam  ~</code><code class="shell plain">/data/project/myGenome/fastq/bamFiles/jmzeng</code><code class="shell plain">.filter.rmdup.bam \</code></div>
<div class="line number22 index21 alt1"><code class="shell plain">--ref ~</code><code class="shell plain">/reference/genome/hg19/hg19</code><code class="shell plain">.fa \</code></div>
<div class="line number23 index22 alt2"><code class="shell plain">--bed ${arr[0]}:${arr[1]}-${arr[2]}  \</code></div>
<div class="line number24 index23 alt1"><code class="shell plain">--window 600 --numprocs 5  --</code><code class="shell functions">dir</code> <code class="shell plain">${arr[0]}</code></div>
<div class="line number25 index24 alt2"></div>
<div class="line number26 index25 alt1"><code class="shell plain">end=`</code><code class="shell functions">date</code> <code class="shell plain">+%s`</code></div>
<div class="line number27 index26 alt2"><code class="shell plain">runtime=$((end-start))</code></div>
<div class="line number28 index27 alt1"><code class="shell functions">echo</code> <code class="shell string">"Runtime for ${arr[0]}:${arr[1]}-${arr[2]} was $runtime"</code></div>
<div class="line number29 index28 alt2"></div>
<div class="line number30 index29 alt1"><code class="shell keyword">done</code></div>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2341.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>制作自己的gene set文件给gsea软件</title>
		<link>http://www.bio-info-trainee.com/2144.html</link>
		<comments>http://www.bio-info-trainee.com/2144.html#comments</comments>
		<pubDate>Thu, 15 Dec 2016 11:43:56 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础数据格式]]></category>
		<category><![CDATA[基础软件]]></category>
		<category><![CDATA[geneset]]></category>
		<category><![CDATA[GMT]]></category>
		<category><![CDATA[GSEA]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2144</guid>
		<description><![CDATA[熟悉GSEA软件的都知道，它只需要GCT,CLS和GMT文件，其中GMT文件，G &#8230; <a href="http://www.bio-info-trainee.com/2144.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<p>熟悉GSEA软件的都知道，它只需要GCT,CLS和GMT文件，其中GMT文件，GSEA的作者已经给出了一大堆！就是记录broad的<a href="http://software.broadinstitute.org/gsea/msigdb/collections.jsp">Molecular Signatures Database (MSigDB) </a>已经收到了18026个geneset，<span style="color: #ff00ff;"><strong>但是我奇怪的是里面竟然没有包括cancer testis的gene set，MSigDB的确是多，但未必全，其实里面还有很多重复。而且有不少几乎没有意义的gene set。</strong></span>那我想做自己的gene set来用gsea软件做分析，就需要自己制造gmt格式的数据。因为即使下载了MSigDB的gene set，本质上就是gmt格式的数据而已：<a href="http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GMT:_Gene_Matrix_Transposed_file_format_.28.2A.gmt.29">http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GMT:_Gene_Matrix_Transposed_file_format_.28.2A.gmt.29</a><span id="more-2144"></span></p>
<div><a href="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/41.png"><img class="alignnone size-full wp-image-2145" src="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/41.png" alt="4" width="937" height="615" /></a></div>
<div>我们首先要拿到自己感兴趣的gene set里面的gene list，最好是以hugo规定的标准symbol。</div>
<div>比如我感兴趣的是 ：<a href="http://www.cta.lncc.br/modelo.php">http://www.cta.lncc.br/modelo.php</a></div>
<div>我这里提供一个2列的文件，直接转换成gmt的R代码！</div>
<div>
<div>文件来自于：<a href="http://www.bio-info-trainee.com/1188.html">下载最新版的KEGG信息，并且解析好</a>，如下：</div>
<div><img class="alignnone" src="http://www.bio-info-trainee.com/wp-content/uploads/2015/12/image004.png" alt="" width="745" height="326" /></div>
<div>首先在R里面赋值一个变量path2gene_file就是图中的kegg2gene.txt文件，读到R里面去</div>
<div>tmp=read.table(path2gene_file,sep="\t",colClasses=c('character'))</div>
<div>#tmp=toTable(org.Hs.egPATH)</div>
<div># first column is kegg ID, second column is entrez ID</div>
<div>GeneID2kegg_list&lt;&lt;- tapply(tmp[,1],as.factor(tmp[,2]),function(x) x)</div>
<div>kegg2GeneID_list&lt;&lt;- tapply(tmp[,2],as.factor(tmp[,1]),function(x) x)</div>
<div>这个变量kegg2GeneID_list是一个list，因为是entrez gene ID，需要转换成symbol，我就不多说了，转换后的数据，就是kegg2symbol_list 。</div>
<div><img src="file:///C:/Users/jimmy1314/AppData/Local/YNote/data/jmzeng1314@163.com/b98ac452e2a34f39946b3048bccc7d32/clipboard.png" alt="" data-media-type="image" data-attr-org-src-id="2E7838E03E8F44EAAB168B5F42FAB6CC" /></div>
<div>最后对 kegg2symbol_list 输出成gmt文件：</div>
<div>
<blockquote>
<div>write.gmt &lt;- function(geneSet=kegg2symbol_list,gmt_file='kegg2symbol.gmt'){</div>
<div></div>
<div>sink( gmt_file )</div>
<div>for (i in 1:length(geneSet)){</div>
<div>cat(names(geneSet)[i])</div>
<div>cat('\tNA\t')</div>
<div>cat(paste(geneSet[[i]],collapse = '\t'))</div>
<div>cat('\n')</div>
<div></div>
<div>}</div>
<div></div>
<div>sink()</div>
<div></div>
<div>}</div>
</blockquote>
</div>
</div>
<div><img class="alignnone size-full wp-image-2146" src="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/5.png" alt="5" width="555" height="562" /></div>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2144.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>java版本GSEA软件的ES score图片的修改</title>
		<link>http://www.bio-info-trainee.com/2105.html</link>
		<comments>http://www.bio-info-trainee.com/2105.html#comments</comments>
		<pubDate>Thu, 01 Dec 2016 16:53:10 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[R]]></category>
		<category><![CDATA[基础软件]]></category>
		<category><![CDATA[ES score]]></category>
		<category><![CDATA[GSEA]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2105</guid>
		<description><![CDATA[首先要明白这个ES score图片里面的数据是什么，这样才能修改它，因为java &#8230; <a href="http://www.bio-info-trainee.com/2105.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<p>首先要明白这个ES score图片里面的数据是什么，这样才能修改它，因为java是一个封闭打包好的软件，所以我们没办法在里面修改它没有提供的参数，运行完GSEA，默认输出的图就是下面这样：<span id="more-2105"></span></p>
<div style="width: 513px" class="wp-caption alignnone"><img class="" src="http://note.youdao.com/yws/api/group/23785548/noteresource/9ED49F972A0F4980AE784E76A7DFFC29/version/256?method=get-resource&amp;shareToken=DBDB0277A315444BBBAB2024190208AE&amp;entryId=123732909" alt="" width="503" height="504" /><p class="wp-caption-text">ES score</p></div>
<p>这个图片在发表的时候，就会发现其实蛮模糊的， 所以有可能需要自己重新制作这个图，那么就需要明白这个图后面的数据。</p>
<p>其中最下面的数据是这样来的：假设芯片或者其它测量方法测到了2万个基因，那么这两万个基因在case和control组的差异度量(六种差异度量，默认是signal 2 noise，GSEA官网有提供公式，也可以选择大家熟悉的foldchange)肯定不一样，那么根据它们的差异度量，就可以对它们进行排序，最下面展示的就是排序并且Z-score标准化之后的结果。</p>
<p>而中间的就是该gene set里面的基因在已经根据signal2noise排好序的2万个基因中的位置。</p>
<p>最上面的图，就是所有的基因的ES score都要一个个加起来，叫做running  ES score，在加的过程中，什么时候ES score达到了最大值，就是这个gene set最终的ES score！</p>
<p>我这里全面解析了GSEA官网提供的R代码的绘图函数，如下：</p>
<p><a href="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/ES-SCORE图的画法.png"><img class="alignnone size-full wp-image-2106" src="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/ES-SCORE图的画法.png" alt="es-score%e5%9b%be%e7%9a%84%e7%94%bb%e6%b3%95" width="1574" height="650" /></a></p>
<p>这个函数本身也被我抽离出来了：</p>
<p>这个知识点有点复杂，<strong><span style="color: #ff0000;">我解释的很清楚数据是什么，但是数据如何来的（就是下面代码读取的txt文件）</span></strong>，我没办法用博客写清楚，需要修改一个2500行的源代码才能获取数据！</p>
<blockquote><p>setwd('data')<br />
<strong><span style="color: #ff0000;">Obs.RES=read.table('Obs.RES.txt') </span></strong><br />
<strong><span style="color: #ff0000;">Obs.RES=t(Obs.RES) ## 每个基因在每个gene set里面的running ES score，一个矩阵</span></strong><br />
<strong><span style="color: #ff0000;">Obs.indicator=read.table('Obs.indicator.txt') </span></strong><br />
<strong><span style="color: #ff0000;">Obs.indicator=t(Obs.indicator) ## 每个基因是否属于每个gene set，一个0/1矩阵</span></strong><br />
<strong><span style="color: #ff0000;">obs.s2n=read.table('obs.s2n.txt')[,1]  ## 每个基因的signal 2 noise值，已经Z-score化，而且排好序了。</span></strong><br />
<strong><span style="color: #ff0000;">size.G=read.table('size.G.txt')[,1]  ## 每个gene set的基因数量，在图中需要显示</span></strong><br />
<strong><span style="color: #ff0000;">gs.names=read.table('gs.names.txt')[,1] ## 每个gene set的名字，在图中需要显示</span></strong><br />
<strong><span style="color: #ff0000;">Obs.arg.ES=read.table('Obs.arg.ES.txt')[,1]## 每个gene set的最大ES score出现在排序基因的位置</span></strong><br />
<strong><span style="color: #ff0000;">Obs.ES.index=read.table('Obs.ES.index.txt')[,1]## 这个用不着的，我也忘记是什么了</span></strong><br />
<strong><span style="color: #ff0000;">Obs.ES=read.table('Obs.ES.txt')[,1]  ##每个gene set的最大ES score是多少，如果是正值，用红色表示富集在case组，如果是负值，用蓝色，表示富集在control组。</span></strong></p>
<p>plot_ES_score &lt;- function(Ng=12,N=34688,phen1='control',phen2='case',Obs.RES,Obs.indicator,obs.s2n,size.G,gs.names,Obs.arg.ES,Obs.ES.index){<br />
for (i in 1:Ng) {<br />
png(paste0('number_',gs.names[i],'.png'))<br />
ind &lt;- 1:N<br />
min.RES &lt;- min(Obs.RES[i,])<br />
max.RES &lt;- max(Obs.RES[i,])<br />
if (max.RES &lt; 0.3) max.RES &lt;- 0.3<br />
if (min.RES &gt; -0.3) min.RES &lt;- -0.3<br />
delta &lt;- (max.RES - min.RES)*0.50<br />
min.plot &lt;- min.RES - 2*delta<br />
max.plot &lt;- max.RES<br />
max.corr &lt;- max(obs.s2n)<br />
min.corr &lt;- min(obs.s2n)<br />
Obs.correl.vector.norm &lt;- (obs.s2n - min.corr)/(max.corr - min.corr)*1.25*delta + min.plot<br />
zero.corr.line &lt;- (- min.corr/(max.corr - min.corr))*1.25*delta + min.plot<br />
col &lt;- ifelse(Obs.ES[i] &gt; 0, 2, 4)</p>
<p># Running enrichment plot</p>
<p>sub.string &lt;- paste("Number of genes: ", N, " (in list), ", size.G[i], " (in gene set)", sep = "", collapse="")</p>
<p>main.string &lt;- paste("Gene Set ", i, ":", gs.names[i])</p>
<p>plot(ind, Obs.RES[i,], main = main.string, sub = sub.string, xlab = "Gene List Index", ylab = "Running Enrichment Score (RES)", xlim=c(1, N), ylim=c(min.plot, max.plot), type = "l", lwd = 2, cex = 1, col = col)<br />
for (j in seq(1, N, 20)) {<br />
lines(c(j, j), c(zero.corr.line, Obs.correl.vector.norm[j]), lwd = 1, cex = 1, col = colors()[12]) # shading of correlation plot<br />
}<br />
lines(c(1, N), c(0, 0), lwd = 1, lty = 2, cex = 1, col = 1) # zero RES line<br />
lines(c(Obs.arg.ES[i], Obs.arg.ES[i]), c(min.plot, max.plot), lwd = 1, lty = 3, cex = 1, col = col) # max enrichment vertical line<br />
for (j in 1:N) {<br />
if (Obs.indicator[i, j] == 1) {<br />
lines(c(j, j), c(min.plot + 1.25*delta, min.plot + 1.75*delta), lwd = 1, lty = 1, cex = 1, col = 1) # enrichment tags<br />
}<br />
}<br />
lines(ind, Obs.correl.vector.norm, type = "l", lwd = 1, cex = 1, col = 1)<br />
lines(c(1, N), c(zero.corr.line, zero.corr.line), lwd = 1, lty = 1, cex = 1, col = 1) # zero correlation horizontal line<br />
temp &lt;- order(abs(obs.s2n), decreasing=T)<br />
arg.correl &lt;- temp[N]<br />
lines(c(arg.correl, arg.correl), c(min.plot, max.plot), lwd = 1, lty = 3, cex = 1, col = 3) # zero crossing correlation vertical line</p>
<p>leg.txt &lt;- paste("\"", phen1, "\" ", sep="", collapse="")<br />
text(x=1, y=min.plot, adj = c(0, 0), labels=leg.txt, cex = 1.0)</p>
<p>leg.txt &lt;- paste("\"", phen2, "\" ", sep="", collapse="")<br />
text(x=N, y=min.plot, adj = c(1, 0), labels=leg.txt, cex = 1.0)</p>
<p>adjx &lt;- ifelse(Obs.ES[i] &gt; 0, 0, 1)</p>
<p>leg.txt &lt;- paste("Peak at ", Obs.arg.ES[i], sep="", collapse="")<br />
text(x=Obs.arg.ES[i], y=min.plot + 1.8*delta, adj = c(adjx, 0), labels=leg.txt, cex = 1.0)</p>
<p>leg.txt &lt;- paste("Zero crossing at ", arg.correl, sep="", collapse="")<br />
text(x=arg.correl, y=min.plot + 1.95*delta, adj = c(adjx, 0), labels=leg.txt, cex = 1.0)<br />
dev.off()<br />
}</p>
<p>}</p>
<p>&nbsp;</p></blockquote>
<p>通过这个代码，就可以把当前所有gene set的 ES score图给重新画一下，如果需要调整字体大小，就去代码里面慢慢调整。</p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2105.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>GSEA的统计学原理试讲</title>
		<link>http://www.bio-info-trainee.com/2102.html</link>
		<comments>http://www.bio-info-trainee.com/2102.html#comments</comments>
		<pubDate>Thu, 01 Dec 2016 16:39:21 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>
		<category><![CDATA[生信基础]]></category>
		<category><![CDATA[ES score]]></category>
		<category><![CDATA[foldchange]]></category>
		<category><![CDATA[GSEA]]></category>
		<category><![CDATA[RES]]></category>
		<category><![CDATA[signal2noise]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2102</guid>
		<description><![CDATA[GSEA这个java软件使用非常方便，只需要根据要求做好GCT/CLS格式的in &#8230; <a href="http://www.bio-info-trainee.com/2102.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<p>GSEA这个java软件使用非常方便，只需要根据要求做好GCT/CLS格式的input文件就好了。我以前也写过用法教程：</p>
<div><a href="http://www.bio-info-trainee.com/1282.html">用GSEA来做基因集富集分析</a></div>
<div><a href="http://www.bio-info-trainee.com/1334.html">批量运行GSEA，命令行版本</a></div>
<div>但说到统计学原理，就有点麻烦了，我试着用自己的思路阐释一下：</div>
<div>假设芯片或者其它测量方法测到了2万个基因，那么这两万个基因在case和control组的差异度量(六种差异度量，默认是signal 2 noise，GSEA官网有提供公式，也可以选择大家熟悉的foldchange)肯定不一样,那么根据它们的差异度量，就可以对它们进行排序，并且Z-score标准化，在下图的最底端展示的就是这个排序并标准化之后的结果。</div>
<p><span id="more-2102"></span></p>
<div><img class="alignnone" src="http://note.youdao.com/yws/api/group/23785548/noteresource/9ED49F972A0F4980AE784E76A7DFFC29/version/256?method=get-resource&amp;shareToken=DBDB0277A315444BBBAB2024190208AE&amp;entryId=123732909" alt="" width="503" height="504" /></div>
<div>那么图中间，就是我们每个gene set里面的基因在所有的2万个排序好基因的位置，如果gene set里面的基因集中在2万个基因的前面部分，就是在case里面富集，如果集中在后面部分，就是在control里面富集着。</div>
<div>而最上面的那个ES score的算法，大概如下：</div>
<div><a href="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/1.png"><img class="alignnone  wp-image-2103" src="http://www.bio-info-trainee.com/wp-content/uploads/2016/12/1.png" alt="1" width="725" height="581" /></a></div>
<div>仔细看，其实还是能看明白的，每个基因在每个gene set里面的ES score取决于这个基因是否属于该gene set，还有就是它的差异度量，上图的差异度量就是FC（foldchange）,对每个gene set来说，所有的基因的ES score都要一个个加起来，叫做running  ES score，在加的过程中，什么时候ES score达到了最大值，就是这个gene set最终的ES score！</div>
<div>
<div>算法解读我参考的PPT，反正我是看懂了，但不一定能讲清楚：</div>
<div><a href="http://bioinformatics.mdanderson.org/MicroarrayCourse/Lectures09/gsea1_bw.pdf">http://bioinformatics.mdanderson.org/MicroarrayCourse/Lectures09/gsea1_bw.pdf</a></div>
<div><a href="https://bioinformatics.cancer.gov/sites/default/files/course_material/GSEA_Theory.pptx">https://bioinformatics.cancer.gov/sites/default/files/course_material/GSEA_Theory.pptx</a></div>
<div><a href="http://compbio.ucdenver.edu/Hunter_lab/Phang/downloads/files/GSEA.ppt">http://compbio.ucdenver.edu/Hunter_lab/Phang/downloads/files/GSEA.ppt</a></div>
<div><a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1239896/">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1239896/</a></div>
<div><a href="http://www.baderlab.org/CancerStemCellProject/VeroniqueVoisin/AdditionalResources/GSEA">http://www.baderlab.org/CancerStemCellProject/VeroniqueVoisin/AdditionalResources/GSEA</a></div>
<div>软件还有大把的参数可以调整：<a href="http://www.baderlab.org/CancerStemCellProject/VeroniqueVoisin/AdditionalResources/GSEA/parameters">http://www.baderlab.org/CancerStemCellProject/VeroniqueVoisin/AdditionalResources/GSEA/parameters</a></div>
</div>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2102.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>用BioNet这个bioconductor包来找 maximal-scoring subgraph</title>
		<link>http://www.bio-info-trainee.com/2071.html</link>
		<comments>http://www.bio-info-trainee.com/2071.html#comments</comments>
		<pubDate>Fri, 25 Nov 2016 14:54:20 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[R]]></category>
		<category><![CDATA[基础软件]]></category>
		<category><![CDATA[bioconductor]]></category>
		<category><![CDATA[BioNet]]></category>
		<category><![CDATA[网络分析]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2071</guid>
		<description><![CDATA[## 此包是为了解决一个难题： maximal-scoring subgraph &#8230; <a href="http://www.bio-info-trainee.com/2071.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<div>## 此包是为了解决一个难题： maximal-scoring subgraph (MSS) problem ，在一个巨大的复杂网络里面找到significantly differentially expressed subnetworks，就是说，得到了几百个差异基因，去PPI数据库做网络图的时候，发现还是巨大无比，所以需要用这个包来精简我们的网络图。</div>
<div>heuristically的中文意思：启发性地</div>
<div>## 而这个R包可以整合多种数据结果来给一个网络打分，</div>
<div>包的主页是：<a href="https://www.bioconductor.org/packages/release/bioc/html/BioNet.html">https://www.bioconductor.org/packages/release/bioc/html/BioNet.html</a></div>
<div>paper：<a href="http://bioinformatics.oxfordjournals.org/content/early/2010/02/25/bioinformatics.btq089">BioNet: an R-Package for the Functional Analysis of ... - Bioinformatics</a></div>
<div>它整合了PPI网络分析和寻找功能模块的需求。</div>
<div>脚本：<a href="https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.R">https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.R</a></div>
<div>教程：<a href="https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.pdf">https://www.bioconductor.org/packages/release/bioc/vignettes/BioNet/inst/doc/Tutorial.pdf</a></div>
<div>重点就是根据一个"igraph" or "graphNEL"对象和打分来找最大的MSS</div>
<div>subnet &lt;- subNetwork(dataLym$label, interactome)</div>
<div>module &lt;- runFastHeinz(subnet, scores)</div>
<div>plotModule(module, scores=scores, diff.expr=logFC) #这个就是精简后的我们的网络图。</div>
<div>其实另外一个函数也有类似的功能，dNetFind <a href="https://rdrr.io/cran/dnet/man/dNetFind.html">https://rdrr.io/cran/dnet/man/dNetFind.html</a></div>
<div></div>
<p><span id="more-2071"></span></p>
<div>## 里面用到的网络，都是基于igraph的包： A graph object, either in graphNEL or igraph format.</div>
<div>## 首先加载一系列的包和内置数据</div>
<div></div>
<div>library(BioNet)</div>
<div>library(DLBCL)</div>
<div>data(dataLym)</div>
<div>data(interactome)</div>
<div>## dataLym 里面是3个样本,t,s,o 分别对应着的每个基因的p值</div>
<div>## interactome是一个内置的PPI网络对象，可以根据指定的基因list来提取里面的信息</div>
<div></div>
<div>pvals &lt;- cbind(t=dataLym$t.pval, s=dataLym$s.pval)</div>
<div>rownames(pvals) &lt;- dataLym$label</div>
<div>pval &lt;- aggrPvals(pvals, order=2, plot=FALSE)</div>
<div></div>
<div>## 提取t,s样本的p值，然后用aggrPvals整合成一个p值</div>
<div></div>
<div>subnet &lt;- subNetwork(dataLym$label, interactome)</div>
<div>subnet &lt;- rmSelfLoops(subnet)</div>
<div>subnet</div>
<div>## 根据指定的dataLym$label基因信息来提取网络，但是这个基因信息有点奇怪,比如TP53(7157) ， 看起来是symbol跟entrez ID的合体。</div>
<div>## 函数rmSelfLoops是标配，只要是网络，都需要处理一下，去除自循环信息</div>
<div>## 因为指定的dataLym$label基因是有限的，一般不会太多，提取的网络一般也就上千个nodes，万把个edges的</div>
<div></div>
<div>fb &lt;- fitBumModel(pval, plot=FALSE)</div>
<div>## 对我们整合好的基因对应的P值进行Beta-Uniform-Mixture (BUM) model模型处理。</div>
<div>scores &lt;- scoreNodes(subnet, fb, fdr=0.001)</div>
<div></div>
<div>module &lt;- runFastHeinz(subnet, scores)</div>
<div>## Here we use a fast heuristic approach to calculate an approximation to the optimal scoring subnetwork.</div>
<div>logFC &lt;- dataLym$diff</div>
<div>names(logFC) &lt;- dataLym$label</div>
<div></div>
<div>plotModule(module, scores=scores, diff.expr=logFC)</div>
<div>## diff.expr是用来给nodes调色的</div>
<div>## scores是用来给nodes赋予形状的</div>
<div>## 这个函数本身是基于graphNEL or igraph format的定制版，其实可以直接用igraph包来绘图。</div>
<div>## 也可以把这个network导出成Cytoscape format，这样可以用cytoscape来绘图</div>
<div>## 一般来说，红色是上调基因，绿色是下调基因，圆形是得分为正，菱形是得分为负</div>
<div></div>
<div></div>
<div>## 下面是一个实际的例子，如何使用BioNet包来做网络分析</div>
<div>library(BioNet)</div>
<div>library(DLBCL)</div>
<div>data(exprLym)</div>
<div>data(interactome)</div>
<div>exprLym ## 内置对象，所以它的gene的label是符合interactome的要求的</div>
<div>interactome</div>
<div>network &lt;- subNetwork(featureNames(exprLym), interactome)</div>
<div>network</div>
<div>network &lt;- largestComp(network)</div>
<div>## The function extracts the largest component of a network</div>
<div>network</div>
<div></div>
<div>library(genefilter)</div>
<div>library(impute)</div>
<div>expressions &lt;- impute.knn(exprs(exprLym))$data</div>
<div>## exprs得到的不再是纯粹的表达矩阵，需要用来 impute missing expression data</div>
<div>## 这里选择genefilter包的rowttests函数来做差异分析</div>
<div>t.test &lt;- rowttests(expressions, fac=exprLym$Subgroup)</div>
<div>t.test[1:10, ]</div>
<div>data(dataLym)</div>
<div></div>
<div>ttest.pval &lt;- t.test[, "p.value"]</div>
<div>surv.pval &lt;- dataLym$s.pval</div>
<div>names(surv.pval) &lt;- dataLym$label</div>
<div>pvals &lt;- cbind(ttest.pval, surv.pval)</div>
<div>pval &lt;- aggrPvals(pvals, order=2, plot=FALSE)</div>
<div>fb &lt;- fitBumModel(pval, plot=FALSE)</div>
<div>fb</div>
<div>## 用图来展示这个fitBumModel函数到底做了什么</div>
<div>dev.new(width=13, height=7)</div>
<div>par(mfrow=c(1,2))</div>
<div>hist(fb)</div>
<div>plot(fb)</div>
<div>dev.off()</div>
<div></div>
<div>## 下面这个图可以看到 Beta-Uniform-Mixture (BUM) 模型的两个参数是如何体现的</div>
<div>plotLLSurface(pval, fb)</div>
<div></div>
<div>scores &lt;- scoreNodes(network=network, fb=fb, fdr=0.001)</div>
<div>## 根据p值来对每个node打分</div>
<div></div>
<div>network &lt;- rmSelfLoops(network)</div>
<div></div>
<div>## 下面是把网络数据写到txt文档，就可以导入到cytoscape啦！</div>
<div>writeHeinzEdges(network=network, file="lymphoma_edges_001", use.score=FALSE)</div>
<div>writeHeinzNodes(network=network, file="lymphoma_nodes_001", node.scores = scores)</div>
<div></div>
<div>datadir &lt;- file.path(path.package("BioNet"), "extdata")</div>
<div>dir(datadir)</div>
<div>## 本次算法变了：the heinz algorithm is used to calculate the maximum-scoring subnetwork</div>
<div>## 下面的文件需要借助heinz.py脚本生成，这里实例用的是包自带的数据</div>
<div>## 脚本代码是：heinz.py -e lymphoma_edges_001.txt -n lymphoma_nodes_001.txt -N True -E False</div>
<div></div>
<div>module &lt;- readHeinzGraph(node.file=file.path(datadir, "lymphoma_nodes_001.txt.0.hnz"), network=network)</div>
<div>diff &lt;- t.test[, "dm"]</div>
<div>names(diff) &lt;- rownames(t.test)</div>
<div></div>
<div>plotModule(module, diff.expr=diff, scores=scores)</div>
<div></div>
<div>sum(scores[nodes(module)])</div>
<div>sum(scores[nodes(module)]&gt;0)</div>
<div>sum(scores[nodes(module)]&lt;0)</div>
<div></div>
<div></div>
<div>###################################################</div>
<div>### code chunk number 27: Tutorial.Rnw:375-380</div>
<div>###################################################</div>
<div>library(BioNet)</div>
<div>library(DLBCL)</div>
<div>library(ALL)</div>
<div>data(ALL)</div>
<div>data(interactome)</div>
<div>## 这个ALL是另外一个包的数据，基因ID现在还没有，是探针ID，需要转换成BioNet识别的！</div>
<div>mapped.eset &lt;- mapByVar(ALL, network=interactome, attr="geneID")</div>
<div>mapped.eset[1:5,1:5]</div>
<div>length(intersect(rownames(mapped.eset), nodes(interactome)))</div>
<div>network &lt;- subNetwork(rownames(mapped.eset), interactome)</div>
<div>network</div>
<div>network &lt;- largestComp(network)</div>
<div>network &lt;- rmSelfLoops(network)</div>
<div>network</div>
<div></div>
<div>## 这里用limma来做差异分析</div>
<div>library(limma)</div>
<div>design &lt;- model.matrix(~ -1+ factor(c(substr(unlist(ALL$BT), 0, 1))))</div>
<div>colnames(design)&lt;- c("B", "T")</div>
<div>contrast.matrix &lt;- makeContrasts(B-T, levels=design)</div>
<div>contrast.matrix</div>
<div>fit &lt;- lmFit(mapped.eset, design)</div>
<div>fit2 &lt;- contrasts.fit(fit, contrast.matrix)</div>
<div>fit2 &lt;- eBayes(fit2)</div>
<div>pval &lt;- fit2$p.value[,1]</div>
<div>fb &lt;- fitBumModel(pval, plot=FALSE)</div>
<div>fb</div>
<div>dev.new(width=13, height=7)</div>
<div>par(mfrow=c(1,2))</div>
<div>hist(fb)</div>
<div>plot(fb)</div>
<div>scores &lt;- scoreNodes(network=network, fb=fb, fdr=1e-14)</div>
<div>## 还是把网络数据写到本地，供cytoscape导入</div>
<div>writeHeinzEdges(network=network, file="ALL_edges_001", use.score=FALSE)</div>
<div>writeHeinzNodes(network=network, file="ALL_nodes_001", node.scores = scores)</div>
<div>## 还是使用 heinz algorithm is used to calculate the maximum-scoring subnetwork</div>
<div>## A new implementation Heinz v2.0 is also available at https://software.cwi.nl/software/heinz ,</div>
<div></div>
<div>datadir &lt;- file.path(path.package("BioNet"), "extdata")</div>
<div>module &lt;- readHeinzGraph(node.file=file.path(datadir, "ALL_nodes_001.txt.0.hnz"), network=network)</div>
<div></div>
<div>nodeDataDefaults(module, attr="diff") &lt;- ""</div>
<div>nodeData(module, n=nodes(module), attr="diff") &lt;- fit2$coefficients[nodes(module),1]</div>
<div>nodeDataDefaults(module, attr="score") &lt;- ""</div>
<div>nodeData(module, n=nodes(module), attr="score") &lt;- scores[nodes(module)]</div>
<div>nodeData(module)[1]</div>
<div></div>
<div>## 保存为XGMML file，供cytoscape使用</div>
<div>saveNetwork(module, file="ALL_module", type="XGMML")</div>
<div></div>
<div><span style="color: #ff0000;">## 一般来说，红色是上调基因，绿色是下调基因，圆形是得分为正，菱形是得分为负</span></div>
<div></div>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2071.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>cytoscape五步曲之二：在cytoscape里面生成网络图</title>
		<link>http://www.bio-info-trainee.com/2055.html</link>
		<comments>http://www.bio-info-trainee.com/2055.html#comments</comments>
		<pubDate>Thu, 24 Nov 2016 03:21:49 +0000</pubDate>
		<dc:creator><![CDATA[ulwvfje]]></dc:creator>
				<category><![CDATA[基础软件]]></category>
		<category><![CDATA[cytoscape]]></category>
		<category><![CDATA[网络图]]></category>

		<guid isPermaLink="false">http://www.bio-info-trainee.com/?p=2055</guid>
		<description><![CDATA[通过上一讲大家应该明白了，网络图是为了展现分子之间的连接关系的，并不是一定要用c &#8230; <a href="http://www.bio-info-trainee.com/2055.html">Continue reading <span class="meta-nav">&#8594;</span></a>]]></description>
				<content:encoded><![CDATA[<div>通过上一讲大家应该明白了，网络图是为了展现分子之间的连接关系的，并不是一定要用cytoscape来做，只需要根据连接关系给我们的所有点安排一个坐标，然后把相应的线连接起来即可！那么既然我们要学习cytoscape，肯定是要用cytoscape做好第一步，就是根据输入数据来做网络图。</div>
<div>可以先了解一下cytoscape定义好的输入数据，</div>
<div><a href="http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats">http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats</a> 当然，其实木有意义！因为我们不可能拿到cytoscape的输入文件（cys格式的），除非是你朋友传给你的。我们肯定是根据txt、csv等用分隔符分隔的文本文件来做网络图。</div>
<p><span id="more-2055"></span></p>
<div>cytoscape里面有很多示例数据，请务必打开看看：C:\Program Files\Cytoscape_v3.3.0\sampleData 了解它要求什么数据！！！</div>
<div>你可以打开cytoscape，然后直接点击菜单栏的file--&gt;open--&gt;然后选择示例数据的cys文件，就可以看到一个图啦！但是木有任何意义，还是那句老话，你不可能预先得到cys文件，必然是你自己有txt文本数据，然后做出cys的文件。</div>
<div>文本数据必须要有2列，就是source node和target node，其余的都是可选！！！</div>
<div>
<table border="1" cellspacing="0" cellpadding="0">
<tbody>
<tr>
<td>source</td>
<td>target</td>
</tr>
<tr>
<td>YKR026C</td>
<td>YGL122C</td>
</tr>
<tr>
<td>YGR218W</td>
<td>YGL097W</td>
</tr>
<tr>
<td>YGL097W</td>
<td>YOR204W</td>
</tr>
<tr>
<td>YLR249W</td>
<td>YPR080W</td>
</tr>
<tr>
<td>YLR249W</td>
<td>YBR118W</td>
</tr>
<tr>
<td>YLR293C</td>
<td>YGL097W</td>
</tr>
<tr>
<td>YMR146C</td>
<td>YDR429C</td>
</tr>
<tr>
<td>YDR429C</td>
<td>YFL017C</td>
</tr>
<tr>
<td>YPR080W</td>
<td>YAL003W</td>
</tr>
<tr>
<td>YBR118W</td>
<td>YAL003W</td>
</tr>
<tr>
<td>YOL123W</td>
<td>YGL044C</td>
</tr>
<tr>
<td>YPL211W</td>
<td>YGR014W</td>
</tr>
<tr>
<td>YJL030W</td>
<td>YGL229C</td>
</tr>
<tr>
<td>YJL013C</td>
<td>YGL229C</td>
</tr>
<tr>
<td>YGL122C</td>
<td>YOL123W</td>
</tr>
<tr>
<td>YGR014W</td>
<td>YJL030W</td>
</tr>
<tr>
<td>YGR014W</td>
<td>YJL013C</td>
</tr>
<tr>
<td>YGR203W</td>
<td>YIL061C</td>
</tr>
<tr>
<td>YCR084C</td>
<td>YBR112C</td>
</tr>
<tr>
<td>YCR084C</td>
<td>YCL067C</td>
</tr>
</tbody>
</table>
</div>
<div></div>
<div>导入文本的方式如下：</div>
<div><a href="http://www.bio-info-trainee.com/wp-content/uploads/2016/11/15.png"><img class="alignnone size-full wp-image-2056" src="http://www.bio-info-trainee.com/wp-content/uploads/2016/11/15.png" alt="1" width="848" height="536" /></a></div>
<div><img src="file:///C:/Users/jimmy1314/AppData/Local/YNote/data/jmzeng1314@163.com/5dc2039034d641538c531dcb9a2cf8c3/clipboard.png" alt="" data-media-type="image" data-attr-org-src-id="FD876A7F42DC48919D465A2A2866B2B1" /></div>
<div>其实已经有了这两列信息，在R里面就可以自己画网络图了，或者在html网页里面写js来做。实在是没必要用cytoscape，这也就是为什么像我这样的大神，到现在才开始使用cytoscape的原因。即使用cytoscape生成了网络图，还需要进行一大堆的细节调整，很烦人的。</div>
<div></div>
<p>&nbsp;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.bio-info-trainee.com/2055.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
