<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.taichimd.us/index.php?action=history&amp;feed=atom&amp;title=File%3AFiltered_p.png</id>
	<title>File:Filtered p.png - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.taichimd.us/index.php?action=history&amp;feed=atom&amp;title=File%3AFiltered_p.png"/>
	<link rel="alternate" type="text/html" href="https://wiki.taichimd.us/index.php?title=File:Filtered_p.png&amp;action=history"/>
	<updated>2026-04-09T09:32:22Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://wiki.taichimd.us/index.php?title=File:Filtered_p.png&amp;diff=38976&amp;oldid=prev</id>
		<title>Brb: Note:

# x-axis &quot;p cutoff&quot; should be &quot;BH cutoff&quot; or &quot;FDR cutoff&quot;.
# Each curve represents theta (filtering threshold). For example, theta=.1 means 10% of genes are filtered out before we do multiple testing (or BH adjustment).
# It is seen the larger the theta, the more hypotheses are rejected at the same FDR cutoff. For example, 
#* if theta=0, 251 hypotheses are rejected at FDR=.1
#* if theta=.5, 355 hypotheses are rejected at FDR=.1.

&lt;syntaxhighlight lang=&#039;r&#039;&gt;
BiocManager::install(&quot;ALL&quot;)...</title>
		<link rel="alternate" type="text/html" href="https://wiki.taichimd.us/index.php?title=File:Filtered_p.png&amp;diff=38976&amp;oldid=prev"/>
		<updated>2024-03-12T01:35:00Z</updated>

		<summary type="html">&lt;p&gt;Note:  # x-axis &amp;quot;p cutoff&amp;quot; should be &amp;quot;BH cutoff&amp;quot; or &amp;quot;FDR cutoff&amp;quot;. # Each curve represents theta (filtering threshold). For example, theta=.1 means 10% of genes are filtered out before we do multiple testing (or BH adjustment). # It is seen the larger the theta, the more hypotheses are rejected at the same FDR cutoff. For example,  #* if theta=0, 251 hypotheses are rejected at FDR=.1 #* if theta=.5, 355 hypotheses are rejected at FDR=.1.  &amp;lt;syntaxhighlight lang=&amp;#039;r&amp;#039;&amp;gt; BiocManager::install(&amp;quot;ALL&amp;quot;)...&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;== Summary ==&lt;br /&gt;
Note:&lt;br /&gt;
&lt;br /&gt;
# x-axis &amp;quot;p cutoff&amp;quot; should be &amp;quot;BH cutoff&amp;quot; or &amp;quot;FDR cutoff&amp;quot;.&lt;br /&gt;
# Each curve represents theta (filtering threshold). For example, theta=.1 means 10% of genes are filtered out before we do multiple testing (or BH adjustment).&lt;br /&gt;
# It is seen the larger the theta, the more hypotheses are rejected at the same FDR cutoff. For example, &lt;br /&gt;
#* if theta=0, 251 hypotheses are rejected at FDR=.1&lt;br /&gt;
#* if theta=.5, 355 hypotheses are rejected at FDR=.1.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;syntaxhighlight lang=&amp;#039;r&amp;#039;&amp;gt;&lt;br /&gt;
BiocManager::install(&amp;quot;ALL&amp;quot;)&lt;br /&gt;
library(&amp;quot;ALL&amp;quot;)&lt;br /&gt;
library(genefilter)&lt;br /&gt;
&lt;br /&gt;
data(&amp;quot;ALL&amp;quot;)&lt;br /&gt;
bcell &amp;lt;- grep(&amp;quot;^B&amp;quot;, as.character(ALL$BT))&lt;br /&gt;
moltyp &amp;lt;- which(as.character(ALL$mol.biol) %in%&lt;br /&gt;
                    +                     c(&amp;quot;NEG&amp;quot;, &amp;quot;BCR/ABL&amp;quot;))&lt;br /&gt;
ALL_bcrneg &amp;lt;- ALL[, intersect(bcell, moltyp)]&lt;br /&gt;
ALL_bcrneg$mol.biol &amp;lt;- factor(ALL_bcrneg$mol.biol)&lt;br /&gt;
table(ALL_bcrneg$mol.biol)&lt;br /&gt;
&lt;br /&gt;
S2 &amp;lt;- rowVars(exprs(ALL_bcrneg))&lt;br /&gt;
p2 &amp;lt;- rowttests(ALL_bcrneg, &amp;quot;mol.biol&amp;quot;)$p.value&lt;br /&gt;
theta &amp;lt;- seq(0, .5, .1)&lt;br /&gt;
# The filtered_p function permits easy simultaneous calculation of unadjusted &lt;br /&gt;
#   or adjusted p-values over a range of filtering thresholds (θ).&lt;br /&gt;
p_bh &amp;lt;- filtered_p(S2, p2, theta, method=&amp;quot;BH&amp;quot;)&lt;br /&gt;
dim(p_bh)&lt;br /&gt;
# [1] 12625     6&lt;br /&gt;
head(p_bh) # adjusted p-values using the BH method&lt;br /&gt;
#             0%       10%       20%       30%       40%       50%&lt;br /&gt;
# [1,] 0.9185626 0.8943104 0.8624798 0.8278077        NA        NA&lt;br /&gt;
# [2,] 0.9585758 0.9460504 0.9304104 0.9059466 0.8874485 0.8709793&lt;br /&gt;
# [3,] 0.7022442        NA        NA        NA        NA        NA&lt;br /&gt;
# [4,] 0.9806216 0.9747555 0.9680574 0.9567131        NA        NA&lt;br /&gt;
# [5,] 0.9506087 0.9349386 0.9123998 0.8836386        NA        NA&lt;br /&gt;
# [6,] 0.6339004 0.5896890 0.5440851 0.4951371 0.4497915 0.4102711&lt;br /&gt;
&lt;br /&gt;
range(p.adjust(p2, &amp;quot;BH&amp;quot;) - p_bh[,1]) # 0 0&lt;br /&gt;
&lt;br /&gt;
apply(p_bh, 2, function(x) mean(is.na(x))) |&amp;gt; round(2)&lt;br /&gt;
#   0% 10% 20% 30% 40% 50% &lt;br /&gt;
#  0.0 0.1 0.2 0.3 0.4 0.5&lt;br /&gt;
&lt;br /&gt;
i &amp;lt;- order(S2, decreasing = T)&lt;br /&gt;
i2 &amp;lt;- sort(i[1:round(12625*.9)])&lt;br /&gt;
# [1] 1 2 4 5 6  # sort the 3rd gene is filtered out&lt;br /&gt;
tmp &amp;lt;- rep(NA, 12625); tmp[i2] &amp;lt;- p.adjust(p2[i2], &amp;quot;BH&amp;quot;)&lt;br /&gt;
range(tmp - p_bh[, 2], na.rm = TRUE) # 0 0&lt;br /&gt;
&lt;br /&gt;
i3 &amp;lt;- sort(i[1:round(12625*.8)]); i3[1:5]&lt;br /&gt;
# [1] 1 2 4 5 6&lt;br /&gt;
tmp &amp;lt;- rep(NA, 12625); tmp[i3] &amp;lt;- p.adjust(p2[i3], &amp;quot;BH&amp;quot;)&lt;br /&gt;
range(tmp - p_bh[, 3], na.rm = TRUE) # 0 0&lt;br /&gt;
&lt;br /&gt;
i4 &amp;lt;- sort(i[1:round(12625*.7)]); i4[1:5]&lt;br /&gt;
# [1] 1 2 4 5 6&lt;br /&gt;
tmp &amp;lt;- rep(NA, 12625); tmp[i4] &amp;lt;- p.adjust(p2[i4], &amp;quot;BH&amp;quot;)&lt;br /&gt;
range(tmp - p_bh[, 4], na.rm = TRUE) # [1] -6.533250e-05  9.570921e-05&lt;br /&gt;
&lt;br /&gt;
i5 &amp;lt;- sort(i[1:round(12625*.6)]); i5[1:5]&lt;br /&gt;
# [1]  2  6  8  9 10&lt;br /&gt;
tmp &amp;lt;- rep(NA, 12625); tmp[i5] &amp;lt;- p.adjust(p2[i5], &amp;quot;BH&amp;quot;)&lt;br /&gt;
range(tmp - p_bh[, 5], na.rm = TRUE) # 0 0&lt;br /&gt;
&lt;br /&gt;
i6 &amp;lt;- sort(i[1:round(12625*.5)]); i6[1:5]&lt;br /&gt;
# [1]  2  6  8  9 10&lt;br /&gt;
tmp &amp;lt;- rep(NA, 12625); tmp[i6] &amp;lt;- p.adjust(p2[i6], &amp;quot;BH&amp;quot;)&lt;br /&gt;
range(tmp - p_bh[, 6], na.rm = TRUE) # [1] -0.0000171919  0.0002731363&lt;br /&gt;
&lt;br /&gt;
# input p_bh is already FDR adjusted p-values&lt;br /&gt;
rejection_plot(p_bh, at=&amp;quot;sample&amp;quot;,&lt;br /&gt;
               xlim=c(0,.3), ylim=c(0,1000),&lt;br /&gt;
               main=&amp;quot;Benjamini &amp;amp; Hochberg adjustment&amp;quot;)&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,1] &amp;lt; x))&lt;br /&gt;
# 169 251 341 426 554 660&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,2] &amp;lt; x, na.rm=T))&lt;br /&gt;
# 178 272 363 459 602 723&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,3] &amp;lt; x, na.rm=T))&lt;br /&gt;
# 192 294 402 526 649 784&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,4] &amp;lt; x, na.rm=T))&lt;br /&gt;
# 201 308 439 590 717 876&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,5] &amp;lt; x, na.rm=T))&lt;br /&gt;
# 209 343 475 637 788 953&lt;br /&gt;
sapply(c(.05, .1, .15, .2, .25, .3), function(x) sum(p_bh[,6] &amp;lt; x, na.rm=T))&lt;br /&gt;
# 222 355 530 655 836 993&lt;br /&gt;
&lt;br /&gt;
abline(h=200, lty=2); abline(v=.05, lty=2)&lt;br /&gt;
abline(h=300, lty=2); abline(v=.1, lty=2)&lt;br /&gt;
abline(h=400, lty=2); abline(v=.15, lty=2)&lt;br /&gt;
abline(h=500, lty=2); abline(v=.2, lty=2)&lt;br /&gt;
abline(h=600, lty=2); abline(v=.25, lty=2)&lt;br /&gt;
abline(h=800, lty=2); abline(v=.3, lty=2)&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;/div&gt;</summary>
		<author><name>Brb</name></author>
	</entry>
</feed>