From beb47c11e9feaccc3979c3797053cfb594284d8d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 28 Jul 2021 19:49:11 +0000 Subject: [PATCH 1/3] add pbta-snv-scavenged-hotspots.maf.tsv.gz --- .../01-get_biospecimen_identifiers.R | 2 ++ .../create-subset-files/02-subset_files.R | 2 +- .../biospecimen_ids_for_subset.RDS | Bin 43575 -> 43566 bytes .../create_subset_files.sh | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/analyses/create-subset-files/01-get_biospecimen_identifiers.R b/analyses/create-subset-files/01-get_biospecimen_identifiers.R index f2db7e856d..8ffd113832 100644 --- a/analyses/create-subset-files/01-get_biospecimen_identifiers.R +++ b/analyses/create-subset-files/01-get_biospecimen_identifiers.R @@ -59,6 +59,8 @@ get_biospecimen_ids <- function(filename, id_mapping_df) { # not contain MAF version information if (grepl("consensus", filename)) { snv_file <- data.table::fread(filename, data.table = FALSE) + } else if (grepl("hotspots", filename)){ + snv_file <- data.table::fread(filename, data.table = FALSE) } else { snv_file <- data.table::fread(filename, skip = 1, # skip version string diff --git a/analyses/create-subset-files/02-subset_files.R b/analyses/create-subset-files/02-subset_files.R index 5f366b49dd..b5f7415772 100644 --- a/analyses/create-subset-files/02-subset_files.R +++ b/analyses/create-subset-files/02-subset_files.R @@ -69,7 +69,7 @@ subset_files <- function(filename, biospecimen_ids, output_directory) { # filtering strategy depends on the file type, mostly because how the sample # IDs change based on the file type -- that's why this logic is required if (grepl("pbta-snv", filename)) { - if (grepl("consensus-mutation", filename)) { + if (grepl("consensus-mutation|hotspots", filename)) { snv_file <- data.table::fread(filename, data.table = FALSE) snv_file %>% dplyr::filter(Tumor_Sample_Barcode %in% biospecimen_ids) %>% diff --git a/analyses/create-subset-files/biospecimen_ids_for_subset.RDS b/analyses/create-subset-files/biospecimen_ids_for_subset.RDS index 25d0c2a3c5f55b5820cbbe224b00de2760cbb992..947a3ee3e44491f3f2a432d381a91773bcb25317 100644 GIT binary patch literal 43566 zcmeHQTXWmS6;_kDGi_cn(?5{6K6EJKeo5O00Ev%3?2Cq$Xjfy2)R43$?vL*j zl1cf%Jhj&lAd)g>qQu^ahr5g2?>l!?e>^!k`Tpel-#7n$(fsk`#jm+D(N0dj`&09m zpPQebnxFLKhm$`v|NYU-&Nx+;3|et;$jE&`NDMd>1fE_5F&N@GQL-(qfI}9I3#)?S z`9v0^STwdC<$SXJ$ z$DWZ?P?pT61|PdN_$@`tWTnnBFr?~PZF{u{hk}$BQxWcj8mfNea&QYZqZZsyf>YIq zEcgMR09jhfle!Aw=4iB*+$@K(bS2}8#EifPbrMNQs=(PBfo>ME0Kd^xcwW&6{4KMj zd06HlkYy>9icT%Vp*k>h5-VVch(Y0osoxr^%|Z>}?1_@)R+zvoB)V%?l?NXOiPD6# zBe+IVkX$M%xH&c#iE$poW!rKZc&@~QN!Wy1hL;Lp$l=_mw0$^LN46`Gw$|uqfm9?R zJQI#(1OXM`yc~-do+@Zid>L!hkODB|imBh6X#$5_y%c@DhC@MI(5jB%Q0X|jEm~m6 z)0z`4@o?FGDl2J1;BN(9RvV@W*BGQy7|S-i3PI)iycfbV5xHKVrxZ>iiX7q?4m=Yv z@7GEdz%vnxu_KEG+=*BsS}kU+Ax#nNScl?ct&FtFhL1K;Q_ajdc-<3&D>bErA56;J z=B*N}j!?T)~8c942ZebCXL6p`K_+Y6= zx+Y0*_N7O2Nh{#4mVupFVg(;s83j2X8*o?4S`O;WgWp)mMZ`HJ2fnd#a>Z>1X2XJ@+OPwh@)5B^pHv_V zmaHnC2R3OB>@X}9u;M-w$ah9`E)!}lhX)h`dDBc(m%uAwdy*KN5>z6d9qwZw&xYu} z6G&nNF|j+ykBGj}lshB()513-p>Gtv_kh)js(Ms{y2`+s{NV7dDIr%w6|J+5J~S{F zZk5In)W@7B*UgcMLCz;fC*9`2z8CFL(RnZUB96CG($Ni3un4v-b=@A`qv9M|1o&71p zVl@KHX(cOsEkIvZXMajoSyZ%IK9ECovD9kgbrWo8JU$adSDaicB$1H#M+Dyoi`7WX)3K?^QE2&KSS=A=(G%#A zIPNegqd%q304I||9%o!ECO#zoJp?#i)sCrjq%qH@)xKenxieOCg{p*B4mbSiGh&{f z_AL}7;Ubu_!kGTTOH?yWXvwl=#2kh~z6p`uy<+tU%#Y1asp50Sw@sRFL^Mv!&(uWD-h#x(#9iq`zX8u7WpAU`u?M(B(0GQ;Eq9bZ3s%gUH zBHl_kJ}sbco-x45RAGc+^)3o4T2Lv51Sf&zrxbZ2mJ-aSS9fcNFyv$^?je$vRCVhT zV%0Zll{z*DPd1juhqxa)*TN`N5#3fT3{uj9Ri^BwO(cq2q(%vyI+%q`W4DM*81zO` z9gqu++{>aOZj}VEIsz;AU@GM3ewV&jq5+LuOgrjatrz4?$sGq%DUoVDq{0pa6LRzDQ{ z{`vDnyAKN8skK#dQh06v-Mqc>B~qG-6rJ|Sgb3|PVG;4b6l$b`>2VZN!bqW8M=3_3K^A7F0#EiH_|g^Qx-#KIOLhyT zM+)6O$G#{OmZ)M7f|#$JweUWBX1GG(=dHpt%*Z{Wo*6x+?DmRwh<>qaLtw@?Q=f}I zEe))xg-24=@uBjyhZ?$XpyFZ=z}&fAMXdXpW<$RUn+Dijw?o=hIHHvKF4UN@WL!Vz zQ@emueTXN0w7EoEw)i-L)l>+}BOgqw%=C2uCRHQtP#)R>#F1hh84qA)|Zi!3jLF!-gGVN6BNcmAu&}VWfJY31&-C>hAHfIkzCKzFt_y z&LQ+{9XaG%=;WR?6H`pR}15a;vr8@7>XHI zc?mUBHZN5eMk(c6{k7S=R8eB;d@Mneyw_YWK8t-xfeJ~VOw*lK4z%z0bLSA~SvvA~ z2$PpeN}SA7E$I73vVIWL1fvx;FI6FBvZ+CvhLKjfda3m0(5i_8n;;mLtt3GKOS7>; zd|9Mi-g+6vXswLa%BL@ihyz~p)K+lqh&ZGuH93(gm@oG9uag*>R($H@oB`K}1km>w zdFLg&2nCo`IW+lrNf}WK<|6d@5~P9sVyYyhUck4imX5lG?r6YlX$31Vu@$$BCD`c1 z?eoP&p|0xf!^CFOuo*C@wPF{6Xu*&@lBqK`bJ?DWgMX*R#~AD$Ipla=kvB9ubfyB1 zQ?-OZzZj!wGJ3o=p8KIwwVvMNB~`BNw%&cl#txl5UMOMgzSfSdl<|n%TOt*SDZw1t zZ>rhAzBmLb(L);@eWq$5B_kxt%)*7;X%3cY*xtO_)#HWvvUXpyIvO2Y1rswmdbBXu z2;&jvL|1m$EzUiRH=HV7P*prk@_T+%@aU3}4+R`!cbYRSxIA{J*(O?!-Dx&E@!;_t zFV1F1#*4F60Fd&Sv8~f~bHU~k;EWe%dp5?4v)u`-;CI_mX=I#{ad@xQNOQb6 z)kD=a!eSEVyUhfpf%*GkxsXyMZ>hbeb1Xw;Jk4ftv`Nz;mjgygXmbSF`!*-Rf*Ldvi9O&ll_IdUkPkK0lkjTQ63N zM&{{yb$j~eKO2$%c!bE`ns2x`d*2qQd`aNVtMznpKEJ*Dd3(7&ZD#s$Iy(EOw@Ymc z_}kYAST7e>ua~pgd3yNrpx8!t7%(+$ux#hcmu z@_sg(#}j{zq~+}V-Eujbudm)u8sW2xNh5Byn!I|ynqJ>r&F&_-k^A?rk-J(?mmenE zUS`B3sOOzs^A&3P4F|~ z&KDP#^EW`E!?a{v;_3DDb^G}I!Yu7-IzOMSPj6>8_fj6f>#xibnr&*%oUki`eMzhH z>FsR(rny!BUaVI)&Cl+V*wE*0gPO-byLvlCX2iaz+v)P+@*LS0iEUqQC)d;Y`V;I* znLc}eslhF|zu2#D-d;CH^Xh7P(}2&;h4CIIc}HHi`+Pss_09E;I5yn5TrFNNmUqi! z1~j&NxE1{Areah5^>I51o)LmVUsN*g3H<*afKx;y+YX>2@0O29%$aszyQiOdScvTK Pt7K!(t0na0<=hV?5mvNo!rXWX6-6&%VnSY5 zGeQ6@on}%Hl|C3Uwe}ZO23(Mtn^?Ca3O;2?6-y-vZjPlmiWy~)yyamvFLX%WPAsR= zC;%-|4nYrRS&FytcUd3gf&%hj7k377^y)fBd7sc>^VMK>K>c;Hi>PD(e; z;Z(iAN~6?;hvubCW`$A&zgRHM^g6rhRbHu9bq+q|D~?^NE}Xru%7o-$3_qo#xUm`q z4#gG1iUYaOFKkMc2=au_>I%EM1-=-#o*fyo35Sy02y_olBA}Vs*eYCOSTZd}L84j5(7pfA5LzvxbFlX(BQ4ySdA-R=mSSeUgVU$fl z8puqABNLQ#?qlI7nQw&fHxkV>sa3)KB04chqQGlK97(294apNHz(HfeWs{)D>(tP} zyhuW`Jo2IZA~}h)(13ei`beTwg$pVJtt|Z-a&#G&eqbPYJIcgtx7vW$sw|R7CK6&Wi!NJVMrc4rl%1q_WD$cZ4k zi~~aiU~KfAaR3Gf9qcSGNC1Uak|Bmm8%mYI7=k^l%MpTt=0(8DC@>_`Lt;A+$at_a z7(@057)OW>hlT=84>eVX_wvZii5CF{NTVf&((ahd2@X04+hK#>JlkQgkk>S(HV7J4 z2euumto@Bz11+-QwnIO;ulQxC*-#_xX*)DB!+s16_h*X^GIQffuArAnk<3g}DLko{ z>VJ9>fN?y|CV>uHDCp-|@=e*#1L$KLt7oY!D@T?wbX}LsX05N30(Z@Dz%cUItzzh7 z+o!YX*`mW>xv$xQ-ryLXr?4Hifv0iA9fGp(l~@JW%RlZ=#4b9-9zlc==mhTPMoOun zhJgYl=Sz)F47M;19Euq1GjbEJC>Z34o>1VI4n{>M$j~bk@nv*?;f!-Nu8kN z69lS5HL{2T4Zq>B>jT>kQRJ44LG4JyuD1Z&Gj_cT82!*;pHPqylqq=(Bgw}UyMA`) z(9g59C%F%!`J<5gQYl$Dh@swd=pe)31SQ8asn=1__8D)sQmu|91d|v>3k4!)Y2y|- zBDv2K@_yLpE`#|g2hLe4@+{FXgQ;4>)0jp91zBw{ra{;B@y45BhSMm}VYE;%id@wL z>-$2;KMx^4)-2S);u{r7;3rU(J#Zv>cod+Qa$hHjqoxgvi1rH`f7#CiH%UnJt+jsO z>I17NbtU#->b%h8w|oEWT>eks!O8X9uY71T^OXAo%Y8(NszlIp`S_YVVPTScCQKMS zawwp(jx8NJSsxAs9}Cc$hNhn0mQ(yzoAxsZ#i89oG-(XQV?C?8IHg>iA}vlu7N@X^ zQ$@8~AH~aU->rO#I0a0WK{S#>TPt>{r+O)@a^Xl7oY}hDxuH(fv49W3I84p{xXxqBi z#1{0vI2J;zOd)UdtXmvx3l6PFe1h`BP**LR!V-;OxBPH;t>JEP=FlaB{Sf-V#ejp& zuu+8D`x#gn@nl>;U4>J?*T}f7cub&t{k>Z6BZQV|Tf&P97(iDYJ4qTD zmLJmCid5Xd>)x2G(etpAI^UM&W@Uyq2>o|JbDsz;$C*($DNIl6nR%s3Nn#TSRXoYL z8)GrTB+ye6)_eAZ_QcKFa#i?6XE55ww$EG&(Ti7EeOLt`c2uQyT$t98WT8sk2r>dk z0Z5u6k&FN`=sp#Iec?D{ssrOVkM#}<+RqP;BPT+bLE$yrMBgVIOAXP26pBk;oqw)3 z6m_J^fk8WCnp8)o3&I!EirdtCC0?abp+k? z3ASWnFsN5>kVe4DRze-RaX~~f63WnhG#F{hN|M986F#Vt$V%F#mB39&ZC&%jPLmmd zN88i>1drgc@a!YiF>6hQ@fuEaZnTcRJ=#ZllgUYu!GtA)V$xtA78sgxl*@$!1F(;e zZ8ohW-(&DfI2kF4+nJZJwYFD+*Tt=JZCAn2MLgO*(Yv6SSlA0Nh3QNJ0k@~k%Lp5( zqfemL_Ru~-tta;C2`g$7oe&tZWl$c5a2^&=E4^s*qkytHT7O-yU=b%Wgu^y-*4LrR zcvMZH4TXgj)af2S+kdpF{ZP@)tHX0T$cl zO3cT_BdA0l6CUmJ1<^?HG8Dpcx^bjSNKu6=1eKiOzCUba4)S2_?>I~*iBMZc@TJZt zsQn$BXOXtgxgs5AJqSG7D)&$D^ypae$i#SdnReiL*--JvQGVw&n z#7Z?R*x>2!e@Ds!dg`Oz%#6dkUCJMS)`-z46=UtKQdlj(AK`Fh$V;Pjfr zhQQ7H_KYrXXOqSJeDQWRzr3H#=6JHNp|qTxzh5qA^Yzt-NlScoF=@%oR+HBsR@3X7 ztJ&QoH+28~HFQ_&>GHS9c9)5J5xJYgG=AO#$ z!t0;CDYV Date: Wed, 28 Jul 2021 18:15:56 -0400 Subject: [PATCH 2/3] Update 01-get_biospecimen_identifiers.R --- analyses/create-subset-files/01-get_biospecimen_identifiers.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/analyses/create-subset-files/01-get_biospecimen_identifiers.R b/analyses/create-subset-files/01-get_biospecimen_identifiers.R index 8ffd113832..65559133e8 100644 --- a/analyses/create-subset-files/01-get_biospecimen_identifiers.R +++ b/analyses/create-subset-files/01-get_biospecimen_identifiers.R @@ -57,9 +57,7 @@ get_biospecimen_ids <- function(filename, id_mapping_df) { # 'Tumor_Sample_Barcode' # if the files have consensus in the name, the first line of the file does # not contain MAF version information - if (grepl("consensus", filename)) { - snv_file <- data.table::fread(filename, data.table = FALSE) - } else if (grepl("hotspots", filename)){ + if (grepl("consensus|hotspots", filename)) { snv_file <- data.table::fread(filename, data.table = FALSE) } else { snv_file <- data.table::fread(filename, From 356cfac14499ac918f92c26a574718ff73e49385 Mon Sep 17 00:00:00 2001 From: Krutika Gaonkar <34580719+kgaonkar6@users.noreply.github.com> Date: Wed, 28 Jul 2021 18:57:48 -0400 Subject: [PATCH 3/3] add pbta-mb-pathology-subtypes.tsv --- analyses/create-subset-files/create_subset_files.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analyses/create-subset-files/create_subset_files.sh b/analyses/create-subset-files/create_subset_files.sh index 4d2cd97dea..6bf64ad3b2 100755 --- a/analyses/create-subset-files/create_subset_files.sh +++ b/analyses/create-subset-files/create_subset_files.sh @@ -87,6 +87,9 @@ cp $FULL_DIRECTORY/pbta-mend* $SUBSET_DIRECTORY # fusion summary files cp $FULL_DIRECTORY/fusion_summary* $SUBSET_DIRECTORY +# MB pathology subtypes +cp $FULL_DIRECTORY/pbta-mb-pathology-subtypes.tsv $SUBSET_DIRECTORY + # if the md5sum.txt file already exists, get rid of it cd $SUBSET_DIRECTORY rm -f md5sum.txt