From cecd8ca89ba5004715ee656d7416cc2cdcc03da4 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 21 Feb 2021 21:22:22 -0800 Subject: [PATCH 001/162] bugfix useHdf5=FALSE addImputeWeights --- .DS_Store | Bin 14340 -> 14340 bytes R/Imputation.R | 6 ++---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.DS_Store b/.DS_Store index 2b21082f6917b33f76b7b6087a8e67597128a1da..6465b807d491c2119319c1fc58709491def719fd 100644 GIT binary patch delta 964 zcmaLVTSyd97zgn0zp0(^q@Fc(bxcRow9Il%i!P*=CU(yihAg!$6kbPL^U(J2thRu=R4nbIN$l;H+{jr;EgP2 z{vB`7r-#CB_ggqdB_t+|W>PZeDRR@orbUaF7}w=YNTlvPyEs;Q5r31|A)49<ERwx#8@<4IMaB( zTp)=-wqEswwWR@7-=b@(eIn<}#f+`9>h+zPxvF-Xl2Vzsgv^J9hs7FCF6X>utL|4t zDE^eYlFO@o99YH z(1|d*(2WTCu@C!k07r2K=Wreua0OR!4cGAyPcejNc!en5<0FRg!x*Sg+&|4hK`VWs z?x5Ef@De$W{as(N^(XyXKU0$4q{hfIvs?ue%BEJ$oIBr`SG=5==DFkYxMG|rE{Wvk z$l1zxrI?8!CRo%;d8snV(igB(rBs>1m~cd`dlhbli&3_+l~zyZibxcyNo1w7W^*nQ zY;h~CsWnfNY*)K)18F+K8fpBI>cA3q%j~&FluzYx6K?&N<)t_@DXxdpmkN z26LPkHr~42A{!?!uU%2c>W#hb@kYbUSw@w6f$`PjDr(W&V$mjjmnK2%w|kVL$^Me^ zibcVcLvm&_H_I`$dA-REx~fGx^)0#<7oE0!VskVU(Nfux)8WmP<(96nzO75|+$s44 zAw5_i7mCt#VUrr_)aqlZIjLG5xj>#I2pi3MFs|{pYF$%2MP|(jQ$|P-t;+Nnj5Tl7 zqpFk;m!?alWlUb%9tvyik}3+;sk!r*ysle|C-hiUs>>7?Rx-JzEf$Gz%?T%F@Upp$ zaW~tWG(Wh88bMfZYFMnQnkx7Lp;j^g$(Ig9)~2E_P?{+Sluxs%npRRP#i@r5 z({bvjA-X{~={`NC6uqSP^oc&x7y3#+U_%zh!43z!$VFZf3Z`K;=Aaw_)S>~au?CIU zh*pHr1{Dz`(1mVn$6oBm0UX339KmUv#W|eE6kj#Ecf7Eh|IXzepW`WMvNYDZXi1PUiA+0+e-TXn7s5yMl-_ZC zKS3DF(YZLbB2KIX6{zCGf~ZCfR&i!cXvSJ>=G;P@+*ZyliWqjFawmGQ8+*`~!FL!( yaSSJL5~pwh7jX%f(T^eAz%AUyFz(<1p5QrN;k7Z~nt%`2x^w!(YgvUqJih_r|Jz&u diff --git a/R/Imputation.R b/R/Imputation.R index 863e292a..de20a719 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -109,10 +109,9 @@ addImputeWeights <- function( }else{ weightFiles <- file.path(getOutputDirectory(ArchRProj), "ImputeWeights", paste0("Impute-Weights-Rep-", seq_len(nRep))) } + o <- suppressWarnings(file.remove(weightFiles)) } - o <- suppressWarnings(file.remove(weightFiles)) - weightList 
<- .safelapply(seq_len(nRep), function(y){ .logDiffTime(sprintf("Computing Partial Diffusion Matrix with Magic (%s of %s)", y, nRep), t1 = tstart, verbose = FALSE, logFile = logFile) @@ -124,9 +123,8 @@ addImputeWeights <- function( blocks <- list(rownames(matDR)) } - weightFile <- weightFiles[y] - if(useHdf5){ + weightFile <- weightFiles[y] o <- h5createFile(weightFile) } From 716abd879efb8b4948328b0f089f6d8bca6e1be0 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 21 Feb 2021 21:31:06 -0800 Subject: [PATCH 002/162] bugfix imputation --- R/Imputation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Imputation.R b/R/Imputation.R index de20a719..fad0239c 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -170,8 +170,8 @@ addImputeWeights <- function( for(i in seq_len(td)){ Wt <- Wt %*% W } - rownames(Wt) <- rownames(matDR)[ix] - colnames(Wt) <- rownames(matDR)[ix] + rownames(Wt) <- ix + colnames(Wt) <- ix rm(knnIdx) rm(knnDist) From 35a89aa9931f8acfdefa40ee160bf98a427448bd Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Mon, 22 Feb 2021 22:23:19 -0800 Subject: [PATCH 003/162] trim edge cases tssFeatures --- .DS_Store | Bin 14340 -> 14340 bytes R/CreateArrow.R | 3 +++ 2 files changed, 3 insertions(+) diff --git a/.DS_Store b/.DS_Store index 6465b807d491c2119319c1fc58709491def719fd..dfc4f20f6fd40dc1e8150396079621480fd782b3 100644 GIT binary patch delta 901 zcma*lT}TvB6bJD0-%4iK>pH8euH*QXskvIFsbP{@YnEwNreqo6t~0@+?y9Sn87U+f zeLyhuNkkS!QK6tIWz<6iCeceq6cHGGJSJ2R5>$6~_aS;}9_F5V&b^m2^SguXgYBa+ zR!r%0Q8tl<%bP2=R8=#5z!t67tXQd++MN37jA)xBR!K>7{VS| z+E$-x>{J64IZ<9F2#rSG*{Sij8aO6BLuPe;&mm2631T3VmCaaPi^r=`|GJQNV=>$2?q36w|WR839PNquyR&eJfB(*#Y@6M9aw^p-x- zSDL49^qm%9f*IkkAOd#8Asz}ck%!gDM;R(ngYDRXS~Q{wZZtzh8~o@-4-Q}e$8a1c za1v*58N;}WYq*2E)fmAj9%CBMFoRjV!h3wdJbvI;uul_$Bh`e)(171O0M)kod)nQt zJ~xr+N(f)sWa$5kZ?n9y82={979A6toR+E2$q9OsV~xOpDq-oA=OwH*EqVx z!pa1$D1`+M+TdhNBICM=e+ilYpJ;|&(g)7-6NE5M6UA9lI7tqQP|8W1C`Sdhag?2? 
zLp}C!7#F8$;V@qKa1g!d!(kl3P>AOg&f**{;36*JI&R=5ZrO1g XHQwQqJ`$CL&tp9?L*ke@c_H=>@|n&< delta 929 zcmaLVTSyd97zgn0zg{}iDLw0Lbu35oZkAhGWRds6QZvQKtSqKTUf%vBO+%pX*TC`uYMY5jkF)u;pv}td&|=T&zu!hNEor(lYZ3 zii%4r1JS}76C205Hk6jy-fow#SEMr*h!NIko+778VvKE5+kDQt zPSv&B~iW~)ohcME%PR?Yrj*pv0&qc`J6ZHadoJ|8~T-f370py z+Z;}}Pzxny-dnbu%bR18>=-5-Oem)V0=0b(?t>{LPPsg*p`OULOPU7@=)M&mR=&*?S2 zqZ#@@AL$c)rY`_yV-Dsb3N|D{Zb33~umnp{h*DIe4)xfG2DD%Y+Tg%0xX}e4y3qqa z25=O|a2#iF8CP))*D;J+xQ!7!#xqP}3a=2rG-mJ--?hO4#r{Jd6x8VU_IMobP6v_Y z?7z(w(>(WY^LR;m6BUGxPq3wC<}EH_3`nOBM~(8-ueM);sn3=G diff --git a/R/CreateArrow.R b/R/CreateArrow.R index d793bc69..35b9e4ea 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -854,6 +854,9 @@ createArrowFiles <- function( ) tssFlank$type <- "flank" tssFeatures <- c(tssWindow, tssFlank) + + #Trim In Case Extending beyond Chromosomes + tssFeatures <- GenomicRanges::trim(tssFeatures) #.logThis(tssFeatures, paste0(prefix, " tssFeatures"), logFile = logFile) #Counting From 3187af46f6586c496c4448c040f66fd189f0caa0 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Tue, 23 Feb 2021 22:37:41 -0800 Subject: [PATCH 004/162] bugfix handling low cells when subsetting keep 1 range regardless if its from subsetted cells because it will keep stability --- .DS_Store | Bin 14340 -> 14340 bytes R/ArrowUtils.R | 21 ++++++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.DS_Store b/.DS_Store index dfc4f20f6fd40dc1e8150396079621480fd782b3..509c71c009ad455434a55497701d856e1daa513a 100644 GIT binary patch delta 878 zcmZ{iOH5Ni6o%(7uX|;HX@M3l1&ZYrAW}h~fDbAL5hNM|0>XxRFXl?2NDC-YD^kP8 zMDhCAm|!#}%0f{PR$w&h!i7pCF(SIsg&Q^+5@UP}Z5J+N7Bgqg$;_O5-}rm|y^}_L z>ZG&RrFy(J+Y)qP;SrHhg18}?#z?h$YU`*rW3p?5db_4eDo%(snKQET3b&MRuU?E} zdP95yrP@SBo^M1H1Bs+$NowhLs>k|O?+IoVghd0>$I&#&BC0g3z{>ukKU$EC;U1&XH)NM;%gFhY-9tzkt%lF11Hyp3qaa%~Wb~bvP9llZ!C0?-b0Nbw668wkYz;{ ztRyt7Rk^{av8SV35gCX;Qb{(klPc0kI*FSMlk;SpOp`e>PhOI@&&(Nq&<* zAP`|d0uo_FIxN@-D{Lr71uC%{`_WW~X0)IcP8@-Xqv(PcLpX_3IE_(Uz!)w;!*$%o 
z9o)qPrZ9~e%;E)J;|&(@0n7M;6@0^Y?OI{FZI!nRa=_&o=ykf>PC|57{_oZtaGR~o zn#w|Qi{jtM29+inO_r>@O~v+YJ2gk9pjnoDSzz+{m{1gjNvtH`CX@L+WZbqHJyiwstG(Hc>&8xa{<7dwaUTc7#@ zSt4J_Ds+fK45uw|(z%>+DXLJzDc7MM4LHa#A3_`2q5Ok-0E0M=VVs2@=Q!|-9Jq$d zxQcPyz)jrZ;P2r+9+cuCCqIXIJi${u!*jgCJ1hr3hYq3dvNyDb7JQuY8%M>lGsdZC H%Ma6EEcL~Q delta 918 zcmaKqT}V@57{{OgOgpE&>Y2?sk4rZ+OHKU9EVFEDWtpaZEV2)x&2xf7b6aj&W@M0H zR9+qWBqEEVs7TO^GUy_LlIW&IAkjtiaamAZNKn1o3<;w5;ylm!Kj(eV^Sr-*U#KrM zWHIHA`9gjz94RlKfFWr{a>`6WoMmLG^4f-VjZI9?bGY?V^Af$wVK=8`*&K6=%c^SX z+|hK&l)=o*q9fDVyIoV%Kt$W4sU4!pa8&dJyzOcJnS;uSepE)awrG2Pvv@~09%rf{CjJYnsiDtQGlu9Pfb%(GVu zC{kGLO-SVxOy1)6`c%K9h(i3UMb%8++NE}cwO~MMoD!GTGP$)a*xt^24x6xm%du%> zr@T^-o(j8SKX3AeBP}5{Fx^(YAZ&|OT#BM9LcVhm#aF&_J_(78NMBxJ?VUrFw2GQ& zH+4`C4bWK{q!AjWd-Rx|Q zHewTYU^jedgMxO1(TOhX#}OREah$+OoWey6;xew_7H(^}gCRV^Q#`{sqIiY3_=GR` zj-QE+4M;?4o{Ccezjy-LpEn=>t0_PIr+ zm5Z0TbXP%(K52IK8VhZV!xp1C*|N2`&}Nei+y~qlIjqc9A#$WrQvB6c#i|9)PD)Nx zYgnzw@k%oi)jH-BI7BHWe$YxcV-m&OCjLz$K2H9h**pP#b&2#JDC6{!-f Date: Tue, 23 Feb 2021 22:41:26 -0800 Subject: [PATCH 005/162] update --- .DS_Store | Bin 14340 -> 14340 bytes DESCRIPTION | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.DS_Store b/.DS_Store index 509c71c009ad455434a55497701d856e1daa513a..6c7721e28182e25bebe48ac72f527b5f8b934978 100644 GIT binary patch delta 14 VcmZoEXeromRDjW9^DzNCbpS5}1(g5* delta 14 VcmZoEXeromRDjWZ^DzNCbpS5@1(W~) diff --git a/DESCRIPTION b/DESCRIPTION index 40311f0c..0f411743 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ArchR Type: Package -Date: 2020-11-23 +Date: 2021-02-23 Title: Analyzing single-cell regulatory chromatin in R. 
-Version: 1.0.1 +Version: 1.0.2 Authors@R: c( person("Jeffrey", "Granja", email = "jgranja.stanford@gmail.com", role = c("aut","cre")), person("Ryan", "Corces", role = "aut")) From af56a4924314bbf43d2fb06dda35cda56d2b4eca Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 15:47:33 -0800 Subject: [PATCH 006/162] maxDist coAccessibilty is 1/2 the size it should be. --- .DS_Store | Bin 14340 -> 14340 bytes R/IntegrativeAnalysis.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 6c7721e28182e25bebe48ac72f527b5f8b934978..9dba1b5a5ff21095dee9588709f821d9a0e5aa02 100644 GIT binary patch delta 865 zcma))OH30{7=`ZvfjeOWQ=l;AHI^1j4WI%Mhytxec@;4N5rv?I8O;Rxpe;V|Q4J)< zD65GE(Va0q>cT|C_+A(YF~)_|g)Uqe6E_-7bYooTofZ?8-o+&Uf9}mW`OZuQrUG{t z1k`Ga!l5*=wXLG};d^^LArreMh8bNE~h zMlrHal@(uDJ*X-{&X6?8_4qtqC4NOTi3L=!(7y2@b!=P>AB$xPhRmX3kspvfVWrbA z>#txwI=u&+#M!pSDuIl)7FLM%#RQ_fd%LWt`bQGF3P;kAjcrh?4% zj(Uca(Zp&qUq^)wZqH#Q7*hSd#BvMYKn*D8`iA{pFAE-uTQ@mGaZtB*dqUx^fZ}7Z zmYBaDxS7S;t7}eK)i-EG(OhV^SrQzRhdv58kw5P zI!mbATudeUPsOc*uH7_d{3r33WSM+qw10yz#_7mrl+BEC1!@^#Cz`PxJJ5w)jBz)5 z8C5 zW0rSB%T@cTs+lorbsI&N8e_$pW{I^bjx5*a?M1sP=Hs}@9B*OF7<0ISLy{C0l5Dcv z)Yqn;=+gtIc?PBPW^RgOsdAdcpVC&<6VPgXs@|?^YS@ue0W?PVhP?AEN z#7#;`J!vOCGDt3w5i&uh$TWFP-jgrnD_JDVnAU%v0v_ks_Yd=on6b5k~!?+;eUJ-B&T*nQJU=+7;M?ii6+e19Y Z69IV&(|C>-c!?Rj!3QjiA4r^Z`~ll4!7=~< diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 543d1eff..a34ae4f0 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -760,7 +760,7 @@ addCoAccessibility <- function( #Create Ranges peakSummits <- resize(peakSet, 1, "center") - peakWindows <- resize(peakSummits, maxDist, "center") + peakWindows <- resize(peakSummits, 2*maxDist + 1, "center") #Create Pairwise Things to Test o <- DataFrame(findOverlaps(peakSummits, peakWindows, ignore.strand = TRUE)) From fe4810162e293d98101c277f4cb5ff434280829f Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:46:03 -0800 Subject: 
[PATCH 007/162] fix NA's in bam/tabix and max/minFragSize --- .DS_Store | Bin 14340 -> 14340 bytes R/CreateArrow.R | 47 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/.DS_Store b/.DS_Store index 9dba1b5a5ff21095dee9588709f821d9a0e5aa02..3d51ef3aac9b36926a8d98f5adec91e6f926f1c5 100644 GIT binary patch delta 878 zcmaKpOGs2v7{|}wGS2O!p7Aw$y*{SV85zxImRk83OU!5GMD{X9?+u1@W^_hDDQ2P2 z!)SA(q6n)+kwyijR8)%=rnCrZ2!dL)aTByDD%#kYJ5nvGvpAgp_vd`y$Nx9(ANSua zFi9*x%TMMCvsB)%aU^>K0^y=;Sk*DK`;6+h#-bA zj1Z3FBu-%hXK@)Uwz^oXUZU4rc zB1v!Kqij=B%{h6FqVmcuPQ5<2g9^AAN+!$JCmps>M!KA4S*s{Kk|7O8q|L4{4#8P6 zWPx3{F~W=dV`mrt24o%`{C}|hkb0J z5K*0m!akrwM3fzZ%)aXog+z47?NEmf5p*#M>JlC5ommfFo5L{A^PiXJncr`wgVVt~ z>n#FXEw#0`buf=E+ueF?MxDNtWwrNfBLRPpHmQmbW>d3r?M0=PHT8||Sej@_XBjM0 zzgj9q_h^dhk7)ZfH6&OPri5O<*Qc7VNH!^liVoU8F|3VGXpzIQbkUNQmoKqF#T!w( z0*Y};5${pKObYTPX~0O_A(h`1O5>B9Qi&jXX>XXX67Q6;3PB7Dy`!2xs>Ek)WYttu zguXHFusRlRZDMs)>=bq#P(xuY;Ey*qvj%EGnb1EH@cH=SVRLkoOOl3+(H?I&(j8R& ze60mO-&nYXueICAnia)ZiaN;hD6wmM+3wVb`#PA-U@D)q=rvJ6c`YWrG zJC|GATH9IYO6T%Q=O0oH(bkMLnOSR#O3JHhw>0a`h22!LW#{DPF= start(tileChromSizes[x]),] @@ -1622,13 +1636,18 @@ createArrowFiles <- function( .logThis(unique(dt$V4), name = paste0(prefix, " .bamToTmp Barcodes-Chunk-(",x," of ",length(tileChromSizes),")-", tileChromSizes[x]), logFile = logFile) } + #No NAs + dt <- dt[!is.na(dt$RG), , drop=FALSE] + dt <- dt[!is.na(dt$start), , drop=FALSE] + dt <- dt[!is.na(dt$end), , drop=FALSE] + #Care for Break Points - dt <- dt[dt$start >= start(tileChromSizes[x]),] - dt <- dt[dt$end - dt$start >= 10, ] #Minimum Fragment Size + dt <- dt[dt$start >= start(tileChromSizes[x]),, drop=FALSE] + dt <- dt[dt$end - dt$start >= 10, , drop=FALSE] #Minimum Fragment Size #Check for valid barcodes if(!is.null(validBC)){ - dt <- dt[dt$RG %in% validBC, ] + dt <- dt[dt$RG %in% validBC, , drop=FALSE] } if(all(!is.null(dt), nrow(dt) > 0)){ @@ -1790,8 +1809,10 @@ createArrowFiles <- function( outArrow = 
NULL, genome = NULL, chromSizes = NULL, - minFrags = 500, - maxFrags = 100000, + minFrags = 1000, + maxFrags = 100000, + minFragSize = 10, + maxFragSize = 2000, sampleName = NULL, verbose = TRUE, tstart = NULL, @@ -1922,6 +1943,12 @@ createArrowFiles <- function( #Order RG RLE based on bcPass fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] + + #Check if Fragments are greater than minFragSize and smaller than maxFragSize + fragments <- fragments[width(fragments) >= minFragSize] + fragments <- fragments[width(fragments) <= maxFragSize] + + #Length of BC lengthRG <- length(mcols(fragments)$RG@lengths) if(x == 1){ @@ -2002,6 +2029,12 @@ createArrowFiles <- function( #Order RG RLE based on bcPass fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] + + #Check if Fragments are greater than minFragSize and smaller than maxFragSize + fragments <- fragments[width(fragments) >= minFragSize] + fragments <- fragments[width(fragments) <= maxFragSize] + + #Length of BC lengthRG <- length(mcols(fragments)$RG@lengths) if(x == 1){ From 49f87b77731545ef7f3e8b196d5088f096e15ae9 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:47:03 -0800 Subject: [PATCH 008/162] Update createArrowFiles.Rd --- man/createArrowFiles.Rd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index 14648b84..e06f9ff5 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -14,6 +14,8 @@ createArrowFiles( minTSS = 4, minFrags = 1000, maxFrags = 1e+05, + minFragSize = 10, + maxFragSize = 2000, QCDir = "QualityControl", nucLength = 147, promoterRegion = c(2000, 100), @@ -69,6 +71,10 @@ Cells containing greater than or equal to \code{minFrags} total fragments wll be \item{maxFrags}{The maximum number of mapped ATAC-seq 
fragments required per cell to pass filtering for use in downstream analyses. Cells containing greater than or equal to \code{maxFrags} total fragments wll be retained.} +\item{minFragSize}{The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} + +\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} + \item{QCDir}{The relative path to the output directory for QC-level information and plots for each sample/ArrowFile.} \item{nucLength}{The length in basepairs that wraps around a nucleosome. This number is used for identifying fragments as From c08c9131b2e818764a463e7f3a83a203480cc02c Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:47:39 -0800 Subject: [PATCH 009/162] documentation --- R/CreateArrow.R | 4 ++-- man/createArrowFiles.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index e15beb59..f5c637bf 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -22,7 +22,7 @@ #' @param maxFrags The maximum number of mapped ATAC-seq fragments required per cell to pass filtering for use in downstream analyses. #' Cells containing greater than or equal to `maxFrags` total fragments wll be retained. #' @param minFragSize The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize. -#' @param maxFragSize The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize. +#' @param maxFragSize The maximum fragment size to be included into Arrow File. Fragments above than this number are discarded. Must be greater than maxFragSize. #' @param QCDir The relative path to the output directory for QC-level information and plots for each sample/ArrowFile. 
#' @param nucLength The length in basepairs that wraps around a nucleosome. This number is used for identifying fragments as #' sub-nucleosome-spanning, mono-nucleosome-spanning, or multi-nucleosome-spanning. @@ -1261,7 +1261,7 @@ createArrowFiles <- function( dt <- dt[!is.na(dt$RG), , drop=FALSE] dt <- dt[!is.na(dt$start), , drop=FALSE] dt <- dt[!is.na(dt$end), , drop=FALSE] - + #Care for Break Points dt <- dt[dt$V2 >= start(tileChromSizes[x]),] diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index e06f9ff5..4b31cfa2 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -73,7 +73,7 @@ Cells containing greater than or equal to \code{maxFrags} total fragments wll be \item{minFragSize}{The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} -\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} +\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments above than this number are discarded. Must be greater than maxFragSize.} \item{QCDir}{The relative path to the output directory for QC-level information and plots for each sample/ArrowFile.} From 9f008ad2da9fd0a273ac5927c41eb4b0db4ff32a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 10 Mar 2021 14:29:01 -0800 Subject: [PATCH 010/162] improve offsetPlus and offsetMinus description offsetPlus and offsetMinus only apply when input is a bam file. --- R/CreateArrow.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index f5c637bf..fae3f845 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -45,8 +45,10 @@ #' @param bamFlag A vector of bam flags to be used for reading in fragments from input bam files. Should be in the format of a #' `scanBamFlag` passed to `ScanBam` in Rsamtools. 
#' @param offsetPlus The numeric offset to apply to a "+" stranded Tn5 insertion to account for the precise Tn5 binding site. +#' This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. #' See Buenrostro et al. Nature Methods 2013. #' @param offsetMinus The numeric offset to apply to a "-" stranded Tn5 insertion to account for the precise Tn5 binding site. +#' This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. #' See Buenrostro et al. Nature Methods 2013. #' @param addTileMat A boolean value indicating whether to add a "Tile Matrix" to each ArrowFile. A Tile Matrix is a counts matrix that, #' instead of using peaks, uses a fixed-width sliding window of bins across the whole genome. This matrix can be used in many downstream ArchR operations. From 921d8dbfc986cf8838f7219af383758bc20d74a4 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 21:04:30 -0700 Subject: [PATCH 011/162] check for peakset matched in ArchRProj for addBgdPeaks --- .DS_Store | Bin 14340 -> 14340 bytes R/MatrixDeviations.R | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.DS_Store b/.DS_Store index 3d51ef3aac9b36926a8d98f5adec91e6f926f1c5..22ad7da9874ee5b18c6c49e75e81f542cc5dd9d8 100644 GIT binary patch delta 955 zcmaKqTSyd97{|Z=nl_V19d*@p9CO9e(iL~j3ubv~%kDQ@$u5Mu%>+a1uDNQNl|4vB zB?LoPqO#~lMS*2>p&lYAiC)@>A|j&8V?y;1L3L(#A9|3^!}-4R|IRsl=luQy9RnRV zY9-@h=76=*S?REsx^_D3eJ)ou&t1-Pqdu$30c!S#dfYyL&_`ss=j-t6p)RNM4J>gJ z;u9tc;-o~DBrjOFXz>zeogqCKkQT?pnYB!A-56|d=RJo~;o!3Q8skBlt=LR>4^4uw#_XX{ zRW(&8XlQ#+<|t^G7Kv0qWXvwH_2*JK&7~#OMxE40gLI6B=q8QOZF)q{C`@nYBYmYY z`bOXBCoD)r5|WVu8?uoD1=CTE8K^)57GfDzU?rNd4sGyZBUH2_gl_a;D|TZa_TvB! 
z;xJBO2xoB)S8+|pb=<&1Ji$|pB8-=Khc6hz5B!eyX+bp7vRI4-{NYK|2OTjVoEhH- z%6!uZ|N6k1{yN$GrgUpYW|n>G^wRR`S&c^Hl;r}4oulNkJno4?BQGPpKe13@9J3hC z$(C)!g^D6sxC6{>tW0r<9HtZ(St=E*O5oI__-M6;)ruUfG$C58V+{hwCnZEKn&W0n zBGYH$_-LY!^aPA~CDMNojnWJH!14S7A%UYw=UDPNk}}kwo+EK%E}F2MLsh9S9pt0 U#-Hpwd>#&D?h%I*?LV{r0@{|^SO5S3 delta 989 zcmaLVZA?>F7zgm@Uj*;*D4wFwdZ|#tn_{Cao!}cNbR)buCbCTD%WS!KNwd;cT8MzP zB=|a(CCi<EyXxSX{cYZ3{j*vdV1EeCoA z28RwOj(lm{bQZhEhzQgfiuL;fp-6zpG9HMAg0Z;UeGitb1=%?Z6}E`;C2Dg^b8B0> zF|Ottf7x8dA*;_QDOqM?>|(CXQBq#D?v0K0zSL4-&0DsdbEDN>l^kI#ov^P^)m?po z;K9CNd_a^cN-9rSm+~Uj$;70xSM$g9?GY{bVNlo76}3n$Qbib|Dt z#e##n@GIuUYh0XneH099B9{4-+r!m&L;ips5}KDO>2vFAxcXkd-Wv->!lEsc+_agi zySgJiJ$g7Ev%)3Qb4?k4=Gl{T3)w@HqU@gQp+VDhO(~U=Ngu)jwy-R3rT z0;JGNa#JI<(te840FBZaI#0jQBu&v3x%P4C$dq~I-0RQ{r}dEv~|Vn*7=Ox%{77Z3vABvsj*d5(n_8^dv^pr zG$yjm>L$L0Nlmf9tZwBEiZl~BX7x?p#H^MPCTKp7q$x5V$28rdel1yR>m+&io!)08>-$ Date: Sun, 28 Mar 2021 21:47:38 -0700 Subject: [PATCH 012/162] add check for default assay in seurat object. --- .DS_Store | Bin 14340 -> 14340 bytes R/RNAIntegration.R | 6 ++++++ 2 files changed, 6 insertions(+) diff --git a/.DS_Store b/.DS_Store index 22ad7da9874ee5b18c6c49e75e81f542cc5dd9d8..e10ad6b0d5facb516d5c7c93bb0ef0c107544f6c 100644 GIT binary patch delta 739 zcmZvZTSyd90EYM9b=Py+i9MsG$6azltxYu7QpyrvCoVasWs(8OIOBx`*}3i3+IB4p6er?uQkwNyiyK?le5 zgleDgO;R0UDhi9RjU*!cPwA#_NsvScD3KEFD#mI!3__5;8=$s zHlPxl(Tok%{L+#T)0b;!;zrT;jVn~ha|0kZZ$5F4>oKx;z>c48O$u}5Ir zi?EF?f+)ri6Y!EaifJ6j37o_^oW})$?~=ebFYw*KP29m<+{1l5z+*hcGdveSUz(Rg a#d7Z-V7VtgFPtNQ#magSmCa6@K8HA0Y#29lCk}6=1LR!u%WcHZsQG*F>drZ~$>YAE%csXBANS~$qhc)q5Q`R-^6`TeP z{gB3jL{2N7YDxn`dQ@eGoKd{Bly`@NAuY^QnWPGn;@rA=%6mq&VMC8aS(~K_v9XEr z-oaQTqD2#i1Jy#dLUA8&CTy1UTR(3P8i~%h7X25ig^=Brvq@DomAHHZpC#*Y`BtW< z;eMa4-w^05#FaH14tP*90ASc^LN(25T1z)p0b4+98c z5Go=tFoIDWz%iV}DV)X`oWm7d#|_-XeLTQJOyW6a@d~ez!aIDzH+;tr{K}-TVJ=Na zW;Xl{hu`80Qdig*jfcXqkR;iz{l8RYi`DA9QeT)?tK7-5Qtn*3Oi`GHilnU7?pCNM zQ%)~cZsFO4BbqOxHA<~4Dq~scRjH%(M3lpFGHwHHl0|(iJL7JkJ|aqDx#^3V{gg7v 
zVe6LVd9>~GaGUErP661vuM#zAz(xVnkIiU7r@*-j0d%8ZAPowrLjq|OF&xAg4&w-p z;=~--vpA0nxQI)*j9a*kJGhH`c!Vc-ifPQ?8D8QI-s2;_OzGDjewO?J DuB@`r diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index 8e11b27e..fee11668 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -203,6 +203,12 @@ addGeneIntegrationMatrix <- function( seuratRNA$Group <- paste0(seRNA@meta.data[,groupRNA]) rm(seRNA) } + + if("RNA" %in% names(seuratRNA@assays)){ + DefaultAssay(seuratRNA) <- "RNA" + }else{ + stop("'RNA' is not present in Seurat Object's Assays! Please make sure that this assay is present!") + } gc() if(!is.null(groupRNA)){ From 66d89887e297d9ce4bdf127816d4d3265a088bc7 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 21:57:18 -0700 Subject: [PATCH 013/162] update check in iterativeLSI to 50 cells to bug handle. Additionally print checks if the error actually occurs. --- .DS_Store | Bin 14340 -> 14340 bytes R/IterativeLSI.R | 8 +++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index e10ad6b0d5facb516d5c7c93bb0ef0c107544f6c..df3c3db35c59395b878b3cb1c962fe9933022de8 100644 GIT binary patch delta 792 zcmaKpOGs2v9LCRQ56^{@o*7@`@%k7aV~t{_spHtgN0@113nw+D7`xZFsWaw`R#J&! 
zviRb z=Uy5aoR`m7bGPBscwlK&;W{QfJ>E{ehiiKj8tyA8Wuj$J?++M$A8%L^-4#rb_vWf_y25juMrkn)2s#cWC)`Zy?yzr~Ce%RZD4G$f?paT{FL0ZNmx&p(K?!h>KK` zX3|CcWQ3d|<7A4=kXiDAydfXSC$c~m$xrf&{04ynE8>xWM5H4FnaDyR%Hc)@wqgf7 zXhAF5(20F8upiwBVhBfY6vuE9r*RfzFmVyra2+?&Fo8)-;Q^-c6tD0ab9j#r_>2X7 z!}l?fZa#}!6Rn10bC^w{Y$>;5YfL1ogv6wb9A`nXYh$%pmEA;TSSe~MOE<@zj^UJK zVY6qdDi1NC^P$p_t1=Fmn-VVba-|z3y-*)3RErdu4qp~7)e=@lxu)fsmZB21 zl4m^{(1@Lq=q|LQ1NuKy2XPRGFoNS4#R;fTCQTY&LM%m?|ha`H+?WKnvBIqS3s2-x1&g}HkQ}b}(f4+0h_woB?CNvYe z?=q)RPp7A=TbQL*ZF`kVb6Fa+NqdpSl$~!ctEjGPY;KEZh~`YeBG|O+6*RV2m6bqL zJ*X;S-kjv+`vX3|k~l(hXdWk)a03$~>iC2jJsi&z%^3xSR2Y(dQKdI1>$ha_A;%|_ z;zCLXb)_w=uvO+b!^uINyy)ZlBkHvAuuv(ucrn8FkE(%~YFgZfblQ}X^7ReHML|&3NBu?Iw zPvk54M!u6D3?3Ih!kKudcT*I0))TU;prn1Tm zbxRvliT+RVjJY;%wf+b6YqCl{GRVI{7}|7XF}QXHw+i(PtQ+mvif!n_4u*CYya}{E zj9?UF3~&%rn8r~Y!*QI(8JuN^&ojhx4DnT5!%f`A9o#kH9u~2L$9RHeJk>5#XFnLW H-Ln4y-ulH? diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 57300734..f4b98137 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -1096,7 +1096,7 @@ addIterativeLSI <- function( #.safeSaveRDS(mat, "temp.rds", compress = FALSE) matO <- mat[, idxOutlier, drop = FALSE] mat <- mat[, -idxOutlier, drop = FALSE] - mat2 <- mat[, head(seq_len(ncol(mat)), 10), drop = FALSE] # A 2nd Matrix to Check Projection is Working + mat2 <- mat[, head(seq_len(ncol(mat)), 50), drop = FALSE] # A 2nd Matrix to Check Projection is Working colSm <- colSm[-idxOutlier] filterOutliers <- 1 } @@ -1196,6 +1196,12 @@ addIterativeLSI <- function( cor(pCheck[,x], pCheck2[,x]) }) %>% unlist if(min(pCheck3) < 0.95){ + print("Check1 :") + print(head(pCheck)) + print("\nCheck2 :") + print(head(pCheck2)) + print("\nCheck3 :") + print(pCheck3) stop("Error with LSI-projection! Cor less than 0.95 of re-projection. 
Please report bug to github!") } #Project LSI Outliers From 1823c9925ceacc69ef0d809536c5c1ae7856ff33 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 22:02:43 -0700 Subject: [PATCH 014/162] Update IterativeLSI.R --- R/IterativeLSI.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index f4b98137..eebe6b51 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -1196,13 +1196,10 @@ addIterativeLSI <- function( cor(pCheck[,x], pCheck2[,x]) }) %>% unlist if(min(pCheck3) < 0.95){ - print("Check1 :") - print(head(pCheck)) - print("\nCheck2 :") - print(head(pCheck2)) - print("\nCheck3 :") - print(pCheck3) - stop("Error with LSI-projection! Cor less than 0.95 of re-projection. Please report bug to github!") + .logThis(pCheck, "pCheck", logFile=logFile) + .logThis(pCheck2, "pCheck2", logFile=logFile) + .logThis(pCheck3, "pCheck3", logFile=logFile) + warning("Warning with LSI-projection! Cor less than 0.95 of re-projection. 
Please report this to github with logFile!") } #Project LSI Outliers out$outliers <- colnames(matO) From 803fba670a689ed4d6c1afb9b7c281a587206421 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 23:02:10 -0700 Subject: [PATCH 015/162] checking fragments for tile matrix creation --- R/MatrixTiles.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index fa22896f..0c6ecb1a 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -204,6 +204,29 @@ addTileMatrix <- function( .logThis(min(matchID), paste0("MinCell_TileMatrix_",z,"_",chr), logFile = logFile) .logThis(max(matchID), paste0("MaxCell_TileMatrix_",z,"_",chr), logFile = logFile) + #Check Fragments for validity in case + nf1 <- length(fragments) + + #Check 1 + fragmentsBad1 <- fragments[!(start(fragments) >= 1)] + fragments <- fragments[start(fragments) >= 1] + + #Check 2 + fragmentsBad2 <- fragments[!(end(fragments) < chromLengths[z])] + fragments <- fragments[end(fragments) < chromLengths[z]] + + #Check N + nf2 <- length(fragments) + if(nf2 < nf1) + warning("Skipping over fragments not within chromosome range on Chr:", chr) + .logThis(fragmentsBad1, "fragmentsBad1", logFile = logFile) + print("Bad1 (Start not greater than 0): ") + print(fragmentsBad1) + print("Bad2 (End greater than chromsome length): ") + .logThis(fragmentsBad2, "fragmentsBad2", logFile = logFile) + print(fragmentsBad2) + } + #Create Sparse Matrix mat <- Matrix::sparseMatrix( i = c(trunc(start(fragments) / tileSize), trunc(end(fragments) / tileSize)) + 1, From faf6debd16fe7a92b48a13425e8719caa38dbbcc Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 20:18:59 -0700 Subject: [PATCH 016/162] bugfix inputFiles improperly being called instead of ArrowFiles --- .DS_Store | Bin 14340 -> 14340 bytes R/MatrixGeneScores.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 
df3c3db35c59395b878b3cb1c962fe9933022de8..ff9ed4861ffa9f704e0e0d4294960fedcc5e653e 100644 GIT binary patch delta 792 zcma*kO-PhM7zgm@?^o~ZWX^88uC5!dNhW3W?xwb>X}MOWlA=a-NZZv7UR-w9-d#b> z>>yDY9egVz73)%zeSs-ObqV^S@SxHmymY8TK^L*WOAWh1mx$&tFwe|9%>4cXp@GoD z5{~zJDL-w>?Co_pTm|{|EzTlmU%tK9<#g#Y+=5Yk#-8OKBO*|PU+Z#v{Xs8L+yigO zuWDgM`G~mqgv6v(lAO#~inZ!MRdr2mE9Vi-*WmMe!fKbV(i8S+L#E?$v)UF6G^#zm zp>(rFmM7L&Y}p$&Z7bMW=AKXG2IE>2WBNBko}Dktaq*1mS*Xb5M8EUNJKJ>!qJXAY=ui0mZJi0>=lmnXh0*Ha1^ak$u{^AKoB8x zqX#F^hyJMNATHn{hH-U0u3-dYxQSc1EoARv3e$LuCwPiyc!?Rz>2+yYSeOi^UC#au DE|g?L4NY8r{P*f4D6W4D)tXNuIprOlye;kHtcfF5a|MR@X3b z*VNjla*#rW8lnlBq$zqu@8}bKrUhE0pY)49}Es!KF$6-%mRV!+&CV0JO&b_r*$$0q(~{BVWfh79dChY Date: Sun, 4 Apr 2021 20:25:54 -0700 Subject: [PATCH 017/162] bugfix missing logFile in addIterativeLSI --- R/ArrowRead.R | 3 ++- R/IterativeLSI.R | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 20bbdb6c..35ac08f8 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -719,7 +719,8 @@ getMatrixFromArrow <- function( tmpPath = .tempfile(pattern = paste0("tmp-partial-mat")), useIndex = FALSE, tstart = NULL, - verbose = TRUE + verbose = TRUE, + logFile = NULL ){ ######################################### diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index eebe6b51..0fc60880 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -544,7 +544,8 @@ addIterativeLSI <- function( cellNames = cellNames, doSampleCells = FALSE, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) #Compute LSI @@ -591,7 +592,8 @@ addIterativeLSI <- function( cellNames = sampledCellNames, doSampleCells = FALSE, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) #Compute LSI @@ -628,7 +630,8 @@ addIterativeLSI <- function( tmpPath = tmpPath, useIndex = useIndex, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) gc() From 46dfca98c688421f4cc37eef0f92ec3a32aca396 Mon Sep 17 00:00:00 2001 
From: jgranja24 Date: Sun, 4 Apr 2021 21:03:10 -0700 Subject: [PATCH 018/162] bugfix check cells in project for ColSums --- .DS_Store | Bin 14340 -> 14340 bytes R/MarkerFeatures.R | 5 +++++ 2 files changed, 5 insertions(+) diff --git a/.DS_Store b/.DS_Store index ff9ed4861ffa9f704e0e0d4294960fedcc5e653e..15f43769510d600e17ec3a9dafa1c541b5e1c4ff 100644 GIT binary patch delta 38 ucmZoEXeroWEilqYvW-B4j&yakp`nF|j)Ja{nNh8dLbZjFg^q%ufsuJ_EhmS#s-dlC eLT+VMbxm#EoXP&;@{Dsgvj{TtZ{}9`DGmTXO%?P2 diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 0ed03a5b..e812beb0 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -191,11 +191,16 @@ getMarkerFeatures <- function( ##################################################### # Pairwise Test Per Seqnames ##################################################### + #ColSums mColSums <- tryCatch({ suppressMessages(.getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) }, error = function(x){ rep(1, nCells(ArchRProj)) }) + + #Subset By Cells in ArchRProj + mColSums <- mColSums[ArchRProj$cellNames] + if(all(mColSums==1) & is.null(normBy)){ normBy <- "none" } From d9eda0a004eee65050771219c0e181ebf2f3318f Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 21:04:27 -0700 Subject: [PATCH 019/162] bugfix getColSums check cellNames --- R/IntegrativeAnalysis.R | 2 ++ R/IterativeLSI.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index a34ae4f0..dd2fe151 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -774,6 +774,8 @@ addCoAccessibility <- function( #Peak Matrix ColSums cS <- .getColSums(getArrowFiles(ArchRProj), chri, verbose = FALSE, useMatrix = "PeakMatrix") + cS <- cS[ArchRProj$cellNames] + gS <- unlist(lapply(seq_along(knnObj), function(x) sum(cS[knnObj[[x]]], na.rm=TRUE))) for(x in seq_along(chri)){ diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 0fc60880..8a341c5b 
100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -285,7 +285,7 @@ addIterativeLSI <- function( v }, error = function(e){ tryCatch({ - .getColSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix, seqnames = chrToRun) + .getColSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix, seqnames = chrToRun)[ArchRProj$cellNames] }, error = function(y){ stop("Could not determine depth from depthCol or colSums!") }) From 1a62519734323edb43fb36c49242afcef2aedb00 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 21:06:55 -0700 Subject: [PATCH 020/162] bugfix --- R/MatrixTiles.R | 2 +- man/createArrowFiles.Rd | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 0c6ecb1a..451384eb 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -217,7 +217,7 @@ addTileMatrix <- function( #Check N nf2 <- length(fragments) - if(nf2 < nf1) + if(nf2 < nf1){ warning("Skipping over fragments not within chromosome range on Chr:", chr) .logThis(fragmentsBad1, "fragmentsBad1", logFile = logFile) print("Bad1 (Start not greater than 0): ") diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index 4b31cfa2..f1bd3018 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -106,9 +106,11 @@ gsubExpression would be ":.*". This would retrieve the string after the colon as \code{scanBamFlag} passed to \code{ScanBam} in Rsamtools.} \item{offsetPlus}{The numeric offset to apply to a "+" stranded Tn5 insertion to account for the precise Tn5 binding site. +This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. See Buenrostro et al. Nature Methods 2013.} \item{offsetMinus}{The numeric offset to apply to a "-" stranded Tn5 insertion to account for the precise Tn5 binding site. +This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. 
See Buenrostro et al. Nature Methods 2013.} \item{addTileMat}{A boolean value indicating whether to add a "Tile Matrix" to each ArrowFile. A Tile Matrix is a counts matrix that, From 73287e62b2340ec9e3c9ed62c76c16820e417826 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 11 Apr 2021 21:29:26 -0700 Subject: [PATCH 021/162] bug fix sampledCellNames arrowRead --- .DS_Store | Bin 14340 -> 14340 bytes R/ArrowRead.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 15f43769510d600e17ec3a9dafa1c541b5e1c4ff..c201598da24f47b1678239b13fdff6ae6e5eb315 100644 GIT binary patch delta 16 XcmZoEXerpRUx3-d)M)b|0Vj0;IJpJ* delta 16 XcmZoEXerpRUx3-n(qQu;0Vj0;IJpJ* diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 35ac08f8..eaed91ef 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -784,7 +784,7 @@ getMatrixFromArrow <- function( matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .) mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .) - if(!all(cellNames %in% colnames(mat))){ + if(!all(sampledCellNames %in% colnames(mat))){ .logThis(sampledCellNames, "cellNames supplied", logFile = logFile) .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) stop("Error not all cellNames found in partialMatrix") From 0291b6fee341a8c910d016a09be432a022fa01d8 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 21 Apr 2021 20:26:24 -0700 Subject: [PATCH 022/162] fix fast H5Fopen to be read only --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index eaed91ef..9b8f9bdf 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -1021,7 +1021,7 @@ getMatrixFromArrow <- function( ){ if(tolower(method) == "fast" & is.null(index) & is.null(start) & is.null(block) & is.null(count)){ - fid <- H5Fopen(file) + fid <- H5Fopen(file, "H5F_ACC_RDONLY")) dapl <- H5Pcreate("H5P_DATASET_ACCESS") did <- .Call("_H5Dopen", fid@ID, name, dapl@ID, PACKAGE='rhdf5') res <- .Call("_H5Dread", 
did, NULL, NULL, NULL, TRUE, 0L, FALSE, fid@native, PACKAGE='rhdf5') From 400ecf9357cbc16a80079dab789af3e3a994dd16 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Apr 2021 07:24:59 -0700 Subject: [PATCH 023/162] remove extra paren --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 9b8f9bdf..13111d05 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -1021,7 +1021,7 @@ getMatrixFromArrow <- function( ){ if(tolower(method) == "fast" & is.null(index) & is.null(start) & is.null(block) & is.null(count)){ - fid <- H5Fopen(file, "H5F_ACC_RDONLY")) + fid <- H5Fopen(file, "H5F_ACC_RDONLY") dapl <- H5Pcreate("H5P_DATASET_ACCESS") did <- .Call("_H5Dopen", fid@ID, name, dapl@ID, PACKAGE='rhdf5') res <- .Call("_H5Dread", did, NULL, NULL, NULL, TRUE, 0L, FALSE, fid@native, PACKAGE='rhdf5') From 7ff7e4d2306d06304d0a1a73bd6bb2a718a3a724 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Apr 2021 20:20:17 -0700 Subject: [PATCH 024/162] Add requirement for genomeSize argument when using non-standard genomes. --- R/ReproduciblePeakSet.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 8699e748..6c50c0cf 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -20,7 +20,7 @@ #' This is important to allow for exclusion of pseudo-bulk replicates derived from very low cell numbers. #' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from peak calling. #' @param pathToMacs2 The full path to the MACS2 executable. -#' @param genomeSize The genome size to be used for MACS2 peak calling (see MACS2 documentation). +#' @param genomeSize The genome size to be used for MACS2 peak calling (see MACS2 documentation). This is required if genome is not hg19, hg38, mm9, or mm10. #' @param shift The number of basepairs to shift each Tn5 insertion. 
When combined with `extsize` this allows you to create proper fragments, #' centered at the Tn5 insertion site, for use with MACS2 (see MACS2 documentation). #' @param extsize The number of basepairs to extend the MACS2 fragment after `shift` has been applied. When combined with `extsize` this @@ -166,6 +166,8 @@ addReproduciblePeakSet <- function( genomeSize <- 2.7e9 }else if(grepl("mm9|mm10", getGenome(ArchRProj), ignore.case = TRUE)){ genomeSize <- 1.87e9 + }else { + stop("Non-standard genome detected. Argument genomeSize is required!") } } From bac49aef6ac74c1d1d049d4c12dc6ea1bf8cd25d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Apr 2021 07:32:17 -0700 Subject: [PATCH 025/162] add default nDim for .loadUWOT Fixes backwards compatibility problem with .loadUWOT --- R/Embedding.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Embedding.R b/R/Embedding.R index e0ead19c..b4e9747a 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -253,7 +253,7 @@ addUMAP <- function( } #New Save UWOT -.loadUWOT <- function(file){ +.loadUWOT <- function(file, nDim = NULL){ tryCatch({ uwot::load_uwot(file = file, verbose = TRUE) }, error = function(e){ From 05d66f20abe6865a330c1409374d4ea33fdecdf5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 30 Apr 2021 07:23:52 -0700 Subject: [PATCH 026/162] Add check for existence of matrix passed to useMatrix --- R/RNAIntegration.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index fee11668..d1137e79 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -143,6 +143,11 @@ addGeneIntegrationMatrix <- function( ######################################################################################### .logDiffTime("Checking ATAC Input", tstart, verbose = verbose, logFile = logFile) + if (useMatrix %ni% getAvailableMatrices(ArchRProj)) { + .logMessage(paste0("Matrix ", useMatrix, " does not exist in the provided ArchRProject. 
See available matrix names from getAvailableMatrices()!"), logFile = logFile) + stop("Matrix name provided to useMatrix does not exist in ArchRProject!") + } + if(!is.null(groupATAC)){ dfATAC <- getCellColData(ArchRProj = ArchRProj, select = groupATAC, drop = FALSE) } From eec8474ddf924149e38a9469784942b5f456cb1f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 30 Apr 2021 14:11:04 -0700 Subject: [PATCH 027/162] update normMethod param documentation --- R/GroupExport.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/GroupExport.R b/R/GroupExport.R index 87bf3719..9102c6f3 100644 --- a/R/GroupExport.R +++ b/R/GroupExport.R @@ -140,7 +140,8 @@ getGroupSE <- function( #' user-supplied `cellColData` metadata columns (for example, "Clusters"). Cells with the same value annotated in this metadata #' column will be grouped together and the average signal will be plotted. #' @param normMethod The name of the column in `cellColData` by which normalization should be performed. The recommended and default value -#' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. +#' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. Accepted values are +#' "None", "ReadsInTSS", "nCells", "ReadsInPromoter", or "nFrags". #' @param tileSize The numeric width of the tile/bin in basepairs for plotting ATAC-seq signal tracks. All insertions in a single bin will be summed. #' @param maxCells Maximum number of cells used for each bigwig. #' @param ceiling Maximum contribution of accessibility per cell in each tile. 
From 980198829169c326f57afbed8a1c738c4f260837 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 10 May 2021 08:02:39 -0700 Subject: [PATCH 028/162] Check for PeakMatrix in addBgdPeaks --- R/MatrixDeviations.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/MatrixDeviations.R b/R/MatrixDeviations.R index 50721fc5..904c049f 100644 --- a/R/MatrixDeviations.R +++ b/R/MatrixDeviations.R @@ -626,6 +626,11 @@ addBgdPeaks <- function( .validInput(input = outFile, name = "outFile", valid = c("character")) .validInput(input = force, name = "force", valid = c("boolean")) + if ("PeakMatrix" %ni% getAvailableMatrices(ArchRProj)) { + .logMessage(paste0("PeakMatrix does not exist in the provided ArchRProject. Add a peak matrix using addPeakMatrix(). See available matrix names from getAvailableMatrices()!"), logFile = logFile) + stop("PeakMatrix does not exist in the provided ArchRProject. Add a peak matrix using addPeakMatrix(). See available matrix names from getAvailableMatrices()!") + } + if(!is.null(metadata(getPeakSet(ArchRProj))$bgdPeaks) & !force){ if(file.exists(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ From d4dd9aef9483febcf6758b311aff58a36f3bf541 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 2 Jun 2021 20:50:58 -0700 Subject: [PATCH 029/162] add force parameter where missing --- R/BulkProjection.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index b02d027e..04b2f1d4 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -9,6 +9,7 @@ #' @param n An integer specifying the number of subsampled "pseudo single cells" per bulk sample. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. 
#' @param threads The number of threads used for parallel execution +#' @param force A boolean value indicating whether to force the projection of bulk ATAC data even if fewer than 25% of the features are present in the bulk ATAC data set. #' @param logFile The path to a file to be used for logging ArchR output. #' @export #' @@ -20,6 +21,7 @@ projectBulkATAC <- function( n = 250, verbose = TRUE, threads = getArchRThreads(), + force = FALSE, logFile = createLogFile("projectBulkATAC") ){ @@ -30,8 +32,9 @@ projectBulkATAC <- function( .validInput(input = n, name = "n", valid = c("integer")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = force, name = "force", valid = c("boolean")) .validInput(input = logFile, name = "logFile", valid = c("character")) - + tstart <- Sys.time() .startLogging(logFile = logFile) From 0d77da238cbcf6f4fa0c8daac4cec7df34c63a44 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 2 Jun 2021 21:19:00 -0700 Subject: [PATCH 030/162] create error when genes lacks symbol column --- R/MatrixGeneScores.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index b7f5548a..afbd14f3 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -103,6 +103,9 @@ addGeneScoreMatrix <- function( if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){ stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!") } + if(!any(colnames(mcols(genes)) == "symbol")) { + stop("No symbol column in genes! 
A column named symbol is exected in the GRanges object passed to the genes parameter!") + } .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneScoreMatrix Input-Parameters", logFile = logFile) From b0a48c86c77f9af66ac7254d782dde917e817284 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 1 Jul 2021 13:10:25 -0700 Subject: [PATCH 031/162] bugfix for tabix dt column names --- R/CreateArrow.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index fae3f845..55e35670 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1260,9 +1260,9 @@ createArrowFiles <- function( } #No NAs - dt <- dt[!is.na(dt$RG), , drop=FALSE] - dt <- dt[!is.na(dt$start), , drop=FALSE] - dt <- dt[!is.na(dt$end), , drop=FALSE] + dt <- dt[!is.na(dt$V2), , drop=FALSE] + dt <- dt[!is.na(dt$V3), , drop=FALSE] + dt <- dt[!is.na(dt$V4), , drop=FALSE] #Care for Break Points dt <- dt[dt$V2 >= start(tileChromSizes[x]),] From f98354ce07f72c86213982e5fbcd9a5bd70d743b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 3 Jul 2021 05:45:52 -0700 Subject: [PATCH 032/162] Fix mColSums Suggested by @andyyhchen --- R/MarkerFeatures.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index e812beb0..b439771d 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -193,14 +193,12 @@ getMarkerFeatures <- function( ##################################################### #ColSums mColSums <- tryCatch({ - suppressMessages(.getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) + suppressMessages(tmpColSum <- .getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) + tmpColSum[ArchRProj$cellNames] }, error = function(x){ rep(1, nCells(ArchRProj)) }) - #Subset By Cells in ArchRProj - mColSums <- mColSums[ArchRProj$cellNames] - if(all(mColSums==1) & 
is.null(normBy)){ normBy <- "none" } From 2aabfc349fcd403388cc26a5f3fd0821b33a9764 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 6 Jul 2021 11:16:34 -0700 Subject: [PATCH 033/162] add useMatrix to param documentation --- R/IntegrativeAnalysis.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index dd2fe151..4e85635d 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -948,6 +948,7 @@ getCoAccessibility <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param reducedDims The name of the `reducedDims` object (i.e. "IterativeLSI") to retrieve from the designated `ArchRProject`. +#' @param useMatrix The name of the matrix containing gene expression information to be used for determining peak-to-gene links. See `getAvailableMatrices(ArchRProj)` #' @param dimsToUse A vector containing the dimensions from the `reducedDims` object to use in clustering. #' @param scaleDims A boolean value that indicates whether to z-score the reduced dimensions for each cell. This is useful for minimizing #' the contribution of strong biases (dominating early PCs) and lowly abundant populations. 
However, this may lead to stronger sample-specific @@ -996,6 +997,7 @@ addPeak2GeneLinks <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = reducedDims, name = "reducedDims", valid = c("character")) + .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) .validInput(input = dimsToUse, name = "dimsToUse", valid = c("numeric", "null")) .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null")) From a861399f489fa2a870e42cdf759166d44543caaf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 16 Jul 2021 13:53:33 -0700 Subject: [PATCH 034/162] fix ylim quantile calculation --- R/ArchRBrowser.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index c496cc9f..b472cb17 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -995,7 +995,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- quantile(df$y, ylim) + ylim <- c(0,quantile(df$y, ylim)) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From 8caa716086832c2560b867b6e4a0d1bab48716e1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 16 Jul 2021 14:02:51 -0700 Subject: [PATCH 035/162] fix ylim quantile again --- R/ArchRBrowser.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index b472cb17..20abfb19 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -995,7 +995,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- c(0,quantile(df$y, ylim)) + ylim <- c(0,quantile(df$y, probs=c(ylim))) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From d596b235d2a64393ed7d90f23ae00842a21e1cf5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: 
Wed, 21 Jul 2021 08:09:59 -0700 Subject: [PATCH 036/162] Fix CreateGenomeAnnotation This function had a very odd logic where an if statement asked if genome param was NULL even though the .validInput statements prevented that from being the case. I think I fixed the logic which will enable more flexible custom genomes --- R/AnnotationGenome.R | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 3d75ac3a..dc504544 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -24,23 +24,30 @@ createGenomeAnnotation <- function( .validInput(input = filter, name = "filter", valid = c("boolean")) .validInput(input = filterChr, name = "filterChr", valid = c("character", "null")) - if(is.null(genome) | is.null(blacklist) | is.null(chromSizes)){ - - ################## - message("Getting genome..") - bsg <- validBSgenome(genome) - genome <- bsg@pkgname - - ################## - message("Getting chromSizes..") + ################## + message("Getting genome..") + #validBSgenome works on both character and BSgenome inputs, which are the only allowable inputs to the param + bsg <- validBSgenome(genome) + genome <- bsg@pkgname + + if(is.null(chromSizes)) { + message("Attempting to infer chromSizes..") chromSizes <- GRanges(names(seqlengths(bsg)), IRanges(1, seqlengths(bsg))) if(filter){ - chromSizes <- filterChrGR(chromSizes, remove = filterChr) + if(is.null(filterChr)) { + stop("Cannot have filterChr = NULL when filter = TRUE!") + } + chromSizes <- filterChrGR(chromSizes, remove = filterChr) } seqlengths(chromSizes) <- end(chromSizes) + } else { + message("Using provided chromSizes..") + chromSizes <- .validGRanges(chromSizes) + } + if(is.null(blacklist)){ ################## - message("Getting blacklist..") + message("Attempting to infer blacklist..") genomeName <- tryCatch({ bsg@provider_version @@ -50,15 +57,9 @@ createGenomeAnnotation <- function( blacklist <- 
.getBlacklist(genome = genomeName) - }else{ - - bsg <- validBSgenome(genome) - genome <- bsg@pkgname - - chromSizes <- .validGRanges(chromSizes) - + } else { + message("Using provided blacklist...") blacklist <- .validGRanges(blacklist) - } SimpleList(genome = genome, chromSizes = chromSizes, blacklist = blacklist) From ef6fd035fe75329aca19d14f3c656503ff3a834e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 21 Jul 2021 09:01:51 -0700 Subject: [PATCH 037/162] annotate that this is pearson correlation --- src/Correlation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Correlation.cpp b/src/Correlation.cpp index 419da16d..e86ea2c4 100644 --- a/src/Correlation.cpp +++ b/src/Correlation.cpp @@ -3,7 +3,7 @@ using namespace Rcpp; using namespace std; -// Adapted from https://github.com/AEBilgrau/correlateR/blob/master/src/auxiliary_functions.cpp +// Pearson Correlation, Adapted from https://github.com/AEBilgrau/correlateR/blob/master/src/auxiliary_functions.cpp // [[Rcpp::export]] Rcpp::NumericVector rowCorCpp(IntegerVector idxX, IntegerVector idxY, Rcpp::NumericMatrix X, Rcpp::NumericMatrix Y) { From 699a886f748066d89fea73db6fd2acd119237f31 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 22 Jul 2021 11:51:33 -0700 Subject: [PATCH 038/162] add the ability to designate sample labels independently addGroupCoverages automatically assumes that sample labels come from cellColData$Sample which assumes that each arrow file represents an individual sample. This is not the case for applications where different samples are mixed together into the same GEM reaction as is the case for multiplexing based on sample genotype or lipid barcode. --- R/GroupCoverages.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 63859218..ca1f7d02 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -6,6 +6,10 @@ #' @param ArchRProj An `ArchRProject` object. 
#' @param groupBy The name of the column in `cellColData` to use for grouping multiple cells together prior to generation of the insertion coverage file. #' @param useLabels A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation. +#' @param sampleLabels The name of a column in `cellColData` to use to identify samples. In most cases, this parameter should be left as `NULL` and you +#' should only use this parameter if you do not want to use the default sample labels stored in `cellColData$Sample`. However, if your individual Arrow +#' files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +#' multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype. #' @param minCells The minimum number of cells required in a given cell group to permit insertion coverage file generation. #' @param maxCells The maximum number of cells to use during insertion coverage file generation. #' @param maxFragments The maximum number of fragments per cell group to use in insertion coverage file generation. 
This prevents the generation @@ -28,6 +32,7 @@ addGroupCoverages <- function( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, + sampleLabels = NULL, minCells = 40, maxCells = 500, maxFragments = 25*10^6, @@ -46,6 +51,7 @@ addGroupCoverages <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = groupBy, name = "groupBy", valid = c("character")) .validInput(input = useLabels, name = "useLabels", valid = c("boolean")) + .validInput(input = sampleLabels, name = "sampleLabels", valid = c("character","null")) .validInput(input = minCells, name = "minCells", valid = c("integer")) .validInput(input = maxCells, name = "maxCells", valid = c("integer")) .validInput(input = maxFragments, name = "maxFragments", valid = c("integer")) @@ -64,6 +70,12 @@ addGroupCoverages <- function( stop("minReplicates must be at least 2!") } + if(!is.null(sampleLabels)){ + if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { + stop("sampleLabels is not a column in cellColData!") + } + } + tstart <- Sys.time() .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGroupCoverages Input-Parameters", logFile = logFile) @@ -118,8 +130,11 @@ addGroupCoverages <- function( # outListx <- SimpleList(LowCellGroup = cellNamesx) or NULL #} if(useLabels){ - sampleLabelsx <- paste0(subColDat$Sample) - }else{ + if(is.null(sampleLabels)) { + sampleLabels <- "Sample" + } + sampleLabelsx <- paste0(subColDat[,sampleLabels]) + } else { sampleLabelsx <- NULL } outListx <- .identifyGroupsForPseudoBulk( From eb8d77a3c45a0350b8bfebd09d07100c730dd40d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 23 Jul 2021 08:42:58 -0700 Subject: [PATCH 039/162] delete requirePackage(genome) When using a custom BSgenome object, no package is installed associated with that object. This causes requirePackage(genome) to fail. This statement is unnecessary and is superseded by validBSgenome. 
--- R/Footprinting.R | 1 - R/GroupCoverages.R | 1 - 2 files changed, 2 deletions(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index baf77703..7c346c0f 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -68,7 +68,6 @@ getFootprints <- function( } genome <- getGenome(ArchRProj) - .requirePackage(genome) .requirePackage("Biostrings", source = "bioc") BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 63859218..45683429 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -585,7 +585,6 @@ addGroupCoverages <- function( .logThis(append(args, mget(names(formals()),sys.frame(sys.nframe()))), "kmerBias-Parameters", logFile = logFile) - .requirePackage(genome) .requirePackage("Biostrings", source = "bioc") BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) From 54efef17f488fabbe81c5df98b019e90e12f1a8c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Jul 2021 07:33:52 -0700 Subject: [PATCH 040/162] Remove unneeded if and update param definitions Removed if statement that prevented `filter = TRUE` in combination with `filterChr = NULL` since this was not necessary. --- R/AnnotationGenome.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index dc504544..876828d7 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -6,9 +6,10 @@ #' @param chromSizes A `GRanges` object containing chromosome start and end coordinates. #' @param blacklist A `GRanges` object containing regions that should be excluded from analyses due to unwanted biases. #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. -#' These "non-standard" chromosomes are defined by `filterChrGR()`. +#' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. 
#' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -#' If no manual removal is desired, `filterChr` should be set to `NULL`. +#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, +#' non-standard chromosomes will still be removed as defined in `filterChrGR()`. #' @export createGenomeAnnotation <- function( genome = NULL, @@ -34,9 +35,6 @@ createGenomeAnnotation <- function( message("Attempting to infer chromSizes..") chromSizes <- GRanges(names(seqlengths(bsg)), IRanges(1, seqlengths(bsg))) if(filter){ - if(is.null(filterChr)) { - stop("Cannot have filterChr = NULL when filter = TRUE!") - } chromSizes <- filterChrGR(chromSizes, remove = filterChr) } seqlengths(chromSizes) <- end(chromSizes) From 779a2d5b29a003e0dff975a1954d165361d4a8aa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Jul 2021 07:48:47 -0700 Subject: [PATCH 041/162] Default sampleLabels to Sample Made default for sampleLabels to Sample and removed checks for if sampleLabels is NULL --- R/GroupCoverages.R | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index ca1f7d02..b2769a86 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -32,7 +32,7 @@ addGroupCoverages <- function( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, - sampleLabels = NULL, + sampleLabels = "Sample", minCells = 40, maxCells = 500, maxFragments = 25*10^6, @@ -51,7 +51,7 @@ addGroupCoverages <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = groupBy, name = "groupBy", valid = c("character")) .validInput(input = useLabels, name = "useLabels", valid = c("boolean")) - .validInput(input = sampleLabels, name = "sampleLabels", valid = c("character","null")) + .validInput(input = sampleLabels, name = "sampleLabels", 
valid = c("character")) .validInput(input = minCells, name = "minCells", valid = c("integer")) .validInput(input = maxCells, name = "maxCells", valid = c("integer")) .validInput(input = maxFragments, name = "maxFragments", valid = c("integer")) @@ -70,10 +70,8 @@ addGroupCoverages <- function( stop("minReplicates must be at least 2!") } - if(!is.null(sampleLabels)){ - if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { - stop("sampleLabels is not a column in cellColData!") - } + if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { + stop("sampleLabels is not a column in cellColData!") } tstart <- Sys.time() @@ -130,9 +128,6 @@ addGroupCoverages <- function( # outListx <- SimpleList(LowCellGroup = cellNamesx) or NULL #} if(useLabels){ - if(is.null(sampleLabels)) { - sampleLabels <- "Sample" - } sampleLabelsx <- paste0(subColDat[,sampleLabels]) } else { sampleLabelsx <- NULL From 79f567e0b038cb49d01626fd504ed41e3c85ce46 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:05:16 -0700 Subject: [PATCH 042/162] update function description for getMatrixFromProject in response to https://github.com/GreenleafLab/ArchR/discussions/943 --- R/ArrowRead.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 13111d05..55d12c41 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -239,7 +239,12 @@ getFragmentsFromArrow <- function( #' Get a data matrix stored in an ArchRProject #' -#' This function gets a given data matrix from an `ArchRProject`. +#' This function gets a given data matrix from an `ArchRProject` and returns it as a `SummarizedExperiment`. +#' This function will return the matrix you ask it for, without altering that matrix unless you tell it to. +#' For example, if you added your `PeakMatrix` using `addPeakMatrix()` with `binarize = TRUE`, then +#' `getMatrixFromProject()` will return a binarized `PeakMatrix`. 
Alternatively, you could set `binarize = TRUE` +#' in the parameters passed to `getMatrixFromProject()` and the `PeakMatrix` will be binarized as you pull +#' it out. No other normalization is applied to the matrix by this function. #' #' @param ArchRProj An `ArchRProject` object to get data matrix from. #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. From ff34d6180aa70dd3dbed9af476c462a030596f99 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:12:32 -0700 Subject: [PATCH 043/162] remove .requirePackage(genome) Prevents use of custom (non-installed) BSgenome objects and this check is superseded by `validBSgenome()` --- R/ProjectMethods.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ProjectMethods.R b/R/ProjectMethods.R index 1cb18b7d..0ac8258c 100644 --- a/R/ProjectMethods.R +++ b/R/ProjectMethods.R @@ -390,7 +390,6 @@ addPeakSet <- function( #Get NucleoTide Content peakSet <- tryCatch({ - .requirePackage(genomeAnnotation$genome) .requirePackage("Biostrings",source="bioc") BSgenome <- eval(parse(text = genomeAnnotation$genome)) BSgenome <- validBSgenome(BSgenome) From ccac7bc4bf0de87c7ba51de9970a95a581b45f90 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:14:05 -0700 Subject: [PATCH 044/162] remove requirePackage(genome) This prevents the use of custom (non-installed) BSgenome objects and this check is superseded by `validBSgenome` --- R/ReproduciblePeakSet.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 6c50c0cf..1b153091 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -213,7 +213,6 @@ addReproduciblePeakSet <- function( ##################################################### # BSgenome for Add Nucleotide Frequencies! 
##################################################### - .requirePackage(genomeAnnotation$genome) .requirePackage("Biostrings",source="bioc") BSgenome <- eval(parse(text = genomeAnnotation$genome)) BSgenome <- validBSgenome(BSgenome) From 73a286707415c01f07394861ab606cfd187a2568 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:39:22 -0700 Subject: [PATCH 045/162] explain geneAnnotations in plots --- R/ArchRBrowser.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 20abfb19..3b139ea1 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -5,7 +5,8 @@ #' Launch ArchR Genome Browser #' #' This function will open an interactive shiny session in style of a browser track. It allows for normalization of the signal which -#' enables direct comparison across samples. +#' enables direct comparison across samples. Note that the genes displayed in this browser are derived from your `geneAnnotation` +#' (i.e. the `BSgenome` object you used) so they may not match other online genome browsers that use different gene annotations. #' #' @param ArchRProj An `ArchRProject` object. #' @param features A `GRanges` object containing the "features" to be plotted via the "featureTrack". This should be thought of as a @@ -628,7 +629,8 @@ ArchRBrowserTrack <- function(...){ #' Plot an ArchR Region Track #' #' This function will plot the coverage at an input region in the style of a browser track. It allows for normalization of the signal -#' which enables direct comparison across samples. +#' which enables direct comparison across samples. Note that the genes displayed in these plots are derived from your `geneAnnotation` +#' (i.e. the `BSgenome` object you used) so they may not match other online genome browsers that use different gene annotations. #' #' @param ArchRProj An `ArchRProject` object. #' @param region A `GRanges` region that indicates the region to be plotted. 
If more than one region exists in the `GRanges` object, From 17d613db4b847e978264d5a7dd275c081e36541b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 17:40:22 -0700 Subject: [PATCH 046/162] fix clusterCols being set to NULL by .binarySort() addressing https://github.com/GreenleafLab/ArchR/issues/948 --- R/MarkerFeatures.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index b439771d..7816e1bb 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -965,7 +965,9 @@ plotMarkerHeatmap <- function( mat <- bS[[1]][,colnames(mat),drop=FALSE] } clusterRows <- FALSE - clusterCols <- bS[[2]] + if (clusterCols) { + clusterCols <- bS[[2]] + } }else{ clusterRows <- TRUE clusterCols <- TRUE From 8adea54390f1d6ce2e92370f9a09b8d8409683b3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 2 Aug 2021 09:39:35 -0700 Subject: [PATCH 047/162] delete requirePackage(genome) --- R/AnnotationPeaks.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 13df1f22..40627965 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -429,7 +429,6 @@ addMotifAnnotations <- function( # Get BSgenome Information! ############################################################# genome <- ArchRProj@genomeAnnotation$genome - .requirePackage(genome) BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) From 818b615896ee661130892de05767d230c782a2fc Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 19 Aug 2021 07:59:24 -0700 Subject: [PATCH 048/162] Set addRowVarsLog2 to true when adding TileMatrix addIterativeLSI depends on the `rowMeansLog2` and `rowVarsLog2` HDF5 groups when `firstSelection = "var"` when using a TileMatrix. 
Brought up in https://github.com/GreenleafLab/ArchR/issues/958 --- R/MatrixTiles.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 451384eb..402bfc1e 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -267,7 +267,8 @@ addTileMatrix <- function( Group = paste0("TileMatrix/", chr), binarize = binarize, addColSums = TRUE, - addRowSums = TRUE + addRowSums = TRUE, + addRowVarsLog2 = TRUE ) gc() From cd8baa379a169642cd8ba9f71272aea65f8d130e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 24 Aug 2021 11:44:54 -0700 Subject: [PATCH 049/162] Enable null ArchRProj in PlotFootprints --- R/Footprinting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index baf77703..764a1e17 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -374,7 +374,7 @@ plotFootprints <- function( .validInput(input = smoothWindow, name = "smoothWindow", valid = c("integer", "null")) .validInput(input = baseSize, name = "baseSize", valid = c("numeric")) .validInput(input = plot, name = "plot", valid = c("boolean")) - .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) + .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj", "null")) .validInput(input = plotName, name = "plotName", valid = c("character")) .validInput(input = height, name = "height", valid = c("numeric")) .validInput(input = width, name = "width", valid = c("numeric")) From 96a71cc17bceb99eb5802ad33bc6fda74b8d1229 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 Aug 2021 13:53:38 -0700 Subject: [PATCH 050/162] improve messaging for tabix indexing failure in response to #1000 --- R/CreateArrow.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 55e35670..5a478abd 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1137,6 +1137,7 @@ createArrowFiles <- function( indexTabix(file, format = "bed") TRUE }, 
error = function(y){ + message("Tabix indexing failed for ", file,". Note that ArchR requires bgzipped fragment files which is different from gzip. See samtools bgzip!") FALSE }) }) @@ -1152,7 +1153,8 @@ createArrowFiles <- function( } }, error = function(x){ tryCatch({ - if(getArchRVerbose()) message("Attempting to index ", file," as bam...") + if(getArchRVerbose()) + ("Attempting to index ", file," as bam...") indexBam(file) TRUE }, error = function(y){ From 38614209d4f9c9cbf0cc4e6b2d24a2ccbb51e512 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 Aug 2021 14:01:51 -0700 Subject: [PATCH 051/162] fix inadvertent typo and add bam index fail message --- R/CreateArrow.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 5a478abd..300cb7c8 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1153,11 +1153,11 @@ createArrowFiles <- function( } }, error = function(x){ tryCatch({ - if(getArchRVerbose()) - ("Attempting to index ", file," as bam...") + if(getArchRVerbose()) message("Attempting to index ", file," as bam...") indexBam(file) TRUE }, error = function(y){ + message("Indexing of BAM file failed for ",file,".") FALSE }) }) From ee6bc0b89aeb9a5afdd971f2d8ede3c3827dd564 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 08:37:23 -0700 Subject: [PATCH 052/162] add rastr option to plotMarkers --- R/MarkerFeatures.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7816e1bb..09662341 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -1225,7 +1225,8 @@ plotMarkers <- function( name = NULL, cutOff = "FDR <= 0.01 & abs(Log2FC) >= 0.5", plotAs = "Volcano", - scaleTo = 10^4 + scaleTo = 10^4, + rastr = TRUE ){ .validInput(input = seMarker, name = "seMarker", valid = c("SummarizedExperiment")) @@ -1233,6 +1234,7 @@ plotMarkers <- function( .validInput(input = cutOff, name = "cutOff", valid = c("character")) 
.validInput(input = plotAs, name = "plotAs", valid = c("character")) .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) + .validInput(input = rastr, name = "rastr", valid = c("boolean")) #Evaluate AssayNames assayNames <- names(SummarizedExperiment::assays(seMarker)) @@ -1287,7 +1289,7 @@ plotMarkers <- function( ylim = c(-qLFC, qLFC), size = 1, extend = 0, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, @@ -1304,7 +1306,7 @@ plotMarkers <- function( xlim = c(-qLFC, qLFC), extend = 0, size = 1, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, @@ -1321,7 +1323,7 @@ plotMarkers <- function( xlim = c(-qDiff, qDiff), extend = 0, size = 1, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, From 2770d1b9844451cc65a403999cd242eb54d46f20 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 08:40:00 -0700 Subject: [PATCH 053/162] add param definition for rastr --- R/MarkerFeatures.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 09662341..092d3b1b 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -1219,6 +1219,8 @@ markerPlot <- function(...){ #' @param cutOff A valid-syntax logical statement that defines which marker features from `seMarker` will be plotted. #' `cutoff` can contain any of the `assayNames` from `seMarker`. #' @param plotAs A string indicating whether to plot a volcano plot ("Volcano") or an MA plot ("MA"). +#' @param rastr A boolean value that indicates whether the plot should be rasterized using `ggrastr`. This does not rasterize +#' lines and labels, just the internal portions of the plot. 
#' @export plotMarkers <- function( seMarker = NULL, From 18bcead479721978f1d12c21931d83fe294b56d7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 10:49:13 -0700 Subject: [PATCH 054/162] update getMatches() description detail that the peak order is the same as from getPeakSet() --- R/AnnotationPeaks.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 13df1f22..4a6c473c 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -59,7 +59,8 @@ getPositions <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ #' Get peak annotation matches from an ArchRProject #' -#' This function gets peak annotation matches from a given ArchRProject. +#' This function gets peak annotation matches from a given ArchRProject. The peaks in the returned object are in the +#' same order as the peaks returned by `getPeakSet()`. #' #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the `peakAnnotation` object (i.e. Motifs) to retrieve from the designated `ArchRProject`. From 9366af8b6022f4d7f7d6ff3f65b16725c32fdde4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 Aug 2021 15:27:44 -0700 Subject: [PATCH 055/162] add documentation and comments for addModuleScore --- R/ModuleScore.R | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 99af34de..60b6fc07 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -1,12 +1,20 @@ #' Add Module Scores to an ArchRProject #' -#' This function computes imputations weights that describe each cell as a linear combination of many cells based on a MAGIC diffusion matrix. -#' -#' RRR +#' This function calculates a module score from a set of features across all cells. This allows for +#' grouping of multiple features together into a single quantitative measurement. 
Currently, this +#' function only works for modules derived from the `GeneScoreMatrix`. Each module is added as a +#' new column in `cellColData` #' #' @param ArchRProj An `ArchRProject` object. -#' @param seed A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can -#' reproduce results downstream. +#' @param useMatrix The name of the matrix to be used for calculation of the module score. See `getAvailableMatrices()` to view available options. +#' @param name The name to be given to the designated module. If `features` is a list, this name will be prepended to the feature set names given in the list as shown below. +#' @param features A list of feature names to be grouped into modules. For example, `list(BScore = c("MS4A1", "CD79A", "CD74"), TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1"))`. +#' Each named element in this list will be stored as a separate module. The examples given in these parameters would yield two modules called `Module.Bscore` and `Module.Tscore`. +#' If the elements of this list are not named, they will be numbered in order, i.e. `Module1`, `Module2`. +#' @param nBin The number of bins to use to divide all features for identification of signal-matched features for background calculation +#' @param nBgd The number of background features to use for signal normalization. +#' @param seed A number to be used as the seed for random number generation required when sampling cells for the background set. It is recommended +#' to keep track of the seed used so that you can reproduce results downstream. #' @param threads The number of threads to be used for parallel computing. #' @param logFile The path to a file to be used for logging ArchR output. #' @export @@ -22,6 +30,10 @@ addModuleScore <- function( logFile = createLogFile("addModuleScore") ){ + if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ + stop("useMatrix not in available matrices! 
See getAvailableMatrices!") + } + if(!is.null(seed)) set.seed(seed) #Get Feature DF @@ -29,10 +41,6 @@ addModuleScore <- function( rownames(featureDF) <- paste0(featureDF$seqnames, ":", featureDF$idx) featureDF$Match <- seq_len(nrow(featureDF)) - if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ - stop("useMatrix not in available matrices! See getAvailableMatrices!") - } - matrixClass <- h5read(getArrowFiles(ArchRProj)[1], paste0(useMatrix, "/Info/Class")) if(matrixClass == "Sparse.Assays.Matrix"){ @@ -42,6 +50,7 @@ addModuleScore <- function( } } + #Figure out the index numbers of the selected features within the given matrix if(grepl(":",unlist(features)[1])){ sname <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,1] @@ -76,24 +85,28 @@ addModuleScore <- function( featuresUse <- featureDF[idx,] featuresUse$Module <- Rle(stack(features)[,2]) - #Get Averages + #Get average values for all features and then order the features based on their average values + #so that the features can be binned into nBins rS <- ArchR:::.getRowSums(ArrowFiles = getArrowFiles(ArchRProj), useMatrix = useMatrix) rS <- rS[order(rS[,3]), ] rS$Bins <- Rle(ggplot2::cut_number(x = rS[,3] + rnorm(length(rS[,3]))/1e30, n = nBin, labels = FALSE, right = FALSE)) rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), rownames(featureDF)) + #check that the number of selected background features isnt bigger than the size of each bin if(nBgd > min(rS$Bins@lengths)){ stop("nBgd must be lower than ", min(rS$Bins@lengths), "!") } + #Match the indicies across the different vectors idxMatch <- match(paste0(featuresUse$seqnames, ":", featuresUse$idx), paste0(rS$seqnames, ":", rS$idx)) featuresUse$Bins <- as.vector(rS$Bins[idxMatch]) - #MakeLists - featureList <- split(featuresUse$Match, featuresUse$Module) - moduleList <- split(featuresUse$Bins, featuresUse$Module) - binList <- split(rS$Match, rS$Bins) + #Make lists + featureList <- split(featuresUse$Match, featuresUse$Module) #feature 
indicies per module + moduleList <- split(featuresUse$Bins, featuresUse$Module) #bins for each feature per module + binList <- split(rS$Match, rS$Bins) #list of all indicies for each bin + #calculate the module score by normalizing to a background set of features dfM <- lapply(seq_along(featureList), function(x){ message("Computing Module ",x, " of ", length(featureList)) binx <- binList[moduleList[[x]]] @@ -111,6 +124,7 @@ addModuleScore <- function( Matrix::colMeans(m[seq_along(idxFgd), ]) - Matrix::colMeans(m[-seq_along(idxFgd), ]) }) %>% Reduce("cbind", .) + #add the module scores as new columns in cellColData for(x in seq_len(ncol(dfM))){ ArchRProj <- addCellColData(ArchRProj, data = dfM[,x], name=names(featureList)[x], cells=rownames(dfM), force = TRUE) } From 8fa2c857fe745b08f0a461f8ec624e2c0815981b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 Aug 2021 16:28:46 -0700 Subject: [PATCH 056/162] add validInput checks to addModuleScore --- R/ModuleScore.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 60b6fc07..06daa745 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -30,6 +30,16 @@ addModuleScore <- function( logFile = createLogFile("addModuleScore") ){ + .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) + .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) + .validInput(input = name, name = "name", valid = c("character")) + .validInput(input = features, name = "features", valid = c("character")) + .validInput(input = nBin, name = "nBin", valid = c("integer")) + .validInput(input = nBgd, name = "nBgd", valid = c("integer")) + .validInput(input = seed, name = "seed", valid = c("integer","null")) + .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = logFile, name = "logFile", valid = c("character", "null")) + if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ stop("useMatrix not in available matrices! 
See getAvailableMatrices!") } From 96aa3a8c39119c8ad521f9b2002009e329d74a3e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 10 Sep 2021 09:57:13 -0700 Subject: [PATCH 057/162] Fix coverage files for chromosomes with no insertions https://github.com/GreenleafLab/ArchR/issues/1025 throw a warning when a seqname is found with no insertions but do not write an empty entry to the BED file --- R/GroupCoverages.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index b2769a86..0c2f59d1 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -762,7 +762,11 @@ addGroupCoverages <- function( if(x == 1) .logThis(iS, "InsertionSites", logFile = logFile) iS <- data.table(seqnames = allChr[x], start = iS - 1L, end = iS) if(x == 1) .logThis(iS, "InsertionSites-DT", logFile = logFile) - data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) + if(!any(is.na(iS$start))) { + data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) + } else { + .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) + } }, error = function(e){ errorList <- list( x = x, From e39e2784a7adc23f8476cd01c74fc4f5d5991f9b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 13 Sep 2021 07:53:41 -0700 Subject: [PATCH 058/162] add RNGkind("L'Ecuyer-CMRG") to fix mclapply seed https://github.com/GreenleafLab/ArchR/issues/756 mclapply will generate new random seeds unless you set RNGkind("L'Ecuyer-CMRG") outside. 
To handle this, I added this call to `addArchRThreads()` when `threads > 1` --- R/GlobalDefaults.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index aae3753b..6fc570ab 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -284,6 +284,9 @@ addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = message("Input threads is equal to or greater than ncores minus 1 (",parallel::detectCores()-1,")\nSetting cores to ncores minus 2. Set force = TRUE to set above this number!") threads <- parallel::detectCores()-2 } + if(threads > 1){ + RNGkind("L'Ecuyer-CMRG") + } } message("Setting default number of Parallel threads to ", threads, ".") From 87b2dfb8d4d478bec551e137cbe503c83dd6a4a6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 17 Sep 2021 08:16:09 -0700 Subject: [PATCH 059/162] fix errorList biasMat type mentioned in https://github.com/GreenleafLab/ArchR/issues/1034 --- R/Footprinting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index 764a1e17..c9da4bea 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -501,7 +501,7 @@ plotFootprints <- function( biasMat <- t(t(biasMat) / colMeans(biasMat[idx, ,drop=FALSE])) errorList$footMatNorm <- footMat - errorList$biasMatNorm <- footMat + errorList$biasMatNorm <- biasMat #Norm Foot By Bias if(tolower(normMethod) == "none"){ From 4d005fef65923b487ffe6f6151c367f0a4f61408 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 23 Sep 2021 11:40:13 -0700 Subject: [PATCH 060/162] add warning message to console in addition to log file --- R/GroupCoverages.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 0c2f59d1..d2dea9ea 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -765,6 +765,7 @@ addGroupCoverages <- function( if(!any(is.na(iS$start))) { data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) } else { + 
message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".") .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) } }, error = function(e){ From 5d666ffb7b546387a182472977ae3b75d4a84236 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 23 Sep 2021 11:40:58 -0700 Subject: [PATCH 061/162] fix typo --- R/GroupCoverages.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index d2dea9ea..4e4145a3 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -765,7 +765,7 @@ addGroupCoverages <- function( if(!any(is.na(iS$start))) { data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) } else { - message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".") + message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".")) .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) } }, error = function(e){ From f6ac862f62d5fa5dea0932b000ecaeed34420e3a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 24 Sep 2021 07:40:50 -0700 Subject: [PATCH 062/162] fix RNGkind if statement --- R/GlobalDefaults.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index 6fc570ab..14bfd94f 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -284,9 +284,9 @@ addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = message("Input threads is equal to or greater than ncores minus 1 (",parallel::detectCores()-1,")\nSetting cores to ncores minus 2. 
Set force = TRUE to set above this number!") threads <- parallel::detectCores()-2 } - if(threads > 1){ - RNGkind("L'Ecuyer-CMRG") - } + } + if(threads > 1){ + RNGkind("L'Ecuyer-CMRG") } message("Setting default number of Parallel threads to ", threads, ".") From c61f782e0feb12433e365c46440b5f87dffffac6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Oct 2021 08:02:12 -0700 Subject: [PATCH 063/162] fix grey background in gene tracks suggested in https://github.com/GreenleafLab/ArchR/issues/1020#issuecomment-954490261 --- R/ArchRBrowser.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 3b139ea1..41be35fc 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1352,17 +1352,17 @@ plotBrowserTrack <- function( #Add Labels if There are Genes with this orientation! if(length(which(genesO$strand!="-")) > 0){ p <- p + ggrepel::geom_label_repel(data=genesO[which(genesO$strand!="-"),], - aes(x = start, y = cluster, label = symbol, color = strand, fill = NA), + aes(x = start, y = cluster, label = symbol, color = strand), segment.color = "grey", nudge_x = -0.01*(end(region) - start(region)), nudge_y = -0.25, - size = labelSize, direction = "x") + size = labelSize, direction = "x", inherit.aes=FALSE) } #Add Labels if There are Genes with this orientation! 
if(length(which(genesO$strand=="-")) > 0){ p <- p + ggrepel::geom_label_repel(data=genesO[which(genesO$strand=="-"),], - aes(x = end, y = cluster, label = symbol, color = strand, fill = NA), + aes(x = end, y = cluster, label = symbol, color = strand), segment.color = "grey", nudge_x = +0.01*(end(region) - start(region)), nudge_y = 0.25, - size = labelSize, direction = "x") + size = labelSize, direction = "x", inherit.aes=FALSE) } p <- p + theme(legend.justification = c(0, 1), From b2f3d95171ea1d708ae1e39a980669091d02689f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Oct 2021 08:34:20 -0700 Subject: [PATCH 064/162] Fixing typo in blacklisted tile removal suggested in https://github.com/GreenleafLab/ArchR/issues/1110#issuecomment-953000886 --- R/MatrixGeneScores.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index afbd14f3..4957141a 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -396,7 +396,7 @@ addGeneScoreMatrix <- function( if(!is.null(blacklist)){ if(length(blacklist) > 0){ blacklistz <- blacklist[[chrz]] - if(is.null(blacklistz) | length(blacklistz) > 0){ + if(!is.null(blacklistz) | length(blacklistz) > 0){ tilesBlacklist <- 1 * (!overlapsAny(uniqueTiles, ranges(blacklistz))) if(sum(tilesBlacklist == 0) > 0){ x <- x * tilesBlacklist[subjectHits(tmp)] #Multiply Such That All Blacklisted Tiles weight is now 0! 
From b38b3bda99eff78c4ea56d22a2b4eac422ad4c03 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 3 Nov 2021 21:36:48 -0700 Subject: [PATCH 065/162] Fix distTSS bug raised in https://github.com/GreenleafLab/ArchR/issues/1122 --- R/ReproduciblePeakSet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 6c50c0cf..21fe8c11 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -625,9 +625,9 @@ addReproduciblePeakSet <- function( distTSS <- distanceToNearest(peakSummits, resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToTSS <- mcols(distTSS)$distance if("symbol" %in% colnames(mcols(geneAnnotation$TSS))){ - mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distPeaks)] + mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distTSS)] }else if("tx_name" %in% colnames(mcols(geneAnnotation$TSS))){ - mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$tx_name[subjectHits(distPeaks)] + mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$tx_name[subjectHits(distTSS)] } #Get NucleoTide Content From 351875aa3a7a634bac4b8ba08ac7099b99928e01 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 4 Nov 2021 11:57:49 -0700 Subject: [PATCH 066/162] fix typo in validInput for features make it a list instead of character --- R/ModuleScore.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 06daa745..87920983 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -33,7 +33,7 @@ addModuleScore <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) .validInput(input = name, name = "name", valid = c("character")) - .validInput(input = features, name = "features", valid = c("character")) + .validInput(input = features, name = "features", valid = c("list")) 
.validInput(input = nBin, name = "nBin", valid = c("integer")) .validInput(input = nBgd, name = "nBgd", valid = c("integer")) .validInput(input = seed, name = "seed", valid = c("integer","null")) From d32148104a5ffe82bcd40b30c7493618df8ea74b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 10 Nov 2021 12:06:44 -0800 Subject: [PATCH 067/162] update error handling for binarized matrix with top addressing https://github.com/GreenleafLab/ArchR/issues/958#issuecomment-954830868 --- R/IterativeLSI.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 8a341c5b..d708df14 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -211,7 +211,10 @@ addIterativeLSI <- function( if(tolower(firstSelection) == "top"){ if(!binarize){ - stop("Please binarize data if using top selection for first iteration! Set binarize = TRUE!") + matClass <- h5read(ArrowFiles[1], paste0(useMatrix,"/Info/Class")) + if(matClass != "Sparse.Binary.Matrix"){ + stop("Input matrix is not binarized and binarize != TRUE. Please use binarized data if using top selection for first iteration! Set binarize = TRUE!") + } } #Compute Row Sums Across All Samples From 3927b7061e4f892a6082a17c43840804e92da356 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 11 Nov 2021 10:22:35 -0800 Subject: [PATCH 068/162] fix check for feature not in matrix in response to https://github.com/GreenleafLab/ArchR/issues/1142#issuecomment-964771521 --- R/ModuleScore.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 87920983..aecd7c44 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -78,7 +78,7 @@ addModuleScore <- function( idx <- lapply(seq_along(unlist(features)), function(x){ ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] - if(length(ix)==0){ + if(is.na(ix)){ .logStop(sprintf("FeatureName (%s) does not exist! 
See getFeatures", unlist(features)[x]), logFile = logFile) } ix From 4800d9ff7ecd6bc65364027d8cebfefe6510777d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 11 Nov 2021 10:42:40 -0800 Subject: [PATCH 069/162] improve error message --- R/ModuleScore.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index aecd7c44..2e7d4fd9 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -69,7 +69,7 @@ addModuleScore <- function( idx <- lapply(seq_along(name), function(x){ ix <- intersect(which(tolower(name[x]) == tolower(featureDF$name)), BiocGenerics::which(tolower(sname[x]) == tolower(featureDF$seqnames))) if(length(ix)==0){ - .logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", name[x]), logFile = logFile) + .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", name[x]), logFile = logFile) } ix }) %>% unlist @@ -79,7 +79,7 @@ addModuleScore <- function( idx <- lapply(seq_along(unlist(features)), function(x){ ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] if(is.na(ix)){ - .logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", unlist(features)[x]), logFile = logFile) + .logStop(sprintf("FeatureName (%s) does not exist! 
See available features using getFeatures()", unlist(features)[x]), logFile = logFile) } ix }) %>% unlist From 9997f87c0b449224738e9f3245507424ce4f1887 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 15 Nov 2021 09:35:50 -0800 Subject: [PATCH 070/162] make acceptable fragments less or equal to chr ends in response to https://github.com/GreenleafLab/ArchR/issues/1145#issuecomment-968334964 --- R/MatrixTiles.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 402bfc1e..4b420c50 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -5,7 +5,6 @@ #' Add TileMatrix to ArrowFiles or an ArchRProject #' #' This function, for each sample, will independently compute counts for each tile -#' per cell in the ArrowFile #' #' @param input An `ArchRProject` object or character vector of ArrowFiles. #' @param chromSizes A named numeric vector containing the chromsome names and lengths. The default behavior is to retrieve @@ -212,8 +211,8 @@ addTileMatrix <- function( fragments <- fragments[start(fragments) >= 1] #Check 2 - fragmentsBad2 <- fragments[!(end(fragments) < chromLengths[z])] - fragments <- fragments[end(fragments) < chromLengths[z]] + fragmentsBad2 <- fragments[!(end(fragments) <= chromLengths[z])] + fragments <- fragments[end(fragments) <= chromLengths[z]] #Check N nf2 <- length(fragments) From 0630ae09571734338520a16f801cc8a71a06d2f1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 16 Nov 2021 15:14:56 -0800 Subject: [PATCH 071/162] add require(parallel) In Bioconductor >3.1, at some point the BiocGenerics package stopped depending on the parallel package. Since parallel no longer gets loaded at runtime with ArchR, this require statement ensures that it has been loaded. 
--- R/HiddenUtils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 4ef6980e..19eccbc3 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -393,7 +393,7 @@ } if(threads > 1){ - + require(parallel) o <- mclapply(..., mc.cores = threads, mc.preschedule = preschedule) errorMsg <- list() From 560764a94c1cb117845cefd72f0089097a3e04b2 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 17 Nov 2021 08:16:31 -0800 Subject: [PATCH 072/162] log ranges1/2 before removal in response to https://github.com/GreenleafLab/ArchR/issues/1169#issue-1056312775 --- R/IntegrativeAnalysis.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 4e85635d..f662630b 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -475,7 +475,6 @@ correlateTrajectories <- function( #mcols(ranges1) <- featureDF1 names(ranges1) <- rownames(featureDF1) rowRanges(seTrajectory1) <- ranges1 - rm(ranges1) if("strand" %in% colnames(featureDF2)){ ranges2 <- GRanges( @@ -492,11 +491,12 @@ correlateTrajectories <- function( #mcols(ranges2) <- featureDF2 names(ranges2) <- rownames(featureDF2) rowRanges(seTrajectory2) <- ranges2 - rm(ranges2) .logThis(ranges1, "ranges1", logFile = logFile) .logThis(ranges2, "ranges2", logFile = logFile) - + rm(ranges1) + rm(ranges2) + #Find Associations to test isStranded1 <- any(as.integer(strand(seTrajectory1)) == 2) isStranded2 <- any(as.integer(strand(seTrajectory2)) == 2) From 84c8ff41940d8ce1ab2efa8237f3376a257a331d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 18 Nov 2021 14:39:50 -0800 Subject: [PATCH 073/162] address boundary case where chr length is a multiple of num tiles in response to https://github.com/GreenleafLab/ArchR/issues/1163 --- R/GroupExport.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/GroupExport.R b/R/GroupExport.R index 9102c6f3..fdb15a07 100644 --- a/R/GroupExport.R +++ 
b/R/GroupExport.R @@ -328,7 +328,10 @@ getGroupBW <- function( }else{ #N Tiles - nTiles <- trunc(chromLengths[availableChr[k]] / tileSize) + 1 + nTiles <- chromLengths[availableChr[k]] / tileSize + if (nTiles%%1 != 0) { + nTiles <- trunc(nTiles) + 1 + } #Create Sparse Matrix matchID <- S4Vectors::match(mcols(fragik)$RG, cellGroupi) From 2291f019b8ed3753fbf3d9d4d808a61163536340 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 18 Nov 2021 15:24:30 -0800 Subject: [PATCH 074/162] add check for success for file download --- R/InputData.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/InputData.R b/R/InputData.R index 178572e8..1e12c11e 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -36,6 +36,10 @@ getTutorialData <- function( ) }, threads = min(threads, length(filesUrl))) + #check for success of file download + if(!all(unlist(downloadFiles) == 0)) { + stop("Error! Some tutorial files did not download successfully. Please try again.") + } } pathFragments <- "HemeFragments" From 41e621b8b6960300e0ef93478baf1313edc071f9 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 09:06:34 -0800 Subject: [PATCH 075/162] update loading of parallel to use native ArchR .requirePackage --- R/HiddenUtils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 19eccbc3..57fdad35 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -393,7 +393,7 @@ } if(threads > 1){ - require(parallel) + .requirePackage("parallel", source = "cran") o <- mclapply(..., mc.cores = threads, mc.preschedule = preschedule) errorMsg <- list() From 5e489a3b765b38d14a000ed36fa74119f43b8102 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 09:24:15 -0800 Subject: [PATCH 076/162] Sort SE containing rowRanges prior to return In response to https://github.com/GreenleafLab/ArchR/issues/1148 --- R/ArrowRead.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 55d12c41..0d9c786f 100644 --- 
a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -357,6 +357,7 @@ getMatrixFromProject <- function( .logDiffTime("Constructing SummarizedExperiment", t1 = tstart, verbose = verbose, logFile = logFile) if(!is.null(rR1)){ se <- SummarizedExperiment(assays = asy, colData = cD, rowRanges = rR1) + se <- sort(se) }else{ se <- SummarizedExperiment(assays = asy, colData = cD, rowData = rD1) } From c4296eea67d3a6dc771a4d9e33256c3443a38c2f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 10:29:30 -0800 Subject: [PATCH 077/162] define prefix in response to https://github.com/GreenleafLab/ArchR/issues/586 --- R/MatrixFeatures.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/MatrixFeatures.R b/R/MatrixFeatures.R index a13730ae..15e50137 100644 --- a/R/MatrixFeatures.R +++ b/R/MatrixFeatures.R @@ -265,6 +265,8 @@ addPeakMatrix <- function( for(z in seq_along(uniqueChr)){ + prefix <- sprintf("Chr %s (%s of %s)!", uniqueChr[z], z, length(uniqueChr)) + o <- tryCatch({ o <- h5closeAll() From 48dccf361bab6d9191a4d9b61e1317acfa71167c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 22 Nov 2021 08:42:54 -0800 Subject: [PATCH 078/162] fix typo in check for isDeviations Supposed to be fixed in https://github.com/GreenleafLab/ArchR/issues/78 but "dev" was supposed to be "deviations" raised again in https://github.com/GreenleafLab/ArchR/discussions/1177 --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7816e1bb..56c95e8a 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -129,7 +129,7 @@ getMarkerFeatures <- function( .logThis(range(as.vector(table(paste0(featureDF$seqnames)))), "FeaturesPerSeqnames", logFile = logFile) isDeviations <- FALSE - if(all(unique(paste0(featureDF$seqnames)) %in% c("z", "dev"))){ + if(all(unique(paste0(featureDF$seqnames)) %in% c("z", "deviations"))){ isDeviations <- TRUE } From 63e7883586a013b9a9c264ddb19dea18d9287087 Mon Sep 17 00:00:00 2001 From: 
Ryan Corces Date: Thu, 2 Dec 2021 13:52:25 -0800 Subject: [PATCH 079/162] revert change to ylim Previous commit https://github.com/GreenleafLab/ArchR/commit/8caa716086832c2560b867b6e4a0d1bab48716e1 was not correct and broke the ArchRBrowser as detailed in https://github.com/GreenleafLab/ArchR/issues/1206 This commit reverts that change and also updates the param definition for `ylim` to make it clear how this argument should be used. --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 41be35fc..41be2a83 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -665,7 +665,7 @@ ArchRBrowserTrack <- function(...){ #' @param normMethod The name of the column in `cellColData` by which normalization should be performed. The recommended and default value #' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. #' @param threads The number of threads to use for parallel execution. -#' @param ylim The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. If not provided, the y-axis limit will be c(0, 0.999). +#' @param ylim The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. This should be expressed as `c(lower limit, upper limit)` such as `c(0,0.99)`. If not provided, the y-axis limit will be c(0, 0.999). #' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override coloring for groups. #' @param baseSize The numeric font size to be used in the plot. This applies to all plot labels. #' @param scTileSize The width of the tiles in scTracks. Larger numbers may make cells overlap more. Default is 0.5 for about 100 cells. 
@@ -997,7 +997,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- c(0,quantile(df$y, probs=c(ylim))) + ylim <- quantile(df$y, ylim) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From fdfce2d27949be94a4fb23c9fc2982532a63fe94 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 09:56:29 -0800 Subject: [PATCH 080/162] update no cell overlap error message --- R/MatrixGeneExpression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 1b432e45..fcf7348f 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -65,7 +65,7 @@ addGeneExpressionMatrix <- function( .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ - stop("No overlap found with scATAC!") + stop("No overlapping cell names found between ArrowFiles and seRNA object!") } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From 5350569e904ccc2a48e4ddc6c13acbf220f3ad23 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 09:57:01 -0800 Subject: [PATCH 081/162] update overlap error message --- R/MatrixGeneExpression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index fcf7348f..7d91a480 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -65,7 +65,7 @@ addGeneExpressionMatrix <- function( .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ - stop("No overlapping cell names found between ArrowFiles and seRNA object!") + stop("No overlapping cell names found between ArrowFiles and seRNA object! 
Cell names in ArrowFiles must match colnames in seRNA!") } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From e5752763cec99c686144d1f07853004e686c5906 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 10:07:21 -0800 Subject: [PATCH 082/162] fix processing multiple RNA inputs In response to https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index ac20df74..b645d046 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -17,21 +17,43 @@ import10xFeatureMatrix <- function( featureType = "Gene Expression" ){ - if(!all(file.exists(input))){ + .validInput(input = input, name = "input", valid = c("character")) + .validInput(input = names, name = "names", valid = c("character")) + .validInput(input = featureType, name = "featureType", valid = c("character")) + + if (!all(file.exists(input))) { stop("Not all input file paths exist!") } - - featureMats <- lapply(seq_along(input), function(y){ + featureMats <- lapply(seq_along(input), function(y) { message("Importing Feature Matrix ", y, " of ", length(input)) - .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) + ArchR:::.importFM(featureMatrix = input[y], featureType = featureType, + name = names[y]) }) - featureMats <- tryCatch({ - Reduce("cbind", featureMats) - }, error = function(e){ - message("Error in combining individual feature matrices! 
Returning as a list of individual feature matrices!") - featureMats - }) + #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects + if(length(featureMats) > 1) { + featureMats <- tryCatch({ + rse_final <- featureMats[[1]] + #for each element of the list, test to make sure each SE attribute is identical + for(i in 2:length(featureMats)){ + + if(!all.equal(rownames(featureMats[[1]]),rownames(featureMats[[i]]))) { + stop("Error - rownames (genes) of individual RNA objects are not equivalent.") + } + if(!all.equal(rowData(featureMats[[1]]),rowData(featureMats[[i]]))) { + stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + } + if(!all.equal(names(assays(featureMats[[1]])),names(assays(featureMats[[i]])))) { + stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") + } + rse_final <- cbind(rse_final,featureMats[[i]]) + } + rse_final + }, error = function(e) { + message("Error in combining individual feature matrices! 
Returning as a list of individual feature matrices!") + featureMats + }) + } featureMats From 705e9f5b59b70cbe406d8f708d533813d1e8a4cb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 10:13:47 -0800 Subject: [PATCH 083/162] remove unnecessary ArchR::: reference --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b645d046..db257fc9 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -26,7 +26,7 @@ import10xFeatureMatrix <- function( } featureMats <- lapply(seq_along(input), function(y) { message("Importing Feature Matrix ", y, " of ", length(input)) - ArchR:::.importFM(featureMatrix = input[y], featureType = featureType, + .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) }) From c95add0b8ffbc10efb3d5e072c00c6dfde7747c7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 09:16:12 -0800 Subject: [PATCH 084/162] remove try catch and improve error handling --- R/MultiModal.R | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index db257fc9..b77ae8ff 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -31,32 +31,26 @@ import10xFeatureMatrix <- function( }) #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if(length(featureMats) > 1) { - featureMats <- tryCatch({ - rse_final <- featureMats[[1]] - #for each element of the list, test to make sure each SE attribute is identical - for(i in 2:length(featureMats)){ - - if(!all.equal(rownames(featureMats[[1]]),rownames(featureMats[[i]]))) { - stop("Error - rownames (genes) of individual RNA objects are not equivalent.") - } - if(!all.equal(rowData(featureMats[[1]]),rowData(featureMats[[i]]))) { - stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") - } - if(!all.equal(names(assays(featureMats[[1]])),names(assays(featureMats[[i]])))) { - 
stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") - } - rse_final <- cbind(rse_final,featureMats[[i]]) + if (length(featureMats) > 1) { + rse_final <- featureMats[[1]] + for (i in 2:length(featureMats)) { + if (!all.equal(rownames(featureMats[[1]]), rownames(featureMats[[i]]))) { + stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - rse_final - }, error = function(e) { - message("Error in combining individual feature matrices! Returning as a list of individual feature matrices!") - featureMats - }) + if (!all.equal(rowData(featureMats[[1]]), rowData(featureMats[[i]]))) { + stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + } + if (!all.equal(names(assays(featureMats[[1]])), + names(assays(featureMats[[i]])))) { + stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") + } + rse_final <- cbind(rse_final, featureMats[[i]]) + } + return(rse_final) + } else { + return(featureMats) } - featureMats - } .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ From 972aa83d7b4214fa50a0bccc1074ba9324d2ac4f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:15:12 -0800 Subject: [PATCH 085/162] Update MultiModal.R --- R/MultiModal.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b77ae8ff..34473126 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -34,23 +34,22 @@ import10xFeatureMatrix <- function( if (length(featureMats) > 1) { rse_final <- featureMats[[1]] for (i in 2:length(featureMats)) { - if (!all.equal(rownames(featureMats[[1]]), rownames(featureMats[[i]]))) { + if (!all.equal(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - if 
(!all.equal(rowData(featureMats[[1]]), rowData(featureMats[[i]]))) { + if (!all.equal(rowData(rse_final), rowData(featureMats[[i]]))) { stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") } - if (!all.equal(names(assays(featureMats[[1]])), - names(assays(featureMats[[i]])))) { + if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") } rse_final <- cbind(rse_final, featureMats[[i]]) } return(rse_final) - } else { + } + else { return(featureMats) } - } .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ From 99248d6df8a6ca11904680f49ffbc0d669550a37 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:32:02 -0800 Subject: [PATCH 086/162] change from cbind to combineCols if desired later, combineCols would allow combination of RSEs with different rows but I think it's better to enforce equality before merge. --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 34473126..b2634c97 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -43,7 +43,7 @@ import10xFeatureMatrix <- function( if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent.
Each object is expected to only have one assay named 'counts'.") } - rse_final <- cbind(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::combineCols(rse_final, featureMats[[i]]) } return(rse_final) } From fa9c1b7bfb010c762c02546be7de1d92853463fc Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:35:57 -0800 Subject: [PATCH 087/162] revert back to cbind but force SummarizedExperiment --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b2634c97..5994f2d6 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -43,7 +43,7 @@ import10xFeatureMatrix <- function( if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") } - rse_final <- SummarizedExperiment::combineCols(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 09864ada3c37724894ea077018465bf4bd41d006 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 21 Feb 2022 20:46:40 -0800 Subject: [PATCH 088/162] improve stop message for chr not in AnnoFile --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 4a6c473c..2a9f4999 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -787,7 +787,7 @@ addArchRAnnotations <- function( } if(chr %ni% .availableSeqnames(AnnoFile, Group)){ - stop("Error Chromosome not in AnnoFile!") + stop(paste("Error! 
Chromosome ",chr," not in AnnoFile!")) } o <- h5closeAll() From 4a56079c57f9f672290832457a0e190df0b783fb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 8 Mar 2022 12:33:22 -0800 Subject: [PATCH 089/162] fix multi-sample handling and mismatch reporting in response to https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 5994f2d6..665d9fc4 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -31,19 +31,40 @@ import10xFeatureMatrix <- function( }) #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if (length(featureMats) > 1) { + if (length(featureMats) > 1) { rse_final <- featureMats[[1]] for (i in 2:length(featureMats)) { - if (!all.equal(rownames(rse_final), rownames(featureMats[[i]]))) { + print(paste0("featureMats[[",i,"]]")) + if (!identical(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - if (!all.equal(rowData(rse_final), rowData(featureMats[[i]]))) { - stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + if (!identical(colnames(rowData(rse_final)), colnames(rowData(featureMats[[i]])))) { + stop("Error - rowData (gene metadata) of individual RNA objects have different columns. This is highly unusual and merging has been aborted.") } - if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { + if (!identical(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. 
Each object is expected to only have one assay named 'counts'.") } - rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) + + #check each column in rowData to check for mismatches that should be thrown as warnings + #occasionally, it seems like 10x is annotating different ensembl IDs to the same gene which seems like a bad way to go + #this is a bit heavy-handed but it seems like the safest thing to do is report any mismatch rather than merge blindly + mismatchWarning <- TRUE + for (x in 1:ncol(rowData(rse_final))) { + if (!identical(rowData(rse_final)[,x], rowData(featureMats[[i]])[,x])) { + if(mismatchWarning) { + message(sprintf("Warning! Some values within column \"%s\" the rowData of your objects do not precisely match!", colnames(rowData(rse_final))[x])) + message("This is often caused by slight variations in Ensembl IDs used by cellranger. ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") + mismatchWarning <- FALSE + } + + mismatch <- which(rowData(rse_final)[,x] != rowData(featureMats[[i]])[,x]) + for (y in 1:length(mismatch)) { + message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + } + } + } + + rse_final <- cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 8ddd94315b3ba5bc2b7590d825f438ce1e79ed9a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 8 Mar 2022 21:34:03 -0800 Subject: [PATCH 090/162] fix cbind to explicit reference SummarizedExperiment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit avoid error - unable to find an inherited method for function ‘bindCOLS’ for signature ‘"RangedSummarizedExperiment"’ --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R 
index 665d9fc4..4d26371a 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -64,7 +64,7 @@ import10xFeatureMatrix <- function( } } - rse_final <- cbind(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 8f05eef02685c5dd37e3ecadab725daadb71ea1c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 08:40:48 -0800 Subject: [PATCH 091/162] Fix handling of multiple input samples see https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 69 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 4d26371a..24b25f4d 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -8,17 +8,27 @@ #' #' @param input A character of paths to 10x feature hdf5 file(s). These will traditionally have a suffix similar to "filtered_feature_bc_matrix.h5". #' @param names A character of sample names associated with each input file. +#' @param strictMatch Only relevant when multiple input files are used. A boolean that indictes whether rows (genes) that do not match perfectly in the matrices +#' should be removed (`strictMatch = TRUE`) or coerced (`strictMatch = FALSE`). CellRanger seems to occassionally use different ensembl ids for the same gene across +#' different samples. If you are comfortable tolerating such mismatches, you can coerce all matrices to fit together, in which case the gene metadata present in +#' the first listed sample will be applied to all matrices for that particular gene entry. Regardless of what value is used for `strictMatch`, this function +#' cannot tolerate mismatched gene names, only mismatched metadata for the same gene. +#' @param verbose Only relevant when multiple input files are used. 
A boolean that indicates whether messaging about mismatches should be verbose (`TRUE`) or minimal (`FALSE`) #' @param featureType The name of the feature to extract from the 10x feature file. #' See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information. #' @export import10xFeatureMatrix <- function( input = NULL, - names = NULL, + names = NULL, + strictMatch = TRUE, + verbose = TRUE, featureType = "Gene Expression" - ){ - +){ + .validInput(input = input, name = "input", valid = c("character")) .validInput(input = names, name = "names", valid = c("character")) + .validInput(input = strictMatch, name = "strictMatch", valid = c("boolean")) + .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = featureType, name = "featureType", valid = c("character")) if (!all(file.exists(input))) { @@ -29,12 +39,26 @@ import10xFeatureMatrix <- function( .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) }) - + + message("Re-ordering RNA matricies alphabetically for consistency.") + for(j in 1:length(featureMats)) { + featureMats[[j]] <- featureMats[[j]][order(rownames(featureMats[[j]])),] + } + #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if (length(featureMats) > 1) { + if (length(featureMats) > 1) { + message("Merging individual RNA objects...") + #make the first matrix the base matrix and merge all others into it rse_final <- featureMats[[1]] + + rowsToRemove <- c() #rows that have previously been removed from rse_final + + #for each additional feature matrix (starting with the second), look for mismatches with rse_final and merge accordingly for (i in 2:length(featureMats)) { - print(paste0("featureMats[[",i,"]]")) + mismatchWarning <- TRUE #a boolean to prevent output of the warning message many times and only output it once + + message(sprintf("\nMerging %s", names[i])) + if 
(!identical(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } @@ -48,24 +72,46 @@ import10xFeatureMatrix <- function( #check each column in rowData to check for mismatches that should be thrown as warnings #occasionally, it seems like 10x is annotating different ensembl IDs to the same gene which seems like a bad way to go #this is a bit heavy-handed but it seems like the safest thing to do is report any mismatch rather than merge blindly - mismatchWarning <- TRUE + for (x in 1:ncol(rowData(rse_final))) { if (!identical(rowData(rse_final)[,x], rowData(featureMats[[i]])[,x])) { if(mismatchWarning) { - message(sprintf("Warning! Some values within column \"%s\" the rowData of your objects do not precisely match!", colnames(rowData(rse_final))[x])) - message("This is often caused by slight variations in Ensembl IDs used by cellranger. ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") + message(sprintf("Warning! Some values within column \"%s\" of the rowData (gene metadata) of your objects do not precisely match!", colnames(rowData(rse_final))[x])) + message("This is often caused by slight variations in Ensembl IDs and gene locations used by cellranger across different samples. 
ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") mismatchWarning <- FALSE } + #detect all of the mismatches betwenn rse_final and the current featureMat mismatch <- which(rowData(rse_final)[,x] != rowData(featureMats[[i]])[,x]) + #for each detected mismatch, handle the mismatch according to the value of strictMatch for (y in 1:length(mismatch)) { - message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + if (verbose) { + message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + } + if (strictMatch) { + if (verbose) { + message("strictMatch = TRUE so the corresponding gene entry with mismatching information will be removed.") + } + rowsToRemove <- unique(c(rowsToRemove, mismatch[y])) + #temporarily force the data to match so that merging can occur easily. 
Mismatched rows will be removed later + rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + } else { + if (verbose) { + message("strictMatch = FALSE so mismatching information will be coerced to match the first sample provided.") + } + rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + } } } } - + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } + if (strictMatch) { + rse_final <- rse_final[-rowsToRemove,] + } return(rse_final) } else { @@ -73,6 +119,7 @@ import10xFeatureMatrix <- function( } } + .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ o <- h5closeAll() From f31660dfb4b76c70792c9074213f2add0a77e3c3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 08:59:13 -0800 Subject: [PATCH 092/162] explicitly call GenomicRanges::resize To avoid conflicts with the `webshot` package proposed in https://github.com/GreenleafLab/ArchR/issues/1324 --- R/ReproduciblePeakSet.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 21fe8c11..75eaf1d1 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -598,7 +598,7 @@ addReproduciblePeakSet <- function( #Validate peaks <- .validGRanges(peaks) - peakSummits <- resize(peaks,1,"center") + peakSummits <- GenomicRanges::resize(peaks,1,"center") geneAnnotation$genes <- .validGRanges(geneAnnotation$genes) geneAnnotation$exons <- .validGRanges(geneAnnotation$exons) geneAnnotation$TSS <- .validGRanges(geneAnnotation$TSS) @@ -606,11 +606,11 @@ addReproduciblePeakSet <- function( #First Lets Get Distance to Nearest Gene Start .logMessage("Annotating Peaks : Nearest Gene", logFile = logFile) - distPeaks <- distanceToNearest(peakSummits, resize(geneAnnotation$genes, 1,
"start"), ignore.strand = TRUE) + distPeaks <- distanceToNearest(peakSummits, GenomicRanges::resize(geneAnnotation$genes, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToGeneStart <- mcols(distPeaks)$distance mcols(peaks)$nearestGene <- mcols(geneAnnotation$genes)$symbol[subjectHits(distPeaks)] .logMessage("Annotating Peaks : Gene", logFile = logFile) - promoters <- extendGR(resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2]) + promoters <- extendGR(GenomicRanges::resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2]) op <- overlapsAny(peakSummits, promoters, ignore.strand = TRUE) og <- overlapsAny(peakSummits, geneAnnotation$genes, ignore.strand = TRUE) oe <- overlapsAny(peakSummits, geneAnnotation$exons, ignore.strand = TRUE) @@ -622,7 +622,7 @@ addReproduciblePeakSet <- function( #First Lets Get Distance to Nearest TSS's .logMessage("Annotating Peaks : TSS", logFile = logFile) - distTSS <- distanceToNearest(peakSummits, resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) + distTSS <- distanceToNearest(peakSummits, GenomicRanges::resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToTSS <- mcols(distTSS)$distance if("symbol" %in% colnames(mcols(geneAnnotation$TSS))){ mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distTSS)] @@ -663,7 +663,7 @@ addReproduciblePeakSet <- function( summits <- Reduce("c", as(summits, "GRangesList")) .logMessage(paste0(prefix, " Extending Summits"), logFile = logFile) - extendedSummits <- resize(summits, extendSummits * 2 + 1, "center") + extendedSummits <- GenomicRanges::resize(summits, extendSummits * 2 + 1, "center") extendedSummits <- lapply(split(extendedSummits, extendedSummits$GroupReplicate), function(x){ nonES <- nonOverlappingGR(x, by = "score", decreasing = TRUE) nonES$replicateScoreQuantile <- round(.getQuantiles(nonES$score),3) From 
b441c88a2e0528d38f40555240d0c177339d0a8c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:01:10 -0800 Subject: [PATCH 093/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/MatrixGeneScores.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index 4957141a..1d1d10b3 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -221,9 +221,9 @@ addGeneScoreMatrix <- function( if(useTSS){ .logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = TRUE")) distMethod <- "GenePromoter" - geneRegions$geneStart <- start(resize(geneRegions, 1, "start")) - geneRegions$geneEnd <- start(resize(geneRegions, 1, "end")) - geneRegions <- resize(geneRegions, 1, "start") + geneRegions$geneStart <- start(GenomicRanges::resize(geneRegions, 1, "start")) + geneRegions$geneEnd <- start(GenomicRanges::resize(geneRegions, 1, "end")) + geneRegions <- GenomicRanges::resize(geneRegions, 1, "start") if(extendTSS){ geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream) } @@ -231,8 +231,8 @@ addGeneScoreMatrix <- function( }else{ .logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = FALSE")) distMethod <- "GeneBody" - geneRegions$geneStart <- start(resize(geneRegions, 1, "start")) - geneRegions$geneEnd <- start(resize(geneRegions, 1, "end")) + geneRegions$geneStart <- start(GenomicRanges::resize(geneRegions, 1, "start")) + geneRegions$geneEnd <- start(GenomicRanges::resize(geneRegions, 1, "end")) geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream) m <- 1 / width(geneRegions) geneRegions$geneWeight <- 1 + m * (geneScaleFactor - 1) / (max(m) - min(m)) @@ -317,8 +317,8 @@ addGeneScoreMatrix <- function( #Time to Overlap Gene Windows if(useGeneBoundaries){ - geneStartz <- start(resize(geneRegionz, 1, "start")) - geneEndz <- start(resize(geneRegionz, 1, "end")) + 
geneStartz <- start(GenomicRanges::resize(geneRegionz, 1, "start")) + geneEndz <- start(GenomicRanges::resize(geneRegionz, 1, "end")) pminGene <- pmin(geneStartz, geneEndz) pmaxGene <- pmax(geneStartz, geneEndz) @@ -380,7 +380,7 @@ addGeneScoreMatrix <- function( #Determine Sign for Distance relative to strand (Directionality determined based on dist from gene start) isMinus <- BiocGenerics::which(strand(geneRegionz) == "-") - signDist <- sign(start(uniqueTiles)[subjectHits(tmp)] - start(resize(geneRegionz,1,"start"))[queryHits(tmp)]) + signDist <- sign(start(uniqueTiles)[subjectHits(tmp)] - start(GenomicRanges::resize(geneRegionz,1,"start"))[queryHits(tmp)]) signDist[isMinus] <- signDist[isMinus] * -1 #Correct the orientation for the distance! From 394f7ccd917a8fbc0757679961bcbf1d50857cdd Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:02:05 -0800 Subject: [PATCH 094/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/QualityControl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/QualityControl.R b/R/QualityControl.R index 20c83f30..0c76ee36 100644 --- a/R/QualityControl.R +++ b/R/QualityControl.R @@ -48,7 +48,7 @@ plotTSSEnrichment <- function( chr <- paste0(seqnames(chromSizes)) chr <- gtools::mixedsort(intersect(chr, paste0(seqnames(TSS)))) TSS <- sort(sortSeqlevels(TSS)) - splitTSS <- split(resize(TSS,1,"start"), seqnames(TSS))[chr] + splitTSS <- split(GenomicRanges::resize(TSS,1,"start"), seqnames(TSS))[chr] window <- 2 * flank + 1 groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = FALSE) uniqGroups <- gtools::mixedsort(unique(groups[,1])) From 4803ee9f6e70bea93b70998c863a1deaade9e713 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:03:10 -0800 Subject: [PATCH 095/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/Footprinting.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/R/Footprinting.R b/R/Footprinting.R index c9da4bea..e02afd22 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -221,7 +221,7 @@ getFootprints <- function( footprintDF <- lapply(seq_along(featureList), function(x){ outx <- tryCatch({ - featurex <- split(resize(featureList[[x]],1,"center"), seqnames(featureList[[x]])) + featurex <- split(GenomicRanges::resize(featureList[[x]],1,"center"), seqnames(featureList[[x]])) intSeq <- intersect(names(featurex), names(cov)) if(length(intSeq)==0){ .logMessage(paste0("No intersecting chromsomes for feature ", names(featureList)[x], "!")) @@ -292,7 +292,7 @@ getFootprints <- function( kmerList <- .safelapply(seq_along(featureList), function(i){ .logDiffTime(sprintf("Computing Kmer Tables for %s of %s features", i, length(featureList)), tstart, verbose=verbose, logFile = logFile) - bsv <- BSgenomeViews(genome , resize(featureList[[i]], window + k, "center")) + bsv <- BSgenomeViews(genome, GenomicRanges::resize(featureList[[i]], window + k, "center")) bsv <- bsv[width(bsv) == window + k] #none that are trimmed! #BSgenome is already stranded #kmerPositionFrequencyCpp is Rcpp export for getting kmer position frequencies from strings From f208a345d2e07606f0b89acb44dd5c5d86a07a78 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:04:03 -0800 Subject: [PATCH 096/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/AnnotationGenome.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 876828d7..9df316fc 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -8,7 +8,6 @@ #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. #' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. 
#' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, #' non-standard chromosomes will still be removed as defined in `filterChrGR()`. #' @export createGenomeAnnotation <- function( @@ -171,7 +170,7 @@ createGeneAnnotation <- function( ########################### message("Getting TSS..") - TSS <- unique(resize(GenomicFeatures::transcripts(TxDb), width = 1, fix = "start")) + TSS <- unique(GenomicRanges::resize(GenomicFeatures::transcripts(TxDb), width = 1, fix = "start")) if(!is.null(inGenes)){ genes <- .validGRanges(inGenes) From c57440498fdb1a042702eb3fac21bc6d58af37e2 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:04:51 -0800 Subject: [PATCH 097/162] fix deleted line --- R/AnnotationGenome.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 9df316fc..e76026de 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -8,6 +8,7 @@ #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. #' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. #' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. +#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, #' non-standard chromosomes will still be removed as defined in `filterChrGR()`. 
#' @export createGenomeAnnotation <- function( From 6dd1e30df8bbfbc3d51b789fa6d4b1eed5c582fe Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:06:12 -0800 Subject: [PATCH 098/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/CreateArrow.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 300cb7c8..0fa4335a 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -580,7 +580,7 @@ createArrowFiles <- function( featureList <- list() featureList$Promoter <- extendGR( - gr = resize(geneAnnotation$genes, 1, "start"), + gr = GenomicRanges::resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2] ) @@ -852,10 +852,10 @@ createArrowFiles <- function( } #Create Window and Flank - TSS <- resize(TSS, 1, fix = "start") + TSS <- GenomicRanges::resize(TSS, 1, fix = "start") strand(TSS) <- "*" TSS <- unique(TSS) - tssWindow <- resize(TSS, window, "center") + tssWindow <- GenomicRanges::resize(TSS, window, "center") tssWindow$type <- "window" tssFlank <- c( #Positive Flank From 0023165538d737735112f6ae4a942bde5bb30483 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:07:18 -0800 Subject: [PATCH 099/162] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 41be2a83..c19333a6 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -292,7 +292,7 @@ ArchRBrowser <- function( region <- region[which(tolower(mcols(region)$symbol) %in% tolower(input$name))] region <- region[order(match(tolower(mcols(region)$symbol), tolower(input$name)))] - region1 <- resize(region, 1, "start") + region1 <- GenomicRanges::resize(region, 1, "start") strand(region1) <- "*" #Extend Region @@ -752,7 +752,7 @@ plotBrowserTrack <- function( region <- 
region[which(tolower(mcols(region)$symbol) %in% tolower(geneSymbol))] region <- region[order(match(tolower(mcols(region)$symbol), tolower(geneSymbol)))] print(region) - region <- resize(region, 1, "start") + region <- GenomicRanges::resize(region, 1, "start") strand(region) <- "*" region <- extendGR(region, upstream = upstream, downstream = downstream) } From 7c6690b6064bad593a25a02465fa82630ad908d5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:14:13 -0800 Subject: [PATCH 100/162] remove NULL as option for scaleDims https://github.com/GreenleafLab/ArchR/issues/1314 --- R/IterativeLSI.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index d708df14..f8b75a15 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -24,8 +24,7 @@ #' Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logidf". #' @param scaleDims A boolean that indicates whether to z-score the reduced dimensions for each cell. This is useful forminimizing the contribution #' of strong biases (dominating early PCs) and lowly abundant populations. However, this may lead to stronger sample-specific biases since -#' it is over-weighting latent PCs. If set to `NULL` this will scale the dimensions based on the value of `scaleDims` when the `reducedDims` were -#' originally created during dimensionality reduction. This idea was introduced by Timothy Stuart. +#' it is over-weighting latent PCs. #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to #' sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param binarize A boolean value indicating whether the matrix should be binarized before running LSI. This is often desired when working with insertion counts. 
@@ -117,7 +116,7 @@ addIterativeLSI <- function( .validInput(input = varFeatures, name = "varFeatures", valid = c("integer")) .validInput(input = dimsToUse, name = "dimsToUse", valid = c("integer")) .validInput(input = LSIMethod, name = "LSIMethod", valid = c("integer", "character")) - .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) + .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric")) .validInput(input = binarize, name = "binarize", valid = c("boolean")) .validInput(input = outlierQuantiles, name = "outlierQuantiles", valid = c("numeric", "null")) From 6bde881b2127e8ce81632ed8b451314c569c47cd Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 12 Mar 2022 09:39:08 -0800 Subject: [PATCH 101/162] fix footprinting crashes bc of ggrepel addressing https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 --- R/Footprinting.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index e02afd22..9d62449f 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -580,8 +580,9 @@ plotFootprints <- function( xlim = c(min(plotFootDF$x),max(plotFootDF$x)) ) + theme_ArchR(baseSize = baseSize) + ggtitle(name) + guides(fill = FALSE) + - guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + - ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) + guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + #removed ggrepel due to incompatibility with coord_cartesian - see https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 + #ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) ggBias <- ggplot(plotBiasDF, aes(x = x, y = mean, color = group)) + geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, linetype = NA, fill = group), alpha = 0.4) + From 
d28fa1d61fd3a3444a01721785b6f32bce8cbf48 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 12 Mar 2022 10:13:00 -0800 Subject: [PATCH 102/162] set drop=FALSE in matrix operations addressing https://github.com/GreenleafLab/ArchR/issues/1325 set drop=FALSE to prevent conversion to a vector this would happen if a chr had only one feature on it --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 0d9c786f..4129242c 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -922,7 +922,7 @@ getMatrixFromArrow <- function( #Check if samples have NAs due to N = 1 sample or some other weird thing. #Set it to min non NA variance dfVars <- lapply(seq_len(nrow(dfVars)), function(x){ - vx <- dfVars[x, ] + vx <- dfVars[x, , drop = FALSE] if(any(is.na(vx))){ vx[is.na(vx)] <- min(vx[!is.na(vx)]) } From f7a0f74cd7f9dc98a38f81eca6b25233faea522b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sun, 13 Mar 2022 10:34:20 -0700 Subject: [PATCH 103/162] fix deprecated ggplot calls and ggridges xlim addressing https://github.com/GreenleafLab/ArchR/issues/1249 --- R/GgplotUtils.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 609caa08..90fd6447 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -699,8 +699,8 @@ ggGroup <- function( df$x <- factor(df$x, groupOrder) p <- ggplot(df, aes(x = x, y = y, color = x)) + - scale_color_manual(values = pal, guide = FALSE) + - scale_fill_manual(values = pal, guide = FALSE) + + scale_color_manual(values = pal, guide = "none") + + scale_fill_manual(values = pal, guide = "none") + ggtitle(title) if(tolower(plotAs) == "ridges" | tolower(plotAs) == "ggridges"){ @@ -728,7 +728,9 @@ ggGroup <- function( val <- 1/length(unique(x)) p <- p + geom_density_ridges(data = df, aes(x = y, y = x, color = x, fill = x), scale = ridgeScale, - alpha = alpha, color = "black") + scale_y_discrete(expand = expand_scale(mult = c(0.01, val))) + 
alpha = alpha, color = "black") + scale_y_discrete(expand = expansion(mult = c(0.01, val))) + xmax <- layer_scales(p)$x$range$range[2] + p <- p + xlim(0, xmax) } }else{ type <- "violin" From 8f2ec3a0df1954dd4334ef6cf5caedeb794441f5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sun, 13 Mar 2022 11:05:28 -0700 Subject: [PATCH 104/162] revert xlim change I decided against implementing an xlim cutoff because there are situations where users may want negative values shown and it doesnt seem worth the gain to add an extra argument to handle this edge case. https://github.com/GreenleafLab/ArchR/issues/1249 --- R/GgplotUtils.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 90fd6447..4d4c31ea 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -729,8 +729,6 @@ ggGroup <- function( p <- p + geom_density_ridges(data = df, aes(x = y, y = x, color = x, fill = x), scale = ridgeScale, alpha = alpha, color = "black") + scale_y_discrete(expand = expansion(mult = c(0.01, val))) - xmax <- layer_scales(p)$x$range$range[2] - p <- p + xlim(0, xmax) } }else{ type <- "violin" From 6a005c47ddc4682cd4916cc98fb0de2c021953c6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 14 Mar 2022 21:37:34 -0700 Subject: [PATCH 105/162] bugfixes problems when only one sample is provided, return value was a list also when rowsToRemove was NULL, this caused errors --- R/MultiModal.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 24b25f4d..2ff1e963 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -110,12 +110,14 @@ import10xFeatureMatrix <- function( rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } if (strictMatch) { - rse_final <- rse_final[-rowsToRemove,] + if(length(rowsToRemove) > 0) { + rse_final <- rse_final[-rowsToRemove,] + } } return(rse_final) } else { - return(featureMats) + return(featureMats[[1]]) } } From 849a59c2e728304c2fef15a057515aa93e36a964 Mon Sep 
17 00:00:00 2001 From: Ryan Corces Date: Tue, 15 Mar 2022 21:07:09 -0700 Subject: [PATCH 106/162] change how featureMats are sorted trying to fix https://github.com/GreenleafLab/ArchR/issues/507#issuecomment-1068558649 --- R/MultiModal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 2ff1e963..ed444ebb 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -40,9 +40,9 @@ import10xFeatureMatrix <- function( name = names[y]) }) - message("Re-ordering RNA matricies alphabetically for consistency.") + message("Re-ordering RNA matricies for consistency.") for(j in 1:length(featureMats)) { - featureMats[[j]] <- featureMats[[j]][order(rownames(featureMats[[j]])),] + featureMats[[j]] <- sort(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) } #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects From 42c8be39d003545bccab9450f53da6815809f7e6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 15 Mar 2022 21:32:30 -0700 Subject: [PATCH 107/162] specify sort function https://github.com/GreenleafLab/ArchR/issues/507#issuecomment-1068719393 --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index ed444ebb..6733d78c 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -42,7 +42,7 @@ import10xFeatureMatrix <- function( message("Re-ordering RNA matricies for consistency.") for(j in 1:length(featureMats)) { - featureMats[[j]] <- sort(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) + featureMats[[j]] <- sort.GenomicRanges(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) } #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects From 0aa874a05b2969c4796d30591f98c0e09937aa8a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Mar 2022 09:39:31 -0700 Subject: [PATCH 108/162] update comments --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index c19333a6..98a1e251 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -808,7 +808,7 @@ plotBrowserTrack <- function( } ########################################################## - # Bulk Tracks + # Single-cell Tracks ########################################################## if("sctrack" %in% tolower(plotSummary)){ .logDiffTime(sprintf("Adding SC Tracks (%s of %s)",x,length(region)), t1=tstart, verbose=verbose, logFile=logFile) @@ -850,7 +850,7 @@ plotBrowserTrack <- function( } ########################################################## - # Feature Tracks + # Loop Tracks ########################################################## if("looptrack" %in% tolower(plotSummary)){ if(!is.null(loops)){ From 420a3c2573fc07ad27ac4cf3fa911ceccc0176fb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 22 Mar 2022 13:11:08 -0700 Subject: [PATCH 109/162] update function param docs --- R/ArrowRead.R | 3 ++- R/Harmony.R | 2 ++ R/IntegrativeAnalysis.R | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 4129242c..8fbb09aa 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -250,7 +250,8 @@ getFragmentsFromArrow <- function( #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. #' @param useSeqnames A character vector of chromosome names to be used to subset the data matrix being obtained. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. -#' @param binarize A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts. +#' @param binarize A boolean value indicating whether the matrix should be binarized before return. +#' This is often desired when working with insertion counts. 
Note that if the matrix has already been binarized previously, this should be set to `TRUE`. #' @param logFile The path to a file to be used for logging ArchR output. #' @export getMatrixFromProject <- function( diff --git a/R/Harmony.R b/R/Harmony.R index 5a2f33ac..ca9fbabe 100644 --- a/R/Harmony.R +++ b/R/Harmony.R @@ -13,6 +13,8 @@ #' to sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param name The name to store harmony output as a `reducedDims` in the `ArchRProject` object. #' @param groupBy The name of the column in `cellColData` to use for grouping cells together for vars in harmony batch correction. +#' The value of `groupBy` is passed to the `vars_use` parameter in `harmony::HarmonyMatrix()`. When run through ArchR, this parameter +#' defines which variables to correct for during batch correction. See `harmony::HarmonyMatrix()` for more information. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param force A boolean value that indicates whether or not to overwrite data in a given column when the value passed to `name` already #' exists as a column name in `cellColData`. diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index f662630b..8695c83d 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -846,7 +846,8 @@ addCoAccessibility <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param corCutOff A numeric describing the minimum numeric peak-to-peak correlation to return. -#' @param resolution A numeric describing the bp resolution to return loops as. This helps with overplotting of correlated regions. +#' @param resolution A numeric describing the bp resolution to use when returning loops. This helps with overplotting of correlated regions. +#' This only takes affect if `returnLoops = TRUE`. 
#' @param returnLoops A boolean indicating to return the co-accessibility signal as a `GRanges` "loops" object designed for use with #' the `ArchRBrowser()` or as an `ArchRBrowserTrack()`. #' @export From 293d20fcd199eb3964d8e6606fd70923622bfdde Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 27 Mar 2022 21:16:59 -0700 Subject: [PATCH 110/162] Bug Fixes 20220327 Bug Fixes - Timeout for downloading tutorial/any url data in inputData - Fixed bug for GroupCoverages path not being updated after saveArchRProject --- .DS_Store | Bin 14340 -> 16388 bytes R/AllClasses.R | 54 +++++++++++++++---------------------------------- R/InputData.R | 25 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/.DS_Store b/.DS_Store index c201598da24f47b1678239b13fdff6ae6e5eb315..acd08dd80702b22ae611211269c826d0cd190fe5 100644 GIT binary patch delta 1000 zcmb7DO=}ZT6g_Wdl1!KZC$**)8W^$0h3SyADaM718nX!0R@%DKO`4R5rk!Fxpj4?~ zcP>;OF5I{&xU_`#3kvE#@F%zs*DhS?nE}U{annAy^X?nYx%Zy?-aXtrd@d#dh+8|o z8qlC?(t_QwfGW+kfs59g$!nR*{A&zXlz!xzMqcp5Wm>9W77bOlzv;r1kBe9D<*|eo z=5QN#xO-SfuRZX)s{?b5YlzLTskrR347*926JC_b} zm`58;W>~;7T1l?nB-e{&Ffo9SKFwpUUG!7JB!NC~Au8en1z6?eZjQ@vhs8`HD$=Gd&t6;8A(26`0J z#a1$!)@QUxxGa^w9E2^0#p>8%Jmlqgv`wCTJW8@dff1XL95!JG+eA*0>$55ASi=VY zc-W&qNbv);aefBP`M)hnLPObcXdtM=z?~E>cqI;KkHLUtkO4=F8M!8RNGUQiG6YTb zm+u$0Fg4OqFsRj0sJ1Y&&`~fnFfyO~QQdTNtjs~C&1Z}jGfn0ZP~Y4w(7?X2;U^;_ z*XAnl>W?_S^OcP6dC0QYM8gK(?S7euOEPT&A cncv0}>;)!>-z+Al*i7AgP*t4~%}^gk06_aZKL7v# diff --git a/R/AllClasses.R b/R/AllClasses.R index f87ff98c..be9c05e9 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -569,51 +569,29 @@ saveArchRProject <- function( newProj@imputeWeights <- SimpleList() } - #Copy Other Folders 2 layers nested + #Copy Recursively message("Copying Other Files...") for(i in seq_along(oldFiles)){ - - fin <- file.path(outDirOld, oldFiles[i]) - fout <- file.path(outputDirectory, oldFiles[i]) - message(sprintf("Copying Other Files (%s of %s): %s", i, length(oldFiles), 
basename(fin))) - - if(dir.exists(fin)){ - - dir.create(file.path(outputDirectory, basename(fin)), showWarnings=FALSE) - fin2 <- list.files(fin, full.names = TRUE) - - for(j in seq_along(fin2)){ - - if(dir.exists(fin2[j])){ - - dir.create(file.path(outputDirectory, basename(fin), basename(fin2)[j]), showWarnings=FALSE) - fin3 <- list.files(fin2[j], full.names = TRUE) - - for(k in seq_along(fin3)){ - - cf <- file.copy(fin3[k], file.path(fout, basename(fin3[k])), overwrite = overwrite) - - } - - }else{ - - cf <- file.copy(fin2[j], file.path(fout, basename(fin2[j])), overwrite = overwrite) - - } - - } - - }else{ - - cf <- file.copy(fin, fout, overwrite = overwrite) - - } - + message(sprintf("Copying Other Files (%s of %s): %s", i, length(oldFiles), oldFiles[i])) + oldPath <- file.path(outDirOld, oldFiles[i]) + file.copy(oldPath, outputDirectory, recursive=TRUE, overwrite=overwrite) } + #Set New Info newProj@sampleColData <- newProj@sampleColData[names(ArrowFilesNew), , drop = FALSE] newProj@sampleColData$ArrowFiles <- ArrowFilesNew[rownames(newProj@sampleColData)] + #Check for Group Coverages Copied + groupC <- length(newProj@projectMetadata$GroupCoverages) + if(length(groupC) > 0){ + for(z in seq_len(groupC)){ + zdata <- newProj@projectMetadata$GroupCoverages[[z]]$coverageMetadata + zfiles <- gsub(outDirOld, outputDirectory, zdata$File) + newProj@projectMetadata$GroupCoverages[[z]]$coverageMetadata$File <- zfiles + stopifnot(all(file.exists(zfiles))) + } + } + } message("Saving ArchRProject...") diff --git a/R/InputData.R b/R/InputData.R index 1e12c11e..e25e743e 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -17,6 +17,10 @@ getTutorialData <- function( .validInput(input = threads, name = "threads", valid = c("integer")) ######### + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + if(tolower(tutorial) %in% c("heme","hematopoiesis")){ if(!dir.exists("HemeFragments")){ @@ -49,6 +53,10 @@ getTutorialData <- function( } + 
#Set back URL Options + options(timeout=oldTimeout) + + #Return Fragment Files inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] @@ -62,14 +70,24 @@ getTutorialData <- function( #' #' @export getTestFragments <- function(x){ + + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + if(!file.exists("PBMCSmall.tsv.gz")){ download.file( url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.tsv.gz", destfile = "PBMCSmall.tsv.gz" ) } + #Set back URL Options + options(timeout=oldTimeout) + + #Add Genome Return Name Vector addArchRGenome("hg19test") c("PBMC" = "PBMCSmall.tsv.gz") + } #' Get PBMC Small Test Project @@ -78,6 +96,10 @@ getTestFragments <- function(x){ #' #' @export getTestProject <- function(){ + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + #Download if(!dir.exists("PBMCSmall")){ download.file( url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.zip", @@ -86,6 +108,9 @@ getTestProject <- function(){ unzip("PBMCSmall.zip", exdir = getwd()) file.remove("PBMCSmall.zip") } + #Set back URL Options + options(timeout=oldTimeout) + #Load addArchRGenome("hg19test") loadArchRProject("PBMCSmall") } From 506161d36d1457d4b3aba20918c0e4f622ae937e Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Tue, 29 Mar 2022 22:54:58 -0700 Subject: [PATCH 111/162] Bugfix https://github.com/GreenleafLab/ArchR/issues/252#issuecomment-796829243 Issue was due to C++ overwriting ranges after each iteration when using 1 core vs being copied for each core and not being a noticeable bug. This fix should be more stable for other functions using this as well. 
--- .DS_Store | Bin 16388 -> 16388 bytes DESCRIPTION | 2 +- R/QualityControl.R | 62 ++++++++++++++++++++++------------ man/ArchRBrowser.Rd | 3 +- man/addGroupCoverages.Rd | 6 ++++ man/addHarmony.Rd | 4 ++- man/addIterativeLSI.Rd | 3 +- man/addModuleScore.Rd | 24 +++++++++---- man/addPeak2GeneLinks.Rd | 2 ++ man/addReproduciblePeakSet.Rd | 2 +- man/addTileMatrix.Rd | 1 - man/createGenomeAnnotation.Rd | 5 +-- man/getCoAccessibility.Rd | 3 +- man/getGroupBW.Rd | 3 +- man/getMatches.Rd | 3 +- man/getMatrixFromProject.Rd | 10 ++++-- man/import10xFeatureMatrix.Rd | 10 ++++++ man/plotBrowserTrack.Rd | 5 +-- man/projectBulkATAC.Rd | 3 ++ src/Footprinting_utils.cpp | 3 ++ src/RcppExports.cpp | 5 +++ 21 files changed, 116 insertions(+), 43 deletions(-) diff --git a/.DS_Store b/.DS_Store index acd08dd80702b22ae611211269c826d0cd190fe5..6010e1e42cb6a5dfbfe254daf8f62afd56f32fb7 100644 GIT binary patch delta 2591 zcmeH{Yitx%6vyv>wkNRyN~X6+it6U(Uv}F3l#d`k@ zJWAP;G#FGAGf@OZjEY3m22%;*6F*=CkwmldRPYfKFp3frKz!br-K8Y>MWWx>4|DH5 zXL9$>`Q6_c8pO~b_U)uNqHIkW%rW)%SCkgzrFo0L)pd0BbP&Q_=;-Zo`}{>kr@-l=qGQJJf~c38 zTf_y*wuAxWaIC>%cVv$%E}vX8eb(&Y*tmF_kSHlDGjcNM9$wG_$wo9ta!IG(32VSbmhL+;X2 zQmHtBrwu}DpX~Lydpx0Fr8tRCK)%q{+0)%Ed;Gpoc#5RVbr|kb4_0fHq=oAL)VN%- ziycD^&X_4li+uMxy>ioPkL;zNh2sq@-cIASOrz81pYC$|d%WRfla{1zPoLZG^s^&0 zvtgBw=aqUXFR+^olY``aa)f+JPLNaNG&w`QB|nf$8LP8P@NJlR6 zFb)MM!DLinDylIZv(bdPn1?nv(E%4^bfXWeu@3zh!WKM=t#};I;bn~A4eZBzID(IH z44>eHj0?Dk@9_&4!;Rs1PT&$bBWGfG*^fhLB(32wJa85^8O7fl#=rER@Y`n06iZb6 z&eO4B;7Nv5^%hK+RDGlTAz%?kH}h@rx!1`{R1o*{xZL-;Wmn4sZjY-L~N%^G5B~ zR z8hGZ+O$J5cO+0_AKapg!n!ME?NA<=v+DG3&;9%eWfjLQOPqhaQljF>#^W-AAOs3UP=Bg&7uDVPiI_49ZZ0Sz(8ov4qvVQ?tkezvj^zY-BZmnALm>p2RLZj~DPF zUc)f<;dLBhHXUU?eU5WDkFW6!e#DiDxQc6>$VprhXW+8A9Ilv~#7*Psxp||YwAtd& zI`ZUNl%pv#M$X#*O2jVZY_U0NRzqW1q`E6khk9m`A=#Lw7P+FT_P>=VXfj(;tTw$} zX-_mMU*^RHv_OVFQ*Ep?I+VX#pQAQa8WT)0TTFTRR%_TR^=18dYl*GmT%Q- zUTL|B`pE**ibiQ0>8I*z1bSUyUa1?^O=BBHyb9Bey#?=q()7S7@)c{6m&u#TF*hGEJsVCX)xfSP8Et(*^_>SXCvpMQ+1WnoK+K685lGxfidp 
zR(Svi@fP03J2;HvIEgPpoD|s2_)b}4`|!@y{?}K#JT=<+zjIQ4OSRu2bX(0wIcv}P E8wIm`GXMYp delta 1669 zcmd^;?N3`(9LLYM6u1wRU1n%|OQ8=0s?e1_QrH*+!NFW7W@6?B&b73fx>6X$8K+p( z#)!@=+fk!t#HYj;)~AR=7q`fkIG0Rk98;H#z2L=6HVr0{X(Spw_xAc`e}ReT#Xb3* z-@W(zexLI>xk=qN5>OeM&il)lZm)= zgyYW}%2!FXHd!7}GF@W{_Kb6!y6n1H7Wy;EeF-V5z35&e2>rX0>9~|(e^x&A`33pz zR5X@INpZgLS#wyBx9v@gWs)OlX|uMtt}P_U_Y92;4=2*&nF_eI^^Q(i-l=RIiDt(8 zMiXgP)@ioCQnhT8duY1WNDtiP$i7MPWR@Hwr^s0{PrfHVkPBpyTqVDdKgbICll+Ae ztb!Fau)&U75JEG;=)i60K`%C8E4E=fc3=Q;JctyAk;cQw;t4#7r?4L{;$^&oSMe4; z!pAs*S$vK;oWL1;h3{}thf8!v$R=JUg#1f>uEC+-*duf+?CDYS1N3BL+mNNs;^G)t zNvHH63k?=G&-h72YG`Q^d>kVs6&JEWp_ykaC4C{=Dy-ueGO47HZ5KND3f&YhkwRQ& z>E<~J)M#9jJwd)E=NN;_|Dw!GgdM`p(q%STh z<#<694VKMI&ICB<;zW0M_ofxu(iEw##=2x;A#s+uo8B;P4sw;Eqjuj zVL~jBAITC~CfCVtDh?hZ%3y#ICdjbB1+R*uoe2@a9V&+H48bmjAjS~vK?dV^2#@1w zwf$#7aTu@Rb-aOhkjHy?AIEVDU*a^*VIB*(f@@f2bqt28ah+DVmS%YSx2y32z2ukm ztc`lL4MX5iu_w^7{#Kf9?4!#8Pqs?7m>o{5RpM0Dm9XAr6(y(_QVAPfEURPJz${y; zM6cDaDWrtfkQmk!QnPEtHccTlyFu*K6jHO@;znLtVFum6Z7%3AEHYjz+=YJJtBMiB5W6U8jNnE!p2L3X4arTj_!LL+8IIu#oW*?MWE4Hr5z%IC!UEk> X{mp;*0Dqn-ZTVI2VRHP>L0tU@v~rHX diff --git a/DESCRIPTION b/DESCRIPTION index 0f411743..5659170d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Roxygen: list(markdown = TRUE) License: GPL (>= 2) LinkingTo: Rcpp LazyData: TRUE -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Encoding: UTF-8 Imports: Rcpp (>= 0.12.16), diff --git a/R/QualityControl.R b/R/QualityControl.R index 0c76ee36..39672ab5 100644 --- a/R/QualityControl.R +++ b/R/QualityControl.R @@ -47,8 +47,10 @@ plotTSSEnrichment <- function( chr <- paste0(seqnames(chromSizes)) chr <- gtools::mixedsort(intersect(chr, paste0(seqnames(TSS)))) + .logThis(chr, paste0("chr"), logFile = logFile) TSS <- sort(sortSeqlevels(TSS)) splitTSS <- split(GenomicRanges::resize(TSS,1,"start"), seqnames(TSS))[chr] + .logThis(splitTSS, paste0("splitTSS"), logFile = logFile) window <- 2 * flank + 1 groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = FALSE) 
uniqGroups <- gtools::mixedsort(unique(groups[,1])) @@ -57,50 +59,68 @@ plotTSSEnrichment <- function( h5disableFileLocking() } - dfTSS <- .safelapply(seq_along(uniqGroups), function(x){ + dfTSS <- .safelapply(seq_along(uniqGroups), function(z){ - .logDiffTime(paste0(uniqGroups[x], " Computing TSS (",x," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) + .logDiffTime(paste0(uniqGroups[z], " Computing TSS (",z," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) - cellx <- rownames(groups)[which(paste0(groups[,1]) == uniqGroups[x])] + cellx <- rownames(groups)[which(paste0(groups[,1]) == uniqGroups[z])] - for(i in seq_along(chr)){ + for(k in seq_along(chr)){ - TSSi <- splitTSS[[chr[i]]] + #TSS for Chr + TSSi <- splitTSS[[chr[k]]] - covi <- unlist(suppressMessages(getFragmentsFromProject( + #Set TSS To be a dummy chr1 + TSSi <- GRanges(seqnames=rep("chr1",length(TSSi)), ranges = ranges(TSSi), strand = strand(TSSi)) + .logThis(TSSi, paste0(uniqGroups[z], " : TSSi : ", chr[k]), logFile = logFile) + + #Extract Fragments + covi <- suppressMessages(getFragmentsFromProject( ArchRProj = ArchRProj, - subsetBy = chromSizes[paste0(seqnames(chromSizes)) %in% chr[i]], + subsetBy = chromSizes[paste0(seqnames(chromSizes)) %in% chr[k]], cellNames = cellx, logFile = logFile - )), use.names=FALSE) %>% - sort %>% - {coverage(IRanges(c(start(.), end(.)), width = 1))} - - .logThis(covi, paste0(uniqGroups[x], " : Cov : ", chr[i]), logFile = logFile) - - if(i == 1){ - sumTSS <- rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + ) %>% unlist(use.names = FALSE)) + .logThis(covi, paste0(uniqGroups[z], " : Fragments : ", chr[k]), logFile = logFile) + + #Get Insertions + covi <- sort(c(start(covi), end(covi))) + .logThis(covi, paste0(uniqGroups[z], " : Insertions : ", chr[k]), logFile = logFile) + + #IRanges + covi <- IRanges(start = covi, width = 1) + .logThis(covi, paste0(uniqGroups[z], " : Insertions2 : ", chr[k]), logFile = logFile) + + 
#Coverage + covi <- IRanges::coverage(covi) + .logThis(covi, paste0(uniqGroups[z], " : Cov : ", chr[k]), logFile = logFile) + + #Compute Sum + sumTSSi <- rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + .logThis(sumTSSi, paste0(uniqGroups[z], " : SumTSS 1 : ", chr[k]), logFile = logFile) + + if(k == 1){ + sumTSS <- sumTSSi }else{ - sumTSS <- sumTSS + rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + sumTSS <- sumTSS + sumTSSi } - - .logThis(sumTSS, paste0(uniqGroups[x], " : SumTSS : ", chr[i]), logFile = logFile) + .logThis(sumTSS, paste0(uniqGroups[z], " : SumTSS : ", chr[k]), logFile = logFile) } normBy <- mean(sumTSS[c(1:norm,(flank*2-norm+1):(flank*2+1))]) df <- DataFrame( - group = uniqGroups[x], + group = uniqGroups[z], x = seq_along(sumTSS) - flank - 1, value = sumTSS, normValue = sumTSS / normBy, smoothValue = .centerRollMean(sumTSS/normBy, 11) ) - .logThis(df, paste0(uniqGroups[x], " : TSSDf"), logFile = logFile) + .logThis(df, paste0(uniqGroups[z], " : TSSDf"), logFile = logFile) - .logDiffTime(paste0(uniqGroups[x], " Finished Computing TSS (",x," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) + .logDiffTime(paste0(uniqGroups[z], " Finished Computing TSS (",z," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) df diff --git a/man/ArchRBrowser.Rd b/man/ArchRBrowser.Rd index 62f8fb45..bbe9fa5e 100644 --- a/man/ArchRBrowser.Rd +++ b/man/ArchRBrowser.Rd @@ -55,5 +55,6 @@ To install try devtools::install_github("rstudio/shinythemes").} } \description{ This function will open an interactive shiny session in style of a browser track. It allows for normalization of the signal which -enables direct comparison across samples. +enables direct comparison across samples. Note that the genes displayed in this browser are derived from your \code{geneAnnotation} +(i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. 
} diff --git a/man/addGroupCoverages.Rd b/man/addGroupCoverages.Rd index be1d3792..3622b119 100644 --- a/man/addGroupCoverages.Rd +++ b/man/addGroupCoverages.Rd @@ -8,6 +8,7 @@ addGroupCoverages( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, + sampleLabels = "Sample", minCells = 40, maxCells = 500, maxFragments = 25 * 10^6, @@ -30,6 +31,11 @@ addGroupCoverages( \item{useLabels}{A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation.} +\item{sampleLabels}{The name of a column in \code{cellColData} to use to identify samples. In most cases, this parameter should be left as \code{NULL} and you +should only use this parameter if you do not want to use the default sample labels stored in \code{cellColData$Sample}. However, if your individual Arrow +files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype.} + \item{minCells}{The minimum number of cells required in a given cell group to permit insertion coverage file generation.} \item{maxCells}{The maximum number of cells to use during insertion coverage file generation.} diff --git a/man/addHarmony.Rd b/man/addHarmony.Rd index 4f82147e..260514d4 100644 --- a/man/addHarmony.Rd +++ b/man/addHarmony.Rd @@ -34,7 +34,9 @@ to sequencing depth that is greater than the \code{corCutOff}, it will be exclud \item{name}{The name to store harmony output as a \code{reducedDims} in the \code{ArchRProject} object.} -\item{groupBy}{The name of the column in \code{cellColData} to use for grouping cells together for vars in harmony batch correction.} +\item{groupBy}{The name of the column in \code{cellColData} to use for grouping cells together for vars in harmony batch correction. 
+The value of \code{groupBy} is passed to the \code{vars_use} parameter in \code{harmony::HarmonyMatrix()}. When run through ArchR, this parameter +defines which variables to correct for during batch correction. See \code{harmony::HarmonyMatrix()} for more information.} \item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} diff --git a/man/addIterativeLSI.Rd b/man/addIterativeLSI.Rd index 40afe0b2..fedd8496 100644 --- a/man/addIterativeLSI.Rd +++ b/man/addIterativeLSI.Rd @@ -70,8 +70,7 @@ Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logid \item{scaleDims}{A boolean that indicates whether to z-score the reduced dimensions for each cell. This is useful forminimizing the contribution of strong biases (dominating early PCs) and lowly abundant populations. However, this may lead to stronger sample-specific biases since -it is over-weighting latent PCs. If set to \code{NULL} this will scale the dimensions based on the value of \code{scaleDims} when the \code{reducedDims} were -originally created during dimensionality reduction. This idea was introduced by Timothy Stuart.} +it is over-weighting latent PCs.} \item{corCutOff}{A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to sequencing depth that is greater than the \code{corCutOff}, it will be excluded from analysis.} diff --git a/man/addModuleScore.Rd b/man/addModuleScore.Rd index d1a2964e..2a27fe91 100644 --- a/man/addModuleScore.Rd +++ b/man/addModuleScore.Rd @@ -19,16 +19,28 @@ addModuleScore( \arguments{ \item{ArchRProj}{An \code{ArchRProject} object.} -\item{seed}{A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can -reproduce results downstream.} +\item{useMatrix}{The name of the matrix to be used for calculation of the module score. 
See \code{getAvailableMatrices()} to view available options.} + +\item{name}{The name to be given to the designated module. If \code{features} is a list, this name will be prepended to the feature set names given in the list as shown below.} + +\item{features}{A list of feature names to be grouped into modules. For example, \code{list(BScore = c("MS4A1", "CD79A", "CD74"), TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1"))}. +Each named element in this list will be stored as a separate module. The examples given in these parameters would yield two modules called \code{Module.BScore} and \code{Module.TScore}. +If the elements of this list are not named, they will be numbered in order, i.e. \code{Module1}, \code{Module2}.} + +\item{nBin}{The number of bins to use to divide all features for identification of signal-matched features for background calculation} + +\item{nBgd}{The number of background features to use for signal normalization.} + +\item{seed}{A number to be used as the seed for random number generation required when sampling cells for the background set. It is recommended +to keep track of the seed used so that you can reproduce results downstream.} \item{threads}{The number of threads to be used for parallel computing.} \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ -This function computes imputations weights that describe each cell as a linear combination of many cells based on a MAGIC diffusion matrix. -} -\details{ -RRR +This function calculates a module score from a set of features across all cells. This allows for +grouping of multiple features together into a single quantitative measurement. Currently, this +function only works for modules derived from the \code{GeneScoreMatrix}. 
Each module is added as a +new column in \code{cellColData} } diff --git a/man/addPeak2GeneLinks.Rd b/man/addPeak2GeneLinks.Rd index 11edf142..891f9ff8 100644 --- a/man/addPeak2GeneLinks.Rd +++ b/man/addPeak2GeneLinks.Rd @@ -31,6 +31,8 @@ addPeak2GeneLinks( \item{reducedDims}{The name of the \code{reducedDims} object (i.e. "IterativeLSI") to retrieve from the designated \code{ArchRProject}.} +\item{useMatrix}{The name of the matrix containing gene expression information to be used for determining peak-to-gene links. See \code{getAvailableMatrices(ArchRProj)}} + \item{dimsToUse}{A vector containing the dimensions from the \code{reducedDims} object to use in clustering.} \item{scaleDims}{A boolean value that indicates whether to z-score the reduced dimensions for each cell. This is useful for minimizing diff --git a/man/addReproduciblePeakSet.Rd b/man/addReproduciblePeakSet.Rd index 29bea491..7bd22e82 100644 --- a/man/addReproduciblePeakSet.Rd +++ b/man/addReproduciblePeakSet.Rd @@ -56,7 +56,7 @@ This is important to allow for exclusion of pseudo-bulk replicates derived from \item{pathToMacs2}{The full path to the MACS2 executable.} -\item{genomeSize}{The genome size to be used for MACS2 peak calling (see MACS2 documentation).} +\item{genomeSize}{The genome size to be used for MACS2 peak calling (see MACS2 documentation). This is required if genome is not hg19, hg38, mm9, or mm10.} \item{shift}{The number of basepairs to shift each Tn5 insertion. 
When combined with \code{extsize} this allows you to create proper fragments, centered at the Tn5 insertion site, for use with MACS2 (see MACS2 documentation).} diff --git a/man/addTileMatrix.Rd b/man/addTileMatrix.Rd index 9c8849ea..b1e71406 100644 --- a/man/addTileMatrix.Rd +++ b/man/addTileMatrix.Rd @@ -42,5 +42,4 @@ is to retrieve this from the \code{ArchRProject} using \code{getBlacklist()}.} } \description{ This function, for each sample, will independently compute counts for each tile -per cell in the ArrowFile } diff --git a/man/createGenomeAnnotation.Rd b/man/createGenomeAnnotation.Rd index f90fe5a9..e52160c0 100644 --- a/man/createGenomeAnnotation.Rd +++ b/man/createGenomeAnnotation.Rd @@ -20,10 +20,11 @@ createGenomeAnnotation( \item{blacklist}{A \code{GRanges} object containing regions that should be excluded from analyses due to unwanted biases.} \item{filter}{A boolean value indicating whether non-standard chromosome scaffolds should be excluded. -These "non-standard" chromosomes are defined by \code{filterChrGR()}.} +These "non-standard" chromosomes are defined by \code{filterChrGR()} and by manual annotation using the \code{filterChr} parameter.} \item{filterChr}{A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -If no manual removal is desired, \code{filterChr} should be set to \code{NULL}.} +If no manual removal is desired, \code{filterChr} should be set to \code{NULL}. If \code{filter} is set to \code{TRUE} but \code{filterChr} is set to \code{NULL}, +non-standard chromosomes will still be removed as defined in \code{filterChrGR()}.} } \description{ This function will create a genome annotation object that can be used for creating ArrowFiles or an ArchRProject, etc. 
diff --git a/man/getCoAccessibility.Rd b/man/getCoAccessibility.Rd index ce8b1c10..b97163e2 100644 --- a/man/getCoAccessibility.Rd +++ b/man/getCoAccessibility.Rd @@ -16,7 +16,8 @@ getCoAccessibility( \item{corCutOff}{A numeric describing the minimum numeric peak-to-peak correlation to return.} -\item{resolution}{A numeric describing the bp resolution to return loops as. This helps with overplotting of correlated regions.} +\item{resolution}{A numeric describing the bp resolution to use when returning loops. This helps with overplotting of correlated regions. +This only takes effect if \code{returnLoops = TRUE}.} \item{returnLoops}{A boolean indicating to return the co-accessibility signal as a \code{GRanges} "loops" object designed for use with the \code{ArchRBrowser()} or as an \code{ArchRBrowserTrack()}.} diff --git a/man/getGroupBW.Rd b/man/getGroupBW.Rd index 10e80ef0..f90166fb 100644 --- a/man/getGroupBW.Rd +++ b/man/getGroupBW.Rd @@ -24,7 +24,8 @@ user-supplied \code{cellColData} metadata columns (for example, "Clusters"). Cel column will be grouped together and the average signal will be plotted.} \item{normMethod}{The name of the column in \code{cellColData} by which normalization should be performed. The recommended and default value -is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality.} +is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. Accepted values are +"None", "ReadsInTSS", "nCells", "ReadsInPromoter", or "nFrags".} \item{tileSize}{The numeric width of the tile/bin in basepairs for plotting ATAC-seq signal tracks. 
All insertions in a single bin will be summed.} diff --git a/man/getMatches.Rd b/man/getMatches.Rd index 3c6f7211..36e369ab 100644 --- a/man/getMatches.Rd +++ b/man/getMatches.Rd @@ -14,5 +14,6 @@ getMatches(ArchRProj = NULL, name = NULL, annoName = NULL) \item{annoName}{The name of a specific annotation to subset within the \code{peakAnnotation}.} } \description{ -This function gets peak annotation matches from a given ArchRProject. +This function gets peak annotation matches from a given ArchRProject. The peaks in the returned object are in the +same order as the peaks returned by \code{getPeakSet()}. } diff --git a/man/getMatrixFromProject.Rd b/man/getMatrixFromProject.Rd index 96b01a92..2e96d395 100644 --- a/man/getMatrixFromProject.Rd +++ b/man/getMatrixFromProject.Rd @@ -23,10 +23,16 @@ getMatrixFromProject( \item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} -\item{binarize}{A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts.} +\item{binarize}{A boolean value indicating whether the matrix should be binarized before return. +This is often desired when working with insertion counts. Note that if the matrix has already been binarized previously, this should be set to \code{TRUE}.} \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ -This function gets a given data matrix from an \code{ArchRProject}. +This function gets a given data matrix from an \code{ArchRProject} and returns it as a \code{SummarizedExperiment}. +This function will return the matrix you ask it for, without altering that matrix unless you tell it to. +For example, if you added your \code{PeakMatrix} using \code{addPeakMatrix()} with \code{binarize = TRUE}, then +\code{getMatrixFromProject()} will return a binarized \code{PeakMatrix}. 
Alternatively, you could set \code{binarize = TRUE} +in the parameters passed to \code{getMatrixFromProject()} and the \code{PeakMatrix} will be binarized as you pull +it out. No other normalization is applied to the matrix by this function. } diff --git a/man/import10xFeatureMatrix.Rd b/man/import10xFeatureMatrix.Rd index 79f000fc..61261bd5 100644 --- a/man/import10xFeatureMatrix.Rd +++ b/man/import10xFeatureMatrix.Rd @@ -7,6 +7,8 @@ import10xFeatureMatrix( input = NULL, names = NULL, + strictMatch = TRUE, + verbose = TRUE, featureType = "Gene Expression" ) } @@ -15,6 +17,14 @@ import10xFeatureMatrix( \item{names}{A character of sample names associated with each input file.} +\item{strictMatch}{Only relevant when multiple input files are used. A boolean that indicates whether rows (genes) that do not match perfectly in the matrices +should be removed (\code{strictMatch = TRUE}) or coerced (\code{strictMatch = FALSE}). CellRanger seems to occasionally use different ensembl ids for the same gene across +different samples. If you are comfortable tolerating such mismatches, you can coerce all matrices to fit together, in which case the gene metadata present in +the first listed sample will be applied to all matrices for that particular gene entry. Regardless of what value is used for \code{strictMatch}, this function +cannot tolerate mismatched gene names, only mismatched metadata for the same gene.} + +\item{verbose}{Only relevant when multiple input files are used. A boolean that indicates whether messaging about mismatches should be verbose (\code{TRUE}) or minimal (\code{FALSE})} + \item{featureType}{The name of the feature to extract from the 10x feature file. 
See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information.} } diff --git a/man/plotBrowserTrack.Rd b/man/plotBrowserTrack.Rd index f3374f1b..ada70e90 100644 --- a/man/plotBrowserTrack.Rd +++ b/man/plotBrowserTrack.Rd @@ -87,7 +87,7 @@ is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth \item{threads}{The number of threads to use for parallel execution.} -\item{ylim}{The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. If not provided, the y-axis limit will be c(0, 0.999).} +\item{ylim}{The numeric quantile y-axis limit to be used for "bulkTrack" plotting. This should be expressed as \verb{c(lower limit, upper limit)} such as \code{c(0,0.99)}. If not provided, the y-axis limit will be c(0, 0.999).} \item{pal}{A custom palette (see \code{paletteDiscrete} or \code{ArchRPalettes}) used to override coloring for groups.} @@ -113,5 +113,6 @@ is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth } \description{ This function will plot the coverage at an input region in the style of a browser track. It allows for normalization of the signal -which enables direct comparison across samples. +which enables direct comparison across samples. Note that the genes displayed in these plots are derived from your \code{geneAnnotation} +(i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. 
} diff --git a/man/projectBulkATAC.Rd b/man/projectBulkATAC.Rd index 62c15c6f..4f5e9b77 100644 --- a/man/projectBulkATAC.Rd +++ b/man/projectBulkATAC.Rd @@ -12,6 +12,7 @@ projectBulkATAC( n = 250, verbose = TRUE, threads = getArchRThreads(), + force = FALSE, logFile = createLogFile("projectBulkATAC") ) } @@ -30,6 +31,8 @@ projectBulkATAC( \item{threads}{The number of threads used for parallel execution} +\item{force}{A boolean value indicating whether to force the projection of bulk ATAC data even if fewer than 25\% of the features are present in the bulk ATAC data set.} + \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ diff --git a/src/Footprinting_utils.cpp b/src/Footprinting_utils.cpp index 8698e16f..566fefb6 100644 --- a/src/Footprinting_utils.cpp +++ b/src/Footprinting_utils.cpp @@ -104,6 +104,9 @@ IntegerVector rleSumsStranded(List rleList, List grList, int width, Function as_ IntegerVector strand, debug, start; IntegerVector out = IntegerVector(width); + // Clone grList + grList = Rcpp::clone(grList); + int n = grList.size(); int shift = floor(width/2); diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index adae0404..308aeb3c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,6 +5,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // rowCorCpp Rcpp::NumericVector rowCorCpp(IntegerVector idxX, IntegerVector idxY, Rcpp::NumericMatrix X, Rcpp::NumericMatrix Y); RcppExport SEXP _ArchR_rowCorCpp(SEXP idxXSEXP, SEXP idxYSEXP, SEXP XSEXP, SEXP YSEXP) { From 9275d4edcdde223d2c59671ff4cdafaab1439aaf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 31 Mar 2022 05:57:08 -0700 Subject: [PATCH 112/162] normBy doesnt actually take a value Responding to https://github.com/GreenleafLab/ArchR/issues/1363 The logic of the if else statements in `.MarkersSC` was constructed in such a way 
that it would never apply normalization based on a column in cellColData --- R/MarkerFeatures.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 56c95e8a..e2321213 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -219,7 +219,12 @@ getMarkerFeatures <- function( }else{ if(tolower(normBy) == "none"){ normFactors <- NULL + }else if(normBy %in% colnames(ArchRProj@cellColData)) { + normFactors <- getCellColData(ArchRProj, normBy, drop=FALSE) + normFactors[,1] <- median(normFactors[,1]) / normFactors[,1] }else{ + .logMessage("Warning! Parameter 'normBy' was set to ", normBy," but no matching column was found in cellColData.\n", + "Continuing with normalization based on column sums of matrix!", verbose = verbose, logFile = logFile) normFactors <- scaleTo / mColSums normFactors <- DataFrame(normFactors) } From 231f5eacd5fb5a25d7b20077fd28b9600b460257 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 1 Apr 2022 08:15:59 -0700 Subject: [PATCH 113/162] change name param to annoName in response to https://github.com/GreenleafLab/ArchR/issues/1367 this allows passage of a 'name' param through to TFBSTools::getMatrixSet() via the `...` passthrough argument --- R/AnnotationPeaks.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 2a9f4999..c3c417ce 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -242,7 +242,7 @@ addPeakAnnotations <- function( #' @param motifSet The motif set to be used for annotation. Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" #' which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the #' corresponding motif sets from the `chromVAR` package. 
-#' @param name The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` +#' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` #' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR @@ -252,7 +252,7 @@ addPeakAnnotations <- function( #' (see `MOODS` for more details on this determination). #' @param width The width in basepairs to consider for motif matches. See the `motimatchr` package for more information. #' @param version An integer specifying version 1 or version 2 of chromVARmotifs see github for more info GreenleafLab/chromVARmotifs. -#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten if +#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `annoName` to be overwritten if #' it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a PWM object. 
@@ -260,7 +260,7 @@ addPeakAnnotations <- function( addMotifAnnotations <- function( ArchRProj = NULL, motifSet = "cisbp", - name = "Motif", + annoName = "Motif", species = NULL, collection = "CORE", motifPWMs = NULL, @@ -274,7 +274,7 @@ addMotifAnnotations <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = motifSet, name = "motifSet", valid = c("character", "null")) - .validInput(input = name, name = "name", valid = c("character")) + .validInput(input = annoName, name = "annoName", valid = c("character")) .validInput(input = species, name = "species", valid = c("character", "null")) .validInput(input = collection, name = "collection", valid = c("character", "null")) .validInput(input = cutOff, name = "cutOff", valid = c("numeric")) @@ -299,7 +299,7 @@ addMotifAnnotations <- function( .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addMotifAnnotations Input-Parameters", logFile = logFile) - if(name %in% names(ArchRProj@peakAnnotation)){ + if(annoName %in% names(ArchRProj@peakAnnotation)){ if(force){ message("peakAnnotation name already exists! 
Overriding.") }else{ @@ -476,16 +476,16 @@ addMotifAnnotations <- function( ) dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) - savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Positions-In-Peaks.rds")) - saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) + savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-Positions-In-Peaks.rds")) + saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-Matches-In-Peaks.rds")) - ArchRProj@peakAnnotation[[name]]$Name <- name - ArchRProj@peakAnnotation[[name]]$motifs <- motifs - ArchRProj@peakAnnotation[[name]]$motifSummary <- motifSummary - ArchRProj@peakAnnotation[[name]]$Positions <- savePositions - ArchRProj@peakAnnotation[[name]]$Matches <- saveMatches + ArchRProj@peakAnnotation[[annoName]]$Name <- annoName + ArchRProj@peakAnnotation[[annoName]]$motifs <- motifs + ArchRProj@peakAnnotation[[annoName]]$motifSummary <- motifSummary + ArchRProj@peakAnnotation[[annoName]]$Positions <- savePositions + ArchRProj@peakAnnotation[[annoName]]$Matches <- saveMatches - .safeSaveRDS(out, file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-In-Peaks-Summary.rds")), compress = FALSE) + .safeSaveRDS(out, file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-In-Peaks-Summary.rds")), compress = FALSE) .safeSaveRDS(out$motifPositions, savePositions, compress = FALSE) .safeSaveRDS(out$motifMatches, saveMatches, compress = FALSE) From 6e7abec25de9217a8830ac1d6a853702c061bf8a Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 3 Apr 2022 20:39:32 -0700 Subject: [PATCH 114/162] Bugfix and improvements Bugfix in markerTesting for NA and 0. New features for loading. 
Bugfix for uwot and peak annotations where motifs/regions dont overlap any peaks --- .DS_Store | Bin 16388 -> 16388 bytes DESCRIPTION | 22 ++-- NAMESPACE | 3 +- R/AllClasses.R | 1 + R/AnnotationPeaks.R | 83 ++++++++++++- R/ColorPalettes.R | 1 + R/Embedding.R | 177 +++++++++++++++------------- R/GlobalDefaults.R | 49 +++++++- R/MarkerFeatures.R | 11 +- man/dot-DollarNames.ArchRProject.Rd | 2 +- 10 files changed, 250 insertions(+), 99 deletions(-) diff --git a/.DS_Store b/.DS_Store index 6010e1e42cb6a5dfbfe254daf8f62afd56f32fb7..ab55183e87dd9182d504050b04a6853e65b51f63 100644 GIT binary patch delta 1638 zcmeH{OH5Ni6o$_~JiL`L+%}@6y`|-$mQa(H(v|>55)$QQVn_i&H*F}Pp%k@11Rs>x zXiQ8DTwp0iV@%Ze1fvUlFLc2IB`iRrY{bXPL>IUKFKriU)VPoxvpF;8%q-6LolGQ* zNEkQH=;cqQNSe)7U@_&}T}LgZ!#11CWidGmopL18VFA?yLS1FvK)aVH!RXx|@P|6A z))}yb#H8dkf@*CFU#F|BtF7k^^03+^KbH#SIkjERGNdui@gU@n_7 zK9DigOYxOmYL4yPK~I}cC4vMp&}J&6QgTx>wNoFRrV$#Yak@_vG);3fPoHUl7C`_Q zQm`Hyun}5hA`3a#gnSgE1lv%78q}i!jo6DO_|b|0+R%j~I2J@N!Z?L~$QZ;B&f*d- z;|i`~40muB_wWppc!8IAjdz&G2Ykd=EMkcf(=a`gn1NZDjTNynR?aG7EilBJYK+Ht zU{&%%{qEbd#wAV0;O{+17^|^S5R<@ z$y54toi@)RNg@-+1R+}GHp#A1G>Hk(s>3325sI|sm%NCVs1#vhV(hq`%i#j;pH%!+ z#3aqoTY67lXaUKJ0ue@-VTBW=*nyp>L=|?SE{=a6d}u~XR6&O#;1IgegX4(c1WvXp z8U~^ghB1P3IFAdsh-p#%x n@8}k@6*wAEow_>8|7(<88A-kWrgz2cYF72G&`%}L8gst`V25On delta 1585 zcmeIyOGuPa6bJD0|3>Tkyrtjhn9e)XYJ5=YI6YJ}9eb!l4;wSBET@^p)S%{w*@M(@ zllFp3CPfGtMNwjuMUO=-gfwsw6Fr2?phZxNHfAoWab{Lgt)!KA^XDAS=HB1owzs0a z75!(l7~*HOt=yorrGd&k-X^QVQ|BO3q&Vt5Zf|3H`dbK!u<$u^MJa-@`I_==727MT z__#8bE6h%wo67j{q!^7P^+m?%5*DW{&sv>lU2og$i;|VmF)GIRn90QV>5X8#LvOY* zyVrf#CD({jU?Me*G5daZtyA_&!A^Px)0BH^94?RSl*GW0Wd+kzG`SkQ?z&pJG}O$_ zVO%ntQ>|%hrseU>gf=5QWy2mT^gW!G)N=#hQ{a< zeWeMSr0+BZ1?C|VQIMgA20G}G0yDB;K>- zOA`IhEQk97mo+vRSU=$CAT>Wj-sAZl13bhdJjNhK@QU|l iCWs$JU$m@@n%&kJ-PV>x*{~A7OO#TJ7&u7N~ diff --git a/DESCRIPTION b/DESCRIPTION index 5659170d..6953679c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ArchR Type: Package -Date: 2021-02-23 +Date: 2022-04-03 
Title: Analyzing single-cell regulatory chromatin in R. Version: 1.0.2 Authors@R: c( @@ -14,6 +14,14 @@ LazyData: TRUE RoxygenNote: 7.1.2 Encoding: UTF-8 Imports: + ggplot2, + SummarizedExperiment, + data.table, + Matrix, + rhdf5, + magrittr, + S4Vectors (>= 0.9.25), + BiocGenerics, Rcpp (>= 0.12.16), matrixStats, plyr, @@ -29,17 +37,9 @@ Imports: grid, gridExtra, Biostrings, - ComplexHeatmap -Depends: - ggplot2, - SummarizedExperiment, - data.table, - Matrix, - rhdf5, - magrittr, - S4Vectors (>= 0.9.25), - BiocGenerics, + ComplexHeatmap, GenomicRanges +Depends: Collate: 'AllClasses.R' 'AnnotationGenome.R' diff --git a/NAMESPACE b/NAMESPACE index ce5679a8..7fb50b4b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,10 @@ S3method("$",ArchRProject) S3method("$<-",ArchRProject) S3method("[",ArchRProject) -S3method(.DollarNames,ArchRProject) export("%bcin%") export("%bcni%") export("%ni%") +export(.DollarNames.ArchRProject) export(ArchRBrowser) export(ArchRBrowserTrack) export(ArchRPalettes) @@ -158,5 +158,6 @@ export(subsetCells) export(theme_ArchR) export(trajectoryHeatmap) export(validBSgenome) +import(GenomicRanges) importFrom(Rcpp,sourceCpp) useDynLib(ArchR) diff --git a/R/AllClasses.R b/R/AllClasses.R index be9c05e9..f7966028 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -1,5 +1,6 @@ #' @useDynLib ArchR #' @importFrom Rcpp sourceCpp +#' @import GenomicRanges NULL setClassUnion("characterOrNull", c("character", "NULL")) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 2a9f4999..79f57e74 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -141,6 +141,10 @@ addPeakAnnotations <- function( names(regions) <- paste0("Region_", seq_along(regions)) } + if(any(duplicated(names(regions)))){ + stop("Found duplicated region names! 
Please make unique!") + } + regionPositions <- lapply(seq_along(regions), function(x){ .logThis(regions[[x]], paste0("regions[[x]]-", x), logFile = logFile) @@ -192,11 +196,14 @@ addPeakAnnotations <- function( if(is.null(peakSet)){ .logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile) } - allPositions <- unlist(regionPositions) + allPositions <- unlist(regionPositions, use.names=TRUE) .logDiffTime("Creating Peak Overlap Matrix", t1 = tstart, verbose = TRUE, logFile = logFile) overlapRegions <- findOverlaps(peakSet, allPositions, ignore.strand=TRUE) + if(length(overlapRegions) == 0){ + stop("No Overlaps Found between regions and peak Matrix!") + } .logThis(overlapRegions, "overlapRegions", logFile = logFile) regionMat <- Matrix::sparseMatrix( @@ -211,6 +218,31 @@ addPeakAnnotations <- function( regionMat <- SummarizedExperiment::SummarizedExperiment(assays=SimpleList(matches = regionMat), rowRanges = peakSet) .logThis(regionMat, "regionSE", logFile = logFile) + ############################################################# + # Filter Regions With No Matches + ############################################################# + + #Number of Overlaps + nO <- Matrix::colSums(assay(regionMat)) + rF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(rF) > 0){ + .logDiffTime(paste0("Filtering Region Annotations with 0 overlaps :\n\n ", paste(rF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + regionPositions <- regionPositions[!(names(regionPositions) %in% rF)] + regionMat <- regionMat[,names(regionPositions),drop=FALSE] + }else{ + .logDiffTime(paste0("All Regions Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + + ############################################################# + # Summarize and Save + ############################################################# + dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Positions-In-Peaks.rds")) saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) @@ -451,6 +483,28 @@ addMotifAnnotations <- function( w = width ) + ############################################################# + # Filter Motifs With No Matches + ############################################################# + + #Number of Overlaps + nO <- lapply(motifPositions, length) %>% unlist + mF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(mF) > 0){ + .logDiffTime(paste0("Filtering Motif Annotations with 0 overlaps :\n\n ", paste(mF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + motifPositions <- motifPositions[nO > 0] + motifSummary <- motifSummary[names(motifPositions),,drop=FALSE] + motifs <- motifs[names(motifPositions)] + }else{ + .logDiffTime(paste0("All Motifs Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + ############################################################# # Motif Overlap Matrix ############################################################# @@ -665,12 +719,15 @@ addArchRAnnotations <- function( #Download if(!file.exists(file.path(annoPath, basename(url)))){ + oldTimeout <- getOption('timeout') + options(timeout=10000) message("Annotation ", basename(url)," does not exist! Downloading..") download.file( url = url, destfile = file.path(annoPath, basename(url)), quiet = FALSE ) + options(timeout=oldTimeout) } AnnoFile <- file.path(annoPath, basename(url)) @@ -747,6 +804,30 @@ addArchRAnnotations <- function( ) .logThis(regionMat, "regionSE", logFile=logFile) + ############################################################# + # Filter Regions With No Matches + ############################################################# + + #Number of Overlaps + nO <- Matrix::colSums(assay(regionMat)) + rF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(rF) > 0){ + .logDiffTime(paste0("Filtering Region Annotations with 0 overlaps :\n\n ", paste(rF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + regionMat <- regionMat[,nO > 0,drop=FALSE] + }else{ + .logDiffTime(paste0("All Regions Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + + ############################################################# + # Save + ############################################################# + dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) diff --git a/R/ColorPalettes.R b/R/ColorPalettes.R index d64b7b1c..81694ee3 100644 --- a/R/ColorPalettes.R +++ b/R/ColorPalettes.R @@ -112,6 +112,7 @@ paletteDiscrete <- function( .validInput(input = values, name = "values", valid = c("character", "factor")) .validInput(input = reverse, name = "reverse", valid = c("boolean")) + values <- unique(values) values <- gtools::mixedsort(values) n <- length(unique(values)) pal <- ArchRPalettes[[set]] diff --git a/R/Embedding.R b/R/Embedding.R index b4e9747a..c71026f4 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -209,6 +209,48 @@ addUMAP <- function( #New Save UWOT .saveUWOT <- function(model, file){ + + #save_uwot does not work because tarring doesnt work for some reason on Stanford's compute server + #Adapted from save_uwot + #this function is evaluated because it doesnt work on newer versions of uwot + #this is kept for legacy R versions + strUWOT <- " + .saveUWOT_Deprecated <- function(model, file){ + file <- file.path(normalizePath(dirname(file)), basename(file)) + wd <- getwd() + mod_dir <- tempfile(pattern = 'dir') + dir.create(mod_dir) + uwot_dir <- file.path(mod_dir, 'uwot') + dir.create(uwot_dir) + model_tmpfname <- file.path(uwot_dir, 'model') + .safeSaveRDS(model, file = 
model_tmpfname) + metrics <- names(model$metric) + n_metrics <- length(metrics) + for (i in seq_len(n_metrics)) { + nn_tmpfname <- file.path(uwot_dir, paste0('nn', i)) + if (n_metrics == 1) { + model$nn_index$save(nn_tmpfname) + model$nn_index$unload() + model$nn_index$load(nn_tmpfname) + } + else { + model$nn_index[[i]]$save(nn_tmpfname) + model$nn_index[[i]]$unload() + model$nn_index[[i]]$load(nn_tmpfname) + } + } + setwd(mod_dir) + system2('tar', '-cvf uwot.tar uwot', stdout = NULL, stderr = NULL) + o <- .fileRename('uwot.tar', file) + setwd(wd) + if (file.exists(mod_dir)) { + unlink(mod_dir, recursive = TRUE) + } + return(o) + } + " + eval(parse(text=strUWOT)) + tryCatch({ uwot::save_uwot(model = model, file = file, verbose = TRUE) }, error = function(e){ @@ -216,44 +258,61 @@ addUMAP <- function( }) } -#save_uwot does not work because tarring doesnt work for some reason on Stanford's compute server -#Adapted from save_uwot -.saveUWOT_Deprecated <- function(model, file){ - file <- file.path(normalizePath(dirname(file)), basename(file)) - wd <- getwd() - mod_dir <- tempfile(pattern = "dir") - dir.create(mod_dir) - uwot_dir <- file.path(mod_dir, "uwot") - dir.create(uwot_dir) - model_tmpfname <- file.path(uwot_dir, "model") - .safeSaveRDS(model, file = model_tmpfname) - metrics <- names(model$metric) - n_metrics <- length(metrics) - for (i in seq_len(n_metrics)) { - nn_tmpfname <- file.path(uwot_dir, paste0("nn", i)) - if (n_metrics == 1) { - model$nn_index$save(nn_tmpfname) - model$nn_index$unload() - model$nn_index$load(nn_tmpfname) - } - else { - model$nn_index[[i]]$save(nn_tmpfname) - model$nn_index[[i]]$unload() - model$nn_index[[i]]$load(nn_tmpfname) - } - } - setwd(mod_dir) - system2("tar", "-cvf uwot.tar uwot", stdout = NULL, stderr = NULL) - o <- .fileRename("uwot.tar", file) - setwd(wd) - if (file.exists(mod_dir)) { - unlink(mod_dir, recursive = TRUE) - } - return(o) -} - #New Save UWOT .loadUWOT <- function(file, nDim = NULL){ + + #load_uwot does not 
work because tarring doesnt work for some reason on Stanford's compute server + #Adapted from load_uwot + #this function is evaluated because it doesnt work on newer versions of uwot + #this is kept for legacy R versions + strUWOT <- " + .loadUWOT_Deprecated <- function(file, nDim = NULL){ + model <- NULL + tryCatch({ + mod_dir <- tempfile(pattern = 'dir') + dir.create(mod_dir) + utils::untar(file, exdir = mod_dir) + model_fname <- file.path(mod_dir, 'uwot/model') + if (!file.exists(model_fname)) { + stop('Cant find model in ', file) + } + model <- readRDS(file = model_fname) + metrics <- names(model$metric) + n_metrics <- length(metrics) + for (i in seq_len(n_metrics)){ + nn_fname <- file.path(mod_dir, paste0('uwot/nn', i)) + if (!file.exists(nn_fname)) { + stop('Cant find nearest neighbor index ', nn_fname, ' in ', file) + } + metric <- metrics[[i]] + if(length(model$metric[[i]]) == 0){ + if(!is.null(nDim)){ + nDim2 <- nDim + }else{ + nDim2 <- length(model$metric[[i]]) + } + } + if(!is.null(nDim)){ + nDim2 <- nDim + } + ann <- uwot:::create_ann(metric, ndim = nDim2) + ann$load(nn_fname) + if (n_metrics == 1) { + model$nn_index <- ann + }else{ + model$nn_index[[i]] <- ann + } + } + }, finally = { + if (file.exists(mod_dir)) { + unlink(mod_dir, recursive = TRUE) + } + }) + model + } + " + eval(parse(text=strUWOT)) + tryCatch({ uwot::load_uwot(file = file, verbose = TRUE) }, error = function(e){ @@ -261,52 +320,6 @@ addUMAP <- function( }) } -#Adapted from load_uwot -.loadUWOT_Deprecated <- function(file, nDim = NULL){ - model <- NULL - tryCatch({ - mod_dir <- tempfile(pattern = "dir") - dir.create(mod_dir) - utils::untar(file, exdir = mod_dir) - model_fname <- file.path(mod_dir, "uwot/model") - if (!file.exists(model_fname)) { - stop("Can't find model in ", file) - } - model <- readRDS(file = model_fname) - metrics <- names(model$metric) - n_metrics <- length(metrics) - for (i in seq_len(n_metrics)){ - nn_fname <- file.path(mod_dir, paste0("uwot/nn", i)) - if 
(!file.exists(nn_fname)) { - stop("Can't find nearest neighbor index ", nn_fname, " in ", file) - } - metric <- metrics[[i]] - if(length(model$metric[[i]]) == 0){ - if(!is.null(nDim)){ - nDim2 <- nDim - }else{ - nDim2 <- length(model$metric[[i]]) - } - } - if(!is.null(nDim)){ - nDim2 <- nDim - } - ann <- uwot:::create_ann(metric, ndim = nDim2) - ann$load(nn_fname) - if (n_metrics == 1) { - model$nn_index <- ann - }else{ - model$nn_index[[i]] <- ann - } - } - }, finally = { - if (file.exists(mod_dir)) { - unlink(mod_dir, recursive = TRUE) - } - }) - model -} - #' Add a TSNE embedding of a reduced dimensions object to an ArchRProject #' #' This function will compute a TSNE embedding and add it to an ArchRProject. diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index 14bfd94f..aab0b8a5 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -12,27 +12,72 @@ ArchRDefaults <- list( ArchR.verbose = TRUE ) +ArchRDependency <- c( + "grid", + "gridExtra", + "gtools", + "gtable", + "ggplot2", + "magrittr", + "plyr", + "stringr", + "data.table", + "matrixStats", + "S4Vectors", + "GenomicRanges", + "BiocGenerics", + "Matrix", + "Rcpp", + "SummarizedExperiment", + "rhdf5" +) + .onAttach <- function(libname, pkgname){ - if(!interactive()) return() - v <- packageVersion("ArchR") + + #Logo .ArchRLogo() + + #Package Startup + v <- packageVersion("ArchR") packageStartupMessage("ArchR : Version ", v, "\nFor more information see our website : www.ArchRProject.com\nIf you encounter a bug please report : https://github.com/GreenleafLab/ArchR/issues") + + #Load Packages + packageStartupMessage("Loading Required Packages...") + pkgs <- ArchRDependency + for(i in seq_along(pkgs)){ + packageStartupMessage("\tLoading Package : ", pkgs[i], " v", packageVersion(pkgs[i])) + tryCatch({ + suppressPackageStartupMessages(require(pkgs[i], character.only=TRUE)) + }, error = function(e){ + packageStartupMessage("\tFailed To Load Package : ", pkgs[i], " v", packageVersion(pkgs[i])) + }) + } + 
+ if(!interactive()) return() + + #Set Default Options op <- options() toset <- !(names(ArchRDefaults) %in% names(op)) + if (any(toset)) options(ArchRDefaults[toset]) + if(!.isWholenumber(options()[["ArchR.threads"]])){ addArchRThreads() }else if(options()[["ArchR.threads"]] == 1){ addArchRThreads() } + if(!.checkCairo()){ packageStartupMessage("WARNING : Cairo check shows Cairo is not functional.\n ggplot2 rasterization will not be available without Cario.\n This may cause issues editing plots with many thousands of points from single cells.") } + if(.checkJupyter()){ packageStartupMessage("Detected Jupyer Notebook session. Disabling Log Messages!\n\tIf this is undesired use `addArchRVerbose(TRUE)`") addArchRVerbose(verbose = FALSE) } + invisible() + } #Check Jupyer Status diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 56c95e8a..a03eecea 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -419,7 +419,16 @@ getMarkerFeatures <- function( }) %>% Reduce("rbind", .) - idxFilter <- rowSums(pairwiseDF[,c("mean1","mean2")]) != 0 + #Check for Mean being 0 for both Mean1 and Mean2 + idxFilter1 <- rowSums(pairwiseDF[,c("mean1","mean2")]) != 0 + + #Check For NA in Either Mean1 Mean2 + idxFilter2 <- rowSums(is.na(pairwiseDF[,c("mean1","mean2")])) == 0 + + #Combo Check + idxFilter <- idxFilter1 & idxFilter2 + + #FDR pairwiseDF$fdr <- NA pairwiseDF$fdr[idxFilter] <- p.adjust(pairwiseDF$pval[idxFilter], method = "fdr") pairwiseDF <- pairwiseDF[rownames(featureDF), , drop = FALSE] diff --git a/man/dot-DollarNames.ArchRProject.Rd b/man/dot-DollarNames.ArchRProject.Rd index 2a965405..d0905606 100644 --- a/man/dot-DollarNames.ArchRProject.Rd +++ b/man/dot-DollarNames.ArchRProject.Rd @@ -4,7 +4,7 @@ \alias{.DollarNames.ArchRProject} \title{Accessing cellColData directly from dollar.sign accessor} \usage{ -\method{.DollarNames}{ArchRProject}(x, pattern = "") +.DollarNames.ArchRProject(x, pattern = "") } \description{ This function will allow direct access to 
cellColData with a \code{$} accessor. From 9d2ebbab2944f8722b4b9bd490ca3a87ea2e5dcf Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 3 Apr 2022 21:59:03 -0700 Subject: [PATCH 115/162] update readme --- .DS_Store | Bin 16388 -> 16388 bytes README.md | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.DS_Store b/.DS_Store index ab55183e87dd9182d504050b04a6853e65b51f63..9be749afeb44d77fc6aec1464d916eb8ab59cc9b 100644 GIT binary patch delta 23 ecmZo^U~Fk%+#oE&VPa&eqhMrayjfc2r7{3cX$EQl delta 23 ecmZo^U~Fk%+#oE&VQ6TgqhMrgzFAu4r7{3cE(T@* diff --git a/README.md b/README.md index 01464e8f..0fee95a7 100755 --- a/README.md +++ b/README.md @@ -34,22 +34,22 @@ ArchR is a full-featured R package for processing and analyzing single-cell ATAC For a full walk through of installation and frequently related issues please visit www.ArchRProject.com. **First, install devtools (for installing GitHub packages) if it isn't already installed:** -```{r} +``` r if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") ``` **Then, install BiocManager (for installing bioconductor packages) if it isn't already installed:** -```{r} +``` r if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") ``` **Then, install ArchR:** -```{r} +``` r devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) ``` **Lastly, install all of the ArchR dependencies that aren't installed by default:** -```{r} +``` r library(ArchR) ArchR::installExtraPackages() ``` @@ -58,7 +58,7 @@ If any of these steps fails, you should identify the offending package and troub # Issues using ArchR? ArchR is currently in __beta__. We expect there to be bumps in the road. 
If you think you have found a bug, please first install the latest version of ArchR via -```{r} +``` r devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) ``` If this does not fix your problem, please [report an issue on Github](https://github.com/GreenleafLab/ArchR/issues) with the __Bug Report__ form. From 3075f1f034d8c5a9d7dcdfd6b6990814733fbf87 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 6 Apr 2022 08:26:14 -0700 Subject: [PATCH 116/162] fix impute weights completion message --- R/Imputation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Imputation.R b/R/Imputation.R index fad0239c..bda3e0da 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -198,7 +198,7 @@ addImputeWeights <- function( }, threads = threads) %>% SimpleList names(weightList) <- paste0("w",seq_along(weightList)) - .logDiffTime(sprintf("Completed Getting Magic Weights!", round(object.size(weightList) / 10^9, 3)), + .logDiffTime(sprintf("Completed Getting Magic Weights! Object size - %s.", round(object.size(weightList) / 10^9, 3)), t1 = tstart, verbose = FALSE, logFile = logFile) ArchRProj@imputeWeights <- SimpleList( From 3af7c3075a7e95ec88a290b9fecc1061b7bfe885 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 06:04:39 -0700 Subject: [PATCH 117/162] update warning message and function params --- R/BulkProjection.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 04b2f1d4..8cef79f1 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -3,9 +3,9 @@ #' This function will Project Bulk ATAC-seq data into single cell subspace. #' #' @param ArchRProj An `ArchRProject` object containing the dimensionality reduction matrix passed by `reducedDims`. -#' @param seATAC Bulk ATAC Summarized Experiment. -#' @param reducedDims A string specifying the reducedDims. -#' @param embedding A string specifying embedding. 
+#' @param seATAC A `SummarizedExperiment` object containing bulk ATAC-seq data. +#' @param reducedDims A string specifying the name of the `reducedDims` object to be used. +#' @param embedding A string specifying the name of the `embedding` object to be used. #' @param n An integer specifying the number of subsampled "pseudo single cells" per bulk sample. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param threads The number of threads used for parallel execution @@ -153,7 +153,7 @@ projectBulkATAC <- function( } if(embedding$params$nc != ncol(simRD)){ - .logMessage("Error incosistency found with matching LSI dimensions to those used in addEmbedding", + .logMessage("Warning! Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", "\nReturning with simulated reduced dimension coordinates...", verbose = TRUE, logFile = logFile) out <- SimpleList( simulatedReducedDims = simRD From a8c48d82dd2904e7bea363c8a9ef80e05464a74d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 08:17:32 -0700 Subject: [PATCH 118/162] revert warning to error --- R/BulkProjection.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 8cef79f1..882dd9a4 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -153,7 +153,7 @@ projectBulkATAC <- function( } if(embedding$params$nc != ncol(simRD)){ - .logMessage("Warning! Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", + .logMessage("Error! 
Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", "\nReturning with simulated reduced dimension coordinates...", verbose = TRUE, logFile = logFile) out <- SimpleList( simulatedReducedDims = simRD From cf8d41deb134e725fccbf8c71d4ee64771a4535f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 08:27:46 -0700 Subject: [PATCH 119/162] fix excludeChr param definition --- R/IterativeLSI.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index f8b75a15..31ba7001 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -46,9 +46,9 @@ #' @param totalFeatures The number of features to consider for use in LSI after ranking the features by the total number of insertions. #' These features are the only ones used throught the variance identification and LSI. These are an equivalent when using a `TileMatrix` to a defined peakSet. #' @param filterQuantile A number [0,1] that indicates the quantile above which features should be removed based on insertion counts prior -#' @param excludeChr A string of chromosomes to exclude for iterativeLSI procedure. #' to the first iteration of the iterative LSI paradigm. For example, if `filterQuantile = 0.99`, any features above the 99th percentile in #' insertion counts will be ignored for the first LSI iteration. +#' @param excludeChr A string of chromosomes to exclude for iterativeLSI procedure. #' @param saveIterations A boolean value indicating whether the results of each LSI iterations should be saved as compressed `.rds` files in #' the designated `outDir`. #' @param UMAPParams The list of parameters to pass to the UMAP function if "UMAP" if `saveIterations=TRUE`. See the function `uwot::umap()`. 
From db42a20ee68fbbabf1e05742f7f162fdb1a4db54 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:18:24 -0700 Subject: [PATCH 120/162] add native support for Vierstra motifs create a motifSet option for "vierstra" and create collection options for "individual" and "archetype" corresponding to the motifs produced by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). The new rds files linked on amazon have fixed the issue of ":" being a part of the motif name and now use "|" for separation of name info (https://github.com/GreenleafLab/ArchR/issues/675) --- R/AnnotationPeaks.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index c98f5b6e..f4628695 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -272,13 +272,16 @@ addPeakAnnotations <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param motifSet The motif set to be used for annotation. Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" -#' which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the -#' corresponding motif sets from the `chromVAR` package. +#' which gives the 2016, 2018 or 2020 version of JASPAR motifs, (ii) one of "cisbp", "encode", or "homer" which gives the +#' corresponding motif sets from the `chromVAR` package, or (iii) "vierstra" which gives the clustered archetype motifs +#' created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). #' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` #' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. 
#' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR -#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. +#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is +#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences #' (see `MOODS` for more details on this determination). @@ -442,6 +445,26 @@ addMotifAnnotations <- function( motifs <- obj$motifs motifSummary <- obj$motifSummary + }else if(tolower(motifSet)=="vierstra"){ + if(tolower(collection)=="individual"){ + fileName <- "Vierstra_Individual_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else if(tolower(collection == "archetype")){ + fileName <- "Vierstra_Archetype_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else { + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + ". 
Accepted values are 'individual' and 'archetype'")) + } + obj <- NULL + motifSummary <- NULL + }else if(tolower(motifSet)=="custom"){ obj <- NULL From b1fb5f0dbae7d07554854e7347ee70c11b52ed35 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:20:31 -0700 Subject: [PATCH 121/162] typo --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index f4628695..23bb14f9 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -459,7 +459,7 @@ addMotifAnnotations <- function( motifs <- readRDS(fileName) file.remove(fileName) } else { - stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } obj <- NULL From d7a3105d910f00c11a8c90841d5a59b2436897a9 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:55:29 -0700 Subject: [PATCH 122/162] unify file download workflow make file download workflow match that of annotations like lola etc --- R/AnnotationPeaks.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 23bb14f9..af705ae5 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -447,21 +447,29 @@ addMotifAnnotations <- function( }else if(tolower(motifSet)=="vierstra"){ if(tolower(collection)=="individual"){ - fileName <- "Vierstra_Individual_Motifs.rds" - download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" } else if(tolower(collection == "archetype")){ - fileName <- "Vierstra_Archetype_Motifs.rds" - download.file(url = 
"https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds" } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } + + annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations") + dir.create(annoPath, showWarnings = FALSE) + + #Download + if(!file.exists(file.path(annoPath, basename(url)))){ + message("Motif file ", basename(url)," does not exist! Downloading..") + download.file( + url = url, + destfile = file.path(annoPath, basename(url)), + quiet = FALSE + ) + } + motifFile <- file.path(annoPath, basename(url)) + + motifs <- readRDS(motifFile) obj <- NULL motifSummary <- NULL From 3f263a9e32662c6db9bb25eb7335bf7d231a8caa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 17:02:19 -0700 Subject: [PATCH 123/162] update collection param --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index af705ae5..ec8c06fe 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -280,7 +280,7 @@ addPeakAnnotations <- function( #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is -#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' "vierstra", then this must either be "archetype" (for the v2 clustered models) or "individual" (for the original v1 individual motif models). 
#' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences From 6feac133d5a5bc8a14e589bfc999329bb39e8f6b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 16 Apr 2022 05:58:16 -0700 Subject: [PATCH 124/162] update param definition for pal To make it more clear how to change the color of highlighted cells addressing https://github.com/GreenleafLab/ArchR/issues/1240 --- R/VisualizeData.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index d675abd4..d433c8e7 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -174,6 +174,10 @@ plotPDF <- function( #' @param imputeWeights The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. #' See `addImputationWeights()` and `getImutationWeights()` for more information. #' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override discreteSet/continuousSet for coloring vector. +#' If you are using `pal` in conjunction with `highlightCells`, your palette must be a named vector with two entries, one named for the value +#' of the cells in the `name` column of `cellColData` and the other named "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` +#' would be used to change the color of cells with the value "Mono" in the `cellColData` column indicated by `name`. Because of this, +#' the cells indicated by `highlightCells` must also match this value in the `name` column. #' @param size A number indicating the size of the points to plot if `plotAs` is set to "points". #' @param sampleCells A numeric describing number of cells to use for plot. If using impute weights, this will occur after imputation. 
#' @param highlightCells A character vector of cellNames describing which cells to hightlight if using `plotAs = "points"` (default if discrete). From c61dfca4d346515ce6a40656f8fa1b01cbd73a52 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 18 Apr 2022 05:47:15 -0700 Subject: [PATCH 125/162] require hexbin to be installed If a user doesnt have `hexbin` installed, they will get a cryptic error. Since ggplot2 only "suggests" `hexbin`, its possible to try to run this function without it installed properly. Related to https://github.com/GreenleafLab/ArchR/issues/1387 https://github.com/GreenleafLab/ArchR/issues/1292 --- R/GgplotUtils.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 4d4c31ea..ec808b4b 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -555,6 +555,9 @@ ggHex <- function( .validInput(input = hexCut, name = "quantCut", valid = c("numeric", "null")) .validInput(input = addPoints, name = "addPoints", valid = c("boolean")) + #require hexbin to be installed. otherwise, this section wont work properly + .requirePackage(x = "hexbin", source = "CRAN") + df <- data.frame(x = x, y = y) include <- which(is.finite(x) & is.finite(y)) From 0dada5f424bc5945584e4424300875bc55f43a86 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 22 Apr 2022 10:20:12 -0700 Subject: [PATCH 126/162] update vierstra archetype motifs to v2.1 --- R/AnnotationPeaks.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index ec8c06fe..e2882b38 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -280,7 +280,7 @@ addPeakAnnotations <- function( #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. 
See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is -#' "vierstra", then this must either be "archetype" (for the v2 clustered models) or "individual" (for the original v1 individual motif models). +#' "vierstra", then this must either be "archetype" (for the v2.1 clustered models) or "individual" (for the original v1 individual motif models). #' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences @@ -449,7 +449,7 @@ addMotifAnnotations <- function( if(tolower(collection)=="individual"){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" } else if(tolower(collection == "archetype")){ - url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds" + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs_v2.1.rds" } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) From b40e6b39e04cd3391642631631823f7f02f09764 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 22 Apr 2022 11:09:52 -0700 Subject: [PATCH 127/162] add strictMatch to handle mismatch in cells If a GeneExpressionMatrix is added to the project but not all cells in the project have gene expression information, this causes problems with downstream functions that require info from all cells such as addIterativeLSI. This patch provides a warning to users when this is the case. Currently strictMatch defaults to FALSE but could consider changing that to TRUE or handling this downstream for ex in addIterativeLSI. 
--- R/MatrixGeneExpression.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 7d91a480..5a1b02c8 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -17,6 +17,9 @@ #' @param verbose A boolean describing whether to print to console messages of progress. #' @param threads The number of threads to be used for parallel computing. #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. +#' @param strictMatch A boolean value indicating whether every cell in `input` must be represented in `seRNA`. If set to `FALSE`, +#' and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur +#' because not all cells will have relevant information. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. 
#' @export @@ -29,10 +32,24 @@ addGeneExpressionMatrix <- function( verbose = TRUE, threads = getArchRThreads(), parallelParam = NULL, + strictMatch = FALSE, force = TRUE, logFile = createLogFile("addGeneExpressionMatrix") ){ + .validInput(input = input, name = "input", valid = c("ArchRProj", "character")) + .validInput(input = seRNA, name = "seRNA", valid = c("SummarizedExperiment")) + .validInput(input = chromSizes, name = "chromSizes", valid = c("granges")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) + .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) + .validInput(input = verbose, name = "verbose", valid = c("boolean")) + .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = parallelParam, name = "parallelParam", valid = c("parallelparam", "null")) + .validInput(input = strictMatch, name = "strictMatch", valid = c("boolean")) + .validInput(input = force, name = "force", valid = c("boolean")) + .validInput(input = logFile, name = "logFile", valid = c("character")) + + if(inherits(input, "ArchRProject")){ ArrowFiles <- getArrowFiles(input) allCells <- rownames(getCellColData(input)) @@ -61,11 +78,18 @@ addGeneExpressionMatrix <- function( if(!is.null(allCells)){ cellsInArrows <- allCells } + overlap <- sum(cellsInArrows %in% colnames(seRNA)) / length(cellsInArrows) .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ stop("No overlapping cell names found between ArrowFiles and seRNA object! Cell names in ArrowFiles must match colnames in seRNA!") + } else if(overlap != 1) { + if(strictMatch){ + stop("Error! 'strictMatch = TRUE' and not all cells in input are represented in the provided gene expression seRNA. To proceed, please subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA or set 'strictMatch = FALSE'.") + } else { + .logMessage("Warning! 
Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix!", logFile = logFile, verbose = TRUE) + } } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From ee0c8ec29a8b8108f48cda8b9e62e0c6cfc1a41b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 26 Apr 2022 13:59:21 -0700 Subject: [PATCH 128/162] update clusterParams description --- R/IterativeLSI.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 31ba7001..156bbe80 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -11,9 +11,10 @@ #' "TileMatrix" or "PeakMatrix". #' @param name The name to use for storage of the IterativeLSI dimensionality reduction in the `ArchRProject` as a `reducedDims` object. #' @param iterations The number of LSI iterations to perform. -#' @param clusterParams A list of Additional parameters to be passed to `addClusters()` for clustering within each iteration. +#' @param clusterParams A list of additional parameters to be passed to `addClusters()` for clustering within each iteration. #' These params can be constant across each iteration, or specified for each iteration individually. Thus each param must be of -#' length == 1 or the total number of `iterations` - 1. PLEASE NOTE - We have updated these params to `resolution=2` and `maxClusters=6`! To use previous settings use `resolution=0.2` and `maxClusters=NULL`. +#' length == 1 or the total number of `iterations` - 1. If you want to use `scran` for clustering, you would pass this as `method="scran"`. +#` PLEASE NOTE - We have updated these params to `resolution=2` and `maxClusters=6`! To use previous settings use `resolution=0.2` and `maxClusters=NULL`. #' @param firstSelection First iteration selection method for features to use for LSI. 
Either "Top" for the top accessible/average or "Var" for the top variable features. #' "Top" should be used for all scATAC-seq data (binary) while "Var" should be used for all scRNA/other-seq data types (non-binary). #' @param depthCol A column in the `ArchRProject` that represents the coverage (scATAC = unique fragments, scRNA = unique molecular identifiers) per cell. From f162072dfb38e62d71431506db124a7a0e09b6aa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 26 Apr 2022 15:04:45 -0700 Subject: [PATCH 129/162] fix module scores when only one set of features is supplied addressing https://github.com/GreenleafLab/ArchR/issues/308#issuecomment-813905861 --- R/ModuleScore.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 2e7d4fd9..725276ab 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -132,8 +132,14 @@ addModuleScore <- function( doSampleCells = FALSE ) Matrix::colMeans(m[seq_along(idxFgd), ]) - Matrix::colMeans(m[-seq_along(idxFgd), ]) - }) %>% Reduce("cbind", .) - + }) + + if (length(features) > 1) { + dfM <- Reduce("cbind", dfM) + } else { + dfM <- as.data.frame(dfM[[1]], row.names = names(dfM), drop = FALSE) + } + #add the module scores as new columns in cellColData for(x in seq_len(ncol(dfM))){ ArchRProj <- addCellColData(ArchRProj, data = dfM[,x], name=names(featureList)[x], cells=rownames(dfM), force = TRUE) From 9b3e72e1fa82b9f6c116023e3ac09165d44b04a4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Apr 2022 15:25:42 -0700 Subject: [PATCH 130/162] update pal param def --- R/VisualizeData.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index d433c8e7..e9e327ba 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -173,11 +173,13 @@ plotPDF <- function( #' @param log2Norm A boolean value indicating whether a log2 transformation should be performed on the values (if continuous) in plotting. 
#' @param imputeWeights The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. #' See `addImputationWeights()` and `getImutationWeights()` for more information. -#' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override discreteSet/continuousSet for coloring vector. -#' If you are using `pal` in conjuction with `highlightCells`, your palette must be a named vector with two entries, one named for the value -#' of the cells in the `name` column of `cellColData` and the other named "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` -#' would be used to change the color of cells with the value "Mono" in the `cellColData` column indicated by `name`. Because of this, -#' the cells indicated by `highlightCells` must also match this value in the `name` column. +#' @param pal A custom palette used to override discreteSet/continuousSet for coloring cells. Typically created using `paletteDiscrete()` or `paletteContinuous()`. +#' To make a custom palette, you must construct this following strict specifications. If the coloring is for discrete data (i.e. "Clusters"), +#' then this palette must be a named vector of colors where each color is named for the corresponding group (e.g. `"C1" = "#F97070"`). If the coloring +#' for continuous data, then it just needs to be a vector of colors. If you are using `pal` in conjuction with `highlightCells`, your palette +#' must be a named vector with two entries, one named for the value of the cells in the `name` column of `cellColData` and the other named +#' "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` would be used to change the color of cells with the value +#' "Mono" in the `cellColData` column indicated by `name`. Because of this, the cells indicated by `highlightCells` must also match this value in the `name` column. 
#' @param size A number indicating the size of the points to plot if `plotAs` is set to "points". #' @param sampleCells A numeric describing number of cells to use for plot. If using impute weights, this will occur after imputation. #' @param highlightCells A character vector of cellNames describing which cells to hightlight if using `plotAs = "points"` (default if discrete). From ba21b05969c7d34125dcc56cdbabf0d64007e019 Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Tue, 3 May 2022 22:27:31 -0700 Subject: [PATCH 131/162] bugfix import data.table causing issues --- .DS_Store | Bin 16388 -> 16388 bytes NAMESPACE | 3 ++- R/AllClasses.R | 3 ++- man/addGeneExpressionMatrix.Rd | 5 +++++ man/addIterativeLSI.Rd | 10 +++++----- man/addMotifAnnotations.Rd | 15 +++++++++------ man/plotEmbedding.Rd | 8 +++++++- man/plotMarkers.Rd | 6 +++++- man/projectBulkATAC.Rd | 6 +++--- 9 files changed, 38 insertions(+), 18 deletions(-) diff --git a/.DS_Store b/.DS_Store index 9be749afeb44d77fc6aec1464d916eb8ab59cc9b..a885b2f8922c50a3e6fb25a2386aaf50830e8950 100644 GIT binary patch delta 1426 zcmeH_-%C?r7{{OQ>&$c7F^{IUxlLy)Xf{x{rHE3?*)Jy)ZlM&qTp3BzmD8-&*a)vQ z>*7Ga2nwtyjD9JmMN|+GR&=gfe`{NyhhhhaC#5D*iC*~vs`UqtIDP(rk0_}? z^6Ry#x_V)k6kX+%p6Xrd`j)+cHh0VMW)XvCLnk5*k1^K3&{AnKwC# z5(SP7WTh&qp*_@2A?l;^G(bZ%Ob_S9?vX}yb%WD+3mLhW z6=hS3GJdIANY!uIx~*EMQ4r?+g^-_ImL{fWEYHl!USV8aP-ri$*eKnyJ&vd?dV%A| zHQF4VP$rb~VvrYA@#lKM!He^}NaH$fj;7M7*NdF^lH)kjzfmzp?iLm0*gM)44j@EmVsA=7vt$0`k2wfHh98PtmlmRPXT++^>6rZ4qd^B1ZA*+7OE Jb!j@!{2RfHHBA5j delta 1365 zcmeH_TS${(7{{Og>!I(t^&I-!dg2-M0DAe@6GePcmL=2`$r>)MsVYl zO8TUW7FL<92A$bvIb_fsFq!OjgU;r#NzqcP0n``>^}GCmpr0s9>)#dV3H2F`Q(#%y zIk}5CesP|#MA__XZV_6gL782uv3R9fnJy;Rgb=3mD|1$)wx*pOy`8)W;z&b!a*&gn zsEdMhgig{BjZmEK(-=+CTbiNIG)MCw01SC3KoN?eKq*#WCCXt&4Qf#b4;s;eRXGx&gy_=0(S zXT;=8#nep0jLgKUnTxrZCv^ft`lz<_Ob_PW-M+ayxTh%?4EQ^eyYzH*eQa9$O}>2K z@8HyKkGnyz{svB)|6=JfQ6|qXC@d=0m9MI-a@5sJ!-EJ5ydeaPvAe3gg=NOoW})R#4xVmI&R_? 
zMi9pXJj5eB#uJQT5-(GEe?i`u$p=e*mq!DVqQQ diff --git a/NAMESPACE b/NAMESPACE index 7fb50b4b..5fec629b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,7 @@ export(subsetCells) export(theme_ArchR) export(trajectoryHeatmap) export(validBSgenome) -import(GenomicRanges) +import(data.table) +importFrom(GenomicRanges,GRanges) importFrom(Rcpp,sourceCpp) useDynLib(ArchR) diff --git a/R/AllClasses.R b/R/AllClasses.R index f7966028..314c4182 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -1,6 +1,7 @@ #' @useDynLib ArchR #' @importFrom Rcpp sourceCpp -#' @import GenomicRanges +#' @importFrom GenomicRanges GRanges +#' @import data.table NULL setClassUnion("characterOrNull", c("character", "NULL")) diff --git a/man/addGeneExpressionMatrix.Rd b/man/addGeneExpressionMatrix.Rd index e728ac04..06365823 100644 --- a/man/addGeneExpressionMatrix.Rd +++ b/man/addGeneExpressionMatrix.Rd @@ -13,6 +13,7 @@ addGeneExpressionMatrix( verbose = TRUE, threads = getArchRThreads(), parallelParam = NULL, + strictMatch = FALSE, force = TRUE, logFile = createLogFile("addGeneExpressionMatrix") ) @@ -36,6 +37,10 @@ for Seurat Objects (see \code{Seurat::as.SingleCellExperiment}). The provided va \item{parallelParam}{A list of parameters to be passed for biocparallel/batchtools parallel computing.} +\item{strictMatch}{A boolean value indicating whether every cell in \code{input} must be represented in \code{seRNA}. 
If set to \code{FALSE}, +this and this \code{GeneExpressionMatrix} is used for certain downstream analyses such as \code{addIterativeLSI()}, then errors may occur +because not all cells will have relevant information.} + \item{force}{A boolean value indicating whether to force the matrix indicated by \code{matrixName} to be overwritten if it already exist in the given \code{input}.} \item{logFile}{The path to a file to be used for logging ArchR output.} diff --git a/man/addIterativeLSI.Rd b/man/addIterativeLSI.Rd index fedd8496..ac601e66 100644 --- a/man/addIterativeLSI.Rd +++ b/man/addIterativeLSI.Rd @@ -51,9 +51,9 @@ addIterativeLSI( \item{iterations}{The number of LSI iterations to perform.} -\item{clusterParams}{A list of Additional parameters to be passed to \code{addClusters()} for clustering within each iteration. +\item{clusterParams}{A list of additional parameters to be passed to \code{addClusters()} for clustering within each iteration. These params can be constant across each iteration, or specified for each iteration individually. Thus each param must be of -length == 1 or the total number of \code{iterations} - 1. PLEASE NOTE - We have updated these params to \code{resolution=2} and \code{maxClusters=6}! To use previous settings use \code{resolution=0.2} and \code{maxClusters=NULL}.} +length == 1 or the total number of \code{iterations} - 1. If you want to use \code{scran} for clustering, you would pass this as \code{method="scran"}.} \item{firstSelection}{First iteration selection method for features to use for LSI. Either "Top" for the top accessible/average or "Var" for the top variable features. "Top" should be used for all scATAC-seq data (binary) while "Var" should be used for all scRNA/other-seq data types (non-binary).} @@ -102,12 +102,12 @@ variance calculation and TF-IDF normalization.} \item{totalFeatures}{The number of features to consider for use in LSI after ranking the features by the total number of insertions. 
These features are the only ones used throught the variance identification and LSI. These are an equivalent when using a \code{TileMatrix} to a defined peakSet.} -\item{filterQuantile}{A number \link{0,1} that indicates the quantile above which features should be removed based on insertion counts prior} - -\item{excludeChr}{A string of chromosomes to exclude for iterativeLSI procedure. +\item{filterQuantile}{A number \link{0,1} that indicates the quantile above which features should be removed based on insertion counts prior to the first iteration of the iterative LSI paradigm. For example, if \code{filterQuantile = 0.99}, any features above the 99th percentile in insertion counts will be ignored for the first LSI iteration.} +\item{excludeChr}{A string of chromosomes to exclude for iterativeLSI procedure.} + \item{saveIterations}{A boolean value indicating whether the results of each LSI iterations should be saved as compressed \code{.rds} files in the designated \code{outDir}.} diff --git a/man/addMotifAnnotations.Rd b/man/addMotifAnnotations.Rd index e78aa964..296da8cc 100644 --- a/man/addMotifAnnotations.Rd +++ b/man/addMotifAnnotations.Rd @@ -7,7 +7,7 @@ addMotifAnnotations( ArchRProj = NULL, motifSet = "cisbp", - name = "Motif", + annoName = "Motif", species = NULL, collection = "CORE", motifPWMs = NULL, @@ -23,16 +23,19 @@ addMotifAnnotations( \item{ArchRProj}{An \code{ArchRProject} object.} \item{motifSet}{The motif set to be used for annotation. 
Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" -which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the -corresponding motif sets from the \code{chromVAR} package.} +which gives the 2016, 2018 or 2020 version of JASPAR motifs, (ii) one of "cisbp", "encode", or "homer" which gives the +corresponding motif sets from the \code{chromVAR} package, or (iii) "vierstra" which gives the clustered archetype motifs +created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering).} -\item{name}{The name of the \code{peakAnnotation} object to be stored in the provided \code{ArchRProject}} +\item{annoName}{The name of the \code{peakAnnotation} object to be stored in the provided \code{ArchRProject}} \item{species}{The name of the species relevant to the supplied \code{ArchRProject}. This is used for identifying which motif to be used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from \code{getGenome()}.} \item{collection}{If one of the JASPAR motif sets is used via \code{motifSet}, this parameter allows you to indicate the JASPAR -collection to be used. See \code{getMatrixSet()} from \code{TFBSTools} for all options to supply for collection.} +collection to be used. See \code{getMatrixSet()} from \code{TFBSTools} for all options to supply for collection. If \code{motifSet} is +"vierstra", then this must either be "archetype" (for the v2.1 clustered models) or "individual" (for the original v1 individual motif models). +NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra.} \item{motifPWMs}{A custom set of motif PWMs as a PWMList for adding motif annotations.} @@ -43,7 +46,7 @@ collection to be used. 
See \code{getMatrixSet()} from \code{TFBSTools} for all o \item{version}{An integer specifying version 1 or version 2 of chromVARmotifs see github for more info GreenleafLab/chromVARmotifs.} -\item{force}{A boolean value indicating whether to force the \code{peakAnnotation} object indicated by \code{name} to be overwritten if +\item{force}{A boolean value indicating whether to force the \code{peakAnnotation} object indicated by \code{annoName} to be overwritten if it already exists in the given \code{ArchRProject}.} \item{logFile}{The path to a file to be used for logging ArchR output.} diff --git a/man/plotEmbedding.Rd b/man/plotEmbedding.Rd index 147d5cd5..4ed4836a 100644 --- a/man/plotEmbedding.Rd +++ b/man/plotEmbedding.Rd @@ -46,7 +46,13 @@ is "GeneScoreMatrix" then \code{name} refers to a gene name which can be listed \item{imputeWeights}{The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. See \code{addImputationWeights()} and \code{getImutationWeights()} for more information.} -\item{pal}{A custom palette (see \code{paletteDiscrete} or \code{ArchRPalettes}) used to override discreteSet/continuousSet for coloring vector.} +\item{pal}{A custom palette used to override discreteSet/continuousSet for coloring cells. Typically created using \code{paletteDiscrete()} or \code{paletteContinuous()}. +To make a custom palette, you must construct this following strict specifications. If the coloring is for discrete data (i.e. "Clusters"), +then this palette must be a named vector of colors where each color is named for the corresponding group (e.g. \code{"C1" = "#F97070"}). If the coloring +for continuous data, then it just needs to be a vector of colors. If you are using \code{pal} in conjuction with \code{highlightCells}, your palette +must be a named vector with two entries, one named for the value of the cells in the \code{name} column of \code{cellColData} and the other named +"Non.Highlighted". 
For example, \code{pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")} would be used to change the color of cells with the value +"Mono" in the \code{cellColData} column indicated by \code{name}. Because of this, the cells indicated by \code{highlightCells} must also match this value in the \code{name} column.} \item{size}{A number indicating the size of the points to plot if \code{plotAs} is set to "points".} diff --git a/man/plotMarkers.Rd b/man/plotMarkers.Rd index f0baad6b..6b1a0078 100644 --- a/man/plotMarkers.Rd +++ b/man/plotMarkers.Rd @@ -9,7 +9,8 @@ plotMarkers( name = NULL, cutOff = "FDR <= 0.01 & abs(Log2FC) >= 0.5", plotAs = "Volcano", - scaleTo = 10^4 + scaleTo = 10^4, + rastr = TRUE ) } \arguments{ @@ -22,6 +23,9 @@ To see available options try \code{colnames(seMarker)}.} \code{cutoff} can contain any of the \code{assayNames} from \code{seMarker}.} \item{plotAs}{A string indicating whether to plot a volcano plot ("Volcano") or an MA plot ("MA").} + +\item{rastr}{A boolean value that indicates whether the plot should be rasterized using \code{ggrastr}. This does not rasterize +lines and labels, just the internal portions of the plot.} } \description{ This function will plot one group/column of a differential markers as an MA or Volcano plot. 
diff --git a/man/projectBulkATAC.Rd b/man/projectBulkATAC.Rd index 4f5e9b77..31859ab2 100644 --- a/man/projectBulkATAC.Rd +++ b/man/projectBulkATAC.Rd @@ -19,11 +19,11 @@ projectBulkATAC( \arguments{ \item{ArchRProj}{An \code{ArchRProject} object containing the dimensionality reduction matrix passed by \code{reducedDims}.} -\item{seATAC}{Bulk ATAC Summarized Experiment.} +\item{seATAC}{A \code{SummarizedExperiment} object containing bulk ATAC-seq data.} -\item{reducedDims}{A string specifying the reducedDims.} +\item{reducedDims}{A string specifying the name of the \code{reducedDims} object to be used.} -\item{embedding}{A string specifying embedding.} +\item{embedding}{A string specifying the name of the \code{embedding} object to be used.} \item{n}{An integer specifying the number of subsampled "pseudo single cells" per bulk sample.} From 7ead5412657ca1d21b5f874e2908c8d920e977cb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 4 May 2022 14:28:26 -0700 Subject: [PATCH 132/162] update ... additional params def --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index e2882b38..b969532d 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -290,7 +290,7 @@ addPeakAnnotations <- function( #' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `annoName` to be overwritten if #' it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. -#' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a PWM object. +#' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a JASPAR PWM object. 
#' @export addMotifAnnotations <- function( ArchRProj = NULL, From b9ee2663d9ba7d58c6737a7a8bf2b3614bf26866 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 4 May 2022 20:33:35 -0700 Subject: [PATCH 133/162] typo in geneTiles --- R/IntegrativeAnalysis.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 8695c83d..94116f75 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -1327,7 +1327,7 @@ getPeak2GeneLinks <- function( geneTiles <- floor(start(geneStarts) / resolution) * resolution + floor(resolution / 2) }else{ summitTiles <- start(peakSummits) - geneTiles <- start(geneTiles) + geneTiles <- start(geneStarts) } loops <- .constructGR( From f2d5d0583e00167878e8d15ae02a016b423b9f3b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 6 May 2022 11:49:21 -0700 Subject: [PATCH 134/162] Add message to point to Vierstra website https://github.com/GreenleafLab/ArchR/discussions/1364#discussioncomment-2700224 --- R/AnnotationPeaks.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index b969532d..2e13ff91 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -448,8 +448,10 @@ addMotifAnnotations <- function( }else if(tolower(motifSet)=="vierstra"){ if(tolower(collection)=="individual"){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" + message("Using Vierstra v1.0 motifs. See https://www.vierstra.org/resources/motif_clustering for more details.") } else if(tolower(collection == "archetype")){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs_v2.1.rds" + message("Using Vierstra v2.1beta motifs. See https://resources.altius.org/~jvierstra/projects/motif-clustering-v2.1beta/ for more details.") } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". 
Accepted values are 'individual' and 'archetype'")) From ee5f2176fbed5685e92a29f0bbbf8911fea434d7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 17 May 2022 20:51:02 -0700 Subject: [PATCH 135/162] remove strictMatch from batchlapply https://github.com/GreenleafLab/ArchR/issues/1427 --- R/MatrixGeneExpression.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 5a1b02c8..c2cb7c9d 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -147,6 +147,7 @@ addGeneExpressionMatrix <- function( #Remove Input from args args$input <- NULL args$chromSizes <- NULL + args$strictMatch <- NULL #Run With Parallel or lapply outList <- .batchlapply(args) From e008328cb634a266b30b25841b3d36d987263af1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:22:40 -0700 Subject: [PATCH 136/162] add marker subsetting to plotMarkerHeatMap --- R/MarkerFeatures.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index c0e655f6..e077744b 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -823,6 +823,8 @@ markerHeatmap <- function(...){ #' @param pal A custom continuous palette from `ArchRPalettes` (see `paletteContinuous()`) used to override the default continuous palette for the heatmap. #' @param binaryClusterRows A boolean value that indicates whether a binary sorting algorithm should be used for fast clustering of heatmap rows. #' @param clusterCols A boolean value that indicates whether the columns of the marker heatmap should be clustered. +#' @param subsetMarkers A vector of rownames from seMarker to use for subsetting of seMarker to only plot specific features on the heatmap. +#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. 
#' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. @@ -847,6 +849,7 @@ plotMarkerHeatmap <- function( pal = NULL, binaryClusterRows = TRUE, clusterCols = TRUE, + subsetMarkers = NULL, labelMarkers = NULL, nLabel = 15, nPrint = 15, @@ -919,6 +922,11 @@ plotMarkerHeatmap <- function( }else{ idx <- which(rowSums(passMat, na.rm = TRUE) > 0 & matrixStats::rowVars(mat) != 0 & !is.na(matrixStats::rowVars(mat))) } + + if(!is.null(subsetMarkers)) { + idx <- subsetMarkers + } + mat <- mat[idx,,drop=FALSE] passMat <- passMat[idx,,drop=FALSE] From eaa15993b4fdae69cacd9fd03d74c5aa8c230cc8 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:34:13 -0700 Subject: [PATCH 137/162] remove printing of marker genes when subsetMarkers is used --- R/MarkerFeatures.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index e077744b..5cecbb36 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -959,15 +959,19 @@ plotMarkerHeatmap <- function( } spmat <- passMat / rowSums(passMat) - if(metadata(seMarker)$Params$useMatrix == "GeneScoreMatrix"){ - message("Printing Top Marker Genes:") - for(x in seq_len(ncol(spmat))){ - genes <- head(order(spmat[,x], decreasing = TRUE), nPrint) - message(colnames(spmat)[x], ":") - message("\t", paste(as.vector(rownames(mat)[genes]), collapse = ", ")) + #only print out identified marker genes if subsetMarkers is NULL + if(is.null(subsetMarkers)) { + if(metadata(seMarker)$Params$useMatrix == "GeneScoreMatrix"){ + message("Printing Top Marker Genes:") + for(x in seq_len(ncol(spmat))){ + genes <- head(order(spmat[,x], decreasing = TRUE), nPrint) + message(colnames(spmat)[x], ":") + message("\t", 
paste(as.vector(rownames(mat)[genes]), collapse = ", ")) + } } } + if(is.null(labelMarkers)){ labelMarkers <- lapply(seq_len(ncol(spmat)), function(x){ as.vector(rownames(mat)[head(order(spmat[,x], decreasing = TRUE), nLabel)]) From d74445f35cbce0e1bb4eb150111e507110b98951 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:43:38 -0700 Subject: [PATCH 138/162] update param def for subsetMarkers --- R/MarkerFeatures.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 5cecbb36..94adfac6 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -824,7 +824,8 @@ markerHeatmap <- function(...){ #' @param binaryClusterRows A boolean value that indicates whether a binary sorting algorithm should be used for fast clustering of heatmap rows. #' @param clusterCols A boolean value that indicates whether the columns of the marker heatmap should be clustered. #' @param subsetMarkers A vector of rownames from seMarker to use for subsetting of seMarker to only plot specific features on the heatmap. -#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. +#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. If this parameter is used for +#' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. #' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. 
From 03a8d6b8b9cbb9ced2b13d5491287f9f29a43ed1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:44:26 -0700 Subject: [PATCH 139/162] add validInput for subsetMarkers --- R/MarkerFeatures.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 94adfac6..7d3a2657 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -872,6 +872,7 @@ plotMarkerHeatmap <- function( .validInput(input = pal, name = "pal", valid = c("character", "null")) .validInput(input = binaryClusterRows, name = "binaryClusterRows", valid = c("boolean")) .validInput(input = clusterCols, name = "clusterCols", valid = c("boolean")) + .validInput(input = subsetMarkers, name = "subsetMarkers", valid = c("integer", "null")) .validInput(input = labelMarkers, name = "labelMarkers", valid = c("character", "null")) .validInput(input = nLabel, name = "nLabel", valid = c("integer", "null")) .validInput(input = nPrint, name = "nPrint", valid = c("integer", "null")) From b3dd16436fcde4aeb3a5b3e20343e248ec34679f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:48:57 -0700 Subject: [PATCH 140/162] catch problematic inputs to subsetMarkers --- R/MarkerFeatures.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7d3a2657..2e59051c 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,12 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - idx <- subsetMarkers + if(length(which(subsetMarkers %ni% 1:nrow(mat)))){ + idx <- subsetMarkers + } else { + stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") + } + } mat <- mat[idx,,drop=FALSE] From 80d57bb2e9309263beead385aa5c967aea783f39 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:50:03 -0700 Subject: [PATCH 141/162] fix if statement typo --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 2e59051c..fe914ec5 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,7 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - if(length(which(subsetMarkers %ni% 1:nrow(mat)))){ + if(length(which(subsetMarkers %ni% 1:nrow(mat))) > 0){ idx <- subsetMarkers } else { stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") From 673943f1cae2e32060939f2ca773386f726bc98f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:50:33 -0700 Subject: [PATCH 142/162] fix if statement typo --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index fe914ec5..69879538 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,7 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - if(length(which(subsetMarkers %ni% 1:nrow(mat))) > 0){ + if(length(which(subsetMarkers %ni% 1:nrow(mat))) == 0){ idx <- subsetMarkers } else { stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") From 17dbf94f247d8b084493a49fac97664ccd3832b4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 May 2022 08:02:49 -0700 Subject: [PATCH 143/162] improve error message when no cells found passing filter https://github.com/GreenleafLab/ArchR/issues/1435#issuecomment-1139663014 --- R/CreateArrow.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 0fa4335a..ac0be626 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1884,7 +1884,7 @@ createArrowFiles <- function( bcPass <- BStringSet(dt$values.V1[dt$V1 >= minFrags & dt$V1 <= maxFrags]) if(length(bcPass) < 3){ - .logStop(sprintf("Detected 2 or less cells (%s barcodes have greater than 50 fragments) in file!\n Check inputs such as 'minFrags' or 'maxFrags' to keep cells! 
Exiting!", sum(dt$V1 > 50)), logFile = logFile) + .logStop(sprintf("Detected 2 or less cells (%s barcodes have greater than 50 fragments) in file!\n Check inputs such as 'minFrags' or 'maxFrags' to keep cells!\n Also check that you are using the correct reference genome.\n Exiting!", sum(dt$V1 > 50)), logFile = logFile) } .logThis(data.frame(bc = as.character(bcPass)), name = paste0(prefix, " BarcodesMinMaxFrags"), logFile = logFile) From 978cd3c8b82b332dcc31fbf9976ce8625f2b9c55 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 08:41:46 -0700 Subject: [PATCH 144/162] check to make sure tmpdir exists --- R/HiddenUtils.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 57fdad35..9502b4ff 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -326,6 +326,10 @@ .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ dir.create(tmpdir, showWarnings = FALSE) + + if(!dir.exists(tmpdir)){ + stop(paste0("Unable to create temporary directory ", tmpdir,". Check file permissions!")) + } if(addDOC){ doc <- paste0("-Date-", Sys.Date(), "_Time-", gsub(":","-", stringr::str_split(Sys.time(), pattern=" ",simplify=TRUE)[1,2])) From 6a23fa18c55e9677531095ae939cf21b6ab60cd4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 12:16:33 -0700 Subject: [PATCH 145/162] check for file named "tmp" in .tempFile directory creation https://github.com/GreenleafLab/ArchR/issues/1447#issuecomment-1146215298 --- R/HiddenUtils.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 9502b4ff..44f84f53 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -324,6 +324,10 @@ } .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ + + if(file.exists(tmpdir)){ + stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. 
Please remove this file and try again!")) + } dir.create(tmpdir, showWarnings = FALSE) From 7358dd109053049ae19ef8c64eb129e544805579 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 16:16:33 -0700 Subject: [PATCH 146/162] patch error with file.exists https://github.com/GreenleafLab/ArchR/issues/1447#issuecomment-1146441748 --- R/HiddenUtils.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 44f84f53..1e7844dc 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -325,8 +325,11 @@ .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ - if(file.exists(tmpdir)){ - stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. Please remove this file and try again!")) + #if the directory doesnt already exist and file.exists evaluates to true, then a file exists with that name + if(!dir.exists(tmpdir)){ + if(file.exists(tmpdir)){ + stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. Please remove this file and try again!")) + } } dir.create(tmpdir, showWarnings = FALSE) From e2d911bbe3df32afd8160194150b60d5514bc559 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 16:47:09 -0700 Subject: [PATCH 147/162] update strictMatch warning message and typo https://github.com/GreenleafLab/ArchR/discussions/1450#discussion-4119022 --- R/MatrixGeneExpression.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index c2cb7c9d..b54f409f 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -18,7 +18,7 @@ #' @param threads The number of threads to be used for parallel computing. #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. 
#' @param strictMatch A boolean value indicating whether every cell in `input` must be represented in `seRNA`. If set to `FALSE`, -#' this and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur +#' and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur #' because not all cells will have relevant information. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. @@ -88,7 +88,7 @@ addGeneExpressionMatrix <- function( if(strictMatch){ stop("Error! 'strictMatch = TRUE' and not all cells in input are represented in the provided gene expression seRNA. To proceed, please subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA or set 'strictMatch = FALSE'.") } else { - .logMessage("Warning! Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix!", logFile = logFile, verbose = TRUE) + .logMessage("Warning! Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix! 
To remove these mis-matched cells, subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA and set 'strictMatch = TRUE'", logFile = logFile, verbose = TRUE) } } From cadcd31b6c228537d43701c63bd3ce9e7e1b2795 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 6 Jun 2022 11:07:20 -0700 Subject: [PATCH 148/162] Properly catch NULL value for quantCut mentioned in https://github.com/GreenleafLab/ArchR/issues/1452#issuecomment-1147705139 --- R/VisualizeData.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index e9e327ba..ae9eff3e 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -421,8 +421,10 @@ plotEmbedding <- function( if(!plotParamsx$discrete){ - plotParamsx$color <- .quantileCut(plotParamsx$color, min(quantCut), max(quantCut)) - + if(!is.null(quantCut)){ + plotParamsx$color <- .quantileCut(plotParamsx$color, min(quantCut), max(quantCut)) + } + plotParamsx$pal <- paletteContinuous(set = plotParamsx$continuousSet) if(!is.null(pal)){ From 92bb44c849d756b0e577fd09f19903a7dbbabfd5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:33:36 -0700 Subject: [PATCH 149/162] make tutorial downloads cleaner check for existence of each file individually rather than the download directory. Add new .downloadFiles() function to handle file download and checking to see if files downloaded properly. 
--- R/InputData.R | 113 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index e25e743e..a13860f1 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -11,57 +11,98 @@ getTutorialData <- function( tutorial = "hematopoiesis", threads = getArchRThreads() ){ - + #Validate .validInput(input = tutorial, name = "tutorial", valid = "character") .validInput(input = threads, name = "threads", valid = c("integer")) ######### - + #Make Sure URL doesnt timeout oldTimeout <- getOption('timeout') options(timeout=100000) - + if(tolower(tutorial) %in% c("heme","hematopoiesis")){ - if(!dir.exists("HemeFragments")){ - - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" - ) - - dir.create("HemeFragments", showWarnings = FALSE) - - downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ - download.file( - url = filesUrl[x], - destfile = file.path("HemeFragments", basename(filesUrl[x])) - ) - }, threads = min(threads, length(filesUrl))) - - #check for success of file download - if(!all(unlist(downloadFiles) == 0)) { - stop("Error! Some tutorial files did not download successfully. 
Please try again.") - } - } - pathFragments <- "HemeFragments" - - }else{ - + pathDownload <- "HemeFragments" + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + ) + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + + }else if(tolower(tutorial) %in% c("multiome")){ + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + ) + + pathDownload <- "Multiome" + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + fragFiles <- fragFiles[!grepl(".tbi", fragFiles)] + geneFiles <- list.files(pathDownload, pattern = "\\.h5$", full.names = TRUE) + names(geneFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + + inputFiles <- c(fragFiles, geneFiles) + + } else{ + stop("There is no 
tutorial data for : ", tutorial) - + } - + #Set back URL Options options(timeout=oldTimeout) - - #Return Fragment Files - inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) - names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) - inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + inputFiles + +} +#helper for file downloads +.downloadFiles <- function(filesUrl = NULL, pathDownload = NULL, threads = 1){ + if(is.null(filesUrl)) { + stop("No value supplied to filesUrl in .downloadFiles()!") + } + if(is.null(pathDownload)) { + stop("No value supplied to pathDownload in .downloadFiles()!") + } + message(paste0("Downloading files to ",pathDownload,"...")) + downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ + if(!file.exists(file.path(pathDownload, basename(filesUrl[x])))){ + message(paste0("Downloading file ", basename(filesUrl[x]),"...")) + download.file( + url = filesUrl[x], + destfile = file.path(pathDownload, basename(filesUrl[x])) + ) + } else { + message(paste0("File exists! Skipping file ", basename(filesUrl[x]),"...")) + } + }, threads = min(threads, length(filesUrl))) + + #check for success of file download + if(!all(unlist(downloadFiles) == 0)) { + stop("Some tutorial files did not download successfully. 
Please try again.") + } + + downloadFiles + } #' Get PBMC Small Test Fragments From cea4782584a0ea621b1d653782d988200a74ffae Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:44:22 -0700 Subject: [PATCH 150/162] fix typo --- R/InputData.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index a13860f1..ba492bdf 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -33,7 +33,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) @@ -52,7 +52,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) From 21099d6ed966f6088b7b49ebbb9cca07486e8875 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 20:58:48 -0700 Subject: [PATCH 151/162] fix error message typo --- R/IterativeLSI.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 156bbe80..c32f7795 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -268,7 +268,7 @@ addIterativeLSI <- function( .logDiffTime("Computing Variable Features", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) nFeature <- varFeatures[1] if(nFeature > 0.5 * nrow(totalAcc)){ - stop("nFeature for 
variable selection must be at leat 1/2 the total features!") + stop("nFeature for variable selection must be less than 1/2 the total features!") } topIdx <- head(order(totalAcc$combinedVars, decreasing=TRUE), nFeature) topFeatures <- totalAcc[sort(topIdx),] From e395adfb4916f42f73ee646954d1e0f0fa7bdc76 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 8 Jun 2022 16:02:43 -0700 Subject: [PATCH 152/162] bugfix - `error` does not exist the variable `error` doesnt exist / hasnt been declared anywhere. pretty sure this should be `throwError = FALSE` --- R/ReproduciblePeakSet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 3bb1819b..a5e47a53 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -835,7 +835,7 @@ findMacs2 <- function(){ if(search2[1] != "ERROR"){ path2Install <- gsub("Location: ","",search2[grep("Location", search2, ignore.case=TRUE)]) path2Bin <- gsub("lib/python/site-packages", "bin/macs2",path2Install) - if(.suppressAll(.checkPath(path2Bin, throwError = error))){ + if(.suppressAll(.checkPath(path2Bin, throwError = FALSE))){ message("Found with pip!") return(path2Bin) } @@ -848,7 +848,7 @@ findMacs2 <- function(){ if(search3[1] != "ERROR"){ path2Install <- gsub("Location: ","",search3[grep("Location", search3, ignore.case=TRUE)]) path2Bin <- gsub("lib/python/site-packages", "bin/macs2",path2Install) - if(.suppressAll(.checkPath(path2Bin, throwError = error))){ + if(.suppressAll(.checkPath(path2Bin, throwError = FALSE))){ message("Found with pip3!") return(path2Bin) } From 792487b7a1119054b4d4aacbffb2f5c893b50017 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 11:50:14 -0700 Subject: [PATCH 153/162] update documentation on "features" --- R/ArchRBrowser.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 98a1e251..5b378c23 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ 
-646,8 +646,10 @@ ArchRBrowserTrack <- function(...){ #' Blue-colored genes are on the minus strand and red-colored genes are on the plus strand), and "loopTrack" (links between a peak and a gene). #' @param sizes A numeric vector containing up to 3 values that indicate the sizes of the individual components passed to `plotSummary`. #' The order must be the same as `plotSummary`. -#' @param features A `GRanges` object containing the "features" to be plotted via the "featureTrack". This should be thought of as a -#' bed track. i.e. the set of peaks obtained using `getPeakSet(ArchRProj))`. +#' @param features A `GRanges` (for a single feature track) or `GRangesList` (for multiple feature tracks) object containing the "features" to +#' be plotted via the "featureTrack". This should be thought of as a bed track. i.e. the set of peaks obtained using `getPeakSet(ArchRProj))`. +#' If you provide a `GRangesList`, then each element of that object must be named and this name will be used on the plot. +#' For example - `GRangesList("peaks" = peak_gr, "other" = other_gr)`. #' @param loops A `GRanges` object containing the "loops" to be plotted via the "loopTrack". #' This `GRanges` object start represents the center position of one loop anchor and the end represents the center position of another loop anchor. #' A "loopTrack" draws an arc between two genomic regions that show some type of interaction. This type of track can be used From 9769de9d11d3e8dc84a170c4ab4c036996a52ce3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 12:50:16 -0700 Subject: [PATCH 154/162] catch when GRangesList has no names if the GRangesList given to features in plotBrowserTrack does not have names, then the call to data.frame errors out. This fix ensures that namex isnt null or blank. 
--- R/ArchRBrowser.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 5b378c23..5c049e55 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1431,6 +1431,10 @@ plotBrowserTrack <- function( featureO <- lapply(seq_along(featureList), function(x){ featurex <- featureList[[x]] namex <- names(featureList)[x] + if(is.null(namex) || namex == "") { + message("Warning! Object ",x," in your GRangesList (features) is not named. Generic numbering will be used.") + namex <- as.character(x) + } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) if(length(sub) > 0){ From d214b0b1c23af8192d1818b6878da1b28ba346cf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 13:28:29 -0700 Subject: [PATCH 155/162] update featureList with generic name --- R/ArchRBrowser.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 5c049e55..37b7947d 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1434,6 +1434,7 @@ plotBrowserTrack <- function( if(is.null(namex) || namex == "") { message("Warning! Object ",x," in your GRangesList (features) is not named. 
Generic numbering will be used.") namex <- as.character(x) + names(featureList)[x] <- as.character(x) } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) From 851427b051937cb7ff8596d971696f23a6809a5e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 14:15:20 -0700 Subject: [PATCH 156/162] fix featureList naming --- R/ArchRBrowser.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 37b7947d..1e76ddc7 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1426,6 +1426,15 @@ plotBrowserTrack <- function( featureList <- features hideY <- FALSE } + + #make sure all elements in featureList have a name for plot display + for(i in seq_along(featureList)){ + if(is.null(names(featureList)[i]) || is.na(names(featureList)[i]) || nchar(names(featureList)[i]) == 0) { + message("Warning! Object ",i," in your GRangesList (features) is not named. Generic numbering will be used.") + names(featureList)[i] <- as.character(i) + } + } + featureList <- featureList[rev(seq_along(featureList))] featureO <- lapply(seq_along(featureList), function(x){ @@ -1434,7 +1443,6 @@ plotBrowserTrack <- function( if(is.null(namex) || namex == "") { message("Warning! Object ",x," in your GRangesList (features) is not named. 
Generic numbering will be used.") namex <- as.character(x) - names(featureList)[x] <- as.character(x) } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) From 8345fc6a01995ded5debe09c9b6aef00a8db0345 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 14:29:00 -0700 Subject: [PATCH 157/162] fix guides FALSE warning from ggplot --- R/ArchRBrowser.R | 10 +++++----- R/DoubletsScores.R | 6 +++--- R/Footprinting.R | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 1e76ddc7..41df3315 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1044,7 +1044,7 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = FALSE, colour = FALSE) + ggtitle(title) + guides(fill = "none", colour = "none") + ggtitle(title) p @@ -1346,7 +1346,7 @@ plotBrowserTrack <- function( theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.ticks.x=element_blank()) + theme(axis.title.y=element_blank(), axis.text.y=element_blank(),axis.ticks.y=element_blank()) + theme(legend.text = element_text(size = baseSize), strip.text.y = element_text(size = facetbaseSize, angle = 0)) + - guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = FALSE) + + guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = "none") + theme(legend.position="bottom") + theme(legend.title=element_text(size=5), legend.text=element_text(size=7), legend.key.size = unit(0.75,"line"), legend.background = element_rect(color =NA), strip.background = element_blank()) @@ -1475,7 +1475,7 @@ plotBrowserTrack <- function( scale_color_manual(values = pal) + theme(legend.text = element_text(size = baseSize)) + theme_ArchR(baseSize = baseSize, baseLineSize = borderWidth, baseRectSize = borderWidth) + - guides(color = FALSE, fill 
= FALSE) + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) + guides(color = "none", fill = "none") + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) }else{ @@ -1795,7 +1795,7 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = FALSE, colour = FALSE) + ggtitle(title) + guides(fill = "none", colour = "none") + ggtitle(title) p @@ -1884,7 +1884,7 @@ plotBrowserTrack <- function( pal = pal ) + facet_wrap(x~., ncol=1,scales="free_y",strip.position="right") + - guides(fill = FALSE, colour = FALSE) + + guides(fill = "none", colour = "none") + theme_ArchR(baseSize = baseSize, baseRectSize = borderWidth, baseLineSize = tickWidth, diff --git a/R/DoubletsScores.R b/R/DoubletsScores.R index c5338195..9275bd5f 100644 --- a/R/DoubletsScores.R +++ b/R/DoubletsScores.R @@ -378,7 +378,7 @@ addDoubletScores <- function( scale_colour_gradientn(colors = pal) + xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + labs(color = "Simulated Doublet Density") + - guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + guides(fill = "none") + theme_ArchR(baseSize = 10) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) + coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + @@ -395,7 +395,7 @@ addDoubletScores <- function( # geom_point(data = dfDoub, aes(x=x,y=y,colour=color), size = 0.5) + # scale_colour_gradientn(colors = pal) + # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + - # guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + # guides(fill = "none") + theme_ArchR(baseSize = 10) + # labs(color = "Simulated Doublet Density") + # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), # axis.text.y = element_blank(), 
axis.ticks.y = element_blank()) + @@ -413,7 +413,7 @@ addDoubletScores <- function( # scale_colour_gradientn(colors = pal) + # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + # labs(color = "Simulated Doublet Density") + - # guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + # guides(fill = "none") + theme_ArchR(baseSize = 10) + # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), # axis.text.y = element_blank(), axis.ticks.y = element_blank()) + # coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + diff --git a/R/Footprinting.R b/R/Footprinting.R index 5fd214da..9447cdd3 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -578,8 +578,8 @@ plotFootprints <- function( ylim = c(quantile(plotFootDF$mean, 0.0001), 1.15*quantile(smoothFoot, 0.999)), xlim = c(min(plotFootDF$x),max(plotFootDF$x)) ) + theme_ArchR(baseSize = baseSize) + ggtitle(name) + - guides(fill = FALSE) + - guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + guides(fill = "none") + + guides(color = "none") + ylab(paste0(title,"Normalized Insertions")) #removed ggrepel due to incompatibility with coord_cartesian - see https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 #ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) From 05a83440f3080ed1e39750b619b01e475b5e94a7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 20:39:24 -0700 Subject: [PATCH 158/162] update param for ArchRProj in plotFootprints --- R/Footprinting.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index 9447cdd3..566c41bf 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -335,7 +335,8 @@ getFootprints <- function( #' @param smoothWindow The size in basepairs of the sliding window to be used for smoothing of the footprint signal. #' @param baseSize A numeric specifying the baseSize of font in the plots. 
#' @param plot A boolean value indicating whether or not the footprints should be plotted (`TRUE`) or returned as grob objects (`FALSE`). -#' @param ArchRProj An `ArchRProject` object to be used for plotting directory in `getOutputDirectory`. +#' @param ArchRProj An `ArchRProject` object to be used for plotting directory in `getOutputDirectory`. If no `ArchRProj` is supplied, +#' then plots will be stored in a directory called "Plots" in the current working directory. #' @param plotName A string indicating the name/prefix of the file to be used for output plots. #' @param height The height in inches to be used for the output PDF file. #' @param width The width in inches to be used for the output PDF file. From 8877bd12226b30ab71559a8299dbf0e18683e685 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 05:55:02 -0700 Subject: [PATCH 159/162] remove NULL as option for nPrint and nLabel --- R/MarkerFeatures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 69879538..5c346019 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -874,8 +874,8 @@ plotMarkerHeatmap <- function( .validInput(input = clusterCols, name = "clusterCols", valid = c("boolean")) .validInput(input = subsetMarkers, name = "subsetMarkers", valid = c("integer", "null")) .validInput(input = labelMarkers, name = "labelMarkers", valid = c("character", "null")) - .validInput(input = nLabel, name = "nLabel", valid = c("integer", "null")) - .validInput(input = nPrint, name = "nPrint", valid = c("integer", "null")) + .validInput(input = nLabel, name = "nLabel", valid = c("integer")) + .validInput(input = nPrint, name = "nPrint", valid = c("integer")) .validInput(input = labelRows, name = "labelRows", valid = c("boolean")) .validInput(input = returnMatrix, name = "returnMatrix", valid = c("boolean")) .validInput(input = transpose, name = "transpose", valid = c("boolean")) From 44cb0950261c10292e512f96fa62efa2da16d88c Mon 
Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 05:56:33 -0700 Subject: [PATCH 160/162] update param def for plotMarkerHeatmap --- R/MarkerFeatures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 5c346019..ed1f96a6 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -827,8 +827,8 @@ markerHeatmap <- function(...){ #' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. If this parameter is used for #' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. -#' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. -#' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. +#' @param nLabel An integer value that indicates how many of the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. +#' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top `n` genes for each group based on how uniquely up-regulated the gene is. #' @param labelRows A boolean value that indicates whether all rows should be labeled on the side of the heatmap. #' @param returnMatrix A boolean value that indicates whether the final heatmap matrix should be returned in lieu of plotting the actual heatmap. #' @param transpose A boolean value that indicates whether the heatmap should be transposed prior to plotting or returning. 
From 706b88a80570f814b01959a5d9d536adf7ac64b4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 08:11:03 -0700 Subject: [PATCH 161/162] update param def for nLabel --- R/MarkerFeatures.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index ed1f96a6..b6fdbab9 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -828,6 +828,7 @@ markerHeatmap <- function(...){ #' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. #' @param nLabel An integer value that indicates how many of the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. +#' To remove all feature labels, set `nLabel = 0`. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top `n` genes for each group based on how uniquely up-regulated the gene is. #' @param labelRows A boolean value that indicates whether all rows should be labeled on the side of the heatmap. #' @param returnMatrix A boolean value that indicates whether the final heatmap matrix should be returned in lieu of plotting the actual heatmap. 
From 6a0ec0cc4c5f8ad1a923fc3b2912cbfde6c0de23 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 17 Jun 2022 08:59:53 -0700 Subject: [PATCH 162/162] add md5sum check for tutorial data more robust checking of if files exist and have been properly downloaded https://github.com/GreenleafLab/ArchR/discussions/1478 --- R/InputData.R | 57 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index ba492bdf..ce42a3a2 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -25,10 +25,18 @@ getTutorialData <- function( pathDownload <- "HemeFragments" - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + filesUrl <- data.frame( + fileUrl = c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + ), + md5sum = c( + "77502e1f195e21d2f7a4e8ac9c96e65e", + "618613b486e4f8c0101f4c05c69723b0", + "a8d5ae747841055ef230ba496bcfe937" + ), + stringsAsFactors = FALSE ) dir.create(pathDownload, showWarnings = FALSE) @@ -41,11 +49,20 @@ getTutorialData <- function( }else if(tolower(tutorial) %in% c("multiome")){ - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", - 
"https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + filesUrl <- data.frame( + fileUrl = c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + ), + md5sum = c( + "d49f4012ff65d9edfee86281d6afb286", + "e326066b51ec8975197c29a7f911a4fd", + "5737fbfcb85d5ebf4dab234a1592e740", + "bd4cc4ff040987e1438f1737be606a27" + ), + stringsAsFactors = FALSE ) pathDownload <- "Multiome" @@ -83,16 +100,26 @@ getTutorialData <- function( if(is.null(pathDownload)) { stop("No value supplied to pathDownload in .downloadFiles()!") } + if(length(which(c("fileUrl","md5sum") %ni% colnames(filesUrl))) != 0) { + cat(colnames(filesUrl)) + stop("File download dataframe does not include columns named 'fileUrl' and 'md5sum' which are required!") + } message(paste0("Downloading files to ",pathDownload,"...")) - downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ - if(!file.exists(file.path(pathDownload, basename(filesUrl[x])))){ - message(paste0("Downloading file ", basename(filesUrl[x]),"...")) + downloadFiles <- .safelapply(seq_along(filesUrl$fileUrl), function(x){ + if(file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ + if(tools::md5sum(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) != filesUrl$md5sum[x]) { + message(paste0("File ",basename(filesUrl$fileUrl[x])," exists but has an incorrect md5sum. 
Removing...")) + file.remove(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) + } + } + if(!file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ + message(paste0("Downloading file ", basename(filesUrl$fileUrl[x]),"...")) download.file( - url = filesUrl[x], - destfile = file.path(pathDownload, basename(filesUrl[x])) + url = filesUrl$fileUrl[x], + destfile = file.path(pathDownload, basename(filesUrl$fileUrl[x])) ) } else { - message(paste0("File exists! Skipping file ", basename(filesUrl[x]),"...")) + message(paste0("File exists! Skipping file ", basename(filesUrl$fileUrl[x]),"...")) } }, threads = min(threads, length(filesUrl)))