Add files via upload

dDipankar · Jan 22, 2022 · f64cdd8 · f64cdd8
1 parent 8792ad0
commit f64cdd8
Show file tree

Hide file tree

Showing 30 changed files with 1,080,393 additions and 0 deletions.
diff --git a/...e-wide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/All_sgRNA_pos.mat b/...e-wide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/All_sgRNA_pos.mat
diff --git a/... CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/CLIB89+control(1).fasta b/... CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/CLIB89+control(1).fasta
diff --git a/...ide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/CLIB89+control.fasta b/...ide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/CLIB89+control.fasta
diff --git a/...ISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Cas12a sgRNA library.fasta b/...ISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Cas12a sgRNA library.fasta
diff --git a/...ISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Determine_sgRNA_Position.m b/...ISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Determine_sgRNA_Position.m
@@ -0,0 +1,151 @@
+% This script is used to determine the position of every sgRNA in the genome
+% along with identifiers such as the strand it targets and the chromosome it
+% aligns to. This information is also critical for generating counts using
+% inexact matching.
+
+% A BAM file is generated by mapping the list of all sgRNA in the library
+% (including nontargeting) to a genome fasta file (This should ideally have
+% an extra chromosome that contain the nontargeting sgRNA). This file is
+% read and sgRNA position, strand and chromosome information is recorded as
+% mentioned in the following sections.
+
+% Inputs: 'Library_sgRNA.bam': An example BAM file obtained by running
+% bowtie on the Cas12a library against the genome file
+% 'CLIB89+control.fasta'.
+
+% Outputs: 
+% All_sgRNA_pos.mat: variable file that contains all sgRNA names,
+% sequences and their start positions.
+
+
+%Author: Adithya Ramesh
+%PhD Candidate, Wheeldon Lab
+%UC Riverside, 900 University Ave
+%Riverside, CA-92507, USA
+%Email: arame003@ucr.edu
+%       wheeldon@ucr.edu
+%% Reading BAM file and storing alignments against all chromosomes
+%There will be an extra chromosome here as all the nontargeting sgRNA were
+%added as a seperate chromosome to allow for the aligment of the entire
+%library.
+tic
+bamFilename = 'Library_sgRNA.bam'; %This file will be the result of running bowtie on the Cas12a library against the genome file 'CLIB89+control.fasta' 
+info = baminfo(bamFilename,'ScanDictionary',true);
+bmA = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{1});
+bmB = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{2});
+bmC = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{3});
+bmD = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{4});
+bmE = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{5});
+bmF = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{6});
+if length(info.ScannedDictionary)==9
+    bmH = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{8});
+else
+    bmH = BioMap(bamFilename, 'SelectReference', info.ScannedDictionary{7});
+end
+%% Concatenate Start Positions, Strand information and Chromosome information
+
+% Start positions of every sgRNA in the genome is recorded. Other
+% information that is appended to the start position of an sgRNA is the
+% chromosome it aligned to, and the strand that it was found on.
+% For example: a start position of 2500018 is read this way: 25000|1|8.
+% 8 refers to the chromosome, a flag of 0 or 1 mean Top and Bottom strands
+% respectively, and the remaining is the actual start position of the
+% sgRNA. So this sgRNA is found at the 25000 position on the bottom strand
+% in the 8th chromosome.
+
+Astart=bmA.Start;
+Aflag=bmA.Flag;
+
+Bstart=bmB.Start;
+Bflag=bmB.Flag;
+
+Cstart=bmC.Start;
+Cflag=bmC.Flag;
+
+Dstart=bmD.Start;
+Dflag=bmD.Flag;
+
+Estart=bmE.Start;
+Eflag=bmE.Flag;
+
+Fstart=bmF.Start;
+Fflag=bmF.Flag;
+
+Hstart=bmH.Start;
+Hflag=bmH.Flag;
+
+Aflag(Aflag==16)=1;
+Bflag(Bflag==16)=1;
+Cflag(Cflag==16)=1;
+Dflag(Dflag==16)=1;
+Eflag(Eflag==16)=1;
+Fflag(Fflag==16)=1;
+Hflag(Hflag==16)=1;
+
+for i=1:length(Astart)
+as=sprintf('%1d%1d%1d ',Astart(i),Aflag(i),1);
+a1=str2num(as);
+Afn(i,1)=a1;
+end
+for i=1:length(Bstart)
+as=sprintf('%1d%1d%1d ',Bstart(i),Bflag(i),2); 
+a1=str2num(as); 
+Bfn(i,1)=a1;
+end
+for i=1:length(Cstart)
+as=sprintf('%1d%1d%1d ',Cstart(i),Cflag(i),3); 
+a1=str2num(as); 
+Cfn(i,1)=a1;
+end
+for i=1:length(Dstart)
+as=sprintf('%1d%1d%1d ',Dstart(i),Dflag(i),4); 
+a1=str2num(as); 
+Dfn(i,1)=a1;
+end
+for i=1:length(Estart)
+as=sprintf('%1d%1d%1d ',Estart(i),Eflag(i),5); 
+a1=str2num(as); 
+Efn(i,1)=a1;
+end
+for i=1:length(Fstart)
+as=sprintf('%1d%1d%1d ',Fstart(i),Fflag(i),6); 
+a1=str2num(as); 
+Ffn(i,1)=a1;
+end
+for i=1:length(Hstart)
+as=sprintf('%1d%1d%1d ',Hstart(i),Hflag(i),8); 
+a1=str2num(as); 
+Hfn(i,1)=a1;
+end
+
+A(:,1)=bmA.Header;
+A(:,2)=num2cell(Afn);
+A(:,3)=bmA.Sequence;
+
+B(:,1)=bmB.Header;
+B(:,2)=num2cell(Bfn);
+B(:,3)=bmB.Sequence;
+
+C(:,1)=bmC.Header;
+C(:,2)=num2cell(Cfn);
+C(:,3)=bmC.Sequence;
+
+D(:,1)=bmD.Header;
+D(:,2)=num2cell(Dfn);
+D(:,3)=bmD.Sequence;
+
+E(:,1)=bmE.Header;
+E(:,2)=num2cell(Efn);
+E(:,3)=bmE.Sequence;
+
+F(:,1)=bmF.Header;
+F(:,2)=num2cell(Ffn);
+F(:,3)=bmF.Sequence;
+
+H(:,1)=bmH.Header;
+H(:,2)=num2cell(Hfn);
+H(:,3)=bmH.Sequence;
+
+All_sgRNA_pos=vertcat(A,B,C,D,E,F,H);
+save All_sgRNA_pos.mat All_sgRNA_pos
+toc
diff --git a/...nome-wide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Import_fastq.m b/...nome-wide CRISPR-Cas12a Screens in Y lipolytica/Generating sgRNA abundance/Import_fastq.m
@@ -0,0 +1,45 @@
+% This script imports a fastq file and stores it into the variables 'header'
+% and 'sequence'. The DNA sequences/reads stored in the fastq file are then
+% tabulated to provide every unique read and the associated count or the
+% number of times the read appears in the fastq file.
+
+% Inputs: fastq file to import (here portrayed by
+% 'Examplefile1.fastqsanger') *Note*: This source of this fastq file is
+% likley the output of using Trimmomatic on the Illumina raw reads of a
+% given sample to keep only the 25 nt sgRNA sequence. For more information
+% regarding processing Illumina reads on Galaxy refer " ", and specifically
+% Supplementary table " ".
+
+% Outputs Variables:s
+% UniqueSeqs: Lists every unique sequence/read in the fastq file.
+% Counts: Provides number of times each of those unique read appeared in fastq file.
+% Counts_of_all_unique_reads1.mat: saves the above two variables locally as
+% a MATLAB variable file.
+
+% *Note*: This script requires the use the fast and efficient custom
+% 'count_unique' function written by the user Anthony Kendall. This
+% function may be downlaoded at:
+% https://www.mathworks.com/matlabcentral/fileexchange/23333-determine-and-count-unique-values-of-an-array
+
+
+%Author: Adithya Ramesh
+%PhD Candidate, Wheeldon Lab
+%UC Riverside, 900 University Ave
+%Riverside, CA-92507, USA
+%Email: arame003@ucr.edu
+%       wheeldon@ucr.edu
+%% Import fastq files, and generate the counts of every unique sequence
+tic
+clear
+for a=1:1
+    [header,sequence] =  fastqread('Examplefile1.fastqsanger'); %imports header and sequence of a fastq reads file of a given sample
+    if ischar(sequence)==1
+        seq=sequence;
+        sequence=cell(1);
+        sequence{1}=seq;
+    end
+end
+Seq=sequence';
+[UniqueSeqs1,Counts1]=count_unique(Seq); %Custom function that goes through the list of all sequences and provides the counts of every unique sequence in the list
+save Counts_of_all_unique_reads1.mat UniqueSeqs1 Counts1
+toc