-
Notifications
You must be signed in to change notification settings - Fork 2
/
convert2arlequin.html
143 lines (129 loc) · 4.97 KB
/
convert2arlequin.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
<!DOCTYPE html>
<!-- Bu yazılım Dr. Zafer Akçalı tarafından oluşturulmuştur -->
<!-- Programmed by Zafer Akçalı, MD-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Convert Hapl-o-Mat to Arlequin</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/PapaParse/5.0.0/papaparse.min.js"></script>
<script src="https://cdn.jsdelivr.net/g/filesaver.js"></script>
</head>
<body>
<!-- Input/output controls: every form control has a label whose "for" matches the control's id -->
<label for="csvfile">Select a csv file:</label>
<input type="file" id="csvfile" name="Csv File">
<button type="button" onclick="readFunction()">Read</button> <br/> <br/>
<label for="gpx">Genotype prefix:</label> <input type="text" id="gpx" value="G" size="2" maxlength="2">
<label for="asterixbox">(*) between locus name and locus value</label> <input type="checkbox" id="asterixbox" onclick="addAsterix()"><br/> <br/>
<label for="arltext">CSV input / Arlequin Output</label> <br />
<textarea rows = "30" cols = "120" name = "Arl Text" id="arltext"></textarea> <br />
<button type="button" onclick="convertFunction()">Convert</button> <button type="button" onclick="copyFunction()">Copy</button>
<button type="button" onclick="saveFunction()">Save .arp</button><br /><br />
<script>
// Global state shared by the handler functions in this script.
// Raw CSV text: assigned by readFunction() and parsed by convertFunction().
var csvtext="";
// Separator placed between a locus name and its value: "*" while the
// asterisk checkbox is ticked, otherwise empty (maintained by addAsterix()).
var asterix="";
// Extra padding appended after the "? " missing-data marker: " " while the
// asterisk checkbox is ticked, otherwise empty (maintained by addAsterix()).
var asterixspace="";
// Prefix of the .arp output, kept as a multi-line template literal.
// It deliberately ends with "SampleSize=" so that convertFunction() can
// append the number of data rows directly after it.
var prearp=`[Profile]
Title="A sample of human HLA data"
NbSamples=1
GenotypicData=1
DataType=STANDARD
RecessiveData=0
GameticPhase=0
RecessiveAllele="NULL"
LocusSeparator=WHITESPACE
MissingData="?"
[Data]
[[Samples]]
SampleName="A test HLA sample"
SampleSize=`;
// Postfix of the .arp output: the brace closing the "SampleData={" block
// that convertFunction() opens, followed by a newline.
var postarp = "}"+"\n";
/**
 * Click handler of the "asterixbox" checkbox.
 * Mirrors the checkbox state into the globals `asterix` and `asterixspace`:
 * ticked   -> "*" and " "
 * unticked -> ""  and ""
 */
function addAsterix() {
  var starWanted = document.getElementById("asterixbox").checked == true;
  asterix = starWanted ? "*" : "";
  asterixspace = starWanted ? " " : "";
}
/**
 * Click handler of the "Read" button.
 * Reads the file chosen in the "csvfile" input as text, stores it in the
 * global `csvtext` and shows it in the "arltext" textarea.
 * Does nothing when no file has been selected.
 */
function readFunction() {
  var file = document.getElementById('csvfile').files[0]; // first selected file, if any
  // Without this guard readAsText(undefined) throws when "Read" is pressed
  // before a file has been chosen.
  if (!file) return;
  var reader = new FileReader(); // html5
  reader.onload = function () {
    csvtext = reader.result; // read from csv file
    document.getElementById('arltext').value = csvtext; // show in textarea
  };
  reader.readAsText(file);
}
/**
 * Click handler of the "Convert" button.
 *
 * Parses the global `csvtext` with Papa.parse and writes the Arlequin (.arp)
 * text into the "arltext" textarea.
 *
 * CSV layout read by this function: row 0 is a header row, column 0 is
 * skipped (id), and each following pair of columns holds the two values of
 * one locus. For every data row two haplotype lines are built; identical
 * genotypes (both lines equal) are counted and emitted once with their count.
 *
 * An empty or missing cell is written as the missing-data marker "? ".
 * When the header has an unpaired trailing column, that column is ignored.
 * When no rows were parsed the textarea is left untouched.
 */
function convertFunction() {
  const parsed = Papa.parse(csvtext, { // parse from csv text
    header: false,
    skipEmptyLines: true
  });
  const rows = parsed.data;
  // Nothing loaded yet (or an empty file): there is no header row to read.
  if (!rows || rows.length === 0) return;

  const outputArea = document.getElementById('arltext');
  outputArea.value = ""; // clear textarea

  const header = rows[0];
  const sampleSize = rows.length - 1; // data rows, header excluded
  // number of loci, calculated from the header row, skipping the first column (id);
  // floor() keeps an odd column count from producing a half locus
  const nloci = Math.floor((header.length - 1) / 2);
  const missing = "? " + asterixspace; // written when a locus value is empty

  // locus header +/- "*" + locus value, or the missing-data marker
  const locusText = function (row, col) {
    const cell = row[col];
    if (cell === undefined || cell === "") return missing;
    return header[col] + asterix + cell + " ";
  };

  // genotypeCounts maps every distinct genotype (haplotype 1 + haplotype 2)
  // to the number of individuals carrying it, in order of first appearance
  const genotypeCounts = new Map();
  for (let r = 1; r < rows.length; r++) { // r=1 to skip header row
    let haplo1 = ""; // first line (first haplotype)
    let haplo2 = ""; // second line (second haplotype)
    for (let j = 0; j < nloci; j++) {
      haplo1 += locusText(rows[r], 2 * j + 1);
      haplo2 += locusText(rows[r], 2 * j + 2);
    }
    const genotype = haplo1 + "\n" + " " + haplo2;
    genotypeCounts.set(genotype, (genotypeCounts.get(genotype) || 0) + 1);
  }

  // middle part of the arp file: completes "SampleSize=" and opens SampleData
  let arpText = prearp + sampleSize.toString() + "\n" + " SampleData={" + "\n";

  const prefix = document.getElementById('gpx').value; // genotype prefix, read once
  // Pad each genotype number to the width of the largest one so the frequency
  // column lines up; there is always at least one space before the frequency.
  const widest = String(genotypeCounts.size).length;
  let n = 0;
  for (const [genotype, count] of genotypeCounts) {
    n++;
    const indent = " ".repeat(widest - String(n).length + 1);
    // Genotype prefix + genotype number + padding -> frequency -> genotype
    arpText += prefix + n.toString() + indent + count + " " + genotype + "\n";
  }

  arpText += postarp; // postfix of arp file
  outputArea.value = arpText;
}
/**
 * Click handler of the "Copy" button.
 * Selects the whole content of the "arltext" textarea and issues the
 * document "copy" command on that selection.
 */
function copyFunction() {
  var outputArea = document.getElementById('arltext');
  outputArea.select();
  document.execCommand("copy");
}
/**
 * Click handler of the "Save .arp" button.
 * Wraps the current content of the "arltext" textarea in a UTF-8 plain-text
 * Blob and passes it to saveAs() under the file name "output.arp".
 */
function saveFunction() {
  var arpText = document.getElementById('arltext').value;
  var arpBlob = new Blob([arpText], { type: "text/plain;charset=utf-8" });
  saveAs(arpBlob, "output.arp");
}
</script>
</body>
</html>