forked from viniciusvviterbo/Multilayer-Perceptron
-
Notifications
You must be signed in to change notification settings - Fork 0
/
normalizeDataset.cpp
142 lines (123 loc) · 4.08 KB
/
normalizeDataset.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
using namespace std;
/*
 * Maps x onto the [0, 1] scale spanned by minValue and maxValue: the
 * distance of x from the minimum divided by the width of the range.
 *
 * x:        value to rescale
 * minValue: lower end of the range (maps to 0)
 * maxValue: upper end of the range (maps to 1)
 *
 * Returns (x - minValue) / (maxValue - minValue). When the range has zero
 * width (maxValue == minValue, e.g. a column holding a single constant)
 * the quotient is undefined, so 0 is returned instead of NaN or infinity.
 */
double normalizedValue(double x, double minValue, double maxValue)
{
    double range = maxValue - minValue;

    // A zero-width range would divide by zero below.
    if (range == 0.0)
    {
        return 0.0;
    }
    return (x - minValue) / range;
}
/*
 * Finds the minimum and maximum value of every column of the dataset and
 * stores them in the caller-provided arrays.
 *
 * dataset:   nRow rows, each holding at least nCol values
 * minValues: output array of nCol entries; receives each column's minimum
 * maxValues: output array of nCol entries; receives each column's maximum
 * nRow:      number of rows to scan
 * nCol:      number of columns to scan
 *
 * When there are no rows or no columns there is nothing to scan: the
 * function returns immediately and leaves both output arrays untouched.
 */
void minMaxValues(double **dataset, double *minValues, double *maxValues, int nRow, int nCol)
{
    // Without at least one row, dataset[0] below would be out of bounds.
    if (nRow <= 0 || nCol <= 0)
    {
        return;
    }

    // Seed both arrays with the first row so every later comparison is
    // made against a value that really occurs in the column.
    for (int j = 0; j < nCol; j++)
    {
        minValues[j] = dataset[0][j];
        maxValues[j] = dataset[0][j];
    }

    // Row 0 is already accounted for by the seeding above.
    for (int i = 1; i < nRow; i++)
    {
        for (int j = 0; j < nCol; j++)
        {
            double value = dataset[i][j];

            // A value below the current minimum cannot also exceed the
            // current maximum, so the two checks are mutually exclusive.
            if (value < minValues[j])
            {
                minValues[j] = value;
            }
            else if (value > maxValues[j])
            {
                maxValues[j] = value;
            }
        }
    }
}
/*
 * Normalizes the dataset in place, rescaling every value according to the
 * minimum and maximum of its own column (min-max scaling via
 * normalizedValue).
 *
 * dataset: nRow rows, each holding at least nCol values; overwritten
 * nRow:    number of rows to normalize
 * nCol:    number of leading columns of each row to normalize
 *
 * An empty dataset (no rows or no columns) is left untouched. If the
 * temporary min/max arrays cannot be allocated, a message is written to
 * stderr and the dataset is left unchanged.
 */
void normalizeDataset(double **dataset, int nRow, int nCol)
{
    if (nRow <= 0 || nCol <= 0)
    {
        return;
    }

    double *minValues = (double *) calloc((size_t) nCol, sizeof(double));
    double *maxValues = (double *) calloc((size_t) nCol, sizeof(double));

    if (minValues != NULL && maxValues != NULL)
    {
        minMaxValues(dataset, minValues, maxValues, nRow, nCol);

        for (int i = 0; i < nRow; i++)
        {
            for (int j = 0; j < nCol; j++)
            {
                dataset[i][j] = normalizedValue(dataset[i][j], minValues[j], maxValues[j]);
            }
        }
    }
    else
    {
        fprintf(stderr, "normalizeDataset: out of memory, dataset left unchanged\n");
    }

    // The per-column extremes are only needed during the rescale.
    // free(NULL) is a no-op, so this also covers a partial allocation.
    free(minValues);
    free(maxValues);
}
/*
 * Generates a completely randomized dataset whose values all lie in [0, 1].
 *
 * datasetLength: number of rows to generate
 * inputLength:   number of input columns per row; each is a random real
 *                value in [0, 1]
 * outputLength:  number of output columns per row, stored after the
 *                inputs; each is a random value rounded to 0 or 1
 *
 * Returns an array of datasetLength rows, each with
 * inputLength + outputLength values. The caller owns the result and must
 * free() every row and then the array itself. Returns NULL when
 * datasetLength is not positive, a column count is negative, the row
 * width is zero, or memory cannot be allocated.
 *
 * The random generator is re-seeded from the current time on every call,
 * so two calls within the same clock second produce identical data.
 */
double **randomDataset(int datasetLength, int inputLength, int outputLength)
{
    if (datasetLength <= 0 || inputLength < 0 || outputLength < 0)
    {
        return NULL;
    }

    // Computed in size_t so the sum cannot overflow a signed int.
    size_t nCol = (size_t) inputLength + (size_t) outputLength;
    if (nCol == 0)
    {
        return NULL;
    }

    // The outer array holds row pointers, hence sizeof(double *).
    double **dataset = (double **) calloc((size_t) datasetLength, sizeof(double *));
    if (dataset == NULL)
    {
        return NULL;
    }

    srand((unsigned int) time(NULL));

    for (int i = 0; i < datasetLength; i++)
    {
        dataset[i] = (double *) calloc(nCol, sizeof(double));
        if (dataset[i] == NULL)
        {
            // Release the rows built so far before reporting failure.
            for (int k = 0; k < i; k++)
            {
                free(dataset[k]);
            }
            free(dataset);
            return NULL;
        }

        // Input columns: real values in [0, 1].
        for (size_t j = 0; j < (size_t) inputLength; j++)
        {
            dataset[i][j] = ((double) rand() / (RAND_MAX));
        }

        // Output columns occupy [inputLength, inputLength + outputLength)
        // and hold either 0 or 1.
        for (size_t j = (size_t) inputLength; j < nCol; j++)
        {
            dataset[i][j] = round(((double) rand() / (RAND_MAX)));
        }
    }

    return dataset;
}
/*
 * Generates a random dataset and prints it to standard output so it can
 * be redirected to a file.
 *
 * Output format: a header line "datasetLength inputLength outputLength",
 * an empty line, then one line per row with every value followed by a
 * single space.
 *
 * Returns 0 on success, EXIT_FAILURE if the dataset could not be created.
 */
int main()
{
    // Dataset dimensions: number of rows, then input and output columns per row.
    int datasetLength = 700;
    int inputLength = 128;
    int outputLength = 1;

    /* Disabled alternative: read the dimensions and the dataset from
       standard input instead of generating random data.
    cin >> datasetLength >> inputLength >> outputLength;
    int nCol = inputLength + outputLength;
    double **dataset = (double **)calloc(datasetLength, sizeof(double *));
    for (int i = 0; i < datasetLength; i++)
    {
        dataset[i] = (double *)calloc(nCol, sizeof(double));
        for (int j = 0; j < nCol; j++)
        {
            cin >> dataset[i][j];
        }
    }*/

    double **dataset = randomDataset(datasetLength, inputLength, outputLength);
    if (dataset == NULL)
    {
        fprintf(stderr, "Could not create the dataset\n");
        return EXIT_FAILURE;
    }

    // Normalization places every value between 0 and 1, which helps the
    // multilayer perceptron converge to the given error threshold. The
    // call is currently disabled:
    //normalizeDataset(dataset, datasetLength, inputLength);

    printf("%d %d %d\n\n", datasetLength, inputLength, outputLength);

    int nCol = inputLength + outputLength;

    // Print the dataset row by row, releasing each row once it is written.
    for (int i = 0; i < datasetLength; i++)
    {
        for (int j = 0; j < nCol; j++)
        {
            printf("%g ", dataset[i][j]);
        }
        putchar('\n');
        free(dataset[i]);
    }
    free(dataset);

    return 0;
}