#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#GAN training script.
#The GAN generates DLLs, with input physics taken from the same datafiles as
#the target DLLs
#Saves the generator network periodically for further testing, among other
#outputs
#Builds on GAN_corr_RNN.py; rewritten to allow implementation of WGAN and
#gradient penalty again
#Note: gradients of recurrent layers cannot currently be calculated, so RNN
#and gradient penalty cannot be used simultaneously
#Also note: WGAN/GP/RNN currently show no improvement over default params
# =============================================================================
# Useful code
# =============================================================================
#Basic GAN:
#github.com/eriklindernoren/Keras-GAN/blob/master/gan/gan.py
#Physics conditioning:
#github.com/eriklindernoren/Keras-GAN/blob/master/cgan/cgan.py
#WGAN-GP code:
#github.com/keras-team/keras-contrib/blob/master/examples/improved_wgan.py
#Elements of GAN-RNN implementation taken from:
#github.com/corynguyen19/midi-lstm-gan/blob/master/mlp_gan.py
#github.com/sachinruk/PyData_Keras_Talk/blob/master/cosine_LSTM.ipynb
# =============================================================================
# Expected file structure to run this file:
# =============================================================================
#Data files for kaon and pion tracks (mod refers to additional variables added):
# '../../data/mod-PID-train-data-KAONS.hdf'
# '../../data/mod-PID-train-data-PIONS.hdf'
#csv files for normalisation of data (shifts and divisors), calculated from
#the datafiles:
# '../../data/KAON_norm.csv'
# '../../data/PION_norm.csv'
# =============================================================================
# Outputs
# =============================================================================
#csv file defining which tracks have not been used in training:
# 'unused_data_mask.csv'
#Plots of DLL distributions during training:
# 'GAN_generated_DLLx_epoch_xxx.eps'
#Trained GAN (and intermediate snapshots, e.g. half-trained, penultimate):
# 'trained_gan.h5'
#Plots of loss functions:
# 'GAN_loss.eps'
#Runtime:
# 'GAN_runtime.txt'
# =============================================================================
# Import libraries
# =============================================================================
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from keras.layers import Input, BatchNormalization, concatenate
from keras.models import Model
from keras.layers.core import Dense, Dropout, Reshape
from keras.layers import CuDNNLSTM, Bidirectional
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras import initializers
import keras.backend as K
import tensorflow as tf
from keras.layers.merge import _Merge
from functools import partial
#Time total run from here to end
t_init = time.time()
# =============================================================================
# Input parameters
# =============================================================================
os.environ["CUDA_VISIBLE_DEVICES"]="1" #Choose GPU to use e.g. "0", "1" or "2"
os.environ["KERAS_BACKEND"] = "tensorflow" #Using tensorflow backend
plt.rcParams['agg.path.chunksize'] = 10000 #Needed for plotting lots of data
#Training variables
batch_size = 128 #Default = 128
epochs = 200 #Default = 100
#Parameters for Adam optimiser
learning_rate = 0.0001 #Default = 0.0001 (Adam default: learning_rate = 0.001)
beta_1=0.5 #Default = 0.5 (Adam default: beta_1 = 0.9)
beta_2=0.9 #Default = 0.9 (Adam default: beta_2 = 0.999)
#Define how much of the total data to use, and how much of that to train with
frac = 0.1 #Default = 0.1
train_frac = 0.7 #Default = 0.7
#The training ratio is the number of discriminator updates per generator update
training_ratio = 1 #Default 5 as per the WGAN-GP paper
grad_penalty_weight = 10 #Default 10 as per the WGAN-GP paper
grad_loss = False #Use gradient penalty loss
WGAN = False #Use Wasserstein loss function
RNN = False #Use recurrent layers
#DLL(DLL[i] - ref_particle) from particle_source data
#e.g. DLL(K-pi) from kaon data file
DLLs = ['e', 'mu', 'k', 'p', 'd', 'bt']
ref_particle = 'pi' #Usually pi(on)
particle_source = 'KAON' #'KAON' or 'PION'
#Physics network will be conditioned on:
physical_vars = ['TrackP', 'TrackPt', 'NumLongTracks', 'NumPVs',
'TrackVertexX', 'TrackVertexY', 'TrackVertexZ',
'TrackRich1EntryX', 'TrackRich1EntryY', 'TrackRich1EntryZ',
'TrackRich1ExitX', 'TrackRich1ExitY', 'TrackRich1ExitZ',
'TrackRich2EntryX', 'TrackRich2EntryY', 'TrackRich2EntryZ',
'TrackRich2ExitX', 'TrackRich2ExitY', 'TrackRich2ExitZ']
noise_dim = 100 # Dimension of random noise vector. Default = 100
phys_dim = len(physical_vars) #Size of physics input
DLLs_dim = len(DLLs) #Number of DLLs
#Size of DLLs and physics combined (input to discim)
data_dim = DLLs_dim + phys_dim
#Size of noise and physics combined (input to gen)
gen_input_row_dim = noise_dim + phys_dim
#Internal layers of generator and discriminator
gen_layers = 9 #Default = 9
discrim_layers = 9 #Default = 9
#Number of nodes in input layers and hidden layers ('x_nodes')
gen_input_nodes = 256 #Default = 256
gen_nodes = 256 #Default = 256
discrim_input_nodes = 256 #Default = 256
discrim_nodes = 256 #Default = 256
#Discriminator output dimensions; usually a single scalar corresponding to
#the probability that the input was real rather than generated
discrim_output_dim = 1 #Default = 1
#If including recurrent layers, can choose how to sort the data. Even if
#sorting by 'None', data is still ordered by Event/Run number
#Here: group by events, but also sort by busy-ness (RNN only)
sort_var = ['RunNumber', 'EventNumber', 'RICH1EntryDist0']
#If running recurrent layers, must change input dimensions accordingly,
#define sequence lengths etc.
if RNN:
gen_layers -= 1 #RNN increases layers before loop by one so subtract this
discrim_layers -= 1 #Same for discriminator
    seq_length = batch_size // 32 #Rows per sequence. Default = 32 (batch_size//4)
#Making sequences out of batch_size results in [apparent_batch_size] rows
#i.e. treat as new batch size:
apparent_batch_size = batch_size - seq_length + 1
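    #e.g. batch_size = 128 and seq_length = 4: create_dataset (below) returns
    #128 - 4 + 1 = 125 sequences, hence the "apparent" batch size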
gen_output_dim = (seq_length, DLLs_dim) #Output N rows of DLL values
#Input DLLs and physics as sequences:
discrim_input_dim = (seq_length, data_dim)
discrim_noise_input_dim = (seq_length, noise_dim)
discrim_phys_input_dim = (seq_length, phys_dim)
discrim_DLL_input_dim = gen_output_dim
gen_noise_input_dim = (seq_length, noise_dim) #Noise input to combined GAN
gen_phys_input_dim = (seq_length, phys_dim) #Physics input to combined GAN
else:
apparent_batch_size = batch_size #No change to batch size
gen_output_dim = DLLs_dim #Output single row of DLLs from gen
discrim_input_dim = (data_dim,) #Input DLLs and physics for discrim
discrim_noise_input_dim = (noise_dim,) #Input noise for discrim
discrim_phys_input_dim = (phys_dim,) #Physics input for discrim
discrim_DLL_input_dim = (gen_output_dim,) #Input DLLs for discrim
gen_noise_input_dim = (noise_dim,) #Noise input to generator
gen_phys_input_dim = (phys_dim,) #Physics input to generator
#WGAN needs linear discriminator activation rather than sigmoid
if WGAN:
discrim_activation = 'linear'
else:
discrim_activation = 'sigmoid'
plot_freq = 20 #e.g. epochs//10. Plot data after this many epochs
if RNN and grad_loss:
    #Fail fast: this combination cannot be trained (see note above)
    raise ValueError("Cannot use RNN and gradient penalty together as "
                     "gradients cannot be calculated")
###############################################################################
#Import data of a single variable (e.g. momentum) via pandas from data files
#Inputs: variable type and particle source, corresponding to the single
# variable to be imported from the source datafile
#Returns: pandas column containing the single variable data only
def import_single_var(var_type, particle_source):
#Define data file as KAONs/PIONs. 'mod' datafile has newly calc. vars
if(particle_source == 'KAON'):
#Define datafile location, read file and select variable of interest
datafile_kaon = '../../data/mod-PID-train-data-KAONS.hdf'
data_kaon = pd.read_hdf(datafile_kaon, 'KAONS')
data = data_kaon.loc[:, var_type]
elif(particle_source == 'PION'):
#Define datafile location, read file and select variable of interest
datafile_pion = '../../data/mod-PID-train-data-PIONS.hdf'
data_pion = pd.read_hdf(datafile_pion, 'PIONS')
data = data_pion.loc[:, var_type]
    else:
        #Raise rather than just print, so 'data' is never returned unbound
        raise ValueError("Please select either kaon or pion as particle "
                         "source")
    return data
#Import all data via pandas from data files
#Inputs: Particle source e.g. KAONS corresponding to the datafile from which
# data will be imported
#Returns: pandas structure containing all variables from the source
def import_all_var(particle_source):
#Import data from kaons or pions
if(particle_source == 'KAON'):
datafile = '../../data/mod-PID-train-data-KAONS.hdf'
elif(particle_source == 'PION'):
datafile = '../../data/mod-PID-train-data-PIONS.hdf'
    else:
        #Raise rather than just print, so 'datafile' is never used unbound
        raise ValueError("Please select either kaon or pion as particle "
                         "source")
data = pd.read_hdf(datafile, particle_source + 'S') #Read in all data
return data
#Change DLLs e.g. from DLL(K-pi) and DLL(p-pi) to DLL(K-p)
#Input: Two DLL arrays w.r.t. pi; the new DLL is the first particle of DLL1
# w.r.t. the first particle of DLL2
#Returns: New DLL array e.g. DLL(K-p)
def change_DLL(DLL1, DLL2):
if(not np.array_equal(DLL1, DLL2)):
DLL3 = np.subtract(DLL1, DLL2)
else:
print("DLLs are the same!")
DLL3 = DLL1
return DLL3
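#Worked example (illustrative): DLL(K-pi) = 5.0 and DLL(p-pi) = 2.0 give
#DLL(K-p) = 5.0 - 2.0 = 3.0, since
#(logL_K - logL_pi) - (logL_p - logL_pi) = logL_K - logL_p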
#Import information needed to normalise all data to between -1 and 1
#Input: particle source i.e. either KAON or PION, so the respective csv
# file with values is read in
#Returns: div_num and shift needed to normalise all relevant data (DLLs and
# input physics)
def import_norm_info(particle_source):
#Read in csv datafile
data_norm = np.array(pd.read_csv('../../data/' + particle_source + \
'_norm.csv'))
    #shift is row [0,x] and div_num row [1,x] of the csv, where column x
    #starts at 1 for meaningful data (column 0 is an index)
#Order of variables:
columns = ['RunNumber', 'EventNumber', 'MCPDGCode', 'NumPVs',
'NumLongTracks', 'NumRich1Hits', 'NumRich2Hits', 'TrackP',
'TrackPt', 'TrackChi2PerDof', 'TrackNumDof', 'TrackVertexX',
'TrackVertexY', 'TrackVertexZ', 'TrackRich1EntryX',
'TrackRich1EntryY', 'TrackRich1EntryZ', 'TrackRich1ExitX',
'TrackRich1ExitY', 'TrackRich1ExitZ', 'TrackRich2EntryX',
'TrackRich2EntryY', 'TrackRich2EntryZ','TrackRich2ExitX',
'TrackRich2ExitY', 'TrackRich2ExitZ', 'RichDLLe', 'RichDLLmu',
'RichDLLk', 'RichDLLp', 'RichDLLd', 'RichDLLbt',
'RICH1EntryDist0', 'RICH1ExitDist0', 'RICH2EntryDist0',
'RICH2ExitDist0', 'RICH1EntryDist1', 'RICH1ExitDist1',
'RICH2EntryDist1', 'RICH2ExitDist1', 'RICH1EntryDist2',
'RICH1ExitDist2', 'RICH2EntryDist2', 'RICH2ExitDist2',
'RICH1ConeNum', 'RICH2ConeNum']
#Create arrays for shift and div_num to be stored in. Only need to save
#DLLs and physics input values (data_dim)
shift = np.zeros(data_dim)
div_num = np.zeros(data_dim)
#Loop over all DLLs and input physics
for i in range(data_dim):
#First values correspond to DLLs
if i < DLLs_dim:
for j in range(len(columns)):
if columns[j] == 'RichDLL' + DLLs[i]:
shift[i] = data_norm[0,j+1]
div_num[i] = data_norm[1,j+1]
break
#Next set correspond to physics inputs
else:
for k in range(len(columns)):
if columns[k] == physical_vars[i-DLLs_dim]:
shift[i] = data_norm[0,k+1]
div_num[i] = data_norm[1,k+1]
break
return shift, div_num
#Normalise data: centre on zero and divide by the max s.t. range = [-1, 1]
#Input: Data array to be normalised (x) and particle source, so know which
# set of normalisation values to use
#Returns: Normalised data array (x) and shift/div_num used to do so (so can
# unnormalise later)
def norm(x, particle_source):
    #Import normalisation arrays (shift and div_num) from csv file
    shift, div_num = import_norm_info(particle_source)
#For each column in input data array, normalise by shifting and dividing
for i in range(x.shape[1]):
x[:,i] = np.subtract(x[:,i], shift[i])
x[:,i] = np.divide(x[:,i], div_num[i])
return x, shift, div_num
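#Inverse of norm() for reference (a hypothetical helper, not called here;
#gen_examples below applies the same unnormalisation inline when plotting)
def unnorm(x, shift, div_num):
    #Undo the normalisation: multiply each column by its divisor, add shift
    for i in range(x.shape[1]):
        x[:,i] = np.multiply(x[:,i], div_num[i])
        x[:,i] = np.add(x[:,i], shift[i])
    return x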
#Create sequences of data needed for RNN
#Inputs: data array to be made into sequences, length of sequences (=look_back)
#Returns: arrays of sequenced data, array containing the final row of each seq
#Note: Number of sequences = original number of rows - look_back + 1
# e.g. input 10 rows, lookback = 4 -> 7 output
def create_dataset(data, look_back=1):
dataX, dataY = [], []
#Loop from 0 to len(data)-look_back+1
for i in range(len(data)-look_back+1):
#Extract [look_back] data rows starting from the ith row and final rows
a = data[i:(i+look_back), :]
dataX.append(a)
dataY.append(data[i + look_back - 1, :])
return np.array(dataX), np.array(dataY)
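#Shape check (illustrative, not run): 10 rows of 2 features with look_back=4
#give dataX.shape == (7, 4, 2) and dataY.shape == (7, 2)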
#Get all relevant training (and test) data
#Inputs: List of DLLs of interest, list of physical vars of interest, particle
# source for data e.g. KAONS
#Returns: training and test data, as well as values used to normalise the data
def get_x_data(DLLs, physical_vars, particle_source):
    #Import all data from particle source datafile
all_data = import_all_var(particle_source)
    #If using recurrent layers, can sort by a variable e.g. TrackP.
    #If None, data will still be grouped by event/run number
if RNN and sort_var is not None:
all_data = all_data.sort_values(by=sort_var,ascending=True)
#Number of data rows (10,000,000 usually)
data_length = all_data.shape[0]
    #Create array for all data to be stored
#(size: number of rows x number of DLLs and physics inputs)
x_data_dim = (data_length, data_dim)
x_data = np.zeros((x_data_dim))
#Get DLL data
for i in range(0, DLLs_dim):
x_data[:,i] = np.array(all_data.loc[:, 'RichDLL' + DLLs[i]])
#Get physics data
for i in range(DLLs_dim, DLLs_dim + phys_dim):
phys_vars_index = i - DLLs_dim
x_data[:,i] = np.array(all_data.loc[:, physical_vars[phys_vars_index]])
#Use subset of data for training/testing
tot_split = int(frac * data_length)
    #Create an array with fraction of 1s and 0s randomly mixed and apply as
    #boolean mask to use fraction of data only
zero_arr = np.zeros(data_length - tot_split, dtype=bool)
ones_arr = np.ones(tot_split, dtype=bool)
frac_mask = np.concatenate((zero_arr,ones_arr))
np.random.shuffle(frac_mask)
    #Apply mask to get fraction of data.
    #Could also shuffle here but it should not be necessary: data is selected
    #randomly (though kept in order here), and batches later take a random
    #ordering of it
x_data = x_data[frac_mask]
#Normalise data by shifting and dividing s.t. lies between -1 and 1
x_data, shift, div_num = norm(x_data, particle_source)
#Split into training/test data e.g. 70/30
split = int(train_frac * tot_split)
    #Create an array with fraction of 1s and 0s randomly mixed and apply as
#boolean mask for training/test split
    zero_arr_2 = np.zeros(tot_split - split, dtype=bool)
ones_arr_2 = np.ones(split, dtype=bool)
train_mask = np.concatenate((zero_arr_2,ones_arr_2))
np.random.shuffle(train_mask)
test_mask = np.logical_not(train_mask)
#Apply masks to get training and test sets of data
x_train = x_data[train_mask]
x_test = x_data[test_mask]
    #Apply the train mask to the fraction mask, giving a new mask where 0
    #means the row was either not in the fraction, or was in the fraction
    #but assigned as test data
frac_mask[frac_mask==1] = train_mask
#Take inverse of frac so can extract these 0s
not_frac_mask = np.logical_not(frac_mask)
#Save mask info so when testing, can use different data to training data
pd.DataFrame(not_frac_mask).to_csv('unused_data_mask.csv')
return x_train, x_test, shift, div_num
#Takes randomly-weighted average of two tensors - outputs a random point on
#the line between each pair of points
class RandomWeightedAverage(_Merge):
def _merge_function(self, inputs):
# Note: would be (batch_size, 1, 1, 1) for images
if RNN:
weights = K.random_uniform((batch_size, 1, 1))
else:
weights = K.random_uniform((batch_size, 1))
return (weights * inputs[0]) + ((1 - weights) * inputs[1])
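#NumPy sketch of the same interpolation (illustrative only; assumes 2D
#arrays of shape (batch_size, nbr_features) and is not used in training)
def random_weighted_average_np(real, generated):
    #One uniform weight per sample, broadcast across its features
    weights = np.random.uniform(size=(real.shape[0], 1))
    return (weights * real) + ((1 - weights) * generated)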
#Calculate the gradient penalty loss for a batch of "averaged" samples
#Penalise the network if the grad norm moves away from 1
#Random points on the lines between real and generated samples are chosen,
#and grad checked at these points
#To eval grads, must run samples through the generator and evaluate loss,
#then get grad of the discrim w.r.t. the input averaged samples
#The loss requires the original averaged samples as input, but Keras only
#supports passing y_true and y_pred to loss functions, so make a partial() of
#the function with the averaged_samples argument, and use that for training
def gradient_penalty_loss(y_true, y_pred, averaged_samples,
gradient_penalty_weight):
# First get the gradients:
# assuming: - that y_pred has dimensions (batch_size, 1)
# - averaged_samples has dimensions (batch_size, nbr_features)
# Gradients afterwards has dimension (batch_size, nbr_features), basically
# a list of nbr_features-dimensional gradient vectors
gradients = K.gradients(y_pred, averaged_samples)[0]
#Compute the euclidean norm by squaring
gradients_sqr = K.square(gradients)
#Sum over the rows
gradients_sqr_sum = K.sum(gradients_sqr,
axis=np.arange(1, len(gradients_sqr.shape)))
#Sqrt
gradient_l2_norm = K.sqrt(gradients_sqr_sum)
#Compute lambda * (1 - ||grad||)^2 still for each single sample
gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
#Return the mean as loss over all the batch samples
return K.mean(gradient_penalty)
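#NumPy reference for the penalty above (illustrative only, not used in
#training): for gradient vectors of shape (batch_size, nbr_features),
#compute the mean of lambda * (1 - ||grad||)^2
def gradient_penalty_np(gradients, weight=grad_penalty_weight):
    gradient_l2_norm = np.sqrt(np.sum(np.square(gradients), axis=1))
    return np.mean(weight * np.square(1 - gradient_l2_norm))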
#Get (Adam) optimiser. Can replace Adam with others if required
#Input: None
#Returns: Adam optimiser with learning rate and beta set at start of code.
def get_optimizer():
return Adam(lr=learning_rate, beta_1=beta_1, beta_2=beta_2)
#Calculate Wasserstein loss function for a sample batch.
#Note: if used, discriminator output must be linear, and samples labelled -1
# if generated and 1 if real (instead of 0/1)
#Inputs: True and predicted y values
#Returns: Wasserstein loss
def wasserstein_loss(y_true, y_pred):
return K.mean(y_true * y_pred)
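#e.g. for labels y_true = [+1, -1] and critic outputs y_pred = [0.3, 0.7]:
#loss = mean([+1*0.3, -1*0.7]) = -0.2. Minimising this drives outputs with
#label +1 down and label -1 up, separating real from generated samples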
#Calculate cramer loss function
#Inputs: Generated and real inputs for discriminator, as well as the
# discriminator network itself
#Returns: Cramer loss function
def cramer_critic(x, y, discriminator):
discriminated_x = discriminator(x)
return tf.norm(discriminated_x - discriminator(y),
axis=1) - tf.norm(discriminated_x, axis=1)
#Get the name of the loss function to be used. For WGAN/Cramer etc. the full
#functions above must be used rather than calling this function
#Inputs: None
#Returns: 'binary_crossentropy' loss function currently,
#Note: can replace with other built in Keras loss functions
def get_loss_function():
return 'binary_crossentropy'
#Build generator network, including RNN layers if required.
#Input/output dimensions, number of layers etc defined at start of code
#Inputs: Optimizer and loss function used to compile network
#Returns: compiled generator network
def build_generator(optimizer, loss_func):
#Input layer
gen_input_noise = Input(shape=(gen_noise_input_dim), name='Input_noise')
gen_input_phys = Input(shape=(gen_phys_input_dim), name='Input_physics')
gen_input = concatenate([gen_input_noise, gen_input_phys], axis=-1)
#First layer connected to input layer
if RNN:
#return_sequences = True to connect two RNN layers.
#Would need to flatten after CuDNNLSTM if no Bidirectional layer
layer = CuDNNLSTM(gen_input_nodes, kernel_initializer = \
initializers.RandomNormal(stddev=0.02),
return_sequences=True)(gen_input)
layer = Bidirectional(CuDNNLSTM(gen_input_nodes))(layer)
else:
layer = Dense(gen_input_nodes, kernel_initializer = \
initializers.RandomNormal(stddev=0.02))(gen_input)
layer = LeakyReLU(0.2)(layer)
layer = BatchNormalization(momentum=0.8)(layer)
#Internal layers
for i in range(gen_layers):
        layer = Dense(gen_nodes)(layer)
layer = LeakyReLU(0.2)(layer)
layer = BatchNormalization(momentum=0.8)(layer)
#Output layer. Tanh activation so lies between -1 and 1
if RNN:
layer = Dense(np.prod(gen_output_dim), activation='tanh')(layer)
gen_outputs = Reshape(gen_output_dim)(layer)
else:
gen_outputs = Dense(gen_output_dim, activation='tanh')(layer)
generator = Model(inputs=[gen_input_noise, gen_input_phys],
outputs=gen_outputs)
# generator = multi_gpu_model(generator, gpus=3)
if WGAN:
generator.compile(loss=wasserstein_loss, optimizer=optimizer)
else:
generator.compile(loss=loss_func, optimizer=optimizer)
return generator
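#Usage sketch (illustrative; gen_examples below does this for real):
# noise = np.random.normal(0, 1, size=[n, noise_dim])
# fake_DLLs = generator.predict([noise, phys_data])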
#Build discriminator network, including RNN layers if required.
#Input/output dimensions, number of layers etc defined at start of code
#Inputs: Optimizer and loss function used to compile network
#Returns: compiled discriminator network
def build_discriminator(optimizer, loss_func):
#Input layer
discrim_input_DLLs = Input(shape=(discrim_DLL_input_dim),
name='Input_DLLs')
discrim_input_phys = Input(shape=(discrim_phys_input_dim),
name='Input_physics')
discrim_input = concatenate([discrim_input_DLLs, discrim_input_phys],
axis=-1)
    #First layer connected to input layer
if RNN:
        #return_sequences = True to connect two RNN layers. Would need to
        #flatten after CuDNNLSTM if no Bidirectional layer
layer = CuDNNLSTM(discrim_input_nodes, kernel_initializer = \
initializers.RandomNormal(stddev=0.02),
return_sequences=True)(discrim_input)
layer = Bidirectional(CuDNNLSTM(discrim_input_nodes))(layer)
else:
layer = Dense(discrim_input_nodes, kernel_initializer = \
initializers.RandomNormal(stddev=0.02))(discrim_input)
layer = LeakyReLU(0.2)(layer)
layer = Dropout(0.3)(layer)
#Internal layers
for i in range(discrim_layers):
        layer = Dense(discrim_nodes)(layer)
layer = LeakyReLU(0.2)(layer)
if not RNN:
layer = Dropout(0.3)(layer)
#Output layer. Usually sigmoid activation so outputs 0-1 (false-true)
#(if WGAN: linear activation)
discrim_outputs = Dense(discrim_output_dim,
activation=discrim_activation)(layer)
discriminator = Model(inputs=[discrim_input_DLLs, discrim_input_phys],
outputs=discrim_outputs)
if WGAN:
discriminator.compile(loss=wasserstein_loss, optimizer=optimizer)
else:
discriminator.compile(loss=loss_func, optimizer=optimizer)
return discriminator
#Build/compile overall GAN network to train generator - output of which is
#tested via the discriminator
#Input/output dimensions, number of layers etc defined at start of code
#Inputs: Discriminator and generator networks, as well as optimizer and
# loss function used to compile network
#Returns: compiled GAN network
def build_gan_network(discriminator, generator, optimizer, loss_func):
# =============================================================================
# Generator model
# =============================================================================
    #Initially freeze the discriminator, as only one network is trained at a time
for layer in discriminator.layers:
layer.trainable = False
for layer in generator.layers:
layer.trainable = True
discriminator.trainable = False
generator.trainable = True
#GAN input will be n-dimensional vectors (n = noise_dim + phys_dim)
gen_noise_input = Input(shape=(gen_noise_input_dim))
gen_phys_input = Input(shape=(gen_phys_input_dim))
#Output of the generator i.e. DLLs
gen_output = generator([gen_noise_input, gen_phys_input])
#Get output of discriminator
    #(For a normal GAN, usually the probability that the input is real
    #rather than generated)
gan_output = discriminator([gen_output, gen_phys_input])
#Combined model inputs/outputs
gen_model = Model(inputs=[gen_noise_input, gen_phys_input],
outputs=gan_output)
if WGAN:
gen_model.compile(loss=wasserstein_loss, optimizer=optimizer)
else:
gen_model.compile(loss=loss_func, optimizer=optimizer)
# =============================================================================
# Discriminator model
# =============================================================================
#Now allow discriminator to be trained but not generator
for layer in discriminator.layers:
layer.trainable = True
for layer in generator.layers:
layer.trainable = False
discriminator.trainable = True
generator.trainable = False
real_DLLs = Input(shape=(discrim_DLL_input_dim))
discrim_noise_input = Input(shape=(discrim_noise_input_dim))
discrim_phys_input = Input(shape=(discrim_phys_input_dim))
generated_DLLs_for_discrim = generator([discrim_noise_input,
discrim_phys_input])
discrim_output_from_gen = discriminator([generated_DLLs_for_discrim,
discrim_phys_input])
discrim_output_from_real_DLLs = discriminator([real_DLLs,
discrim_phys_input])
if grad_loss:
#Need to generate weighted averages of real and generated samples,
#to use for the gradient norm penalty
averaged_samples = RandomWeightedAverage()([real_DLLs,
generated_DLLs_for_discrim])
#Run these samples through the discriminator as well. Note, never
#really use the discriminator output. For these samples, only running
#them to get the gradient norm for the gradient penalty loss.
averaged_samples_out = discriminator([averaged_samples,
discrim_phys_input])
#Gradient penalty loss function requires the input averaged samples to
#get gradients. However, Keras loss functions can only have two
#arguments, y_true and y_pred. Get around this by making a partial()
#of the function with the averaged samples here.
partial_gp_loss = partial(gradient_penalty_loss,
averaged_samples=averaged_samples,
gradient_penalty_weight=grad_penalty_weight)
# Functions need names or Keras will throw an error
partial_gp_loss.__name__ = 'gradient_penalty'
discrim_model = Model(inputs=[real_DLLs, discrim_noise_input,
discrim_phys_input],
outputs=[discrim_output_from_real_DLLs,
discrim_output_from_gen,
averaged_samples_out])
if WGAN:
discrim_model.compile(optimizer=optimizer,
loss=[wasserstein_loss, wasserstein_loss,
partial_gp_loss])
else:
discrim_model.compile(optimizer=optimizer,
loss=[loss_func, loss_func, partial_gp_loss])
else:
discrim_model = Model(inputs=[real_DLLs, discrim_noise_input,
discrim_phys_input],
outputs=[discrim_output_from_real_DLLs,
discrim_output_from_gen])
if WGAN:
discrim_model.compile(optimizer=optimizer,
loss=[wasserstein_loss, wasserstein_loss])
else:
discrim_model.compile(optimizer=optimizer,
loss=[loss_func, loss_func])
return gen_model, discrim_model
#Plot examples of generated DLL values
#Inputs: column of one of the generated DLLs, (DLL) name, epoch of the
# training, number of bins, x and y range for histogram
#Returns: 0. DLL distributions plotted and saved.
def plot_examples(generated_vars, var_name, epoch, bin_no=400, x_range = None,
y_range = None):
fig1, ax1 = plt.subplots()
ax1.cla()
title = 'GAN_generated_' + var_name + '_epoch_%d.eps'
if y_range is not None:
ax1.set_ylim(bottom = 0, top = y_range)
if x_range is not None:
ax1.set_xlim(x_range)
ax1.set_xlabel(var_name)
ax1.set_ylabel("Number of events")
ax1.hist(generated_vars, bins=bin_no, range=x_range)
fig1.savefig(title % epoch, format='eps', dpi=2500)
return 0
#Use (trained) generator network to predict DLL values and call plotting
#function to plot histograms
#Inputs: test data to generate predictions; epoch to save graphs periodically;
# generator to make predictions; shift/div_num to unnormalise generated
# data; examples to define number of predictions to be made
#Returns: 0. Graphs plotted and saved via plot_examples function
def gen_examples(x_test, epoch, generator, shift, div_num, examples=250000):
    #Define a random set of integers, used to extract random rows from x_test
batch_ints = np.random.randint(0, x_test.shape[0], size=examples)
    #Can use random indices, but they must still be sorted for RNN.
    #Otherwise use indices from a specified starting point
if RNN:
batch_ints = np.sort(batch_ints)
    #Alternatively, if all rows in order are wanted:
# start_index = np.random.randint(0, x_test.shape[0] - examples)
# end_index = start_index + examples
# batch_ints = np.arange(start_index, end_index)
#Use batch_ints to form generator input
data_batch = x_test[batch_ints]
phys_data = data_batch[:, DLLs_dim:]
noise = np.random.normal(0, 1, size=[examples, noise_dim])
gen_input = [noise, phys_data]
#If RNN need to reshape generator inputs into sequences first
if RNN:
#Create sequences to input to generator
noise_RNN, _ = create_dataset(noise, seq_length)
phys_data_RNN, _ = create_dataset(phys_data, seq_length)
gen_input_RNN = [noise_RNN, phys_data_RNN]
        #Generate fake DLLs, then flatten the overlapping sequences back to
        #one row per track (initial rows of the first sequence plus the
        #last row of every sequence)
generated_data = generator.predict(gen_input_RNN,
batch_size=apparent_batch_size)
generated_data = np.concatenate((generated_data[0,:-1,:],
generated_data[:,-1,:]))
else:
#Generate fake data DLLs
generated_data = generator.predict(gen_input)
#Shift generated data back to proper distribution and plot data
for i in range(generated_data.shape[1]):
generated_data[:,i] = np.multiply(generated_data[:,i], div_num[i])
generated_data[:,i] = np.add(generated_data[:,i], shift[i])
plot_examples(generated_data[:,i], 'DLL'+ DLLs[i], epoch)
return 0
#Overall network training function. Imports data, splits it into batches,
#and trains the networks
#Inputs: Number of epochs to train over, size of batches to train at a time
#Returns: 0. Calls functions to generate and plot examples periodically.
# Generator is also saved periodically, loss functions plotted/saved
def train(epochs=20, batch_size=128):
print("Importing data...")
#Get the training/testing data and norm values
x_train, x_test, shift, div_num = get_x_data(DLLs, physical_vars,
particle_source)
print("Data imported")
    # Build GAN network
print("Building network...")
optimizer = get_optimizer()
loss_func = get_loss_function()
generator = build_generator(optimizer, loss_func)
discriminator = build_discriminator(optimizer, loss_func)
gen_model, discrim_model = build_gan_network(discriminator, generator,
optimizer, loss_func)
print("Network built")
    #Placeholders for losses to be plotted
discrim_loss_tot = []
discrim_loss_real_tot = []
discrim_loss_gen_tot = []
discrim_loss_grad_tot = []
gen_loss_tot = []
real_discrim_y = np.ones((apparent_batch_size, 1), dtype=np.float32)
gen_y = real_discrim_y
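    #Note: gen_y aliases real_discrim_y, so the in-place label smoothing
    #below also sets the generator targets to 0.9 in the non-WGAN case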
if WGAN:
fake_discrim_y = -real_discrim_y
else:
real_discrim_y[:] = 0.9 #One-sided label smoothing
fake_discrim_y = np.zeros((apparent_batch_size, 1), dtype=np.float32)
if grad_loss:
dummy_discrim_y = np.zeros((apparent_batch_size, 1), dtype=np.float32)
#Loop over number of epochs. Starting from epoch 1
for i in range(1, epochs+1):
print('-'*15, 'Epoch %d' % i, '-'*15)
        #If not RNN, shuffle since rows are taken sequentially
        #(if RNN, data must stay sorted so do not shuffle)
if not RNN:
np.random.shuffle(x_train)
        #Size of minibatch (the data the discriminator is trained on for
        #each batch the generator is trained on)
minibatch_size = batch_size * training_ratio
#Number of batches generator is trained on
gen_batches = int(x_train.shape[0] // (batch_size * training_ratio))
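        #e.g. ~10,000,000 rows with frac = 0.1 and train_frac = 0.7 give
        #~700,000 training rows: 700000 // (128 * 1) = 5468 batches per epoch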
        #Initialise losses for each batch
gen_loss = []
discrim_loss_batch = np.zeros(gen_batches)
discrim_loss_real_batch = np.zeros(gen_batches)
discrim_loss_gen_batch = np.zeros(gen_batches)
discrim_loss_grad_batch = np.zeros(gen_batches)
#Loop for each batch. tqdm to show progress bars
for j in tqdm(range(gen_batches)):
if RNN:
start_index = np.random.randint(0, (x_train.shape[0] -
minibatch_size))
else:
start_index = j * minibatch_size
end_index = start_index + minibatch_size
batch_ints = np.arange(start_index, end_index)
discrim_minibatches = x_train[batch_ints]
discrim_loss = np.zeros(training_ratio)
discrim_loss_real = np.zeros(training_ratio)
discrim_loss_gen = np.zeros(training_ratio)
discrim_loss_grad = np.zeros(training_ratio)
            #Train discriminator [training_ratio] times for every generator update
for k in range(training_ratio):
# =============================================================================
# Training the discriminator
# =============================================================================
                #Use batch_ints to get data batch for discriminator input
if RNN:
if training_ratio==1:
start_index=0
else:
start_index = \
np.random.randint(0, (discrim_minibatches.shape[0] -
batch_size))
else:
start_index = k * batch_size
end_index = start_index + batch_size
batch_ints = np.arange(start_index, end_index)
#Use batch_ints to form discriminator input
data_batch = discrim_minibatches[batch_ints]
phys_data = data_batch[:, DLLs_dim:]
DLL_data = data_batch[:, :DLLs_dim]
noise = np.random.normal(0, 1, size=[batch_size, noise_dim])
#If RNN data must be made into sequences
if RNN:
noise, _ = create_dataset(noise, seq_length)
phys_data, _ = create_dataset(phys_data, seq_length)
DLL_data, _ = create_dataset(DLL_data, seq_length)
#Train only discriminator, not generator
discriminator.trainable = True
generator.trainable = False
#Train discriminator network
if grad_loss:
                    discrim_loss[k], discrim_loss_real[k], \
                    discrim_loss_gen[k], discrim_loss_grad[k] = \
                        discrim_model.train_on_batch([DLL_data, noise, phys_data],
                                                     [real_discrim_y,
                                                      fake_discrim_y,
                                                      dummy_discrim_y])
else:
discrim_loss[k], discrim_loss_real[k], \
discrim_loss_gen[k] = \
discrim_model.train_on_batch([DLL_data, noise, phys_data],
[real_discrim_y,
fake_discrim_y])
# =============================================================================