-
Notifications
You must be signed in to change notification settings - Fork 0
/
refs.bib
180 lines (155 loc) · 6.72 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
% Maxout networks paper, ICML 2013. PMLR is recorded as the publisher
% (organization is the sponsor field in standard styles).
@inproceedings{goodfellow2013maxout,
  author    = {Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
  title     = {Maxout Networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1319--1327},
  year      = {2013},
  publisher = {PMLR}
}
% Dropout paper. The journal name is stored in its canonical full form so that
% every style prints it the same way.
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal   = {Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  year      = {2014},
  publisher = {JMLR.org}
}
% Deep Learning textbook. The web address lives in the url field (raw, without
% a hard-coded \url wrapper), matching the other entries in this file.
@book{Goodfellow-et-al-2016,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  url       = {http://www.deeplearningbook.org}
}
% L1 vs. L2 regularization paper. The norm names are brace-protected so
% sentence-casing styles do not turn them into "l1" and "l2".
% pages holds the single value 78, kept as given.
@inproceedings{ng2004feature,
  author    = {Ng, Andrew Y.},
  title     = {Feature Selection, {L1} vs. {L2} Regularization, and Rotational Invariance},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning},
  pages     = {78},
  year      = {2004}
}
% arXiv preprint. The identifier is stored in the eprint fields instead of
% being placed in a journal field; the url is kept so that styles without
% eprint support still show where to find the paper.
@misc{simonyan2014very,
  author        = {Simonyan, Karen and Zisserman, Andrew},
  title         = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
  year          = {2014},
  eprint        = {1409.1556},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  url           = {https://arxiv.org/abs/1409.1556}
}
% Residual learning paper. The venue is written as a proper name with the
% acronym brace-protected.
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016}
}
% Glorot and Bengio on training deep feedforward networks.
% "JMLR Workshop and Conference Proceedings" is a proceedings series, so it is
% stored in series (with its volume) rather than in organization.
% NOTE(review): volume 9 is supplied from the published record; confirm.
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the Difficulty of Training Deep Feedforward Neural Networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  series    = {JMLR Workshop and Conference Proceedings},
  volume    = {9},
  pages     = {249--256},
  year      = {2010}
}
% Long-term dependencies in recurrent networks (1993 IEEE conference paper).
% IEEE is recorded as publisher; the acronym in the venue is brace-protected.
@inproceedings{bengio1993problem,
  author    = {Bengio, Yoshua and Frasconi, Paolo and Simard, Patrice},
  title     = {The Problem of Learning Long-Term Dependencies in Recurrent Networks},
  booktitle = {{IEEE} International Conference on Neural Networks},
  pages     = {1183--1188},
  year      = {1993},
  publisher = {IEEE}
}
% Sparse regularization for ReLU CNNs (IJCNN 2017).
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{ide2017improvement,
  author    = {Ide, Hidenori and Kurita, Takio},
  title     = {Improvement of Learning for {CNN} with {ReLU} Activation by Sparse Regularization},
  booktitle = {2017 International Joint Conference on Neural Networks ({IJCNN})},
  pages     = {2684--2691},
  year      = {2017},
  publisher = {IEEE}
}
% Batch normalization paper, ICML 2015. PMLR is recorded as the publisher
% (organization is the sponsor field in standard styles).
@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  title     = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
  booktitle = {International Conference on Machine Learning},
  pages     = {448--456},
  year      = {2015},
  publisher = {PMLR}
}
% Densely connected networks paper.
% The particle "van der" is lower-case so BibTeX parses it as the von part and
% treats Maaten as the surname.
@inproceedings{huang2017densely,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely Connected Convolutional Networks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4700--4708},
  year      = {2017}
}
% Back-propagation paper in Nature, volume 323.
% The journal name is capitalised because standard styles print it verbatim.
@article{rumelhart1986learning,
  author    = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title     = {Learning Representations by Back-Propagating Errors},
  journal   = {Nature},
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  year      = {1986},
  publisher = {Nature Publishing Group}
}
% Global-minima result for gradient descent, ICML 2019. PMLR is recorded as
% the publisher (organization is the sponsor field in standard styles).
@inproceedings{du2019gradient,
  author    = {Du, Simon and Lee, Jason and Li, Haochuan and Wang, Liwei and Zhai, Xiyu},
  title     = {Gradient Descent Finds Global Minima of Deep Neural Networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1675--1685},
  year      = {2019},
  publisher = {PMLR}
}
% Difficulty of training recurrent networks, ICML 2013. PMLR is recorded as
% the publisher (organization is the sponsor field in standard styles).
@inproceedings{pascanu2013difficulty,
  author    = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
  title     = {On the Difficulty of Training Recurrent Neural Networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1310--1318},
  year      = {2013},
  publisher = {PMLR}
}
% arXiv preprint. The identifier is stored in the eprint fields instead of
% being placed in a journal field; the url is kept so that styles without
% eprint support still show where to find the paper.
% NOTE(review): primaryclass cs.LG is supplied from memory; confirm on arXiv.
@misc{li2017visualizing,
  author        = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
  title         = {Visualizing the Loss Landscape of Neural Nets},
  year          = {2017},
  eprint        = {1712.09913},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1712.09913}
}
% How batch normalization helps optimization (32nd NeurIPS proceedings).
% The ogonek in the last author's surname is written as a BibTeX special
% character so it sorts as a single letter.
@inproceedings{santurkar2018does,
  author    = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and M{\k{a}}dry, Aleksander},
  title     = {How Does Batch Normalization Help Optimization?},
  booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems},
  pages     = {2488--2498},
  year      = {2018}
}
% Technical report, typed as such because an article entry requires a journal
% and this work has none.
% NOTE(review): the author list is kept exactly as exported; verify it (and the
% trailing "and others") against the report's title page.
% NOTE(review): institution is supplied from the published record; confirm.
@techreport{krizhevsky2009learning,
  author      = {Krizhevsky, Alex and Hinton, Geoffrey and others},
  title       = {Learning Multiple Layers of Features from Tiny Images},
  institution = {University of Toronto},
  year        = {2009}
}
% "Efficient backprop" chapter, pages 9-48 of the Springer collection
% "Neural networks: Tricks of the trade" (2012).
@incollection{lecun2012efficient,
  author    = {LeCun, Yann A and Bottou, L{\'e}on and Orr, Genevieve B and M{\"u}ller, Klaus-Robert},
  title     = {Efficient backprop},
  booktitle = {Neural networks: Tricks of the trade},
  publisher = {Springer},
  year      = {2012},
  pages     = {9--48}
}
% Bishop's neural networks textbook. It is a single-author book, so the
% exported "and others" is removed.
@book{bishop1995neural,
  author    = {Bishop, Christopher M.},
  title     = {Neural Networks for Pattern Recognition},
  publisher = {Oxford University Press},
  year      = {1995}
}
% Web article. The site name is stored in howpublished because standard styles
% ignore a journal field on misc entries; month uses the predefined macro.
@misc{machinelearningmastery_2019,
  author       = {Brownlee, Jason},
  title        = {A Gentle Introduction to Batch Normalization for Deep Neural Networks},
  howpublished = {Machine Learning Mastery},
  year         = {2019},
  month        = jan,
  url          = {https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/}
}
% Web article. The " - Towards Data Science" suffix was scraped page-title
% residue, not part of the title; the publication name is kept in howpublished
% because standard styles ignore journal and publisher on misc entries.
@misc{johann_huber_2020,
  author       = {Huber, Johann},
  title        = {Batch Normalization in 3 Levels of Understanding},
  howpublished = {Towards Data Science},
  year         = {2020},
  month        = nov,
  url          = {https://towardsdatascience.com/batch-normalization-in-3-levels-of-understanding-14c2da90a338#ad2e}
}
% Web article. The export had no title field.
% NOTE(review): title reconstructed from the URL slug; confirm against the page.
@misc{bohra_2021,
  author       = {Bohra, Yash},
  title        = {The Challenge of Vanishing/Exploding Gradients in Deep Neural Networks},
  howpublished = {Analytics Vidhya},
  year         = {2021},
  month        = jun,
  url          = {https://www.analyticsvidhya.com/blog/2021/06/the-challenge-of-vanishing-exploding-gradients-in-deep-neural-networks/}
}
% Q and A thread on Cross Validated. The curly Unicode quotes are replaced with
% TeX quote ligatures (classic BibTeX is 8-bit only) and the acronym is
% brace-protected.
@misc{rnns_2018,
  author       = {Perkins, Hugh},
  title        = {What are ``residual connections'' in {RNNs}?},
  howpublished = {Cross Validated},
  year         = {2018},
  url          = {https://stats.stackexchange.com/questions/321054/what-are-residual-connections-in-rnns}
}
% Conference paper (the URL points into the NeurIPS 2006 proceedings), so it is
% typed as inproceedings rather than book; year is taken from the URL path.
% NOTE(review): booktitle and volume are supplied from the published record;
% confirm, and add pages and publisher if the target style needs them.
@inproceedings{bengio_lamblin_popovici_larochelle,
  author    = {Bengio, Yoshua and Lamblin, Pascal and Popovici, Dan and Larochelle, Hugo},
  title     = {Greedy Layer-Wise Training of Deep Networks},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {19},
  year      = {2006},
  url       = {https://proceedings.neurips.cc/paper/2006/file/5da713a690c067105aeb2fae32403405-Paper.pdf}
}
% Blog post. The export had no title field; month is taken from the URL path.
% NOTE(review): title and blog name are reconstructed from the URL; confirm
% against the page.
@misc{lstm_2015,
  author       = {Olah, Christopher},
  title        = {Understanding {LSTM} Networks},
  howpublished = {colah's blog},
  year         = {2015},
  month        = aug,
  url          = {https://colah.github.io/posts/2015-08-Understanding-LSTMs/}
}