forked from PaddlePaddle/PGL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
classify.py
112 lines (98 loc) · 3.55 KB
/
classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
classify.py
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
def build_lr_model(args):
    """
    Build the logistic-regression classification network.

    Args:
        args: namespace carrying `w2v_emb_size` (input feature width) and
            `num_class` (number of output classes).

    Returns:
        (loss, acc): mean softmax cross-entropy loss and top-1 accuracy
        variables of the constructed graph.
    """
    # Input placeholders: embedding features and integer class labels.
    features = fluid.layers.data(
        name="emb_x", dtype='float32', shape=[args.w2v_emb_size])
    targets = fluid.layers.data(name="label_y", dtype='int64', shape=[1])
    # One fully-connected layer producing un-normalized class scores.
    scores = fluid.layers.fc(input=features,
                             size=args.num_class,
                             act=None,
                             name='classification_layer')
    probabilities = fluid.layers.softmax(scores)
    mean_loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(scores, targets))
    top1_accuracy = fluid.layers.accuracy(
        input=probabilities, label=targets, k=1)
    return mean_loss, top1_accuracy
def construct_feed_data(data, batch_size=16):
    """
    Yield mini-batches of (features, labels) ready to feed the model.

    Each sample is expected to be a flat sequence whose first element is
    skipped (presumably an id — TODO confirm against the caller), whose
    last element is the label, and whose middle elements are the features.

    Args:
        data: iterable of samples; sample[1:-1] are features, sample[-1]
            is the label.
        batch_size: number of samples per yielded batch (default 16,
            the value previously hard-coded).

    Yields:
        (features, labels): float32 ndarray of shape (b, n_features) and
        int64 ndarray of shape (b, 1), with b <= batch_size.
    """
    feats = []
    labels = []
    for sample in data:
        feats.append(sample[1:-1])
        labels.append([sample[-1]])
        # Flush a full batch.  The original implementation yielded here
        # *instead of* appending, silently dropping one sample per batch.
        if len(feats) == batch_size:
            yield (np.array(feats).astype(np.float32),
                   np.array(labels).astype(np.int64))
            feats = []
            labels = []
    # Trailing partial batch, if any.
    if feats:
        yield (np.array(feats).astype(np.float32),
               np.array(labels).astype(np.int64))
def run_epoch(exe, data, program, stage, epoch, loss, acc):
    """
    Run one epoch of `program` over `data` and print averaged metrics.

    Args:
        exe: fluid Executor used to run the program.
        data: iterable of samples, batched via construct_feed_data.
        program: fluid Program to execute (train program or test clone).
        stage: label for logging, e.g. "train" or "valid".
        epoch: current epoch index (logging only).
        loss: loss variable to fetch from the program.
        acc: accuracy variable to fetch from the program.
    """
    print('start {} epoch of {}'.format(stage, epoch))
    all_loss = 0.0
    all_acc = 0.0
    all_samples = 0.0
    for datas, labels in construct_feed_data(data):
        batch_loss, batch_acc = exe.run(
            program,
            fetch_list=[loss, acc],
            feed={"emb_x": datas,
                  "label_y": labels})
        len_samples = len(datas)
        # Accumulate sample-weighted sums; the original used `=` here,
        # overwriting the running totals on every batch.
        all_loss += batch_loss * len_samples
        all_acc += batch_acc * len_samples
        all_samples += len_samples
    # Report epoch-level averages rather than last-batch values; also
    # avoids a NameError on batch_loss when `data` yields no batches.
    if all_samples > 0:
        print("pass:{}, epoch:{}, loss:{}, acc:{}".format(
            stage, epoch, all_loss / all_samples, all_acc / all_samples))
def train_lr_model(args, data):
    """
    Train and validate the LR classifier on an 80/20 split of `data`.

    Args:
        args: namespace carrying `lr` (learning rate), `epoch` (number of
            epochs), plus the fields consumed by build_lr_model.
        data: full sample list; the first 80% train, the rest validate.
    """
    split = int(0.8 * len(data))
    train_data, test_data = data[:split], data[split:]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    # Build the forward graph first, then clone it for evaluation so the
    # test program carries no optimizer ops; the optimizer is attached to
    # the train program only afterwards.
    with fluid.program_guard(train_program, startup_program):
        loss, acc = build_lr_model(args)
    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        fluid.optimizer.Adam(learning_rate=args.lr).minimize(loss)

    executor = fluid.Executor(fluid.CPUPlace())
    executor.run(startup_program)
    for epoch in range(args.epoch):
        run_epoch(executor, train_data, train_program, "train", epoch, loss,
                  acc)
        print('-------------------')
        run_epoch(executor, test_data, test_program, "valid", epoch, loss,
                  acc)