init mnist #3564
Merged
Changes from 17 commits
Commits (18 commits)
46d30ec init minst.py (jacquesqiao)
137537b Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao)
f6dd787 Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao)
118dd14 can run, for debug (jacquesqiao)
5a8fbb7 add data (jacquesqiao)
5179202 refine code and add debug info (jacquesqiao)
d3c65a6 fix data reader (jacquesqiao)
d41a551 Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao)
a13798e rename add_op to append_op (jacquesqiao)
6f4b968 can train the parameters (jacquesqiao)
76677f2 add test (jacquesqiao)
cf515e4 optimize code and name (jacquesqiao)
9db4ad6 reduce pass num to 1 (jacquesqiao)
1e4566c Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao)
37cd816 change 128 to BATCH_SIZE (jacquesqiao)
0e300f9 use init_net and random_op to initialize parameter (jacquesqiao)
3648165 add gpu support (jacquesqiao)
625b153 optimize code (jacquesqiao)
@@ -0,0 +1,249 @@
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import numpy
import paddle.v2 as paddle

BATCH_SIZE = 100

scope = core.Scope()
place = core.CPUPlace()
# if you want to test GPU training, you can use gpu place
# place = core.GPUPlace(0)
dev_ctx = core.DeviceContext.create(place)

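# Note: this script targets Python 2 (generator .next, print statements, and
# list-returning map).
# Four nets are built below: init_net initializes the parameters, forward_net
# computes the prediction and cost, backward_net (derived later from the
# forward net) computes gradients, and optimize_net applies the SGD updates.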
init_net = core.Net.create()
forward_net = core.Net.create()
backward_net = None
optimize_net = core.Net.create()


def atom_id():
    id = 0
    while True:
        yield id
        id += 1


uniq_id = atom_id().next


def data_layer(name, dims):
    var = scope.new_var(name)
    tensor = var.get_tensor()
    tensor.set_dims(dims)  # 1 is batch size holder.
    return name


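# feed_data copies a numpy array into the named variable in the scope,
# allocating int32 or float32 storage on the current place.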
def feed_data(name, data):
    assert isinstance(data, numpy.ndarray)
    tensor = scope.find_var(name).get_tensor()
    tensor.set_dims(data.shape)
    if data.dtype == numpy.dtype('int32'):
        tensor.alloc_int(place)
    elif data.dtype == numpy.dtype('float32'):
        tensor.alloc_float(place)
    else:
        raise ValueError("data type not supported")
    tensor.set(data, place)


def grad_var_name(var_name):
    return var_name + "@GRAD"


def sgd_optimizer(net, param_name, learning_rate=0.005):
    grad_name = grad_var_name(param_name)
    optimize_op = Operator(
        "sgd",
        param=param_name,
        grad=grad_name,
        param_out=param_name,
        learning_rate=learning_rate)
    net.append_op(optimize_op)


# should use operator and add these to the init_network
def init_param(net, param_name, dims):
    scope.new_var(param_name)
    op = Operator(
        "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10)
    op.infer_shape(scope)
    net.append_op(op)


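# A fully connected layer is assembled from primitive ops: "mul" for the matrix
# product, an optional "rowwise_add" for the bias, and an activation op. Its
# parameters are created in init_net and given sgd update ops in optimize_net.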
# fc_layer
def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
    """
    Add a fc layer to net

    :param input: input variable name.
    :type input: str
    :param size: fully connected layer size.
    :param act: activation name
    :param param: parameter attribute, used for initialize parameters.
    :param bias: bias attribute. False will not have a bias.
    :param name: the name of fc layer. If not set, model will generate a
        readable name
    :return: output variable name.
    """
    if name is None:
        name = 'fc_%d' % uniq_id()
    if not isinstance(name, str):
        raise ValueError("name should be string")

    input_dims = scope.find_var(input).get_tensor().get_dims()

    w_name = param or name + ".w"
    init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size])
    sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01)

    pre_activation = name + ".mul.out"
    scope.new_var(pre_activation)
    mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation)
    net.append_op(mul_op)

    # create bias variable if needed
    if bias:
        bias_name = name + ".b"
        init_param(net=init_net, param_name=bias_name, dims=[size])
        sgd_optimizer(
            net=optimize_net, param_name=bias_name, learning_rate=0.001)
        bias_out = name + ".rowwise_add.out"
        scope.new_var(bias_out)
        rowwise_append_op = Operator(
            "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out)
        net.append_op(rowwise_append_op)
        pre_activation = bias_out

    activation_op = Operator(act, X=pre_activation, Y=name)
    net.append_op(activation_op)
    scope.new_var(name)
    net.infer_shape(scope)
    return name


def cross_entropy_layer(net, input, label):
    cost_name = 'cross_entropy_%d' % uniq_id()
    cross_entropy_op = Operator(
        "onehot_cross_entropy", X=input, label=label, Y=cost_name)
    net.append_op(cross_entropy_op)
    scope.new_var(cost_name)
    net.infer_shape(scope)
    return cost_name


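# The backward net is derived from the forward net; every input and output
# variable it references (including the @GRAD variables) is created in the
# scope so the gradient ops have somewhere to write.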
def create_backward_net(forward_net):
    net = core.Operator.backward(forward_net, set())
    for input in net.inputs()["all"]:
        var = scope.new_var(input)
        var.get_tensor()
    for output in net.outputs()["all"]:
        var = scope.new_var(output)
        var.get_tensor()
    return net


def debug_print_op(op):
    print("===============" + op.type() + "==============")
    print("***inputs:***")
    for input in op.inputs()["all"]:
        print input, scope.find_var(input).get_tensor().get_dims()
    print("\n***outputs:***")
    for output in op.outputs()["all"]:
        print output, scope.find_var(output).get_tensor().get_dims()
    print("")
    print("")


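# set_cost seeds the gradient of the cost variable with ones so the backward
# net has an initial gradient to propagate.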
def set_cost(cost):
    cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape
    cost_grad = \
        scope.find_var(grad_var_name(cost)).get_tensor()
    cost_grad.set_dims(cost_shape)
    cost_grad.alloc_float(place)
    cost_grad.set(numpy.ones(cost_shape).astype("float32"), place)


def mean_cost(cost):
    cost_data = numpy.array(scope.find_var(cost).get_tensor())
    return cost_data.sum() / len(cost_data)


def error_rate(predict, label):
    predict_var = numpy.array(scope.find_var(predict).get_tensor()).argmax(
        axis=1)
    label = numpy.array(scope.find_var(label).get_tensor())
    error_num = numpy.sum(predict_var != label)
    return error_num / float(len(label))


images = data_layer(name='pixel', dims=[BATCH_SIZE, 784])
labels = data_layer(name='label', dims=[BATCH_SIZE])
fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid")
fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid")
predict = fc_layer(net=forward_net, input=fc2, size=100, act="softmax")
cost = cross_entropy_layer(net=forward_net, input=predict, label=labels)

init_net.complete_add_op(True)
forward_net.complete_add_op(True)
backward_net = create_backward_net(forward_net)
optimize_net.complete_add_op(True)

print(init_net)
print(forward_net)
print(backward_net)
print(optimize_net)

debug_print_op(forward_net)
debug_print_op(backward_net)
debug_print_op(optimize_net)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)


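# test() runs only the forward net over the MNIST test set and reports the
# average cost and classification error rate.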
def test(cost_name):
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    cost = []
    error = []
    for data in test_reader():
        image_data = numpy.array(map(lambda x: x[0], data)).astype("float32")
        label_data = numpy.array(map(lambda x: x[1], data)).astype("int32")
        feed_data(images, image_data)
        feed_data(labels, label_data)

        forward_net.infer_shape(scope)
        forward_net.run(scope, dev_ctx)
        cost.append(mean_cost(cost_name))
        error.append(error_rate(predict, "label"))
    print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str(
        sum(error) / float(len(error))))


PASS_NUM = 1

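# Training loop: run init_net once to initialize parameters, then for each
# batch feed the data, run forward, seed the cost gradient, run backward, and
# apply updates with optimize_net; every 100 batches evaluate on the test set.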
init_net.run(scope, dev_ctx)
for pass_id in range(PASS_NUM):
    batch_id = 0

    for data in train_reader():
        image_data = numpy.array(map(lambda x: x[0], data)).astype("float32")
        label_data = numpy.array(map(lambda x: x[1], data)).astype("int32")
        feed_data(images, image_data)
        feed_data(labels, label_data)

        forward_net.infer_shape(scope)
        forward_net.run(scope, dev_ctx)
        set_cost(cost)
        backward_net.infer_shape(scope)
        backward_net.run(scope, dev_ctx)

        optimize_net.run(scope, dev_ctx)
        if batch_id % 100 == 0:
            print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]")
            test(cost)

        batch_id = batch_id + 1
mean_cost gets the cross_entropy output for debugging. We'd better rename this snippet.
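A possible rename along those lines, as a sketch only (the new name is a hypothetical suggestion, not part of this PR):

# Hypothetical name making it explicit that this averages the cross_entropy
# output for debugging output; the body is unchanged from mean_cost.
def mean_cross_entropy_cost(cost):
    cost_data = numpy.array(scope.find_var(cost).get_tensor())
    return cost_data.sum() / len(cost_data)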
done