-
Notifications
You must be signed in to change notification settings - Fork 5.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
new parameterupdater use paddle pserver cclient of go #2413
Changes from 10 commits
99dc606
6f1c91d
28476f5
966bf9a
c44f5dd
39d0b3d
4f366be
da3e84a
dc458a0
37594ea
8941a38
ebba2b1
c093a24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Design Doc: Remote Parameter Updater for Cluster Train | ||
|
||
For an overview of distributed training, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that will use the parameter server cclient ([The Client Library of Parameter Server Design Doc](pserver_client.md)) to manage and update parameters. | ||
|
||
## Parameter Updater | ||
|
||
The parameter updater is used by the trainer to manage and update parameters. There are mainly two kinds of parameter updaters: local and remote. Since this design is for cluster training, we will only discuss the remote parameter updater here. | ||
|
||
### Remote Parameter Updater | ||
|
||
The remote parameter updater manages parameters through a remote parameter server, using the client that communicates with the pserver ([The Client Library of Parameter Server Design Doc](pserver_client.md)). | ||
|
||
In the PaddlePaddle Python V2 API, the trainer is implemented in Python, and it will hold an instance of a parameter updater and call its functions directly. In this design, we will also expose the API of RemoteParameterUpdater to Python with SWIG. | ||
|
||
#### Sparse Remote Parameter Updater | ||
|
||
Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in the next stage. | ||
|
||
### Interface Design | ||
|
||
TBD |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,10 +164,10 @@ func paddle_finish_init_params(client C.client) C.int { | |
} | ||
|
||
//export paddle_send_grads | ||
func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int { | ||
func paddle_send_grads(client C.client, grads **C.paddle_gradient, total C.int) C.int { | ||
var gs []pserver.Gradient | ||
for i := 0; i < int(total); i++ { | ||
grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) | ||
grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) | ||
et := pserver.ElementType(grad.element_type) | ||
name := C.GoString(grad.name) | ||
content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) | ||
|
@@ -204,12 +204,14 @@ func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter | |
} | ||
|
||
p := ps[i] | ||
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) | ||
paramPtr := (**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! I got used to Go treating |
||
param := *paramPtr | ||
nameReady := false | ||
contentAllocated := false | ||
|
||
if unsafe.Pointer(param) == nullPtr { | ||
param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param)))) | ||
*paramPtr = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param)))) | ||
param = *paramPtr | ||
} else { | ||
if unsafe.Pointer(param.name) != nullPtr { | ||
if n := C.GoString(param.name); n != p.Name { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,20 @@ | ||
cmake_minimum_required(VERSION 3.0) | ||
|
||
include_directories(${CMAKE_BINARY_DIR}) | ||
|
||
add_executable(main main.c) | ||
add_dependencies(main client) | ||
add_dependencies(main paddle_pserver_cclient) | ||
add_executable(test_cclient test_cclient.c) | ||
add_dependencies(test_cclient paddle_pserver_cclient) | ||
|
||
if(APPLE) | ||
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") | ||
endif() | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a) | ||
|
||
if(PROJ_ROOT) | ||
include_directories(${CMAKE_BINARY_DIR}/go/pserver/cclient/) | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread) | ||
target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread) | ||
else(PROJ_ROOT) | ||
include_directories(${CMAKE_BINARY_DIR}) | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) | ||
target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) | ||
endif(PROJ_ROOT) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#include <stdio.h> | ||
|
||
#include "libclient.h" | ||
#include "libpaddle_pserver_cclient.h" | ||
|
||
void fail() { | ||
// TODO(helin): fix: gtest using cmake is not working, using this | ||
|
@@ -40,29 +40,39 @@ int main() { | |
} | ||
|
||
unsigned char content[] = {0x00, 0x11, 0x22}; | ||
paddle_gradient grads[2] = { | ||
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3}, | ||
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}}; | ||
paddle_gradient** grads = | ||
(paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2); | ||
grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); | ||
grads[0]->name = "param_a"; | ||
grads[0]->content = content; | ||
grads[0]->content_len = 3; | ||
grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; | ||
|
||
if (!paddle_send_grads(c, grads, 2)) { | ||
grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); | ||
grads[1]->name = "param_b"; | ||
grads[1]->content = content; | ||
grads[1]->content_len = 3; | ||
grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; | ||
|
||
if (paddle_send_grads(c, grads, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
paddle_parameter* params[2] = {NULL, NULL}; | ||
char* names[] = {"param_a", "param_b"}; | ||
if (!paddle_get_params(c, names, params, 2)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry about this mistake from me! |
||
if (paddle_get_params(c, names, params, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
// get parameters again by reusing the allocated parameter buffers. | ||
if (!paddle_get_params(c, names, params, 2)) { | ||
if (paddle_get_params(c, names, params, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
paddle_release_param(params[0]); | ||
paddle_release_param(params[1]); | ||
|
||
if (!paddle_save_model(c, "/tmp/")) { | ||
if (paddle_save_model(c, "/tmp/") != 0) { | ||
fail(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import paddle.v2 as paddle | ||
import gzip | ||
|
||
|
||
def softmax_regression(img): | ||
predict = paddle.layer.fc(input=img, | ||
size=10, | ||
act=paddle.activation.Softmax()) | ||
return predict | ||
|
||
|
||
def multilayer_perceptron(img): | ||
# The first fully-connected layer | ||
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) | ||
# The second fully-connected layer and the according activation function | ||
hidden2 = paddle.layer.fc(input=hidden1, | ||
size=64, | ||
act=paddle.activation.Relu()) | ||
# The thrid fully-connected layer, note that the hidden size should be 10, | ||
# which is the number of unique digits | ||
predict = paddle.layer.fc(input=hidden2, | ||
size=10, | ||
act=paddle.activation.Softmax()) | ||
return predict | ||
|
||
|
||
def convolutional_neural_network(img): | ||
# first conv layer | ||
conv_pool_1 = paddle.networks.simple_img_conv_pool( | ||
input=img, | ||
filter_size=5, | ||
num_filters=20, | ||
num_channel=1, | ||
pool_size=2, | ||
pool_stride=2, | ||
act=paddle.activation.Tanh()) | ||
# second conv layer | ||
conv_pool_2 = paddle.networks.simple_img_conv_pool( | ||
input=conv_pool_1, | ||
filter_size=5, | ||
num_filters=50, | ||
num_channel=20, | ||
pool_size=2, | ||
pool_stride=2, | ||
act=paddle.activation.Tanh()) | ||
# The first fully-connected layer | ||
fc1 = paddle.layer.fc(input=conv_pool_2, | ||
size=128, | ||
act=paddle.activation.Tanh()) | ||
# The softmax layer, note that the hidden size should be 10, | ||
# which is the number of unique digits | ||
predict = paddle.layer.fc(input=fc1, | ||
size=10, | ||
act=paddle.activation.Softmax()) | ||
return predict | ||
|
||
|
||
def main(): | ||
paddle.init(use_gpu=False, trainer_count=1) | ||
|
||
# define network topology | ||
images = paddle.layer.data( | ||
name='pixel', type=paddle.data_type.dense_vector(784)) | ||
label = paddle.layer.data( | ||
name='label', type=paddle.data_type.integer_value(10)) | ||
|
||
# Here we can build the prediction network in different ways. Please | ||
# choose one by uncomment corresponding line. | ||
predict = softmax_regression(images) | ||
#predict = multilayer_perceptron(images) | ||
#predict = convolutional_neural_network(images) | ||
|
||
cost = paddle.layer.classification_cost(input=predict, label=label) | ||
parameters = paddle.parameters.create(cost) | ||
|
||
optimizer = paddle.optimizer.Momentum( | ||
learning_rate=0.1 / 128.0, | ||
momentum=0.9, | ||
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) | ||
|
||
trainer = paddle.trainer.SGD(cost=cost, | ||
parameters=parameters, | ||
update_equation=optimizer, | ||
is_local=False, | ||
pserver_spec="localhost:3000") | ||
|
||
lists = [] | ||
|
||
def event_handler(event): | ||
if isinstance(event, paddle.event.EndIteration): | ||
if event.batch_id % 1000 == 0: | ||
print "Pass %d, Batch %d, Cost %f, %s" % ( | ||
event.pass_id, event.batch_id, event.cost, event.metrics) | ||
|
||
elif isinstance(event, paddle.event.EndPass): | ||
result = trainer.test(reader=paddle.batch( | ||
paddle.dataset.mnist.test(), batch_size=128)) | ||
print "Test with Pass %d, Cost %f, %s\n" % ( | ||
event.pass_id, result.cost, result.metrics) | ||
lists.append((event.pass_id, result.cost, | ||
result.metrics['classification_error_evaluator'])) | ||
|
||
trainer.train( | ||
reader=paddle.batch( | ||
paddle.reader.shuffle( | ||
paddle.dataset.mnist.train(), buf_size=8192), | ||
batch_size=128), | ||
event_handler=event_handler, | ||
num_passes=100) | ||
|
||
# find the best pass | ||
best = sorted(lists, key=lambda list: float(list[1]))[0] | ||
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) | ||
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) | ||
|
||
test_creator = paddle.dataset.mnist.test() | ||
test_data = [] | ||
for item in test_creator(): | ||
test_data.append((item[0], )) | ||
if len(test_data) == 100: | ||
break | ||
|
||
# output is a softmax layer. It returns probabilities. | ||
# Shape should be (100, 10) | ||
probs = paddle.infer( | ||
output_layer=predict, parameters=parameters, input=test_data) | ||
print probs.shape | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Really sorry about this mistake from me. Must take you very long to debug!