-
Notifications
You must be signed in to change notification settings - Fork 5.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
new parameterupdater use paddle pserver cclient of go #2413
Changes from 1 commit
99dc606
6f1c91d
28476f5
966bf9a
c44f5dd
39d0b3d
4f366be
da3e84a
dc458a0
37594ea
8941a38
ebba2b1
c093a24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Design Doc: Remote Parameter Updater for Cluster Train | ||
|
||
For an overview of distributed training, please refer to [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that will use the parameter server cclient [The Client Library of Parameter Server Design Doc](pserver_client.md) to manage and update parameters. | ||
|
||
## Parameter Updater | ||
|
||
The Parameter Updater is used by the trainer to manage and update parameters. There are mainly two kinds of parameter updaters: local and remote. Since this design is for cluster training, we will only discuss the remote parameter updater here. | ||
|
||
### Remote Parameter Updater | ||
|
||
The Remote Parameter Updater manages parameters through a remote parameter server, using the client that communicates with the pserver ([The Client Library of Parameter Server Design Doc](pserver_client.md)). | ||
|
||
In the PaddlePaddle Python V2 API, the trainer is implemented in Python, and it will hold an instance of the parameter updater and call its functions directly. In this design, we will also expose the API of RemoteParameterUpdater to Python with SWIG. | ||
|
||
#### Sparse Remote Parameter Updater | ||
|
||
Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in the next stage. | ||
|
||
### Interface Design | ||
|
||
TBD |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,16 @@ | ||
cmake_minimum_required(VERSION 3.0) | ||
|
||
include_directories(${CMAKE_BINARY_DIR}) | ||
|
||
add_executable(main main.c) | ||
add_dependencies(main client) | ||
add_dependencies(main paddle_pserver_cclient) | ||
|
||
if(APPLE) | ||
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") | ||
endif() | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a) | ||
|
||
if(PROJ_ROOT) | ||
include_directories(${CMAKE_BINARY_DIR}/go/pserver/cclient/) | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread) | ||
else(PROJ_ROOT) | ||
include_directories(${CMAKE_BINARY_DIR}) | ||
target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) | ||
endif(PROJ_ROOT) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#include <stdio.h> | ||
|
||
#include "libclient.h" | ||
#include "libpaddle_pserver_cclient.h" | ||
|
||
void fail() { | ||
// TODO(helin): fix: gtest using cmake is not working, using this | ||
|
@@ -14,10 +14,11 @@ int main() { | |
client c = paddle_new_pserver_client(addr, 1); | ||
retry: | ||
if (paddle_begin_init_params(c)) { | ||
|
||
paddle_parameter param; | ||
char name_a[] = "param_a"; | ||
char name_b[] = "param_b"; | ||
unsigned char content[] = {0x00, 0x11, 0x22}; | ||
unsigned char content[] = {0x00, 0x00, 0x00}; | ||
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; | ||
param.name = name_a; | ||
param.content = content; | ||
|
@@ -32,6 +33,7 @@ int main() { | |
if (paddle_init_param(c, param, NULL, 0) != 0) { | ||
goto retry; | ||
} | ||
|
||
if (paddle_finish_init_params(c) != 0) { | ||
goto retry; | ||
} | ||
|
@@ -41,30 +43,31 @@ int main() { | |
|
||
unsigned char content[] = {0x00, 0x11, 0x22}; | ||
paddle_gradient grads[2] = { | ||
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3}, | ||
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}}; | ||
{"param_a", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}, | ||
{"param_b", PADDLE_ELEMENT_TYPE_INT32, content, 3}}; | ||
|
||
if (!paddle_send_grads(c, grads, 2)) { | ||
if (paddle_send_grads(c, grads, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
paddle_parameter* params[2] = {NULL, NULL}; | ||
char* names[] = {"param_a", "param_b"}; | ||
if (!paddle_get_params(c, names, params, 2)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry about this mistake from me! |
||
if (paddle_get_params(c, names, params, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
// get parameters again by reusing the allocated parameter buffers. | ||
if (!paddle_get_params(c, names, params, 2)) { | ||
if (paddle_get_params(c, names, params, 2) != 0) { | ||
fail(); | ||
} | ||
|
||
paddle_release_param(params[0]); | ||
paddle_release_param(params[1]); | ||
|
||
if (!paddle_save_model(c, "/tmp/")) { | ||
if (paddle_save_model(c, "/tmp/") != 0) { | ||
fail(); | ||
} | ||
|
||
printf("test success!\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's follow the Unix way, do not print anything (with return code 0) when everything went well. |
||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import paddle.v2 as paddle | ||
import paddle.v2.dataset.uci_housing as uci_housing | ||
|
||
|
||
def main(): | ||
# init | ||
paddle.init(use_gpu=False, trainer_count=1, trainer_id=1) | ||
|
||
# network config | ||
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) | ||
y_predict = paddle.layer.fc(input=x, | ||
param_attr=paddle.attr.Param(name='w'), | ||
size=1, | ||
act=paddle.activation.Linear(), | ||
bias_attr=paddle.attr.Param(name='b')) | ||
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) | ||
cost = paddle.layer.mse_cost(input=y_predict, label=y) | ||
|
||
# create parameters | ||
parameters = paddle.parameters.create(cost) | ||
|
||
# create optimizer | ||
optimizer = paddle.optimizer.Momentum(momentum=0) | ||
|
||
trainer = paddle.trainer.SGD(cost=cost, | ||
parameters=parameters, | ||
update_equation=optimizer, | ||
is_local=False, | ||
pserver_spec="localhost:3000") | ||
|
||
# event_handler to print training and testing info | ||
def event_handler(event): | ||
if isinstance(event, paddle.event.EndIteration): | ||
if event.batch_id % 100 == 0: | ||
print "Pass %d, Batch %d, Cost %f" % ( | ||
event.pass_id, event.batch_id, event.cost) | ||
|
||
if isinstance(event, paddle.event.EndPass): | ||
if (event.pass_id + 1) % 10 == 0: | ||
result = trainer.test( | ||
reader=paddle.batch( | ||
uci_housing.test(), batch_size=2), | ||
feeding={'x': 0, | ||
'y': 1}) | ||
print "Test %d, %.2f" % (event.pass_id, result.cost) | ||
|
||
# training | ||
trainer.train( | ||
reader=paddle.batch( | ||
paddle.reader.shuffle( | ||
uci_housing.train(), buf_size=500), | ||
batch_size=2), | ||
feeding={'x': 0, | ||
'y': 1}, | ||
event_handler=event_handler, | ||
num_passes=30) | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes! I remember there is a bug here when I test the pserver program
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done