Skip to content

Commit

Permalink
Merge pull request #122 from Yancey1989/demo_trainer
Browse files Browse the repository at this point in the history
Support public datacenter
  • Loading branch information
Yancey1989 authored Jun 5, 2017
2 parents 5065ed0 + 5e83227 commit a0d4da5
Show file tree
Hide file tree
Showing 12 changed files with 137 additions and 28 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ To test or visit the website, find out the kubernetes [ingress](https://kubernet

Then open your browser and visit http://cloud.paddlepaddle.org.

- Prepare the public dataset

You can create a Kubernetes Job for preparing the public dataset and cluster trainer files.
```bash
kubectl create -f k8s/prepare_dataset.yaml
```

### Run locally
Make sure you are using a virtual environment of some sort (e.g. `virtualenv` or
`pyenv`).
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ echo "pcloudjob_image": $pcloudjob_image
#Build Docker Image
cat > Dockerfile <<EOF
FROM ${base_image}
RUN pip install -U kubernetes && apt-get install iputils-ping
RUN pip install -U kubernetes && apt-get install -y iputils-ping
ADD ./paddle_k8s /usr/bin
ADD ./k8s_tools.py /root/
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 4 additions & 0 deletions docker/prepare_dataset/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Image for the dataset-preparation Kubernetes Job (see k8s/prepare_dataset.yaml).
# Build on the official PaddlePaddle image so paddle.v2.dataset is importable.
FROM paddlepaddle/paddle:latest
# Pre-download all public datasets into the image's local cache at build time,
# so the Job container only has to move and shard them at run time.
RUN python -c "import paddle.v2.dataset.common as common; common.fetch_all()"
ADD ./prepare.py /root/
CMD ["python", "/root/prepare.py"]
39 changes: 39 additions & 0 deletions docker/prepare_dataset/prepare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Move the pre-fetched public datasets onto the shared volume and split
each one into fixed-size shards that cluster trainers can read directly.

Runs as the entry point of the ``paddle-prepare`` Kubernetes Job
(k8s/prepare_dataset.yaml); the datasets themselves were downloaded at
image build time (docker/prepare_dataset/Dockerfile).
"""
import paddle.v2.dataset as dataset
import shutil
import os

# Destination directory on the shared volume; injected by the Job manifest.
# NOTE(review): unset DATASET_HOME yields None and the move below fails —
# presumably the Job always sets it; verify against the manifest.
dataset_home = os.getenv("DATASET_HOME")

# Relocate the dataset cache downloaded at image build time onto the
# shared volume (shutil.move instead of shelling out to ``mv``).
shutil.move(dataset.common.DATA_HOME, dataset_home)
dataset.common.DATA_HOME = dataset_home

dataset.common.split(dataset.uci_housing.train(),
                     line_count=500,
                     suffix=dataset_home + "/uci_housing/train-%05d.pickle")
dataset.common.split(dataset.mnist.train(),
                     line_count=500,
                     suffix=dataset_home + "/mnist/train-%05d.pickle")
# Fixed suffix typo: "pickel" -> "pickle", matching every other dataset.
dataset.common.split(dataset.cifar.train10(),
                     line_count=500,
                     suffix=dataset_home + "/cifar/train10-%05d.pickle")

# n-gram window size for the imikolov language-model dataset.
N = 5
word_dict = dataset.imikolov.build_dict()
dataset.common.split(dataset.imikolov.train(word_dict, N),
                     line_count=500,
                     suffix=dataset_home + "/imikolov/train-%05d.pickle")

dataset.common.split(dataset.movielens.train(),
                     line_count=500,
                     suffix=dataset_home + "/movielens/train-%05d.pickle")

dataset.common.split(lambda: dataset.imdb.train(dataset.imdb.word_dict()),
                     line_count=500,
                     suffix=dataset_home + "/imdb/train-%05d.pickle")

dataset.common.split(dataset.conll05.test(),
                     line_count=500,
                     suffix=dataset_home + "/conll05/test-%05d.pickle")

# Fixed missing "/" so wmt14 shards land under dataset_home like the rest.
dataset.common.split(dataset.wmt14.train(30000),
                     line_count=500,
                     suffix=dataset_home + "/wmt14/train-%05d.pickle")
28 changes: 28 additions & 0 deletions k8s/prepare_dataset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# One-shot Kubernetes Job that prepares the public datasets: it runs
# prepare.py (docker/prepare_dataset) to move the pre-fetched dataset cache
# onto the shared CephFS volume and split it into per-trainer shards.
apiVersion: batch/v1
kind: Job
metadata:
  name: paddle-prepare
spec:
  template:
    metadata:
      name: paddle-prepare
    spec:
      volumes:
      - name: data-storage
        cephfs:
          monitors:
          # NOTE(review): cluster-specific monitor address — matches the
          # "public" datacenter entry in settings.py; update per deployment.
          - 172.19.32.166:6789
          path: "/public"
          user: "admin"
          secretRef:
            name: ceph-secret
      containers:
      - name: prepare
        image: yancey1989/paddlecloud-prepare
        env:
        # Destination directory read by prepare.py via os.getenv.
        - name: DATASET_HOME
          value: "/pfs/public/dataset"
        volumeMounts:
        - name: data-storage
          mountPath: /pfs/public
      # One-shot Job: do not restart the pod on completion or failure.
      restartPolicy: Never
17 changes: 14 additions & 3 deletions paddlecloud/paddlecloud/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,18 +275,29 @@
# "mount_path" "/pfs/%s/home/%s/" # mount_path % ( dc, username )
# }
#}
FSTYPE_CEPHFS = "cephfs"
FSTYPE_HOSTPATH = "hostpath"
DATACENTERS = {
"datacenter1":{
"fstype": "cephfs",
"fstype": FSTYPE_CEPHFS,
"monitors_addr": ["172.19.32.166:6789"], # must be a list
"secret": "ceph-secret",
"user": "admin",
"mount_path": "/pfs/%s/home/%s/", # mount_path % ( dc, username )
"cephfs_path": "/%s", # cephfs_path % username
"admin_key": "/certs/admin.secret"
"admin_key": "/certs/admin.secret",
},
"public": {
"fstype": FSTYPE_CEPHFS,
"monitors_addr": ["172.19.32.166:6789"], # must be a list
"secret": "ceph-secret",
"user": "admin",
"mount_path": "/pfs/%s/public/", # mount_path % ( dc, username )
"cephfs_path": "/public", # cephfs_path % username
"admin_key": "/certs/admin.secret",
"read_only": True
}
}

# where cephfs root is mounted when using cephfs storage service
STORAGE_PATH="/pfs"

Expand Down
63 changes: 41 additions & 22 deletions paddlecloud/paddlejob/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,29 +42,48 @@ def post(self, request, format=None):
return utils.simple_response(500, "no topology or entry specified")
if not obj.get("datacenter"):
return utils.simple_response(500, "no datacenter specified")
cfgs = {}
dc = obj.get("datacenter")

volumes = []
cfg = settings.DATACENTERS.get(dc, None)
if cfg and cfg["fstype"] == "hostpath":
volumes.append(volume.get_volume_config(
fstype = "hostpath",
name = dc.replace("_", "-"),
mount_path = cfg["mount_path"] % (dc, username),
host_path = cfg["host_path"]
))
elif cfg and cfg["fstype"] == "cephfs":
volumes.append(volume.get_volume_config(
fstype = "cephfs",
name = dc.replace("_", "-"),
monitors_addr = cfg["monitors_addr"],
secret = cfg["secret"],
user = cfg["user"],
mount_path = cfg["mount_path"] % (dc, username),
cephfs_path = cfg["cephfs_path"] % username,
admin_key = cfg["admin_key"]
))
else:
pass
for k, cfg in settings.DATACENTERS.items():
if k != dc and k != "public":
continue
fstype = cfg["fstype"]
if fstype == settings.FSTYPE_CEPHFS:
if k == "public":
mount_path = cfg["mount_path"] % dc
cephfs_path = cfg["cephfs_path"]
else:
mount_path = cfg["mount_path"] % (dc, username)
cephfs_path = cfg["cephfs_path"] % username
volumes.append(volume.get_volume_config(
fstype = fstype,
name = k.replace("_", "-"),
monitors_addr = cfg["monitors_addr"],
secret = cfg["secret"],
user = cfg["user"],
mount_path = mount_path,
cephfs_path = cephfs_path,
admin_key = cfg["admin_key"],
read_only = cfg.get("read_only", False)
))
elif fstype == settings.FSTYPE_HOSTPATH:
if k == "public":
mount_path = cfg["mount_path"] % dc
host_path = cfg["host_path"]
else:
mount_path = cfg["mount_path"] % (dc, username)
host_path = cfg["host_path"] % username

volumes.append(volume.get_volume_config(
fstype = fstype,
name = k.replace("_", "-"),
mount_path = mount_path,
host_path = host_path
))
else:
pass

registry_secret = settings.JOB_DOCKER_IMAGE.get("registry_secret", None)
# get user specified image
Expand All @@ -80,7 +99,7 @@ def post(self, request, format=None):
# add Nvidia lib volume if training with GPU
if gpu_count > 0:
volumes.append(volume.get_volume_config(
fstype = "hostpath",
fstype = settings.FSTYPE_HOSTPATH,
name = "nvidia-libs",
mount_path = "/usr/local/nvidia/lib64",
host_path = settings.NVIDIA_LIB_PATH
Expand Down
5 changes: 3 additions & 2 deletions paddlecloud/paddlejob/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"hostpath": "{\"name\": $NAME, \"hostPath\":{\"path\": $HOST_PATH}}",
"cephfs":"{\"name\": $NAME,\"cephfs\":{\"name\": \"cephfs\", \
\"monitors\": $MONITORS_ADDR,\"path\": $CEPHFS_PATH, \
\"user\": $USER, \"secretRef\": {\"name\": $SECRET}}}"
\"readOnly\": $READ_ONLY, \"user\": $USER, \
\"secretRef\": {\"name\": $SECRET}}}"
}
tmpl_volume_mount = {
"hostpath": "{\"name\": $NAME, \"mountPath\":$MOUNT_PATH}",
Expand All @@ -19,7 +20,7 @@ def __render(tmpl, **kwargs):
if tmpl.find(tmpl_k) != -1:
if type(v) is str or type(v) is unicode:
tmpl = tmpl.replace(tmpl_k, "\"%s\"" % v)
elif type(v) is list:
elif type(v) is list or type(v) is bool:
tmpl = tmpl.replace(tmpl_k, json.dumps(v))
else:
pass
Expand Down

0 comments on commit a0d4da5

Please sign in to comment.