-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0e1e77e
commit 00c907b
Showing
11 changed files
with
326 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,3 +16,5 @@ spark,join | |
clickhouse,groupby | ||
cudf,groupby | ||
cudf,join | ||
h2o,groupby | ||
h2o,join |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
# Run one h2o benchmark task end-to-end: start a dedicated h2o instance, run the
# task's R script against it, then shut the instance down again.
#
# usage: ./h2o/exec.sh <task> <data_name>   e.g. ./h2o/exec.sh groupby G1_1e7_1e2_0_0
#   $1  task name; selects the script ./h2o/$1-h2o.R
#   $2  data name; used in the h2o instance name and in log messages
# Exits 1 on wrong usage, when an instance already answers before the start,
# when the instance does not come up, or when it is still active after the stop.
set -e

if [ "$#" -ne 2 ]; then
  echo "usage: ./h2o/exec.sh groupby G1_1e7_1e2_0_0";
  exit 1
fi;

# expected to define h2o_start, h2o_stop and h2o_active
source ./h2o/h2o.sh

# a fresh instance is started below, so one that already answers is an error
if h2o_active; then
  echo "h2o instance should not be already running, investigate" >&2
  exit 1
fi

# start h2o
h2o_start "h2o_$1_$2"

# confirm h2o working; allow one extra 30s grace period before giving up
h2o_active || sleep 30
if ! h2o_active; then
  echo "h2o instance should be already running, investigate" >&2
  exit 1
fi

# execute benchmark script; a failure is reported but must not skip the shutdown below
"./h2o/$1-h2o.R" || echo "# h2o/exec.sh: benchmark script for $2 terminated with error" >&2

# stop h2o instance
h2o_stop && echo "# h2o/exec.sh: stopping h2o instance finished" || echo "# h2o/exec.sh: stopping h2o instance failed" >&2

# after the stop the instance must be gone; using `if` (not `&&`) keeps the
# script's own exit status at 0 when the instance is correctly inactive
if h2o_active; then
  echo "# h2o/exec.sh: h2o instance still active after stop" >&2
  exit 1
fi
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# Benchmark driver for the "groupby" task on h2o: attaches to an h2o instance,
# imports the CSV named by the SRC_GRP_LOCAL environment variable and records
# the elapsed-time baseline (task_init) used by the final summary line.
cat("# groupby-h2o.R\n") | ||
|
||
# presumably provides write.log(), make_chk() and memory_usage() used by the questions — TODO confirm
source("./_helpers/helpers.R") | ||
|
||
# h2o is loaded from the project-local library directory ./h2o/r-h2o
suppressPackageStartupMessages(library("h2o", lib.loc="./h2o/r-h2o", warn.conflicts=FALSE, quietly=TRUE)) | ||
# metadata values passed unchanged to every write.log() call in this script
ver = packageVersion("h2o") | ||
git = "" | ||
task = "groupby" | ||
solution = "h2o" | ||
fun = "h2o.group_by" | ||
cache = TRUE | ||
on_disk = FALSE | ||
|
||
# startH2O=FALSE: connect to an instance expected to be listening on port 55888 rather than launching one
h = h2o.init(startH2O=FALSE, port=55888) | ||
h2o.no_progress() | ||
|
||
# input file is data/<SRC_GRP_LOCAL>.csv relative to the working directory
data_name = Sys.getenv("SRC_GRP_LOCAL") | ||
src_grp = file.path("data", paste(data_name, "csv", sep=".")) | ||
cat(sprintf("loading dataset %s\n", data_name)) | ||
|
||
# nine column types are given explicitly; the first three columns are read as "string" (reason in the note below)
x = h2o.importFile(src_grp, col.types=c("string","string","string","int","int","int","int","int","real")) | ||
# using string due to ERROR caused by water.parser.ParseDataset$H2OParseException: Exceeded categorical limit on column #3 (using 1-based indexing). Consider reparsing this column as a string. | ||
# https://0xdata.atlassian.net/browse/PUBDEV-7533 | ||
print(nrow(x)) | ||
|
||
# start of the overall timer; data import above is not included in it
task_init = proc.time()[["elapsed"]] | ||
cat("grouping...\n") | ||
|
||
# q1 is executed twice (run=1L, run=2L). Each run: time the group-by (the
# print(dim(...)) call is inside the timed expression), sample memory_usage(),
# time the checksum separately, log via write.log(), then remove the result frame.
# head/tail of the result are printed only after the second run.
question = "sum v1 by id1" # q1 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id1", sum("v1")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id1", sum("v1")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q2: sum of v1 grouped by id1 and id2. Two runs, each timed, checksummed
# (sum of the sum_v1 column), logged with write.log() and followed by h2o.rm(ans).
question = "sum v1 by id1:id2" # q2 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id1","id2"), sum("v1")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id1","id2"), sum("v1")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q3: sum of v1 and mean of v3 grouped by id3. Two runs; the checksum sums the
# sum_v1 and mean_v3 result columns and is timed separately from the query.
question = "sum v1 mean v3 by id3" # q3 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id3", sum("v1"), mean("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]]), v3=sum(ans[["mean_v3"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id3", sum("v1"), mean("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]]), v3=sum(ans[["mean_v3"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q4: means of v1, v2 and v3 grouped by id4. Two runs; the checksum sums the
# three mean_* result columns and is timed separately from the query.
question = "mean v1:v3 by id4" # q4 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id4", mean("v1"), mean("v2"), mean("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["mean_v1"]]), v2=sum(ans[["mean_v2"]]), v3=sum(ans[["mean_v3"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id4", mean("v1"), mean("v2"), mean("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["mean_v1"]]), v2=sum(ans[["mean_v2"]]), v3=sum(ans[["mean_v3"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q5: sums of v1, v2 and v3 grouped by id6. Two runs; the checksum sums the
# three sum_* result columns and is timed separately from the query.
question = "sum v1:v3 by id6" # q5 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id6", sum("v1"), sum("v2"), sum("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]]), v2=sum(ans[["sum_v2"]]), v3=sum(ans[["sum_v3"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by="id6", sum("v1"), sum("v2"), sum("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(v1=sum(ans[["sum_v1"]]), v2=sum(ans[["sum_v2"]]), v3=sum(ans[["sum_v3"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q6: median and sd of v3 grouped by id4 and id5. Two runs, same
# measure/log/cleanup sequence as the earlier questions.
# NOTE(review): the chk list elements are unnamed here, whereas q1-q5 name them (v1=, v2=, v3=);
# whether make_chk() depends on the names is not visible from this script — confirm.
question = "median v3 sd v3 by id4 id5" # q6 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id4","id5"), median("v3"), sd("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["median_v3"]]), sum(ans[["sd_v3"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id4","id5"), median("v3"), sd("v3")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["median_v3"]]), sum(ans[["sd_v3"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q7-q9 are placeholders: only the question label is assigned and no query is
# run or logged for them in this script; each label is overwritten by the next assignment.
question = "max v1 - min v2 by id3" # q7 | ||
|
||
|
||
question = "largest two v3 by id6" # q8 | ||
|
||
|
||
question = "regression v1 v2 by id2 id4" # q9 | ||
|
||
|
||
# q10: sum of v3 plus a row count, grouped by all six id columns. Two runs, same
# measure/log/cleanup sequence as the earlier questions.
# NOTE(review): the checksum reads a result column named "nrow" while nrow() is given six
# column names — confirm the aggregate really yields a single column with that name.
question = "sum v3 count by id1:id6" # q10 | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id1","id2","id3","id4","id5","id6"), sum("v3"), nrow("id1","id2","id3","id4","id5","id6")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["sum_v3"]]), sum(ans[["nrow"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.group_by(x, by=c("id1","id2","id3","id4","id5","id6"), sum("v3"), nrow("id1","id2","id3","id4","id5","id6")))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["sum_v3"]]), sum(ans[["nrow"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# final cleanup on the h2o side before reporting the total time
h2o.removeAll() | ||
|
||
# elapsed seconds since task_init, i.e. since just before the first question
cat(sprintf("grouping finished, took %.0fs\n", proc.time()[["elapsed"]]-task_init)) | ||
|
||
# when run non-interactively, quit explicitly with status 0 and without saving the workspace
if (!interactive()) q("no", status=0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Start an h2o instance in the background.
#   $1  instance name; passed to h2o as -name and used for the log file names
# Returns 1 (with a message on stderr) when no name is given. The JVM's stdout
# and stderr are written to ./h2o/log/$1.out and ./h2o/log/$1.err.
h2o_start() {
  ((!$#)) && echo "h2o_start require h2o instance name as a parameter" >&2 && return 1
  echo '# h2o_start: starting h2o instance'
  # the redirects below fail if the log directory does not exist yet
  mkdir -p ./h2o/log
  # nohup and stdin from /dev/null detach the JVM from the calling shell; the log
  # paths are quoted so a name with spaces or glob characters cannot break the redirects
  nohup java -Xmx100G -Xms100G -cp ./h2o/r-h2o/h2o/java/h2o.jar water.H2OApp -name "$1" -baseport 55888 > "./h2o/log/$1.out" 2> "./h2o/log/$1.err" < /dev/null &
  # fixed delay before callers probe the instance
  sleep 10
}
# Stop h2o by signalling every process named "java", escalating from SIGINT (-2)
# to SIGTERM (-15) to SIGKILL (-9), and waiting longer after each step while a
# java process is still present.
# Returns 1 (with a message on stderr) when a java process survives the final
# wait, otherwise 0.
# NOTE(review): killall targets all java processes, not only the h2o instance.
h2o_stop() {
  echo '# h2o_stop: stopping h2o instance'

  # step 1: polite interrupt
  if pidof java > /dev/null 2>&1; then
    killall -2 java > /dev/null 2>&1
  fi
  if sleep 2 && pidof java > /dev/null 2>&1; then
    sleep 15
  fi

  # step 2: terminate
  if pidof java > /dev/null 2>&1; then
    killall -15 java > /dev/null 2>&1
  fi
  if sleep 2 && pidof java > /dev/null 2>&1; then
    sleep 30
  fi

  # step 3: kill
  if pidof java > /dev/null 2>&1; then
    killall -9 java > /dev/null 2>&1
  fi
  if sleep 2 && pidof java > /dev/null 2>&1; then
    if sleep 60 && pidof java > /dev/null 2>&1; then
      echo "h2o instance could not be stopped" >&2 && return 1
    fi
  fi

  return 0
}
# Probe whether an h2o instance is up.
# Succeeds (status 0) only when a java process exists AND a GET of
# localhost:55888/3/About completes with a non-error HTTP status; all output is discarded.
h2o_active() {
  # --fail: an HTTP error response (>= 400) counts as "not active" instead of success
  # --max-time: bound the probe so an unresponsive port cannot hang the caller
  pidof java > /dev/null 2>&1 && curl --fail --max-time 10 -X GET "localhost:55888/3/About" -H "accept: application/json" > /dev/null 2>&1
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
set -e

# upgrade to latest stable from h2o repo
echo 'upgrading h2o...'
# Compare the repository's h2o version with the one installed in ./h2o/r-h2o
# and run update.packages() on that library only when they differ.
Rscript -e '
h2o_repo = "http://h2o-release.s3.amazonaws.com/h2o/latest_stable_R"
h2o_lib = "./h2o/r-h2o"
ap = available.packages(repos=h2o_repo, method="curl")
if (ap["h2o","Version"] != packageVersion("h2o", lib.loc=h2o_lib)) {
  update.packages(lib.loc=h2o_lib, repos=h2o_repo, method="curl", ask=FALSE, checkBuilt=TRUE, quiet=TRUE)
}
'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# Benchmark driver for the "join" task on h2o: attaches to an h2o instance,
# imports the left table named by SRC_JN_LOCAL plus three right-hand tables,
# and records the elapsed-time baseline (task_init) used by the final summary line.
cat("# join-h2o.R\n") | ||
|
||
# presumably provides write.log(), make_chk(), memory_usage() and join_to_tbls() — TODO confirm
source("./_helpers/helpers.R") | ||
|
||
# h2o is loaded from the project-local library directory ./h2o/r-h2o
suppressPackageStartupMessages(library("h2o", lib.loc="./h2o/r-h2o", warn.conflicts=FALSE, quietly=TRUE)) | ||
# metadata values passed unchanged to every write.log() call in this script
ver = packageVersion("h2o") | ||
git = "" | ||
task = "join" | ||
solution = "h2o" | ||
fun = "h2o.merge" | ||
cache = TRUE | ||
on_disk = FALSE | ||
|
||
# startH2O=FALSE: connect to an instance expected to be listening on port 55888 rather than launching one
h = h2o.init(startH2O=FALSE, port=55888) | ||
h2o.no_progress() | ||
|
||
# left table is data/<SRC_JN_LOCAL>.csv; right-hand table names are derived from it by join_to_tbls()
data_name = Sys.getenv("SRC_JN_LOCAL") | ||
src_jn_x = file.path("data", paste(data_name, "csv", sep=".")) | ||
y_data_name = join_to_tbls(data_name) | ||
src_jn_y = setNames(file.path("data", paste(y_data_name, "csv", sep=".")), names(y_data_name)) | ||
# exactly three right-hand tables are required; they are used below as small, medium and big
stopifnot(length(src_jn_y)==3L) | ||
cat(sprintf("loading datasets %s\n", paste(c(data_name, y_data_name), collapse=", "))) | ||
|
||
# column types are given explicitly for every imported file
x = h2o.importFile(src_jn_x, col.types=c("int","int","int","enum","enum","string","real")) | ||
print(nrow(x)) | ||
small = h2o.importFile(src_jn_y[1L], col.types=c("int","enum","real")) | ||
medium = h2o.importFile(src_jn_y[2L], col.types=c("int","int","enum","enum","real")) | ||
big = h2o.importFile(src_jn_y[3L], col.types=c("int","int","int","enum","enum","string","real")) | ||
# prints the row count of each right-hand table; the result is assigned to nul, apparently only as a sink
sapply(sapply(list(small, medium, big), nrow), print) -> nul | ||
|
||
# start of the overall timer; data import above is not included in it
task_init = proc.time()[["elapsed"]] | ||
cat("joining...\n") | ||
|
||
# q1: merge x with small on id1, executed twice (run=1L, run=2L). Each run: time
# the merge (the print(dim(...)) call is inside the timed expression), sample
# memory_usage(), time the checksum (sums of v1 and v2) separately, log via
# write.log(), then remove the result frame. head/tail are printed only after run 2.
question = "small inner on int" # q1 | ||
|
||
t = system.time(print(dim(ans<-h2o.merge(x, small, by="id1"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.merge(x, small, by="id1"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q2: merge x with medium on id2. Two runs, each timed, checksummed (sums of
# v1 and v2), logged with write.log() and followed by h2o.rm(ans).
question = "medium inner on int" # q2 | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id2"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id2"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q3: merge x with medium on id2 with all.x=TRUE. Two runs, same
# measure/log/cleanup sequence as the other questions.
# The v2 checksum uses na.rm=TRUE here (unlike the other questions), presumably
# because rows of x without a match carry NA in v2 — confirm.
question = "medium outer on int" # q3 | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id2", all.x=TRUE))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]], na.rm=TRUE)))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id2", all.x=TRUE))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]], na.rm=TRUE)))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q4: merge x with medium on id5 (imported with column type "enum"). Two runs,
# each timed, checksummed (sums of v1 and v2), logged and followed by h2o.rm(ans).
question = "medium inner on factor" # q4 | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id5"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.merge(x, medium, by="id5"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# q5: merge x with big on id3. Two runs, each timed, checksummed (sums of
# v1 and v2), logged with write.log() and followed by h2o.rm(ans).
question = "big inner on int" # q5 | ||
t = system.time(print(dim(ans<-h2o.merge(x, big, by="id3"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=1L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
h2o.rm(ans) | ||
t = system.time(print(dim(ans<-h2o.merge(x, big, by="id3"))))[["elapsed"]] | ||
m = memory_usage() | ||
chkt = system.time(chk<-list(sum(ans[["v1"]]), sum(ans[["v2"]])))[["elapsed"]] | ||
write.log(run=2L, task=task, data=data_name, in_rows=nrow(x), question=question, out_rows=nrow(ans), out_cols=ncol(ans), solution=solution, version=ver, git=git, fun=fun, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) | ||
print(head(ans, 3)) | ||
print(tail(ans, 3)) | ||
h2o.rm(ans) | ||
|
||
# final cleanup on the h2o side before reporting the total time
h2o.removeAll() | ||
|
||
# elapsed seconds since task_init, i.e. since just before the first question
cat(sprintf("joining finished, took %.0fs\n", proc.time()[["elapsed"]]-task_init)) | ||
|
||
# when run non-interactively, quit explicitly with status 0 and without saving the workspace
if (!interactive()) q("no", status=0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# create the log directory and the project-local R library used by the h2o scripts
mkdir -p ./h2o/log ./h2o/r-h2o
# install h2o
# RCurl and jsonlite come from CRAN, h2o itself from the h2o release repository;
# everything is installed into ./h2o/r-h2o
Rscript -e '
h2o_lib = "./h2o/r-h2o"
install.packages(c("RCurl","jsonlite"), repos="https://cloud.r-project.org", lib=h2o_lib)
install.packages("h2o", repos="http://h2o-release.s3.amazonaws.com/h2o/latest_stable_R", method="curl", lib=h2o_lib)
'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
set -e

# Read the Version and Revision fields from the installed h2o package's
# DESCRIPTION and write each to a file named after the upper-cased field
# (h2o/VERSION, h2o/REVISION); a missing field is written as an empty string.
Rscript -e '
desc = read.dcf(system.file(package="h2o", lib.loc="./h2o/r-h2o", "DESCRIPTION"), fields=c("Version","Revision"))
values = c(desc)
values[is.na(values)] = ""
write_field = function(f, v) writeLines(v, file.path("h2o", f))
invisible(mapply(write_field, toupper(colnames(desc)), values))
'
Oops, something went wrong.