Skip to content

Commit

Permalink
Update corpora compressed size after re-compression with pbzip2 (elastic#109)
Browse files Browse the repository at this point in the history

Update compressed-bytes for all corpora after re-compressing them using
`pbzip2 -9 -v -k -m10000`. Together with elastic/rally#947
this allows for much faster decompression utilizing all available CPU cores.
  • Loading branch information
dliappis committed Apr 14, 2021
1 parent 490b7a2 commit e9286ea
Show file tree
Hide file tree
Showing 9 changed files with 23 additions and 24 deletions.
2 changes: 1 addition & 1 deletion eventdata/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "eventdata.json.bz2",
"document-count": 20000000,
"compressed-bytes": 791796014,
"compressed-bytes": 792768300,
"uncompressed-bytes": 16437108429
}
]
Expand Down
2 changes: 1 addition & 1 deletion geopoint/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 60844404,
"compressed-bytes": 505295401,
"compressed-bytes": 505542241,
"uncompressed-bytes": 2448564579
}
]
Expand Down
28 changes: 14 additions & 14 deletions http_logs/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,49 +52,49 @@
"target-index": "logs-181998",
"source-file": "documents-181998.unparsed.json.bz2",
"document-count": 2708746,
"compressed-bytes": 13064317,
"compressed-bytes": 13088137,
"uncompressed-bytes": 303920342
},
{
"target-index": "logs-191998",
"source-file": "documents-191998.unparsed.json.bz2",
"document-count": 9697882,
"compressed-bytes": 47211781,
"compressed-bytes": 47290776,
"uncompressed-bytes": 1088378738
},
{
"target-index": "logs-201998",
"source-file": "documents-201998.unparsed.json.bz2",
"document-count": 13053463,
"compressed-bytes": 63174979,
"compressed-bytes": 63278452,
"uncompressed-bytes": 1456836090
},
{
"target-index": "logs-211998",
"source-file": "documents-211998.unparsed.json.bz2",
"document-count": 17647279,
"compressed-bytes": 85607179,
"compressed-bytes": 85739523,
"uncompressed-bytes": 1975990671
},
{
"target-index": "logs-221998",
"source-file": "documents-221998.unparsed.json.bz2",
"document-count": 10716760,
"compressed-bytes": 53190976,
"compressed-bytes": 53264421,
"uncompressed-bytes": 1202551382
},
{
"target-index": "logs-231998",
"source-file": "documents-231998.unparsed.json.bz2",
"document-count": 11961342,
"compressed-bytes": 60705435,
"compressed-bytes": 60795929,
"uncompressed-bytes": 1334381144
},
{
"target-index": "logs-241998",
"source-file": "documents-241998.unparsed.json.bz2",
"document-count": 181463624,
"compressed-bytes": 897719968,
"compressed-bytes": 899190175,
"uncompressed-bytes": 20563705716
}
]
Expand All @@ -109,49 +109,49 @@
"target-index": "logs-181998",
"source-file": "documents-181998.json.bz2",
"document-count": 2708746,
"compressed-bytes": 13815456,
"compressed-bytes": 13843641,
"uncompressed-bytes": 363512754
},
{
"target-index": "logs-191998",
"source-file": "documents-191998.json.bz2",
"document-count": 9697882,
"compressed-bytes": 49439633,
"compressed-bytes": 49546887,
"uncompressed-bytes": 1301732149
},
{
"target-index": "logs-201998",
"source-file": "documents-201998.json.bz2",
"document-count": 13053463,
"compressed-bytes": 65623436,
"compressed-bytes": 65759419,
"uncompressed-bytes": 1744012279
},
{
"target-index": "logs-211998",
"source-file": "documents-211998.json.bz2",
"document-count": 17647279,
"compressed-bytes": 88258230,
"compressed-bytes": 88445049,
"uncompressed-bytes": 2364230815
},
{
"target-index": "logs-221998",
"source-file": "documents-221998.json.bz2",
"document-count": 10716760,
"compressed-bytes": 54160603,
"compressed-bytes": 54274027,
"uncompressed-bytes": 1438320123
},
{
"target-index": "logs-231998",
"source-file": "documents-231998.json.bz2",
"document-count": 11961342,
"compressed-bytes": 60927822,
"compressed-bytes": 61043842,
"uncompressed-bytes": 1597530673
},
{
"target-index": "logs-241998",
"source-file": "documents-241998.json.bz2",
"document-count": 181463624,
"compressed-bytes": 905378242,
"compressed-bytes": 907295259,
"uncompressed-bytes": 24555905444
}
]
Expand Down
2 changes: 1 addition & 1 deletion nested/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 11203029,
"compressed-bytes": 695293381,
"compressed-bytes": 695550727,
"uncompressed-bytes": 3637747670
}
]
Expand Down
2 changes: 1 addition & 1 deletion noaa/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 33659481,
"compressed-bytes": 993302204,
"compressed-bytes": 995480468,
"uncompressed-bytes": 9684262698
}
]
Expand Down
2 changes: 1 addition & 1 deletion nyc_taxis/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"source-file": "documents.json.bz2",
"#COMMENT": "ML benchmark rely on the fact that the document count stays constant.",
"document-count": 165346692,
"compressed-bytes": 4812721501,
"compressed-bytes": 4820107188,
"uncompressed-bytes": 79802445255
}
]
Expand Down
5 changes: 2 additions & 3 deletions percolator/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
{
"source-file": "queries-2.json.bz2",
"document-count": 2000000,
"compressed-bytes": 105192,
"uncompressed-bytes": 110039748,
"compressed-bytes": 124009,
"uncompressed-bytes": 110039748,
"target-index": "queries",
"target-type": "percolator"
}
Expand All @@ -33,4 +33,3 @@
{{ rally.collect(parts="challenges/*.json") }}
]
}

2 changes: 1 addition & 1 deletion pmc/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 574199,
"compressed-bytes": 5928712141,
"compressed-bytes": 5931724449,
"uncompressed-bytes": 23256051757
}
]
Expand Down
2 changes: 1 addition & 1 deletion so/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"source-file": "posts.json.bz2",
"document-count": 36062278,
"compressed-bytes": 9599137228,
"compressed-bytes": 9600716233,
"uncompressed-bytes": 35564808298
}
]
Expand Down

0 comments on commit e9286ea

Please sign in to comment.