Skip to content

Commit

Permalink
Update corpora compressed size after re-compression with pbzip2
Browse files (browse the repository at this point in the history)
Update compressed-bytes for all corpora after re-compressing them using
`pbzip2 -9 -v -k -m10000`. Together with elastic/rally#947,
this allows for much faster decompression that utilizes all available CPU cores.
  • Loading branch information
dliappis committed Apr 3, 2020
1 parent fc85b9d commit d6f2646
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 28 deletions.
2 changes: 1 addition & 1 deletion eventdata/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "eventdata.json.bz2",
"document-count": 20000000,
"compressed-bytes": 791796014,
"compressed-bytes": 792768300,
"uncompressed-bytes": 16437108429
}
]
Expand Down
2 changes: 1 addition & 1 deletion geopoint/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 60844404,
"compressed-bytes": 505295401,
"compressed-bytes": 505542241,
"uncompressed-bytes": 2448564579
}
]
Expand Down
2 changes: 1 addition & 1 deletion geopointshape/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 60844404,
"compressed-bytes": 493367095,
"compressed-bytes": 493689712,
"uncompressed-bytes": 2780550484
}
]
Expand Down
6 changes: 3 additions & 3 deletions geoshape/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
{
"source-file": "linestrings.json.bz2",
"document-count": 20532036,
"compressed-bytes": 3697293598,
"compressed-bytes": 3698508764,
"uncompressed-bytes": 12592499821
}
]
Expand All @@ -39,7 +39,7 @@
{
"source-file": "multilinestrings.json.bz2",
"document-count": 532036,
"compressed-bytes": 1816588880,
"compressed-bytes": 1817213095,
"uncompressed-bytes": 5992834062
}
]
Expand All @@ -52,7 +52,7 @@
{
"source-file": "polygons.json.bz2",
"document-count": 39459211,
"compressed-bytes": 8835370788,
"compressed-bytes": 8837117359,
"uncompressed-bytes": 30178820325
}
]
Expand Down
28 changes: 14 additions & 14 deletions http_logs/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,49 +48,49 @@
"target-index": "logs-181998",
"source-file": "documents-181998.unparsed.json.bz2",
"document-count": 2708746,
"compressed-bytes": 13064317,
"compressed-bytes": 13088137,
"uncompressed-bytes": 303920342
},
{
"target-index": "logs-191998",
"source-file": "documents-191998.unparsed.json.bz2",
"document-count": 9697882,
"compressed-bytes": 47211781,
"compressed-bytes": 47290776,
"uncompressed-bytes": 1088378738
},
{
"target-index": "logs-201998",
"source-file": "documents-201998.unparsed.json.bz2",
"document-count": 13053463,
"compressed-bytes": 63174979,
"compressed-bytes": 63278452,
"uncompressed-bytes": 1456836090
},
{
"target-index": "logs-211998",
"source-file": "documents-211998.unparsed.json.bz2",
"document-count": 17647279,
"compressed-bytes": 85607179,
"compressed-bytes": 85739523,
"uncompressed-bytes": 1975990671
},
{
"target-index": "logs-221998",
"source-file": "documents-221998.unparsed.json.bz2",
"document-count": 10716760,
"compressed-bytes": 53190976,
"compressed-bytes": 53264421,
"uncompressed-bytes": 1202551382
},
{
"target-index": "logs-231998",
"source-file": "documents-231998.unparsed.json.bz2",
"document-count": 11961342,
"compressed-bytes": 60705435,
"compressed-bytes": 60795929,
"uncompressed-bytes": 1334381144
},
{
"target-index": "logs-241998",
"source-file": "documents-241998.unparsed.json.bz2",
"document-count": 181463624,
"compressed-bytes": 897719968,
"compressed-bytes": 899190175,
"uncompressed-bytes": 20563705716
}
]
Expand All @@ -104,49 +104,49 @@
"target-index": "logs-181998",
"source-file": "documents-181998.json.bz2",
"document-count": 2708746,
"compressed-bytes": 13815456,
"compressed-bytes": 13843641,
"uncompressed-bytes": 363512754
},
{
"target-index": "logs-191998",
"source-file": "documents-191998.json.bz2",
"document-count": 9697882,
"compressed-bytes": 49439633,
"compressed-bytes": 49546887,
"uncompressed-bytes": 1301732149
},
{
"target-index": "logs-201998",
"source-file": "documents-201998.json.bz2",
"document-count": 13053463,
"compressed-bytes": 65623436,
"compressed-bytes": 65759419,
"uncompressed-bytes": 1744012279
},
{
"target-index": "logs-211998",
"source-file": "documents-211998.json.bz2",
"document-count": 17647279,
"compressed-bytes": 88258230,
"compressed-bytes": 88445049,
"uncompressed-bytes": 2364230815
},
{
"target-index": "logs-221998",
"source-file": "documents-221998.json.bz2",
"document-count": 10716760,
"compressed-bytes": 54160603,
"compressed-bytes": 54274027,
"uncompressed-bytes": 1438320123
},
{
"target-index": "logs-231998",
"source-file": "documents-231998.json.bz2",
"document-count": 11961342,
"compressed-bytes": 60927822,
"compressed-bytes": 61043842,
"uncompressed-bytes": 1597530673
},
{
"target-index": "logs-241998",
"source-file": "documents-241998.json.bz2",
"document-count": 181463624,
"compressed-bytes": 905378242,
"compressed-bytes": 907295259,
"uncompressed-bytes": 24555905444
}
]
Expand Down
4 changes: 2 additions & 2 deletions metricbeat/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
{
"source-file": "documents.json.bz2",
"document-count": 1079600,
"compressed-bytes":91887122,
"uncompressed-bytes":1249705758
"compressed-bytes": 91964149,
"uncompressed-bytes": 1249705758
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion nested/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 11203029,
"compressed-bytes": 695293381,
"compressed-bytes": 695550727,
"uncompressed-bytes": 3637747670
}
]
Expand Down
2 changes: 1 addition & 1 deletion noaa/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 33659481,
"compressed-bytes": 993302204,
"compressed-bytes": 995480468,
"uncompressed-bytes": 9684262698
}
]
Expand Down
2 changes: 1 addition & 1 deletion nyc_taxis/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"source-file": "documents.json.bz2",
"#COMMENT": "ML benchmark rely on the fact that the document count stays constant.",
"document-count": 165346692,
"compressed-bytes": 4812721501,
"compressed-bytes": 4820107188,
"uncompressed-bytes": 79802445255
}
]
Expand Down
2 changes: 1 addition & 1 deletion percolator/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "queries-2.json.bz2",
"document-count": 2000000,
"compressed-bytes": 105192,
"compressed-bytes": 124009,
"uncompressed-bytes": 110039748
}
]
Expand Down
2 changes: 1 addition & 1 deletion pmc/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "documents.json.bz2",
"document-count": 574199,
"compressed-bytes": 5928712141,
"compressed-bytes": 5931724449,
"uncompressed-bytes": 23256051757
}
]
Expand Down
2 changes: 1 addition & 1 deletion so/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"source-file": "posts.json.bz2",
"document-count": 36062278,
"compressed-bytes": 9599137228,
"compressed-bytes": 9600716233,
"uncompressed-bytes": 35564808298
}
]
Expand Down

0 comments on commit d6f2646

Please sign in to comment.