Skip to content

Commit

Permalink
Rename doc folder, change cpp names to match executable name, update …
Browse files Browse the repository at this point in the history
…program option docs, add top-level docs to programs, add replay data documentation.
  • Loading branch information
5had3z committed May 30, 2024
1 parent 650eea8 commit be069e5
Show file tree
Hide file tree
Showing 19 changed files with 228 additions and 130 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@ name: Docs
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
steps:
- name: Requirements
run: sudo apt-get update
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ if(SC2_CONVERTER)
target_link_options(replay_converter PUBLIC -fsanitize=address,leak,undefined)
endif()

add_executable(sc2_converter bin/replay_converter.cpp)
add_executable(sc2_converter bin/sc2_converter.cpp)
target_link_libraries(
sc2_converter
PUBLIC replay_converter
Expand All @@ -143,7 +143,7 @@ if(SC2_CONVERTER)
target_link_directories(sc2_converter PUBLIC ${Python3_INCLUDE_DIRS}\\..\\libs)
endif()

add_executable(sc2_merger bin/merge.cpp src/database.cpp)
add_executable(sc2_merger bin/sc2_merger.cpp src/database.cpp)
target_compile_features(sc2_merger PUBLIC cxx_std_23)
target_include_directories(sc2_merger PUBLIC include)
target_link_libraries(
Expand Down
14 changes: 13 additions & 1 deletion bin/experiment.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
/**
* @file experiment.cpp
* @author Bryce Ferenczi
* @brief Small program to play around and experiment with the StarCraft II Client API.
* @version 0.1
* @date 2024-05-30
*
* @copyright Copyright (c) 2024
*
*/

#include "generated_info.hpp"

#include <cxxopts.hpp>
Expand Down Expand Up @@ -226,7 +237,8 @@ class Observer : public sc2::ReplayObserver

int main(int argc, char *argv[])
{
cxxopts::Options cliopts("SC2 Replay", "Run a folder of replays and see if it works");
cxxopts::Options cliopts(
"SC2 Replay Observing Experiment", "Program and code used to experiment with sc2client-api interface.");
// clang-format off
cliopts.add_options()
("r,replays", "path to folder of replays", cxxopts::value<std::string>())
Expand Down
12 changes: 12 additions & 0 deletions bin/fix_lut.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
/**
* @file fix_lut.cpp
* @author Bryce Ferenczi
* @brief The .SC2Replay index/look-up-table originally used std::streampos to calculate the beginning of each
* replay. Linux and Windows actually have different implementations for std::streampos and are not compatible.
* Therefore the LUT needed to be converted to just a standard integer offset, which is what this program does.
* @version 0.1
* @date 2024-05-30
*
* @copyright Copyright (c) 2024
*
*/
#include <cxxopts.hpp>
#include <spdlog/fmt/fmt.h>

Expand Down
14 changes: 14 additions & 0 deletions bin/format_compare.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
/**
* @file format_compare.cpp
* @author Bryce Ferenczi
* @brief This program is used for analysing and benchmarking file sizes. One function writes each of the individual
* data components of a SC2 replay to separate files so we can see the contribution of units, minimaps, scores etc.
* Another function this performs is to write the unit data in various forms. From this we can check which
* rearrangement of the unit data structure results in the smallest file. This is also timed so we can check for
* associated computational overhead.
* @version 0.1
* @date 2024-05-30
*
* @copyright Copyright (c) 2024
*
*/
// External
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/zlib.hpp>
Expand Down
28 changes: 24 additions & 4 deletions bin/format_converter.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
/**
* @file format_converter.cpp
* @author Bryce Ferenczi
* @brief This program converts one form of serialized SC2 data to another. This was originally used to convert from some V1 to V2
* type data structure (V1 is deleted and removed from the repo). This has been kept so it can be reused if necessary
* without rewriting from scratch.
* @version 0.1
* @date 2024-05-30
*
* @copyright Copyright (c) 2024
*
*/
#include <cxxopts.hpp>
#include <spdlog/fmt/fmt.h>

Expand All @@ -13,6 +25,14 @@ namespace fs = std::filesystem;
using SrcFormat = cvt::ReplayDataSoA;
using DstFormat = cvt::ReplayDataSoANoUnits;

/**
* @brief Reads hash,steps pair from text file.
* @note This was previously required when the number of steps wasn't recorded in the original SC2Replay format. Number
* of steps in replay was gathered by another program and written to a plain-text file for this program to consume and
* add for the new format.
* @param path Path to hash-step file.
* @return Mapping from replay hash to steps in replay.
*/
[[nodiscard]] auto read_hash_steps_file(const fs::path &path) noexcept -> std::unordered_map<std::string, std::uint32_t>
{
std::unordered_map<std::string, std::uint32_t> hash_steps;
Expand Down Expand Up @@ -41,7 +61,7 @@ int main(int argc, char *argv[])
cliParser.add_options()
("i,input", "Source database to convert from", cxxopts::value<std::string>())
("o,output", "Destination database, if folder then use source filename", cxxopts::value<std::string>())
("steps-file", "Contains hash-gamestep pairs", cxxopts::value<std::string>())
// ("steps-file", "Contains hash-gamestep pairs", cxxopts::value<std::string>())
("offset", "Offset to apply to partition index", cxxopts::value<int>())
("h,help", "This help");
// clang-format on
Expand Down Expand Up @@ -86,9 +106,9 @@ int main(int argc, char *argv[])
}
cvt::ReplayDatabase<DstFormat> dest(destPath);

const fs::path hashStepFile = cliOpts["steps-file"].as<std::string>();
if (!fs::exists(hashStepFile)) { fmt::print("ERROR: Hash-Step file doesn't exist: {}\n", hashStepFile.string()); }
const auto hash_steps = read_hash_steps_file(hashStepFile);
// const fs::path hashStepFile = cliOpts["steps-file"].as<std::string>();
// if (!fs::exists(hashStepFile)) { fmt::print("ERROR: Hash-Step file doesn't exist: {}\n", hashStepFile.string());
// } const auto hash_steps = read_hash_steps_file(hashStepFile);

auto already_converted = dest.getHashes();
const auto print_modulo = source.size() / 10;
Expand Down
22 changes: 11 additions & 11 deletions bin/replay_converter.cpp → bin/sc2_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,18 +370,18 @@ auto main(int argc, char *argv[]) -> int
"SC2 Replay Converter", "Convert SC2 Replays into a database which can be sampled for machine learning");
// clang-format off
cliParser.add_options()
("r,replays", "path to folder of replays or replay file", cxxopts::value<std::string>())
("p,partition", "partition file to select a subset of replays from the folder", cxxopts::value<std::string>())
("o,output", "output filename for replay database", cxxopts::value<std::string>())
("c,converter", "type of converter to use [action|full|strided]", cxxopts::value<std::string>())
("s,stride", "stride for the strided converter", cxxopts::value<std::size_t>())
("save-actions", "strided converter will also save timestep with action", cxxopts::value<bool>())
("g,game", "path to game executable", cxxopts::value<std::string>())
("b,badfile", "file that contains a known set of bad replays", cxxopts::value<std::string>())
("r,replays", "Path to folder of replays or replay file.", cxxopts::value<std::string>())
("p,partition", "Partition file to select a subset of replays a folder.", cxxopts::value<std::string>())
("o,output", "Output filepath for replay database.", cxxopts::value<std::string>())
("c,converter", "Type of converter to use [action|full|strided].", cxxopts::value<std::string>())
("s,stride", "Stride for the strided converter (in game steps).", cxxopts::value<std::size_t>())
("save-actions", "Strided converter will also save steps with player actions.", cxxopts::value<bool>())
("g,game", "Path to 'Versions' folder of the SC2 game.", cxxopts::value<std::string>())
("b,badfile", "File to record a known set of bad replays (to skip).", cxxopts::value<std::string>())
("offset", "Offset to apply to partition index", cxxopts::value<int>())
("port", "port for serving the game", cxxopts::value<int>()->default_value("9168"))
("perflog", "log to file the time taken to convert a file", cxxopts::value<std::string>())
("h,help", "This help");
("port", "Port for communication with SC2.", cxxopts::value<int>()->default_value("9168"))
("perflog", "Path to log time taken for replay observation.", cxxopts::value<std::string>())
("h,help", "Show this help.");
// clang-format on
const auto cliOpts = cliParser.parse(argc, argv);

Expand Down
5 changes: 4 additions & 1 deletion bin/merge.cpp → bin/sc2_merger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ int main(int argc, char *argv[])
if (strat == Stratergy::Merge) { knownHashes = replayDb.getHashes(); }

bool ok = true;
if (cliOpts["folder"].count()) {
if (cliOpts["folder"].count() && cliOpts["file"].count()) {
SPDLOG_ERROR("--file or --folder should be specified exclusively");
return -1;
} else if (cliOpts["folder"].count()) {
fs::path partsFolder = cliOpts["folder"].as<std::string>();
if (!fs::exists(partsFolder)) {
SPDLOG_ERROR("--folder doesn't exist: {}", partsFolder.string());
Expand Down
10 changes: 0 additions & 10 deletions docs_sphinx/api/index.rst

This file was deleted.

6 changes: 3 additions & 3 deletions docs_sphinx/api/benchmark.rst → docs_sphinx/benchmark.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
.. _benchmark:

Benchmark
=========
Benchmarks
==========

Size and speed benchmarks are completed with a small but representative sample of 20 replays from the 4.9.2 pool.
Size and speed benchmarks are completed with a small, but representative, sample of 20 replays from the 4.9.2 pack.

- 2a5591989f900e6aa8f91803abada291755638c9292be9d1360bed0003ff4eb6
- e2d117e1a78f03f8ce00d7bc898977966486ecc53b441ebc017bdb8eee481aef
Expand Down
File renamed without changes.
12 changes: 12 additions & 0 deletions docs_sphinx/cpp_api/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
.. _api1:

C++ API
=======

Automatically generated documentation from the inline doxygen comments.

.. toctree::
:maxdepth: 2
:glob:

*
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 3 additions & 1 deletion docs_sphinx/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ Table of Contents
:maxdepth: 2

self
api/index
cpp_api/index
replay_data
benchmark
39 changes: 39 additions & 0 deletions docs_sphinx/replay_data.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
.. _api_data:

Replay Data
===========

Pre-Converted Tournament Data
-----------------------------

Pre-serialized tournament data is available to `download <https://bridges.monash.edu/articles/dataset/Tournament_Starcraft_II/25865566>`_. Each replay database file is named after the tournament it was gathered from. The associated SQLite database (gamedata.db) which contains metadata on each of the replays is also included in this pack. This data was gathered with the ``Action`` converter, i.e. replay observations are only recorded on player actions, which is the same method as `AlphaStar-Unplugged <https://github.com/google-deepmind/alphastar/blob/main/alphastar/unplugged/data/README.md>`_. Another tournament dataset created (and not currently posted online) used ``Strided+Action`` with a stride of ~1sec (IIRC?). This increased the overall size of the dataset to ~90GB, rather than ``Action`` only ~55.5GB.


Downloading Replay Data
-----------------------

Blizzard Replay Packs
^^^^^^^^^^^^^^^^^^^^^

Blizzard have replay packs available grouped by game version played on. These replay packs can be downloaded using their ``download_replays.py`` script which can be found `here <https://github.com/Blizzard/s2client-proto/tree/master/samples/replay-api>`_. The game version associated with each replay pack to actually run the replays can also be downloaded from `here <https://github.com/Blizzard/s2client-proto#downloads>`_.

Tournament Replay Packs
^^^^^^^^^^^^^^^^^^^^^^^

Tournament replay packs are gathered and provided by a `community website <https://lotv.spawningtool.com/replaypacks>`_ which is regularly updated. Unfortunately, Blizzard have not released headless Linux versions of StarCraft II since 4.10 (at the time of writing). The newer tournament replays must be played with the Windows client. ``sc2-serializer`` can be compiled and run on Windows. We include a script to launch many copies of StarCraft II, each using a unique port for communication, to enable processing of many tournament replays in parallel on a Windows machine.

A particular problem with tournament replays is that many of the game sub-versions and maps are unique and need to be downloaded. Blizzard's CLI replay client is unable to download this data automatically, hence each replay that doesn't work due to missing data needs to be individually opened by a user with the normal game client to initiate the download process. I assume posting the client data is prohibited by some non-distribution EULA, or else I would post this to save someone else many hours of this dull task. One key tip to check if data is missing and has to be downloaded is that the minimap preview in-game is displayed when the data is accounted for, and not present when missing. So skip over games if the minimap preview is there, and manually open games when it is not. After the game begins, exit the game and repeat. Then the conversion process can be run, mostly uninterrupted. There are some games where the client freezes at the same point in the replay. This usually cannot be fixed by restarting the replay; the replay is just not convertible for unknown reasons.


Converting Replays
------------------

Once you have acquired replays to serialize, ``sc2_converter`` is used to re-run the replays and record the observations to a new ``.SC2Replays`` file. ``sc2_converter`` includes a ``-h/--help`` flag to print out all the current options available for the conversion process. An example of running this program is below.

.. code::
./sc2_converter --replays=/folder/of/replays --output=/path/output.SC2Replays --game=/SC2/492/Versions --converter=strided --save-actions --stride=22
Several instances of this program can be run in parallel. Each instance needs a unique ``--port`` for communication between the client (observer) and server (SC2), unless they're running in isolated docker containers. The database interface isn't inter-process safe or anything, hence each instance should be writing to their own output file. If you want to merge the results together, ``sc2_merger`` can be used.

Originally there were some poor choices in the ``.SC2Replays`` format and structure, so other programs like ``fix_lut`` and ``format_converter`` were used to fix this (instead of running conversion from scratch). These aren't really used at this point and are mostly historical, but may be useful as a foundation for new things.
15 changes: 7 additions & 8 deletions include/replay_parsing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,7 @@ template<typename ReplayDataType> class ReplayParser
}

static_assert((HasScalarData<step_data_t> || HasMinimapData<step_data_t> || HasUnitData<step_data_t>
|| HasActionData<decltype(replayData_.data)>)
&& "At least one data type should be present");
|| HasActionData<decltype(replayData_.data)>)&&"At least one data type should be present");

return result;
}
Expand Down Expand Up @@ -506,10 +505,10 @@ template<typename ReplayDataType> class ReplayParser
*/
template<typename T, std::output_iterator<T> It>
requires std::is_arithmetic_v<T>
[[maybe_unused]] auto unpackBoolImage(const Image<bool> &img, It out) -> It
[[maybe_unused]] auto unpackBoolImage(const Image<bool> &image, It out) -> It
{
for (std::size_t i = 0; i < img.size(); ++i) {
const auto bitset = std::bitset<8>(std::to_integer<uint8_t>(img._data[i]));
for (std::size_t i = 0; i < image.size(); ++i) {
const auto bitset = std::bitset<8>(std::to_integer<uint8_t>(image._data[i]));
#pragma unroll
for (int j = 7; j > -1; --j) { *out++ = static_cast<T>(bitset[j]); }
}
Expand All @@ -524,10 +523,10 @@ template<typename T, std::output_iterator<T> It>
*/
template<typename T>
requires std::is_arithmetic_v<T>
auto unpackBoolImage(const Image<bool> &img) noexcept -> std::vector<T>
auto unpackBoolImage(const Image<bool> &image) noexcept -> std::vector<T>
{
std::vector<T> unpacked_data(img.nelem(), 0);
unpackBoolImage(img, unpacked_data.begin());
std::vector<T> unpacked_data(image.nelem(), 0);
unpackBoolImage(image, unpacked_data.begin());
return unpacked_data;
}

Expand Down
Loading

0 comments on commit be069e5

Please sign in to comment.