From 624ecec0ea3748c1ecc3a3a6495c6813d624eb4d Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Mon, 30 Oct 2023 16:11:57 -0400 Subject: [PATCH 1/4] Fixup compute_noop.py numpy example (#402) --- examples/compute_noop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/compute_noop.py b/examples/compute_noop.py index 9da92775..dd68e9fe 100644 --- a/examples/compute_noop.py +++ b/examples/compute_noop.py @@ -51,9 +51,9 @@ # import numpy as np # # numpy_data = np.frombuffer(data, np.int32) -# out = compute_with_buffers({0: numpy_data}, {1: numpy_data.nbytes}, compute_shader, n=n) +# out = compute_with_buffers({0: numpy_data}, {1: numpy_data.nbytes}, shader_source, n=n) # result = np.frombuffer(out[1], dtype=np.int32) -# print(result) +# print(result.tolist()) # %% The long version using the wgpu API From 183f0c3432daa34a0d4eb151037e373a8f166b62 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Wed, 1 Nov 2023 17:12:06 +0100 Subject: [PATCH 2/4] Track object usage (#399) * Track object usage wip * doh * better sys exit * Wrap up implementation of diagnostics * add tests * last bit of test coverage * create map to determine bpp for textures * Refactor * More refactoring * tweak * Remove old print_report * Add docs * Small refactor for codegen * small tweaks * Cleaner way to handle space between totals --- docs/utils.rst | 39 +++ tests/test_diagnostics.py | 382 ++++++++++++++++++++++ wgpu/__init__.py | 5 +- wgpu/_diagnostics.py | 521 +++++++++++++++++++++++++++++++ wgpu/backends/rs.py | 84 +---- wgpu/backends/rs_ffi.py | 10 +- wgpu/backends/rs_helpers.py | 129 ++++++++ wgpu/base.py | 56 +++- wgpu/resources/codegen_report.md | 10 +- 9 files changed, 1142 insertions(+), 94 deletions(-) create mode 100644 tests/test_diagnostics.py create mode 100644 wgpu/_diagnostics.py diff --git a/docs/utils.rst b/docs/utils.rst index 71a65fa1..6ed4557d 100644 --- a/docs/utils.rst +++ b/docs/utils.rst @@ -3,6 +3,45 @@ Utils The wgpu library provides a few 
utilities. Note that most functions below need to be explictly imported. +Logger +------ + +Errors, warnings, and info messages (including messages generated by +wgpu-native) are logged using Python's default logging mechanics. The +wgpu logger instance is in ``wgpu.logger``, but can also be obtained +via: + +.. code-block:: py + + import logging + logger = logging.getLogger("wgpu") + + +Diagnostics +----------- + +To print a full diagnostic report: + +.. code-block:: py + + wgpu.diagnostics.print_report() + +To inspect (for example) the total buffer usage: + +.. code-block:: py + + >>> counts = wgpu.diagnostics.object_counts.get_dict() + >>> print(counts["Buffer"]) + {'count': 3, 'resource_mem': 784} + + +.. autoclass:: wgpu._diagnostics.DiagnosticsRoot + :members: + + +.. autoclass:: wgpu._diagnostics.Diagnostics + :members: + Get default device ------------------ diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py new file mode 100644 index 00000000..6cff00c2 --- /dev/null +++ b/tests/test_diagnostics.py @@ -0,0 +1,382 @@ +""" +This tests the diagnostics logic itself. It does not do a tests that *uses* the diagnostics. 
+""" + + +import wgpu +from wgpu import _diagnostics +from wgpu._diagnostics import ( + DiagnosticsRoot, + Diagnostics, + ObjectTracker, + dict_to_text, + int_repr, +) + +from testutils import run_tests, can_use_wgpu_lib +from pytest import mark + + +def dedent(text, n): + return "\n".join(line[n:] for line in text.split("\n")) + + +class CustomDiagnosticsRoot(DiagnosticsRoot): + def __enter__(self): + _diagnostics.diagnostics = self + return self + + def __exit__(self, *args): + _diagnostics.diagnostics = wgpu.diagnostics + + +class CustomDiagnostics(Diagnostics): + def __init__(self, name): + super().__init__(name) + self.tracker = ObjectTracker() + + def get_dict(self): + return {k: {"count": v} for k, v in self.tracker.counts.items()} + + +def test_diagnostics_meta(): + # Test that our custom class does what we expet it to do + assert isinstance(wgpu.diagnostics, DiagnosticsRoot) + assert wgpu.diagnostics is _diagnostics.diagnostics + + with CustomDiagnosticsRoot() as custom: + assert custom is _diagnostics.diagnostics + + assert wgpu.diagnostics is _diagnostics.diagnostics + + +def test_diagnostics_main(): + with CustomDiagnosticsRoot() as custom: + d1 = CustomDiagnostics("foo") + d2 = CustomDiagnostics("bar") + + assert "foo" in repr(custom) + assert "bar" in repr(custom) + assert "spam" not in repr(custom) + + assert "foo" in repr(d1) + assert "bar" in repr(d2) + + # Showing report for one topic + + d1.tracker.increase("FooBar") + + reference1 = """ + ██ foo: + + count + + FooBar: 1 + + ██ bar: + + No data + """ + + assert custom.get_report() == dedent(reference1, 12) + + # Showing report for both topics + + d1.tracker.increase("FooBar") + d2.tracker.increase("XYZ") + + reference2 = """ + ██ foo: + + count + + FooBar: 2 + + ██ bar: + + count + + XYZ: 1 + """ + + assert custom.get_report() == dedent(reference2, 12) + + # Showing report also for newly added topic + + d3 = CustomDiagnostics("spam") + assert "spam" in repr(custom) + + 
d3.tracker.increase("FooBar") + d3.tracker.increase("FooBar") + d3.tracker.increase("XYZ") + + reference3 = """ + ██ foo: + + count + + FooBar: 2 + + ██ bar: + + count + + XYZ: 1 + + ██ spam: + + count + + FooBar: 2 + XYZ: 1 + """ + + assert custom.get_report() == dedent(reference3, 12) + + # Can also show one + + reference4 = """ + ██ spam: + + count + + FooBar: 2 + XYZ: 1 + """ + + # Showing report also for newly added backend + assert d3.get_report() == dedent(reference4, 12) + + # The root dict is a dict that maps topics to the per-topic dicts. + # So it's a dict of dicts of dicts. + big_dict = custom.get_dict() + assert isinstance(big_dict, dict) + for key, val in big_dict.items(): + assert isinstance(val, dict) + for k, v in val.items(): + assert isinstance(v, dict) + + # These should not fail + d3.print_report() + custom.print_report() + + +def test_dict_to_text_simple(): + # Note the left justification + + d = {"foo": 123456, "bar": "hi", "spam": 4.12345678} + + reference = """ + foo: 123K + bar: hi + spam: 4.12346 + """ + assert dict_to_text(d) == dedent(reference[1:], 8) + + +def test_dict_to_text_table(): + # Note the right justification + + d = { + "foo": {"a": 1, "b": 2, "c": 3.1000000}, + "bar": {"a": 4, "b": 5, "c": 6.123456789123}, + } + + reference = """ + a b c + + foo: 1 2 3.1 + bar: 4 5 6.12346 + """ + assert dict_to_text(d) == dedent(reference[1:], 8) + + reference = """ + title b a + + foo: 2 1 + bar: 5 4 + """ + assert dict_to_text(d, ["title", "b", "a"]) == dedent(reference[1:], 8) + + +def test_dict_to_text_justification(): + # Strain the justification + + d = { + "foobarspameggs": {"aprettylongtitle": 1, "b": "cyan", "c": 3}, + "yo": {"aprettylongtitle": 4, "b": "blueberrycake", "c": 6}, + } + + reference = """ + title aprettylongtitle b c + + foobarspameggs: 1 cyan 3 + yo: 4 blueberrycake 6 + """ + + header = ["title", "aprettylongtitle", "b", "c"] + assert dict_to_text(d, header) == dedent(reference[1:], 8) + + +def 
test_dict_to_text_subdicts(): + # This covers the option to create sub-rows, covering one case, multiple cases, and zero cases. + + d = { + "foo": { + "a": 1, + "b": 2, + "c": {"opt1": {"d": 101, "e": 102}, "opt2": {"d": 103, "e": 104}}, + }, + "bar": {"a": 3, "b": 4, "c": {"opt2": {"d": 105, "e": 106}}}, + "spam": {"a": 5, "b": 6, "c": {}}, + "eggs": { + "a": 7, + "b": 8, + "c": { + "opt1": {"d": 111, "e": 112}, + "opt2": {"d": 113, "e": 114}, + "opt3": {"d": 115, "e": 116}, + }, + }, + } + + reference = """ + a b c d e + + foo: 1 2 opt1: 101 102 + opt2: 103 104 + bar: 3 4 opt2: 105 106 + spam: 5 6 + eggs: 7 8 opt1: 111 112 + opt2: 113 114 + opt3: 115 116 + """ + + assert dict_to_text(d) == dedent(reference[1:], 8) + + +def test_dict_to_text_mix(): + # This covers the option to create sub-rows, covering one case, multiple cases, and zero cases. + + d = { + "foo": { + "a": 1, + "b": 2, + "c": "simple", + "z": 42, + }, + "bar": {"b": 4, "c": {"opt2": {"d": 105, "e": 106}}, "a": 3}, + "spam": {"a": 5, "b": None, "c": {}}, + "eggs": { + "z": 41, + "a": 7, + "c": { + "opt1": {"d": 111, "e": 112}, + "opt2": {"d": 113, "e": 114}, + }, + }, + } + + reference = """ + a b z c d e + + foo: 1 2 42 simple + bar: 3 4 opt2: 105 106 + spam: 5 + eggs: 7 41 opt1: 111 112 + opt2: 113 114 + """ + + assert dict_to_text(d) == dedent(reference[1:], 8) + + +def test_object_tracker(): + tracker = ObjectTracker() + counts = tracker.counts + + tracker.increase("FooBar") + tracker.increase("FooBar") + tracker.increase("FooBar") + tracker.increase("SpamEggs") + tracker.increase("SpamEggs") + tracker.increase("SpamEggs") + + assert counts == {"FooBar": 3, "SpamEggs": 3} + + tracker.decrease("FooBar") + tracker.decrease("FooBar") + tracker.decrease("FooBar") + tracker.decrease("SpamEggs") + tracker.decrease("SpamEggs") + + assert counts == {"FooBar": 0, "SpamEggs": 1} + + tracker.increase("FooBar") + tracker.increase("SpamEggs") + + assert counts == {"FooBar": 1, "SpamEggs": 2} + + 
tracker.decrease("FooBar") + tracker.decrease("SpamEggs") + tracker.decrease("SpamEggs") + + assert counts == {"FooBar": 0, "SpamEggs": 0} + + +def test_int_repr(): + assert int_repr(0) == "0" + assert int_repr(7) == "7" + assert int_repr(912) == "912" + + assert int_repr(1_000) == "1.00K" + assert int_repr(1_234) == "1.23K" + assert int_repr(12_345) == "12.3K" + assert int_repr(123_456) == "123K" + + assert int_repr(1_000_000) == "1.00M" + assert int_repr(1_234_000) == "1.23M" + assert int_repr(12_345_000) == "12.3M" + assert int_repr(123_456_000) == "123M" + + assert int_repr(1_000_000_000) == "1.00G" + assert int_repr(1_234_000_000) == "1.23G" + assert int_repr(12_345_000_000) == "12.3G" + assert int_repr(123_456_000_000) == "123G" + + assert int_repr(-7) == "-7" + assert int_repr(-912) == "-912" + assert int_repr(-1000) == "-1.00K" + assert int_repr(-12_345) == "-12.3K" + assert int_repr(-123_456_000) == "-123M" + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_diagnostics_with_backends(): + # Just make sure that it runs without errors + + import wgpu.backends.rs + + text = wgpu.diagnostics.get_report() + + assert "Device" in text + assert "RenderPipeline" in text + assert "ShaderModule" in text + + +def test_texture_format_map_is_complete(): + # When texture formats are added, removed, or changed, we must update our + # map. This test makes sure we don't forget. 
+ + map_keys = set(_diagnostics.texture_format_to_bpp.keys()) + enum_keys = set(wgpu.TextureFormat) + + too_much = map_keys - enum_keys + missing = enum_keys - map_keys + + assert not too_much + assert not missing + assert map_keys == enum_keys # for good measure + + +if __name__ == "__main__": + run_tests(globals()) diff --git a/wgpu/__init__.py b/wgpu/__init__.py index 400f0d24..16d659a3 100644 --- a/wgpu/__init__.py +++ b/wgpu/__init__.py @@ -3,6 +3,7 @@ """ from ._coreutils import logger # noqa: F401,F403 +from ._diagnostics import diagnostics # noqa: F401,F403 from .flags import * # noqa: F401,F403 from .enums import * # noqa: F401,F403 from .base import * # noqa: F401,F403 @@ -36,10 +37,6 @@ def _register_backend(cls): globals()["request_adapter"] = gpu.request_adapter globals()["request_adapter_async"] = gpu.request_adapter_async globals()["wgsl_language_features"] = gpu.wgsl_language_features - if hasattr(gpu, "print_report"): - globals()["print_report"] = gpu.print_report - else: - globals()["print_report"] = _base_GPU.print_report _base_GPU = GPU # noqa: F405, N816 diff --git a/wgpu/_diagnostics.py b/wgpu/_diagnostics.py new file mode 100644 index 00000000..a19c1e15 --- /dev/null +++ b/wgpu/_diagnostics.py @@ -0,0 +1,521 @@ +""" +Logic related to providing diagnostic info on wgpu. +""" + +import os +import sys +import platform + + +class DiagnosticsRoot: + """Root object to access wgpu diagnostics (i.e. ``wgpu.diagnostics``). + + Per-topic diagnostics can be accessed as attributes on this object. + These include ``system``, ``native_info``, ``versions``, + ``object_counts``, and more. + """ + + def __init__(self): + self._diagnostics_instances = {} + + def __repr__(self): + topics = ", ".join(self._diagnostics_instances.keys()) + return f"" + + def _register_diagnostics(self, name, ob): + self._diagnostics_instances[name] = ob + setattr(self, name, ob) + + def get_dict(self): + """Get a dict that represents the full diagnostics info. 
+ + The keys are the diagnostic topics, and the values are dicts + of dicts. See e.g. ``wgpu.diagnostics.counts.get_dict()`` for + a topic-specific dict. + """ + result = {} + for name, ob in self._diagnostics_instances.items(): + result[name] = ob.get_dict() + return result + + def get_report(self): + """Get the full textual diagnostic report (as a str).""" + text = "" + for name, ob in self._diagnostics_instances.items(): + text += ob.get_report() + return text + + def print_report(self): + """Convenience method to print the full diagnostics report.""" + print(self.get_report(), end="") + + +class Diagnostics: + """Object that represents diagnostics on a specific topic. + + This is a base class that must be subclassed to provide diagnostics + on a certain topic. Instantiating the class registers it with the + root diagnostics object. + """ + + def __init__(self, name): + diagnostics._register_diagnostics(name, self) + self.name = name + self.object_counts = {} + + def __repr__(self): + return f"" + + def get_dict(self): + """Get the diagnostics for this topic, in the form of a Python dict. + + Subclasses must implement this method. The dict can be a simple + map of keys to values (str, int, float):: + + foo: 1 + bar: 2 + + If the values are dicts, the data has a table-like layout, with + the keys representing the table header:: + + count mem + + Adapter: 1 264 + Buffer: 4 704 + + Subdicts are also supported, which results in multi-row entries. + In the report, the keys of the subdicts have colons behind them:: + + count mem backend o v e el_size + + Adapter: 1 264 vulkan: 1 0 0 264 + d3d12: 1 0 0 220 + Buffer: 4 704 vulkan: 4 0 0 176 + d3d12: 0 0 0 154 + + """ + raise NotImplementedError() + + def get_subscript(self): + """Get informative text that helps interpret the report. + + Subclasses can implement this method. The text will show below the table + in the report. 
+ """ + return "" # Optional + + def get_report(self): + """Get the textual diagnostics report for this topic.""" + text = f"\n██ {self.name}:\n\n" + text += dict_to_text(self.get_dict()) + subscript = self.get_subscript() + if subscript: + text += "\n" + subscript.rstrip() + "\n" + return text + + def print_report(self): + """Print the diagnostics report for this topic.""" + print(self.get_report(), end="") + + +class ObjectTracker: + """Little object to help track object counts.""" + + def __init__(self): + self.counts = {} + self.amounts = {} + + def increase(self, name, amount=0): + """Bump the counter.""" + self.counts[name] = self.counts.get(name, 0) + 1 + if amount: + self.amounts[name] = self.amounts.get(name, 0) + amount + + def decrease(self, name, amount=0): + """Bump the counter back.""" + self.counts[name] -= 1 + if amount: + self.amounts[name] -= amount + + +def derive_header(dct): + """Derive a table-header from the given dict.""" + + if not isinstance(dct, dict): # no-cover + raise TypeError(f"Not a dict: {dct}") + + header = [] + sub_dicts = {} + + for key, val in dct.items(): + if not isinstance(val, dict): # no-cover + raise TypeError(f"Element not a dict: {val}") + for k, v in val.items(): + if k not in header: + header.append(k) + if isinstance(v, dict): + sub_dicts[k] = v + + for k, d in sub_dicts.items(): + while k in header: + header.remove(k) + header.append(k) + sub_header = derive_header(d) + for k in sub_header[1:]: + if k not in header: + header.append(k) + + # Add header item for first column, i.e. the key / row title + header.insert(0, "") + + return header + + +def dict_to_text(d, header=None): + """Convert a dict data structure to a textual table representation.""" + + if not d: + return "No data\n" + + # Copy the dict, with simple key-value dicts being transformed into table-like dicts. + # That wat the code in derive_header() and dict_to_table() can assume the table-like + # data structure, keeping it simpler. 
+ d2 = {} + for key, val in d.items(): + if not isinstance(val, dict): + val = {"": val} + d2[key] = val + d = d2 + + if not header: + header = derive_header(d) + + # We have a table-like-layout if any of the values in the header is non-empty + table_layout = any(header) + + # Get the table + rows = dict_to_table(d, header) + ncols = len(header) + + # Sanity check (guard assumptions about dict_to_table) + for row in rows: + assert len(row) == ncols, "dict_to_table failed" + for i in range(ncols): + assert isinstance(row[i], str), "dict_to_table failed" + + # Insert heading + if table_layout: + rows.insert(0, header.copy()) + rows.insert(1, [""] * ncols) + + # Determine what colons have values with a colon at the end + column_has_colon = [False for _ in range(ncols)] + for row in rows: + for i in range(ncols): + column_has_colon[i] |= row[i].endswith(":") + + # Align the values that don't have a colon at the end + for row in rows: + for i in range(ncols): + word = row[i] + if column_has_colon[i] and not word.endswith(":"): + row[i] = word + " " + + # Establish max lengths + max_lens = [0 for _ in range(ncols)] + for row in rows: + for i in range(ncols): + max_lens[i] = max(max_lens[i], len(row[i])) + + # Justify first column (always rjust) + for row in rows: + row[0] = row[0].rjust(max_lens[0]) + + # For the table layour we also rjust the other columns + if table_layout: + for row in rows: + for i in range(1, ncols): + row[i] = row[i].rjust(max_lens[i]) + + # Join into a consistent text + lines = [" ".join(row).rstrip() for row in rows] + text = "\n".join(lines) + return text.rstrip() + "\n" + + +def dict_to_table(d, header, header_offest=0): + """Convert a dict data structure to a table (a list of lists of strings). + The keys form the first entry of the row. Values that are dicts recurse. 
+ """ + + ncols = len(header) + rows = [] + + for row_title, values in d.items(): + if row_title == "total" and row_title == list(d.keys())[-1]: + rows.append([""] * ncols) + row = [row_title + ":" if row_title else ""] + rows.append(row) + for i in range(header_offest + 1, len(header)): + key = header[i] + val = values.get(key, None) + if val is None: + row.append("") + elif isinstance(val, str): + row.append(val) + elif isinstance(val, int): + row.append(int_repr(val)) + elif isinstance(val, float): + row.append(f"{val:.6g}") + elif isinstance(val, dict): + subrows = dict_to_table(val, header, i) + if len(subrows) == 0: + row += [""] * (ncols - i) + else: + row += subrows[0] + extrarows = [[""] * i + subrow for subrow in subrows[1:]] + rows.extend(extrarows) + break # header items are consumed by the sub + else: # no-cover + raise TypeError(f"Unexpected table value: {val}") + + return rows + + +def int_repr(val): + """Represent an integer using K and M suffixes.""" + prefix = "-" if val < 0 else "" + val = abs(val) + if val >= 1_000_000_000: # >= 1G + s = str(val / 1_000_000_000) + suffix = "G" + elif val >= 1_000_000: # >= 1M + s = str(val / 1_000_000) + suffix = "M" + elif val >= 1_000: # >= 1K + s = str(val / 1_000) + suffix = "K" + else: + s = str(val) + suffix = "" + if "." in s: + s1, _, s2 = s.partition(".") + n_decimals = max(0, 3 - len(s1)) + s = s1 + if n_decimals: + s2 += "000" + s = s1 + "." + s2[:n_decimals] + return prefix + s + suffix + + +# Map that we need to calculate texture resource consumption. +# We need to keep this up-to-date as formats change, we have a unit test for this. 
+# Also see https://wgpu.rs/doc/wgpu/enum.TextureFormat.html + +texture_format_to_bpp = { + # 8 bit + "r8unorm": 8, + "r8snorm": 8, + "r8uint": 8, + "r8sint": 8, + # 16 bit + "r16uint": 16, + "r16sint": 16, + "r16float": 16, + "rg8unorm": 16, + "rg8snorm": 16, + "rg8uint": 16, + "rg8sint": 16, + # 32 bit + "r32uint": 32, + "r32sint": 32, + "r32float": 32, + "rg16uint": 32, + "rg16sint": 32, + "rg16float": 32, + "rgba8unorm": 32, + "rgba8unorm-srgb": 32, + "rgba8snorm": 32, + "rgba8uint": 32, + "rgba8sint": 32, + "bgra8unorm": 32, + "bgra8unorm-srgb": 32, + # special fits + "rgb9e5ufloat": 32, # 3*9 + 5 + "rgb10a2uint": 32, # 3*10 + 2 + "rgb10a2unorm": 32, # 3*10 + 2 + "rg11b10ufloat": 32, # 2*11 + 10 + # 64 bit + "rg32uint": 64, + "rg32sint": 64, + "rg32float": 64, + "rgba16uint": 64, + "rgba16sint": 64, + "rgba16float": 64, + # 128 bit + "rgba32uint": 128, + "rgba32sint": 128, + "rgba32float": 128, + # depth and stencil + "stencil8": 8, + "depth16unorm": 16, + "depth24plus": 24, # "... at least 24 bit integer depth" ? 
+ "depth24plus-stencil8": 32, + "depth32float": 32, + "depth32float-stencil8": 40, + # Compressed + "bc1-rgba-unorm": 4, # 4x4 blocks, 8 bytes per block + "bc1-rgba-unorm-srgb": 4, + "bc2-rgba-unorm": 8, # 4x4 blocks, 16 bytes per block + "bc2-rgba-unorm-srgb": 8, + "bc3-rgba-unorm": 8, # 4x4 blocks, 16 bytes per block + "bc3-rgba-unorm-srgb": 8, + "bc4-r-unorm": 4, + "bc4-r-snorm": 4, + "bc5-rg-unorm": 8, + "bc5-rg-snorm": 8, + "bc6h-rgb-ufloat": 8, + "bc6h-rgb-float": 8, + "bc7-rgba-unorm": 8, + "bc7-rgba-unorm-srgb": 8, + "etc2-rgb8unorm": 4, + "etc2-rgb8unorm-srgb": 4, + "etc2-rgb8a1unorm": 4, + "etc2-rgb8a1unorm-srgb": 4, + "etc2-rgba8unorm": 8, + "etc2-rgba8unorm-srgb": 8, + "eac-r11unorm": 4, + "eac-r11snorm": 4, + "eac-rg11unorm": 8, + "eac-rg11snorm": 8, + # astc always uses 16 bytes (128 bits) per block + "astc-4x4-unorm": 8.0, + "astc-4x4-unorm-srgb": 8.0, + "astc-5x4-unorm": 6.4, + "astc-5x4-unorm-srgb": 6.4, + "astc-5x5-unorm": 5.12, + "astc-5x5-unorm-srgb": 5.12, + "astc-6x5-unorm": 4.267, + "astc-6x5-unorm-srgb": 4.267, + "astc-6x6-unorm": 3.556, + "astc-6x6-unorm-srgb": 3.556, + "astc-8x5-unorm": 3.2, + "astc-8x5-unorm-srgb": 3.2, + "astc-8x6-unorm": 2.667, + "astc-8x6-unorm-srgb": 2.667, + "astc-8x8-unorm": 2.0, + "astc-8x8-unorm-srgb": 2.0, + "astc-10x5-unorm": 2.56, + "astc-10x5-unorm-srgb": 2.56, + "astc-10x6-unorm": 2.133, + "astc-10x6-unorm-srgb": 2.133, + "astc-10x8-unorm": 1.6, + "astc-10x8-unorm-srgb": 1.6, + "astc-10x10-unorm": 1.28, + "astc-10x10-unorm-srgb": 1.28, + "astc-12x10-unorm": 1.067, + "astc-12x10-unorm-srgb": 1.067, + "astc-12x12-unorm": 0.8889, + "astc-12x12-unorm-srgb": 0.8889, +} + + +# %% global diagnostics object, and builtin diagnostics + + +# The global root object +diagnostics = DiagnosticsRoot() + + +class SystemDiagnostics(Diagnostics): + """Provides basic system info.""" + + def get_dict(self): + return { + "platform": platform.platform(), + # "platform_version": platform.version(), # can be quite long + 
"python_implementation": platform.python_implementation(), + "python": platform.python_version(), + } + + +class NativeDiagnostics(Diagnostics): + """Provides metadata about the wgpu-native backend.""" + + def get_dict(self): + # Get rs modules, or skip + try: + wgpu = sys.modules["wgpu"] + rs = wgpu.backends.rs + rs_ffi = wgpu.backends.rs_ffi + except (KeyError, AttributeError): # no-cover + return {} + + # Process lib path + lib_path = rs_ffi.lib_path + wgpu_path = os.path.dirname(wgpu.__file__) + if lib_path.startswith(wgpu_path): + lib_path = "." + os.path.sep + lib_path[len(wgpu_path) :].lstrip("/\\") + + return { + "expected_version": rs.__version__, + "lib_version": ".".join(str(i) for i in rs_ffi.get_lib_version()), + "lib_path": lib_path, + } + + +class VersionDiagnostics(Diagnostics): + """Provides version numbers from relevant libraries.""" + + def get_dict(self): + core_libs = ["wgpu", "cffi"] + qt_libs = ["PySide6", "PyQt6", "PySide2", "PyQt5"] + gui_libs = qt_libs + ["glfw", "jupyter_rfb", "wx"] + extra_libs = ["numpy", "pygfx", "pylinalg", "fastplotlib"] + + info = {} + + for libname in core_libs + gui_libs + extra_libs: + try: + ver = sys.modules[libname].__version__ + except (KeyError, AttributeError): + pass + else: + info[libname] = str(ver) + + return info + + +class ObjectCountDiagnostics(Diagnostics): + """Provides object counts and resource consumption, used in base.py.""" + + def __init__(self, name): + super().__init__(name) + self.tracker = ObjectTracker() + + def get_dict(self): + """Get diagnostics as a dict.""" + object_counts = self.tracker.counts + resource_mem = self.tracker.amounts + + # Collect counts + result = {} + for name in sorted(object_counts.keys()): + d = {"count": object_counts[name]} + if name in resource_mem: + d["resource_mem"] = resource_mem[name] + result[name[3:]] = d # drop the 'GPU' from the name + + # Add totals + totals = {} + for key in ("count", "resource_mem"): + totals[key] = sum(v.get(key, 0) for v in 
result.values()) + result["total"] = totals + + return result + + +SystemDiagnostics("system") +NativeDiagnostics("native_info") +VersionDiagnostics("versions") +ObjectCountDiagnostics("object_counts") diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 45cd1bf2..d2abdbb4 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -350,83 +350,6 @@ async def request_adapter_async( force_fallback_adapter=force_fallback_adapter, ) # no-cover - def _generate_report(self): - """Get a dictionary with info about the internal status of WGPU. - The structure of the dict is not defined, for the moment. Use print_report(). - """ - - # H: surfaces: WGPUStorageReport, backendType: WGPUBackendType, vulkan: WGPUHubReport, metal: WGPUHubReport, dx12: WGPUHubReport, dx11: WGPUHubReport, gl: WGPUHubReport - struct = new_struct_p( - "WGPUGlobalReport *", - # not used: surfaces - # not used: backendType - # not used: vulkan - # not used: metal - # not used: dx12 - # not used: dx11 - # not used: gl - ) - - # H: void f(WGPUInstance instance, WGPUGlobalReport * report) - libf.wgpuGenerateReport(get_wgpu_instance(), struct) - - report = {} - - report["surfaces"] = { - "occupied": struct.surfaces.numOccupied, - "vacant": struct.surfaces.numVacant, - "error": struct.surfaces.numError, - "element_size": struct.surfaces.elementSize, - } - report["backend_type"] = struct.backendType # note: could make this a set - for backend in ("vulkan", "metal", "dx12", "dx11", "gl"): - c_hub_report = getattr(struct, backend) - report[backend] = {} - for key in dir(c_hub_report): - c_storage_report = getattr(c_hub_report, key) - storage_report = { - "occupied": c_storage_report.numOccupied, - "vacant": c_storage_report.numVacant, - "error": c_storage_report.numError, - "element_size": c_storage_report.elementSize, - } - # if any(x!=0 for x in storage_report.values()): - report[backend][key] = storage_report - - return report - - def print_report(self): - def print_line(topic, occupied, vacant, 
error, el_size): - print( - topic.rjust(20), - str(occupied).rjust(8), - str(vacant).rjust(8), - str(error).rjust(8), - str(el_size).rjust(8), - ) - - def print_storage_report(topic, d): - print_line(topic, d["occupied"], d["vacant"], d["error"], d["element_size"]) - - report = self._generate_report() - - print(f"{self.__class__.__module__}.WGPU report:") - print() - print_line("", "Occupied", "Vacant", "Error", "el-size") - print() - print_storage_report("surfaces", report["surfaces"]) - for backend in ("vulkan", "metal", "dx12", "dx11", "gl"): - backend_has_stuff = False - for hub_report in report[backend].values(): - report_has_stuff = any(x != 0 for x in hub_report.values()) - backend_has_stuff |= report_has_stuff - if backend_has_stuff: - print_line(f"--- {backend} ---", "", "", "", "") - for key, val in report[backend].items(): - print_storage_report(key, val) - else: - print_line(f"--- {backend} ---", "", "", "", "") - class GPUCanvasContext(base.GPUCanvasContext): def __init__(self, canvas): @@ -884,6 +807,11 @@ def create_texture( if not mip_level_count: mip_level_count = 1 # or lib.WGPU_MIP_LEVEL_COUNT_UNDEFINED ? 
+ mip_level_count = int(mip_level_count) + + if not sample_count: + sample_count = 1 + sample_count = int(sample_count) # H: nextInChain: WGPUChainedStruct *, label: char *, usage: WGPUTextureUsageFlags/int, dimension: WGPUTextureDimension, size: WGPUExtent3D, format: WGPUTextureFormat, mipLevelCount: int, sampleCount: int, viewFormatCount: int, viewFormats: WGPUTextureFormat * struct = new_struct_p( @@ -2769,7 +2697,7 @@ class GPUInternalError(base.GPUInternalError, GPUError): def _copy_docstrings(): - base_classes = GPUObjectBase, GPUCanvasContext + base_classes = GPUObjectBase, GPUCanvasContext, GPUAdapter for ob in globals().values(): if not (isinstance(ob, type) and issubclass(ob, base_classes)): continue diff --git a/wgpu/backends/rs_ffi.py b/wgpu/backends/rs_ffi.py index 60507bba..81999d01 100644 --- a/wgpu/backends/rs_ffi.py +++ b/wgpu/backends/rs_ffi.py @@ -134,10 +134,11 @@ def _maybe_get_pip_hint(): ffi = FFI() ffi.cdef(get_wgpu_header()) ffi.set_source("wgpu.h", None) -lib = ffi.dlopen(get_wgpu_lib_path()) +lib_path = get_wgpu_lib_path() # store path on this module so it can be checked +lib = ffi.dlopen(lib_path) -def check_expected_version(version_info): +def get_lib_version(): # Get lib version version_int = lib.wgpuGetVersion() if version_int < 65536: # no-cover - old version encoding with 3 ints @@ -149,6 +150,11 @@ def check_expected_version(version_info): # When the 0.7.0 tag was made, the version was not bumped. 
if version_info_lib == (0, 6, 0, 0): version_info_lib = (0, 7, 0) + return version_info_lib + + +def check_expected_version(version_info): + version_info_lib = get_lib_version() # Compare if version_info_lib != version_info: # no-cover logger.warning( diff --git a/wgpu/backends/rs_helpers.py b/wgpu/backends/rs_helpers.py index bc5ceee0..1d682b1b 100644 --- a/wgpu/backends/rs_helpers.py +++ b/wgpu/backends/rs_helpers.py @@ -6,6 +6,7 @@ import ctypes from .rs_ffi import ffi, lib +from .._diagnostics import Diagnostics from ..base import ( GPUError, GPUOutOfMemoryError, @@ -338,3 +339,131 @@ def proxy_func(*args): proxy_func.__name__ = name return proxy_func + + +def generate_report(): + """Get a report similar to the one produced by wgpuGenerateReport(), + but in the form of a Python dict. + """ + + # H: surfaces: WGPUStorageReport, backendType: WGPUBackendType, vulkan: WGPUHubReport, metal: WGPUHubReport, dx12: WGPUHubReport, dx11: WGPUHubReport, gl: WGPUHubReport + struct = ffi.new("WGPUGlobalReport *") + + # H: void f(WGPUInstance instance, WGPUGlobalReport * report) + lib.wgpuGenerateReport(get_wgpu_instance(), struct) + + report = {} + + report["surfaces"] = { + "occupied": struct.surfaces.numOccupied, + "vacant": struct.surfaces.numVacant, + "error": struct.surfaces.numError, + "element_size": struct.surfaces.elementSize, + } + + for backend in ("vulkan", "metal", "dx12", "dx11", "gl"): + c_hub_report = getattr(struct, backend) + report[backend] = {} + for key in dir(c_hub_report): + c_storage_report = getattr(c_hub_report, key) + storage_report = { + "occupied": c_storage_report.numOccupied, + "vacant": c_storage_report.numVacant, + "error": c_storage_report.numError, + "element_size": c_storage_report.elementSize, + } + # if any(x!=0 for x in storage_report.values()): + report[backend][key] = storage_report + + return report + + +class NativeDiagnostics(Diagnostics): + def get_subscript(self): + text = "" + text += " * The o, v, e are occupied, vacant and 
error, respecitively.\n" + text += " * Reported memory does not include buffer/texture data.\n" + return text + + def get_dict(self): + result = {} + native_report = generate_report() + + # Names in the root of the report (backend-less) + root_names = ["surfaces"] + + # Get per-backend names and a list of backends + names = list(native_report["vulkan"].keys()) + backends = [name for name in native_report.keys() if name not in root_names] + + # Get a mapping from native names to wgpu-py names + name_map = {"surfaces": "CanvasContext"} + for name in names: + if name not in name_map: + name_map[name] = name[0].upper() + name[1:-1] + + # Initialize the result dict (sorted) + for name in sorted(names + root_names): + report_name = name_map[name] + result[report_name] = {"count": 0, "mem": 0} + + # Establish what backends are active + active_backends = [] + for backend in backends: + total = 0 + for name in names: + d = native_report[backend][name] + total += d["occupied"] + d["vacant"] + d["error"] + if total > 0: + active_backends.append(backend) + + # Process names in the root + for name in root_names: + d = native_report[name] + subtotal_count = d["occupied"] + d["vacant"] + d["error"] + impl = { + "o": d["occupied"], + "v": d["vacant"], + "e": d["error"], + "el_size": d["element_size"], + } + # Store in report + report_name = name_map[name] + result[report_name]["count"] = subtotal_count + result[report_name]["mem"] = subtotal_count * d["element_size"] + result[report_name]["backend"] = {"": impl} + + # Iterate over backends + for name in names: + total_count = 0 + total_mem = 0 + implementations = {} + for backend in active_backends: + d = native_report[backend][name] + subtotal_count = d["occupied"] + d["vacant"] + d["error"] + subtotal_mem = subtotal_count * d["element_size"] + impl = { + "o": d["occupied"], + "v": d["vacant"], + "e": d["error"], + "el_size": d["element_size"], + } + total_count += subtotal_count + total_mem += subtotal_mem + 
implementations[backend] = impl + # Store in report + report_name = name_map[name] + result[report_name]["count"] = total_count + result[report_name]["mem"] = total_mem + result[report_name]["backend"] = implementations + + # Add totals + totals = {} + for key in ("count", "mem"): + totals[key] = sum(v.get(key, 0) for v in result.values()) + result["total"] = totals + + return result + + +diagnostics = NativeDiagnostics("rs_counts") diff --git a/wgpu/base.py b/wgpu/base.py index 4ad51471..9528c34a 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -13,6 +13,7 @@ from typing import List, Dict, Union from ._coreutils import ApiDiff +from ._diagnostics import diagnostics, texture_format_to_bpp from . import flags, enums, structs @@ -62,6 +63,13 @@ apidiff = ApiDiff() +# Obtain the object tracker. Note that we store a ref of +# the latter on all classes that refer to it. Otherwise, on a sys exit, +# the module attributes are None-ified, and the destructors would +# therefore fail and produce warnings. +object_tracker = diagnostics.object_counts.tracker + + class GPU: """The entrypoint to the wgpu API. @@ -109,11 +117,6 @@ def get_preferred_canvas_format(self): """ raise RuntimeError("Use canvas.get_preferred_format() instead.") - @apidiff.add("Usefull") - def print_report(self): - """Print a report about the interals of the backend.""" - print(f"{self.__class__.__module__}.GPU: No report available.") - # IDL: [SameObject] readonly attribute WGSLLanguageFeatures wgslLanguageFeatures; @property def wgsl_language_features(self): @@ -131,7 +134,10 @@ class GPUCanvasContext: Can be obtained via `gui.WgpuCanvasInterface.get_context()`. 
""" + _ot = object_tracker + def __init__(self, canvas): + self._ot.increase(self.__class__.__name__) self._canvas_ref = weakref.ref(canvas) def _get_canvas(self): @@ -206,6 +212,7 @@ def get_preferred_format(self, adapter): return "bgra8unorm-srgb" # seems to be a good default def __del__(self): + self._ot.decrease(self.__class__.__name__) self._destroy() def _destroy(self): @@ -257,7 +264,10 @@ class GPUAdapter: Once invalid, it never becomes valid again. """ + _ot = object_tracker + def __init__(self, internal, features, limits, adapter_info): + self._ot.increase(self.__class__.__name__) self._internal = internal assert isinstance(features, set) @@ -315,6 +325,7 @@ def _destroy(self): pass def __del__(self): + self._ot.decrease(self.__class__.__name__) self._destroy() # IDL: readonly attribute boolean isFallbackAdapter; @@ -341,7 +352,11 @@ class GPUObjectBase: the GPU; the device and all objects belonging to a device. """ + _ot = object_tracker + _nbytes = 0 + def __init__(self, label, internal, device): + self._ot.increase(self.__class__.__name__, self._nbytes) self._label = label self._internal = internal # The native/raw/real GPU object self._device = device @@ -361,6 +376,7 @@ def _destroy(self): pass def __del__(self): + self._ot.decrease(self.__class__.__name__, self._nbytes) self._destroy() # Public destroy() methods are implemented on classes as the WebGPU spec specifies. 
@@ -939,6 +955,7 @@ class GPUBuffer(GPUObjectBase): """ def __init__(self, label, internal, device, size, usage, map_state): + self._nbytes = size super().__init__(label, internal, device) self._size = size self._usage = usage @@ -1100,9 +1117,28 @@ class GPUTexture(GPUObjectBase): """ def __init__(self, label, internal, device, tex_info): + self._nbytes = self._estimate_nbytes(tex_info) super().__init__(label, internal, device) self._tex_info = tex_info + def _estimate_nbytes(self, tex_info): + format = tex_info["format"] + size = tex_info["size"] + sample_count = tex_info["sample_count"] or 1 + mip_level_count = tex_info["mip_level_count"] or 1 + + bpp = texture_format_to_bpp.get(format, 0) + npixels = size[0] * size[1] * size[2] + nbytes_at_mip_level = sample_count * npixels * bpp / 8 + + nbytes = 0 + for i in range(mip_level_count): + nbytes += nbytes_at_mip_level + nbytes_at_mip_level /= 2 + + # Return rounded to nearest integer + return int(nbytes + 0.5) + @apidiff.add("Too useful to not-have") @property def size(self): @@ -2015,3 +2051,13 @@ def count(self): # like GPUExternalTexture and GPUUncapturedErrorEvent, and more. 
apidiff.remove_hidden_methods(globals()) + + +def _seed_object_counts(): + for key, val in globals().items(): + if key.startswith("GPU") and not key.endswith(("Base", "Mixin")): + if hasattr(val, "_ot"): + object_tracker.counts[key] = 0 + + +_seed_object_counts() diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 71f72a30..d15ac625 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -9,7 +9,7 @@ * Wrote 33 enums to enums.py * Wrote 59 structs to structs.py ### Patching API for base.py -* Diffs for GPU: add print_report, change get_preferred_canvas_format, change request_adapter, change request_adapter_async +* Diffs for GPU: change get_preferred_canvas_format, change request_adapter, change request_adapter_async * Diffs for GPUCanvasContext: add get_preferred_format, add present * Diffs for GPUDevice: add adapter, add create_buffer_with_data, hide import_external_texture, hide lost, hide onuncapturederror, hide pop_error_scope, hide push_error_scope * Diffs for GPUBuffer: add map_read, add map_write, add read_mapped, add write_mapped, hide get_mapped_range @@ -19,7 +19,7 @@ * Validated 37 classes, 113 methods, 43 properties ### Patching API for backends/rs.py * Diffs for GPUAdapter: add request_device_tracing -* Validated 37 classes, 101 methods, 0 properties +* Validated 37 classes, 99 methods, 0 properties ## Validating rs.py * Enum field TextureFormat.rgb10a2uint missing in wgpu.h * Enum PipelineErrorReason missing in wgpu.h @@ -28,6 +28,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum field DeviceLostReason.unknown missing in wgpu.h * Wrote 232 enum mappings and 47 struct-field mappings to rs_mappings.py -* Validated 88 C function calls -* Not using 115 C functions -* Validated 72 C structs +* Validated 87 C function calls +* Not using 116 C functions +* Validated 71 C structs From 54055fc653f453b81dece7b97d47805589555640 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Fri, 3 Nov 2023 
22:52:27 +0100 Subject: [PATCH 3/4] Remove asyncio from offscreen gui (#404) --- wgpu/gui/offscreen.py | 53 +++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/wgpu/gui/offscreen.py b/wgpu/gui/offscreen.py index 79296f2f..b5a1985d 100644 --- a/wgpu/gui/offscreen.py +++ b/wgpu/gui/offscreen.py @@ -1,4 +1,4 @@ -import asyncio +import time from ._offscreen import WgpuOffscreenCanvas from .base import WgpuAutoGui @@ -38,10 +38,11 @@ def is_closed(self): return self._closed def _request_draw(self): - call_later(0, self.draw) + # Deliberately a no-op, because people use .draw() instead. + pass def present(self, texture_view): - # This gets called at the end of a draw pass via GPUCanvasContextOffline + # This gets called at the end of a draw pass via _offscreen.GPUCanvasContext device = texture_view._device size = texture_view.size bytes_per_pixel = 4 @@ -74,31 +75,33 @@ def draw(self): WgpuCanvas = WgpuManualOffscreenCanvas -def call_later(delay, callback, *args): - loop = asyncio.get_event_loop_policy().get_event_loop() - # for the offscreen canvas, we prevent new frames and callbacks - # from being queued while the loop is running. this avoids - # callbacks from one visualization leaking into the next. - if loop.is_running(): - return - loop.call_later(delay, callback, *args) - +# If we consider the use-cases for using this offscreen canvas: +# +# * Using wgpu.gui.auto in test-mode: in this case run() should not hang, +# and call_later should not cause lingering refs. +# * Using the offscreen canvas directly, in a script: in this case you +# do not have/want an event system. +# * Using the offscreen canvas in an evented app. In that case you already +# have an app with a specific event-loop (it might be PySide6 or +# something else entirely). +# +# In summary, we provide a call_later() and run() that behave pretty +# well for the first case. 
-async def mainloop_iter(): - pass # no op +_pending_calls = [] -def run(): - """Handle all tasks scheduled with call_later and return.""" - loop = asyncio.get_event_loop_policy().get_event_loop() +def call_later(delay, callback, *args): + # Note that this module never calls call_later() itself; request_draw() is a no-op. + etime = time.time() + delay + _pending_calls.append((etime, callback, args)) - # If the loop is already running, this is likely an interactive session or something - if loop.is_running(): - return - # Run stub mainloop, so that all currently pending tasks are handled - loop.run_until_complete(mainloop_iter()) +def run(): + # Process pending calls + for etime, callback, args in _pending_calls.copy(): + if time.time() >= etime: + callback(*args) - # Cancel all remaining tasks (those that are scheduled later) - for t in asyncio.all_tasks(loop=loop): - t.cancel() + # Clear any leftover scheduled calls, to avoid lingering refs. + _pending_calls.clear() From 6ce9ac994206721d0b7265acb627530fd1272df1 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Sat, 4 Nov 2023 17:44:22 +0100 Subject: [PATCH 4/4] Add tests for object releasing (#403) * WIP mem tests * Rename GPUCanvasContextOffscreen * Seems good practice to poll the device after releasing an object * Progress on the memtests * Fix sorting in rs_counts diagnostics * more tests, also guis * Almost there ... 
* Implemented all * refactor a bit for shorter module * run the tests on ci * codegen * Remove the delayed releaser * Try enabling the releases that previously panicked * clean * Tried harder, but has no effect * add comment * Add another comment * And another --- .github/workflows/ci.yml | 5 +- tests_mem/test_mem.py | 559 +++++++++++++++++++++++++++++++ tests_mem/testutils.py | 177 ++++++++++ wgpu/backends/rs.py | 108 ++++-- wgpu/backends/rs_helpers.py | 26 +- wgpu/gui/_offscreen.py | 4 +- wgpu/gui/jupyter.py | 2 +- wgpu/resources/codegen_report.md | 6 +- 8 files changed, 819 insertions(+), 68 deletions(-) create mode 100644 tests_mem/test_mem.py create mode 100644 tests_mem/testutils.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4205f59..c4319fd5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -198,9 +198,12 @@ jobs: pip install -U -r dev-requirements.txt python download-wgpu-native.py pip install -e . - - name: Test on repo + - name: Unit tests run: | pytest -v tests + - name: Memory tests + run: | + pytest -v tests_mem # The release builds are done for the platforms that we want to build wheels for. # We build wheels, test them, and then upload the wheel as an artifact. 
diff --git a/tests_mem/test_mem.py b/tests_mem/test_mem.py new file mode 100644 index 00000000..49c24b05 --- /dev/null +++ b/tests_mem/test_mem.py @@ -0,0 +1,559 @@ +import gc +import asyncio + +import wgpu.backends.rs + +import pytest +from testutils import can_use_glfw, can_use_wgpu_lib, can_use_pyside6 +from testutils import create_and_release, get_counts, ob_name_from_test_func + +if not can_use_wgpu_lib: + pytest.skip( + "Skipping tests that need a window or the wgpu lib", allow_module_level=True + ) + + +# Create the default device beforehand +DEVICE = wgpu.utils.get_default_device() + + +async def stub_event_loop(): + pass + + +def make_draw_func_for_canvas(canvas): + """Create a draw function for the given canvas, + so that we can really present something to a canvas being tested. + """ + ctx = canvas.get_context() + ctx.configure(device=DEVICE, format="bgra8unorm-srgb") + + def draw(): + ctx = canvas.get_context() + command_encoder = DEVICE.create_command_encoder() + current_texture_view = ctx.get_current_texture() + render_pass = command_encoder.begin_render_pass( + color_attachments=[ + { + "view": current_texture_view, + "resolve_target": None, + "clear_value": (1, 1, 1, 1), + "load_op": wgpu.LoadOp.clear, + "store_op": wgpu.StoreOp.store, + } + ], + ) + render_pass.end() + DEVICE.queue.submit([command_encoder.finish()]) + ctx.present() + + return draw + + +# %% Meta tests + + +def test_meta_all_objects_covered(): + """Test that we have a test_release test function for each known object.""" + + ref_obnames = set(key for key in get_counts().keys()) + func_obnames = set(ob_name_from_test_func(func) for func in RELEASE_TEST_FUNCS) + + missing = ref_obnames - func_obnames + extra = func_obnames - ref_obnames + assert not missing + assert not extra + + +def test_meta_all_functions_solid(): + """Test that all funcs starting with "test_release_" are decorated appropriately.""" + for func in RELEASE_TEST_FUNCS: + is_decorated = func.__code__.co_name == 
"core_test_func" + assert is_decorated, func.__name__ + " not decorated" + + +def test_meta_buffers_1(): + """Making sure that the test indeed fails, when holding onto the objects.""" + + lock = [] + + @create_and_release + def test_release_buffer(n): + yield {} + for i in range(n): + b = DEVICE.create_buffer(size=128, usage=wgpu.BufferUsage.COPY_DST) + lock.append(b) + yield b + + with pytest.raises(AssertionError): + test_release_buffer() + + +def test_meta_buffers_2(): + """Making sure that the test indeed fails, by disabling the release call.""" + + ori = wgpu.backends.rs.GPUBuffer._destroy + wgpu.backends.rs.GPUBuffer._destroy = lambda self: None + + try: + with pytest.raises(AssertionError): + test_release_buffer() + + finally: + wgpu.backends.rs.GPUBuffer._destroy = ori + + +# %% The actual tests + +# These tests need to do one thing: generate n objects of the correct type. + + +@create_and_release +def test_release_adapter(n): + yield {} + for i in range(n): + yield wgpu.request_adapter(canvas=None, power_preference="high-performance") + + +@create_and_release +def test_release_device(n): + pytest.skip("XFAIL") + # todo: XFAIL: Device object seem not to be cleaned up at wgpu-native. + + # Note: the WebGPU spec says: + # [request_device()] is a one-time action: if a device is returned successfully, the adapter becomes invalid. 
+ + yield { + "expected_counts_after_create": {"Device": (n, n), "Queue": (n, 0)}, + } + adapter = DEVICE.adapter + for i in range(n): + d = adapter.request_device() + # d.queue._destroy() + # d._queue = None + yield d + + +@create_and_release +def test_release_bind_group(n): + buffer1 = DEVICE.create_buffer(size=128, usage=wgpu.BufferUsage.STORAGE) + + binding_layouts = [ + { + "binding": 0, + "visibility": wgpu.ShaderStage.COMPUTE, + "buffer": { + "type": wgpu.BufferBindingType.read_only_storage, + }, + }, + ] + + bindings = [ + { + "binding": 0, + "resource": {"buffer": buffer1, "offset": 0, "size": buffer1.size}, + }, + ] + + bind_group_layout = DEVICE.create_bind_group_layout(entries=binding_layouts) + + yield {} + + for i in range(n): + yield DEVICE.create_bind_group(layout=bind_group_layout, entries=bindings) + + +@create_and_release +def test_release_bind_group_layout(n): + # Note: when we use the same binding layout descriptor, wgpu-native + # re-uses the BindGroupLayout object. On the other hand, it also + # does not seem to clean them up. Perhaps it just caches them? There + # are only so many possible combinations, and its just 152 bytes + # (on Metal) per object. + + # todo: do we want similar behavior for *our* BindGroupLayout object? + + yield { + "expected_counts_after_create": {"BindGroupLayout": (n, 1)}, + "expected_counts_after_release": {"BindGroupLayout": (0, 1)}, + } + + binding_layouts = [ + { + "binding": 0, + "visibility": wgpu.ShaderStage.COMPUTE, + "buffer": { + "type": wgpu.BufferBindingType.read_only_storage, + }, + }, + ] + + for i in range(n): + # binding_layouts[0]["binding"] = i # force unique objects + yield DEVICE.create_bind_group_layout(entries=binding_layouts) + + +@create_and_release +def test_release_buffer(n): + yield {} + for i in range(n): + yield DEVICE.create_buffer(size=128, usage=wgpu.BufferUsage.COPY_DST) + + +@create_and_release +def test_release_canvas_context_1(n): + # Test with offscreen canvases. 
A context is created, but not a wgpu-native surface. + + # Note: the offscreen canvas keeps the render-texture-view alive, since it + # is used to e.g. download the resulting image. That's why we also see + # Textures and TextureViews in the counts. + + from wgpu.gui.offscreen import WgpuCanvas + + yield { + "expected_counts_after_create": { + "CanvasContext": (n, 0), + "Texture": (n, n), + "TextureView": (n, n), + }, + } + + for i in range(n): + c = WgpuCanvas() + c.request_draw(make_draw_func_for_canvas(c)) + c.draw() + yield c.get_context() + + +@create_and_release +def test_release_canvas_context_2(n): + # Test with GLFW canvases. + + # Note: in a draw, the textureview is obtained (thus creating a + # Texture and a TextureView, but these are released in present(), + # so we don't see them in the counts. + + loop = asyncio.get_event_loop_policy().get_event_loop() + + if loop.is_running(): + pytest.skip("Cannot run this test when asyncio loop is running") + if not can_use_glfw: + pytest.skip("Need glfw for this test") + + from wgpu.gui.glfw import WgpuCanvas # noqa + + yield {} + + for i in range(n): + c = WgpuCanvas() + c.request_draw(make_draw_func_for_canvas(c)) + loop.run_until_complete(stub_event_loop()) + yield c.get_context() + + # Need some shakes to get all canvas refs gone + del c + loop.run_until_complete(stub_event_loop()) + gc.collect() + loop.run_until_complete(stub_event_loop()) + + +@create_and_release +def test_release_canvas_context_3(n): + # Test with PySide canvases. + + # Note: in a draw, the textureview is obtained (thus creating a + # Texture and a TextureView, but these are released in present(), + # so we don't see them in the counts. 
+ + if not can_use_pyside6: + pytest.skip("Need pyside6 for this test") + + import PySide6 # noqa + from wgpu.gui.qt import WgpuCanvas # noqa + + app = PySide6.QtWidgets.QApplication.instance() + if app is None: + app = PySide6.QtWidgets.QApplication([""]) + + yield {} + + for i in range(n): + c = WgpuCanvas() + c.request_draw(make_draw_func_for_canvas(c)) + app.processEvents() + yield c.get_context() + + # Need some shakes to get all canvas refs gone + del c + gc.collect() + app.processEvents() + + +@create_and_release +def test_release_command_buffer(n): + # Note: a command encoder can only be used once (it gets destroyed on finish()) + yield { + "expected_counts_after_create": { + "CommandEncoder": (n, 0), + "CommandBuffer": (n, n), + }, + } + + for i in range(n): + command_encoder = DEVICE.create_command_encoder() + yield command_encoder.finish() + + +@create_and_release +def test_release_command_encoder(n): + # Note: a CommandEncoder does not exist in wgpu-core, but we do + # observe its internal CommandBuffer. + yield { + "expected_counts_after_create": { + "CommandEncoder": (n, 0), + "CommandBuffer": (0, n), + }, + } + + for i in range(n): + yield DEVICE.create_command_encoder() + + +@create_and_release +def test_release_compute_pass_encoder(n): + # Note: ComputePassEncoder does not really exist in wgpu-core + # -> Check gpu.diagnostics.rs_counts.print_report(), nothing there that ends with "Encoder". 
+ command_encoder = DEVICE.create_command_encoder() + + yield { + "expected_counts_after_create": { + "ComputePassEncoder": (n, 0), + }, + } + + for i in range(n): + yield command_encoder.begin_compute_pass() + + +@create_and_release +def test_release_compute_pipeline(n): + code = """ + @compute + @workgroup_size(1) + fn main(@builtin(global_invocation_id) index: vec3) { + let i: u32 = index.x; + } + """ + shader = DEVICE.create_shader_module(code=code) + + binding_layouts = [] + pipeline_layout = DEVICE.create_pipeline_layout(bind_group_layouts=binding_layouts) + + yield {} + + for i in range(n): + yield DEVICE.create_compute_pipeline( + layout=pipeline_layout, + compute={"module": shader, "entry_point": "main"}, + ) + + +@create_and_release +def test_release_pipeline_layout(n): + yield {} + for i in range(n): + yield DEVICE.create_pipeline_layout(bind_group_layouts=[]) + + +@create_and_release +def test_release_query_set(n): + # todo: implement this when we do support them + pytest.skip("Query set not implemented") + + +@create_and_release +def test_release_queue(n): + pytest.skip("XFAIL") + # todo: XFAIL: the device and queue are kinda one, and the former won't release at wgpu-native. + yield {} + adapter = DEVICE.adapter + for i in range(n): + d = adapter.request_device() + q = d.queue + d._queue = None # detach + yield q + + +@create_and_release +def test_release_render_bundle(n): + # todo: implement this when we do support them + pytest.skip("Render bundle not implemented") + + +@create_and_release +def test_release_render_bundle_encoder(n): + pytest.skip("Render bundle not implemented") + + +@create_and_release +def test_release_render_pass_encoder(n): + # Note: RenderPassEncoder does not really exist in wgpu-core + # -> Check gpu.diagnostics.rs_counts.print_report(), nothing there that ends with "Encoder". 
+ command_encoder = DEVICE.create_command_encoder() + + yield { + "expected_counts_after_create": { + "RenderPassEncoder": (n, 0), + }, + } + + for i in range(n): + yield command_encoder.begin_render_pass(color_attachments=[]) + + +@create_and_release +def test_release_render_pipeline(n): + code = """ + struct VertexInput { + @builtin(vertex_index) vertex_index : u32, + }; + struct VertexOutput { + @location(0) color : vec4, + @builtin(position) pos: vec4, + }; + + @vertex + fn vs_main(in: VertexInput) -> VertexOutput { + var positions = array, 3>( + vec2(0.0, -0.5), + vec2(0.5, 0.5), + vec2(-0.5, 0.75), + ); + var colors = array, 3>( // srgb colors + vec3(1.0, 1.0, 0.0), + vec3(1.0, 0.0, 1.0), + vec3(0.0, 1.0, 1.0), + ); + let index = i32(in.vertex_index); + var out: VertexOutput; + out.pos = vec4(positions[index], 0.0, 1.0); + out.color = vec4(colors[index], 1.0); + return out; + } + + @fragment + fn fs_main(in: VertexOutput) -> @location(0) vec4 { + let physical_color = pow(in.color.rgb, vec3(2.2)); // gamma correct + return vec4(physical_color, in.color.a); + } + """ + shader = DEVICE.create_shader_module(code=code) + + binding_layouts = [] + pipeline_layout = DEVICE.create_pipeline_layout(bind_group_layouts=binding_layouts) + + yield {} + + for i in range(n): + yield DEVICE.create_render_pipeline( + layout=pipeline_layout, + vertex={ + "module": shader, + "entry_point": "vs_main", + "buffers": [], + }, + primitive={ + "topology": wgpu.PrimitiveTopology.triangle_list, + "front_face": wgpu.FrontFace.ccw, + "cull_mode": wgpu.CullMode.none, + }, + depth_stencil=None, + multisample=None, + fragment={ + "module": shader, + "entry_point": "fs_main", + "targets": [ + { + "format": "bgra8unorm-srgb", + "blend": { + "color": ( + wgpu.BlendFactor.one, + wgpu.BlendFactor.zero, + wgpu.BlendOperation.add, + ), + "alpha": ( + wgpu.BlendFactor.one, + wgpu.BlendFactor.zero, + wgpu.BlendOperation.add, + ), + }, + }, + ], + }, + ) + + +@create_and_release +def 
test_release_sampler(n): + yield {} + for i in range(n): + yield DEVICE.create_sampler() + + +@create_and_release +def test_release_shader_module(n): + yield {} + + code = """ + @fragment + fn fs_main() -> @location(0) vec4 { + return vec4(1.0, 0.0, 0.0, 1.0); + } + """ + + for i in range(n): + yield DEVICE.create_shader_module(code=code) + + +@create_and_release +def test_release_texture(n): + yield {} + for i in range(n): + yield DEVICE.create_texture( + size=(16, 16, 16), + usage=wgpu.TextureUsage.TEXTURE_BINDING, + format="rgba8unorm", + ) + + +@create_and_release +def test_release_texture_view(n): + texture = DEVICE.create_texture( + size=(16, 16, 16), usage=wgpu.TextureUsage.TEXTURE_BINDING, format="rgba8unorm" + ) + yield {} + for i in range(n): + yield texture.create_view() + + +# %% The end + + +ALL_TEST_FUNCS = [ + ob + for name, ob in list(globals().items()) + if name.startswith("test_") and callable(ob) +] +RELEASE_TEST_FUNCS = [ + func for func in ALL_TEST_FUNCS if func.__name__.startswith("test_release_") +] + + +if __name__ == "__main__": + for func in ALL_TEST_FUNCS: + print(func.__name__ + " ...") + try: + func() + except pytest.skip.Exception: + print(" skipped") + print("done") diff --git a/tests_mem/testutils.py b/tests_mem/testutils.py new file mode 100644 index 00000000..357eaead --- /dev/null +++ b/tests_mem/testutils.py @@ -0,0 +1,177 @@ +import gc +import os +import sys +import subprocess + +import wgpu + + +def _determine_can_use_wgpu_lib(): + # For some reason, since wgpu-native 5c304b5ea1b933574edb52d5de2d49ea04a053db + # the process' exit code is not zero, so we test more pragmatically. 
+ code = "import wgpu.utils; wgpu.utils.get_default_device(); print('ok')" + result = subprocess.run( + [ + sys.executable, + "-c", + code, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + print("_determine_can_use_wgpu_lib() status code:", result.returncode) + return ( + result.stdout.strip().endswith("ok") + and "traceback" not in result.stderr.lower() + ) + + +def _determine_can_use_glfw(): + code = "import glfw;exit(0) if glfw.init() else exit(1)" + try: + subprocess.check_output([sys.executable, "-c", code]) + except Exception: + return False + else: + return True + + +def _determine_can_use_pyside6(): + code = "import PySide6.QtGui" + try: + subprocess.check_output([sys.executable, "-c", code]) + except Exception: + return False + else: + return True + + +can_use_wgpu_lib = _determine_can_use_wgpu_lib() +can_use_glfw = _determine_can_use_glfw() +can_use_pyside6 = _determine_can_use_pyside6() +is_ci = bool(os.getenv("CI", None)) + + +def get_counts(): + """Get a dict that maps object names to a 2-tuple representing + the counts in py and wgpu-native. + """ + counts_py = wgpu.diagnostics.object_counts.get_dict() + counts_native = wgpu.diagnostics.rs_counts.get_dict() + + all_keys = set(counts_py) | set(counts_native) + + default = {"count": -1} + + counts = {} + for key in sorted(all_keys): + counts[key] = ( + counts_py.get(key, default)["count"], + counts_native.get(key, default)["count"], + ) + counts.pop("total") + + return counts + + +def get_excess_counts(counts1, counts2): + """Compare two counts dicts, and return a new dict with the fields + that have increased counts. 
+ """ + more = {} + for name in counts1: + c1 = counts1[name][0] + c2 = counts2[name][0] + more_py = 0 + if c2 > c1: + more_py = c2 - c1 + c1 = counts1[name][1] + c2 = counts2[name][1] + more_native = 0 + if c2 > c1: + more_native = c2 - c1 + if more_py or more_native: + more[name] = more_py, more_native + return more + + +def ob_name_from_test_func(func): + """Translate test_release_bind_group() to "BindGroup".""" + func_name = func.__name__ + prefix = "test_release_" + assert func_name.startswith(prefix) + words = func_name[len(prefix) :].split("_") + if words[-1].isnumeric(): + words.pop(-1) + return "".join(word.capitalize() for word in words) + + +def create_and_release(create_objects_func): + """Decorator.""" + + def core_test_func(): + """The core function that does the testing.""" + + n = 32 + + generator = create_objects_func(n) + ob_name = ob_name_from_test_func(create_objects_func) + + # ----- Collect options + + options = { + "expected_counts_after_create": {ob_name: (32, 32)}, + "expected_counts_after_release": {}, + } + + func_options = next(generator) + assert isinstance(func_options, dict), "First yield must be an options dict" + options.update(func_options) + + # Measure baseline object counts + counts1 = get_counts() + + # ----- Create + + # Create objects + objects = list(generator) + + # Test the count + assert len(objects) == n + + # Test that all objects are of the same class. 
+ # (this for-loop is a bit weird, but its to avoid leaking refs to objects) + cls = objects[0].__class__ + assert all(isinstance(objects[i], cls) for i in range(len(objects))) + + # Test that class matches function name (should prevent a group of copy-paste errors) + assert ob_name == cls.__name__[3:] + + # Measure peak object counts + counts2 = get_counts() + more2 = get_excess_counts(counts1, counts2) + print(" more after create:", more2) + + # Make sure the actual object has increased + assert more2 # not empty + assert more2 == options["expected_counts_after_create"] + + # It's ok if other objects are created too ... + + # ----- Release + + # Delete objects + del objects + gc.collect() + + # Measure after-release object counts + counts3 = get_counts() + more3 = get_excess_counts(counts1, counts3) + print(" more after release:", more3) + + # Check! + assert more3 == options["expected_counts_after_release"] + + core_test_func.__name__ = create_objects_func.__name__ + return core_test_func diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index d2abdbb4..9dad0e71 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -35,7 +35,6 @@ get_memoryview_and_address, to_snake_case, to_camel_case, - DelayedReleaser, ErrorHandler, SafeLibCalls, ) @@ -173,7 +172,6 @@ def check_struct(struct_name, d): raise ValueError(f"Invalid keys in {struct_name}: {invalid_keys}") -delayed_releaser = DelayedReleaser() error_handler = ErrorHandler(logger) libf = SafeLibCalls(lib, error_handler) @@ -354,6 +352,7 @@ async def request_adapter_async( class GPUCanvasContext(base.GPUCanvasContext): def __init__(self, canvas): super().__init__(canvas) + self._device = None self._surface_size = (-1, -1) self._surface_id = None self._internal = None @@ -486,7 +485,7 @@ def get_preferred_format(self, adapter): return default def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, 
self._internal # H: void f(WGPUSwapChain swapChain) libf.wgpuSwapChainRelease(internal) @@ -545,9 +544,6 @@ def request_device_tracing( def _request_device( self, label, required_features, required_limits, default_queue, trace_path ): - # This is a good moment to release destroyed objects - delayed_releaser.release_all_pending() - # ---- Handle features assert isinstance(required_features, (tuple, list, set)) @@ -715,9 +711,10 @@ async def request_device_async( ) # no-cover def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal - delayed_releaser.release_soon("wgpuAdapterRelease", internal) + # H: void f(WGPUAdapter adapter) + libf.wgpuAdapterRelease(internal) class GPUDevice(base.GPUDevice, GPUObjectBase): @@ -739,6 +736,12 @@ def uncaptured_error_callback(c_type, c_message, userdata): self._internal, uncaptured_error_callback, ffi.NULL ) + def _poll(self): + # Internal function + if self._internal: + # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex) + libf.wgpuDevicePoll(self._internal, True, ffi.NULL) + def create_buffer( self, *, @@ -968,9 +971,17 @@ def create_bind_group_layout( # not used: nextInChain ) + # Note: wgpu-core re-uses BindGroupLayouts with the same (or similar + # enough) descriptor. You would think that this means that the id is + # the same when you call wgpuDeviceCreateBindGroupLayout with the same + # input, but it's not. So we cannot let wgpu-native/core decide when + # to re-use a BindGroupLayout. I don't feel confident checking here + # whether a BindGroupLayout can be re-used, so we simply don't. Higher + # level code can sometimes make this decision because it knows the app + # logic. 
+ # H: WGPUBindGroupLayout f(WGPUDevice device, WGPUBindGroupLayoutDescriptor const * descriptor) id = libf.wgpuDeviceCreateBindGroupLayout(self._internal, struct) - return GPUBindGroupLayout(label, id, self, entries) def create_bind_group( @@ -1439,14 +1450,21 @@ def create_render_bundle_encoder( stencil_read_only: bool = False, ): raise NotImplementedError() + # Note: also enable the corresponding memtest when implementing this! def create_query_set(self, *, label="", type: "enums.QueryType", count: int): raise NotImplementedError() + # Note: also enable the corresponding memtest when implementing this! def _destroy(self): - if self._internal is not None and lib is not None: + if self._queue is not None: + self._queue._destroy() + self._queue = None + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal - delayed_releaser.release_soon("wgpuDeviceRelease", internal) + # H: void f(WGPUDevice device) + libf.wgpuDeviceRelease(internal) + # wgpuDeviceDestroy(internal) is also an option class GPUBuffer(base.GPUBuffer, GPUObjectBase): @@ -1526,8 +1544,7 @@ def callback(status_, user_data_p): ) # Let it do some cycles - # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex) - libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL) + self._device._poll() if status != 0: # no-cover raise RuntimeError(f"Could not map buffer ({status}).") @@ -1635,10 +1652,18 @@ def destroy(self): def _destroy(self): self._release_memoryviews() - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUBuffer buffer) libf.wgpuBufferRelease(internal) + self._device._poll() + # Note: from the memtests it looks like we need to poll the device + # after releasing an object for some objects (buffer, texture, + # texture view, sampler, pipeline layout, compute pipeline, and + # render pipeline). 
But not others. Would be nice to at some point + # have more clarity on this. In the mean time, we now poll the + # device quite a bit, so leaks by not polling the device after + # releasing something are highly unlikely. class GPUTexture(base.GPUTexture, GPUObjectBase): @@ -1692,31 +1717,34 @@ def destroy(self): self._destroy() # no-cover def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUTexture texture) libf.wgpuTextureRelease(internal) + self._device._poll() class GPUTextureView(base.GPUTextureView, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUTextureView textureView) libf.wgpuTextureViewRelease(internal) + self._device._poll() class GPUSampler(base.GPUSampler, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUSampler sampler) libf.wgpuSamplerRelease(internal) + self._device._poll() class GPUBindGroupLayout(base.GPUBindGroupLayout, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUBindGroupLayout bindGroupLayout) libf.wgpuBindGroupLayoutRelease(internal) @@ -1724,7 +1752,7 @@ def _destroy(self): class GPUBindGroup(base.GPUBindGroup, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUBindGroup bindGroup) libf.wgpuBindGroupRelease(internal) @@ -1732,10 +1760,11 @@ def _destroy(self): class 
GPUPipelineLayout(base.GPUPipelineLayout, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUPipelineLayout pipelineLayout) libf.wgpuPipelineLayoutRelease(internal) + self._device._poll() class GPUShaderModule(base.GPUShaderModule, GPUObjectBase): @@ -1761,8 +1790,7 @@ def get_compilation_info(self): # H: void f(WGPUShaderModule shaderModule, WGPUCompilationInfoCallback callback, void * userdata) # libf.wgpuShaderModuleGetCompilationInfo(self._internal, callback, ffi.NULL) # - # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex) - # libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL) + # self._device._poll() # # if info is None: # raise RuntimeError("Could not obtain shader compilation info.") @@ -1772,7 +1800,7 @@ def get_compilation_info(self): return [] def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUShaderModule shaderModule) libf.wgpuShaderModuleRelease(internal) @@ -1784,18 +1812,20 @@ class GPUPipelineBase(base.GPUPipelineBase): class GPUComputePipeline(base.GPUComputePipeline, GPUPipelineBase, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUComputePipeline computePipeline) libf.wgpuComputePipelineRelease(internal) + self._device._poll() class GPURenderPipeline(base.GPURenderPipeline, GPUPipelineBase, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPURenderPipeline renderPipeline) 
libf.wgpuRenderPipelineRelease(internal) + self._device._poll() class GPUCommandBuffer(base.GPUCommandBuffer, GPUObjectBase): @@ -1805,8 +1835,8 @@ def _destroy(self): # 'Cannot remove a vacant resource'. Got this info from the # wgpu chat. Also see # https://docs.rs/wgpu-core/latest/src/wgpu_core/device/mod.rs.html#4180-4194 - # That's why _internal is set to None in submit() - if self._internal is not None and lib is not None: + # --> That's why _internal is set to None in Queue.submit() + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUCommandBuffer commandBuffer) libf.wgpuCommandBufferRelease(internal) @@ -2308,9 +2338,9 @@ def resolve_query_set( raise NotImplementedError() def _destroy(self): - # Note that the natove object gets destroyed on finish. + # Note that the native object gets destroyed on finish. # Also see GPUCommandBuffer._destroy() - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUCommandEncoder commandEncoder) libf.wgpuCommandEncoderRelease(internal) @@ -2350,10 +2380,10 @@ def end(self): libf.wgpuComputePassEncoderEnd(self._internal) def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUComputePassEncoder computePassEncoder) - internal # panics: libf.wgpuComputePassEncoderRelease(internal) + libf.wgpuComputePassEncoderRelease(internal) class GPURenderPassEncoder( @@ -2413,10 +2443,10 @@ def end_occlusion_query(self): raise NotImplementedError() def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPURenderPassEncoder renderPassEncoder) - internal # panics: 
libf.wgpuRenderPassEncoderRelease(internal) + libf.wgpuRenderPassEncoderRelease(internal) class GPURenderBundleEncoder( @@ -2431,7 +2461,7 @@ def finish(self, *, label=""): raise NotImplementedError() def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPURenderBundleEncoder renderBundleEncoder) libf.wgpuRenderBundleEncoderRelease(internal) @@ -2639,10 +2669,16 @@ def read_texture(self, source, data_layout, size): def on_submitted_work_done(self): raise NotImplementedError() + def _destroy(self): + if self._internal is not None and libf is not None: + self._internal, internal = None, self._internal + # H: void f(WGPUQueue queue) + libf.wgpuQueueRelease(internal) + class GPURenderBundle(base.GPURenderBundle, GPUObjectBase): def _destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPURenderBundle renderBundle) libf.wgpuRenderBundleRelease(internal) @@ -2652,7 +2688,7 @@ class GPUQuerySet(base.GPUQuerySet, GPUObjectBase): pass def destroy(self): - if self._internal is not None and lib is not None: + if self._internal is not None and libf is not None: self._internal, internal = None, self._internal # H: void f(WGPUQuerySet querySet) libf.wgpuQuerySetRelease(internal) diff --git a/wgpu/backends/rs_helpers.py b/wgpu/backends/rs_helpers.py index 1d682b1b..43747ea8 100644 --- a/wgpu/backends/rs_helpers.py +++ b/wgpu/backends/rs_helpers.py @@ -222,29 +222,6 @@ def to_camel_case(name): return name2 -class DelayedReleaser: - """Helps release objects at a later time.""" - - # I found that when wgpuDeviceRelease() was called in Device._destroy, - # the tests would hang. I found that the release call was done around - # the time when another device was used (e.g. to create a buffer - # or shader module). 
For some reason, the delay in destruction (by - # Python's CG) causes a deadlock or something. We seem to be able - # to fix this by doing the actual release later - e.g. when the - # user creates a new device. Seems to be the same for the adapter. - def __init__(self): - self._things_to_release = [] - - def release_soon(self, fun, i): - self._things_to_release.append((fun, i)) - - def release_all_pending(self): - while self._things_to_release: - fun, i = self._things_to_release.pop(0) - release_func = getattr(lib, fun) - release_func(i) - - class ErrorHandler: """Object that logs errors, with the option to collect incoming errors elsewhere. @@ -403,8 +380,7 @@ def get_dict(self): name_map[name] = name[0].upper() + name[1:-1] # Initialize the result dict (sorted) - for name in sorted(names + root_names): - report_name = name_map[name] + for report_name in sorted(name_map[name] for name in names + root_names): result[report_name] = {"count": 0, "mem": 0} # Establish what backends are active diff --git a/wgpu/gui/_offscreen.py b/wgpu/gui/_offscreen.py index 894a9963..68e2adcd 100644 --- a/wgpu/gui/_offscreen.py +++ b/wgpu/gui/_offscreen.py @@ -23,7 +23,7 @@ def get_context(self, kind="gpupresent"): # the backend (e.g. rs), but here we use our own context. 
assert kind == "gpupresent" if self._canvas_context is None: - self._canvas_context = GPUCanvasContextOffline(self) + self._canvas_context = GPUCanvasContext(self) return self._canvas_context def present(self, texture_view): @@ -43,7 +43,7 @@ def get_preferred_format(self): return "rgba8unorm-srgb" -class GPUCanvasContextOffline(base.GPUCanvasContext): +class GPUCanvasContext(base.GPUCanvasContext): """Helper class for canvases that render to a texture.""" def __init__(self, canvas): diff --git a/wgpu/gui/jupyter.py b/wgpu/gui/jupyter.py index ffa99a95..5a84b844 100644 --- a/wgpu/gui/jupyter.py +++ b/wgpu/gui/jupyter.py @@ -91,7 +91,7 @@ def _request_draw(self): # Implementation needed for WgpuOffscreenCanvas def present(self, texture_view): - # This gets called at the end of a draw pass via GPUCanvasContextOffline + # This gets called at the end of a draw pass via _offscreen.GPUCanvasContext device = texture_view._device size = texture_view.size bytes_per_pixel = 4 diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index d15ac625..acf83431 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -19,7 +19,7 @@ * Validated 37 classes, 113 methods, 43 properties ### Patching API for backends/rs.py * Diffs for GPUAdapter: add request_device_tracing -* Validated 37 classes, 99 methods, 0 properties +* Validated 37 classes, 101 methods, 0 properties ## Validating rs.py * Enum field TextureFormat.rgb10a2uint missing in wgpu.h * Enum PipelineErrorReason missing in wgpu.h @@ -28,6 +28,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum field DeviceLostReason.unknown missing in wgpu.h * Wrote 232 enum mappings and 47 struct-field mappings to rs_mappings.py -* Validated 87 C function calls -* Not using 116 C functions +* Validated 89 C function calls +* Not using 113 C functions * Validated 71 C structs