Update

[ghstack-poisoned]
pytorch · Jan 23, 2025 · b589ce7 · b589ce7
2 parents 94d9426 + d0e434c
commit b589ce7
Show file tree

Hide file tree

Showing 12 changed files with 1,071 additions and 354 deletions.
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
@@ -2,7 +2,7 @@
 
 {% block sidebartitle %}
     <div class="version">
-        <a href='tba'>{{ version }} &#x25BC</a>
+        <a href='index.html'>{{ version }} &#x25BC</a>
     </div>
     {% include "searchbox.html" %}
 {% endblock %}
@@ -22,7 +22,7 @@
     // to point to the torchao repo.
     var overwrite = function (_) {
       if ($(this).length > 0) {
-        $(this)[0].href = "https://github.com/pytorch-labs/ao"
+        $(this)[0].href = "https://github.com/pytorch/ao"
       }
     }
     // PC

diff --git a/docs/source/api_ref_sparsity.rst b/docs/source/api_ref_sparsity.rst
@@ -12,7 +12,7 @@ torchao.sparsity
 
     WandaSparsifier
     PerChannelNormObserver
-    apply_sparse_semi_structured
     apply_fake_sparsity
-
-
+    sparsify_
+    semi_sparse_weight
+    int8_dynamic_activation_int8_semi_sparse_weight
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -3,80 +3,25 @@ Welcome to the torchao Documentation
 
 `torchao <https://github.com/pytorch/ao>`__ is a library for custom data types & optimizations. Quantize and sparsify weights, gradients, optimizers & activations for inference and training using native PyTorch. Please checkout torchao `README <https://github.com/pytorch/ao#torchao-pytorch-architecture-optimization>`__ for an overall introduction to the library and recent highlight and updates. The documentation here will focus on:
 
-1. API Reference 
-2. Developer Contribution Guide
-3. Tutorials
+1. Getting Started
+2. Developer Notes
+3. API Reference
+4. Tutorials
 
-..
-   .. grid:: 3
-
-      .. grid-item-card:: :octicon:`file-code;1em`
-         Getting Started
-         :img-top: _static/img/card-background.svg
-         :link: getting-started.html
-         :link-type: url
-
-         Learn about how to get started with torchao
-         and ts application in your projects.
-
-      .. grid-item-card:: :octicon:`file-code;1em`
-         Concepts
-         :img-top: _static/img/card-background.svg
-         :link: dtypes.html
-         :link-type: url
-
-         Learn about the key torchao concepts such
-         as dtypes, quantization, sparsity, among others.
-
-      .. grid-item-card:: :octicon:`file-code;1em`
-         API Reference
-         :img-top: _static/img/card-background.svg
-         :link: api_ref_intro.html
-         :link-type: url
-
-         A comprehensive reference for the torchao
-         API and its functionalities.
-
-   Tutorials
-   ~~~~~~~~~
-
-   Ready to experiment? Check out some of the
-   torchao tutorials.
-
-   .. customcardstart::
-
-   .. customcarditem::
-      :header: Template Tutorial
-      :card_description: A placeholder template for demo purposes
-      :image: _static/img/generic-pytorch-logo.png
-      :link: tutorials/template_tutorial.html
-      :tags: template
-
-   .. customcardend::
-
-
-.. ----------------------------------------------------------------------
-.. Below is the toctree i.e. it defines the content of the left sidebar.
-.. Each of the entry below corresponds to a file.rst in docs/source/.
-.. ----------------------------------------------------------------------
-
-..
-   .. toctree::
-      :glob:
-      :maxdepth: 1
-      :caption: Getting Started
-      :hidden:
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Getting Started
 
-      overview
-      getting-started
+   getting-started
+   sparsity
 
-   .. toctree::
-      :glob:
-      :maxdepth: 1
-      :caption: Tutorials
-      :hidden:
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Developer Notes
 
-      tutorials/template_tutorial
+   contributor_guide
 
 .. toctree::
    :glob:
@@ -86,20 +31,10 @@ Welcome to the torchao Documentation
    api_ref_dtypes
    api_ref_quantization
    api_ref_sparsity
-..
-      api_ref_kernel
-
-.. toctree::
-   :glob:
-   :maxdepth: 1
-   :caption: Contributor Guide
-
-   contributor_guide
 
 .. toctree::
    :glob:
    :maxdepth: 1
    :caption: Tutorials
 
    serialization
-
diff --git a/docs/source/sparsity.rst b/docs/source/sparsity.rst
diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py
@@ -8,7 +8,7 @@
     run_tests,
 )
 
-from torchao.core.config import AOBaseWorkflowConfig
+from torchao.core.config import AOBaseConfig
 from torchao.dtypes import CutlassInt4PackedLayout, Int4CPULayout, SemiSparseLayout
 from torchao.quantization import (
     float8_weight_only,
@@ -25,6 +25,10 @@
     is_sm_at_least_89,
 )
 
+is_cusparselt_available = (
+    hasattr(torch.backends, "cusparselt") and torch.backends.cusparselt.is_available()
+)
+
 
 def get_quantization_functions(
     do_sparse: bool, do_int4: bool, device: str = "cuda", int4_zp_int: bool = False
@@ -94,11 +98,12 @@ def test_tensor_core_layout_transpose(self):
 
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @common_utils.parametrize(
-        "apply_quant", get_quantization_functions(True, True, "cuda", True)
+        "apply_quant",
+        get_quantization_functions(is_cusparselt_available, True, "cuda", True),
     )
     def test_weights_only(self, apply_quant):
         linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda")
-        if isinstance(apply_quant, AOBaseWorkflowConfig):
+        if isinstance(apply_quant, AOBaseConfig):
             quantize_(linear, apply_quant)
             ql = linear
         else:
@@ -175,12 +180,14 @@ def apply_uint6_weight_only_quant(linear):
 
         deregister_aqt_quantized_linear_dispatch(dispatch_condition)
 
-    @common_utils.parametrize("apply_quant", get_quantization_functions(True, True))
+    @common_utils.parametrize(
+        "apply_quant", get_quantization_functions(is_cusparselt_available, True)
+    )
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     def test_print_quantized_module(self, apply_quant):
         print(apply_quant)
         linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda")
-        if isinstance(apply_quant, AOBaseWorkflowConfig):
+        if isinstance(apply_quant, AOBaseConfig):
             quantize_(linear, apply_quant)
             ql = linear
         else:
@@ -198,7 +205,7 @@ def test_flatten_unflatten(self, device, dtype):
         apply_quant_list = get_quantization_functions(False, True, device)
         for apply_quant in apply_quant_list:
             linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
-            if isinstance(apply_quant, AOBaseWorkflowConfig):
+            if isinstance(apply_quant, AOBaseConfig):
                 quantize_(linear, apply_quant)
             else:
                 ql = apply_quant(linear)

diff --git a/torchao/core/config.py b/torchao/core/config.py
@@ -1,13 +1,10 @@
 import abc
 
 
-# directory location for this might need more polish
-class AOBaseWorkflowConfig(abc.ABC):
+class AOBaseConfig(abc.ABC):
     """
     If a workflow config inherits from this then `quantize_` knows
-    what to do with it.
-
-    TODO write a better docblock.
+    how to apply it to a model.
     """
 
     pass