diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 65d5fe9b..2899282c 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -19,7 +19,31 @@
 class Device:
+    """Represent a GPU and act as an entry point for cuda.core features.
+
+    This is a singleton object that helps ensure interoperability
+    across multiple libraries imported in the process, so that they
+    all see and use the same GPU device.
+
+    While acting as the entry point, this object allows many other CUDA
+    resources to be allocated, such as streams and buffers. Any
+    :obj:`Context`-dependent resource created through this device will
+    continue to refer to this device's context.
+
+    The returned :obj:`Device` object is a thread-local singleton
+    for the specified device.
+
+    Note
+    ----
+    Will not initialize the GPU.
+
+    Parameters
+    ----------
+    device_id : int, optional
+        Device ordinal to return a :obj:`Device` object for.
+        Default value of `None` returns the currently used device.
+
+    """
     __slots__ = ("_id", "_mr", "_has_inited")

     def __new__(cls, device_id=None):
@@ -54,15 +78,29 @@ def _check_context_initialized(self, *args, **kwargs):

     @property
     def device_id(self) -> int:
+        """Return device ordinal."""
         return self._id

     @property
     def pci_bus_id(self) -> str:
+        """Return a PCI Bus Id string for this device."""
         bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
         return bus_id[:12].decode()

     @property
     def uuid(self) -> str:
+        """Return a UUID for the device.
+
+        Returns a 16-octet UUID identifying the device. If the device is in
+        MIG mode, returns its MIG UUID, which uniquely identifies the
+        subscribed MIG compute instance.
+
+        Note
+        ----
+        MIG UUID is only returned when the device is in MIG mode and the
+        driver is CUDA 11.4 or newer.
+
+        """
         driver_ver = handle_return(cuda.cuDriverGetVersion())
         if driver_ver >= 11040:
             uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
@@ -74,19 +112,21 @@ def uuid(self) -> str:

     @property
     def name(self) -> str:
-        # assuming a GPU name is less than 128 characters...
-        name = handle_return(cuda.cuDeviceGetName(128, self._id))
+        """Return the device name."""
+        # Use 256 characters to be consistent with CUDA Runtime
+        name = handle_return(cuda.cuDeviceGetName(256, self._id))
         name = name.split(b'\0')[0]
         return name.decode()

     @property
     def properties(self) -> dict:
+        """Return information about the compute-device."""
         # TODO: pythonize the key names
         return handle_return(cudart.cudaGetDeviceProperties(self._id))

     @property
     def compute_capability(self) -> ComputeCapability:
-        """Returns a named tuple with 2 fields: major and minor. """
+        """Return a named tuple with 2 fields: major and minor."""
         major = handle_return(cudart.cudaDeviceGetAttribute(
             cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id))
         minor = handle_return(cudart.cudaDeviceGetAttribute(
@@ -96,12 +136,20 @@ def compute_capability(self) -> ComputeCapability:
     @property
     @precondition(_check_context_initialized)
     def context(self) -> Context:
+        """Return the current :obj:`Context` associated with this device.
+
+        Note
+        ----
+        Device must be initialized.
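+
+        Examples
+        --------
+        A minimal usage sketch (assuming a visible device 0 that has been
+        set current; the variable names are illustrative only):
+
+        >>> from cuda.core.experimental import Device
+        >>> dev = Device(0)
+        >>> dev.set_current()
+        >>> ctx = dev.context  # the context made current by set_current()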
+ + """ ctx = handle_return(cuda.cuCtxGetCurrent()) assert int(ctx) != 0 return Context._from_ctx(ctx, self._id) @property def memory_resource(self) -> MemoryResource: + """Return :obj:`MemoryResource` associated with this device.""" return self._mr @memory_resource.setter @@ -112,27 +160,53 @@ def memory_resource(self, mr): @property def default_stream(self) -> Stream: + """Return default CUDA :obj:`Stream` associated with this device. + + The type of default stream returned depends on if the environment + variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set. + + If set, returns a per-thread default stream. Otherwise returns + the legacy stream. + + """ return default_stream() def __int__(self): + """Return device_id.""" return self._id def __repr__(self): return f"" def set_current(self, ctx: Context=None) -> Union[Context, None]: - """ - Entry point of this object. Users always start a code by + """Set device to be used for GPU executions. + + Initializes CUDA and sets the calling thread to a valid CUDA + context. By default the primary context is used, but optional `ctx` + parameter can be used to explicitly supply a :obj:`Context` object. + + Providing a `ctx` causes the previous set context to be popped and returned. + + Parameters + ---------- + ctx : :obj:`Context`, optional + Optional context to push onto this device's current thread stack. + + Returns + ------- + Union[:obj:`Context`, None], optional + Popped context. + + Examples + -------- + Acts as an entry point of this object. Users always start a code by calling this method, e.g. - + >>> from cuda.core.experimental import Device >>> dev0 = Device(0) >>> dev0.set_current() >>> # ... do work on device 0 ... - - The optional ctx argument is for advanced users to bind a - CUDA context with the device. In this case, the previously - set context is popped and returned to the user. + """ if ctx is not None: if not isinstance(ctx, Context): @@ -163,25 +237,94 @@ def set_current(self, ctx: Context=None) -> Union[Context, None]: self._has_inited = True def create_context(self, options: ContextOptions = None) -> Context: - # Create a Context object (but do NOT set it current yet!). - # ContextOptions is a dataclass for setting e.g. affinity or CIG - # options. + """Create a new :obj:`Context` object. + + Note + ---- + The newly context will not be set as current. + + Parameters + ---------- + options : :obj:`ContextOptions`, optional + Customizable dataclass for context creation options. + + Returns + ------- + :obj:`Context` + Newly created context object. + + """ raise NotImplementedError("TODO") @precondition(_check_context_initialized) def create_stream(self, obj=None, options: StreamOptions=None) -> Stream: - # Create a Stream object by either holding a newly created - # CUDA stream or wrapping an existing foreign object supporting - # the __cuda_stream__ protocol. In the latter case, a reference - # to obj is held internally so that its lifetime is managed. + """Create a Stream object. + + New stream objects can be created in two different ways: + + 1) Create a new CUDA stream with customizable `options`. + 2) Wrap an existing foreign `obj` supporting the __cuda_stream__ protocol. + + Option (2) internally holds a reference to the foreign object + such that the lifetime is managed. + + Note + ---- + Device must be initialized. + + Parameters + ---------- + obj : Any, optional + Any object supporting the __cuda_stream__ protocol. 
+        options : :obj:`StreamOptions`, optional
+            Customizable dataclass for stream creation options.
+
+        Returns
+        -------
+        :obj:`Stream`
+            Newly created stream object.
+
+        """
         return Stream._init(obj=obj, options=options)

     @precondition(_check_context_initialized)
     def allocate(self, size, stream=None) -> Buffer:
+        """Allocate device memory from a specified stream.
+
+        Allocates device memory of `size` bytes on the specified `stream`
+        using the memory resource currently associated with this Device.
+
+        The `stream` parameter is optional; if it is not provided, the
+        default stream is used.
+
+        Note
+        ----
+        Device must be initialized.
+
+        Parameters
+        ----------
+        size : int
+            Number of bytes to allocate.
+        stream : :obj:`Stream`, optional
+            The stream establishing the stream ordering semantic.
+            Default value of `None` uses the default stream.
+
+        Returns
+        -------
+        :obj:`Buffer`
+            Newly created buffer object.
+
+        """
         if stream is None:
             stream = default_stream()
         return self._mr.allocate(size, stream)

     @precondition(_check_context_initialized)
     def sync(self):
+        """Synchronize the device.
+
+        Note
+        ----
+        Device must be initialized.
+
+        """
         handle_return(cudart.cudaDeviceSynchronize())
diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py
index 3c85d9fe..a6d5da28 100644
--- a/cuda_core/cuda/core/experimental/_event.py
+++ b/cuda_core/cuda/core/experimental/_event.py
@@ -13,17 +13,46 @@

 @dataclass
 class EventOptions:
+    """Customizable :obj:`Event` options.
+
+    Attributes
+    ----------
+    enable_timing : bool, optional
+        Event will record timing data. (Default to False)
+    busy_waited_sync : bool, optional
+        If True, event will use blocking synchronization. When a CPU
+        thread calls synchronize, the call will block until the event
+        has actually been completed.
+        Otherwise, the CPU thread will busy-wait until the event has
+        been completed. (Default to False)
+    support_ipc : bool, optional
+        Event will be suitable for interprocess use.
+        Note that enable_timing must be False. (Default to False)
+
+    """
     enable_timing: Optional[bool] = False
     busy_waited_sync: Optional[bool] = False
    support_ipc: Optional[bool] = False


 class Event:
+    """Represent a record at a specific point of execution within a CUDA stream.
+
+    Applications can asynchronously record events at any point in
+    the program. An event keeps a record of all previous work within
+    the last recorded stream.
+
+    Events can be used to monitor a device's progress, query completion
+    of work up to the event's record, and help establish dependencies
+    between GPU work submissions.
+
+    Directly creating an :obj:`Event` is not supported due to ambiguity,
+    and they should instead be created through a :obj:`Stream` object.
+
+    """
     __slots__ = ("_handle", "_timing_disabled", "_busy_waited")

     def __init__(self):
-        # minimal requirements for the destructor
         self._handle = None
         raise NotImplementedError(
             "directly creating an Event object can be ambiguous. Please call "
@@ -51,37 +80,45 @@ def _init(options: Optional[EventOptions]=None):
         return self

     def __del__(self):
+        """Return close(self)."""
         self.close()

     def close(self):
-        # Destroy the event.
+        """Destroy the event."""
         if self._handle:
             handle_return(cuda.cuEventDestroy(self._handle))
             self._handle = None

     @property
     def is_timing_disabled(self) -> bool:
-        # Check if this instance can be used for the timing purpose.
+ """Return True if the event does not record timing data, otherwise False.""" return self._timing_disabled @property def is_sync_busy_waited(self) -> bool: - # Check if the event synchronization would keep the CPU busy-waiting. + """Return True if the event synchronization would keep the CPU busy-waiting, otherwise False.""" return self._busy_waited @property def is_ipc_supported(self) -> bool: - # Check if this instance can be used for IPC. + """Return True if this event can be used as an interprocess event, otherwise False.""" raise NotImplementedError("TODO") def sync(self): - # Sync over the event. + """Synchronize until the event completes. + + If the event was created with busy_waited_sync, then the + calling CPU thread will block until the event has been + completed by the device. + Otherwise the CPU thread will busy-wait until the event + has been completed. + + """ handle_return(cuda.cuEventSynchronize(self._handle)) @property def is_done(self) -> bool: - # Return True if all captured works have been completed, - # otherwise False. + """Return True if all captured works have been completed, otherwise False.""" result, = cuda.cuEventQuery(self._handle) if result == cuda.CUresult.CUDA_SUCCESS: return True @@ -92,4 +129,5 @@ def is_done(self) -> bool: @property def handle(self) -> int: + """Return the underlying cudaEvent_t pointer address as Python int.""" return int(self._handle) diff --git a/cuda_core/cuda/core/experimental/_launcher.py b/cuda_core/cuda/core/experimental/_launcher.py index 4b9533cb..9991638f 100644 --- a/cuda_core/cuda/core/experimental/_launcher.py +++ b/cuda_core/cuda/core/experimental/_launcher.py @@ -37,7 +37,23 @@ def _lazy_init(): @dataclass class LaunchConfig: - """ + """Customizable launch options. + + Attributes + ---------- + grid : Union[tuple, int] + Collection of threads that will execute a kernel function. + block : Union[tuple, int] + Group of threads (Thread Block) that will execute on the same + multiprocessor. Threads within a thread blocks have access to + shared memory and can be explicitly synchronized. + stream : :obj:`Stream` + The stream establishing the stream ordering semantic of a + launch. + shmem_size : int, optional + Dynamic shared-memory size per thread block in bytes. + (Default to size 0) + """ # TODO: expand LaunchConfig to include other attributes grid: Union[tuple, int] = None @@ -87,6 +103,21 @@ def _cast_to_3_tuple(self, cfg): def launch(kernel, config, *kernel_args): + """Launches a :obj:`~cuda.core.experimental._module.Kernel` + object with launch-time configuration. + + Parameters + ---------- + kernel : :obj:`~cuda.core.experimental._module.Kernel` + Kernel to launch. + config : :obj:`LaunchConfig` + Launch configurations inline with options provided by + :obj:`LaunchConfig` dataclass. + *kernel_args : Any + Variable length argument list that is provided to the + launching kernel. + + """ if not isinstance(kernel, Kernel): raise ValueError config = check_or_create_options(LaunchConfig, config, "launch config") diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py index 4ef2cbc3..678f26ee 100644 --- a/cuda_core/cuda/core/experimental/_memory.py +++ b/cuda_core/cuda/core/experimental/_memory.py @@ -22,6 +22,26 @@ class Buffer: + """Represent a handle to allocated memory. + + This generic object provides a unified representation for how + different memory resources are to give access to their memory + allocations. 
+
+    Support for data interchange mechanisms is provided by
+    establishing both the DLPack and the Python-level buffer
+    protocols.
+
+    Parameters
+    ----------
+    ptr : Any
+        Allocated buffer handle object
+    size : Any
+        Memory size of the buffer
+    mr : :obj:`MemoryResource`, optional
+        Memory resource associated with the buffer
+
+    """
     # TODO: handle ownership? (_mr could be None)
     __slots__ = ("_ptr", "_size", "_mr",)

@@ -32,9 +52,23 @@ def __init__(self, ptr, size, mr: MemoryResource=None):
         self._mr = mr

     def __del__(self):
-        self.close(default_stream())
+        """Return close(self)."""
+        self.close()

     def close(self, stream=None):
+        """Deallocate this buffer asynchronously on the given stream.
+
+        This buffer is released back to its memory resource
+        asynchronously on the given stream.
+
+        Parameters
+        ----------
+        stream : Any, optional
+            The stream object with a __cuda_stream__ protocol to
+            use for asynchronous deallocation. Defaults to using
+            the default stream.
+
+        """
         if self._ptr and self._mr is not None:
             if stream is None:
                 stream = default_stream()
@@ -44,42 +78,56 @@

     @property
     def handle(self):
+        """Return the buffer handle object."""
         return self._ptr

     @property
     def size(self):
+        """Return the memory size of this buffer."""
         return self._size

     @property
     def memory_resource(self) -> MemoryResource:
-        # Return the memory resource from which this buffer was allocated.
+        """Return the memory resource associated with this buffer."""
         return self._mr

     @property
     def is_device_accessible(self) -> bool:
-        # Check if this buffer can be accessed from GPUs.
+        """Return True if this buffer can be accessed by the GPU, otherwise False."""
         if self._mr is not None:
             return self._mr.is_device_accessible
         raise NotImplementedError

     @property
     def is_host_accessible(self) -> bool:
-        # Check if this buffer can be accessed from CPUs.
+        """Return True if this buffer can be accessed by the CPU, otherwise False."""
         if self._mr is not None:
             return self._mr.is_host_accessible
         raise NotImplementedError

     @property
     def device_id(self) -> int:
+        """Return the device ordinal of this buffer."""
         if self._mr is not None:
             return self._mr.device_id
         raise NotImplementedError

     def copy_to(self, dst: Buffer=None, *, stream) -> Buffer:
-        # Copy from this buffer to the dst buffer asynchronously on the
-        # given stream. The dst buffer is returned. If the dst is not provided,
-        # allocate one from self.memory_resource. Raise an exception if the
-        # stream is not provided.
+        """Copy from this buffer to the dst buffer asynchronously on the given stream.
+
+        Copies the data from this buffer to the provided dst buffer.
+        If the dst buffer is not provided, then a new buffer is first
+        allocated using the associated memory resource before the copy.
+
+        Parameters
+        ----------
+        dst : :obj:`Buffer`, optional
+            Destination buffer to copy the data into.
+        stream : Any
+            Keyword argument specifying the stream for the
+            asynchronous copy
+
+        Returns
+        -------
+        :obj:`Buffer`
+            The destination buffer (newly allocated if `dst` was not provided).
+
+        """
         if stream is None:
             raise ValueError("stream must be provided")
         if dst is None:
@@ -93,8 +141,17 @@
         return dst

     def copy_from(self, src: Buffer, *, stream):
-        # Copy from the src buffer to this buffer asynchronously on the
-        # given stream. Raise an exception if the stream is not provided.
+        """Copy from the src buffer to this buffer asynchronously on the given stream.
+
+        Parameters
+        ----------
+        src : :obj:`Buffer`
+            Source buffer to copy data from
+        stream : Any
+            Keyword argument specifying the stream for the
+            asynchronous copy
+
+        """
         if stream is None:
             raise ValueError("stream must be provided")
         if src._size != self._size:
@@ -141,7 +198,7 @@ def __buffer__(self, flags: int, /) -> memoryview:
         raise NotImplementedError("TODO")

     def __release_buffer__(self, buffer: memoryview, /):
-        # Supporting methond paired with __buffer__.
+        # Supporting method paired with __buffer__.
         raise NotImplementedError("TODO")
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 60d4db97..8b0ff9a7 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -46,6 +46,15 @@ def _lazy_init():

 class Kernel:
+    """Represent a compiled kernel that has been loaded onto the device.
+
+    Kernel instances can be executed when passed directly into the
+    :func:`~cuda.core.experimental.launch` function.
+
+    Directly creating a :obj:`Kernel` is not supported, and they
+    should instead be created through an :obj:`ObjectCode` object.
+
+    """

     __slots__ = ("_handle", "_module",)

@@ -65,6 +74,35 @@ def _from_obj(obj, mod):

 class ObjectCode:
+    """Represent a compiled program that was loaded onto the device.
+
+    This object provides a unified interface for different types of
+    compiled programs that are loaded onto the device.
+
+    Loads the module library with specified module code and JIT options.
+
+    Note
+    ----
+    Usage under CUDA 11.x will only load to the current device
+    context.
+
+    Parameters
+    ----------
+    module : Union[bytes, str]
+        Either a bytes object containing the module to load, or
+        a file path string pointing to the module file to load.
+    code_type : Any
+        String of the code type.
+        Supported options are "ptx", "cubin" and "fatbin".
+    jit_options : Optional
+        Mapping of JIT options to use during module loading.
+        (Default to no options)
+    symbol_mapping : Optional
+        Keyword argument dictionary specifying how symbol names
+        should be mapped before trying to retrieve them.
+        (Default to no mappings)
+
+    """
     __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "fatbin")

@@ -107,6 +145,19 @@ def __del__(self):
         pass

     def get_kernel(self, name):
+        """Return the :obj:`Kernel` of a specified name from this object code.
+
+        Parameters
+        ----------
+        name : Any
+            Name of the kernel to retrieve.
+
+        Returns
+        -------
+        :obj:`Kernel`
+            Newly created kernel object.
+
+        """
         try:
             name = self._sym_map[name]
         except KeyError:
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index ec0778a3..5439c74a 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -8,6 +8,21 @@

 class Program:
+    """Represent the compilation machinery that processes programs into
+    :obj:`~cuda.core.experimental._module.ObjectCode`.
+
+    This object provides a unified interface to multiple underlying
+    compiler libraries. Compilation support is enabled for a wide
+    range of code types and compilation types.
+
+    Parameters
+    ----------
+    code : Any
+        String of the CUDA Runtime Compilation program.
+    code_type : Any
+        String of the code type. Currently only ``"c++"`` is supported.
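+
+    Examples
+    --------
+    A minimal sketch of the intended compile workflow (the kernel source
+    below is illustrative only, and NVRTC must be available at runtime):
+
+    >>> from cuda.core.experimental import Program
+    >>> prog = Program('extern "C" __global__ void noop() {}', "c++")
+    >>> mod = prog.compile("ptx")  # an ObjectCode instance
+    >>> prog.close()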
+ + """ __slots__ = ("_handle", "_backend", ) _supported_code_type = ("c++", ) @@ -30,14 +45,40 @@ def __init__(self, code, code_type): raise NotImplementedError def __del__(self): + """Return close(self).""" self.close() def close(self): + """Destroy this program.""" if self._handle is not None: handle_return(nvrtc.nvrtcDestroyProgram(self._handle)) self._handle = None def compile(self, target_type, options=(), name_expressions=(), logs=None): + """Compile the program with a specific compilation type. + + Parameters + ---------- + target_type : Any + String of the targeted compilation type. + Supported options are "ptx", "cubin" and "ltoir". + options : Union[List, Tuple], optional + List of compilation options associated with the backend + of this :obj:`Program`. (Default to no options) + name_expressions : Union[List, Tuple], optional + List of explicit name expressions to become accessible. + (Default to no expressions) + logs : Any, optional + Object with a write method to receive the logs generated + from compilation. + (Default to no logs) + + Returns + ------- + :obj:`~cuda.core.experimental._module.ObjectCode` + Newly created code object. + + """ if target_type not in self._supported_target_type: raise NotImplementedError @@ -80,8 +121,10 @@ def compile(self, target_type, options=(), name_expressions=(), logs=None): @property def backend(self): + """Return the backend type string associated with this program.""" return self._backend @property def handle(self): + """Return the program handle object.""" return self._handle diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index 95f8ec50..7f50dafd 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -20,12 +20,38 @@ @dataclass class StreamOptions: + """Customizable :obj:`Stream` options. + Attributes + ---------- + nonblocking : bool, optional + Stream does not synchronize with the NULL stream. (Default to True) + priority : int, optional + Stream priority where lower number represents a + higher priority. (Default to lowest priority) + + """ nonblocking: bool = True priority: Optional[int] = None class Stream: + """Represent a queue of GPU operations that are executed in a specific order. + + Applications use streams to control the order of execution for + GPU work. Work within a single stream are executed sequentially. + Whereas work across multiple streams can be further controlled + using stream priorities and :obj:`Event` managements. + + Advanced users can utilize default streams for enforce complex + implicit synchronization behaviors. + + Directly creating a :obj:`Stream` is not supported due to ambiguity. + New streams should instead be created through a :obj:`Device` + object, or created directly through using an existing handle + using Stream.from_handle(). 
+ + """ __slots__ = ("_handle", "_nonblocking", "_priority", "_owner", "_builtin", "_device_id", "_ctx_handle") @@ -74,13 +100,12 @@ def _init(obj=None, *, options: Optional[StreamOptions]=None): else: flags = cuda.CUstream_flags.CU_STREAM_DEFAULT + high, low = handle_return(cudart.cudaDeviceGetStreamPriorityRange()) if priority is not None: - high, low = handle_return( - cudart.cudaDeviceGetStreamPriorityRange()) if not (low <= priority <= high): raise ValueError(f"{priority=} is out of range {[low, high]}") else: - priority = 0 + priority = high self._handle = handle_return( cuda.cuStreamCreateWithPriority(flags, priority)) @@ -94,9 +119,16 @@ def _init(obj=None, *, options: Optional[StreamOptions]=None): return self def __del__(self): + """Return close(self).""" self.close() def close(self): + """Destroy the stream. + + Destroy the stream if we own it. Borrowed foreign stream + object will instead have their references released. + + """ if self._owner is None: if self._handle and not self._builtin: handle_return(cuda.cuStreamDestroy(self._handle)) @@ -106,15 +138,17 @@ def close(self): @property def __cuda_stream__(self) -> Tuple[int, int]: + """Return an instance of a __cuda_stream__ protocol.""" return (0, int(self._handle)) @property def handle(self) -> int: - # Return the underlying cudaStream_t pointer address as Python int. + """Return the underlying cudaStream_t pointer address as Python int.""" return int(self._handle) @property def is_nonblocking(self) -> bool: + """Return True if this is a nonblocking stream, otherwise False.""" if self._nonblocking is None: flag = handle_return(cuda.cuStreamGetFlags(self._handle)) if flag == cuda.CUstream_flags.CU_STREAM_NON_BLOCKING: @@ -125,15 +159,35 @@ def is_nonblocking(self) -> bool: @property def priority(self) -> int: + """Return the stream priority.""" if self._priority is None: prio = handle_return(cuda.cuStreamGetPriority(self._handle)) self._priority = prio return self._priority def sync(self): + """Synchronize the stream.""" handle_return(cuda.cuStreamSynchronize(self._handle)) def record(self, event: Event=None, options: EventOptions=None) -> Event: + """Record an event onto the stream. + + Creates an Event object (or reuses the given one) by + recording on the stream. + + Parameters + ---------- + event : :obj:`Event`, optional + Optional event object to be reused for recording. + options : :obj:`EventOptions`, optional + Customizable dataclass for event creation options. + + Returns + ------- + :obj:`Event` + Newly created event object. + + """ # Create an Event object (or reusing the given one) by recording # on the stream. Event flags such as disabling timing, nonblocking, # and CU_EVENT_RECORD_EXTERNAL, can be set in EventOptions. @@ -145,11 +199,15 @@ def record(self, event: Event=None, options: EventOptions=None) -> Event: return event def wait(self, event_or_stream: Union[Event, Stream]): - # Wait for a CUDA event or a CUDA stream to establish a stream order. - # - # If a Stream instance is provided, the effect is as if an event is - # recorded on the given stream, and then self waits on the recorded - # event. + """Wait for a CUDA event or a CUDA stream. + + Waiting for an event or a stream establishes a stream order. + + If a :obj:`Stream` is provided, then wait until the stream's + work is completed. This is done by recording a new :obj:`Event` + on the stream and then waiting on it. 
+ + """ if isinstance(event_or_stream, Event): event = event_or_stream.handle discard_event = False @@ -175,12 +233,15 @@ def wait(self, event_or_stream: Union[Event, Stream]): @property def device(self) -> Device: - # Inverse look-up to find on which device this stream instance was - # created. - # - # Note that Stream.device.context might not necessarily agree with - # Stream.context, in cases where a different CUDA context is set - # current after a stream was created. + """Return the :obj:`Device` singleton associated with this stream. + + Note + ---- + The current context on the device may differ from this + stream's context. This case occurs when a different CUDA + context is set current after a stream is created. + + """ from cuda.core.experimental._device import Device # avoid circular import if self._device_id is None: # Get the stream context first @@ -192,8 +253,7 @@ def device(self) -> Device: @property def context(self) -> Context: - # Inverse look-up to find in which CUDA context this stream instance - # was created + """Return the :obj:`Context` associated with this stream.""" if self._ctx_handle is None: self._ctx_handle = handle_return( cuda.cuStreamGetCtx(self._handle)) @@ -203,6 +263,28 @@ def context(self) -> Context: @staticmethod def from_handle(handle: int) -> Stream: + """Create a new :obj:`Stream` object from a foreign stream handle. + + Uses a cudaStream_t pointer address represented as a Python int + to create a new :obj:`Stream` object. + + Note + ---- + Stream lifetime is not managed, foreign object must remain + alive while this steam is active. + + Parameters + ---------- + handle : int + Stream handle representing the address of a foreign + stream object. + + Returns + ------- + :obj:`Stream` + Newly created stream object. + + """ class _stream_holder: @property def __cuda_stream__(self): @@ -235,6 +317,15 @@ def __init__(self): def default_stream(): + """Return the default CUDA :obj:`Stream`. + + The type of default stream returned depends on if the environment + variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set. + + If set, returns a per-thread default stream. Otherwise returns + the legacy stream. + + """ # TODO: flip the default use_ptds = int(os.environ.get('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', 0)) if use_ptds: diff --git a/cuda_core/docs/source/_templates/autosummary/class.rst b/cuda_core/docs/source/_templates/autosummary/class.rst new file mode 100644 index 00000000..b45a3fd5 --- /dev/null +++ b/cuda_core/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,26 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + {% if methods %} + .. rubric:: {{ _('Methods') }} + + {% for item in methods %} + .. automethod:: {{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + {% for item in attributes %} + .. autoattribute:: {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/cuda_core/docs/source/_templates/autosummary/dataclass.rst b/cuda_core/docs/source/_templates/autosummary/dataclass.rst new file mode 100644 index 00000000..b8c35324 --- /dev/null +++ b/cuda_core/docs/source/_templates/autosummary/dataclass.rst @@ -0,0 +1,10 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + .. 
automethod:: __init__
+   {% endblock %}
+
diff --git a/cuda_core/docs/source/_templates/autosummary/namedtuple.rst b/cuda_core/docs/source/_templates/autosummary/namedtuple.rst
new file mode 100644
index 00000000..d3ad7d24
--- /dev/null
+++ b/cuda_core/docs/source/_templates/autosummary/namedtuple.rst
@@ -0,0 +1,8 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+   :members: __new__
+   :special-members: __new__
+   :exclude-members: count, index, __reduce__, __reduce_ex__, __repr__, __hash__, __str__, __getnewargs__
\ No newline at end of file
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index 756ed776..1cb9811b 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -3,6 +3,11 @@
 ``cuda.core.experimental`` API Reference
 ========================================

+All of the APIs listed (or cross-referenced from) below are considered *experimental*
+and subject to future changes without deprecation notice. Once stabilized, they will be
+moved out of the ``experimental`` namespace.
+
+
 CUDA runtime
 ------------

@@ -10,6 +15,14 @@ CUDA runtime
    :toctree: generated/

    Device
+   launch
+
+   :template: dataclass.rst
+
+   EventOptions
+   StreamOptions
+   LaunchConfig
+

 CUDA compilation toolchain
 --------------------------
diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst
new file mode 100644
index 00000000..f100eb7c
--- /dev/null
+++ b/cuda_core/docs/source/api_private.rst
@@ -0,0 +1,28 @@
+:orphan:
+
+.. This page is to generate documentation for private classes exposed to users,
+   i.e., users cannot instantiate them by themselves but may use their properties
+   or methods via returned values from public APIs. These classes must be referred
+   to in public APIs returning their instances.
+
+.. currentmodule:: cuda.core.experimental
+
+CUDA runtime
+------------
+
+.. autosummary::
+   :toctree: generated/
+
+   _memory.Buffer
+   _stream.Stream
+   _event.Event
+
+
+CUDA compilation toolchain
+--------------------------
+
+.. autosummary::
+   :toctree: generated/
+
+   _module.Kernel
+   _module.ObjectCode
diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py
index 4be77656..5b28d331 100644
--- a/cuda_core/docs/source/conf.py
+++ b/cuda_core/docs/source/conf.py
@@ -34,7 +34,8 @@
     'sphinx.ext.autosummary',
     'sphinx.ext.napoleon',
     'myst_nb',
-    'enum_tools.autoenum'
+    'enum_tools.autoenum',
+    'sphinx_copybutton',
 ]

 # Add any paths that contain templates here, relative to this directory.
@@ -77,3 +78,6 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
+
+# skip cmdline prompts
+copybutton_exclude = '.linenos, .gp'
diff --git a/cuda_core/docs/source/install.md b/cuda_core/docs/source/install.md
index 593f7225..e13f37df 100644
--- a/cuda_core/docs/source/install.md
+++ b/cuda_core/docs/source/install.md
@@ -5,25 +5,19 @@
 `cuda.core` is supported on all platforms that CUDA is supported on. Specific
 dependencies are as follows:

-* Driver: Linux (450.80.02 or later) Windows (456.38 or later)
-* CUDA Toolkit 12.0 to 12.6
+|                   | CUDA 11      | CUDA 12     |
+|------------------ | ------------ | ----------- |
+| CUDA Toolkit [^1] | 11.2 - 11.8  | 12.0 - 12.6 |
+| Driver            | 450.80.02+ (Linux), 452.39+ (Windows) | 525.60.13+ (Linux), 527.41+ (Windows) |

-
-## Installing from PyPI
-
-Coming soon!
-
-
-## Installing from Conda
-
-Coming soon!
+[^1]: Including `cuda-python`.

 ## Installing from Source

-```shell
+```console
 $ git clone https://github.com/NVIDIA/cuda-python
 $ cd cuda-python/cuda_core
 $ pip install .
 ```

-For now `cuda-python` (`cuda-bindings` later) is a required dependency.
+For now `cuda-python` (`cuda-bindings` later) 11.x or 12.x is a required dependency.
diff --git a/cuda_core/docs/source/release/0.1.0-notes.md b/cuda_core/docs/source/release/0.1.0-notes.md
index 2247e0cb..1ebb41f9 100644
--- a/cuda_core/docs/source/release/0.1.0-notes.md
+++ b/cuda_core/docs/source/release/0.1.0-notes.md
@@ -1,15 +1,17 @@
-# ``cuda.core`` Release notes
+# `cuda.core` Release notes

-Released on Oct XX, 2024
+Released on Nov XX, 2024

 ## Highlights

-- Initial beta 1 release
+- Initial EA1 (early access) release
 - Supports all platforms that CUDA is supported on
-- Supports all CUDA 12.x drivers
-- Supports all CUDA 12.x Toolkits
+- Supports all CUDA 11.x/12.x drivers
+- Supports all CUDA 11.x/12.x Toolkits
 - Pythonic CUDA runtime and other core functionalities

 ## Limitations

-- Source code release only; Python packages coming in a future release
-- Support for CUDA 11.x coming in the next release
+- All APIs are currently *experimental* and subject to change without deprecation notice.
+  Please kindly share your feedback with us so that we can make `cuda.core` better!
+- Source code release only; `pip`/`conda` support is coming in a future release
+- Windows TCC mode is [not yet supported](https://github.com/NVIDIA/cuda-python/issues/206)
diff --git a/cuda_python/docs/environment-docs.yml b/cuda_python/docs/environment-docs.yml
index 2a3a8ad3..bc9588fd 100644
--- a/cuda_python/docs/environment-docs.yml
+++ b/cuda_python/docs/environment-docs.yml
@@ -11,6 +11,7 @@ dependencies:
   - pytest
   - scipy
   - sphinx
+  - sphinx-copybutton
   - pip:
     - furo
     - myst-nb