diff --git a/Project.toml b/Project.toml index 21819ea..61b5e0f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,21 +1,34 @@ name = "Hwloc" uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d" authors = ["Erik Schnetter "] -version = "3.0.0" +version = "3.1.1" [deps] CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82" Hwloc_jll = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" + +[weakdeps] +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" [compat] +CEnum = "≥ 0.4" Hwloc_jll = "2.8" -julia = "1.6" -CEnum = "0.4" +AbstractTrees = "≥ 0.4.4" +julia = "≥ 1.6" [extras] CpuId = "adafc99b-e345-5852-983c-f28acb93d879" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" + +[extensions] +# * name of extension to the left +# * extension dependencies required to load the extension to the right +# * use a list for multiple extension dependencies +HwlocTrees = "AbstractTrees" [targets] -test = ["Test", "CpuId"] +test = ["Test", "CpuId", "AbstractTrees"] diff --git a/README.md b/README.md index 69aeb33..b3d2719 100644 --- a/README.md +++ b/README.md @@ -24,28 +24,37 @@ On my laptop this gives the following output: julia> using Hwloc julia> topology() -Machine (16.0 GB) - Package L#0 P#0 (16.0 GB) - NUMANode (16.0 GB) + +Machine (31.05 GB) + Package L#0 P#0 (31.05 GB) + NUMANode (31.05 GB) L3 (12.0 MB) - L2 (256.0 kB) + L1 (32.0 kB) + Core L#0 P#0 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#0 P#0 PU L#0 P#0 - PU L#1 P#1 - L2 (256.0 kB) + L1 (32.0 kB) + Core L#1 P#1 - PU L#2 P#2 - PU L#3 P#3 - L2 (256.0 kB) + L1 (32.0 kB) + Core L#2 P#2 - PU L#4 P#4 - PU L#5 P#5 - L2 (256.0 kB) + L1 (32.0 kB) + Core L#3 P#3 - PU L#6 P#6 + PU L#1 P#4 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#1 P#1 + PU L#2 P#1 + PU L#3 P#5 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#2 P#2 + PU L#4 P#2 + PU L#5 P#6 + L2 (1.25 MB) + L1 
(48.0 kB) + Core L#3 P#3 + PU L#6 P#3 PU L#7 P#7 - L2 (256.0 kB) + L1 (32.0 kB) + Core L#4 P#4 - PU L#8 P#8 - PU L#9 P#9 - L2 (256.0 kB) + L1 (32.0 kB) + Core L#5 P#5 - PU L#10 P#10 - PU L#11 P#11 + HostBridge + PCI 00:02.0 (VGA) + GPU "renderD128" + GPU "card0" + PCIBridge + PCI 01:00.0 (NVMExp) + Block(Disk) "nvme0n1" + PCIBridge + PCI 72:00.0 (Network) + Net "wlp114s0" + PCIBridge + PCI 73:00.0 (Other) + Block "mmcblk0" + ``` Often, one is only interested in a summary of this topology. @@ -53,21 +62,67 @@ The function `topology_info()` provides such a compact description, which is loo ```julia julia> topology_info() -Machine: 1 (16.0 GB) - Package: 1 (16.0 GB) - NUMANode: 1 (16.0 GB) +Machine: 1 (31.05 GB) + Package: 1 (31.05 GB) + NUMANode: 1 (31.05 GB) L3Cache: 1 (12.0 MB) - L2Cache: 6 (256.0 KB) - L1Cache: 6 (32.0 KB) - Core: 6 - PU: 12 + L2Cache: 4 (1.25 MB) + L1Cache: 4 (48.0 kB) + Core: 4 + PU: 8 + Bridge: 6 + PCI_Device: 22 + OS_Device: 13 ``` If you prefer a more verbose graphical visualization you may consider using `topology_graphical()`: -Screenshot 2022-09-27 at 12 06 57 - -(Note that as of now this may not produce colorful output on all systems.) 
+``` +julia> topology_graphical() +┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Machine (31GB total) │ +│ │ +│ ┌────────────────────────────────────────────────────────────────────┐ ├┤╶─┬─────┬─────────────┐ │ +│ │ Package L#0 │ │ │ PCI 00:02.0 │ │ +│ │ │ │ └─────────────┘ │ +│ │ ┌────────────────────────────────────────────────────────────────┐ │ │ │ +│ │ │ NUMANode L#0 P#0 (31GB) │ │ ├─────┼┤╶───────┬───────────────────┐ │ +│ │ └────────────────────────────────────────────────────────────────┘ │ │3.9 3.9 │ PCI 01:00.0 │ │ +│ │ │ │ │ │ │ +│ │ ┌────────────────────────────────────────────────────────────────┐ │ │ │ ┌───────────────┐ │ │ +│ │ │ L3 (12MB) │ │ │ │ │ Block nvme0n1 │ │ │ +│ │ └────────────────────────────────────────────────────────────────┘ │ │ │ │ │ │ │ +│ │ │ │ │ │ 953 GB │ │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │ └───────────────┘ │ │ +│ │ │ L2 (1280KB) │ │ L2 (1280KB) │ │ L2 (1280KB) │ │ L2 (1280KB) │ │ │ └───────────────────┘ │ +│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │ │ +│ │ │ ├─────┼┤╶───────┬──────────────────┐ │ +│ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ │0.6 0.6 │ PCI 72:00.0 │ │ +│ │ │ L1d (48KB) │ │ L1d (48KB) │ │ L1d (48KB) │ │ L1d (48KB) │ │ │ │ │ │ +│ │ └────────────┘ └────────────┘ └────────────┘ └────────────┘ │ │ │ ┌──────────────┐ │ │ +│ │ │ │ │ │ Net wlp114s0 │ │ │ +│ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ │ │ └──────────────┘ │ │ +│ │ │ L1i (32KB) │ │ L1i (32KB) │ │ L1i (32KB) │ │ L1i (32KB) │ │ │ └──────────────────┘ │ +│ │ └────────────┘ └────────────┘ └────────────┘ └────────────┘ │ │ │ +│ │ │ └─────┼┤╶───────┬───────────────┐ │ +│ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ 1.0 │ Block mmcblk0 │ │ +│ │ │ Core L#0 │ │ Core L#1 │ │ Core L#2 │ │ Core L#3 │ │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ 238 GB │ │ +│ │ │ ┌────────┐ │ │ 
┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │ └───────────────┘ │ +│ │ │ │ PU L#0 │ │ │ │ PU L#2 │ │ │ │ PU L#4 │ │ │ │ PU L#6 │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ │ P#0 │ │ │ │ P#1 │ │ │ │ P#2 │ │ │ │ P#3 │ │ │ │ +│ │ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │ │ +│ │ │ ┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │ │ +│ │ │ │ PU L#1 │ │ │ │ PU L#3 │ │ │ │ PU L#5 │ │ │ │ PU L#7 │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ │ P#4 │ │ │ │ P#5 │ │ │ │ P#6 │ │ │ │ P#7 │ │ │ │ +│ │ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │ │ +│ │ └────────────┘ └────────────┘ └────────────┘ └────────────┘ │ │ +│ └────────────────────────────────────────────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` +(Note that as of now this may produce colorful output on some systems.) ## Obtaining particular information: @@ -95,18 +150,20 @@ One may also use `getinfo()` to programmatically access some of the output of `t ```julia julia> getinfo() -Dict{Symbol,Int64} with 8 entries: - :L2Cache => 6 - :NUMANode => 1 - :Core => 6 - :Package => 1 - :L1Cache => 6 - :Machine => 1 - :PU => 12 - :L3Cache => 1 +Dict{Symbol, Int64} with 11 entries: + :Package => 1 + :PU => 8 + :OS_Device => 13 + :Core => 4 + :L3Cache => 1 + :Machine => 1 + :PCI_Device => 22 + :L2Cache => 4 + :NUMANode => 1 + :Bridge => 6 + :L1Cache => 4 ``` - ### Cache properties Assuming that multiple caches of the same level (e.g. 
L1) have identical properties, one can use the convenience functions `cachesize()` and `cachelinesize()` to obtain the relevant sizes in Bytes: @@ -190,10 +247,151 @@ julia> collectobjects(:PU, l2cache) Hwloc.Object: PU ``` -### Manual topology query +### Manual topology query and caching On the first call of `gettopology()`, Hwloc.jl examines the current machine's hardware topology and caches the result in `Hwloc.machine_topology`. -To manually query the system topology one may use `Hwloc.topology_load` -which directly `ccall`s into `libhwloc` and directly returns the -resulting `Hwloc.Object`. + +To query the system topology again -- i.e. not using the cached +`Hwloc.Object` representing the entire machine -- simply pass the `reload=true` (`false` by default) kwarg: + +```julia +julia> topo = gettopology(;reload=true) +Hwloc.Object: Machine +``` + +### Do not include I/O devices in topology object + +You may prefer not to include I/O devices in your Hwloc tree; in that case we recommend +passing the `io=false` (`true` by default) kwarg, in addition to `reload` (cf. +above): + +```julia +julia> topo = gettopology(;reload=true, io=false) +Hwloc.Object: Machine + +julia> topology(topo) +Machine (31.05 GB) + Package L#0 P#0 (31.05 GB) + NUMANode (31.05 GB) + L3 (12.0 MB) + L2 (1.25 MB) + L1 (48.0 kB) + Core L#0 P#0 + PU L#0 P#0 + PU L#1 P#4 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#1 P#1 + PU L#2 P#1 + PU L#3 P#5 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#2 P#2 + PU L#4 P#2 + PU L#5 P#6 + L2 (1.25 MB) + L1 (48.0 kB) + Core L#3 P#3 + PU L#6 P#3 + PU L#7 P#7 +``` +(note: to avoid caching by accident, we recommend passing `reload=true` to +`gettopology`) + +### Low-level API for accessing the underlying topology object. + +**Warning:** As discussed earlier, `Hwloc.jl` makes heavy use of caching in the +high-level API. Using the low-level and high-level APIs together can result in +cached values being used by accident!
We therefore recommend that the high-level +`gettopology` function is used, where caching is controlled via the `reload` +kwarg. + +Under the hood, `gettopology` uses `Hwloc.topology_init` and +`Hwloc.topology_load` to directly `ccall` into `libhwloc`. `Hwloc.topology_init` +is responsible for creating a low-level `LibHwloc.hwloc_topology` object. +`Hwloc.topology_load` wraps this in a `Hwloc.Object` Julia object. + +**Note:** `Hwloc.topology_load` is destructive to the `LibHwloc.hwloc_topology` +object: + +```julia +julia> htopo = Hwloc.topology_init() +Ptr{Hwloc.LibHwloc.hwloc_topology} @0x000000000883cf60 + +julia> topo = Hwloc.topology_load(htopo) +Hwloc.Object: Machine + +julia> topo = Hwloc.topology_load(htopo) +ERROR: AssertionError: ierr == 0 +Stacktrace: + [1] topology_load(htopo::Ptr{Hwloc.LibHwloc.hwloc_topology}) + @ Hwloc ~/.julia/dev/Hwloc/src/lowlevel_api.jl:347 + [2] top-level scope + @ REPL[78]:1 +``` + +This is because `LibHwloc.hwloc_topology` objects are not garbage-collected (a call to +`Hwloc.topology_init`, without a later call to `Hwloc.hwloc_topology_destroy` +will leak memory). This is why `Hwloc.topology_load` calls +`Hwloc.hwloc_topology_destroy` after creating the `Hwloc.Object` Julia object +(which is garbage collected!). + + +## Hwloc objects are `AbstractTrees` + +If the [`AbstractTrees`](https://github.com/JuliaCollections/AbstractTrees.jl) +module is loaded, then passing an `Hwloc.Object` to `AbstractTrees.children` +will construct an `HwlocTreeNode`.
Calling `children(gettopology())` will +return the Hwloc tree root: + +```julia +julia> using AbstractTrees, Hwloc + +julia> t = children(gettopology()); + +julia> print_tree(t; maxdepth=2) +Hwloc.Object: Machine +├─ Hwloc.Object: Package [L#0 P#0] +│ ├─ Hwloc.Object: L3Cache +│ │ ⋮ +│ │ +│ └─ Hwloc.Object: NUMANode +└─ Hwloc.Object: Bridge [HostBridge] + ├─ Hwloc.Object: PCI_Device [00:00.0 (HostBridge)] + ├─ Hwloc.Object: PCI_Device [00:02.0 (VGA)] + │ ⋮ + │ + ├─ Hwloc.Object: PCI_Device [00:04.0 (SignalProcessing)] + ├─ Hwloc.Object: Bridge [PCIBridge] + │ ⋮ + │ + ├─ Hwloc.Object: Bridge [PCIBridge] + ├─ Hwloc.Object: Bridge [PCIBridge] + ├─ Hwloc.Object: PCI_Device [00:0a.0 (SignalProcessing)] + ├─ Hwloc.Object: PCI_Device [00:0d.0 (USB)] + ├─ Hwloc.Object: PCI_Device [00:0d.2 (USB)] + ├─ Hwloc.Object: PCI_Device [00:0d.3 (USB)] + ├─ Hwloc.Object: PCI_Device [00:12.0 (Serial)] + ├─ Hwloc.Object: PCI_Device [00:14.0 (USB)] + ├─ Hwloc.Object: PCI_Device [00:14.2 (RAM)] + ├─ Hwloc.Object: PCI_Device [00:15.0 (SerialBus)] + │ ⋮ + │ + ├─ Hwloc.Object: PCI_Device [00:15.1 (SerialBus)] + │ ⋮ + │ + ├─ Hwloc.Object: PCI_Device [00:16.0 (Communication)] + ├─ Hwloc.Object: PCI_Device [00:19.0 (SerialBus)] + │ ⋮ + │ + ├─ Hwloc.Object: PCI_Device [00:19.1 (SerialBus)] + │ ⋮ + │ + ├─ Hwloc.Object: Bridge [PCIBridge] + │ ⋮ + │ + ├─ Hwloc.Object: Bridge [PCIBridge] + │ ⋮ + │ + ├─ Hwloc.Object: PCI_Device [00:1f.0 (ISABridge)] + ├─ Hwloc.Object: PCI_Device [00:1f.3 (MultimediaAudio)] + ├─ Hwloc.Object: PCI_Device [00:1f.4 (SMBus)] + └─ Hwloc.Object: PCI_Device [00:1f.5 (SerialBus)] + +``` + +For examples of using the AbstractTrees interface to search the Hwloc tree, see: +[NetworkInterfaceControllers.jl](https://github.com/JuliaParallel/NetworkInterfaceControllers.jl) \ No newline at end of file diff --git a/ext/HwlocTrees.jl b/ext/HwlocTrees.jl new file mode 100644 index 0000000..3e02f7c --- /dev/null +++ b/ext/HwlocTrees.jl @@ -0,0 +1,91 @@ +module HwlocTrees + +using Hwloc,
Printf +import AbstractTrees + +mutable struct HwlocTreeNode{T} + object::Hwloc.Object + type::Symbol + tag::Union{Nothing, T} + + parent::Union{Nothing, HwlocTreeNode{T}} + children::Vector{HwlocTreeNode{T}} + memory_children::Vector{HwlocTreeNode{T}} + io_children::Vector{HwlocTreeNode{T}} + + function HwlocTreeNode{T}(obj::Hwloc.Object; parent=nothing, type=nothing) where {T} + this = new{T}(obj, obj.type_, nothing, parent) + + this.children = HwlocTreeNode{T}.(obj.children; parent=this) + this.memory_children = HwlocTreeNode{T}.(obj.memory_children; parent=this) + this.io_children = HwlocTreeNode{T}.(obj.io_children; parent=this) + + return this + end +end + +function AbstractTrees.children(node::Hwloc.Object) + HwlocTreeNode{UInt8}(node) +end + +function AbstractTrees.children(node::HwlocTreeNode) + tuple(node.children..., node.memory_children..., node.io_children...) +end + +AbstractTrees.nodevalue(n::HwlocTreeNode) = n.object + +AbstractTrees.ParentLinks(::Type{<:HwlocTreeNode}) = AbstractTrees.StoredParents() + +AbstractTrees.parent(n::HwlocTreeNode) = n.parent + +AbstractTrees.NodeType(::Type{<:HwlocTreeNode{T}}) where {T} = AbstractTrees.HasNodeType() +AbstractTrees.nodetype(::Type{<:HwlocTreeNode{T}}) where {T} = HwlocTreeNode{T} + +function AbstractTrees.printnode(io::IO, node::HwlocTreeNode) + obj = AbstractTrees.nodevalue(node) + label = string(obj) + if node.type in (:Package, :Core, :PU) + label = label * " [L#$(obj.logical_index) P#$(obj.os_index)]" + elseif node.type == :Bridge + if obj.attr.upstream_type == Hwloc.LibHwloc.HWLOC_OBJ_BRIDGE_HOST + label = label * " [HostBridge]" + else + label = label * " [PCIBridge]" + end + elseif node.type == :PCI_Device + class_string = Hwloc.LibHwlocExtensions.hwloc_pci_class_string(obj.attr.class_id) + label = label * " [" * @sprintf( + "%s%02x:%02x.%01x", + Char(obj.attr.domain), obj.attr.bus, obj.attr.dev, obj.attr.func + ) * " ($(class_string))]" + elseif node.type == :OS_Device + label = label * " [" * if 
obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_BLOCK + "Block$(Hwloc.subtype_str(obj))" + elseif obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_GPU + "GPU" + elseif obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_NETWORK + "Net" + elseif obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_OPENFABRICS + "OpenFabrics" + elseif obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_DMA + "DMA" + elseif obj.attr.type == Hwloc.LibHwloc.HWLOC_OBJ_OSDEV_COPROC + "CoProc$(Hwloc.subtype_str(obj))" + else + string(obj.attr) + end * " \"$(obj.name)\"]" + end + print(io, label) +end + +get_nodes(tree_node, type) = filter( + x->x.type == type, + collect(AbstractTrees.PreOrderDFS(tree_node)) +) + +function tag_subtree!(tree_node, val) + for n in collect(AbstractTrees.PreOrderDFS(tree_node)) + n.tag = val + end +end +end \ No newline at end of file diff --git a/src/Hwloc.jl b/src/Hwloc.jl index 9bcbc00..6911f5e 100644 --- a/src/Hwloc.jl +++ b/src/Hwloc.jl @@ -5,6 +5,7 @@ using Statistics import Base: show, IteratorSize, IteratorEltype, isempty, eltype, iterate include("libhwloc.jl") +include("libhwloc_extensions.jl") include("lowlevel_api.jl") include("highlevel_api.jl") @@ -15,4 +16,9 @@ export hwloc_typeof, hwloc_isa, collectobjects const machine_topology = Ref{Object}() +# Compatibility with older Julia versions + module extensions: +if !isdefined(Base, :get_extension) + include(joinpath("..", "ext", "HwlocTrees.jl")) +end + end diff --git a/src/highlevel_api.jl b/src/highlevel_api.jl index d8d8329..371ec38 100644 --- a/src/highlevel_api.jl +++ b/src/highlevel_api.jl @@ -1,5 +1,8 @@ -using ..LibHwloc: hwloc_get_api_version +using ..LibHwloc: hwloc_get_api_version, HWLOC_OBJ_BRIDGE_HOST, + HWLOC_OBJ_OSDEV_BLOCK, HWLOC_OBJ_OSDEV_GPU, HWLOC_OBJ_OSDEV_NETWORK, + HWLOC_OBJ_OSDEV_OPENFABRICS, HWLOC_OBJ_OSDEV_DMA, HWLOC_OBJ_OSDEV_COPROC +using Printf """ Returns the API version of libhwloc.
@@ -14,27 +17,110 @@ function get_api_version() VersionNumber(major, minor, patch) end +const minimal_classes = [ + "VGA", "NVMExp", "SATA", "Network", "Ethernet", "InfiniBand", "3D", "Other" +] +subtype_str(obj) = obj.subtype == "" ? "" : "($(obj.subtype))" + +function is_visible(obj::Object; minimal=true) + t = hwloc_typeof(obj) + + if t == :Bridge + for child in obj.io_children + if is_visible(child; minimal=minimal) + return true + end + end + return false + end + + if t == :PCI_Device + if minimal + class_string = hwloc_pci_class_string(obj.attr.class_id) + return class_string in minimal_classes + else + return true + end + end + + return true +end + """ - print_topology([io::IO = stdout, obj::Object = gettopology()]) + print_topology( + io::IO = stdout, obj::Object = gettopology(); + indent = "", newline = true, prefix = "", minimal=true + ) -Prints the topology of the given `obj` as a tree to `io`. +Prints the topology of the given `obj` as a tree to `io`. + +**Note:** some systems have a great deal of extra PCI devices (think USB +bridges, and the many many device classes on custom systems like HPC clusters). +In order to mimic the behaviour of the `lstopo` command, we omit these devices +unless `minimal=false`. """ -function print_topology(io::IO = stdout, obj::Object = gettopology(); indent = "", newline = false, prefix = "") +function print_topology( + io::IO = stdout, obj::Object = gettopology(); + indent = "", newline = true, prefix = "", minimal=true + ) t = hwloc_typeof(obj) + idxstr = t in (:Package, :Core, :PU) ?
"L#$(obj.logical_index) P#$(obj.os_index) " : "" attrstr = string(obj.attr) + # this is set to false whenever minimal == true and the PCI class_id strings + # don't match the minimal_classes list + print_device = is_visible(obj; minimal=minimal) + if t in (:L1Cache, :L2Cache, :L3Cache, :L1ICache) tstr = first(string(t), 2) attrstr = "("*_bytes2string(obj.attr.size)*")" + elseif t == :Bridge + if obj.attr.upstream_type == HWLOC_OBJ_BRIDGE_HOST + tstr = "HostBridge" + attrstr = "" + else + tstr = "PCIBridge" + attrstr = "" + end + elseif t == :PCI_Device + class_string = hwloc_pci_class_string(obj.attr.class_id) + tstr = "PCI" + attrstr = @sprintf( + "%s%02x:%02x.%01x", + Char(obj.attr.domain), obj.attr.bus, obj.attr.dev, obj.attr.func + ) * " ($(class_string))" + elseif t == :OS_Device + attrstr = "\"$(obj.name)\"" + tstr = if obj.attr.type == HWLOC_OBJ_OSDEV_BLOCK + "Block$(subtype_str(obj))" + elseif obj.attr.type == HWLOC_OBJ_OSDEV_GPU + "GPU" + elseif obj.attr.type == HWLOC_OBJ_OSDEV_NETWORK + "Net" + elseif obj.attr.type == HWLOC_OBJ_OSDEV_OPENFABRICS + "OpenFabrics" + elseif obj.attr.type == HWLOC_OBJ_OSDEV_DMA + "DMA" + elseif obj.attr.type == HWLOC_OBJ_OSDEV_COPROC + "CoProc$(subtype_str(obj))" + else + string(obj.attr) + end else tstr = string(t) + attrstr = obj.name end - newline && print(io, "\n", indent) - print(io, prefix, tstr, " ", - idxstr, - attrstr, obj.mem > 0 ? "("*_bytes2string(obj.mem)*")" : "") + if print_device + newline && print(io, "\n", indent) + print( + io, prefix, tstr, " ", idxstr, attrstr, + obj.mem > 0 ? 
"("*_bytes2string(obj.mem)*")" : "" + ) + else + return nothing + end for memchild in obj.memory_children memstr = "("*_bytes2string(memchild.mem)*")" @@ -45,11 +131,25 @@ function print_topology(io::IO = stdout, obj::Object = gettopology(); indent = " for child in obj.children no_newline = length(obj.children)==1 && t in (:L3Cache, :L2Cache, :L1Cache) if no_newline - print_topology(io, child; indent = indent, newline=false, prefix = " + ", ) + print_topology( + io, child; + indent = indent, newline=false, prefix = " + ", minimal=minimal + ) else - print_topology(io, child; indent = indent*repeat(" ", 4), newline=true) + print_topology( + io, child; + indent = indent*repeat(" ", 4), newline=true, minimal=minimal + ) end end + + for child in obj.io_children + print_topology( + io, child; + indent=indent*repeat(" ", 4), newline=true, minimal=minimal + ) + end + return nothing end print_topology(obj::Object) = print_topology(stdout, obj) @@ -57,12 +157,15 @@ print_topology(obj::Object) = print_topology(stdout, obj) """ Returns the top-level system topology `Object`. -On first call, it loads the topology by querying -libhwloc and caches the result. +On first call, it loads the topology by querying libhwloc and caches the result. +Pass `reload=true` in order to force reload. """ -function gettopology() - if !isassigned(machine_topology) - machine_topology[] = topology_load() +function gettopology(htopo=nothing; reload=false, io=true) + if reload || (!isassigned(machine_topology)) + if isnothing(htopo) + htopo=topology_init(;io=io) + end + machine_topology[] = topology_load(htopo) end return machine_topology[] @@ -71,14 +174,16 @@ end """ Prints the system topology as a tree. """ -topology() = print_topology(gettopology()) +topology(topo=gettopology()) = print_topology(topo) """ + topology_info(topo=gettopology()) + Prints a summary of the system topology (loosely similar to `hwloc-info`). 
""" -function topology_info() +function topology_info(topo=gettopology()) nodes = Tuple{Symbol, Int64, String}[] - for subobj in gettopology() + for subobj in topo idx = findfirst(t->t[1] == subobj.type_, nodes) if isnothing(idx) attrstr = "" @@ -97,21 +202,25 @@ function topology_info() end """ + getinfo(topo=gettopology(); list_all=false) + Programmatic version of `topology_info()`. Returns a `Dict{Symbol,Int}` whose entries indicate which and how often certain hwloc elements are present. -If the keyword argument `list_all` (default: `false`) is set to `true`, -the resulting dictionary will contain all possible hwloc elements. +If the `list_all` kwarg is `true`, then the results Dict will have a key for +each Hwloc type. **Warning:** a zero count does not necessarily mean that such +a device is not present -- e.g. the following +``` +getinfo(gettopology(;reload=true, io=false); list_all=true) +``` +will show a `PCI_Device` count of zero, even though those devices are present +(the zero count is due to the `io=false` kwarg passed to `gettopology`). """ -function getinfo(; list_all::Bool = false) +function getinfo(topo=gettopology(); list_all=false) res = list_all ? Dict{Symbol,Int}(t => 0 for t in obj_types) : Dict{Symbol, Int}() - for subobj in gettopology() + for subobj in topo t = hwloc_typeof(subobj) - if t in keys(res) - res[t] += 1 - else - res[t] = 1 - end + res[t] = get!(res, t, 0) + 1 end return res end @@ -305,7 +414,11 @@ The quality of the result might depend on the used terminal and might vary betwe **Note:** The specific visualization may change between minor versions. 
""" -function topology_graphical() - run(`$(lstopo_no_graphics()) --no-io --no-legend --of txt`) +function topology_graphical(;io=true) + if io + run(`$(lstopo_no_graphics()) --no-legend --of txt`) + else + run(`$(lstopo_no_graphics()) --no-io --no-legend --of txt`) + end return nothing end diff --git a/src/libhwloc.jl b/src/libhwloc.jl index 3bc26aa..4510d1e 100644 --- a/src/libhwloc.jl +++ b/src/libhwloc.jl @@ -235,6 +235,12 @@ const hwloc_nodeset_t = hwloc_bitmap_t const hwloc_const_nodeset_t = hwloc_const_bitmap_t +# TODO: do we need this? +# struct hwloc_obj_memory_page_type_s +# size::Culonglong +# count::Culonglong +# end + @cenum hwloc_obj_type_t::UInt32 begin HWLOC_OBJ_MACHINE = 0 HWLOC_OBJ_PACKAGE = 1 diff --git a/src/libhwloc_extensions.jl b/src/libhwloc_extensions.jl new file mode 100644 index 0000000..9a9984a --- /dev/null +++ b/src/libhwloc_extensions.jl @@ -0,0 +1,17 @@ +module LibHwlocExtensions + +using ..LibHwloc: libhwloc + +using CEnum + +function hwloc_pci_class_string(class_id) + val = ccall( + (:hwloc_pci_class_string, libhwloc), + Ptr{Cchar}, + (Cushort,), + class_id + ) + return unsafe_string(val) +end + +end \ No newline at end of file diff --git a/src/lowlevel_api.jl b/src/lowlevel_api.jl index d685e22..9359ca2 100644 --- a/src/lowlevel_api.jl +++ b/src/lowlevel_api.jl @@ -2,10 +2,18 @@ using ..LibHwloc: hwloc_cpuset_t, hwloc_nodeset_t, hwloc_obj_type_t, hwloc_obj_cache_type_t, hwloc_obj_bridge_type_t, hwloc_obj_osdev_type_t, hwloc_distances_s, hwloc_obj, hwloc_obj_t, hwloc_obj_attr_u, hwloc_cache_attr_s, - hwloc_group_attr_s, hwloc_pcidev_attr_s, hwloc_osdev_attr_s, - hwloc_topology_t, hwloc_topology_init, hwloc_topology_load, - hwloc_topology_get_depth, hwloc_get_nbobjs_by_depth, - hwloc_get_obj_by_depth, hwloc_topology_destroy + hwloc_group_attr_s, hwloc_bridge_attr_s, hwloc_pcidev_attr_s, + hwloc_osdev_attr_s, hwloc_topology_t, hwloc_topology_init, + hwloc_topology_load, hwloc_topology_get_depth, hwloc_get_nbobjs_by_depth, + 
hwloc_get_obj_by_depth, hwloc_topology_destroy, hwloc_type_filter_e, + hwloc_topology_set_type_filter, hwloc_topology_get_type_filter, + hwloc_topology_set_all_types_filter, hwloc_topology_set_cache_types_filter, + hwloc_topology_set_icache_types_filter, hwloc_topology_set_io_types_filter, + hwloc_topology_set_userdata, hwloc_topology_get_userdata, var"##Ctag#349", + var"##Ctag#350" + +using ..LibHwlocExtensions: + hwloc_pci_class_string # List of special capitalizations -- cenum_name_to_symbol will by default # convert the all-uppcase C enum name to lowercase (with capitalized leading @@ -38,15 +46,14 @@ for x in instances(hwloc_obj_cache_type_t) push!(cache_types, cenum_name_to_symbol(x, "HWLOC_OBJ_CACHE_")) end -# const bridge_types bridge_types = Symbol[] for x in instances(hwloc_obj_bridge_type_t) - push!(cache_types, cenum_name_to_symbol(x, "HWLOC_OBJ_BRIDGE_")) + push!(bridge_types, cenum_name_to_symbol(x, "HWLOC_OBJ_BRIDGE_")) end osdev_types = Symbol[] for x in instances(hwloc_obj_osdev_type_t) - push!(cache_types, cenum_name_to_symbol(x, "HWLOC_OBJ_OSDEV_")) + push!(osdev_types, cenum_name_to_symbol(x, "HWLOC_OBJ_OSDEV_")) end abstract type Attribute end @@ -63,8 +70,14 @@ struct CacheAttr <: Attribute type_::Symbol end function show(io::IO, a::CacheAttr) - print(io, "Cache{size=$(a.size),depth=$(a.depth),linesize=$(a.linesize),", - "associativity=$(a.associativity),type=$(string(a.type_))}") + print( + io, + "Cache{size=$(a.size), " * + "depth=$(a.depth), " * + "linesize=$(a.linesize), " * + "associativity=$(a.associativity), " * + "type=$(string(a.type_))}" + ) end struct GroupAttr <: Attribute @@ -87,16 +100,59 @@ struct PCIDevAttr <: Attribute revision::Int linkspeed::Float32 end -# TODO: expand this -show(io::IO, a::PCIDevAttr) = print(io, "PCIDev{...}") +function PCIDevAttr(ha::hwloc_pcidev_attr_s) + return PCIDevAttr( + ha.domain, ha.bus, ha.dev, ha.func, ha.class_id, ha.vendor_id, + ha.device_id, ha.subvendor_id, ha.subdevice_id, ha.revision, + 
ha.linkspeed + ) +end +function show(io::IO, a::PCIDevAttr) + print( + io, + "PCIDev(domain=$(a.domain), " * + "bus=$(a.bus), " * + "dev=$(a.dev), " * + "func=$(a.func), " * + "class_id=$(hwloc_pci_class_string(a.class_id)), " * + "vendor_id=$(a.vendor_id), " * + "device_id=$(a.device_id), " * + "subvendor_id=$(a.subvendor_id), " * + "subdevice_id=$(a.subdevice_id), " * + "revision=$(a.revision), " * + "linkspeed=$(a.linkspeed))" + ) +end -# type BridgeAttr <: Attribute end +struct BridgeAttr <: Attribute + upstream::var"##Ctag#349" + upstream_type::hwloc_obj_bridge_type_t + downstream::var"##Ctag#350" + downstream_type::hwloc_obj_bridge_type_t + depth::UInt +end +function BridgeAttr(ha::hwloc_bridge_attr_s) + return BridgeAttr( + ha.upstream, ha.upstream_type, + ha.downstream, ha.downstream_type, + ha.depth + ) +end +function show(io::IO, a::BridgeAttr) + print( + io, + "BridgeAttr(US=$(hwloc_pci_class_string(a.upstream.pci.class_id)), " * + "upstream_type=$(string(a.upstream_type)), " * + "downstream_type=$(string(a.downstream_type)) " * + ")" + ) +end struct OSDevAttr <: Attribute - type_::Symbol + type::hwloc_obj_osdev_type_t end function show(io::IO, a::OSDevAttr) - print(io, "OSDev{type=$(string(a.type_))}") + print(io, "OSDev{type=$(string(a.type))}") end struct DieAttr <: Attribute @@ -134,11 +190,14 @@ function load_attr(hattr::Ptr{hwloc_obj_attr_u}, type_::Symbol) elseif type_==:Misc error("not implemented") elseif type_==:Bridge - error("not implemented") + ha = unsafe_load(convert(Ptr{hwloc_bridge_attr_s}, hattr)) + return BridgeAttr(ha) elseif type_==:PCI_Device - error("not implemented") + ha = unsafe_load(convert(Ptr{hwloc_pcidev_attr_s}, hattr)) + return PCIDevAttr(ha) elseif type_==:OS_Device - error("not implemented") + ha = unsafe_load(convert(Ptr{hwloc_obj_osdev_type_t}, hattr)) + return OSDevAttr(ha) elseif type_==:Die ha = unsafe_load(convert(Ptr{hwloc_cache_attr_s}, hattr)) return DieAttr(ha.depth) @@ -150,10 +209,9 @@ function 
load_attr(hattr::Ptr{hwloc_obj_attr_u}, type_::Symbol) end - - struct Object type_::Symbol + subtype::String os_index::Int name::String attr::Attribute @@ -167,6 +225,8 @@ struct Object memory_children::Vector{Object} + io_children::Vector{Object} + # Object() = new(:Error, -1, "(nothing)", NullAttr(), # 0, -1, -1, # -1, # Object[], Object[]) @@ -178,7 +238,10 @@ IteratorSize(::Type{Object}) = Base.SizeUnknown() IteratorEltype(::Type{Object}) = Base.HasEltype() eltype(::Type{Object}) = Object isempty(::Object) = false -iterate(obj::Object) = (obj, isempty(obj.memory_children) ? obj.children : vcat(obj.memory_children, obj.children)) +function iterate(obj::Object) + state = vcat(obj.children, obj.memory_children, obj.io_children) + return obj, state +end function iterate(::Object, state::Vector{Object}) isempty(state) && return nothing # depth-first traversal @@ -186,6 +249,7 @@ function iterate(::Object, state::Vector{Object}) obj, state = state[1], state[2:end] prepend!(state, obj.children) prepend!(state, obj.memory_children) + prepend!(state, obj.io_children) return obj, state end # length(obj::Object) = mapreduce(x->1, +, obj) @@ -201,6 +265,7 @@ function load(hobj::hwloc_obj_t) @assert Integer(obj.type)>=0 && Integer(obj.type) Hwloc.Object Load the system topology by calling into libhwloc. 
""" -function topology_load() - htopop = Ref{hwloc_topology_t}() - ierr = hwloc_topology_init(htopop) - @assert ierr==0 - htopo = htopop[] +function topology_load(htopo=topology_init()) ierr = hwloc_topology_load(htopo) - @assert ierr==0 + @assert ierr == 0 depth = hwloc_topology_get_depth(htopo) @assert depth >= 1 @@ -255,4 +368,4 @@ function topology_load() hwloc_topology_destroy(htopo) return topo -end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 241a0e4..ccf9f28 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,7 +23,7 @@ import CpuId @testset "Topology (compact info)" begin println("Info:") topology_info() - counts = getinfo(list_all=true) + counts = getinfo(;list_all=true) @test typeof(counts) == Dict{Symbol,Int} @test length(counts) == length(Hwloc.obj_types) println(counts) @@ -34,7 +34,7 @@ import CpuId @test num_virtual_cores() == counts[:PU] @test num_packages() == counts[:Package] @test num_numa_nodes() == counts[:NUMANode] - counts = getinfo(list_all=false) + counts = getinfo() @test typeof(counts) == Dict{Symbol,Int} @test all(>(0), values(counts)) end @@ -119,4 +119,11 @@ import CpuId @test first(collectobjects(:NUMANode)).mem > 0 end end + + @testset "AbstractTrees interface" begin + using AbstractTrees + t = gettopology() + # check that `children(gettopology)` returns the root of the HwlocTree + @test children(t).type == :Machine + end end