Skip to content

Commit

Permalink
Updates for DCGM 3.3.0 (#45)
Browse files Browse the repository at this point in the history
* Update headers to 3.3 for internal RC builds

* DCGM 3.3.0 updates, fix for WatchPidFields from helinfan

* Update const for DCGM 3.3.0
  • Loading branch information
glowkey authored Nov 7, 2023
1 parent d898cc7 commit 2e092a7
Show file tree
Hide file tree
Showing 10 changed files with 568 additions and 365 deletions.
3 changes: 2 additions & 1 deletion pkg/dcgm/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"sync"
"time"
)

var (
Expand Down Expand Up @@ -89,7 +90,7 @@ func GetDeviceTopology(gpuId uint) ([]P2PLink, error) {
// WatchPidFields lets DCGM start recording stats for GPU processes.
// It needs to be called before calling GetProcessInfo.
//
// It passes the package defaults (defaultUpdateFreq, defaultMaxKeepAge and
// defaultMaxKeepSamples) to watchPidFields and returns that call's
// GroupHandle and error unchanged.
func WatchPidFields() (GroupHandle, error) {
// NOTE(review): there are two consecutive returns here, so as written only
// this first one executes and the second is unreachable. They look like the
// before/after lines of a diff rendered without +/- markers — confirm which
// call matches the current watchPidFields signature and drop the other.
return watchPidFields(defaultUpdateFreq, defaultMaxKeepAge, defaultMaxKeepSamples)
// This form converts defaultUpdateFreq to a time.Duration as a count of
// microseconds and defaultMaxKeepAge as a count of seconds.
return watchPidFields(time.Microsecond*time.Duration(defaultUpdateFreq), time.Second*time.Duration(defaultMaxKeepAge), defaultMaxKeepSamples)
}

// GetProcessInfo provides detailed per GPU stats for this process
Expand Down
215 changes: 56 additions & 159 deletions pkg/dcgm/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ const (
DCGM_FI_DEV_FB_USED = 252
DCGM_FI_DEV_FB_RESERVED = 253
DCGM_FI_DEV_FB_USED_PERCENT = 254
DCGM_FI_DEV_C2C_LINK_COUNT = 285
DCGM_FI_DEV_C2C_LINK_STATUS = 286
DCGM_FI_DEV_C2C_MAX_BANDWIDTH = 287
DCGM_FI_DEV_ECC_CURRENT = 300
DCGM_FI_DEV_ECC_PENDING = 301
DCGM_FI_DEV_ECC_SBE_VOL_TOTAL = 310
Expand Down Expand Up @@ -311,82 +314,15 @@ const (
DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE = 532
DCGM_FI_DEV_VGPU_PCI_ID = 533
DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID = 534
DCGM_FI_FIRST_VGPU_FIELD_ID = 520
DCGM_FI_LAST_VGPU_FIELD_ID = 570
DCGM_FI_INTERNAL_FIELDS_0_START = 600
DCGM_FI_INTERNAL_FIELDS_0_END = 699
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00 = 700
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00 = 701
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00 = 702
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00 = 703
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01 = 704
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01 = 705
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01 = 706
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01 = 707
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02 = 708
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02 = 709
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02 = 710
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02 = 711
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03 = 712
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03 = 713
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03 = 714
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03 = 715
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04 = 716
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04 = 717
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04 = 718
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04 = 719
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05 = 720
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05 = 721
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05 = 722
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05 = 723
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06 = 724
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06 = 725
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06 = 726
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06 = 727
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07 = 728
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07 = 729
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07 = 730
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07 = 731
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08 = 732
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08 = 733
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08 = 734
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08 = 735
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09 = 736
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09 = 737
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09 = 738
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09 = 739
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10 = 740
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10 = 741
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10 = 742
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10 = 743
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11 = 744
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11 = 745
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11 = 746
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11 = 747
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12 = 748
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12 = 749
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12 = 750
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12 = 751
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13 = 752
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13 = 753
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13 = 754
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13 = 755
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14 = 756
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14 = 757
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14 = 758
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14 = 759
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15 = 760
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15 = 761
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15 = 762
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15 = 763
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16 = 764
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16 = 765
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16 = 766
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16 = 767
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17 = 768
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17 = 769
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17 = 770
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17 = 771
DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT = 701
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ = 702
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV = 703
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD = 704
DCGM_FI_DEV_NVSWITCH_POWER_VDD = 705
DCGM_FI_DEV_NVSWITCH_POWER_DVDD = 706
DCGM_FI_DEV_NVSWITCH_POWER_HVDD = 707
DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX = 780
DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX = 781
DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS = 782
Expand Down Expand Up @@ -447,8 +383,6 @@ const (
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID = 876
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID = 877
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_UUID = 878
DCGM_FI_FIRST_NVSWITCH_FIELD_ID = 700
DCGM_FI_LAST_NVSWITCH_FIELD_ID = 899
DCGM_FI_PROF_GR_ENGINE_ACTIVE = 1001
DCGM_FI_PROF_SM_ACTIVE = 1002
DCGM_FI_PROF_SM_OCCUPANCY = 1003
Expand Down Expand Up @@ -518,7 +452,20 @@ const (
DCGM_FI_PROF_NVLINK_L16_RX_BYTES = 1073
DCGM_FI_PROF_NVLINK_L17_TX_BYTES = 1074
DCGM_FI_PROF_NVLINK_L17_RX_BYTES = 1075
DCGM_FI_MAX_FIELDS = 1076
DCGM_FI_DEV_CPU_UTIL_TOTAL = 1100
DCGM_FI_DEV_CPU_UTIL_USER = 1101
DCGM_FI_DEV_CPU_UTIL_NICE = 1102
DCGM_FI_DEV_CPU_UTIL_SYS = 1103
DCGM_FI_DEV_CPU_UTIL_IRQ = 1104
DCGM_FI_DEV_CPU_TEMP_CURRENT = 1110
DCGM_FI_DEV_CPU_TEMP_WARNING = 1111
DCGM_FI_DEV_CPU_TEMP_CRITICAL = 1112
DCGM_FI_DEV_CPU_CLOCK_CURRENT = 1120
DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT = 1130
DCGM_FI_DEV_CPU_POWER_LIMIT = 1131
DCGM_FI_DEV_CPU_VENDOR = 1140
DCGM_FI_DEV_CPU_MODEL = 1141
DCGM_FI_MAX_FIELDS = 1142

DCGM_ST_OK = 0
DCGM_ST_BADPARAM = -1
Expand Down Expand Up @@ -573,15 +520,18 @@ const (
DCGM_ST_NVVS_ISOLATE_ERROR = -51
DCGM_ST_NVVS_BINARY_NOT_FOUND = -52
DCGM_ST_NVVS_KILLED = -53
DCGM_ST_PAUSED = -54
DCGM_ST_ALREADY_INITIALIZED = -55
)

var (
DCGM_FI = map[string]Short{
"DCGM_FT_BINARY": Short('b'),
"DCGM_FT_DOUBLE": Short('d'),
"DCGM_FT_INT64": Short('i'),
"DCGM_FT_STRING": Short('s'),
"DCGM_FT_TIMESTAMP": Short('t'),
"DCGM_FT_BINARY": Short('b'),
"DCGM_FT_DOUBLE": Short('d'),
"DCGM_FT_INT64": Short('i'),
"DCGM_FT_STRING": Short('s'),
"DCGM_FT_TIMESTAMP": Short('t'),

"DCGM_FI_UNKNOWN": 0,
"DCGM_FI_DRIVER_VERSION": 1,
"DCGM_FI_NVML_VERSION": 2,
Expand Down Expand Up @@ -682,6 +632,9 @@ var (
"DCGM_FI_DEV_FB_USED": 252,
"DCGM_FI_DEV_FB_RESERVED": 253,
"DCGM_FI_DEV_FB_USED_PERCENT": 254,
"DCGM_FI_DEV_C2C_LINK_COUNT": 285,
"DCGM_FI_DEV_C2C_LINK_STATUS": 286,
"DCGM_FI_DEV_C2C_MAX_BANDWIDTH": 287,
"DCGM_FI_DEV_ECC_CURRENT": 300,
"DCGM_FI_DEV_ECC_PENDING": 301,
"DCGM_FI_DEV_ECC_SBE_VOL_TOTAL": 310,
Expand Down Expand Up @@ -845,82 +798,15 @@ var (
"DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE": 532,
"DCGM_FI_DEV_VGPU_PCI_ID": 533,
"DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID": 534,
"DCGM_FI_FIRST_VGPU_FIELD_ID": 520,
"DCGM_FI_LAST_VGPU_FIELD_ID": 570,
"DCGM_FI_INTERNAL_FIELDS_0_START": 600,
"DCGM_FI_INTERNAL_FIELDS_0_END": 699,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00": 700,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00": 701,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00": 702,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00": 703,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01": 704,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01": 705,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01": 706,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01": 707,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02": 708,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02": 709,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02": 710,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02": 711,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03": 712,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03": 713,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03": 714,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03": 715,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04": 716,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04": 717,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04": 718,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04": 719,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05": 720,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05": 721,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05": 722,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05": 723,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06": 724,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06": 725,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06": 726,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06": 727,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07": 728,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07": 729,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07": 730,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07": 731,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08": 732,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08": 733,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08": 734,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08": 735,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09": 736,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09": 737,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09": 738,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09": 739,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10": 740,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10": 741,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10": 742,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10": 743,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11": 744,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11": 745,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11": 746,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11": 747,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12": 748,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12": 749,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12": 750,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12": 751,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13": 752,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13": 753,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13": 754,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13": 755,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14": 756,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14": 757,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14": 758,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14": 759,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15": 760,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15": 761,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15": 762,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15": 763,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16": 764,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16": 765,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16": 766,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16": 767,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17": 768,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17": 769,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17": 770,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17": 771,
"DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT": 701,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ": 702,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV": 703,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD": 704,
"DCGM_FI_DEV_NVSWITCH_POWER_VDD": 705,
"DCGM_FI_DEV_NVSWITCH_POWER_DVDD": 706,
"DCGM_FI_DEV_NVSWITCH_POWER_HVDD": 707,
"DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX": 780,
"DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX": 781,
"DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS": 782,
Expand Down Expand Up @@ -981,8 +867,6 @@ var (
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID": 876,
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID": 877,
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_UUID": 878,
"DCGM_FI_FIRST_NVSWITCH_FIELD_ID": 700,
"DCGM_FI_LAST_NVSWITCH_FIELD_ID": 899,
"DCGM_FI_PROF_GR_ENGINE_ACTIVE": 1001,
"DCGM_FI_PROF_SM_ACTIVE": 1002,
"DCGM_FI_PROF_SM_OCCUPANCY": 1003,
Expand Down Expand Up @@ -1052,7 +936,20 @@ var (
"DCGM_FI_PROF_NVLINK_L16_RX_BYTES": 1073,
"DCGM_FI_PROF_NVLINK_L17_TX_BYTES": 1074,
"DCGM_FI_PROF_NVLINK_L17_RX_BYTES": 1075,
"DCGM_FI_MAX_FIELDS": 1076,
"DCGM_FI_DEV_CPU_UTIL_TOTAL": 1100,
"DCGM_FI_DEV_CPU_UTIL_USER": 1101,
"DCGM_FI_DEV_CPU_UTIL_NICE": 1102,
"DCGM_FI_DEV_CPU_UTIL_SYS": 1103,
"DCGM_FI_DEV_CPU_UTIL_IRQ": 1104,
"DCGM_FI_DEV_CPU_TEMP_CURRENT": 1110,
"DCGM_FI_DEV_CPU_TEMP_WARNING": 1111,
"DCGM_FI_DEV_CPU_TEMP_CRITICAL": 1112,
"DCGM_FI_DEV_CPU_CLOCK_CURRENT": 1120,
"DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT": 1130,
"DCGM_FI_DEV_CPU_POWER_LIMIT": 1131,
"DCGM_FI_DEV_CPU_VENDOR": 1140,
"DCGM_FI_DEV_CPU_MODEL": 1141,
"DCGM_FI_MAX_FIELDS": 1142,
}
)

Expand Down
25 changes: 21 additions & 4 deletions pkg/dcgm/dcgm_agent.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,13 +17,13 @@
#ifndef DCGM_AGENT_H
#define DCGM_AGENT_H

#define DCGM_PUBLIC_API
#include "dcgm_structs.h"

#ifdef __cplusplus
extern "C" {
#endif

#define DCGM_PUBLIC_API

/***************************************************************************************************/
/** @defgroup DCGMAPI_Admin Administrative
Expand Down Expand Up @@ -274,8 +274,8 @@ DCGM_PUBLIC_API dcgmReturn_t dcgmModuleIdToName(dcgmModuleId_t id, char const **
/***************************************************************************************************/
/** @defgroup DCGMAPI_SYS System
* @{
* This chapter describes the APIs used to identify set of GPUs on the node, grouping functions to
* provide mechanism to operate on a group of GPUs, and status management APIs in
* This chapter describes the APIs used to identify entities on the node, grouping functions to
* provide a mechanism to operate on a group of entities, and status management APIs in
* order to get individual statuses for each operation. The APIs in the System module can be
* broken down into the following categories:
*/
Expand Down Expand Up @@ -405,6 +405,23 @@ dcgmReturn_t DCGM_PUBLIC_API dcgmGetGpuInstanceHierarchy(dcgmHandle_t dcgmHandle
*/
dcgmReturn_t DCGM_PUBLIC_API dcgmGetNvLinkLinkStatus(dcgmHandle_t dcgmHandle, dcgmNvLinkStatus_v3 *linkStatus);


/**
* List the supported CPUs, and their cores, that are present on the system.
*
* This and other CPU APIs only support datacenter NVIDIA CPUs.
*
* @param dcgmHandle IN: DCGM Handle
* @param cpuHierarchy OUT: Structure where the CPUs and their associated cores will be enumerated
*
* @return
* - \ref DCGM_ST_OK if the call was successful.
* - \ref DCGM_ST_NOT_SUPPORTED if the device is unsupported
* - \ref DCGM_ST_MODULE_NOT_LOADED if the sysmon module could not be loaded
* - \ref DCGM_ST_BADPARAM if any parameter is invalid
*/
dcgmReturn_t DCGM_PUBLIC_API dcgmGetCpuHierarchy(dcgmHandle_t dcgmHandle, dcgmCpuHierarchy_v1 *cpuHierarchy);

/** @} */

/***************************************************************************************************/
Expand Down
Loading

0 comments on commit 2e092a7

Please sign in to comment.