-
Notifications
You must be signed in to change notification settings - Fork 49
/
nvtx3.hpp
2888 lines (2716 loc) · 98.1 KB
/
nvtx3.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Temporary helper #defines, #undef'ed at end of header.
* They hold the major/minor version of the NVTX C++ wrappers implemented by
* this copy of the header, and are compared against the
* NVTX3_CPP_INLINED_VERSION_* macros further down. */
#define NVTX3_CPP_VERSION_MAJOR 1
#define NVTX3_CPP_VERSION_MINOR 0
/* This section handles the decision of whether to provide unversioned symbols.
* If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is #defined, unversioned symbols are
* not provided, and explicit-version symbols such as nvtx3::v1::scoped_range
* and NVTX3_V1_FUNC_RANGE must be used. By default, the first #include of this
* header will define the unversioned symbols such as nvtx3::scoped_range and
* NVTX3_FUNC_RANGE. Subsequently including a different major version of this
* header without #defining NVTX3_CPP_REQUIRE_EXPLICIT_VERSION triggers an error
* since the symbols would conflict. Subsequently including a different
* minor version within the same major version is allowed. Functionality of
* minor versions is cumulative, regardless of include order.
*
* Since NVTX3_CPP_REQUIRE_EXPLICIT_VERSION allows all combinations of versions
* to coexist without problems within a translation unit, the recommended best
* practice for instrumenting header-based libraries with NVTX C++ Wrappers is
* to #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before including nvtx3.hpp,
* #undef it afterward, and only use explicit-version symbols. This is not
* necessary in common cases, such as instrumenting a standalone application, or
* static/shared libraries in .cpp files or headers private to those projects.
*/
/* clang-format off */
#if !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION)
/* Define macro used by all definitions in this header to indicate the
* unversioned symbols should be defined in addition to the versioned ones.
*/
#define NVTX3_INLINE_THIS_VERSION
#if !defined(NVTX3_CPP_INLINED_VERSION_MAJOR)
/* First occurrence of this header in the translation unit. Define macros
* indicating which version shall be used for unversioned symbols. The values
* are written as literals mirroring NVTX3_CPP_VERSION_MAJOR/MINOR, which are
* only temporary helpers.
*/
/**
* @brief Semantic major version number for NVTX C++ wrappers of unversioned symbols
*
* Breaking changes may occur between major versions, and different major versions
* cannot provide unversioned symbols in the same translation unit (.cpp file).
*
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
*
* Not to be confused with the version number of the NVTX core library.
*/
#define NVTX3_CPP_INLINED_VERSION_MAJOR 1 // NVTX3_CPP_VERSION_MAJOR
/**
* @brief Semantic minor version number for NVTX C++ wrappers of unversioned symbols
*
* No breaking changes occur between minor versions -- minor version changes within
* a major version are purely additive.
*
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
*
* Not to be confused with the version number of the NVTX core library.
*/
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
#elif NVTX3_CPP_INLINED_VERSION_MAJOR != NVTX3_CPP_VERSION_MAJOR
/* Unsupported case -- cannot define unversioned symbols for different major versions
* in the same translation unit.
*/
#error \
"Two different major versions of the NVTX C++ Wrappers are being included in a single .cpp file, with unversioned symbols enabled in both. Only one major version can enable unversioned symbols in a .cpp file. To disable unversioned symbols, #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before #including nvtx3.hpp, and use the explicit-version symbols instead -- this is the preferred way to use nvtx3.hpp from a header file."
#elif (NVTX3_CPP_INLINED_VERSION_MAJOR == NVTX3_CPP_VERSION_MAJOR) && \
(NVTX3_CPP_INLINED_VERSION_MINOR < NVTX3_CPP_VERSION_MINOR)
/* An older minor version of the same major version already defined unversioned
* symbols. The new features provided in this header will be inlined as well,
* so redefine the minor version macro to this header's version.
*/
#undef NVTX3_CPP_INLINED_VERSION_MINOR
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
// else, already have this version or newer, nothing to do
#endif // NVTX3_CPP_INLINED_VERSION_MAJOR / _MINOR selection
#endif // !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION)
/* clang-format on */
/**
* @file nvtx3.hpp
*
* @brief Provides C++ constructs making the NVTX library safer and easier to
* use with zero overhead.
*/
/**
* \mainpage
* \tableofcontents
*
* \section QUICK_START Quick Start
*
* To add NVTX ranges to your code, use the `nvtx3::scoped_range` RAII object. A
* range begins when the object is created, and ends when the object is
* destroyed.
*
* \code{.cpp}
* #include "nvtx3.hpp"
* void some_function() {
* // Begins an NVTX range with the message "some_function"
* // The range ends when some_function() returns and `r` is destroyed
* nvtx3::scoped_range r{"some_function"};
*
* for(int i = 0; i < 6; ++i) {
* nvtx3::scoped_range loop{"loop range"};
* std::this_thread::sleep_for(std::chrono::seconds{1});
* }
* } // Range ends when `r` is destroyed
* \endcode
*
* The example code above generates the following timeline view in Nsight
* Systems:
*
* \image html
* https://raw.githubusercontent.com/NVIDIA/NVTX/release-v3/docs/images/example_range.png
*
* Alternatively, use the \ref MACROS like `NVTX3_FUNC_RANGE()` to add
* ranges to your code that automatically use the name of the enclosing function
* as the range's message.
*
* \code{.cpp}
* #include "nvtx3.hpp"
* void some_function() {
* // Creates a range with a message "some_function" that ends when the
* // enclosing function returns
* NVTX3_FUNC_RANGE();
* ...
* }
* \endcode
*
*
* \section Overview
*
* The NVTX library provides a set of functions for users to annotate their code
* to aid in performance profiling and optimization. These annotations provide
* information to tools like Nsight Systems to improve visualization of
* application timelines.
*
* \ref RANGES are one of the most commonly used NVTX constructs for annotating
* a span of time. For example, imagine a user wanted to see every time a
* function, `my_function`, is called and how long it takes to execute. This can
* be accomplished with an NVTX range created on the entry to the function and
* terminated on return from `my_function` using the push/pop C APIs:
*
* \code{.cpp}
* void my_function(...) {
* nvtxRangePushA("my_function"); // Begins NVTX range
* // do work
* nvtxRangePop(); // Ends NVTX range
* }
* \endcode
*
* One of the challenges with using the NVTX C API is that it requires manually
* terminating the end of the range with `nvtxRangePop`. This can be challenging
* if `my_function()` has multiple returns or can throw exceptions as it
* requires calling `nvtxRangePop()` before all possible return points.
*
* NVTX C++ solves this inconvenience through the "RAII" technique by providing
* a `nvtx3::scoped_range` class that begins a range at construction and ends
* the range on destruction. The above example then becomes:
*
* \code{.cpp}
* void my_function(...) {
* nvtx3::scoped_range r{"my_function"}; // Begins NVTX range
* // do work
* } // Range ends on exit from `my_function` when `r` is destroyed
* \endcode
*
* The range object `r` is deterministically destroyed whenever `my_function`
* returns---ending the NVTX range without manual intervention. For more
* information, see \ref RANGES and `nvtx3::scoped_range_in`.
*
* Another inconvenience of the NVTX C API is that several constructs expect
* the user to initialize an object at the beginning of an application and
* reuse that object throughout the lifetime of the application. Examples
* include domains, categories, and registered messages.
*
* Example:
* \code{.cpp}
* nvtxDomainHandle_t D = nvtxDomainCreateA("my domain");
* // Reuse `D` throughout the rest of the application
* \endcode
*
* This can be problematic if the user application or library does not have an
* explicit initialization function called before all other functions to
* ensure that these long-lived objects are initialized before being used.
*
* NVTX C++ makes use of the "construct on first use" technique to alleviate
* this inconvenience. In short, a function local static object is constructed
* upon the first invocation of a function and returns a reference to that
* object on all future invocations. See the documentation for `nvtx3::domain`,
* `nvtx3::named_category`, `nvtx3::registered_string`, and
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use for more
* information.
*
* Using construct on first use, the above example becomes:
* \code{.cpp}
* struct my_domain{ static constexpr char const* name{"my domain"}; };
*
* // The first invocation of `domain::get` for the type `my_domain` will
* // construct a `nvtx3::domain` object and return a reference to it. Future
* // invocations simply return a reference.
* nvtx3::domain const& D = nvtx3::domain::get<my_domain>();
* \endcode
* For more information about NVTX and how it can be used, see
* https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx and
* https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/
*
* \section RANGES Ranges
*
* Ranges are used to describe a span of time during the execution of an
* application. Common examples are using ranges to annotate the time it takes
* to execute a function or an iteration of a loop.
*
* NVTX C++ uses RAII to automate the generation of ranges that are tied to the
* lifetime of objects, similar to `std::lock_guard` in the C++ Standard
* Library.
*
* \subsection scoped_range Scoped Range
*
* `nvtx3::scoped_range_in` is a class that begins a range upon construction
* and ends the range at destruction. This is one of the most commonly used
* constructs in NVTX C++ and is useful for annotating spans of time on a
* particular thread. These ranges can be nested to arbitrary depths.
*
* `nvtx3::scoped_range` is an alias for a `nvtx3::scoped_range_in` in the
* global NVTX domain. For more information about Domains, see \ref DOMAINS.
*
* Various attributes of a range can be configured by constructing a
* `nvtx3::scoped_range_in` with a `nvtx3::event_attributes` object. For
* more information, see \ref ATTRIBUTES.
*
* Example:
*
* \code{.cpp}
* void some_function() {
* // Creates a range for the duration of `some_function`
* nvtx3::scoped_range r{};
*
* while(true) {
* // Creates a range for every loop iteration
* // `loop_range` is nested inside `r`
* nvtx3::scoped_range loop_range{};
* }
* }
* \endcode
*
* \subsection unique_range Unique Range
*
* `nvtx3::unique_range` is similar to `nvtx3::scoped_range`, with a few key differences:
* - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
* destroyed in exact reverse creation order
* - `unique_range` can start and end on different threads
* - `unique_range` is moveable
* - `unique_range` objects can be constructed as heap objects
*
* There is extra overhead associated with `unique_range` constructs and therefore use of
* `nvtx3::scoped_range_in` should be preferred.
*
* \section MARKS Marks
*
* `nvtx3::mark` annotates an instantaneous point in time with a "marker".
*
* Unlike a "range" which has a beginning and an end, a marker is a single event
* in an application, such as detecting a problem:
*
* \code{.cpp}
* bool success = do_operation(...);
* if (!success) {
* nvtx3::mark("operation failed!");
* }
* \endcode
*
* \section DOMAINS Domains
*
* Similar to C++ namespaces, domains allow for scoping NVTX events. By default,
* all NVTX events belong to the "global" domain. Libraries and applications
* should scope their events to use a custom domain to differentiate where the
* events originate from.
*
* It is common for a library or application to have only a single domain and
* for the name of that domain to be known at compile time. Therefore, Domains
* in NVTX C++ are represented by _tag types_.
*
* For example, to define a custom domain, simply define a new concrete type
* (a `class` or `struct`) with a `static` member called `name` that contains
* the desired name of the domain.
*
* \code{.cpp}
* struct my_domain{ static constexpr char const* name{"my domain"}; };
* \endcode
*
* For any NVTX C++ construct that can be scoped to a domain, the type
* `my_domain` can be passed as an explicit template argument to scope it to
* the custom domain.
*
* The tag type `nvtx3::domain::global` represents the global NVTX domain.
*
* \code{.cpp}
* // By default, `scoped_range_in` belongs to the global domain
* nvtx3::scoped_range_in<> r0{};
*
* // Alias for a `scoped_range_in` in the global domain
* nvtx3::scoped_range r1{};
*
* // `r` belongs to the custom domain
* nvtx3::scoped_range_in<my_domain> r{};
* \endcode
*
* When using a custom domain, it is recommended to define type aliases for NVTX
* constructs in the custom domain.
* \code{.cpp}
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
* using my_named_category = nvtx3::named_category_in<my_domain>;
* \endcode
*
* See `nvtx3::domain` for more information.
*
* \section ATTRIBUTES Event Attributes
*
* NVTX events can be customized with various attributes to provide additional
* information (such as a custom message) or to control visualization of the
* event (such as the color used). These attributes can be specified per-event
* via arguments to a `nvtx3::event_attributes` object.
*
* NVTX events can be customized via four "attributes":
* - \ref COLOR : color used to visualize the event in tools.
* - \ref MESSAGES : Custom message string.
* - \ref PAYLOAD : User-defined numerical value.
* - \ref CATEGORY : Intra-domain grouping.
*
* It is possible to construct a `nvtx3::event_attributes` from any number of
* attribute objects (nvtx3::color, nvtx3::message, nvtx3::payload,
* nvtx3::category) in any order. If an attribute is not specified, a tool
* specific default value is used. See `nvtx3::event_attributes` for more
* information.
*
* \code{.cpp}
* // Set message, same as passing nvtx3::message{"message"}
* nvtx3::event_attributes attr{"message"};
*
* // Set message and color
* nvtx3::event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
*
* // Set message, color, payload, category
* nvtx3::event_attributes attr{"message",
* nvtx3::rgb{127, 255, 0},
* nvtx3::payload{42},
* nvtx3::category{1}};
*
* // Same as above -- can use any order of arguments
* nvtx3::event_attributes attr{nvtx3::payload{42},
* nvtx3::category{1},
* "message",
* nvtx3::rgb{127, 255, 0}};
*
* // Multiple arguments of the same type are allowed, but only the first is
* // used -- in this example, payload is set to 42:
* nvtx3::event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
*
* // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
* using namespace nvtx3;
* event_attributes attr{"message", rgb{127, 255, 0}, payload{42}, category{1}};
* \endcode
*
* \subsection MESSAGES message
*
* `nvtx3::message` sets the message string for an NVTX event.
*
* Example:
* \code{.cpp}
* // Create an `event_attributes` with the message "my message"
* nvtx3::event_attributes attr{nvtx3::message{"my message"}};
*
* // strings and string literals implicitly assumed to be a `nvtx3::message`
* nvtx3::event_attributes attr{"my message"};
* \endcode
*
* \subsubsection REGISTERED_MESSAGE Registered Messages
*
* Associating a `nvtx3::message` with an event requires copying the contents of
* the message every time the message is used, i.e., copying the entire message
* string. This may cause non-trivial overhead in performance sensitive code.
*
* To eliminate this overhead, NVTX allows registering a message string,
* yielding a "handle" that is inexpensive to copy that may be used in place of
* a message string. When visualizing the events, tools such as Nsight Systems
* will take care of mapping the message handle to its string.
*
* A message should be registered once and the handle reused throughout the rest
* of the application. This can be done by either explicitly creating static
* `nvtx3::registered_string` objects, or using the
* `nvtx3::registered_string::get` construct on first use helper (recommended).
*
* Similar to \ref DOMAINS, `nvtx3::registered_string::get` requires defining a
* custom tag type with a static `message` member whose value will be the
* contents of the registered string.
*
* Example:
* \code{.cpp}
* // Explicitly constructed, static `registered_string` in my_domain:
* static registered_string_in<my_domain> static_message{"my message"};
*
* // Or use construct on first use:
* // Define a tag type with a `message` member string to register
* struct my_message{ static constexpr char const* message{ "my message" }; };
*
* // Uses construct on first use to register the contents of
* // `my_message::message`
* auto& msg = nvtx3::registered_string_in<my_domain>::get<my_message>();
* \endcode
*
* \subsection COLOR color
*
* Associating a `nvtx3::color` with an event allows controlling how the event
* is visualized in a tool such as Nsight Systems. This is a convenient way to
* visually differentiate among different events.
*
* \code{.cpp}
* // Define a color via rgb color values
* nvtx3::color c{nvtx3::rgb{127, 255, 0}};
* nvtx3::event_attributes attr{c};
*
* // rgb color values can be passed directly to an `event_attributes`
* nvtx3::event_attributes attr1{nvtx3::rgb{127,255,0}};
* \endcode
*
* \subsection CATEGORY category
*
* A `nvtx3::category` is simply an integer id that allows for fine-grain
* grouping of NVTX events. For example, one might use separate categories for
* IO, memory allocation, compute, etc.
*
* \code{.cpp}
* nvtx3::event_attributes{nvtx3::category{1}};
* \endcode
*
* \subsubsection NAMED_CATEGORIES Named Categories
*
* Associates a `name` string with a category `id` to help differentiate among
* categories.
*
* For any given category id `Id`, a `named_category{Id, "name"}` should only
* be constructed once and reused throughout an application. This can be done by
* either explicitly creating static `nvtx3::named_category` objects, or using
* the `nvtx3::named_category::get` construct on first use helper (recommended).
*
* Similar to \ref DOMAINS, `nvtx3::named_category::get` requires defining a
* custom tag type with static `name` and `id` members.
*
* \code{.cpp}
* // Explicitly constructed, static `named_category` in my_domain:
* static nvtx3::named_category_in<my_domain> static_category{42, "my category"};
*
* // Or use construct on first use:
* // Define a tag type with `name` and `id` members
* struct my_category {
* static constexpr char const* name{"my category"}; // category name
* static constexpr uint32_t id{42}; // category id
* };
*
* // Use construct on first use to name the category id `42`
* // with name "my category":
* auto& cat = named_category_in<my_domain>::get<my_category>();
*
* // Range `r` associated with category id `42`
* nvtx3::event_attributes attr{cat};
* \endcode
*
* \subsection PAYLOAD payload
*
* Allows associating a user-defined numerical value with an event.
*
* \code{.cpp}
* // Constructs a payload from the `int32_t` value 42
* nvtx3::event_attributes attr{nvtx3::payload{42}};
* \endcode
*
*
* \section EXAMPLE Example
*
* Putting it all together:
* \code{.cpp}
* // Define a custom domain tag type
* struct my_domain{ static constexpr char const* name{"my domain"}; };
*
* // Define a named category tag type
* struct my_category{
* static constexpr char const* name{"my category"};
* static constexpr uint32_t id{42};
* };
*
* // Define a registered string tag type
* struct my_message{ static constexpr char const* message{"my message"}; };
*
* // For convenience, use aliases for domain scoped objects
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
* using my_named_category = nvtx3::named_category_in<my_domain>;
*
* // Default values for all attributes
* nvtx3::event_attributes attr{};
* my_scoped_range r0{attr};
*
* // Custom (unregistered) message, and unnamed category
* nvtx3::event_attributes attr1{"message", nvtx3::category{2}};
* my_scoped_range r1{attr1};
*
* // Alternatively, pass arguments of `event_attributes` ctor directly to
* // `my_scoped_range`
* my_scoped_range r2{"message", nvtx3::category{2}};
*
* // construct on first use a registered string
* auto& msg = my_registered_string::get<my_message>();
*
* // construct on first use a named category
* auto& cat = my_named_category::get<my_category>();
*
* // Use registered string and named category with a custom payload
* my_scoped_range r3{msg, cat, nvtx3::payload{42}};
*
* // Any number of arguments in any order
* my_scoped_range r{nvtx3::rgb{127, 255,0}, msg};
*
* \endcode
* \section MACROS Convenience Macros
*
* Oftentimes users want to quickly and easily add NVTX ranges to their library
* or application to aid in profiling and optimization.
*
* A convenient way to do this is to use the \ref NVTX3_FUNC_RANGE and
* \ref NVTX3_FUNC_RANGE_IN macros. These macros take care of constructing an
* `nvtx3::scoped_range_in` with the name of the enclosing function as the
* range's message.
*
* \code{.cpp}
* void some_function() {
* // Automatically generates an NVTX range for the duration of the function
* // using "some_function" as the event's message.
* NVTX3_FUNC_RANGE();
* }
* \endcode
*
*/
/* Temporary helper #defines, removed with #undef at end of header */
#if !defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET)
#if !defined(_MSC_VER) || _MSC_VER >= 1914
/* Every non-Microsoft compiler, and Microsoft's compiler from VS2017
* Update 7 (15.7) onward, gets the overloaded get() functions, whose
* static_assert messages clearly report mistakes in tag structs (see
* documentation for domain, named_category, and registered_string).
*/
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 1
#else
/* Microsoft's compiler prior to VS2017 Update 7 (15.7) uses an older parser
* that does not work with domain::get's specialization for domain::global,
* and would require extra conditions to make SFINAE work for the overloaded
* get() functions. Setting this flag to 0 disables the overloaded get() so
* that VS2015 and versions of VS2017 below 15.7 still work, without
* penalizing users of newer compilers. The cost is that errors in tag
* structs produce more complex compiler error messages instead of the
* clear static_assert messages from the get() overloads.
*/
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 0
#endif
/* Records that the flag was chosen here rather than supplied by the user. */
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
#endif
/* Within this header, nvtx3::NVTX3_VERSION_NAMESPACE resolves to nvtx3::vX,
* where "X" is the major version number.
*
* NVTX3_CONCAT pastes its two arguments into a single token.
* NVTX3_NAMESPACE_FOR adds a level of indirection so that its argument is
* macro-expanded (e.g. NVTX3_CPP_VERSION_MAJOR -> 1) before being pasted
* after the letter "v"; pasting directly would glue the macro's name instead
* of its value. */
#define NVTX3_CONCAT(A, B) A##B
#define NVTX3_NAMESPACE_FOR(VERSION) NVTX3_CONCAT(v, VERSION)
#define NVTX3_VERSION_NAMESPACE NVTX3_NAMESPACE_FOR(NVTX3_CPP_VERSION_MAJOR)
/* One macro that can be written unconditionally in front of a versioned
* namespace, so each minor version does not have to repeat the
* #if defined(NVTX3_INLINE_THIS_VERSION) test. It expands to nothing when
* NVTX3_INLINE_THIS_VERSION is not defined, and to "inline" when it is. */
#if !defined(NVTX3_INLINE_THIS_VERSION)
#define NVTX3_INLINE_IF_REQUESTED
#else
#define NVTX3_INLINE_IF_REQUESTED inline
#endif
/* Enables the use of constexpr when support for C++14 constexpr is present.
*
* Initialization of a class member that is a union to a specific union member
* can only be done in the body of a constructor, not in a member initializer
* list. A constexpr constructor must have an empty body until C++14, so there
* is no way to make an initializer of a member union constexpr in C++11. This
* macro allows making functions constexpr in C++14 or newer, but non-constexpr
* in C++11 compilation. It is used here on constructors that initialize their
* member unions.
*
* The feature-test macro is checked with defined() first so that compilers
* which do not provide __cpp_constexpr take the non-constexpr branch without
* evaluating an undefined identifier in #if (which warns under -Wundef).
*/
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
#define NVTX3_CONSTEXPR_IF_CPP14 constexpr
#else
#define NVTX3_CONSTEXPR_IF_CPP14
#endif
/* Static asserts are written through a macro so that testing tools can swap
* in a logging function; when nothing overrides it, it expands to a plain
* static_assert followed by a semicolon. Example override:
* #define NVTX3_STATIC_ASSERT(c, m) \
* do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0)
*
* NVTX3_STATIC_ASSERT_DEFINED_HERE records that the default below was used.
*/
#ifndef NVTX3_STATIC_ASSERT
#define NVTX3_STATIC_ASSERT(condition, message) static_assert(condition, message);
#define NVTX3_STATIC_ASSERT_DEFINED_HERE
#endif
/* Implementation sections, enclosed in guard macros for each minor version */
#ifndef NVTX3_CPP_DEFINITIONS_V1_0
#define NVTX3_CPP_DEFINITIONS_V1_0
#include "nvToolsExt.h"
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <cstddef>
namespace nvtx3 {
NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE
{
namespace detail {

/* Always derives from std::false_type; the template parameter only makes the
* value dependent on a type. */
template <typename Unused>
struct always_false : std::false_type {};

/* has_name<Tag>: true when the expression `Tag::name` is well-formed.
* The primary template is the fallback; the partial specialization is chosen
* only when substituting `Tag::name` succeeds. */
template <typename Tag, typename = void>
struct has_name : std::false_type {};

template <typename Tag>
struct has_name<Tag, decltype((void)Tag::name, void())> : std::true_type {};

/* has_id<Tag>: true when the expression `Tag::id` is well-formed. */
template <typename Tag, typename = void>
struct has_id : std::false_type {};

template <typename Tag>
struct has_id<Tag, decltype((void)Tag::id, void())> : std::true_type {};

/* has_message<Tag>: true when the expression `Tag::message` is well-formed. */
template <typename Tag, typename = void>
struct has_message : std::false_type {};

template <typename Tag>
struct has_message<Tag, decltype((void)Tag::message, void())> : std::true_type {};

/* is_c_string<Str>: true when Str converts to `char const*` or to
* `wchar_t const*`. */
template <typename Str, typename = void>
struct is_c_string : std::false_type {};

template <typename Str>
struct is_c_string<
  Str,
  typename std::enable_if<
    std::is_convertible<Str, char const*>::value ||
    std::is_convertible<Str, wchar_t const*>::value
  >::type
> : std::true_type {};

/* is_uint32<Value>: true when Value, after std::decay, is exactly uint32_t. */
template <typename Value>
using is_uint32 = std::is_same<typename std::decay<Value>::type, uint32_t>;

}  // namespace detail
/**
* @brief `domain`s allow for grouping NVTX events into a single scope to
* differentiate them from events in other `domain`s.
*
* By default, all NVTX constructs are placed in the "global" NVTX domain.
*
* A custom `domain` may be used in order to differentiate a library's or
* application's NVTX events from other events.
*
* `domain`s are expected to be long-lived and unique to a library or
* application. As such, it is assumed a domain's name is known at compile
* time. Therefore, all NVTX constructs that can be associated with a domain
* require the domain to be specified via a *type* `D` passed as an
* explicit template parameter.
*
* The type `domain::global` may be used to indicate that the global NVTX
* domain should be used.
*
* None of the C++ NVTX constructs require the user to manually construct a
* `domain` object. Instead, if a custom domain is desired, the user is
* expected to define a type `D` that contains a member
* `D::name` which resolves to either a `char const*` or `wchar_t
* const*`. The value of `D::name` is used to name and uniquely
* identify the custom domain.
*
* Upon the first use of an NVTX construct associated with the type
* `D`, the "construct on first use" pattern is used to construct a
* function local static `domain` object. All future NVTX constructs
* associated with `D` will use a reference to the previously
* constructed `domain` object. See `domain::get`.
*
* Example:
* \code{.cpp}
* // The type `my_domain` defines a `name` member used to name and identify
* // the `domain` object identified by `my_domain`.
* struct my_domain{ static constexpr char const* name{"my_domain"}; };
*
* // The NVTX range `r` will be grouped with all other NVTX constructs
* // associated with `my_domain`.
* nvtx3::scoped_range_in<my_domain> r{};
*
* // An alias can be created for a `scoped_range_in` in the custom domain
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
* my_scoped_range my_range{};
*
* // `domain::global` indicates that the global NVTX domain is used
* nvtx3::scoped_range_in<domain::global> r2{};
*
* // For convenience, `nvtx3::scoped_range` is an alias for a range in the
* // global domain
* nvtx3::scoped_range r3{};
* \endcode
*/
class domain {
public:
// A `domain` can be neither copied nor moved: all four copy/move special
// members are deleted, so the only way to obtain one is by reference from
// `domain::get`.
domain(domain const&) = delete;
domain& operator=(domain const&) = delete;
domain(domain&&) = delete;
domain& operator=(domain&&) = delete;
/**
* @brief Tag type for the "global" NVTX domain.
*
* This type may be passed as a template argument to any function/class
* expecting a type to identify a domain to indicate that the global domain
* should be used.
*
* All NVTX events in the global domain across all libraries and
* applications will be grouped together.
*
* Note that this tag is an empty struct: it intentionally has no `name`
* member.
*/
struct global {
};
// When NVTX3_USE_CHECKED_OVERLOADS_FOR_GET evaluates to true, `get` is
// provided as three mutually exclusive `enable_if`-constrained overloads: one
// working implementation and two that exist only to emit a readable
// diagnostic. Otherwise a single unconstrained `get` is provided (see the
// `#else` branch).
#if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
/**
* @brief Returns reference to an instance of a function local static
* `domain` object.
*
* Uses the "construct on first use" idiom to safely ensure the `domain`
* object is initialized exactly once upon first invocation of
* `domain::get<D>()`. All following invocations will return a
* reference to the previously constructed `domain` object. See
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
*
* None of the constructs in this header require the user to directly invoke
* `domain::get`. It is automatically invoked when constructing objects like
* a `scoped_range_in` or `category`. Advanced users may wish to use
* `domain::get` for the convenience of the "construct on first use" idiom
* when using domains with their own use of the NVTX C API.
*
* This function is threadsafe as of C++11. If two or more threads call
* `domain::get<D>` concurrently, exactly one of them is guaranteed
* to construct the `domain` object and the other(s) will receive a
* reference to the object after it is fully constructed.
*
* The domain's name is specified via the type `D` passed as an
* explicit template parameter. `D` is required to contain a
* member `D::name` that resolves to either a `char const*` or
* `wchar_t const*`. The value of `D::name` is used to name and
* uniquely identify the `domain`.
*
* This overload participates in overload resolution only when
* `detail::is_c_string<decltype(D::name)>::value` is true.
*
* Example:
* \code{.cpp}
* // The type `my_domain` defines a `name` member used to name and identify
* // the `domain` object identified by `my_domain`.
* struct my_domain{ static constexpr char const* name{"my domain"}; };
*
* auto& D1 = domain::get<my_domain>(); // First invocation constructs a
* // `domain` with the name "my domain"
*
* auto& D2 = domain::get<my_domain>(); // Quickly returns reference to
* // previously constructed `domain`.
* \endcode
*
* @tparam D Type that contains a `D::name` member used to
* name the `domain` object.
* @return Reference to the `domain` corresponding to the type `D`.
*/
template <typename D = global,
typename std::enable_if<
detail::is_c_string<decltype(D::name)>::value
, int>::type = 0>
static domain const& get() noexcept
{
static domain const d(D::name);
return d;
}
/**
* @brief Overload of `domain::get` to provide a clear compile error when
* `D` has a `name` member that is not directly convertible to either
* `char const*` or `wchar_t const*`.
*
* Selected only when `D::name` exists (otherwise `decltype(D::name)` is a
* substitution failure) and `detail::is_c_string` reports false for its type.
*/
template <typename D = global,
typename std::enable_if<
!detail::is_c_string<decltype(D::name)>::value
, int>::type = 0>
static domain const& get() noexcept
{
// The assertion condition depends on `D`, so it is only evaluated when this
// overload is actually instantiated.
NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
"Type used to identify an NVTX domain must contain a static constexpr member "
"called 'name' of type const char* or const wchar_t* -- 'name' member is not "
"convertible to either of those types");
static domain const unused;
return unused; // Function must compile for static_assert to be triggered
}
/**
* @brief Overload of `domain::get` to provide a clear compile error when
* `D` does not have a `name` member.
*
* Selected only when `detail::has_name<D>::value` is false.
*
* NOTE(review): `domain::global` has no `name` member, so the out-of-class
* explicit specialization `domain::get<domain::global>` presumably
* specializes this overload when the checked overloads are enabled. Confirm
* before changing this template's signature.
*/
template <typename D = global,
typename std::enable_if<
!detail::has_name<D>::value
, int>::type = 0>
static domain const& get() noexcept
{
// The assertion condition depends on `D`, so it is only evaluated when this
// overload is actually instantiated.
NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
"Type used to identify an NVTX domain must contain a static constexpr member "
"called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
static domain const unused;
return unused; // Function must compile for static_assert to be triggered
}
#else
/**
* @brief Unchecked variant of `domain::get`, compiled when
* `NVTX3_USE_CHECKED_OVERLOADS_FOR_GET` evaluates to false.
*
* Returns a reference to a function local static `domain` that is
* constructed from `D::name` on the first invocation. No `enable_if`
* constraints are applied here, so a `D` without a usable `name` member is
* diagnosed at the `domain const d(D::name)` line instead of by a dedicated
* static assertion.
*
* @tparam D Type that contains a `D::name` member used to
* name the `domain` object.
* @return Reference to the `domain` corresponding to the type `D`.
*/
template <typename D = global>
static domain const& get() noexcept
{
static domain const d(D::name);
return d;
}
#endif
/**
* @brief Conversion operator to `nvtxDomainHandle_t`.
*
* Allows transparently passing a domain object into an API expecting a
* native `nvtxDomainHandle_t` object. The conversion is implicit and returns
* the stored handle by value.
*/
operator nvtxDomainHandle_t() const noexcept { return _domain; }
private:
/**
* @brief Construct a new domain with the specified `name`.
*
* The handle is obtained by calling `nvtxDomainCreateA(name)`.
*
* This constructor is private as it is intended that `domain` objects only
* be created through the `domain::get` function.
*
* @param name A unique name identifying the domain
*/
explicit domain(char const* name) noexcept : _domain{nvtxDomainCreateA(name)} {}
/**
* @brief Construct a new domain with the specified wide-character `name`.
*
* The handle is obtained by calling `nvtxDomainCreateW(name)`.
*
* This constructor is private as it is intended that `domain` objects only
* be created through the `domain::get` function.
*
* @param name A unique name identifying the domain
*/
explicit domain(wchar_t const* name) noexcept : _domain{nvtxDomainCreateW(name)} {}
/**
* @brief Construct a new domain with the specified `name`.
*
* Delegates to the `char const*` constructor using `name.c_str()`.
*
* This constructor is private as it is intended that `domain` objects only
* be created through the `domain::get` function.
*
* @param name A unique name identifying the domain
*/
explicit domain(std::string const& name) noexcept : domain{name.c_str()} {}
/**
* @brief Construct a new domain with the specified wide-character `name`.
*
* Delegates to the `wchar_t const*` constructor using `name.c_str()`.
*
* This constructor is private as it is intended that `domain` objects only
* be created through the `domain::get` function.
*
* @param name A unique name identifying the domain
*/
explicit domain(std::wstring const& name) noexcept : domain{name.c_str()} {}
/**
* @brief Default constructor creates a `domain` representing the
* "global" NVTX domain.
*
* All events not associated with a custom `domain` are grouped in the
* "global" NVTX domain.
*
* No NVTX function is called here: `_domain` simply keeps the value from its
* default member initializer.
*/
domain() noexcept {}
/**
* @brief Intentionally avoid calling nvtxDomainDestroy on the `domain` object.
*
* No currently-available tools attempt to free domain resources when the
* nvtxDomainDestroy function is called, due to the thread-safety and
* efficiency challenges of freeing thread-local storage for other threads.
* Since libraries may be disallowed from introducing static destructors,
* and destroying the domain is likely to have no effect, the destructor
* for `domain` intentionally chooses to not destroy the domain.
*
* In a situation where domain destruction is necessary, either manually
* call nvtxDomainDestroy on the domain's handle, or make a class that
* derives from `domain` and calls nvtxDomainDestroy in its destructor.
*
* NOTE(review): this destructor (like every constructor) is declared in the
* private section, so a class deriving from `domain` would not have access
* to it. The derivation advice above may not be usable as written -- confirm.
*/
~domain() = default;
private:
nvtxDomainHandle_t const _domain{}; ///< The `domain`'s NVTX handle; value-initialized unless a naming constructor sets it
};
/**
 * @brief Explicit specialization of `domain::get` for the `domain::global`
 * tag type.
 *
 * Instead of creating a named domain, this specialization hands back a
 * default constructed `domain`, which is the object used whenever the
 * "global" NVTX domain is requested.
 *
 * The object is a function local static, so it is constructed on the first
 * call and every later call returns a reference to that same object.
 *
 * Events placed in the global domain are grouped together across all
 * libraries and applications.
 *
 * @return Reference to the `domain` representing the global NVTX domain.
 */
template <>
inline domain const& domain::get<domain::global>() noexcept
{
  // Default construction (no name) yields the global domain.
  static domain const global_domain{};
  return global_domain;
}
/**
 * @brief Opaque RGB color described by its red, green, and blue channel
 * values, for use as an event attribute (no transparency information).
 */
struct rgb {
  /// Integer type holding a single color channel
  using component_type = uint8_t;

  component_type red = 0;    ///< Intensity of the red channel
  component_type green = 0;  ///< Intensity of the green channel
  component_type blue = 0;   ///< Intensity of the blue channel

  /**
   * @brief Build an `rgb` from its three individual channel values.
   *
   * Each channel accepts values in the range `[0,255]`.
   *
   * @param red_ Intensity to store in the red channel
   * @param green_ Intensity to store in the green channel
   * @param blue_ Intensity to store in the blue channel
   */
  constexpr rgb(component_type red_, component_type green_, component_type blue_) noexcept
    : red(red_), green(green_), blue(blue_)
  {
  }
};
/**
* @brief Indicates the value of the alpha, red, green, and blue color
* channels for an ARGB color to use as an event attribute.
*
*/
struct argb final : rgb {
/**
* @brief Construct an argb with alpha, red, green, and blue channels
* specified by `alpha_`, `red_`, `green_`, and `blue_`, respectively.
*
* Valid values are in the range `[0,255]`.
*
* @param alpha_ Value of the alpha channel (opacity)
* @param red_ Value of the red channel
* @param green_ Value of the green channel
* @param blue_ Value of the blue channel
*
*/
constexpr argb(
component_type alpha_,
component_type red_,
component_type green_,