Skip to content

Commit

Permalink
pw_tokenizer: Simplify implementing a custom tokenization macro
Browse files Browse the repository at this point in the history
- Provide PW_TOKENIZER_REPLACE_FORMAT_STRING() to reduce boilerplate in
  custom tokenization macros.
- Provide PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT() to support
  tokenizing strings without the argument types value, which is limited
  to 14 arguments by default.

Change-Id: Iefe57eb3b52fc717c77bd1be6231976e7b91dc95
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/169121
Pigweed-Auto-Submit: Wyatt Hepler <[email protected]>
Reviewed-by: Ted Pudlik <[email protected]>
Commit-Queue: Auto-Submit <[email protected]>
  • Loading branch information
255 authored and CQ Bot Account committed Sep 1, 2023
1 parent 0f264ed commit b3717b1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 38 deletions.
2 changes: 2 additions & 0 deletions pw_tokenizer/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Tokenization
:members:
.. doxygenfunction:: pw::tokenizer::MinEncodingBufferSizeBytes
.. doxygendefine:: PW_TOKENIZE_FORMAT_STRING
.. doxygendefine:: PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT
.. doxygendefine:: PW_TOKENIZE_STRING
.. doxygendefine:: PW_TOKENIZE_STRING_DOMAIN
.. doxygendefine:: PW_TOKENIZE_STRING_DOMAIN_EXPR
Expand All @@ -44,6 +45,7 @@ Tokenization
.. doxygendefine:: PW_TOKENIZE_TO_BUFFER
.. doxygendefine:: PW_TOKENIZE_TO_BUFFER_DOMAIN
.. doxygendefine:: PW_TOKENIZE_TO_BUFFER_MASK
.. doxygendefine:: PW_TOKENIZER_REPLACE_FORMAT_STRING
.. doxygendefine:: PW_TOKENIZER_ARG_TYPES
.. doxygenfunction:: pw_tokenizer_EncodeArgs
.. doxygentypedef:: pw_tokenizer_Token
Expand Down
90 changes: 68 additions & 22 deletions pw_tokenizer/public/pw_tokenizer/tokenize.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ typedef uint32_t pw_tokenizer_Token;
}()

/// Tokenizes a string literal in a standalone statement using the specified
/// @rstref{domain <module-pw_tokenizer-domains>}. C and C++ compatible.
/// @rstref{domain<module-pw_tokenizer-domains>}. C and C++ compatible.
#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX, string_literal)

Expand Down Expand Up @@ -159,23 +159,56 @@ typedef uint32_t pw_tokenizer_Token;

/// Same as @c_macro{PW_TOKENIZE_TO_BUFFER_DOMAIN}, but applies a
/// @rstref{bit mask <module-pw_tokenizer-masks>} to the token.
#define PW_TOKENIZE_TO_BUFFER_MASK( \
domain, mask, buffer, buffer_size_pointer, format, ...) \
do { \
PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
_pw_tokenizer_ToBuffer(buffer, \
buffer_size_pointer, \
_pw_tokenizer_token, \
PW_TOKENIZER_ARG_TYPES(__VA_ARGS__) \
PW_COMMA_ARGS(__VA_ARGS__)); \
#define PW_TOKENIZE_TO_BUFFER_MASK( \
domain, mask, buffer, buffer_size_pointer, format, ...) \
do { \
PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
_pw_tokenizer_ToBuffer(buffer, \
buffer_size_pointer, \
PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
} while (0)

/// @brief Low-level macro for calling functions that handle tokenized strings.
///
/// Functions that work with tokenized format strings must take the following
/// arguments:
///
/// - The 32-bit token (@cpp_type{pw_tokenizer_Token})
/// - The 32- or 64-bit argument types (@cpp_type{pw_tokenizer_ArgTypes})
/// - Variadic arguments, if any
///
/// This macro expands to those arguments. Custom tokenization macros should use
/// this macro to pass these arguments to a function or other macro.
///
/** @code{cpp}
* EncodeMyTokenizedString(uint32_t token,
* pw_tokenizer_ArgTypes arg_types,
* ...);
*
* #define CUSTOM_TOKENIZATION_MACRO(format, ...) \
* PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
* EncodeMyTokenizedString(PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__))
* @endcode
*/
#define PW_TOKENIZER_REPLACE_FORMAT_STRING(...) \
_PW_TOKENIZER_REPLACE_FORMAT_STRING(PW_EMPTY_ARGS(__VA_ARGS__), __VA_ARGS__)

// Selects the implementation by appending empty_args to the
// _PW_TOKENIZER_REPLACE_FORMAT_STRING_ prefix, then forwards the variadic
// arguments to the selected macro. empty_args is the result of
// PW_EMPTY_ARGS(); presumably it is 1 when no arguments were passed and 0
// otherwise (TODO confirm against the PW_EMPTY_ARGS definition).
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING(empty_args, ...) \
_PW_CONCAT_2(_PW_TOKENIZER_REPLACE_FORMAT_STRING_, empty_args)(__VA_ARGS__)

// No-argument variant: expands to the token followed by 0u in the argument
// types position, with no trailing variadic arguments.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_1() _pw_tokenizer_token, 0u
// Variant that takes arguments: expands to the token, the
// PW_TOKENIZER_ARG_TYPES() value for the arguments, and the arguments
// themselves.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_0(...) \
_pw_tokenizer_token, PW_TOKENIZER_ARG_TYPES(__VA_ARGS__), __VA_ARGS__

/// Converts a series of arguments to a compact format that replaces the format
/// string literal. Evaluates to a `pw_tokenizer_ArgTypes` value.
///
/// Depending on the size of `pw_tokenizer_ArgTypes`, the bottom 4 or 6 bits
/// store the number of arguments and the remaining bits store the types, two
/// bits per type. The arguments are not evaluated; only their types are used.
///
/// In general, @c_macro{PW_TOKENIZER_ARG_TYPES} should not be used directly.
/// Instead, use @c_macro{PW_TOKENIZER_REPLACE_FORMAT_STRING}.
#define PW_TOKENIZER_ARG_TYPES(...) \
PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZER_TYPES_, __VA_ARGS__)

Expand Down Expand Up @@ -204,34 +237,47 @@ PW_EXTERN_C_END
/// since the same variable is used in every invocation.
///
/// The tokenized string uses the specified @rstref{tokenization domain
/// <module-pw_tokenizer-domains>}. Use `PW_TOKENIZER_DEFAULT_DOMAIN` for the
/// <module-pw_tokenizer-domains>}. Use `PW_TOKENIZER_DEFAULT_DOMAIN` for the
/// default. The token also may be masked; use `UINT32_MAX` to keep all bits.
///
/// This macro checks that the printf-style format string matches the arguments,
/// stores the format string in a special section, and calculates the string's
/// token at compile time.
/// This macro checks that the printf-style format string matches the arguments
/// and that no more than @c_macro{PW_TOKENIZER_MAX_SUPPORTED_ARGS} are
/// provided. It then stores the format string in a special section, and
/// calculates the string's token at compile time.
// clang-format off
#define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...) \
if (0) { /* Do not execute to prevent double evaluation of the arguments. */ \
pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__)); \
} \
\
/* Check that the macro is invoked with a supported number of arguments. */ \
#define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...) \
static_assert( \
PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS, \
"Tokenized strings cannot have more than " \
PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; " \
PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__)) \
" arguments were used for " #format " (" #__VA_ARGS__ ")"); \
PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, __VA_ARGS__)
// clang-format on

/// Equivalent to `PW_TOKENIZE_FORMAT_STRING`, but supports any number of
/// arguments.
///
/// This is a low-level macro that should rarely be used directly. It is
/// intended for situations when @cpp_type{pw_tokenizer_ArgTypes} is not used.
/// There are two situations where @cpp_type{pw_tokenizer_ArgTypes} is
/// unnecessary:
///
/// - The exact format string argument types and count are fixed.
/// - The format string supports a variable number of arguments of only one
/// type. In this case, @c_macro{PW_FUNCTION_ARG_COUNT} may be used to pass
/// the argument count to the function.
#define PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, ...) \
if (0) { /* Do not execute to prevent double evaluation of the arguments. */ \
pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__)); \
} \
\
/* Tokenize the string to a pw_tokenizer_Token at compile time. */ \
static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token = \
_PW_TOKENIZER_MASK_TOKEN(mask, format); \
\
_PW_TOKENIZER_RECORD_ORIGINAL_STRING(_pw_tokenizer_token, domain, format)

// clang-format on

// Creates unique names to use for tokenized string entries and linker sections.
#define _PW_TOKENIZER_UNIQUE(prefix) PW_CONCAT(prefix, __LINE__, _, __COUNTER__)

Expand Down
33 changes: 17 additions & 16 deletions pw_tokenizer/tokenization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,22 +197,27 @@ Tokenize a message with arguments in a custom macro
Projects can leverage the tokenization machinery in whichever way best suits
their needs. The most efficient way to use ``pw_tokenizer`` is to pass tokenized
data to a global handler function. A project's custom tokenization macro can
handle tokenized data in a function of their choosing.
handle tokenized data in a function of their choosing. The function may accept
any arguments, but its final arguments must be:

``pw_tokenizer`` provides two low-level macros for projects to use
to create custom tokenization macros:
* The 32-bit token (:cpp:type:`pw_tokenizer_Token`)
* The argument types (:cpp:type:`pw_tokenizer_ArgTypes`)
* Variadic arguments, if any

``pw_tokenizer`` provides two low-level macros to help projects create custom
tokenization macros:

* :c:macro:`PW_TOKENIZE_FORMAT_STRING`
* :c:macro:`PW_TOKENIZER_ARG_TYPES`
* :c:macro:`PW_TOKENIZER_REPLACE_FORMAT_STRING`

.. caution::

Note the spelling difference! The first macro begins with ``PW_TOKENIZE_``
(no ``R``) whereas the second begins with ``PW_TOKENIZER_``.

The outputs of these macros are typically passed to an encoding function. That
function encodes the token, argument types, and argument data to a buffer using
helpers provided by ``pw_tokenizer/encode_args.h``:
Use these macros to invoke an encoding function with the token, argument types,
and variadic arguments. The function can then encode the tokenized message to a
buffer using helpers in ``pw_tokenizer/encode_args.h``:

.. Note: pw_tokenizer_EncodeArgs is a C function so you would expect to
.. reference it as :c:func:`pw_tokenizer_EncodeArgs`. That doesn't work because
Expand All @@ -224,7 +229,6 @@ helpers provided by ``pw_tokenizer/encode_args.h``:

Example
-------

The following example implements a custom tokenization macro similar to
:ref:`module-pw_log_tokenized`.

Expand All @@ -245,14 +249,11 @@ The following example implements a custom tokenization macro similar to
} // extern "C"
#endif
#define PW_LOG_TOKENIZED_ENCODE_MESSAGE(metadata, format, ...) \
do { \
PW_TOKENIZE_FORMAT_STRING( \
PW_TOKENIZER_DEFAULT_DOMAIN, UINT32_MAX, format, __VA_ARGS__); \
EncodeTokenizedMessage(payload, \
_pw_tokenizer_token, \
PW_TOKENIZER_ARG_TYPES(__VA_ARGS__) \
PW_COMMA_ARGS(__VA_ARGS__)); \
#define PW_LOG_TOKENIZED_ENCODE_MESSAGE(metadata, format, ...) \
do { \
PW_TOKENIZE_FORMAT_STRING("logs", UINT32_MAX, format, __VA_ARGS__); \
EncodeTokenizedMessage( \
metadata, PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
} while (0)
In this example, the ``EncodeTokenizedMessage`` function would handle encoding
Expand Down

0 comments on commit b3717b1

Please sign in to comment.