From 05ad4e9218c08a61d0cadf5e74e9ca6c10546885 Mon Sep 17 00:00:00 2001 From: frederic-tingaud-sonarsource Date: Mon, 7 Oct 2024 16:07:13 +0000 Subject: [PATCH 1/6] Create rule S7118 --- rules/S7118/cfamily/metadata.json | 25 ++++++++++++++++++ rules/S7118/cfamily/rule.adoc | 44 +++++++++++++++++++++++++++++++ rules/S7118/metadata.json | 2 ++ 3 files changed, 71 insertions(+) create mode 100644 rules/S7118/cfamily/metadata.json create mode 100644 rules/S7118/cfamily/rule.adoc create mode 100644 rules/S7118/metadata.json diff --git a/rules/S7118/cfamily/metadata.json b/rules/S7118/cfamily/metadata.json new file mode 100644 index 00000000000..c51ad130156 --- /dev/null +++ b/rules/S7118/cfamily/metadata.json @@ -0,0 +1,25 @@ +{ + "title": "FIXME", + "type": "CODE_SMELL", + "status": "ready", + "remediation": { + "func": "Constant\/Issue", + "constantCost": "5min" + }, + "tags": [ + ], + "defaultSeverity": "Major", + "ruleSpecification": "RSPEC-7118", + "sqKey": "S7118", + "scope": "All", + "defaultQualityProfiles": ["Sonar way"], + "quickfix": "unknown", + "code": { + "impacts": { + "MAINTAINABILITY": "HIGH", + "RELIABILITY": "MEDIUM", + "SECURITY": "LOW" + }, + "attribute": "CONVENTIONAL" + } +} diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc new file mode 100644 index 00000000000..69c2a03a830 --- /dev/null +++ b/rules/S7118/cfamily/rule.adoc @@ -0,0 +1,44 @@ +FIXME: add a description + +// If you want to factorize the description uncomment the following line and create the file. +//include::../description.adoc[] + +== Why is this an issue? + +FIXME: remove the unused optional headers (that are commented out) + +//=== What is the potential impact? + +== How to fix it +//== How to fix it in FRAMEWORK NAME + +=== Code examples + +==== Noncompliant code example + +[source,cpp,diff-id=1,diff-type=noncompliant] +---- +FIXME +---- + +==== Compliant solution + +[source,cpp,diff-id=1,diff-type=compliant] +---- +FIXME +---- + +//=== How does this work? + +//=== Pitfalls + +//=== Going the extra mile + + +//== Resources +//=== Documentation +//=== Articles & blog posts +//=== Conference presentations +//=== Standards +//=== External coding guidelines +//=== Benchmarks diff --git a/rules/S7118/metadata.json b/rules/S7118/metadata.json new file mode 100644 index 00000000000..2c63c085104 --- /dev/null +++ b/rules/S7118/metadata.json @@ -0,0 +1,2 @@ +{ +} From d3f3afc7ff7539e6271f08bc3e75eac82e0eeaad Mon Sep 17 00:00:00 2001 From: Fred Tingaud Date: Wed, 9 Oct 2024 18:58:40 +0200 Subject: [PATCH 2/6] CPP-5790 Rule description --- rules/S7118/cfamily/metadata.json | 8 +- rules/S7118/cfamily/rule.adoc | 145 +++++++++++++++++++++++++----- 2 files changed, 128 insertions(+), 25 deletions(-) diff --git a/rules/S7118/cfamily/metadata.json b/rules/S7118/cfamily/metadata.json index c51ad130156..5617c23549e 100644 --- a/rules/S7118/cfamily/metadata.json +++ b/rules/S7118/cfamily/metadata.json @@ -1,5 +1,5 @@ { - "title": "FIXME", + "title": "String methods should be favored over low level char functions when querying content", "type": "CODE_SMELL", "status": "ready", "remediation": { @@ -16,10 +16,8 @@ "quickfix": "unknown", "code": { "impacts": { - "MAINTAINABILITY": "HIGH", - "RELIABILITY": "MEDIUM", - "SECURITY": "LOW" + "MAINTAINABILITY": "MEDIUM" }, - "attribute": "CONVENTIONAL" + "attribute": "CLEAR" } } diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc index 69c2a03a830..3866df56f62 100644 --- a/rules/S7118/cfamily/rule.adoc +++ b/rules/S7118/cfamily/rule.adoc @@ -1,44 +1,149 @@ -FIXME: add a description - -// If you want to factorize the description uncomment the following line and create the file. -//include::../description.adoc[] +`std::string` methods should be preferred over operations on the result of `c_str()` or `data()` when manipulating a standard string. == Why is this an issue? -FIXME: remove the unused optional headers (that are commented out) +When manipulating standard strings, it is recommended to use their methods, rather than accessing the underlying char array representation with low level C methods. The string methods are clearer in their purpose, less error-prone and will sometime be more performant -//=== What is the potential impact? +[source,cpp] +---- +void func(std::string const& param1, std::string const& param2) { + int size = strlen(param1.data()); // Noncompliant + bool same = (strcmp(param1.c_str(), param2.c_str()) == 0); // Noncompliant +} +---- == How to fix it -//== How to fix it in FRAMEWORK NAME -=== Code examples +=== size calculation + +Using strlen to calculate the size of a string will necessitate to find its null-terminator. The string already stores its own size and can return it for free. ==== Noncompliant code example [source,cpp,diff-id=1,diff-type=noncompliant] ---- -FIXME +int size = strlen(string.c_str()); // Noncompliant ---- ==== Compliant solution [source,cpp,diff-id=1,diff-type=compliant] ---- -FIXME +int size = string.size(); // Compliant +---- + +=== Embedded null-terminators detection + +Note that as mentioned in the "Pitfalls" part, `size()` will return the full size, + including embedded null characters. If the goal of the call to `c_str()` is to detect + the presence of embedded null-terminators, more explicit methods like `std::string::find()` or `std::string::contains()` should be used: + +==== Noncompliant code example + +[source,cpp,diff-id=2,diff-type=noncompliant] +---- +bool hasEmbeddedNull = (string.size() != strlen(string.c_str())); // Noncompliant +---- + +==== Compliant solution + +[source,cpp,diff-id=2,diff-type=compliant] +---- +bool hasEmbeddedNull = string.contains('\0'); // Compliant - since C++23 +bool hasEmbeddedNull2 = string.find('\0') != std::string::npos; // Compliant - all C++ versions +---- + +=== string comparison + +Comparing two full strings test their equality can be directly performed using the equality operator. For other comparisons, `std::string::compare` is more readable. + +==== Noncompliant code example + +[source,cpp,diff-id=3,diff-type=noncompliant] +---- +void comparisons(std::string const& s1, std::string const& s2) { + bool equal = strcmp(s1.c_str(), s2.c_str()) == 0; // Noncompliant + int comparePart = strncmp(s1.c_str() + 2, s2.c_str() + 2, 10); // Noncompliant +} +---- + +==== Compliant solution + +[source,cpp,diff-id=3,diff-type=compliant] +---- +void comparisons(std::string const& s1, std::string const& s2) { + bool equal = s1 == s2; // Compliant + int comparePart = s1.compare(2, 10, s2, 2, 10); // Compliant +} ---- -//=== How does this work? +=== Searching a substring -//=== Pitfalls +Either searching a substring or a single character inside a string can be done with `std::string::find()` + +==== Noncompliant code example + +[source,cpp,diff-id=4,diff-type=noncompliant] +---- +void search(std::string const& string) { + char const* sub = strstr(string.c_str(), "substring"); // Noncompliant + char const* dot = strchr(string.c_str(), '.'); // Noncompliant + // ... +} +---- + +==== Compliant solution + +[source,cpp,diff-id=4,diff-type=compliant] +---- +void search(std::string const& string) { + size_t sub = string.find("substring"); // Compliant + size_t dot = string.find('.'); // Compliant + // ... +} +---- + +=== Searching one of many characters + +Four methods allow to search for any character from a list inside a string: `find_first_of`, `find_first_not_of`, `find_last_of`, and `find_last_not_of`. They are better alternative than `strspn`, `strcspn`, and `strpbrk` when manipulating strings. + +==== Noncompliant code example + +[source,cpp,diff-id=5,diff-type=noncompliant] +---- +void search(std::string const& string) { + size_t firstNonNumeric = strspn(string.c_str(), "0123456789"); // Noncompliant + size_t firstSeparator = strcspn(string.c_str(), ",. ;"); // Noncompliant + // ... +} +---- + +==== Compliant solution + +[source,cpp,diff-id=5,diff-type=compliant] +---- +void search(std::string const& string) { + size_t firstNonNumeric = string.find_first_not_of("0123456789"); // Compliant + size_t firstSeparator = string.find_first_of(",. ;"); // Compliant + // ... +} +---- + +=== Pitfalls + +Using string methods instead of the result of `c_str()` (or `data()`) will have different results when the string contains embedded null-terminators in its content. Operations on `c_str()` will stop at the first null-terminator while string methods will apply on the whole content. + +Most of the time, operating on the whole content is the intended behavior, but in cases where it isn't, using methods might be less straight-forward. It is still recommended to fix the code, to make this pitfall clear to other maintainers. One possible approach is to create a `std::string_view` and use its methods, which mirror those of `std::string`. + +[source,cpp,diff-id=5,diff-type=compliant] +---- +std::string_view beginning{string.c_str()}; +// Use string_view methods afterward +// ... +---- -//=== Going the extra mile +== Resources +=== Documentation -//== Resources -//=== Documentation -//=== Articles & blog posts -//=== Conference presentations -//=== Standards -//=== External coding guidelines -//=== Benchmarks +* {cpp} reference - https://en.cppreference.com/w/cpp/string/basic_string[Native functions for std::string] From edc4352d1a6ea1a585437e261303f8c40b21f1b4 Mon Sep 17 00:00:00 2001 From: Fred Tingaud <95592999+frederic-tingaud-sonarsource@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:37:41 +0200 Subject: [PATCH 3/6] Grammarly fixes --- rules/S7118/cfamily/rule.adoc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc index 3866df56f62..57ec7af2745 100644 --- a/rules/S7118/cfamily/rule.adoc +++ b/rules/S7118/cfamily/rule.adoc @@ -2,7 +2,7 @@ == Why is this an issue? -When manipulating standard strings, it is recommended to use their methods, rather than accessing the underlying char array representation with low level C methods. The string methods are clearer in their purpose, less error-prone and will sometime be more performant +When manipulating standard strings, it is recommended to use their methods, rather than accessing the underlying char array representation with low-level C methods. The string methods are clearer in their purpose, less error-prone, and will sometimes be more performant [source,cpp] ---- @@ -16,7 +16,7 @@ void func(std::string const& param1, std::string const& param2) { === size calculation -Using strlen to calculate the size of a string will necessitate to find its null-terminator. The string already stores its own size and can return it for free. +Using `strlen` to calculate the size of a string will necessitate finding its null terminator. The string already stores its own size and can return it for free. ==== Noncompliant code example @@ -32,7 +32,7 @@ int size = strlen(string.c_str()); // Noncompliant int size = string.size(); // Compliant ---- -=== Embedded null-terminators detection +=== Embedded null-terminator detection Note that as mentioned in the "Pitfalls" part, `size()` will return the full size, including embedded null characters. If the goal of the call to `c_str()` is to detect @@ -105,7 +105,7 @@ void search(std::string const& string) { === Searching one of many characters -Four methods allow to search for any character from a list inside a string: `find_first_of`, `find_first_not_of`, `find_last_of`, and `find_last_not_of`. They are better alternative than `strspn`, `strcspn`, and `strpbrk` when manipulating strings. +Four methods allow searching for any character from a list inside a string: `find_first_of`, `find_first_not_of`, `find_last_of`, and `find_last_not_of`. They are better alternatives than `strspn`, `strcspn`, and `strpbrk` when manipulating strings. ==== Noncompliant code example @@ -131,9 +131,9 @@ void search(std::string const& string) { === Pitfalls -Using string methods instead of the result of `c_str()` (or `data()`) will have different results when the string contains embedded null-terminators in its content. Operations on `c_str()` will stop at the first null-terminator while string methods will apply on the whole content. +Using string methods instead of the result of `c_str()` (or `data()`) will have different results when the string contains embedded null terminators in its content. Operations on `c_str()` will stop at the first null-terminator while string methods will apply to the whole content. -Most of the time, operating on the whole content is the intended behavior, but in cases where it isn't, using methods might be less straight-forward. It is still recommended to fix the code, to make this pitfall clear to other maintainers. One possible approach is to create a `std::string_view` and use its methods, which mirror those of `std::string`. +Most of the time, operating on the whole content is the intended behavior, but in cases where it isn't, using methods might be less straightforward. It is still recommended to fix the code, to make this pitfall clear to other maintainers. One possible approach is to create a `std::string_view` and use its methods, which mirror those of `std::string`. [source,cpp,diff-id=5,diff-type=compliant] ---- From 37a97c5a21f539386d03f13503144fffddbe5e93 Mon Sep 17 00:00:00 2001 From: Fred Tingaud Date: Thu, 10 Oct 2024 16:13:48 +0200 Subject: [PATCH 4/6] Apply recomendations --- rules/S7118/cfamily/metadata.json | 2 +- rules/S7118/cfamily/rule.adoc | 53 +++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/rules/S7118/cfamily/metadata.json b/rules/S7118/cfamily/metadata.json index 5617c23549e..8ec3a926075 100644 --- a/rules/S7118/cfamily/metadata.json +++ b/rules/S7118/cfamily/metadata.json @@ -1,5 +1,5 @@ { - "title": "String methods should be favored over low level char functions when querying content", + "title": "String methods should be used to query content instead of C apis", "type": "CODE_SMELL", "status": "ready", "remediation": { diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc index 57ec7af2745..1188b44258b 100644 --- a/rules/S7118/cfamily/rule.adoc +++ b/rules/S7118/cfamily/rule.adoc @@ -2,7 +2,7 @@ == Why is this an issue? -When manipulating standard strings, it is recommended to use their methods, rather than accessing the underlying char array representation with low-level C methods. The string methods are clearer in their purpose, less error-prone, and will sometimes be more performant +When manipulating standard strings, it is recommended to use their methods, rather than accessing the underlying char array representation with low-level C methods. The string methods are clearer in their purpose, less error-prone, and will sometimes be more performant. [source,cpp] ---- @@ -36,26 +36,43 @@ int size = string.size(); // Compliant Note that as mentioned in the "Pitfalls" part, `size()` will return the full size, including embedded null characters. If the goal of the call to `c_str()` is to detect - the presence of embedded null-terminators, more explicit methods like `std::string::find()` or `std::string::contains()` should be used: + the presence of embedded null-terminators, more explicit methods like `std::string::find()` or `std::string::contains()` should be used. ==== Noncompliant code example [source,cpp,diff-id=2,diff-type=noncompliant] ---- -bool hasEmbeddedNull = (string.size() != strlen(string.c_str())); // Noncompliant +if (string.size() != strlen(string.c_str())) // Noncompliant +{ + // ... +} ---- ==== Compliant solution +In C++23 `std::string::contains()` is the best solution. + [source,cpp,diff-id=2,diff-type=compliant] ---- -bool hasEmbeddedNull = string.contains('\0'); // Compliant - since C++23 -bool hasEmbeddedNull2 = string.find('\0') != std::string::npos; // Compliant - all C++ versions +if (string.contains('\0')) // Compliant +{ + // ... +} +---- + +Before that, `std::string::find()` is an alternative. + +[source,cpp,diff-id=2,diff-type=compliant] +---- +if (string.find('\0') != std::string::npos) // Compliant +{ + // ... +} ---- === string comparison -Comparing two full strings test their equality can be directly performed using the equality operator. For other comparisons, `std::string::compare` is more readable. +Testing the equality between two full strings can be directly performed using the equality operator. For other comparisons, `std::string::compare` is more readable. ==== Noncompliant code example @@ -79,7 +96,7 @@ void comparisons(std::string const& s1, std::string const& s2) { === Searching a substring -Either searching a substring or a single character inside a string can be done with `std::string::find()` +Either searching a substring or a single character inside a string can be done with `std::string::find()`. ==== Noncompliant code example @@ -133,15 +150,33 @@ void search(std::string const& string) { Using string methods instead of the result of `c_str()` (or `data()`) will have different results when the string contains embedded null terminators in its content. Operations on `c_str()` will stop at the first null-terminator while string methods will apply to the whole content. -Most of the time, operating on the whole content is the intended behavior, but in cases where it isn't, using methods might be less straightforward. It is still recommended to fix the code, to make this pitfall clear to other maintainers. One possible approach is to create a `std::string_view` and use its methods, which mirror those of `std::string`. +Most of the time, operating on the whole content is the intended behavior, but in cases where it isn't, using methods might be less straightforward. It is still recommended to fix the code, to make this pitfall clear to other maintainers. -[source,cpp,diff-id=5,diff-type=compliant] +If the goal is to only keep the first part, the string can be cropped in place using `std::string::resize()` or `std::string::erase()`. +Calling `strlen(string.c_str())` inside a call to resize or erase is compliant by exception because it is the simplest way to crop a string like that. +[source,cpp] +---- +string.resize(strlen(string.c_str())); // Compliant by exception +---- + +Otherwise, if working with {cpp}17 or higher and if the goal is to just query the content, a good approach is to create a `std::string_view` and use its methods, which mirror those of `std::string`. + +[source,cpp] ---- std::string_view beginning{string.c_str()}; // Use string_view methods afterward // ... ---- +Otherwise, if working with older {cpp} versions or if the resulting string will be modified, doing a full copy also works. + +[source,cpp] +---- +std::string beginning{string.c_str()}; +// Use beginning and its methods afterward +// ... +---- + == Resources === Documentation From 65e8b9f3d86571d6eb8d1ec28a6f0c311d4136f9 Mon Sep 17 00:00:00 2001 From: Fred Tingaud Date: Thu, 10 Oct 2024 16:42:59 +0200 Subject: [PATCH 5/6] Going the extra mile --- rules/S7118/cfamily/rule.adoc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc index 1188b44258b..f452d91c087 100644 --- a/rules/S7118/cfamily/rule.adoc +++ b/rules/S7118/cfamily/rule.adoc @@ -177,8 +177,16 @@ std::string beginning{string.c_str()}; // ... ---- +=== Going the extra mile + +Direct replacement of one C function with the corresponding string method may not lead to the best code overall. When replacing a C function call, you should consider what overall functionality this call is part of and whether your {cpp} version allows to implement that functionality more easily and effectively. + +For example using ranges (`std::ranges::views::split`, `std::ranges::views::chunk`), using algorithms, using `std::stringstream` or `std::spanstream`, using `std::string_view`. + == Resources === Documentation * {cpp} reference - https://en.cppreference.com/w/cpp/string/basic_string[Native functions for std::string] + +* {cpp} reference - https://en.cppreference.com/w/cpp/ranges#Range_adaptors[ranges documentation] \ No newline at end of file From 0b24a2ab72dd0be2552d97342bd6c49944a7d0fe Mon Sep 17 00:00:00 2001 From: Fred Tingaud Date: Thu, 10 Oct 2024 19:04:33 +0200 Subject: [PATCH 6/6] Apply more suggestions --- rules/S7118/cfamily/rule.adoc | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/rules/S7118/cfamily/rule.adoc b/rules/S7118/cfamily/rule.adoc index f452d91c087..19a5ce6494a 100644 --- a/rules/S7118/cfamily/rule.adoc +++ b/rules/S7118/cfamily/rule.adoc @@ -80,7 +80,7 @@ Testing the equality between two full strings can be directly performed using th ---- void comparisons(std::string const& s1, std::string const& s2) { bool equal = strcmp(s1.c_str(), s2.c_str()) == 0; // Noncompliant - int comparePart = strncmp(s1.c_str() + 2, s2.c_str() + 2, 10); // Noncompliant + int comparePart = strncmp(s1.c_str() + 2, s2.c_str() + 3, 10); // Noncompliant } ---- @@ -90,10 +90,17 @@ void comparisons(std::string const& s1, std::string const& s2) { ---- void comparisons(std::string const& s1, std::string const& s2) { bool equal = s1 == s2; // Compliant - int comparePart = s1.compare(2, 10, s2, 2, 10); // Compliant + int comparePart = s1.compare(2, 10, s2, 3, 10); // Compliant } ---- +With C++17 and higher, substring comparison can also be done through `std::string_view`. + +[source,cpp] +---- +auto comparePart = std::string_view(s1).substr(2, 10) <=> std::string_view(s2).substr(3, 10); +---- + === Searching a substring Either searching a substring or a single character inside a string can be done with `std::string::find()`. @@ -181,7 +188,20 @@ std::string beginning{string.c_str()}; Direct replacement of one C function with the corresponding string method may not lead to the best code overall. When replacing a C function call, you should consider what overall functionality this call is part of and whether your {cpp} version allows to implement that functionality more easily and effectively. -For example using ranges (`std::ranges::views::split`, `std::ranges::views::chunk`), using algorithms, using `std::stringstream` or `std::spanstream`, using `std::string_view`. +For example using ranges (`std::ranges::views::split`, `std::ranges::views::chunk`), using algorithms, using `std::istringstream` or `std::ispanstream`, using `std::string_view`. + +An example of a function splitting words in a moddern way could be as follow. + +[source,cpp] +---- +void treatWord(std::string_view word); + +void treatAllWords(std::string_view text) { + constexpr std::string_view delim{" "}; + for (const auto word : std::views::split(text, delim)) + treatWord(std::string_view{word}); +} +---- == Resources