Skip to content

Commit

Permalink
Make SSO support \0s, use memmove, add test (#6155)
Browse files Browse the repository at this point in the history
Supercedes #6027

Make SSO more generic by keeping track of its length explicitly,
allowing for embedded \0s to exist in the String (just like the non-SSO
ones).

Use memmove/memcpy_P when we know the length of a string to save CPU
time.

Add tests to inject \0s in a String to ensure it is still working as
designed.
  • Loading branch information
earlephilhower authored Jun 5, 2019
1 parent 7910121 commit 78a1a66
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 42 deletions.
44 changes: 24 additions & 20 deletions cores/esp8266/WString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

String::String(const char *cstr) {
init();
if(cstr)
if (cstr)
copy(cstr, strlen(cstr));
}

Expand Down Expand Up @@ -136,7 +136,7 @@ inline void String::init(void) {
}

void String::invalidate(void) {
if(!sso() && wbuffer())
if(!isSSO() && wbuffer())
free(wbuffer());
init();
}
Expand All @@ -154,17 +154,21 @@ unsigned char String::reserve(unsigned int size) {

unsigned char String::changeBuffer(unsigned int maxStrLen) {
// Can we use SSO here to avoid allocation?
if (maxStrLen < sizeof(sso_buf)) {
if (sso() || !buffer()) {
if (maxStrLen < sizeof(sso.buff) - 1) {
if (isSSO() || !buffer()) {
// Already using SSO, nothing to do
uint16_t oldLen = len();
setSSO(true);
setLen(oldLen);
return 1;
} else { // if bufptr && !sso()
// Using bufptr, need to shrink into sso_buff
char temp[sizeof(sso_buf)];
} else { // if bufptr && !isSSO()
// Using bufptr, need to shrink into sso.buff
char temp[sizeof(sso.buff)];
memcpy(temp, buffer(), maxStrLen);
free(wbuffer());
uint16_t oldLen = len();
setSSO(true);
setLen(oldLen);
memcpy(wbuffer(), temp, maxStrLen);
return 1;
}
Expand All @@ -176,12 +180,12 @@ unsigned char String::changeBuffer(unsigned int maxStrLen) {
return false;
}
uint16_t oldLen = len();
char *newbuffer = (char *) realloc(sso() ? nullptr : wbuffer(), newSize);
if(newbuffer) {
char *newbuffer = (char *) realloc(isSSO() ? nullptr : wbuffer(), newSize);
if (newbuffer) {
size_t oldSize = capacity() + 1; // include NULL.
if (sso()) {
if (isSSO()) {
// Copy the SSO buffer into allocated space
memcpy(newbuffer, sso_buf, sizeof(sso_buf));
memmove(newbuffer, sso.buff, sizeof(sso.buff));
}
if (newSize > oldSize)
{
Expand All @@ -206,7 +210,7 @@ String & String::copy(const char *cstr, unsigned int length) {
return *this;
}
setLen(length);
strcpy(wbuffer(), cstr);
memmove(wbuffer(), cstr, length + 1);
return *this;
}

Expand All @@ -216,28 +220,28 @@ String & String::copy(const __FlashStringHelper *pstr, unsigned int length) {
return *this;
}
setLen(length);
strcpy_P(wbuffer(), (PGM_P)pstr);
memcpy_P(wbuffer(), (PGM_P)pstr, length + 1); // We know wbuffer() cannot ever be in PROGMEM, so memcpy safe here
return *this;
}

#ifdef __GXX_EXPERIMENTAL_CXX0X__
void String::move(String &rhs) {
if(buffer()) {
if(capacity() >= rhs.len()) {
strcpy(wbuffer(), rhs.buffer());
memmove(wbuffer(), rhs.buffer(), rhs.length() + 1);

This comment has been minimized.

Copy link
@TD-er

TD-er Jun 6, 2019

Contributor

Just browsing through this code and this +1 looks a bit dangerous with the >= compare.

I know it is past the time to be sharp and clear headed here, and the empty beer bottle(s) next to me will support that idea. But are you sure it is correct behavior?

This comment has been minimized.

Copy link
@earlephilhower

earlephilhower Jun 6, 2019

Author Collaborator

I believe it's correct, or at least it is no worse than before.

This implements the original strcpy(), basically, and includes the terminal 0.

The confusion lies in len, capacity, and length all being subtly different.

Capacity is the allocated space not including the NUL. It's set to be 1 less than the malloc'd size. So you can memcpy 1 more than capacity always.

Length is a safe version of len (adds a null check) and returns the number of chars not including the terminal NUL (like strlen).

It's not the way I'd write it, but that's the way Arduino started out and we're stuck with it until a rewrite can happen.

setLen(rhs.len());
rhs.invalidate();
return;
} else {
if (!sso()) {
if (!isSSO()) {
free(wbuffer());
setBuffer(nullptr);
}
}
}
if (rhs.sso()) {
if (rhs.isSSO()) {
setSSO(true);
memmove(sso_buf, rhs.sso_buf, sizeof(sso_buf));
memmove(sso.buff, rhs.sso.buff, sizeof(sso.buff));
} else {
setSSO(false);
setBuffer(rhs.wbuffer());
Expand Down Expand Up @@ -309,7 +313,7 @@ unsigned char String::concat(const String &s) {
return 1;
if (!reserve(newlen))
return 0;
memcpy(wbuffer() + len(), buffer(), len());
memmove(wbuffer() + len(), buffer(), len());
setLen(newlen);
wbuffer()[len()] = 0;
return 1;
Expand All @@ -326,7 +330,7 @@ unsigned char String::concat(const char *cstr, unsigned int length) {
return 1;
if(!reserve(newlen))
return 0;
strcpy(wbuffer() + len(), cstr);
memmove(wbuffer() + len(), cstr, length + 1);
setLen(newlen);
return 1;
}
Expand Down Expand Up @@ -392,7 +396,7 @@ unsigned char String::concat(const __FlashStringHelper * str) {
if (length == 0) return 1;
unsigned int newlen = len() + length;
if (!reserve(newlen)) return 0;
strcpy_P(wbuffer() + len(), (PGM_P)str);
memcpy_P(wbuffer() + len(), (PGM_P)str, length + 1);
setLen(newlen);
return 1;
}
Expand Down
34 changes: 18 additions & 16 deletions cores/esp8266/WString.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,27 +250,29 @@ class String {
uint16_t cap;
uint16_t len;
};

// SSO is handled by checking the last byte of sso_buff.
// When not in SSO mode, that byte is set to 0xff, while when in SSO mode it is always 0x00 (so it can serve as the string terminator as well as a flag)
// This allows strings up up to 12 (11 + \0 termination) without any extra space.
enum { SSOSIZE = sizeof(struct _ptr) + 4 }; // Characters to allocate space for SSO, must be 12 or more
enum { CAPACITY_MAX = 65535 }; // If size of capacity changed, be sure to update this enum
// This allows strings up up to 11 (10 + \0 termination) without any extra space.
enum { SSOSIZE = sizeof(struct _ptr) + 4 - 1 }; // Characters to allocate space for SSO, must be 12 or more
struct _sso {
char buff[SSOSIZE];
unsigned char len : 7; // Ensure only one byte is allocated by GCC for the bitfields
unsigned char isSSO : 1;
} __attribute__((packed)); // Ensure that GCC doesn't expand the flag byte to a 32-bit word for alignment issues
enum { CAPACITY_MAX = 65535 }; // If typeof(cap) changed from uint16_t, be sure to update this enum to the max value storable in the type
union {
struct _ptr ptr;
char sso_buf[SSOSIZE];
struct _sso sso;
};
// Accessor functions
inline bool sso() const { return sso_buf[SSOSIZE - 1] == 0; }
inline unsigned int len() const { return sso() ? strlen(sso_buf) : ptr.len; }
inline unsigned int capacity() const { return sso() ? SSOSIZE - 1 : ptr.cap; }
inline void setSSO(bool sso) { sso_buf[SSOSIZE - 1] = sso ? 0x00 : 0xff; }
inline void setLen(int len) { if (!sso()) ptr.len = len; }
inline void setCapacity(int cap) { if (!sso()) ptr.cap = cap; }
inline void setBuffer(char *buff) { if (!sso()) ptr.buff = buff; }
inline bool isSSO() const { return sso.isSSO; }
inline unsigned int len() const { return isSSO() ? sso.len : ptr.len; }
inline unsigned int capacity() const { return isSSO() ? (unsigned int)SSOSIZE - 1 : ptr.cap; } // Size of max string not including terminal NUL
inline void setSSO(bool set) { sso.isSSO = set; }
inline void setLen(int len) { if (isSSO()) sso.len = len; else ptr.len = len; }
inline void setCapacity(int cap) { if (!isSSO()) ptr.cap = cap; }
inline void setBuffer(char *buff) { if (!isSSO()) ptr.buff = buff; }
// Buffer accessor functions
inline const char *buffer() const { return (const char *)(sso() ? sso_buf : ptr.buff); }
inline char *wbuffer() const { return sso() ? const_cast<char *>(sso_buf) : ptr.buff; } // Writable version of buffer
inline const char *buffer() const { return (const char *)(isSSO() ? sso.buff : ptr.buff); }
inline char *wbuffer() const { return isSSO() ? const_cast<char *>(sso.buff) : ptr.buff; } // Writable version of buffer

protected:
void init(void);
Expand Down
54 changes: 48 additions & 6 deletions tests/host/core/test_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,11 @@ TEST_CASE("String SSO works", "[core][String]")
REQUIRE(s.c_str() == savesso);
REQUIRE(s == "0123456789");
REQUIRE(s.length() == 10);
s += "a";
REQUIRE(s.c_str() == savesso);
REQUIRE(s == "0123456789a");
REQUIRE(s.length() == 11);
if (sizeof(savesso) == 4) {
s += "a";
REQUIRE(s.c_str() != savesso);
REQUIRE(s == "0123456789a");
REQUIRE(s.length() == 11);
s += "b";
REQUIRE(s.c_str() != savesso);
REQUIRE(s == "0123456789ab");
Expand All @@ -334,12 +334,16 @@ TEST_CASE("String SSO works", "[core][String]")
REQUIRE(s == "0123456789abc");
REQUIRE(s.length() == 13);
} else {
s += "a";
REQUIRE(s.c_str() == savesso);
REQUIRE(s == "0123456789a");
REQUIRE(s.length() == 11);
s += "bcde";
REQUIRE(s.c_str() == savesso);
REQUIRE(s == "0123456789abcde");
REQUIRE(s.length() == 15);
s += "fghi";
REQUIRE(s.c_str() == savesso);
REQUIRE(s.c_str() != savesso);
REQUIRE(s == "0123456789abcdefghi");
REQUIRE(s.length() == 19);
s += "j";
Expand All @@ -360,7 +364,7 @@ TEST_CASE("String SSO works", "[core][String]")
REQUIRE(s.length() == 23);
s += "nopq";
REQUIRE(s.c_str() != savesso);
REQUIRE(s == "0123456789abcdefghijklmnopq");
REQUIRE(s == "0123456789abcdefghijklmnopq");
REQUIRE(s.length() == 27);
s += "rstu";
REQUIRE(s.c_str() != savesso);
Expand Down Expand Up @@ -452,3 +456,41 @@ TEST_CASE("Issue #2736 - StreamString SSO fix", "[core][StreamString]")
s.print('\"');
REQUIRE(s == "{\"message\"");
}

TEST_CASE("Strings with NULs", "[core][String]")
{
// The following should never be done in a real app! This is only to inject 0s in the middle of a string.
// Fits in SSO...
String str("01234567");
REQUIRE(str.length() == 8);
char *ptr = (char *)str.c_str();
ptr[3] = 0;
String str2;
str2 = str;
REQUIRE(str2.length() == 8);
// Needs a buffer pointer
str = "0123456789012345678901234567890123456789";
ptr = (char *)str.c_str();
ptr[3] = 0;
str2 = str;
REQUIRE(str2.length() == 40);
String str3("a");
ptr = (char *)str3.c_str();
*ptr = 0;
REQUIRE(str3.length() == 1);
str3 += str3;
REQUIRE(str3.length() == 2);
str3 += str3;
REQUIRE(str3.length() == 4);
str3 += str3;
REQUIRE(str3.length() == 8);
str3 += str3;
REQUIRE(str3.length() == 16);
str3 += str3;
REQUIRE(str3.length() == 32);
str3 += str3;
REQUIRE(str3.length() == 64);
static char zeros[64] = {0};
const char *p = str3.c_str();
REQUIRE(!memcmp(p, zeros, 64));
}

0 comments on commit 78a1a66

Please sign in to comment.