Skip to content

Commit

Permalink
strngs: Modernize and format code
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Mar 31, 2019
1 parent 127d0e3 commit 89ba48b
Showing 1 changed file with 142 additions and 135 deletions.
277 changes: 142 additions & 135 deletions src/ccutil/strngs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
* File: strngs.h (Formerly strings.h)
* Description: STRING class definition.
* Author: Ray Smith
* Created: Fri Feb 15 09:15:01 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -20,11 +19,11 @@
#ifndef STRNGS_H
#define STRNGS_H

#include <cassert> // for assert
#include <cstdint> // for uint32_t
#include <cstdio> // for FILE
#include <cstring> // for strncpy
#include "platform.h" // for TESS_API
#include <cassert> // for assert
#include <cstdint> // for uint32_t
#include <cstdio> // for FILE
#include <cstring> // for strncpy
#include "platform.h" // for TESS_API

namespace tesseract {
class TFile;
Expand All @@ -38,144 +37,152 @@ class TFile;
// like length arrays and many places cast away the const on string()
// to mutate the string. Turning this off means that internally we
// cannot assume we know the strlen.
#define STRING_IS_PROTECTED 0

template <typename T> class GenericVector;

class TESS_API STRING
{
public:
STRING();
STRING(const STRING &string);
STRING(const char *string);
STRING(const char *data, int length);
~STRING();

// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Writes to the given file. Returns false in case of error.
bool Serialize(tesseract::TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(tesseract::TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(tesseract::TFile* fp);

bool contains(const char c) const;
int32_t length() const;
int32_t size() const { return length(); }
// Workaround to avoid g++ -Wsign-compare warnings.
uint32_t unsigned_size() const {
const int32_t len = length();
assert(0 <= len);
return static_cast<uint32_t>(len);
}
const char *string() const;
const char *c_str() const;

inline char* strdup() const {
int32_t len = length() + 1;
return strncpy(new char[len], GetCStr(), len);
}
#define STRING_IS_PROTECTED 0

template <typename T>
class GenericVector;

class TESS_API STRING {
public:
STRING();
STRING(const STRING& string);
STRING(const char* string);
STRING(const char* data, int length);
~STRING();

// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Writes to the given file. Returns false in case of error.
bool Serialize(tesseract::TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// NOTE(review): this overload takes no swap argument; presumably any
// big/little-endian swap is handled by the TFile itself - confirm.
bool DeSerialize(tesseract::TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(tesseract::TFile* fp);

bool contains(char c) const;
int32_t length() const;
// Returns the string length; simply forwards to length().
int32_t size() const {
return length();
}
// Workaround to avoid g++ -Wsign-compare warnings: returns length() as an
// unsigned value so callers can compare it with unsigned quantities.
uint32_t unsigned_size() const {
const int32_t len = length();
// A negative length would wrap to a huge value in the cast below.
assert(0 <= len);
return static_cast<uint32_t>(len);
}
const char* string() const;
const char* c_str() const;

// Returns a freshly allocated copy of the character data.
// The buffer comes from new[], so the caller presumably owns it and must
// release it with delete[] (not free()).
inline char* strdup() const {
  // One extra byte beyond length(); presumably for the '\0' terminator.
  const int32_t buffer_size = length() + 1;
  char* const duplicate = new char[buffer_size];
  strncpy(duplicate, GetCStr(), buffer_size);
  return duplicate;
}

#if STRING_IS_PROTECTED
const char &operator[] (int32_t index) const;
// len is number of chars in s to insert starting at index in this string
void insert_range(int32_t index, const char*s, int len);
void erase_range(int32_t index, int len);
const char& operator[](int32_t index) const;
// len is number of chars in s to insert starting at index in this string
void insert_range(int32_t index, const char* s, int len);
void erase_range(int32_t index, int len);
#else
char &operator[] (int32_t index) const;
char& operator[](int32_t index) const;
#endif
void split(const char c, GenericVector<STRING> *splited);
void truncate_at(int32_t index);

bool operator== (const STRING & string) const;
bool operator!= (const STRING & string) const;
bool operator!= (const char *string) const;

STRING & operator= (const char *string);
STRING & operator= (const STRING & string);

STRING operator+ (const STRING & string) const;
STRING operator+ (const char ch) const;

STRING & operator+= (const char *string);
STRING & operator+= (const STRING & string);
STRING & operator+= (const char ch);

// Assignment for strings which are not null-terminated.
void assign(const char *cstr, int len);

// Appends the given string and int (as a %d) to this.
// += cannot be used for ints as there as a char += operator that would
// be ambiguous, and ints usually need a string before or between them
// anyway.
void add_str_int(const char* str, int number);
// Appends the given string and double (as a %.8g) to this.
void add_str_double(const char* str, double number);

// ensure capacity but keep pointer encapsulated
inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); }

private:
typedef struct STRING_HEADER {
// How much space was allocated in the string buffer for char data.
int capacity_;

// used_ is how much of the capacity is currently being used,
// including a '\0' terminator.
//
// If used_ is 0 then string is nullptr (not even the '\0')
// else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
// else strlen is >= 0 (not nullptr) but needs to be computed.
// this condition is set when encapsulation is violated because
// an API returned a mutable string.
//
// capacity_ - used_ = excess capacity that the string can grow
// without reallocating
mutable int used_;
} STRING_HEADER;

// To preserve the behavior of the old serialization, we only have space
// for one pointer in this structure. So we are embedding a data structure
// at the start of the storage that will hold additional state variables,
// then storing the actual string contents immediately after.
STRING_HEADER* data_;

// returns the header part of the storage
inline STRING_HEADER* GetHeader() {
return data_;
}
inline const STRING_HEADER* GetHeader() const {
return data_;
}

// returns the string data part of storage
inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); }

inline const char* GetCStr() const {
return ((const char *)data_) + sizeof(STRING_HEADER);
}
inline bool InvariantOk() const {
void split(char c, GenericVector<STRING>* splited);
void truncate_at(int32_t index);

bool operator==(const STRING& string) const;
bool operator!=(const STRING& string) const;
bool operator!=(const char* string) const;

STRING& operator=(const char* string);
STRING& operator=(const STRING& string);

STRING operator+(const STRING& string) const;
STRING operator+(char ch) const;

STRING& operator+=(const char* string);
STRING& operator+=(const STRING& string);
STRING& operator+=(char ch);

// Assignment for strings which are not null-terminated.
void assign(const char* cstr, int len);

// Appends the given string and int (as a %d) to this.
// += cannot be used for ints as there is a char += operator that would
// be ambiguous, and ints usually need a string before or between them
// anyway.
void add_str_int(const char* str, int number);
// Appends the given string and double (as a %.8g) to this.
void add_str_double(const char* str, double number);

// Reserves room for at least min_capacity characters while keeping the
// underlying buffer pointer encapsulated: the pointer returned by
// ensure_cstr() is deliberately dropped.
inline void ensure(int32_t min_capacity) {
  static_cast<void>(ensure_cstr(min_capacity));
}

private:
// Bookkeeping record stored at the start of the string's storage block.
struct STRING_HEADER {
  // Number of bytes allocated in the string buffer for char data.
  int capacity_;

  // Number of bytes of capacity_ currently in use, counting the '\0'
  // terminator:
  //   used_ == 0 : the string is nullptr (there is not even a '\0');
  //   used_ >  0 : used_ equals strlen() + 1;
  //   used_ <  0 : the string is not nullptr (strlen >= 0), but its length
  //                has to be recomputed. This state arises when
  //                encapsulation was violated because an API handed out a
  //                mutable string.
  //
  // capacity_ - used_ is the spare room the string can grow into without
  // reallocating.
  mutable int used_;
};

// To preserve the behavior of the old serialization, we only have space
// for one pointer in this structure. So we are embedding a data structure
// at the start of the storage that will hold additional state variables,
// then storing the actual string contents immediately after.
STRING_HEADER* data_;

// Returns the header part of the storage, i.e. the data_ pointer itself,
// since the header sits at the very start of the storage block.
inline STRING_HEADER* GetHeader() {
return data_;
}
// Const overload: returns a read-only pointer to the header (data_).
inline const STRING_HEADER* GetHeader() const {
return data_;
}

// Returns the mutable character data, which starts right after the
// STRING_HEADER at the front of the storage block.
inline char* GetCStr() {
  char* const storage = reinterpret_cast<char*>(data_);
  return storage + sizeof(STRING_HEADER);
}

// Returns a read-only pointer to the character data that follows the
// STRING_HEADER at the front of the storage block.
inline const char* GetCStr() const {
  const char* const storage = reinterpret_cast<const char*>(data_);
  return storage + sizeof(STRING_HEADER);
}
inline bool InvariantOk() const {
#if STRING_IS_PROTECTED
return (GetHeader()->used_ == 0) ?
(string() == nullptr) : (GetHeader()->used_ == (strlen(string()) + 1));
return (GetHeader()->used_ == 0)
? (string() == nullptr)
: (GetHeader()->used_ == (strlen(string()) + 1));
#else
return true;
return true;
#endif
}
}

// Ensure string has requested capacity as optimization
// to avoid unnecessary reallocations.
// The return value is a cstr buffer with at least requested capacity
char* ensure_cstr(int32_t min_capacity);
// Ensure string has requested capacity as optimization
// to avoid unnecessary reallocations.
// The return value is a cstr buffer with at least requested capacity
char* ensure_cstr(int32_t min_capacity);

void FixHeader() const; // make used_ non-negative, even if const
void FixHeader() const; // make used_ non-negative, even if const

char* AllocData(int used, int capacity);
void DiscardData();
char* AllocData(int used, int capacity);
void DiscardData();
};

#endif

0 comments on commit 89ba48b

Please sign in to comment.