/* Source: tornavis/source/blender/blenlib/BLI_string_ref.hh (542 lines, 15 KiB, C++). */

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#pragma once
/** \file
* \ingroup bli
*
* A `blender::StringRef` references a const char array owned by someone else. It is just a pointer
* and a size. Since the memory is not owned, StringRef should not be used to transfer ownership of
* the string. The data referenced by a StringRef cannot be mutated through it.
*
* A StringRef is NOT null-terminated. This makes it much more powerful within C++, because we can
* also cut off parts of the end without creating a copy. When interfacing with C code that expects
* null-terminated strings, `blender::StringRefNull` can be used. It is essentially the same as
* StringRef, but with the restriction that the string has to be null-terminated.
*
* Whenever possible, string parameters should be of type StringRef and the string return type
* should be StringRefNull. Don't forget that a StringRefNull does not own the string, so don't
* return it when the string only exists within the scope of the function. This convention makes
* functions usable in as many contexts as possible.
*
* blender::StringRef vs. std::string_view:
* Both types are certainly very similar. The main benefit of using StringRef in Blender is that
* this allows us to add convenience methods at any time. Especially, when doing a lot of string
* manipulation, this helps to keep the code clean. Furthermore, we need StringRefNull anyway,
* because there is a lot of C code that expects null-terminated strings. Conversion between
* StringRef and string_view is very cheap and can be done at api boundaries at essentially no
* cost. Another benefit of using StringRef is that it uses signed integers, thus developers
* have to deal less with issues resulting from unsigned integers.
*/
#include <algorithm>
#include <cstring>
#include <sstream>
#include <string>
#include <string_view>

#include "BLI_span.hh"
#include "BLI_utildefines.h"
namespace blender {
class StringRef;
/**
 * A common base class for StringRef and StringRefNull. This should never be used in other files.
 * It only exists to avoid some code duplication.
 */
class StringRefBase {
 protected:
  /** Start of the referenced chars. The memory is not owned by the reference. */
  const char *data_;
  /** Number of referenced bytes, not counting any null-terminator. */
  int64_t size_;

  /** Protected, so that only the derived reference types can be constructed. */
  StringRefBase(const char *data, const int64_t size) : data_(data), size_(size)
  {
  }

 public:
  /* Similar to string_view::npos, but signed. */
  static constexpr int64_t not_found = -1;

  /**
   * Return the (byte-)length of the referenced string, without any null-terminator.
   */
  int64_t size() const
  {
    return size_;
  }

  /**
   * Return true when the referenced string has a size of zero.
   */
  bool is_empty() const
  {
    return size_ == 0;
  }

  /**
   * Return a pointer to the start of the string.
   */
  const char *data() const
  {
    return data_;
  }

  /**
   * Reference the same chars as a Span.
   */
  operator Span<char>() const
  {
    return Span<char>(data_, size_);
  }

  /**
   * Implicitly convert to std::string. This is convenient in most cases, but you have to be a bit
   * careful not to convert to std::string accidentally.
   */
  operator std::string() const
  {
    return std::string(data_, static_cast<size_t>(size_));
  }

  /**
   * Implicitly convert to std::string_view. No chars are copied.
   */
  operator std::string_view() const
  {
    return std::string_view(data_, static_cast<size_t>(size_));
  }

  /**
   * Pointer to the first char, so that the reference can be used in range-based for loops.
   */
  const char *begin() const
  {
    return data_;
  }

  /**
   * Pointer one past the last referenced char.
   */
  const char *end() const
  {
    return data_ + size_;
  }

  /**
   * Return an IndexRange that is constructed from the size of the string.
   */
  IndexRange index_range() const
  {
    return IndexRange(size_);
  }

  /**
   * Copy the string into a buffer. The buffer has to be one byte larger than the size of the
   * string, because the copied string will be null-terminated. Only use this when you are
   * absolutely sure that the buffer is large enough.
   */
  void unsafe_copy(char *dst) const
  {
    /* Passing a null pointer to memcpy is undefined behavior even when the size is zero, so the
     * call is skipped for empty strings. */
    if (size_ > 0) {
      memcpy(dst, data_, static_cast<size_t>(size_));
    }
    dst[size_] = '\0';
  }

  /**
   * Copy the string into a buffer. The copied string will be null-terminated.
   *
   * When the buffer cannot hold the string including its null-terminator, a BLI_assert is
   * triggered and an empty string is written instead. Nothing is written at all when dst_size is
   * not positive.
   */
  void copy(char *dst, const int64_t dst_size) const
  {
    if (size_ < dst_size) {
      this->unsafe_copy(dst);
    }
    else {
      BLI_assert(false);
      /* Only terminate the buffer when it has room for at least one char. */
      if (dst_size > 0) {
        dst[0] = '\0';
      }
    }
  }

  /**
   * Copy the string into a char array. The copied string will be null-terminated. This forwards
   * to the overload above with the array length as buffer size.
   */
  template<size_t N> void copy(char (&dst)[N]) const
  {
    this->copy(dst, static_cast<int64_t>(N));
  }

  /**
   * Returns true when the string begins with the given prefix. Otherwise false.
   */
  bool startswith(StringRef prefix) const;

  /**
   * Returns true when the string ends with the given suffix. Otherwise false.
   */
  bool endswith(StringRef suffix) const;

  /**
   * Return a reference to the sub-string that begins at `start` and contains at most `size`
   * chars.
   */
  StringRef substr(int64_t start, const int64_t size) const;

  /**
   * Get the first char in the string. This invokes undefined behavior when the string is empty.
   */
  const char &front() const
  {
    BLI_assert(size_ >= 1);
    return data_[0];
  }

  /**
   * Get the last char in the string. This invokes undefined behavior when the string is empty.
   */
  const char &back() const
  {
    BLI_assert(size_ >= 1);
    return data_[size_ - 1];
  }

  /**
   * The behavior of those functions matches the standard library implementation of
   * std::string_view. Instead of `npos`, #not_found is returned when nothing was found.
   */
  int64_t find(char c, int64_t pos = 0) const;
  int64_t find(StringRef str, int64_t pos = 0) const;
  int64_t rfind(char c, int64_t pos = INT64_MAX) const;
  int64_t rfind(StringRef str, int64_t pos = INT64_MAX) const;
  int64_t find_first_of(StringRef chars, int64_t pos = 0) const;
  int64_t find_first_of(char c, int64_t pos = 0) const;
  int64_t find_last_of(StringRef chars, int64_t pos = INT64_MAX) const;
  int64_t find_last_of(char c, int64_t pos = INT64_MAX) const;
  int64_t find_first_not_of(StringRef chars, int64_t pos = 0) const;
  int64_t find_first_not_of(char c, int64_t pos = 0) const;
  int64_t find_last_not_of(StringRef chars, int64_t pos = INT64_MAX) const;
  int64_t find_last_not_of(char c, int64_t pos = INT64_MAX) const;
};
/**
* References a null-terminated const char array.
*/
class StringRefNull : public StringRefBase {
public:
StringRefNull() : StringRefBase("", 0)
{
}
/**
* Construct a StringRefNull from a null terminated c-string. The pointer must not point to
* NULL.
*/
StringRefNull(const char *str) : StringRefBase(str, static_cast<int64_t>(strlen(str)))
{
BLI_assert(str != NULL);
BLI_assert(data_[size_] == '\0');
}
/**
* Construct a StringRefNull from a null terminated c-string. This invokes undefined behavior
* when the given size is not the correct size of the string.
*/
StringRefNull(const char *str, const int64_t size) : StringRefBase(str, size)
{
BLI_assert(static_cast<int64_t>(strlen(str)) == size);
}
/**
* Reference a std::string. Remember that when the std::string is destructed, the StringRefNull
* will point to uninitialized memory.
*/
StringRefNull(const std::string &str) : StringRefNull(str.c_str())
{
}
/**
* Get the char at the given index.
*/
char operator[](const int64_t index) const
{
BLI_assert(index >= 0);
/* Use '<=' instead of just '<', so that the null character can be accessed as well. */
BLI_assert(index <= size_);
return data_[index];
}
/**
* Returns the beginning of a null-terminated char array.
*
* This is like ->data(), but can only be called on a StringRefNull.
*/
const char *c_str() const
{
return data_;
}
};
/**
* References a const char array. It might not be null terminated.
*/
class StringRef : public StringRefBase {
public:
StringRef() : StringRefBase(nullptr, 0)
{
}
/**
* StringRefNull can be converted into StringRef, but not the other way around.
*/
StringRef(StringRefNull other) : StringRefBase(other.data(), other.size())
{
}
/**
* Create a StringRef from a null-terminated c-string.
*/
StringRef(const char *str) : StringRefBase(str, str ? static_cast<int64_t>(strlen(str)) : 0)
{
}
StringRef(const char *str, const int64_t length) : StringRefBase(str, length)
{
}
/**
* Create a StringRef from a start and end pointer. This invokes undefined behavior when the
* second point points to a smaller address than the first one.
*/
StringRef(const char *begin, const char *one_after_end)
: StringRefBase(begin, static_cast<int64_t>(one_after_end - begin))
{
BLI_assert(begin <= one_after_end);
}
/**
* Reference a std::string. Remember that when the std::string is destructed, the StringRef
* will point to uninitialized memory.
*/
StringRef(const std::string &str) : StringRefBase(str.data(), static_cast<int64_t>(str.size()))
{
}
StringRef(std::string_view view) : StringRefBase(view.data(), static_cast<int64_t>(view.size()))
{
}
/**
* Returns a new StringRef that does not contain the first n chars.
*
* This is similar to std::string_view::remove_prefix.
*/
StringRef drop_prefix(const int64_t n) const
{
BLI_assert(n >= 0);
BLI_assert(n <= size_);
return StringRef(data_ + n, size_ - n);
}
/**
* Return a new StringRef with the given prefix being skipped. This invokes undefined behavior if
* the string does not begin with the given prefix.
*/
StringRef drop_prefix(StringRef prefix) const
{
BLI_assert(this->startswith(prefix));
return this->drop_prefix(prefix.size());
}
/**
* Return a new StringRef that does not contain the last n chars.
*
* This is similar to std::string_view::remove_suffix.
*/
StringRef drop_suffix(const int64_t n) const
{
BLI_assert(n >= 0);
BLI_assert(n <= size_);
return StringRef(data_, size_ - n);
}
/**
* Get the char at the given index.
*/
char operator[](int64_t index) const
{
BLI_assert(index >= 0);
BLI_assert(index < size_);
return data_[index];
}
};
/* More inline functions
***************************************/
inline std::ostream &operator<<(std::ostream &stream, StringRef ref)
{
stream << std::string(ref);
return stream;
}
/**
 * Write the referenced chars (without the null-terminator) to the stream. The chars are streamed
 * through a std::string_view, so that no temporary std::string has to be allocated.
 */
inline std::ostream &operator<<(std::ostream &stream, StringRefNull ref)
{
  stream << std::string_view(ref.data(), static_cast<size_t>(ref.size()));
  return stream;
}
/**
 * Concatenate two #StringRefs into a newly allocated std::string.
 * The allocation makes this less efficient than working with references, but it is convenient in
 * most cases.
 */
inline std::string operator+(StringRef a, StringRef b)
{
  std::string joined(a);
  joined += std::string(b);
  return joined;
}
/* This does not compare StringRef and std::string_view, because of ambiguous overloads. This is
 * not a problem when std::string_view is only used at api boundaries. To compare a StringRef and a
 * std::string_view, one should convert the std::string_view to StringRef (which is very cheap).
 * Ideally, we only use StringRef in our code to avoid this problem altogether. */
/**
 * Return true when both references have the same size and contain the same bytes.
 *
 * The comparison goes through std::string_view, like the relational operators do. This way all
 * bytes are compared (a comparison that treats the data as c-strings would stop at the first
 * embedded null character), and the data pointers of empty references are never passed to a
 * c-string function.
 */
inline bool operator==(StringRef a, StringRef b)
{
  return std::string_view(a) == std::string_view(b);
}
/** Return true when the two referenced strings are not equal. */
inline bool operator!=(StringRef a, StringRef b)
{
  const bool is_equal = (a == b);
  return !is_equal;
}
/** Return true when `a` is ordered before `b`, using the ordering of std::string_view. */
inline bool operator<(StringRef a, StringRef b)
{
  const std::string_view lhs(a);
  const std::string_view rhs(b);
  return lhs.compare(rhs) < 0;
}
/** Return true when `a` is ordered after `b`, using the ordering of std::string_view. */
inline bool operator>(StringRef a, StringRef b)
{
  const std::string_view lhs(a);
  const std::string_view rhs(b);
  return lhs.compare(rhs) > 0;
}
/** Return true when `a` is not ordered after `b`, using the ordering of std::string_view. */
inline bool operator<=(StringRef a, StringRef b)
{
  const std::string_view lhs(a);
  const std::string_view rhs(b);
  return lhs.compare(rhs) <= 0;
}
/** Return true when `a` is not ordered before `b`, using the ordering of std::string_view. */
inline bool operator>=(StringRef a, StringRef b)
{
  const std::string_view lhs(a);
  const std::string_view rhs(b);
  return lhs.compare(rhs) >= 0;
}
/**
 * Check whether the first chars of the string are equal to the given prefix.
 */
inline bool StringRefBase::startswith(StringRef prefix) const
{
  const int64_t prefix_length = prefix.size();
  if (prefix_length > size_) {
    /* A prefix that is longer than the string can never match. */
    return false;
  }
  const char *prefix_chars = prefix.data();
  /* Count how many leading chars agree; the prefix matches when all of its chars do. */
  int64_t matched = 0;
  while (matched < prefix_length && data_[matched] == prefix_chars[matched]) {
    matched++;
  }
  return matched >= prefix_length;
}
/**
 * Check whether the last chars of the string are equal to the given suffix.
 */
inline bool StringRefBase::endswith(StringRef suffix) const
{
  const int64_t suffix_length = suffix.size();
  if (suffix_length > size_) {
    /* A suffix that is longer than the string can never match. */
    return false;
  }
  const char *suffix_chars = suffix.data();
  /* Index of the char in this string that is compared with the first char of the suffix. */
  const int64_t tail_start = size_ - suffix_length;
  int64_t matched = 0;
  while (matched < suffix_length && data_[tail_start + matched] == suffix_chars[matched]) {
    matched++;
  }
  return matched >= suffix_length;
}
/**
 * Return a new #StringRef containing only a sub-string of the original string.
 *
 * The sub-string begins at index `start` and contains at most `max_size` chars; it is shorter
 * when the original string ends earlier. When `start` is larger than the size of the string, an
 * empty reference to the end of the string is returned.
 */
inline StringRef StringRefBase::substr(const int64_t start,
                                       const int64_t max_size = INT64_MAX) const
{
  BLI_assert(max_size >= 0);
  BLI_assert(start >= 0);
  /* Clamp the start to the end of the string. Otherwise a start past the end would result in a
   * reference with a negative size and a data pointer outside of the string. */
  const int64_t clamped_start = std::min(start, size_);
  const int64_t substr_size = std::min(max_size, size_ - clamped_start);
  return StringRef(data_ + clamped_start, substr_size);
}
/**
 * Convert the result of a std::string_view search to the signed convention used by StringRef:
 * `npos` is mapped to #StringRef::not_found, every other index is passed through.
 */
inline int64_t index_or_npos_to_int64(size_t index)
{
  /* The compiler will probably optimize this check away. */
  const bool is_npos = (index == std::string_view::npos);
  return is_npos ? StringRef::not_found : static_cast<int64_t>(index);
}
/**
 * Search for the first occurrence of the char at or after `pos`, like std::string_view::find.
 * Returns #not_found when there is no such occurrence.
 */
inline int64_t StringRefBase::find(char c, int64_t pos) const
{
  BLI_assert(pos >= 0);
  const std::string_view haystack(*this);
  const size_t found_at = haystack.find(c, static_cast<size_t>(pos));
  return index_or_npos_to_int64(found_at);
}
inline int64_t StringRefBase::find(StringRef str, int64_t pos) const
{
BLI_assert(pos >= 0);
return index_or_npos_to_int64(std::string_view(*this).find(str, static_cast<size_t>(pos)));
}
/**
 * Search for the first char at or after `pos` that is contained in `chars`, like
 * std::string_view::find_first_of. Returns #not_found when there is no such char.
 */
inline int64_t StringRefBase::find_first_of(StringRef chars, int64_t pos) const
{
  BLI_assert(pos >= 0);
  const std::string_view haystack(*this);
  const std::string_view candidates(chars);
  const size_t found_at = haystack.find_first_of(candidates, static_cast<size_t>(pos));
  return index_or_npos_to_int64(found_at);
}
inline int64_t StringRefBase::find_first_of(char c, int64_t pos) const
{
return this->find_first_of(StringRef(&c, 1), pos);
}
/**
 * Search for the last char at or before `pos` that is contained in `chars`, like
 * std::string_view::find_last_of. Returns #not_found when there is no such char.
 */
inline int64_t StringRefBase::find_last_of(StringRef chars, int64_t pos) const
{
  BLI_assert(pos >= 0);
  const std::string_view haystack(*this);
  const std::string_view candidates(chars);
  const size_t found_at = haystack.find_last_of(candidates, static_cast<size_t>(pos));
  return index_or_npos_to_int64(found_at);
}
inline int64_t StringRefBase::find_last_of(char c, int64_t pos) const
{
return this->find_last_of(StringRef(&c, 1), pos);
}
/**
 * Search for the first char at or after `pos` that is not contained in `chars`, like
 * std::string_view::find_first_not_of. Returns #not_found when there is no such char.
 */
inline int64_t StringRefBase::find_first_not_of(StringRef chars, int64_t pos) const
{
  BLI_assert(pos >= 0);
  const std::string_view haystack(*this);
  const std::string_view excluded(chars);
  const size_t found_at = haystack.find_first_not_of(excluded, static_cast<size_t>(pos));
  return index_or_npos_to_int64(found_at);
}
inline int64_t StringRefBase::find_first_not_of(char c, int64_t pos) const
{
return this->find_first_not_of(StringRef(&c, 1), pos);
}
/**
 * Search for the last char at or before `pos` that is not contained in `chars`, like
 * std::string_view::find_last_not_of. Returns #not_found when there is no such char.
 */
inline int64_t StringRefBase::find_last_not_of(StringRef chars, int64_t pos) const
{
  BLI_assert(pos >= 0);
  const std::string_view haystack(*this);
  const std::string_view excluded(chars);
  const size_t found_at = haystack.find_last_not_of(excluded, static_cast<size_t>(pos));
  return index_or_npos_to_int64(found_at);
}
inline int64_t StringRefBase::find_last_not_of(char c, int64_t pos) const
{
return this->find_last_not_of(StringRef(&c, 1), pos);
}
} // namespace blender