38 using List = promeki::List<String>;
48 using iterator_category = std::random_access_iterator_tag;
50 using difference_type = std::ptrdiff_t;
57 Char operator*()
const {
return _data->
charAt(_idx); }
58 Char operator[](difference_type
n)
const {
return _data->
charAt(_idx +
n); }
64 CharIterator &operator+=(difference_type
n) { _idx +=
n;
return *
this; }
65 CharIterator &operator-=(difference_type
n) { _idx -=
n;
return *
this; }
66 CharIterator operator+(difference_type
n)
const {
return {_data, _idx +
n}; }
67 CharIterator operator-(difference_type
n)
const {
return {_data, _idx -
n}; }
69 return static_cast<difference_type
>(_idx) -
static_cast<difference_type
>(
o._idx);
75 bool operator==(
const CharIterator &
o)
const {
return _idx ==
o._idx; }
76 bool operator!=(
const CharIterator &
o)
const {
return _idx !=
o._idx; }
145 return value ?
"true" :
"false";
166 template <
typename T>
179 template <
typename T>
193 template <
typename T>
197 T mask =
static_cast<T>(1) << (digits - 1);
198 for(
int i = 0;
i < digits;
i++) {
286 const std::string &
str()
const {
return d->str(); }
289 const char *
cstr()
const {
return d->cstr(); }
292 size_t size()
const {
return d->length(); }
295 size_t length()
const {
return d->length(); }
346 operator const std::string &()
const {
return d->str(); }
349 operator const char *()
const {
return d->cstr(); }
414 if(pos >= d->length())
return String();
416 return String(d->createSubstr(pos,
len));
470 if(d->isLatin1() && ch.
codepoint() > 0xFF) {
483 if(d->isLatin1() && !
s.d->isLatin1()) {
522 if(
val.isEmpty())
return *
this;
523 if(d->isLatin1() && !
val.d->isLatin1()) {
566 if(d->length() !=
val.d->length())
return false;
567 if(d->isLatin1() &&
val.d->isLatin1())
return d->str() ==
val.d->str();
568 for(
size_t i = 0;
i < d->length(); ++
i) {
569 if(d->charAt(
i) !=
val.d->charAt(
i))
return false;
588 return lhs.str() <
rhs.str();
613 std::string
s = d->str();
614 std::transform(
s.begin(),
s.end(),
s.begin(),
::toupper);
617 promeki::List<Char>
chars;
618 for(
size_t i = 0;
i < d->length(); ++
i)
619 chars.pushToBack(d->charAt(
i).toUpper());
626 std::string
s = d->str();
627 std::transform(
s.begin(),
s.end(),
s.begin(),
::tolower);
630 promeki::List<Char>
chars;
631 for(
size_t i = 0;
i < d->length(); ++
i)
632 chars.pushToBack(d->charAt(
i).toLower());
643 size_t last =
len - 1;
644 while(last > first && d->charAt(last).isSpace()) --last;
645 return substr(first, last - first + 1);
659 for(
size_t i = 0;
i < prefix.
length(); ++
i) {
660 if(d->charAt(
i) != prefix.d->charAt(
i))
return false;
670 return !
isEmpty() && d->charAt(0) ==
c;
681 for(
size_t i = 0;
i < suffix.
length(); ++
i) {
682 if(d->charAt(offset +
i) != suffix.d->charAt(
i))
return false;
697 for(
size_t i = 0;
i < d->length(); ++
i) {
698 if(!d->charAt(
i).isDigit())
return false;
715 for(
size_t i = 0;
i <
len; ++
i) {
716 char32_t a = d->charAt(
i).toLower().codepoint();
717 char32_t b =
other.d->charAt(
i).toLower().codepoint();
718 if(a != b)
return a < b ? -1 : 1;
746 if(d->isLatin1())
return *
this;
748 s.reserve(d->length());
749 for(
size_t i = 0;
i < d->length(); ++
i) {
750 char32_t cp = d->charAt(
i).codepoint();
751 s +=
static_cast<char>(
cp <= 0xFF ?
cp :
'?');
763 if(!d->isLatin1())
return *
this;
837 const char *
s =
cstr();
840 if constexpr (std::is_integral_v<OutputType> && std::is_signed_v<OutputType>) {
841 long long v = std::strtoll(
s, &
end, 10);
843 }
else if constexpr (std::is_integral_v<OutputType> && std::is_unsigned_v<OutputType>) {
844 unsigned long long v = std::strtoull(
s, &
end, 10);
846 }
else if constexpr (std::is_floating_point_v<OutputType>) {
847 double v = std::strtod(
s, &
end);
920 size_t operator()(
const promeki::String &
s)
const noexcept {
921 return static_cast<size_t>(
s.hash());
942 : _bytes{}, _codepoints{}, _charCount(0), _isAscii(
true) {
943 for(
size_t i = 0;
i <
N; ++
i) _bytes[
i] = str[
i];
946 unsigned char b =
static_cast<unsigned char>(str[pos]);
947 if(b > 0x7F) _isAscii =
false;
950 if(b < 0x80) {
cp = b;
seqLen = 1; }
951 else if(b < 0xE0) {
cp = b & 0x1F;
seqLen = 2; }
952 else if(b < 0xF0) {
cp = b & 0x0F;
seqLen = 3; }
954 for(
size_t j = 1;
j <
seqLen && pos +
j <
N - 1; ++
j)
955 cp = (
cp << 6) | (
static_cast<unsigned char>(str[pos +
j]) & 0x3F);
956 _codepoints[_charCount++] =
cp;
961 constexpr bool isAscii()
const {
return _isAscii; }
962 constexpr size_t charCount()
const {
return _charCount; }
963 constexpr size_t byteCount()
const {
return N - 1; }
964 constexpr const char *bytes()
const {
return _bytes; }
965 constexpr const char32_t *codepoints()
const {
return _codepoints; }
970 return fnv1aData(_codepoints, _charCount *
sizeof(
char32_t));
975 char32_t _codepoints[
N];
990template<
size_t Count>
995 for(
size_t i = 0;
i <
Count; ++
i) _data[
i] =
cs.codepoints()[
i];
997 constexpr const char32_t *data()
const {
return _data; }
998 constexpr size_t size()
const {
return Count; }
1000 char32_t _data[
Count];
1016 inline String operator""_ps(
const char *str,
size_t len) {
1017 for(
size_t i = 0;
i <
len; ++
i) {
1018 if(
static_cast<unsigned char>(str[
i]) > 0x7F)
1046#define PROMEKI_STRING(str) \
1047 ([]() -> ::promeki::String { \
1048 constexpr auto _cs = \
1049 ::promeki::CompiledString<sizeof(str)>(str); \
1050 if constexpr (_cs.isAscii()) { \
1051 static ::promeki::StringLiteralData _lit( \
1052 str, _cs.byteCount(), _cs.hash()); \
1053 return ::promeki::String::fromLiteralData(&_lit); \
1055 static constexpr auto _cp = \
1056 ::promeki::CompiledCodepoints<_cs.charCount()>(_cs); \
1057 static ::promeki::StringUnicodeLiteralData _lit( \
1058 _cp.data(), _cp.size(), \
1059 str, _cs.byteCount(), _cs.hash()); \
1060 return ::promeki::String::fromLiteralData(&_lit); \
Unicode-aware character class wrapping a single codepoint.
Definition char.h:23
char32_t codepoint() const
Returns the Unicode codepoint.
Definition char.h:35
Right-sized codepoint array extracted from a CompiledString.
Definition string.h:991
Compile-time string literal with encoding detection and UTF-8 decode.
Definition string.h:939
constexpr uint64_t hash() const
Compile-time FNV-1a hash of the native storage.
Definition string.h:968
Lightweight error code wrapper for the promeki library.
Definition error.h:39
@ Ok
No error.
Definition error.h:51
@ Invalid
Invalid value or argument (EINVAL).
Definition error.h:66
Dynamic array container wrapping std::vector.
Definition list.h:40
size_t count(const T &value) const
Returns the number of occurrences of value.
Definition list.h:629
bool isEmpty() const noexcept
Returns true if the list has no elements.
Definition list.h:296
A smart pointer class with reference counting and optional copy-on-write semantics.
Definition sharedptr.h:252
Pure virtual interface for String storage backends.
Definition stringdata.h:29
virtual void setCharAt(size_t idx, Char ch)=0
Sets the character at the given index.
virtual void erase(size_t pos, size_t count)=0
Erases characters from the string.
virtual void clear()=0
Removes all characters from the string.
virtual void resize(size_t len, Char fill=Char())=0
Resizes the string to len characters.
virtual void insert(size_t pos, const StringData &s)=0
Inserts a string at the given position.
static constexpr size_t npos
Sentinel value indicating "not found".
Definition stringdata.h:188
virtual void reverseInPlace()=0
Reverses the characters in place.
virtual void append(const StringData &other)=0
Appends another string's data to this storage.
virtual Char charAt(size_t idx) const =0
Returns the character at the given index.
Latin1 string storage. One byte = one character.
Definition stringdata.h:197
Manages a list of strings.
Definition stringlist.h:21
Unicode string storage. Decoded codepoints in a List<Char>.
Definition stringdata.h:278
static StringUnicodeData * fromUtf8(const char *data, size_t len)
Creates a StringUnicodeData by decoding UTF-8 bytes.
static StringUnicodeData * fromLatin1(const std::string &s)
Creates a StringUnicodeData by promoting Latin1 bytes.
Random-access iterator over characters (Char values).
Definition string.h:46
Encoding-aware string class with copy-on-write semantics.
Definition string.h:35
static String number(int8_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
Encoding encoding() const
Returns the encoding of this string (Latin1 or Unicode).
Definition string.h:324
size_t find(char val, size_t from=0) const
Finds the first occurrence of a character.
Definition string.h:361
size_t find(const String &val, size_t from=0) const
Finds the first occurrence of a substring.
Definition string.h:367
String toLower() const
Returns a lowercase copy of this string.
Definition string.h:624
int64_t parseNumberWords(Error *err=nullptr) const
Parses English number words into an integer value.
String right(std::size_t count) const
Returns the last count characters.
Definition string.h:439
size_t rfind(const char *val, size_t from=npos) const
Finds the last occurrence of a C string.
Definition string.h:379
uint8_t byteAt(size_t idx) const
Returns the byte at the given index.
Definition string.h:305
friend bool operator>(const String &lhs, const String &rhs)
Greater-than comparison.
Definition string.h:597
void insert(size_t pos, const String &s)
Inserts a string at the given position (promotes to Unicode if needed).
Definition string.h:482
const char * cstr() const
Returns a null-terminated C string pointer.
Definition string.h:289
bool startsWith(const String &prefix) const
Returns true if the string starts with the given prefix.
Definition string.h:657
String & arg(uint8_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:785
size_t count(const String &substr) const
Counts non-overlapping occurrences of a substring.
Definition string.h:401
size_t rfind(const String &val, size_t from=npos) const
Finds the last occurrence of a substring.
Definition string.h:381
String toUnicode() const
Returns a Unicode version of this string.
Definition string.h:762
static String fromUtf8(const char *data, size_t len)
Creates a String by decoding UTF-8 data.
Definition string.h:218
String operator+(const std::string &val) const
Returns the concatenation of this string and val.
Definition string.h:550
static String number(double value, int precision=9)
Converts a double to its string representation.
String toLatin1() const
Returns a Latin1 version of this string.
Definition string.h:745
String operator+(char val) const
Returns the concatenation of this string and val.
Definition string.h:554
static constexpr const char * WhitespaceChars
Characters considered whitespace by trim().
Definition string.h:94
String(const char *str)
Constructs from a null-terminated C string.
Definition string.h:248
String reverse() const
Returns a copy with characters in reverse order.
Definition string.h:688
size_t byteCount() const
Returns the number of bytes in the encoded representation.
Definition string.h:298
bool isEmpty() const
Returns true if the string has zero length.
Definition string.h:315
uint64_t hash() const
Returns a 64-bit FNV-1a hash of this string's native data.
Definition string.h:731
friend bool operator<=(const String &lhs, const String &rhs)
Less-than-or-equal comparison.
Definition string.h:592
String operator+(const char *val) const
Returns the concatenation of this string and val.
Definition string.h:552
int compareIgnoreCase(const String &other) const
Case-insensitive comparison.
Definition string.h:713
String substr(size_t pos=0, size_t len=npos) const
Returns a substring.
Definition string.h:413
bool operator!=(char val) const
Inequality comparison.
Definition string.h:584
ConstIterator end() const noexcept
Returns a const character iterator past the last character.
Definition string.h:337
static String number(uint16_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
String & arg(int16_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:791
void erase(size_t pos, size_t count=1)
Erases characters from the string.
Definition string.h:462
String & arg(uint16_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:797
static String number(int32_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
size_t size() const
Returns the number of characters in the string.
Definition string.h:292
size_t find(Char val, size_t from=0) const
Finds the first occurrence of a character.
Definition string.h:363
bool isLiteral() const
Returns true if this string wraps an immutable literal.
Definition string.h:321
static String dec(const T &val, int padding=0, char padchar=' ')
Formats a value as a decimal string with optional padding.
Definition string.h:167
static String sprintf(const char *fmt,...)
Creates a formatted string using printf-style syntax.
String trim() const
Returns a copy with leading and trailing whitespace removed.
Definition string.h:637
static String number(float value, int precision=9)
Converts a float to its string representation.
bool operator!=(const String &val) const
Inequality comparison.
Definition string.h:580
size_t rfind(Char val, size_t from=npos) const
Finds the last occurrence of a character.
Definition string.h:377
ConstIterator cbegin() const noexcept
Returns a const character iterator to the first character.
Definition string.h:335
String left(size_t count) const
Returns the first count characters.
Definition string.h:432
static String number(bool value)
Returns "true" or "false" for a boolean value.
Definition string.h:144
bool startsWith(char c) const
Returns true if the string starts with the given character.
Definition string.h:669
OutputType to(Error *err=nullptr) const
Converts the string to a value of type OutputType.
Definition string.h:836
static String number(int16_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
bool operator!=(const char *val) const
Inequality comparison.
Definition string.h:582
bool endsWith(const String &suffix) const
Returns true if the string ends with the given suffix.
Definition string.h:678
String & arg(int8_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:779
String operator+(const String &val) const
Returns the concatenation of this string and val.
Definition string.h:543
size_t find(const char *val, size_t from=0) const
Finds the first occurrence of a C string.
Definition string.h:365
ConstIterator cend() const noexcept
Returns a const character iterator past the last character.
Definition string.h:339
bool toBool(Error *err=nullptr) const
Converts the string to a boolean.
String()
Default constructor. Creates an empty Latin1 string.
Definition string.h:238
friend bool operator>=(const String &lhs, const String &rhs)
Greater-than-or-equal comparison.
Definition string.h:602
static constexpr size_t npos
Sentinel value indicating "not found".
Definition string.h:91
String replace(const String &find, const String &replacement) const
Returns a copy with all occurrences of find replaced by replacement.
String & arg(uint64_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:821
String(size_t ct, char c)
Constructs a string of repeated characters.
Definition string.h:264
String & operator+=(const char *val)
Appends a C string to this string (promotes to Unicode if needed).
Definition string.h:534
unsigned int toUInt(Error *err=nullptr) const
Converts the string to an unsigned integer.
Encoding
String encoding.
Definition string.h:97
@ Latin1
One byte per character, ISO-8859-1.
Definition string.h:98
@ Unicode
Decoded codepoints, O(1) indexed access.
Definition string.h:99
String & operator+=(char val)
Appends a single character to this string (promotes to Unicode if needed).
Definition string.h:536
ConstIterator begin() const noexcept
Returns a const character iterator to the first character.
Definition string.h:333
promeki::List< String > List
List of Strings.
Definition string.h:38
static String number(uint32_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
static String number(int64_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
static String hex(const T &val, int padding=0, bool addPrefix=true)
Formats a value as a hexadecimal string.
Definition string.h:180
String(std::string &&str)
Constructs from a std::string (move).
Definition string.h:278
void setCharAt(size_t idx, Char ch)
Sets the character at the given index (promotes to Unicode if needed).
Definition string.h:469
String(const std::string &str)
Constructs from a std::string (copy).
Definition string.h:271
StringList split(const std::string &delimiter) const
Splits the string by a delimiter.
int toInt(Error *err=nullptr) const
Converts the string to an integer.
String & arg(uint32_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:809
bool contains(Char val) const
Returns true if the string contains the given value.
Definition string.h:390
String(std::nullptr_t)
Constructs an empty string (null pointer overload).
Definition string.h:242
String & operator=(std::string &&str)
Assigns from a std::string (move).
Definition string.h:501
void clear()
Removes all characters from the string.
Definition string.h:449
String mid(size_t pos, size_t count=npos) const
Returns a substring starting at pos (alias for substr).
Definition string.h:425
String & arg(int64_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:815
String & arg(int32_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Replaces the lowest {N} token with the formatted numeric value.
Definition string.h:803
bool isNumeric() const
Returns true if every character is a decimal digit.
Definition string.h:695
String & operator+=(const std::string &val)
Appends a std::string to this string (promotes to Unicode if needed).
Definition string.h:532
static String number(uint64_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
static String fromLiteralData(StringData *data)
Wraps a static literal StringData without copying.
Definition string.h:229
Char charAt(size_t idx) const
Returns the character at the given index.
Definition string.h:312
void resize(size_t val)
Resizes the string to the given character count.
Definition string.h:455
String & operator=(const char *str)
Assigns from a C string.
Definition string.h:507
bool contains(const char *val) const
Returns true if the string contains the given value.
Definition string.h:394
static String number(uint8_t value, int base=10, int padding=0, char padchar=' ', bool addPrefix=false)
Converts a numeric value to its String representation.
bool operator==(char val) const
Equality comparison with a single character.
Definition string.h:577
size_t length() const
Returns the number of characters in the string.
Definition string.h:295
static String bin(const T &val, int digits=32, bool addPrefix=true)
Formats a value as a binary string.
Definition string.h:194
String toUpper() const
Returns an uppercase copy of this string.
Definition string.h:611
String & arg(const String &str)
Replaces the lowest-numbered N placeholder with the given string.
bool contains(const String &val) const
Returns true if the string contains the given value.
Definition string.h:392
int referenceCount() const
Returns the current shared reference count.
Definition string.h:318
friend bool operator<(const String &lhs, const String &rhs)
Less-than comparison (lexicographic on the encoded bytes).
Definition string.h:587
size_t rfind(char val, size_t from=npos) const
Finds the last occurrence of a character.
Definition string.h:375
const std::string & str() const
Returns a const reference to the underlying std::string.
Definition string.h:286
bool contains(char val) const
Returns true if the string contains the given value.
Definition string.h:388
String & operator=(const std::string &str)
Assigns from a std::string (copy).
Definition string.h:495
String(const char *str, size_t len)
Constructs from a character buffer with explicit length.
Definition string.h:256
bool operator==(const char *val) const
Equality comparison with a C string.
Definition string.h:575
double toDouble(Error *err=nullptr) const
Converts the string to a double.
String & operator+=(const String &val)
Appends a String to this string (promotes to Unicode if needed).
Definition string.h:521
bool operator==(const String &val) const
Equality comparison with another String.
Definition string.h:565
constexpr uint64_t fnv1aData(const void *data, size_t len, uint64_t seed=0xcbf29ce484222325ULL)
Computes the FNV-1a hash of a block of data.
Definition fnv1a.h:43
#define PROMEKI_NAMESPACE_BEGIN
Starts a promeki namespace block.
Definition namespace.h:14
#define PROMEKI_NAMESPACE_END
Ends a promeki namespace block.
Definition namespace.h:19
User-defined literal for convenient String construction.
Definition string.h:1015
const T & value(const Result< T > &r)
Returns the value from a Result.
Definition result.h:56
String operator+(const char *lhs, const String &rhs)
Concatenation with a C string on the left-hand side.
Definition string.h:911