11#include <promeki/config.h>
12#if PROMEKI_ENABLE_CORE
30PROMEKI_NAMESPACE_BEGIN
70 using List = ::promeki::List<String>;
73 using CharList = ::promeki::List<Char>;
83 using iterator_category = std::random_access_iterator_tag;
84 using value_type = Char;
85 using difference_type = std::ptrdiff_t;
87 using reference = Char;
89 CharIterator() : _data(nullptr), _idx(0) {}
90 CharIterator(
const StringData *data,
size_t idx) : _data(data), _idx(idx) {}
92 Char operator*()
const {
return _data->charAt(_idx); }
93 Char operator[](difference_type n)
const {
return _data->charAt(_idx + n); }
95 CharIterator &operator++() {
99 CharIterator operator++(
int) {
104 CharIterator &operator--() {
108 CharIterator operator--(
int) {
113 CharIterator &operator+=(difference_type n) {
117 CharIterator &operator-=(difference_type n) {
121 CharIterator operator+(difference_type n)
const {
return {_data, _idx + n}; }
122 CharIterator operator-(difference_type n)
const {
return {_data, _idx - n}; }
123 difference_type operator-(
const CharIterator &o)
const {
124 return static_cast<difference_type
>(_idx) -
125 static_cast<difference_type
>(o._idx);
127 friend CharIterator operator+(difference_type n,
const CharIterator &it) {
131 bool operator==(
const CharIterator &o)
const {
return _idx == o._idx; }
132 bool operator!=(
const CharIterator &o)
const {
return _idx != o._idx; }
133 bool operator<(
const CharIterator &o)
const {
return _idx < o._idx; }
134 bool operator<=(
const CharIterator &o)
const {
return _idx <= o._idx; }
135 bool operator>(
const CharIterator &o)
const {
return _idx > o._idx; }
136 bool operator>=(
const CharIterator &o)
const {
return _idx >= o._idx; }
139 const StringData *_data;
144 using ConstIterator = CharIterator;
147 static constexpr size_t npos = StringData::npos;
150 static constexpr const char *WhitespaceChars =
" \t\n\r\f\v";
171 static String number(int8_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
172 bool addPrefix =
false);
174 static String number(uint8_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
175 bool addPrefix =
false);
177 static String number(int16_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
178 bool addPrefix =
false);
180 static String number(uint16_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
181 bool addPrefix =
false);
183 static String number(int32_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
184 bool addPrefix =
false);
186 static String number(uint32_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
187 bool addPrefix =
false);
189 static String number(int64_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
190 bool addPrefix =
false);
192 static String number(uint64_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
193 bool addPrefix =
false);
200 static String number(
bool value) {
return value ?
"true" :
"false"; }
208 static String number(
float value,
int precision = 9);
210 static String number(
double value,
int precision = 9);
220 template <
typename T>
static String dec(
const T &val,
int padding = 0,
char padchar =
' ') {
221 return number(
static_cast<int64_t
>(val), 10, padding, padchar);
232 template <
typename T>
static String hex(
const T &val,
int padding = 0,
bool addPrefix =
true) {
233 int totalWidth = (addPrefix && padding > 0) ? padding + 2 : padding;
234 return number(
static_cast<uint64_t
>(val), 16, totalWidth,
'0', addPrefix);
245 template <
typename T>
static String bin(
const T &val,
int digits = 32,
bool addPrefix =
true) {
247 if (addPrefix) ret +=
"0b";
248 T mask =
static_cast<T
>(1) << (digits - 1);
249 for (
int i = 0; i < digits; i++) {
250 ret.push_back(val & mask ?
'1' :
'0');
265 PROMEKI_PRINTF_FUNC(1, 2) static String sprintf(const
char *fmt, ...);
295 template <typename... Args> static String format(std::format_string<Args...> fmt, Args &&...args) {
296 std::string s = std::format(fmt, std::forward<Args>(args)...);
297 return fromUtf8(s.data(), s.size());
318 static String vformat(std::string_view fmt, std::format_args args) {
319 std::string s = std::vformat(fmt, args);
320 return fromUtf8(s.data(), s.size());
339 static String fromUtf8(
const char *data,
size_t len) {
return String(data, len); }
355 static String fromLatin1(
const char *data,
size_t len) {
356 return String(
new StringLatin1Data(data, len));
360 static String fromLatin1(
const std::string &data) {
return String(
new StringLatin1Data(data)); }
363 static String fromLatin1(std::string &&data) {
return String(
new StringLatin1Data(std::move(data))); }
372 static String fromLiteralData(StringData *data) {
return String(data); }
379 String() : d(SharedPtr<StringData>::takeOwnership(new StringLatin1Data())) {}
382 String(std::nullptr_t) : String() {}
396 String(
const char *str)
397 : d(SharedPtr<StringData>::takeOwnership(
398 makeDataFromUtf8(str ? str :
"", str ? std::strlen(str) : 0))) {}
407 String(
const char *str,
size_t len)
408 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(str, len))) {}
421 String(
size_t ct,
char c) : d(SharedPtr<StringData>::takeOwnership(new StringLatin1Data(ct, c))) {}
429 String(
const std::string &str)
430 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(str.data(), str.size()))) {}
441 String(std::string &&str)
442 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(std::move(str)))) {}
449 const std::string &str()
const {
return d->str(); }
452 const char *cstr()
const {
return d->cstr(); }
455 size_t size()
const {
return d->length(); }
458 size_t length()
const {
return d->length(); }
461 size_t byteCount()
const {
return d->byteCount(); }
468 uint8_t byteAt(
size_t idx)
const {
return d->byteAt(idx); }
475 Char charAt(
size_t idx)
const {
return d->charAt(idx); }
478 bool isEmpty()
const {
return d->isEmpty(); }
481 int referenceCount()
const {
return d.referenceCount(); }
484 bool isLiteral()
const {
return d->isLiteral(); }
487 Encoding encoding()
const {
return d->isLatin1() ? Latin1 : Unicode; }
494 ConstIterator begin() const noexcept {
return {d.ptr(), 0}; }
496 ConstIterator cbegin() const noexcept {
return {d.ptr(), 0}; }
498 ConstIterator end() const noexcept {
return {d.ptr(), d->length()}; }
500 ConstIterator cend() const noexcept {
return {d.ptr(), d->length()}; }
507 operator const std::string &()
const {
return d->str(); }
510 operator const char *()
const {
return d->cstr(); }
522 size_t find(
char val,
size_t from = 0)
const {
return d->find(Char(val), from); }
524 size_t find(Char val,
size_t from = 0)
const {
return d->find(val, from); }
532 size_t find(
const char *val,
size_t from = 0)
const {
533 if (val ==
nullptr)
return npos;
535 while (val[len]) ++len;
536 return d->find(*String::fromUtf8(val, len).d, from);
539 size_t find(
const String &val,
size_t from = 0)
const {
return d->find(*val.d, from); }
547 size_t rfind(
char val,
size_t from = npos)
const {
return d->rfind(Char(val), from); }
549 size_t rfind(Char val,
size_t from = npos)
const {
return d->rfind(val, from); }
555 size_t rfind(
const char *val,
size_t from = npos)
const {
556 if (val ==
nullptr)
return npos;
558 while (val[len]) ++len;
559 return d->rfind(*String::fromUtf8(val, len).d, from);
562 size_t rfind(
const String &val,
size_t from = npos)
const {
return d->rfind(*val.d, from); }
569 bool contains(
char val)
const {
return d->find(Char(val)) != npos; }
571 bool contains(Char val)
const {
return d->find(val) != npos; }
573 bool contains(
const String &val)
const {
return d->find(*val.d) != npos; }
579 bool contains(
const char *val)
const {
return find(val) != npos; }
586 size_t count(
const String &substr)
const {
return d->count(*substr.d); }
598 String substr(
size_t pos = 0,
size_t len = npos)
const {
599 if (pos >= d->length())
return String();
600 if (len == npos) len = d->length() - pos;
601 return String(d->createSubstr(pos, len));
610 String mid(
size_t pos,
size_t count = npos)
const {
return substr(pos, count); }
617 String left(
size_t count)
const {
return substr(0, count); }
624 String right(std::size_t count)
const {
625 if (count >= length())
return *
this;
626 return substr(length() - count, count);
651 String truncated(
size_t maxChars)
const {
652 if (length() <= maxChars)
return *
this;
653 if (maxChars < 3)
return left(maxChars);
654 return left(maxChars - 3) +
"...";
662 void clear() { d.modify()->clear(); }
668 void resize(
size_t val) { d.modify()->resize(val); }
680 void reserve(
size_t capacity) { d.modify()->reserve(capacity); }
686 void pushBack(Char ch) {
687 if (d->isLatin1() && ch.codepoint() > 0xFF) {
688 auto *ud = StringUnicodeData::fromLatin1(d->str());
689 d = SharedPtr<StringData>::takeOwnership(ud);
691 d.modify()->append(ch);
695 void pushBack(
char ch) { d.modify()->append(Char(ch)); }
702 void erase(
size_t pos,
size_t count = 1) { d.modify()->erase(pos, count); }
709 void setCharAt(
size_t idx, Char ch) {
710 if (d->isLatin1() && ch.codepoint() > 0xFF) {
711 auto *ud = StringUnicodeData::fromLatin1(d->str());
712 d = SharedPtr<StringData>::takeOwnership(ud);
714 d.modify()->setCharAt(idx, ch);
722 void insert(
size_t pos,
const String &s) {
723 if (d->isLatin1() && !s.d->isLatin1()) {
724 auto *ud = StringUnicodeData::fromLatin1(d->str());
725 d = SharedPtr<StringData>::takeOwnership(ud);
727 d.modify()->insert(pos, *s.d);
735 String &operator=(
const std::string &str) {
736 d = SharedPtr<StringData>::takeOwnership(
new StringLatin1Data(str));
741 String &operator=(std::string &&str) {
742 d = SharedPtr<StringData>::takeOwnership(
new StringLatin1Data(std::move(str)));
747 String &operator=(
const char *str) {
748 d = SharedPtr<StringData>::takeOwnership(
new StringLatin1Data(str ? str :
""));
761 String &operator+=(
const String &val) {
762 if (val.isEmpty())
return *
this;
763 if (d->isLatin1() && !val.d->isLatin1()) {
764 auto *ud = StringUnicodeData::fromLatin1(d->str());
765 d = SharedPtr<StringData>::takeOwnership(ud);
767 d.modify()->append(*val.d);
772 String &operator+=(
const std::string &val) {
return *
this += String(val); }
774 String &operator+=(
const char *val) {
return *
this += String(val); }
776 String &operator+=(
char val) {
777 d.modify()->append(Char(val));
786 String operator+(
const String &val)
const {
787 String result = *
this;
793 String operator+(
const std::string &val)
const {
return *
this + String(val); }
795 String operator+(
const char *val)
const {
return *
this + String(val); }
797 String operator+(
char val)
const {
return *
this + String(1, val); }
808 bool operator==(
const String &val)
const {
809 if (d->length() != val.d->length())
return false;
810 if (d->isLatin1() && val.d->isLatin1())
return d->str() == val.d->str();
811 for (
size_t i = 0; i < d->length(); ++i) {
812 if (d->charAt(i) != val.d->charAt(i))
return false;
828 bool operator==(
const char *val)
const {
829 if (val ==
nullptr)
return d->length() == 0;
830 const size_t len = d->length();
833 if (i >= len)
return false;
834 size_t bytesRead = 0;
835 Char vc = Char::fromUtf8(val, &bytesRead);
836 if (d->charAt(i) != vc)
return false;
843 bool operator==(
char val)
const {
return d->length() == 1 && d->charAt(0) == val; }
846 bool operator!=(
const String &val)
const {
return !(*
this == val); }
848 bool operator!=(
const char *val)
const {
return !(*
this == val); }
850 bool operator!=(
char val)
const {
return !(*
this == val); }
862 friend bool operator<(
const String &lhs,
const String &rhs) {
863 if (lhs.d->isLatin1() == rhs.d->isLatin1()) {
867 return lhs.d->str() < rhs.d->str();
872 const size_t llen = lhs.d->length();
873 const size_t rlen = rhs.d->length();
874 const size_t n = std::min(llen, rlen);
875 for (
size_t i = 0; i < n; ++i) {
876 char32_t a = lhs.d->charAt(i).codepoint();
877 char32_t b = rhs.d->charAt(i).codepoint();
878 if (a != b)
return a < b;
884 friend bool operator<=(
const String &lhs,
const String &rhs) {
return !(rhs < lhs); }
887 friend bool operator>(
const String &lhs,
const String &rhs) {
return rhs < lhs; }
890 friend bool operator>=(
const String &lhs,
const String &rhs) {
return !(lhs < rhs); }
903 String toUpper()
const {
905 const std::string &src = d->str();
907 s.resize(src.size());
908 for (
size_t i = 0; i < src.size(); ++i) {
909 char32_t cp = Char(src[i]).toUpper().codepoint();
912 s[i] =
static_cast<char>(cp);
916 return fromLatin1(std::move(s));
919 chars.reserve(d->length());
920 for (
size_t i = 0; i < d->length(); ++i) chars.pushToBack(d->charAt(i).toUpper());
921 return String(
new StringUnicodeData(std::move(chars)));
931 String toLower()
const {
933 const std::string &src = d->str();
935 s.resize(src.size());
936 for (
size_t i = 0; i < src.size(); ++i) {
937 char32_t cp = Char(src[i]).toLower().codepoint();
938 s[i] =
static_cast<char>(cp);
942 return fromLatin1(std::move(s));
945 chars.reserve(d->length());
946 for (
size_t i = 0; i < d->length(); ++i) chars.pushToBack(d->charAt(i).toLower());
947 return String(
new StringUnicodeData(std::move(chars)));
951 String trim()
const {
952 size_t len = length();
953 if (len == 0)
return String();
955 while (first < len && d->charAt(first).isSpace()) ++first;
956 if (first == len)
return String();
957 size_t last = len - 1;
958 while (last > first && d->charAt(last).isSpace()) --last;
959 return substr(first, last - first + 1);
971 bool startsWith(
const String &prefix)
const {
972 if (prefix.length() > length())
return false;
973 for (
size_t i = 0; i < prefix.length(); ++i) {
974 if (d->charAt(i) != prefix.d->charAt(i))
return false;
983 bool startsWith(
char c)
const {
return !isEmpty() && d->charAt(0) == c; }
990 bool endsWith(
const String &suffix)
const {
991 if (suffix.length() > length())
return false;
992 size_t offset = length() - suffix.length();
993 for (
size_t i = 0; i < suffix.length(); ++i) {
994 if (d->charAt(offset + i) != suffix.d->charAt(i))
return false;
1003 bool endsWith(
char c)
const {
return !isEmpty() && d->charAt(d->length() - 1) == c; }
1006 String reverse()
const {
1007 String result = *
this;
1008 result.d.modify()->reverseInPlace();
1013 bool isNumeric()
const {
1014 if (isEmpty())
return false;
1015 for (
size_t i = 0; i < d->length(); ++i) {
1016 if (!d->charAt(i).isDigit())
return false;
1033 bool isIdentifier()
const {
1034 if (isEmpty())
return false;
1035 auto c0 = d->charAt(0);
1036 if (!c0.isAlpha() && c0.codepoint() !=
'_')
return false;
1037 for (
size_t i = 1; i < d->length(); ++i) {
1038 auto c = d->charAt(i);
1039 if (!c.isAlphaNumeric() && c.codepoint() !=
'_')
return false;
1048 String replace(
const String &find,
const String &replacement)
const;
1054 int compareIgnoreCase(
const String &other)
const {
1055 size_t len = std::min(length(), other.length());
1056 for (
size_t i = 0; i < len; ++i) {
1057 char32_t a = d->charAt(i).toLower().codepoint();
1058 char32_t b = other.d->charAt(i).toLower().codepoint();
1059 if (a != b)
return a < b ? -1 : 1;
1061 if (length() < other.length())
return -1;
1062 if (length() > other.length())
return 1;
1072 uint64_t hash()
const {
return d->hash(); }
1084 String toLatin1()
const {
1085 if (d->isLatin1())
return *
this;
1087 s.reserve(d->length());
1088 for (
size_t i = 0; i < d->length(); ++i) {
1089 char32_t cp = d->charAt(i).codepoint();
1090 s +=
static_cast<char>(cp <= 0xFF ? cp :
'?');
1094 return fromLatin1(std::move(s));
1103 String toUnicode()
const {
1104 if (!d->isLatin1())
return *
this;
1105 return String(StringUnicodeData::fromLatin1(d->str()));
1117 String &arg(
const String &str);
1120 String &arg(int8_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
bool addPrefix =
false) {
1121 return arg(number(value, base, padding, padchar, addPrefix));
1125 String &arg(uint8_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
bool addPrefix =
false) {
1126 return arg(number(value, base, padding, padchar, addPrefix));
1130 String &arg(int16_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
bool addPrefix =
false) {
1131 return arg(number(value, base, padding, padchar, addPrefix));
1135 String &arg(uint16_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
1136 bool addPrefix =
false) {
1137 return arg(number(value, base, padding, padchar, addPrefix));
1141 String &arg(int32_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
bool addPrefix =
false) {
1142 return arg(number(value, base, padding, padchar, addPrefix));
1146 String &arg(uint32_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
1147 bool addPrefix =
false) {
1148 return arg(number(value, base, padding, padchar, addPrefix));
1152 String &arg(int64_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
bool addPrefix =
false) {
1153 return arg(number(value, base, padding, padchar, addPrefix));
1157 String &arg(uint64_t value,
int base = 10,
int padding = 0,
char padchar =
' ',
1158 bool addPrefix =
false) {
1159 return arg(number(value, base, padding, padchar, addPrefix));
1172 template <
typename OutputType> OutputType to(Error *err =
nullptr)
const {
1173 if constexpr (std::is_same_v<OutputType, bool>) {
1175 }
else if constexpr (std::is_integral_v<OutputType> && std::is_signed_v<OutputType>) {
1177 String cleaned = prepareIntParse(cstr(), &base);
1178 const char *s = cleaned.cstr();
1179 char *end =
nullptr;
1181 long long v = std::strtoll(s, &end, base);
1182 if (end == s || *end !=
'\0') {
1183 if (err !=
nullptr) *err = Error::Invalid;
1184 return OutputType{};
1186 if (errno == ERANGE) {
1187 if (err !=
nullptr) *err = Error::OutOfRange;
1188 return OutputType{};
1190 if (v <
static_cast<long long>(std::numeric_limits<OutputType>::min()) ||
1191 v >
static_cast<long long>(std::numeric_limits<OutputType>::max())) {
1192 if (err !=
nullptr) *err = Error::OutOfRange;
1193 return OutputType{};
1195 if (err !=
nullptr) *err = Error::Ok;
1196 return static_cast<OutputType
>(v);
1197 }
else if constexpr (std::is_integral_v<OutputType> && std::is_unsigned_v<OutputType>) {
1199 String cleaned = prepareIntParse(cstr(), &base);
1200 const char *s = cleaned.cstr();
1201 char *end =
nullptr;
1203 unsigned long long v = std::strtoull(s, &end, base);
1204 if (end == s || *end !=
'\0') {
1205 if (err !=
nullptr) *err = Error::Invalid;
1206 return OutputType{};
1208 if (errno == ERANGE) {
1209 if (err !=
nullptr) *err = Error::OutOfRange;
1210 return OutputType{};
1212 if (v >
static_cast<unsigned long long>(std::numeric_limits<OutputType>::max())) {
1213 if (err !=
nullptr) *err = Error::OutOfRange;
1214 return OutputType{};
1216 if (err !=
nullptr) *err = Error::Ok;
1217 return static_cast<OutputType
>(v);
1218 }
else if constexpr (std::is_floating_point_v<OutputType>) {
1219 String cleaned = stripNumericSeparators(cstr());
1220 const char *s = cleaned.cstr();
1221 char *end =
nullptr;
1223 double v = std::strtod(s, &end);
1224 if (end == s || *end !=
'\0') {
1225 if (err !=
nullptr) *err = Error::Invalid;
1226 return OutputType{};
1228 if (errno == ERANGE) {
1229 if (err !=
nullptr) *err = Error::OutOfRange;
1230 return OutputType{};
1235 if constexpr (!std::is_same_v<OutputType, double> &&
1236 !std::is_same_v<OutputType, long double>) {
1237 const double absv = v < 0 ? -v : v;
1238 if (absv >
static_cast<double>(std::numeric_limits<OutputType>::max())) {
1239 if (err !=
nullptr) *err = Error::OutOfRange;
1240 return OutputType{};
1243 if (err !=
nullptr) *err = Error::Ok;
1244 return static_cast<OutputType
>(v);
1246 if (err !=
nullptr) *err = Error::Invalid;
1247 return OutputType{};
1256 bool toBool(Error *err =
nullptr)
const;
1263 int toInt(Error *err =
nullptr)
const;
1270 unsigned int toUInt(Error *err =
nullptr)
const;
1277 double toDouble(Error *err =
nullptr)
const;
1284 int64_t parseNumberWords(Error *err =
nullptr)
const;
1293 StringList split(
const String &delimiter)
const;
1296 StringList split(
const char *delimiter)
const;
1299 StringList split(
char delimiter)
const;
1302 SharedPtr<StringData> d;
1304 explicit String(StringData *data) : d(SharedPtr<StringData>::takeOwnership(data)) {}
1319 static StringData *makeDataFromUtf8(
const char *data,
size_t len) {
1320 for (
size_t i = 0; i < len; ++i) {
1321 if (
static_cast<unsigned char>(data[i]) > 0x7F)
1322 return StringUnicodeData::fromUtf8(data, len);
1324 return new StringLatin1Data(data, len);
1333 static StringData *makeDataFromUtf8(std::string &&data) {
1334 for (
unsigned char c : data) {
1335 if (c > 0x7F)
return StringUnicodeData::fromUtf8(data.data(), data.size());
1337 return new StringLatin1Data(std::move(data));
1349 static String stripNumericSeparators(
const char *s);
1364 static String prepareIntParse(
const char *s,
int *base);
1368inline String operator+(
const char *lhs,
const String &rhs) {
1369 return String(lhs) + rhs;
1372PROMEKI_NAMESPACE_END
1375template <>
struct std::hash<promeki::String> {
1376 size_t operator()(
const promeki::String &s)
const noexcept {
return static_cast<size_t>(s.hash()); }
1390template <>
struct std::formatter<promeki::String> : std::formatter<std::string_view> {
1391 using Base = std::formatter<std::string_view>;
1392 template <
typename FormatContext>
auto format(
const promeki::String &s, FormatContext &ctx)
const {
1393 return Base::format(std::string_view(s.cstr(), s.byteCount()), ctx);
1406template <>
struct std::formatter<promeki::Char> : std::formatter<std::string_view> {
1407 using Base = std::formatter<std::string_view>;
1408 template <
typename FormatContext>
auto format(
const promeki::Char &c, FormatContext &ctx)
const {
1410 size_t n = c.toUtf8(buf);
1411 return Base::format(std::string_view(buf, n), ctx);
1415PROMEKI_NAMESPACE_BEGIN
1446template <
typename T>
struct ToStringFormatter : std::formatter<std::string_view> {
1447 using Base = std::formatter<std::string_view>;
1448 template <
typename FormatContext>
auto format(
const T &v, FormatContext &ctx)
const {
1449 String s = v.toString();
1450 return Base::format(std::string_view(s.cstr(), s.byteCount()), ctx);
1454PROMEKI_NAMESPACE_END
/*
 * Declares a std::formatter specialization for the given type that inherits
 * from ::promeki::ToStringFormatter of that type. The expansion carries no
 * trailing semicolon; the user of the macro supplies it.
 */
#define PROMEKI_FORMAT_VIA_TOSTRING(...)                                     \
        template <>                                                         \
        struct std::formatter<__VA_ARGS__>                                  \
            : ::promeki::ToStringFormatter<__VA_ARGS__> {}
1476PROMEKI_NAMESPACE_BEGIN
1488template <
size_t N>
class CompiledString {
1490 consteval CompiledString(
const char (&str)[N])
1491 : _bytes{}, _codepoints{}, _charCount(0), _isAscii(true) {
1492 for (
size_t i = 0; i < N; ++i) _bytes[i] = str[i];
1494 while (pos < N - 1) {
1495 unsigned char b =
static_cast<unsigned char>(str[pos]);
1496 if (b > 0x7F) _isAscii =
false;
1502 }
else if (b < 0xE0) {
1505 }
else if (b < 0xF0) {
1512 for (
size_t j = 1; j < seqLen && pos + j < N - 1; ++j)
1513 cp = (cp << 6) | (
static_cast<unsigned char>(str[pos + j]) & 0x3F);
1514 _codepoints[_charCount++] = cp;
1519 constexpr bool isAscii()
const {
return _isAscii; }
1520 constexpr size_t charCount()
const {
return _charCount; }
1521 constexpr size_t byteCount()
const {
return N - 1; }
1522 constexpr const char *bytes()
const {
return _bytes; }
1523 constexpr const char32_t *codepoints()
const {
return _codepoints; }
1533 constexpr uint64_t hash()
const {
1534 if (_isAscii)
return fnv1aLatin1AsCodepoints(_bytes, N - 1);
1535 return fnv1aCodepoints(_codepoints, _charCount);
1544 char32_t _codepoints[N];
1559template <
size_t Count>
class CompiledCodepoints {
1561 template <
size_t N>
consteval CompiledCodepoints(
const CompiledString<N> &cs) : _data{} {
1562 for (
size_t i = 0; i < Count; ++i) _data[i] = cs.codepoints()[i];
1564 constexpr const char32_t *data()
const {
return _data; }
1565 constexpr size_t size()
const {
return Count; }
1568 char32_t _data[Count];
1585 inline String
operator""_ps(
const char *str,
size_t len) {
1586 return String::fromUtf8(str, len);
1590PROMEKI_NAMESPACE_END
1611#define PROMEKI_STRING(str) \
1612 ([]() -> ::promeki::String { \
1613 constexpr auto _cs = ::promeki::CompiledString<sizeof(str)>(str); \
1614 if constexpr (_cs.isAscii()) { \
1615 static ::promeki::StringLiteralData _lit(str, _cs.byteCount(), _cs.hash()); \
1616 return ::promeki::String::fromLiteralData(&_lit); \
1618 static constexpr auto _cp = ::promeki::CompiledCodepoints<_cs.charCount()>(_cs); \
1619 static ::promeki::StringUnicodeLiteralData _lit(_cp.data(), _cp.size(), str, _cs.byteCount(), \
1621 return ::promeki::String::fromLiteralData(&_lit); \