libpromeki 1.0.0-alpha
PROfessional MEdia toolKIt
 
Loading...
Searching...
No Matches
string.h
Go to the documentation of this file.
1
8#pragma once
9
10
11#include <promeki/config.h>
12#if PROMEKI_ENABLE_CORE
#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <format>
#include <limits>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
23#include <promeki/namespace.h>
24#include <promeki/util.h>
25#include <promeki/error.h>
26#include <promeki/sharedptr.h>
27#include <promeki/char.h>
28#include <promeki/stringdata.h>
29
30PROMEKI_NAMESPACE_BEGIN
31
32class StringList;
33
67class String {
68 public:
70 using List = ::promeki::List<String>;
71
73 using CharList = ::promeki::List<Char>;
74
81 class CharIterator {
82 public:
83 using iterator_category = std::random_access_iterator_tag;
84 using value_type = Char;
85 using difference_type = std::ptrdiff_t;
86 using pointer = void;
87 using reference = Char;
88
89 CharIterator() : _data(nullptr), _idx(0) {}
90 CharIterator(const StringData *data, size_t idx) : _data(data), _idx(idx) {}
91
92 Char operator*() const { return _data->charAt(_idx); }
93 Char operator[](difference_type n) const { return _data->charAt(_idx + n); }
94
95 CharIterator &operator++() {
96 ++_idx;
97 return *this;
98 }
99 CharIterator operator++(int) {
100 auto tmp = *this;
101 ++_idx;
102 return tmp;
103 }
104 CharIterator &operator--() {
105 --_idx;
106 return *this;
107 }
108 CharIterator operator--(int) {
109 auto tmp = *this;
110 --_idx;
111 return tmp;
112 }
113 CharIterator &operator+=(difference_type n) {
114 _idx += n;
115 return *this;
116 }
117 CharIterator &operator-=(difference_type n) {
118 _idx -= n;
119 return *this;
120 }
121 CharIterator operator+(difference_type n) const { return {_data, _idx + n}; }
122 CharIterator operator-(difference_type n) const { return {_data, _idx - n}; }
123 difference_type operator-(const CharIterator &o) const {
124 return static_cast<difference_type>(_idx) -
125 static_cast<difference_type>(o._idx);
126 }
127 friend CharIterator operator+(difference_type n, const CharIterator &it) {
128 return it + n;
129 }
130
131 bool operator==(const CharIterator &o) const { return _idx == o._idx; }
132 bool operator!=(const CharIterator &o) const { return _idx != o._idx; }
133 bool operator<(const CharIterator &o) const { return _idx < o._idx; }
134 bool operator<=(const CharIterator &o) const { return _idx <= o._idx; }
135 bool operator>(const CharIterator &o) const { return _idx > o._idx; }
136 bool operator>=(const CharIterator &o) const { return _idx >= o._idx; }
137
138 private:
139 const StringData *_data;
140 size_t _idx;
141 };
142
144 using ConstIterator = CharIterator;
145
147 static constexpr size_t npos = StringData::npos;
148
150 static constexpr const char *WhitespaceChars = " \t\n\r\f\v";
151
153 enum Encoding {
154 Latin1,
155 Unicode
156 };
157
158 // ============================================================
159 // Static factory methods (implemented in string.cpp)
160 // ============================================================
161
171 static String number(int8_t value, int base = 10, int padding = 0, char padchar = ' ',
172 bool addPrefix = false);
174 static String number(uint8_t value, int base = 10, int padding = 0, char padchar = ' ',
175 bool addPrefix = false);
177 static String number(int16_t value, int base = 10, int padding = 0, char padchar = ' ',
178 bool addPrefix = false);
180 static String number(uint16_t value, int base = 10, int padding = 0, char padchar = ' ',
181 bool addPrefix = false);
183 static String number(int32_t value, int base = 10, int padding = 0, char padchar = ' ',
184 bool addPrefix = false);
186 static String number(uint32_t value, int base = 10, int padding = 0, char padchar = ' ',
187 bool addPrefix = false);
189 static String number(int64_t value, int base = 10, int padding = 0, char padchar = ' ',
190 bool addPrefix = false);
192 static String number(uint64_t value, int base = 10, int padding = 0, char padchar = ' ',
193 bool addPrefix = false);
194
200 static String number(bool value) { return value ? "true" : "false"; }
201
208 static String number(float value, int precision = 9);
210 static String number(double value, int precision = 9);
211
220 template <typename T> static String dec(const T &val, int padding = 0, char padchar = ' ') {
221 return number(static_cast<int64_t>(val), 10, padding, padchar);
222 }
223
232 template <typename T> static String hex(const T &val, int padding = 0, bool addPrefix = true) {
233 int totalWidth = (addPrefix && padding > 0) ? padding + 2 : padding;
234 return number(static_cast<uint64_t>(val), 16, totalWidth, '0', addPrefix);
235 }
236
245 template <typename T> static String bin(const T &val, int digits = 32, bool addPrefix = true) {
246 std::string ret;
247 if (addPrefix) ret += "0b";
248 T mask = static_cast<T>(1) << (digits - 1);
249 for (int i = 0; i < digits; i++) {
250 ret.push_back(val & mask ? '1' : '0');
251 mask >>= 1;
252 }
253 return ret;
254 }
255
265 PROMEKI_PRINTF_FUNC(1, 2) static String sprintf(const char *fmt, ...);
266
/**
 * @brief Formats arguments with a compile-time checked std::format string.
 * @param fmt   The format string.
 * @param args  The values to substitute.
 * @return The std::format() output, passed through fromUtf8().
 */
template <typename... Args> static String format(std::format_string<Args...> fmt, Args &&...args) {
        const std::string rendered = std::format(fmt, std::forward<Args>(args)...);
        return fromUtf8(rendered.data(), rendered.size());
}

/**
 * @brief Formats type-erased arguments with a runtime format string.
 * @param fmt   The format string.
 * @param args  The type-erased argument pack.
 * @return The std::vformat() output, passed through fromUtf8().
 */
static String vformat(std::string_view fmt, std::format_args args) {
        const std::string rendered = std::vformat(fmt, args);
        return fromUtf8(rendered.data(), rendered.size());
}
322
339 static String fromUtf8(const char *data, size_t len) { return String(data, len); }
340
355 static String fromLatin1(const char *data, size_t len) {
356 return String(new StringLatin1Data(data, len));
357 }
358
360 static String fromLatin1(const std::string &data) { return String(new StringLatin1Data(data)); }
361
363 static String fromLatin1(std::string &&data) { return String(new StringLatin1Data(std::move(data))); }
364
372 static String fromLiteralData(StringData *data) { return String(data); }
373
374 // ============================================================
375 // Constructors
376 // ============================================================
377
379 String() : d(SharedPtr<StringData>::takeOwnership(new StringLatin1Data())) {}
380
382 String(std::nullptr_t) : String() {}
383
396 String(const char *str)
397 : d(SharedPtr<StringData>::takeOwnership(
398 makeDataFromUtf8(str ? str : "", str ? std::strlen(str) : 0))) {}
399
407 String(const char *str, size_t len)
408 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(str, len))) {}
409
421 String(size_t ct, char c) : d(SharedPtr<StringData>::takeOwnership(new StringLatin1Data(ct, c))) {}
422
429 String(const std::string &str)
430 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(str.data(), str.size()))) {}
431
441 String(std::string &&str)
442 : d(SharedPtr<StringData>::takeOwnership(makeDataFromUtf8(std::move(str)))) {}
443
444 // ============================================================
445 // Const access (pure delegation)
446 // ============================================================
447
449 const std::string &str() const { return d->str(); }
450
452 const char *cstr() const { return d->cstr(); }
453
455 size_t size() const { return d->length(); }
456
458 size_t length() const { return d->length(); }
459
461 size_t byteCount() const { return d->byteCount(); }
462
468 uint8_t byteAt(size_t idx) const { return d->byteAt(idx); }
469
475 Char charAt(size_t idx) const { return d->charAt(idx); }
476
478 bool isEmpty() const { return d->isEmpty(); }
479
481 int referenceCount() const { return d.referenceCount(); }
482
484 bool isLiteral() const { return d->isLiteral(); }
485
487 Encoding encoding() const { return d->isLatin1() ? Latin1 : Unicode; }
488
489 // ============================================================
490 // Character iterators
491 // ============================================================
492
494 ConstIterator begin() const noexcept { return {d.ptr(), 0}; }
496 ConstIterator cbegin() const noexcept { return {d.ptr(), 0}; }
498 ConstIterator end() const noexcept { return {d.ptr(), d->length()}; }
500 ConstIterator cend() const noexcept { return {d.ptr(), d->length()}; }
501
502 // ============================================================
503 // Implicit conversions
504 // ============================================================
505
507 operator const std::string &() const { return d->str(); }
508
510 operator const char *() const { return d->cstr(); }
511
512 // ============================================================
513 // Find / contains
514 // ============================================================
515
522 size_t find(char val, size_t from = 0) const { return d->find(Char(val), from); }
524 size_t find(Char val, size_t from = 0) const { return d->find(val, from); }
532 size_t find(const char *val, size_t from = 0) const {
533 if (val == nullptr) return npos;
534 size_t len = 0;
535 while (val[len]) ++len;
536 return d->find(*String::fromUtf8(val, len).d, from);
537 }
539 size_t find(const String &val, size_t from = 0) const { return d->find(*val.d, from); }
540
547 size_t rfind(char val, size_t from = npos) const { return d->rfind(Char(val), from); }
549 size_t rfind(Char val, size_t from = npos) const { return d->rfind(val, from); }
555 size_t rfind(const char *val, size_t from = npos) const {
556 if (val == nullptr) return npos;
557 size_t len = 0;
558 while (val[len]) ++len;
559 return d->rfind(*String::fromUtf8(val, len).d, from);
560 }
562 size_t rfind(const String &val, size_t from = npos) const { return d->rfind(*val.d, from); }
563
569 bool contains(char val) const { return d->find(Char(val)) != npos; }
571 bool contains(Char val) const { return d->find(val) != npos; }
573 bool contains(const String &val) const { return d->find(*val.d) != npos; }
579 bool contains(const char *val) const { return find(val) != npos; }
580
586 size_t count(const String &substr) const { return d->count(*substr.d); }
587
588 // ============================================================
589 // Substring
590 // ============================================================
591
598 String substr(size_t pos = 0, size_t len = npos) const {
599 if (pos >= d->length()) return String();
600 if (len == npos) len = d->length() - pos;
601 return String(d->createSubstr(pos, len));
602 }
603
610 String mid(size_t pos, size_t count = npos) const { return substr(pos, count); }
611
617 String left(size_t count) const { return substr(0, count); }
618
624 String right(std::size_t count) const {
625 if (count >= length()) return *this;
626 return substr(length() - count, count);
627 }
628
651 String truncated(size_t maxChars) const {
652 if (length() <= maxChars) return *this;
653 if (maxChars < 3) return left(maxChars);
654 return left(maxChars - 3) + "...";
655 }
656
657 // ============================================================
658 // Mutation (COW)
659 // ============================================================
660
662 void clear() { d.modify()->clear(); }
663
668 void resize(size_t val) { d.modify()->resize(val); }
669
680 void reserve(size_t capacity) { d.modify()->reserve(capacity); }
681
686 void pushBack(Char ch) {
687 if (d->isLatin1() && ch.codepoint() > 0xFF) {
688 auto *ud = StringUnicodeData::fromLatin1(d->str());
689 d = SharedPtr<StringData>::takeOwnership(ud);
690 }
691 d.modify()->append(ch);
692 }
693
695 void pushBack(char ch) { d.modify()->append(Char(ch)); }
696
702 void erase(size_t pos, size_t count = 1) { d.modify()->erase(pos, count); }
703
709 void setCharAt(size_t idx, Char ch) {
710 if (d->isLatin1() && ch.codepoint() > 0xFF) {
711 auto *ud = StringUnicodeData::fromLatin1(d->str());
712 d = SharedPtr<StringData>::takeOwnership(ud);
713 }
714 d.modify()->setCharAt(idx, ch);
715 }
716
722 void insert(size_t pos, const String &s) {
723 if (d->isLatin1() && !s.d->isLatin1()) {
724 auto *ud = StringUnicodeData::fromLatin1(d->str());
725 d = SharedPtr<StringData>::takeOwnership(ud);
726 }
727 d.modify()->insert(pos, *s.d);
728 }
729
730 // ============================================================
731 // Assignment
732 // ============================================================
733
735 String &operator=(const std::string &str) {
736 d = SharedPtr<StringData>::takeOwnership(new StringLatin1Data(str));
737 return *this;
738 }
739
741 String &operator=(std::string &&str) {
742 d = SharedPtr<StringData>::takeOwnership(new StringLatin1Data(std::move(str)));
743 return *this;
744 }
745
747 String &operator=(const char *str) {
748 d = SharedPtr<StringData>::takeOwnership(new StringLatin1Data(str ? str : ""));
749 return *this;
750 }
751
752 // ============================================================
753 // Concatenation
754 // ============================================================
755
761 String &operator+=(const String &val) {
762 if (val.isEmpty()) return *this;
763 if (d->isLatin1() && !val.d->isLatin1()) {
764 auto *ud = StringUnicodeData::fromLatin1(d->str());
765 d = SharedPtr<StringData>::takeOwnership(ud);
766 }
767 d.modify()->append(*val.d);
768 return *this;
769 }
770
772 String &operator+=(const std::string &val) { return *this += String(val); }
774 String &operator+=(const char *val) { return *this += String(val); }
776 String &operator+=(char val) {
777 d.modify()->append(Char(val));
778 return *this;
779 }
780
786 String operator+(const String &val) const {
787 String result = *this;
788 result += val;
789 return result;
790 }
791
793 String operator+(const std::string &val) const { return *this + String(val); }
795 String operator+(const char *val) const { return *this + String(val); }
797 String operator+(char val) const { return *this + String(1, val); }
798
799 // ============================================================
800 // Comparison
801 // ============================================================
802
808 bool operator==(const String &val) const {
809 if (d->length() != val.d->length()) return false;
810 if (d->isLatin1() && val.d->isLatin1()) return d->str() == val.d->str();
811 for (size_t i = 0; i < d->length(); ++i) {
812 if (d->charAt(i) != val.d->charAt(i)) return false;
813 }
814 return true;
815 }
816
828 bool operator==(const char *val) const {
829 if (val == nullptr) return d->length() == 0;
830 const size_t len = d->length();
831 size_t i = 0;
832 while (*val) {
833 if (i >= len) return false;
834 size_t bytesRead = 0;
835 Char vc = Char::fromUtf8(val, &bytesRead);
836 if (d->charAt(i) != vc) return false;
837 val += bytesRead;
838 ++i;
839 }
840 return i == len;
841 }
843 bool operator==(char val) const { return d->length() == 1 && d->charAt(0) == val; }
844
846 bool operator!=(const String &val) const { return !(*this == val); }
848 bool operator!=(const char *val) const { return !(*this == val); }
850 bool operator!=(char val) const { return !(*this == val); }
851
862 friend bool operator<(const String &lhs, const String &rhs) {
863 if (lhs.d->isLatin1() == rhs.d->isLatin1()) {
864 // Same encoding: byte-level comparison is correct.
865 // - Latin1 vs Latin1: byte order == codepoint order.
866 // - Unicode vs Unicode: UTF-8 byte order == codepoint order.
867 return lhs.d->str() < rhs.d->str();
868 }
869 // Mixed encodings: walk codepoint-by-codepoint so that
870 // logically equal strings (e.g. Latin1 0xE9 and Unicode
871 // U+00E9) compare as equal under operator<.
872 const size_t llen = lhs.d->length();
873 const size_t rlen = rhs.d->length();
874 const size_t n = std::min(llen, rlen);
875 for (size_t i = 0; i < n; ++i) {
876 char32_t a = lhs.d->charAt(i).codepoint();
877 char32_t b = rhs.d->charAt(i).codepoint();
878 if (a != b) return a < b;
879 }
880 return llen < rlen;
881 }
882
884 friend bool operator<=(const String &lhs, const String &rhs) { return !(rhs < lhs); }
885
887 friend bool operator>(const String &lhs, const String &rhs) { return rhs < lhs; }
888
890 friend bool operator>=(const String &lhs, const String &rhs) { return !(lhs < rhs); }
891
892 // ============================================================
893 // Case / whitespace
894 // ============================================================
895
903 String toUpper() const {
904 if (d->isLatin1()) {
905 const std::string &src = d->str();
906 std::string s;
907 s.resize(src.size());
908 for (size_t i = 0; i < src.size(); ++i) {
909 char32_t cp = Char(src[i]).toUpper().codepoint();
910 // Latin1 toUpper stays within the
911 // 0x00–0xFF range, so this is lossless.
912 s[i] = static_cast<char>(cp);
913 }
914 // The bytes are already Latin1 by construction —
915 // bypass the UTF-8-aware ctor.
916 return fromLatin1(std::move(s));
917 }
918 CharList chars;
919 chars.reserve(d->length());
920 for (size_t i = 0; i < d->length(); ++i) chars.pushToBack(d->charAt(i).toUpper());
921 return String(new StringUnicodeData(std::move(chars)));
922 }
923
931 String toLower() const {
932 if (d->isLatin1()) {
933 const std::string &src = d->str();
934 std::string s;
935 s.resize(src.size());
936 for (size_t i = 0; i < src.size(); ++i) {
937 char32_t cp = Char(src[i]).toLower().codepoint();
938 s[i] = static_cast<char>(cp);
939 }
940 // The bytes are already Latin1 by construction —
941 // bypass the UTF-8-aware ctor.
942 return fromLatin1(std::move(s));
943 }
944 CharList chars;
945 chars.reserve(d->length());
946 for (size_t i = 0; i < d->length(); ++i) chars.pushToBack(d->charAt(i).toLower());
947 return String(new StringUnicodeData(std::move(chars)));
948 }
949
951 String trim() const {
952 size_t len = length();
953 if (len == 0) return String();
954 size_t first = 0;
955 while (first < len && d->charAt(first).isSpace()) ++first;
956 if (first == len) return String();
957 size_t last = len - 1;
958 while (last > first && d->charAt(last).isSpace()) --last;
959 return substr(first, last - first + 1);
960 }
961
962 // ============================================================
963 // Starts / ends / reverse / numeric
964 // ============================================================
965
971 bool startsWith(const String &prefix) const {
972 if (prefix.length() > length()) return false;
973 for (size_t i = 0; i < prefix.length(); ++i) {
974 if (d->charAt(i) != prefix.d->charAt(i)) return false;
975 }
976 return true;
977 }
978
983 bool startsWith(char c) const { return !isEmpty() && d->charAt(0) == c; }
984
990 bool endsWith(const String &suffix) const {
991 if (suffix.length() > length()) return false;
992 size_t offset = length() - suffix.length();
993 for (size_t i = 0; i < suffix.length(); ++i) {
994 if (d->charAt(offset + i) != suffix.d->charAt(i)) return false;
995 }
996 return true;
997 }
998
1003 bool endsWith(char c) const { return !isEmpty() && d->charAt(d->length() - 1) == c; }
1004
1006 String reverse() const {
1007 String result = *this;
1008 result.d.modify()->reverseInPlace();
1009 return result;
1010 }
1011
1013 bool isNumeric() const {
1014 if (isEmpty()) return false;
1015 for (size_t i = 0; i < d->length(); ++i) {
1016 if (!d->charAt(i).isDigit()) return false;
1017 }
1018 return true;
1019 }
1020
1033 bool isIdentifier() const {
1034 if (isEmpty()) return false;
1035 auto c0 = d->charAt(0);
1036 if (!c0.isAlpha() && c0.codepoint() != '_') return false;
1037 for (size_t i = 1; i < d->length(); ++i) {
1038 auto c = d->charAt(i);
1039 if (!c.isAlphaNumeric() && c.codepoint() != '_') return false;
1040 }
1041 return true;
1042 }
1043
1048 String replace(const String &find, const String &replacement) const;
1049
1054 int compareIgnoreCase(const String &other) const {
1055 size_t len = std::min(length(), other.length());
1056 for (size_t i = 0; i < len; ++i) {
1057 char32_t a = d->charAt(i).toLower().codepoint();
1058 char32_t b = other.d->charAt(i).toLower().codepoint();
1059 if (a != b) return a < b ? -1 : 1;
1060 }
1061 if (length() < other.length()) return -1;
1062 if (length() > other.length()) return 1;
1063 return 0;
1064 }
1065
1072 uint64_t hash() const { return d->hash(); }
1073
1074 // ============================================================
1075 // Encoding conversion
1076 // ============================================================
1077
1084 String toLatin1() const {
1085 if (d->isLatin1()) return *this;
1086 std::string s;
1087 s.reserve(d->length());
1088 for (size_t i = 0; i < d->length(); ++i) {
1089 char32_t cp = d->charAt(i).codepoint();
1090 s += static_cast<char>(cp <= 0xFF ? cp : '?');
1091 }
1092 // The bytes are Latin1 by construction (codepoints
1093 // 0–0xFF or '?') — bypass the UTF-8-aware ctor.
1094 return fromLatin1(std::move(s));
1095 }
1096
1103 String toUnicode() const {
1104 if (!d->isLatin1()) return *this;
1105 return String(StringUnicodeData::fromLatin1(d->str()));
1106 }
1107
1108 // ============================================================
1109 // Arg replacement (implemented in string.cpp)
1110 // ============================================================
1111
1117 String &arg(const String &str);
1118
1120 String &arg(int8_t value, int base = 10, int padding = 0, char padchar = ' ', bool addPrefix = false) {
1121 return arg(number(value, base, padding, padchar, addPrefix));
1122 }
1123
1125 String &arg(uint8_t value, int base = 10, int padding = 0, char padchar = ' ', bool addPrefix = false) {
1126 return arg(number(value, base, padding, padchar, addPrefix));
1127 }
1128
1130 String &arg(int16_t value, int base = 10, int padding = 0, char padchar = ' ', bool addPrefix = false) {
1131 return arg(number(value, base, padding, padchar, addPrefix));
1132 }
1133
1135 String &arg(uint16_t value, int base = 10, int padding = 0, char padchar = ' ',
1136 bool addPrefix = false) {
1137 return arg(number(value, base, padding, padchar, addPrefix));
1138 }
1139
1141 String &arg(int32_t value, int base = 10, int padding = 0, char padchar = ' ', bool addPrefix = false) {
1142 return arg(number(value, base, padding, padchar, addPrefix));
1143 }
1144
1146 String &arg(uint32_t value, int base = 10, int padding = 0, char padchar = ' ',
1147 bool addPrefix = false) {
1148 return arg(number(value, base, padding, padchar, addPrefix));
1149 }
1150
1152 String &arg(int64_t value, int base = 10, int padding = 0, char padchar = ' ', bool addPrefix = false) {
1153 return arg(number(value, base, padding, padchar, addPrefix));
1154 }
1155
1157 String &arg(uint64_t value, int base = 10, int padding = 0, char padchar = ' ',
1158 bool addPrefix = false) {
1159 return arg(number(value, base, padding, padchar, addPrefix));
1160 }
1161
1162 // ============================================================
1163 // Conversion (implemented in string.cpp)
1164 // ============================================================
1165
1172 template <typename OutputType> OutputType to(Error *err = nullptr) const {
1173 if constexpr (std::is_same_v<OutputType, bool>) {
1174 return toBool(err);
1175 } else if constexpr (std::is_integral_v<OutputType> && std::is_signed_v<OutputType>) {
1176 int base = 10;
1177 String cleaned = prepareIntParse(cstr(), &base);
1178 const char *s = cleaned.cstr();
1179 char *end = nullptr;
1180 errno = 0;
1181 long long v = std::strtoll(s, &end, base);
1182 if (end == s || *end != '\0') {
1183 if (err != nullptr) *err = Error::Invalid;
1184 return OutputType{};
1185 }
1186 if (errno == ERANGE) {
1187 if (err != nullptr) *err = Error::OutOfRange;
1188 return OutputType{};
1189 }
1190 if (v < static_cast<long long>(std::numeric_limits<OutputType>::min()) ||
1191 v > static_cast<long long>(std::numeric_limits<OutputType>::max())) {
1192 if (err != nullptr) *err = Error::OutOfRange;
1193 return OutputType{};
1194 }
1195 if (err != nullptr) *err = Error::Ok;
1196 return static_cast<OutputType>(v);
1197 } else if constexpr (std::is_integral_v<OutputType> && std::is_unsigned_v<OutputType>) {
1198 int base = 10;
1199 String cleaned = prepareIntParse(cstr(), &base);
1200 const char *s = cleaned.cstr();
1201 char *end = nullptr;
1202 errno = 0;
1203 unsigned long long v = std::strtoull(s, &end, base);
1204 if (end == s || *end != '\0') {
1205 if (err != nullptr) *err = Error::Invalid;
1206 return OutputType{};
1207 }
1208 if (errno == ERANGE) {
1209 if (err != nullptr) *err = Error::OutOfRange;
1210 return OutputType{};
1211 }
1212 if (v > static_cast<unsigned long long>(std::numeric_limits<OutputType>::max())) {
1213 if (err != nullptr) *err = Error::OutOfRange;
1214 return OutputType{};
1215 }
1216 if (err != nullptr) *err = Error::Ok;
1217 return static_cast<OutputType>(v);
1218 } else if constexpr (std::is_floating_point_v<OutputType>) {
1219 String cleaned = stripNumericSeparators(cstr());
1220 const char *s = cleaned.cstr();
1221 char *end = nullptr;
1222 errno = 0;
1223 double v = std::strtod(s, &end);
1224 if (end == s || *end != '\0') {
1225 if (err != nullptr) *err = Error::Invalid;
1226 return OutputType{};
1227 }
1228 if (errno == ERANGE) {
1229 if (err != nullptr) *err = Error::OutOfRange;
1230 return OutputType{};
1231 }
1232 // Range-check narrowing conversions (e.g. double -> float):
1233 // strtod accepts values that exceed the OutputType's range and
1234 // a static_cast would silently produce ±inf.
1235 if constexpr (!std::is_same_v<OutputType, double> &&
1236 !std::is_same_v<OutputType, long double>) {
1237 const double absv = v < 0 ? -v : v;
1238 if (absv > static_cast<double>(std::numeric_limits<OutputType>::max())) {
1239 if (err != nullptr) *err = Error::OutOfRange;
1240 return OutputType{};
1241 }
1242 }
1243 if (err != nullptr) *err = Error::Ok;
1244 return static_cast<OutputType>(v);
1245 } else {
1246 if (err != nullptr) *err = Error::Invalid;
1247 return OutputType{};
1248 }
1249 }
1250
1256 bool toBool(Error *err = nullptr) const;
1257
1263 int toInt(Error *err = nullptr) const;
1264
1270 unsigned int toUInt(Error *err = nullptr) const;
1271
1277 double toDouble(Error *err = nullptr) const;
1278
1284 int64_t parseNumberWords(Error *err = nullptr) const;
1285
1293 StringList split(const String &delimiter) const;
1294
1296 StringList split(const char *delimiter) const;
1297
1299 StringList split(char delimiter) const;
1300
1301 private:
1302 SharedPtr<StringData> d;
1303
1304 explicit String(StringData *data) : d(SharedPtr<StringData>::takeOwnership(data)) {}
1305
1319 static StringData *makeDataFromUtf8(const char *data, size_t len) {
1320 for (size_t i = 0; i < len; ++i) {
1321 if (static_cast<unsigned char>(data[i]) > 0x7F)
1322 return StringUnicodeData::fromUtf8(data, len);
1323 }
1324 return new StringLatin1Data(data, len);
1325 }
1326
1333 static StringData *makeDataFromUtf8(std::string &&data) {
1334 for (unsigned char c : data) {
1335 if (c > 0x7F) return StringUnicodeData::fromUtf8(data.data(), data.size());
1336 }
1337 return new StringLatin1Data(std::move(data));
1338 }
1339
1349 static String stripNumericSeparators(const char *s);
1350
1364 static String prepareIntParse(const char *s, int *base);
1365};
1366
1368inline String operator+(const char *lhs, const String &rhs) {
1369 return String(lhs) + rhs;
1370}
1371
1372PROMEKI_NAMESPACE_END
1373
1375template <> struct std::hash<promeki::String> {
1376 size_t operator()(const promeki::String &s) const noexcept { return static_cast<size_t>(s.hash()); }
1377};
1379
/**
 * @brief std::formatter specialization for promeki::String.
 *
 * Inherits from the std::string_view formatter and formats a view over
 * cstr() that is byteCount() bytes long.
 */
template <> struct std::formatter<promeki::String> : std::formatter<std::string_view> {
                using Base = std::formatter<std::string_view>;

                template <typename FormatContext>
                auto format(const promeki::String &s, FormatContext &ctx) const {
                        const std::string_view view(s.cstr(), s.byteCount());
                        return Base::format(view, ctx);
                }
};
1396
/**
 * @brief std::formatter specialization for promeki::Char.
 *
 * The character is written into a 4-byte buffer with Char::toUtf8() and
 * the bytes it reports are formatted through the std::string_view
 * formatter this type inherits from.
 */
template <> struct std::formatter<promeki::Char> : std::formatter<std::string_view> {
                using Base = std::formatter<std::string_view>;

                template <typename FormatContext>
                auto format(const promeki::Char &c, FormatContext &ctx) const {
                        char                   utf8[4];
                        const size_t           used = c.toUtf8(utf8);
                        const std::string_view view(utf8, used);
                        return Base::format(view, ctx);
                }
};
1414
1415PROMEKI_NAMESPACE_BEGIN
1416
/**
 * @brief Formatter base for types that expose a toString() method.
 *
 * format() calls @c v.toString(), stores the result in a String, and
 * formats its bytes through the inherited std::string_view formatter.
 *
 * @tparam T  A type whose toString() result can initialise a String.
 */
template <typename T> struct ToStringFormatter : std::formatter<std::string_view> {
                using Base = std::formatter<std::string_view>;

                template <typename FormatContext> auto format(const T &v, FormatContext &ctx) const {
                        String                 text = v.toString();
                        const std::string_view view(text.cstr(), text.byteCount());
                        return Base::format(view, ctx);
                }
};
1453
1454PROMEKI_NAMESPACE_END
1455
/**
 * @brief Declares a std::formatter specialization for a type by deriving
 *        it from ::promeki::ToStringFormatter.
 *
 * The macro is variadic so that type names containing commas can be
 * passed unparenthesised.  It expands to a declaration without the
 * trailing semicolon, so the invocation supplies it.
 */
#define PROMEKI_FORMAT_VIA_TOSTRING(...)                                                           \
        template <>                                                                                \
        struct std::formatter<__VA_ARGS__> : ::promeki::ToStringFormatter<__VA_ARGS__> {}
1475
1476PROMEKI_NAMESPACE_BEGIN
1477
1488template <size_t N> class CompiledString {
1489 public:
1490 consteval CompiledString(const char (&str)[N])
1491 : _bytes{}, _codepoints{}, _charCount(0), _isAscii(true) {
1492 for (size_t i = 0; i < N; ++i) _bytes[i] = str[i];
1493 size_t pos = 0;
1494 while (pos < N - 1) {
1495 unsigned char b = static_cast<unsigned char>(str[pos]);
1496 if (b > 0x7F) _isAscii = false;
1497 char32_t cp;
1498 size_t seqLen;
1499 if (b < 0x80) {
1500 cp = b;
1501 seqLen = 1;
1502 } else if (b < 0xE0) {
1503 cp = b & 0x1F;
1504 seqLen = 2;
1505 } else if (b < 0xF0) {
1506 cp = b & 0x0F;
1507 seqLen = 3;
1508 } else {
1509 cp = b & 0x07;
1510 seqLen = 4;
1511 }
1512 for (size_t j = 1; j < seqLen && pos + j < N - 1; ++j)
1513 cp = (cp << 6) | (static_cast<unsigned char>(str[pos + j]) & 0x3F);
1514 _codepoints[_charCount++] = cp;
1515 pos += seqLen;
1516 }
1517 }
1518
1519 constexpr bool isAscii() const { return _isAscii; }
1520 constexpr size_t charCount() const { return _charCount; }
1521 constexpr size_t byteCount() const { return N - 1; }
1522 constexpr const char *bytes() const { return _bytes; }
1523 constexpr const char32_t *codepoints() const { return _codepoints; }
1524
1533 constexpr uint64_t hash() const {
1534 if (_isAscii) return fnv1aLatin1AsCodepoints(_bytes, N - 1);
1535 return fnv1aCodepoints(_codepoints, _charCount);
1536 }
1537
1538 // Data members are public so the class qualifies as a
1539 // structural type (C++20 [temp.param]/7) and can therefore
1540 // be used as a class-type non-type template parameter —
1541 // e.g. `template <CompiledString Name>` in StringRegistry
1542 // and VariantDatabase. Treat them as private.
1543 char _bytes[N];
1544 char32_t _codepoints[N]; // worst case: N-1 codepoints
1545 size_t _charCount;
1546 bool _isAscii;
1547};
1548
1559template <size_t Count> class CompiledCodepoints {
1560 public:
1561 template <size_t N> consteval CompiledCodepoints(const CompiledString<N> &cs) : _data{} {
1562 for (size_t i = 0; i < Count; ++i) _data[i] = cs.codepoints()[i];
1563 }
1564 constexpr const char32_t *data() const { return _data; }
1565 constexpr size_t size() const { return Count; }
1566
1567 private:
1568 char32_t _data[Count];
1569};
1570
1584namespace literals {
1585 inline String operator""_ps(const char *str, size_t len) {
1586 return String::fromUtf8(str, len);
1587 }
1588} // namespace literals
1589
1590PROMEKI_NAMESPACE_END
1591
1592// NOLINTNEXTLINE(bugprone-macro-parentheses)
1593
/**
 * @brief Wraps a string literal in a String backed by a function-local
 *        static data object.
 *
 * The literal is analysed at compile time with CompiledString.  A
 * pure-ASCII literal is wrapped in a static StringLiteralData; any other
 * literal gets a static constexpr CompiledCodepoints array and a static
 * StringUnicodeLiteralData built from it.  In both cases the hash comes
 * from CompiledString::hash() and the result is produced by
 * String::fromLiteralData().
 */
#define PROMEKI_STRING(str)                                                                        \
        ([]() -> ::promeki::String {                                                               \
                constexpr auto _compiled = ::promeki::CompiledString<sizeof(str)>(str);           \
                if constexpr (_compiled.isAscii()) {                                               \
                        static ::promeki::StringLiteralData _data(str, _compiled.byteCount(),      \
                                                                  _compiled.hash());               \
                        return ::promeki::String::fromLiteralData(&_data);                         \
                } else {                                                                           \
                        static constexpr auto _points =                                            \
                                ::promeki::CompiledCodepoints<_compiled.charCount()>(_compiled);   \
                        static ::promeki::StringUnicodeLiteralData _data(                          \
                                _points.data(), _points.size(), str, _compiled.byteCount(),        \
                                _compiled.hash());                                                 \
                        return ::promeki::String::fromLiteralData(&_data);                         \
                }                                                                                  \
        }())
1624
1625#endif // PROMEKI_ENABLE_CORE