11#include <promeki/config.h>
12#if PROMEKI_ENABLE_CORE
22PROMEKI_NAMESPACE_BEGIN
// Reference count for this data object. The literal subclasses in this file
// call setImmortal() on it from their constructors.
37 RefCount _promeki_refct;
/// Pure-virtual clone hook; returns a raw StringData pointer.
/// StringLiteralData's override returns a freshly new-ed StringLatin1Data.
/// NOTE(review): presumably the ref-counting machinery takes ownership of the
/// returned object -- confirm against the shared-pointer implementation.
40 virtual StringData *_promeki_clone()
const = 0;
/// Virtual destructor (declared here, defined out of line) so that concrete
/// backends can be destroyed through a StringData pointer.
43 virtual ~StringData();
/// Pure virtual: length of the string. The backends in this file return a
/// character count here (e.g. the Unicode literal returns its code-point
/// count), which is distinct from byteCount().
49 virtual size_t length()
const = 0;
/// Pure virtual: returns the character at index @p idx.
/// The inline overrides in this file index their storage directly and do not
/// range-check @p idx themselves.
52 virtual Char charAt(
size_t idx)
const = 0;
/// Pure virtual mutator taking an index and a character.
/// NOTE(review): presumably replaces the character at @p idx with @p ch --
/// confirm against the out-of-line implementations. The literal backends
/// override this with assert(false).
59 virtual void setCharAt(
size_t idx, Char ch) = 0;
/// Pure virtual: reports whether this backend is a Latin-1 one.
/// StringLatin1Data and StringLiteralData return true; the two Unicode
/// backends return false.
62 virtual bool isLatin1()
const = 0;
/// Reports whether this object is one of the literal backends.
/// Defaults to false; StringLiteralData and StringUnicodeLiteralData
/// override it to return true.
65 virtual bool isLiteral()
const {
return false; }
/// Pure virtual: search for a single character, starting index defaulting to 0.
/// NOTE(review): presumably returns the index of the first match at or after
/// @p from, or npos when there is none -- confirm against the implementations.
/// Every override in this file repeats the same default for @p from.
78 virtual size_t find(Char ch,
size_t from = 0)
const = 0;
/// Pure virtual: search for another StringData, starting index defaulting to 0.
/// NOTE(review): presumably returns the index of the first occurrence of @p s
/// at or after @p from, or npos -- confirm against the implementations.
86 virtual size_t find(
const StringData &s,
size_t from = 0)
const = 0;
/// Pure virtual: reverse search for a single character; @p from defaults to
/// npos.
/// NOTE(review): presumably searches backwards from @p from (npos meaning
/// "from the end") and returns npos on failure -- confirm.
94 virtual size_t rfind(Char ch,
size_t from = npos)
const = 0;
/// Pure virtual: reverse search for another StringData; @p from defaults to
/// npos.
/// NOTE(review): presumably returns the index of the last occurrence of @p s
/// at or before @p from, or npos -- confirm.
102 virtual size_t rfind(
const StringData &s,
size_t from = npos)
const = 0;
/// Pure virtual: builds a substring object from a position and a length and
/// returns it as a raw StringData pointer.
/// NOTE(review): presumably a newly allocated object the caller must take
/// ownership of, with @p len clamped to the available characters -- confirm.
110 virtual StringData *createSubstr(
size_t pos,
size_t len)
const = 0;
/// Pure virtual in-place mutator. NOTE(review): by its name it reverses the
/// character order of this string -- confirm. Literal backends assert(false).
113 virtual void reverseInPlace() = 0;
/// Pure virtual: returns a count relating to @p substr.
/// NOTE(review): presumably the number of occurrences of @p substr in this
/// string; whether overlapping matches are counted is not visible here.
120 virtual size_t count(
const StringData &substr)
const = 0;
/// Pure virtual mutator: appends the contents of @p other (presumably to the
/// end of this string -- confirm). Literal backends override with
/// assert(false).
131 virtual void append(
const StringData &other) = 0;
/// Pure virtual mutator: single-character overload of append().
/// Literal backends override with assert(false).
137 virtual void append(Char ch) = 0;
/// Pure virtual mutator taking a position and a StringData.
/// NOTE(review): presumably inserts @p s before character index @p pos --
/// confirm. Literal backends override with assert(false).
144 virtual void insert(
size_t pos,
const StringData &s) = 0;
/// Pure virtual mutator taking a position and a count.
/// NOTE(review): presumably removes @p count characters starting at @p pos --
/// confirm. Literal backends override with assert(false).
151 virtual void erase(
size_t pos,
size_t count) = 0;
/// Pure virtual mutator: empties the string (StringLatin1Data implements it
/// as a clear() of its std::string). Literal backends assert(false).
154 virtual void clear() = 0;
/// Pure virtual mutator: resizes to @p len; @p fill defaults to a
/// default-constructed Char.
/// NOTE(review): presumably @p fill pads the string when it grows -- confirm.
161 virtual void resize(
size_t len, Char fill = Char()) = 0;
/// Capacity hint. The base implementation is a deliberate no-op (the cast to
/// void only silences the unused-parameter warning); the mutable backends
/// override it to forward to their container's reserve().
172 virtual void reserve(
size_t capacity) { (void)capacity; }
/// Pure virtual: size of the string in bytes. For the Latin-1 backends this
/// equals length(); the Unicode literal backend returns its separate byte
/// length.
180 virtual size_t byteCount()
const = 0;
/// Pure virtual: returns the byte at byte index @p idx as uint8_t.
/// The inline overrides do not range-check @p idx.
187 virtual uint8_t byteAt(
size_t idx)
const = 0;
/// Pure virtual: returns the string as a const char pointer.
/// NOTE(review): presumably NUL-terminated and valid only while this object
/// is alive and unmodified -- confirm.
190 virtual const char *cstr()
const = 0;
/// Pure virtual: returns the string as a reference to a std::string.
/// The reference refers to storage held by this object (a member string or
/// cache), so it must not outlive it.
193 virtual const std::string &str()
const = 0;
/// Non-virtual convenience: true when length() is zero.
198 bool isEmpty()
const {
return length() == 0; }
/// Pure virtual: 64-bit hash of the string. The visible implementations use
/// the fnv1aLatin1AsCodepoints / fnv1aCodepoints helpers.
201 virtual uint64_t hash()
const = 0;
/// Sentinel index: the largest size_t value (-1 converted to size_t).
/// Used as the default @c from argument of the rfind() overloads.
204 static constexpr size_t npos =
static_cast<size_t>(-1);
/// Mutable StringData backend that keeps its text in a std::string member
/// (_s). length() and byteCount() both return _s.size(), i.e. one byte per
/// character. Methods declared without a body are defined out of line.
213class StringLatin1Data :
public StringData {
214 PROMEKI_SHARED_DERIVED(StringLatin1Data)
/// Constructs an empty string.
217 StringLatin1Data() =
default;
/// Copies @p s into the internal storage.
223 StringLatin1Data(
const std::string &s) : _s(s) {}
/// Takes over the contents of @p s by move.
229 StringLatin1Data(std::string &&s) : _s(std::move(s)) {}
/// Copies a NUL-terminated C string; a null pointer yields an empty string.
235 StringLatin1Data(
const char *s) : _s(s ? s :
"") {}
/// Copies @p len bytes starting at @p s.
/// NOTE(review): unlike the single-argument overload there is no null guard
/// here, so @p s must point to at least @p len readable bytes.
242 StringLatin1Data(
const char *s,
size_t len) : _s(s, len) {}
/// Builds a string of @p ct copies of the byte @p c.
249 StringLatin1Data(
size_t ct,
char c) : _s(ct, c) {}
/// Number of characters, which for this backend is the byte size of _s.
251 size_t length()
const override {
return _s.size(); }
// Character access and mutation: defined out of line.
252 Char charAt(
size_t idx)
const override;
253 void setCharAt(
size_t idx, Char ch)
override;
/// Always true for this backend.
254 bool isLatin1()
const override {
return true; }
// Search operations: defined out of line. Defaults mirror the base class.
256 size_t find(Char ch,
size_t from = 0)
const override;
257 size_t find(
const StringData &s,
size_t from = 0)
const override;
258 size_t rfind(Char ch,
size_t from = npos)
const override;
259 size_t rfind(
const StringData &s,
size_t from = npos)
const override;
260 StringData *createSubstr(
size_t pos,
size_t len)
const override;
261 void reverseInPlace()
override;
262 size_t count(
const StringData &substr)
const override;
// Mutators: defined out of line unless a body is shown.
264 void append(
const StringData &other)
override;
265 void append(Char ch)
override;
266 void insert(
size_t pos,
const StringData &s)
override;
267 void erase(
size_t pos,
size_t count)
override;
/// Empties the underlying std::string.
268 void clear()
override { _s.clear(); }
269 void resize(
size_t len, Char fill = Char())
override;
/// Forwards the capacity hint to the underlying std::string.
270 void reserve(
size_t capacity)
override { _s.reserve(capacity); }
/// Byte size of the underlying std::string (same value as length()).
272 size_t byteCount()
const override {
return _s.size(); }
/// Byte at @p idx, read with std::string::operator[] (no range check).
273 uint8_t byteAt(
size_t idx)
const override {
return static_cast<uint8_t
>(_s[idx]); }
/// NUL-terminated view of the underlying std::string.
274 const char *cstr()
const override {
return _s.c_str(); }
/// Direct reference to the underlying std::string.
275 const std::string &str()
const override {
return _s; }
/// Hash computed on every call by fnv1aLatin1AsCodepoints over the stored
/// bytes. NOTE(review): the helper name suggests each byte is hashed as a
/// code point so the result matches the Unicode backends' hash for the same
/// text -- confirm against the helper's definition.
277 uint64_t hash()
const override {
281 return fnv1aLatin1AsCodepoints(_s.data(), _s.size());
/// Mutable access to the underlying std::string for callers that need to
/// edit the storage directly.
285 std::string &rawStr() {
return _s; }
/// Mutable StringData backend that stores its characters in a List<Char>
/// (_chars) and keeps a std::string cache (_strCache) guarded by a dirty flag
/// (_strDirty). Methods declared without a body are defined out of line.
298class StringUnicodeData :
public StringData {
299 PROMEKI_SHARED_DERIVED(StringUnicodeData)
/// Constructs an empty string; the cache starts out marked dirty.
302 StringUnicodeData() : _strDirty(true) {}
/// Copies @p chars; the cache starts out marked dirty.
308 StringUnicodeData(
const List<Char> &chars) : _chars(chars), _strDirty(true) {}
/// Takes over @p chars by move; the cache starts out marked dirty.
314 StringUnicodeData(List<Char> &&chars) : _chars(std::move(chars)), _strDirty(true) {}
/// Static factory taking a byte buffer and its length; returns a raw pointer.
/// NOTE(review): presumably decodes UTF-8 into a newly allocated object owned
/// by the caller -- confirm against the out-of-line definition.
322 static StringUnicodeData *fromUtf8(
const char *data,
size_t len);
/// Static factory taking a std::string; returns a raw pointer.
/// NOTE(review): presumably widens each Latin-1 byte to one Char -- confirm.
329 static StringUnicodeData *fromLatin1(
const std::string &s);
/// Number of Char elements stored.
331 size_t length()
const override {
return _chars.size(); }
/// Element @p idx of _chars; any range checking is up to List's operator[].
332 Char charAt(
size_t idx)
const override {
return _chars[idx]; }
333 void setCharAt(
size_t idx, Char ch)
override;
/// Always false for this backend.
334 bool isLatin1()
const override {
return false; }
// Search operations: defined out of line. Defaults mirror the base class.
336 size_t find(Char ch,
size_t from = 0)
const override;
337 size_t find(
const StringData &s,
size_t from = 0)
const override;
338 size_t rfind(Char ch,
size_t from = npos)
const override;
339 size_t rfind(
const StringData &s,
size_t from = npos)
const override;
340 StringData *createSubstr(
size_t pos,
size_t len)
const override;
341 void reverseInPlace()
override;
342 size_t count(
const StringData &substr)
const override;
// Mutators: defined out of line unless a body is shown.
344 void append(
const StringData &other)
override;
345 void append(Char ch)
override;
346 void insert(
size_t pos,
const StringData &s)
override;
347 void erase(
size_t pos,
size_t count)
override;
348 void clear()
override;
349 void resize(
size_t len, Char fill = Char())
override;
/// Forwards the capacity hint to the character list.
350 void reserve(
size_t capacity)
override { _chars.reserve(capacity); }
// Byte-level and std::string views: defined out of line.
// NOTE(review): presumably served from _strCache after ensureEncoded() --
// confirm against the implementation.
352 size_t byteCount()
const override;
353 uint8_t byteAt(
size_t idx)
const override;
354 const char *cstr()
const override;
355 const std::string &str()
const override;
357 uint64_t hash()
const override;
/// Const helper, defined out of line.
/// NOTE(review): presumably rebuilds _strCache from _chars when _strDirty is
/// set and then clears the flag -- confirm.
360 void ensureEncoded()
const;
// Both members are mutable so that const member functions can update them.
363 mutable std::string _strCache;
364 mutable bool _strDirty;
/// Read-only StringData backend over an externally owned char buffer.
/// The constructor stores the pointer and length without copying and marks
/// the reference count immortal. Every mutator is overridden with
/// assert(false), so calling one is a programming error (and does nothing at
/// all when assertions are compiled out with NDEBUG).
375class StringLiteralData :
public StringData {
/// @param s               Pointer to the character data; stored, not copied,
///                        so it must outlive this object.
/// @param len             Number of bytes at @p s.
/// @param precomputedHash Hash to use as-is. A value of 0 means "not
///                        supplied": the hash is then computed here with
///                        fnv1aLatin1AsCodepoints, unless @p len is 0.
383 StringLiteralData(
const char *s,
size_t len, uint64_t precomputedHash = 0)
384 : _s(s), _len(len), _hash(precomputedHash) {
388 if (_hash == 0 && _len > 0) _hash = fnv1aLatin1AsCodepoints(_s, _len);
389 _promeki_refct.setImmortal();
/// Clones into a StringLatin1Data holding a copy of the bytes, i.e. the
/// clone is a mutable backend rather than another literal.
392 StringData *_promeki_clone()
const override {
return new StringLatin1Data(_s, _len); }
/// Length passed to the constructor.
395 size_t length()
const override {
return _len; }
/// Character at @p idx, read straight from the buffer (no range check) and
/// passed to Char's constructor as a char.
396 Char charAt(
size_t idx)
const override {
return Char(
static_cast<char>(_s[idx])); }
/// Not supported on a literal.
397 void setCharAt(
size_t, Char)
override { assert(
false); }
/// Always true for this backend.
398 bool isLatin1()
const override {
return true; }
/// Always true for this backend.
399 bool isLiteral()
const override {
return true; }
// Read-only search operations: defined out of line.
402 size_t find(Char ch,
size_t from = 0)
const override;
403 size_t find(
const StringData &s,
size_t from = 0)
const override;
404 size_t rfind(Char ch,
size_t from = npos)
const override;
405 size_t rfind(
const StringData &s,
size_t from = npos)
const override;
406 StringData *createSubstr(
size_t pos,
size_t len)
const override;
/// Not supported on a literal.
407 void reverseInPlace()
override { assert(
false); }
408 size_t count(
const StringData &substr)
const override;
// Mutators below are not supported on a literal; each one asserts.
411 void append(
const StringData &)
override { assert(
false); }
412 void append(Char)
override { assert(
false); }
413 void insert(
size_t,
const StringData &)
override { assert(
false); }
414 void erase(
size_t,
size_t)
override { assert(
false); }
415 void clear()
override { assert(
false); }
416 void resize(
size_t, Char)
override { assert(
false); }
/// Same value as length(): one byte per character.
419 size_t byteCount()
const override {
return _len; }
/// Byte at @p idx, read straight from the buffer (no range check).
420 uint8_t byteAt(
size_t idx)
const override {
return static_cast<uint8_t
>(_s[idx]); }
/// Returns the stored pointer unchanged.
/// NOTE(review): assumes the buffer is NUL-terminated at index length() --
/// confirm that all callers construct this from real string literals.
421 const char *cstr()
const override {
return _s; }
/// Defined out of line.
/// NOTE(review): presumably fills _strCache once, guarded by _strOnce, and
/// returns it -- confirm.
422 const std::string &str()
const override;
/// Hash fixed at construction time.
424 uint64_t hash()
const override {
return _hash; }
// Mutable so the const str() accessor can populate them.
430 mutable OnceFlag _strOnce;
431 mutable std::string _strCache;
/// Read-only StringData backend over two externally owned buffers: an array
/// of char32_t code points and a byte buffer. Both pointers are stored
/// without copying and the reference count is marked immortal. Every mutator
/// is overridden with assert(false), so calling one is a programming error
/// (and does nothing at all when assertions are compiled out with NDEBUG).
442class StringUnicodeLiteralData :
public StringData {
/// @param codepoints      Code-point array; stored, not copied.
/// @param charCount       Number of elements in @p codepoints.
/// @param bytes           Byte buffer; stored, not copied.
///                        NOTE(review): presumably the UTF-8 encoding of the
///                        same text -- confirm at the call sites.
/// @param byteLen         Number of bytes in @p bytes.
/// @param precomputedHash Hash to use as-is. A value of 0 means "not
///                        supplied": the hash is then computed here with
///                        fnv1aCodepoints, unless @p charCount is 0.
452 StringUnicodeLiteralData(
const char32_t *codepoints,
size_t charCount,
const char *bytes,
453 size_t byteLen, uint64_t precomputedHash = 0)
454 : _codepoints(codepoints), _charCount(charCount), _bytes(bytes), _byteLen(byteLen),
455 _hash(precomputedHash) {
459 if (_hash == 0 && _charCount > 0) _hash = fnv1aCodepoints(_codepoints, _charCount);
460 _promeki_refct.setImmortal();
/// Clone hook, defined out of line.
463 StringData *_promeki_clone()
const override;
/// Number of code points passed to the constructor.
466 size_t length()
const override {
return _charCount; }
/// Code point at @p idx, read straight from the array (no range check).
467 Char charAt(
size_t idx)
const override {
return Char(_codepoints[idx]); }
/// Not supported on a literal.
468 void setCharAt(
size_t, Char)
override { assert(
false); }
/// Always false for this backend.
469 bool isLatin1()
const override {
return false; }
/// Always true for this backend.
470 bool isLiteral()
const override {
return true; }
// Read-only search operations: defined out of line.
473 size_t find(Char ch,
size_t from = 0)
const override;
474 size_t find(
const StringData &s,
size_t from = 0)
const override;
475 size_t rfind(Char ch,
size_t from = npos)
const override;
476 size_t rfind(
const StringData &s,
size_t from = npos)
const override;
477 StringData *createSubstr(
size_t pos,
size_t len)
const override;
/// Not supported on a literal.
478 void reverseInPlace()
override { assert(
false); }
479 size_t count(
const StringData &substr)
const override;
// Mutators below are not supported on a literal; each one asserts.
482 void append(
const StringData &)
override { assert(
false); }
483 void append(Char)
override { assert(
false); }
484 void insert(
size_t,
const StringData &)
override { assert(
false); }
485 void erase(
size_t,
size_t)
override { assert(
false); }
486 void clear()
override { assert(
false); }
487 void resize(
size_t, Char)
override { assert(
false); }
/// Byte length passed to the constructor (independent of length()).
490 size_t byteCount()
const override {
return _byteLen; }
/// Byte at @p idx of the byte buffer (no range check).
491 uint8_t byteAt(
size_t idx)
const override {
return static_cast<uint8_t
>(_bytes[idx]); }
/// Returns the stored byte pointer unchanged.
/// NOTE(review): assumes the buffer is NUL-terminated at index byteCount() --
/// confirm at the call sites.
492 const char *cstr()
const override {
return _bytes; }
/// Defined out of line.
/// NOTE(review): presumably fills _strCache once, guarded by _strOnce, and
/// returns it -- confirm.
493 const std::string &str()
const override;
/// Hash fixed at construction time.
495 uint64_t hash()
const override {
return _hash; }
// Externally owned code-point array supplied to the constructor.
498 const char32_t *_codepoints;
// Mutable so the const str() accessor can populate them.
503 mutable OnceFlag _strOnce;
504 mutable std::string _strCache;