libpromeki 1.0.0-alpha
PROfessional MEdia toolKIt
 
Loading...
Searching...
No Matches
stringdata.h
Go to the documentation of this file.
1
8#pragma once
9
10
#include <promeki/config.h>
#if PROMEKI_ENABLE_CORE
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
#include <promeki/namespace.h>
#include <promeki/once.h>
#include <promeki/sharedptr.h>
#include <promeki/char.h>
#include <promeki/list.h>
#include <promeki/fnv1a.h>
21
22PROMEKI_NAMESPACE_BEGIN
23
34class StringData {
35 public:
37 RefCount _promeki_refct;
38
40 virtual StringData *_promeki_clone() const = 0;
41
43 virtual ~StringData();
44
47
49 virtual size_t length() const = 0;
50
52 virtual Char charAt(size_t idx) const = 0;
53
59 virtual void setCharAt(size_t idx, Char ch) = 0;
60
62 virtual bool isLatin1() const = 0;
63
65 virtual bool isLiteral() const { return false; }
66
68
71
78 virtual size_t find(Char ch, size_t from = 0) const = 0;
79
86 virtual size_t find(const StringData &s, size_t from = 0) const = 0;
87
94 virtual size_t rfind(Char ch, size_t from = npos) const = 0;
95
102 virtual size_t rfind(const StringData &s, size_t from = npos) const = 0;
103
110 virtual StringData *createSubstr(size_t pos, size_t len) const = 0;
111
113 virtual void reverseInPlace() = 0;
114
120 virtual size_t count(const StringData &substr) const = 0;
121
123
126
131 virtual void append(const StringData &other) = 0;
132
137 virtual void append(Char ch) = 0;
138
144 virtual void insert(size_t pos, const StringData &s) = 0;
145
151 virtual void erase(size_t pos, size_t count) = 0;
152
154 virtual void clear() = 0;
155
161 virtual void resize(size_t len, Char fill = Char()) = 0;
162
172 virtual void reserve(size_t capacity) { (void)capacity; }
173
175
178
180 virtual size_t byteCount() const = 0;
181
187 virtual uint8_t byteAt(size_t idx) const = 0;
188
190 virtual const char *cstr() const = 0;
191
193 virtual const std::string &str() const = 0;
194
196
198 bool isEmpty() const { return length() == 0; }
199
201 virtual uint64_t hash() const = 0;
202
204 static constexpr size_t npos = static_cast<size_t>(-1);
205};
206
213class StringLatin1Data : public StringData {
214 PROMEKI_SHARED_DERIVED(StringLatin1Data)
215 public:
217 StringLatin1Data() = default;
218
223 StringLatin1Data(const std::string &s) : _s(s) {}
224
229 StringLatin1Data(std::string &&s) : _s(std::move(s)) {}
230
235 StringLatin1Data(const char *s) : _s(s ? s : "") {}
236
242 StringLatin1Data(const char *s, size_t len) : _s(s, len) {}
243
249 StringLatin1Data(size_t ct, char c) : _s(ct, c) {}
250
251 size_t length() const override { return _s.size(); }
252 Char charAt(size_t idx) const override;
253 void setCharAt(size_t idx, Char ch) override;
254 bool isLatin1() const override { return true; }
255
256 size_t find(Char ch, size_t from = 0) const override;
257 size_t find(const StringData &s, size_t from = 0) const override;
258 size_t rfind(Char ch, size_t from = npos) const override;
259 size_t rfind(const StringData &s, size_t from = npos) const override;
260 StringData *createSubstr(size_t pos, size_t len) const override;
261 void reverseInPlace() override;
262 size_t count(const StringData &substr) const override;
263
264 void append(const StringData &other) override;
265 void append(Char ch) override;
266 void insert(size_t pos, const StringData &s) override;
267 void erase(size_t pos, size_t count) override;
268 void clear() override { _s.clear(); }
269 void resize(size_t len, Char fill = Char()) override;
270 void reserve(size_t capacity) override { _s.reserve(capacity); }
271
272 size_t byteCount() const override { return _s.size(); }
273 uint8_t byteAt(size_t idx) const override { return static_cast<uint8_t>(_s[idx]); }
274 const char *cstr() const override { return _s.c_str(); }
275 const std::string &str() const override { return _s; }
276
277 uint64_t hash() const override {
278 // Hash each Latin1 byte as a 4-byte little-endian
279 // codepoint so the result matches StringUnicodeData
280 // for the same logical characters.
281 return fnv1aLatin1AsCodepoints(_s.data(), _s.size());
282 }
283
285 std::string &rawStr() { return _s; }
286
287 private:
288 std::string _s;
289};
290
298class StringUnicodeData : public StringData {
299 PROMEKI_SHARED_DERIVED(StringUnicodeData)
300 public:
302 StringUnicodeData() : _strDirty(true) {}
303
308 StringUnicodeData(const List<Char> &chars) : _chars(chars), _strDirty(true) {}
309
314 StringUnicodeData(List<Char> &&chars) : _chars(std::move(chars)), _strDirty(true) {}
315
322 static StringUnicodeData *fromUtf8(const char *data, size_t len);
323
329 static StringUnicodeData *fromLatin1(const std::string &s);
330
331 size_t length() const override { return _chars.size(); }
332 Char charAt(size_t idx) const override { return _chars[idx]; }
333 void setCharAt(size_t idx, Char ch) override;
334 bool isLatin1() const override { return false; }
335
336 size_t find(Char ch, size_t from = 0) const override;
337 size_t find(const StringData &s, size_t from = 0) const override;
338 size_t rfind(Char ch, size_t from = npos) const override;
339 size_t rfind(const StringData &s, size_t from = npos) const override;
340 StringData *createSubstr(size_t pos, size_t len) const override;
341 void reverseInPlace() override;
342 size_t count(const StringData &substr) const override;
343
344 void append(const StringData &other) override;
345 void append(Char ch) override;
346 void insert(size_t pos, const StringData &s) override;
347 void erase(size_t pos, size_t count) override;
348 void clear() override;
349 void resize(size_t len, Char fill = Char()) override;
350 void reserve(size_t capacity) override { _chars.reserve(capacity); }
351
352 size_t byteCount() const override;
353 uint8_t byteAt(size_t idx) const override;
354 const char *cstr() const override;
355 const std::string &str() const override;
356
357 uint64_t hash() const override;
358
359 private:
360 void ensureEncoded() const;
361
362 List<Char> _chars;
363 mutable std::string _strCache;
364 mutable bool _strDirty;
365};
366
375class StringLiteralData : public StringData {
376 public:
383 StringLiteralData(const char *s, size_t len, uint64_t precomputedHash = 0)
384 : _s(s), _len(len), _hash(precomputedHash) {
385 // Hash each Latin1 byte as a 4-byte little-endian
386 // codepoint so the result matches StringUnicodeData
387 // for the same logical characters.
388 if (_hash == 0 && _len > 0) _hash = fnv1aLatin1AsCodepoints(_s, _len);
389 _promeki_refct.setImmortal();
390 }
391
392 StringData *_promeki_clone() const override { return new StringLatin1Data(_s, _len); }
393
394 // Character access
395 size_t length() const override { return _len; }
396 Char charAt(size_t idx) const override { return Char(static_cast<char>(_s[idx])); }
397 void setCharAt(size_t, Char) override { assert(false); }
398 bool isLatin1() const override { return true; }
399 bool isLiteral() const override { return true; }
400
401 // Search
402 size_t find(Char ch, size_t from = 0) const override;
403 size_t find(const StringData &s, size_t from = 0) const override;
404 size_t rfind(Char ch, size_t from = npos) const override;
405 size_t rfind(const StringData &s, size_t from = npos) const override;
406 StringData *createSubstr(size_t pos, size_t len) const override;
407 void reverseInPlace() override { assert(false); }
408 size_t count(const StringData &substr) const override;
409
410 // Mutation (unreachable — COW always clones first)
411 void append(const StringData &) override { assert(false); }
412 void append(Char) override { assert(false); }
413 void insert(size_t, const StringData &) override { assert(false); }
414 void erase(size_t, size_t) override { assert(false); }
415 void clear() override { assert(false); }
416 void resize(size_t, Char) override { assert(false); }
417
418 // Byte-level
419 size_t byteCount() const override { return _len; }
420 uint8_t byteAt(size_t idx) const override { return static_cast<uint8_t>(_s[idx]); }
421 const char *cstr() const override { return _s; }
422 const std::string &str() const override;
423
424 uint64_t hash() const override { return _hash; }
425
426 private:
427 const char *_s;
428 size_t _len;
429 uint64_t _hash;
430 mutable OnceFlag _strOnce;
431 mutable std::string _strCache;
432};
433
442class StringUnicodeLiteralData : public StringData {
443 public:
452 StringUnicodeLiteralData(const char32_t *codepoints, size_t charCount, const char *bytes,
453 size_t byteLen, uint64_t precomputedHash = 0)
454 : _codepoints(codepoints), _charCount(charCount), _bytes(bytes), _byteLen(byteLen),
455 _hash(precomputedHash) {
456 // Endian-independent codepoint mixing so the result
457 // matches StringLatin1Data and StringUnicodeData for
458 // the same logical characters.
459 if (_hash == 0 && _charCount > 0) _hash = fnv1aCodepoints(_codepoints, _charCount);
460 _promeki_refct.setImmortal();
461 }
462
463 StringData *_promeki_clone() const override;
464
465 // Character access
466 size_t length() const override { return _charCount; }
467 Char charAt(size_t idx) const override { return Char(_codepoints[idx]); }
468 void setCharAt(size_t, Char) override { assert(false); }
469 bool isLatin1() const override { return false; }
470 bool isLiteral() const override { return true; }
471
472 // Search
473 size_t find(Char ch, size_t from = 0) const override;
474 size_t find(const StringData &s, size_t from = 0) const override;
475 size_t rfind(Char ch, size_t from = npos) const override;
476 size_t rfind(const StringData &s, size_t from = npos) const override;
477 StringData *createSubstr(size_t pos, size_t len) const override;
478 void reverseInPlace() override { assert(false); }
479 size_t count(const StringData &substr) const override;
480
481 // Mutation (unreachable — COW always clones first)
482 void append(const StringData &) override { assert(false); }
483 void append(Char) override { assert(false); }
484 void insert(size_t, const StringData &) override { assert(false); }
485 void erase(size_t, size_t) override { assert(false); }
486 void clear() override { assert(false); }
487 void resize(size_t, Char) override { assert(false); }
488
489 // Byte-level — uses the original UTF-8 bytes directly
490 size_t byteCount() const override { return _byteLen; }
491 uint8_t byteAt(size_t idx) const override { return static_cast<uint8_t>(_bytes[idx]); }
492 const char *cstr() const override { return _bytes; }
493 const std::string &str() const override;
494
495 uint64_t hash() const override { return _hash; }
496
497 private:
498 const char32_t *_codepoints;
499 size_t _charCount;
500 const char *_bytes;
501 size_t _byteLen;
502 uint64_t _hash;
503 mutable OnceFlag _strOnce;
504 mutable std::string _strCache;
505};
506
507PROMEKI_NAMESPACE_END
508
509#endif // PROMEKI_ENABLE_CORE