libpromeki main
PROfessional MEdia toolKIt
 
Loading...
Searching...
No Matches
stringdata.h
Go to the documentation of this file.
1
8#pragma once
9
10#include <string>
11#include <cstdint>
12#include <mutex>
15#include <promeki/core/char.h>
16#include <promeki/core/list.h>
17#include <promeki/core/fnv1a.h>
18
20
30 public:
33
// Creates a deep copy of this storage backend.
// NOTE(review): returns a raw pointer; ownership presumably passes to the caller - confirm.
35 virtual StringData *_promeki_clone() const = 0;
36
// Virtual destructor (defined out of line).
38 virtual ~StringData();
39
42
// Returns the number of characters in the string.
44 virtual size_t length() const = 0;
45
// Returns the character at the given index.
// NOTE(review): no bounds contract is visible here; assume idx < length() is required.
47 virtual Char charAt(size_t idx) const = 0;
48
// Sets the character at the given index.
54 virtual void setCharAt(size_t idx, Char ch) = 0;
55
// Returns true if the storage uses Latin1 encoding (one byte per character).
57 virtual bool isLatin1() const = 0;
58
// Returns true if the storage wraps an immutable literal.
// The base implementation answers false; only literal backends override it.
60 virtual bool isLiteral() const { return false; }
61
63
66
// Finds the first occurrence of character ch, searching forward from index `from`.
// NOTE(review): presumably returns npos when there is no match - confirm in the implementations.
73 virtual size_t find(Char ch, size_t from = 0) const = 0;
74
// Finds the first occurrence of substring s, searching forward from index `from`.
81 virtual size_t find(const StringData &s, size_t from = 0) const = 0;
82
// Finds the last occurrence of character ch at or before index `from`.
// The default `from` is npos.
89 virtual size_t rfind(Char ch, size_t from = npos) const = 0;
90
// Finds the last occurrence of substring s at or before index `from`.
97 virtual size_t rfind(const StringData &s, size_t from = npos) const = 0;
98
// Creates a new StringData containing a substring described by pos and len.
// NOTE(review): returns a raw pointer; ownership presumably passes to the caller - confirm.
105 virtual StringData *createSubstr(size_t pos, size_t len) const = 0;
106
// Reverses the characters in place.
108 virtual void reverseInPlace() = 0;
109
// Counts non-overlapping occurrences of substr.
115 virtual size_t count(const StringData &substr) const = 0;
116
118
121
// Appends another string's data to this storage.
126 virtual void append(const StringData &other) = 0;
127
// Appends a single character.
132 virtual void append(Char ch) = 0;
133
// Inserts string s at position pos.
139 virtual void insert(size_t pos, const StringData &s) = 0;
140
// Erases characters from the string, described by a start position and a count.
146 virtual void erase(size_t pos, size_t count) = 0;
147
// Removes all characters from the string.
149 virtual void clear() = 0;
150
// Resizes the string to len characters.
// NOTE(review): `fill` is presumably used for newly added positions when growing - confirm.
156 virtual void resize(size_t len, Char fill = Char()) = 0;
157
159
162
// Returns the byte count of the encoded string data.
164 virtual size_t byteCount() const = 0;
165
// Returns the byte at the given index of the encoded data.
171 virtual uint8_t byteAt(size_t idx) const = 0;
172
// Returns a null-terminated C string pointer.
174 virtual const char *cstr() const = 0;
175
// Returns a const reference to the internal std::string representation.
177 virtual const std::string &str() const = 0;
178
180
// Returns true if the string has zero length. Non-virtual: defined via length().
182 bool isEmpty() const { return length() == 0; }
183
// Returns a 64-bit hash of the string data.
185 virtual uint64_t hash() const = 0;
186
// Sentinel value indicating "not found": the largest value representable by size_t.
188 static constexpr size_t npos = static_cast<size_t>(-1);
189};
190
199 public:
// Default constructor. Creates empty Latin1 storage.
201 StringLatin1Data() = default;
202
// Constructs from a std::string (copy).
// Not marked explicit, so a std::string converts implicitly to this type.
207 StringLatin1Data(const std::string &s) : _s(s) {}
208
// Constructs from a std::string (move); the argument's buffer is taken over.
213 StringLatin1Data(std::string &&s) : _s(std::move(s)) {}
214
// Constructs from a C string. A null pointer is treated as the empty string.
219 StringLatin1Data(const char *s) : _s(s ? s : "") {}
220
// Constructs from a buffer with explicit length.
// A null pointer yields empty storage, matching the C-string constructor;
// passing a null pointer to std::string(s, len) would be undefined behaviour.
226 StringLatin1Data(const char *s, size_t len) : _s(s ? s : "", s ? len : 0) {}
227
// Constructs a string of ct repetitions of character c.
233 StringLatin1Data(size_t ct, char c) : _s(ct, c) {}
234
// Character access. One byte is one character, so length() is the byte size of _s.
235 size_t length() const override { return _s.size(); }
236 Char charAt(size_t idx) const override;
237 void setCharAt(size_t idx, Char ch) override;
// This backend always reports Latin1 encoding.
238 bool isLatin1() const override { return true; }
239
// Search operations (defined out of line); see the StringData interface for the contracts.
240 size_t find(Char ch, size_t from = 0) const override;
241 size_t find(const StringData &s, size_t from = 0) const override;
242 size_t rfind(Char ch, size_t from = npos) const override;
243 size_t rfind(const StringData &s, size_t from = npos) const override;
244 StringData *createSubstr(size_t pos, size_t len) const override;
245 void reverseInPlace() override;
246 size_t count(const StringData &substr) const override;
247
// Mutation operations; all but clear() are defined out of line.
248 void append(const StringData &other) override;
249 void append(Char ch) override;
250 void insert(size_t pos, const StringData &s) override;
251 void erase(size_t pos, size_t count) override;
// Removes all characters by clearing the underlying std::string.
252 void clear() override { _s.clear(); }
253 void resize(size_t len, Char fill = Char()) override;
254
// Byte-level access: served directly from the underlying std::string.
255 size_t byteCount() const override { return _s.size(); }
// Unchecked index into _s; the char is reinterpreted as an unsigned byte.
256 uint8_t byteAt(size_t idx) const override { return static_cast<uint8_t>(_s[idx]); }
257 const char *cstr() const override { return _s.c_str(); }
258 const std::string &str() const override { return _s; }
259
// Returns the FNV-1a hash of the stored bytes, recomputed on every call.
260 uint64_t hash() const override {
261 return fnv1aData(_s.data(), _s.size());
262 }
263
// Direct, mutable access to the underlying std::string.
265 std::string &rawStr() { return _s; }
266
267 private:
// The Latin1 bytes, one per character.
268 std::string _s;
269};
270
280 public:
// Default constructor. Creates empty Unicode storage.
// Every constructor starts with _strDirty set.
// NOTE(review): presumably this makes ensureEncoded() rebuild _strCache on first use - confirm.
282 StringUnicodeData() : _strDirty(true) {}
283
// Constructs from a list of codepoints (copy).
288 StringUnicodeData(const List<Char> &chars) : _chars(chars), _strDirty(true) {}
289
// Constructs from a list of codepoints (move).
294 StringUnicodeData(List<Char> &&chars) : _chars(std::move(chars)), _strDirty(true) {}
295
// Creates a StringUnicodeData by decoding len bytes of UTF-8 starting at data.
// NOTE(review): returns a raw pointer; ownership presumably passes to the caller - confirm.
302 static StringUnicodeData *fromUtf8(const char *data, size_t len);
303
// Creates a StringUnicodeData by promoting Latin1 bytes.
// NOTE(review): returns a raw pointer; ownership presumably passes to the caller - confirm.
309 static StringUnicodeData *fromLatin1(const std::string &s);
310
// Character access: characters are stored decoded, so these index _chars directly.
311 size_t length() const override { return _chars.size(); }
// Unchecked index into the codepoint list.
312 Char charAt(size_t idx) const override { return _chars[idx]; }
313 void setCharAt(size_t idx, Char ch) override;
// This backend never reports Latin1 encoding.
314 bool isLatin1() const override { return false; }
315
// Search operations (defined out of line); see the StringData interface for the contracts.
316 size_t find(Char ch, size_t from = 0) const override;
317 size_t find(const StringData &s, size_t from = 0) const override;
318 size_t rfind(Char ch, size_t from = npos) const override;
319 size_t rfind(const StringData &s, size_t from = npos) const override;
320 StringData *createSubstr(size_t pos, size_t len) const override;
321 void reverseInPlace() override;
322 size_t count(const StringData &substr) const override;
323
// Mutation operations (defined out of line).
324 void append(const StringData &other) override;
325 void append(Char ch) override;
326 void insert(size_t pos, const StringData &s) override;
327 void erase(size_t pos, size_t count) override;
328 void clear() override;
329 void resize(size_t len, Char fill = Char()) override;
330
// Byte-level access (defined out of line).
// NOTE(review): presumably served from the encoded _strCache via ensureEncoded() - confirm.
331 size_t byteCount() const override;
332 uint8_t byteAt(size_t idx) const override;
333 const char *cstr() const override;
334 const std::string &str() const override;
335
// Returns a 64-bit hash of the string data (defined out of line).
336 uint64_t hash() const override;
337
338 private:
// Const helper that may update the mutable cache members below.
// NOTE(review): presumably re-encodes _chars into _strCache when _strDirty is set - confirm.
339 void ensureEncoded() const;
340
// Decoded codepoints, one Char per character.
341 List<Char> _chars;
// Cached encoded form; mutable so const accessors can refresh it.
// NOTE(review): no synchronisation is visible for this cache, unlike the
// std::once_flag used by the literal backends - confirm the threading expectations.
342 mutable std::string _strCache;
// Set to true by every constructor.
343 mutable bool _strDirty;
344};
345
355 public:
363 : _s(s), _len(len), _hash(precomputedHash) {
364 if(_hash == 0 && _len > 0) _hash = fnv1aData(_s, _len);
366 }
367
// Creates a deep copy of this storage backend.
// The copy is a heap-allocated StringLatin1Data holding its own copy of the _len literal bytes.
368 StringData *_promeki_clone() const override {
369 return new StringLatin1Data(_s, _len);
370 }
371
372 // Character access
373 size_t length() const override { return _len; }
// Unchecked index into the literal.
// NOTE(review): _s[idx] is already a char, so the static_cast is a no-op; confirm that
// Char's char constructor maps bytes >= 0x80 to U+0080..U+00FF when char is signed.
374 Char charAt(size_t idx) const override {
375 return Char(static_cast<char>(_s[idx]));
376 }
// The literal cannot be modified: asserts in debug builds, does nothing when NDEBUG is defined.
377 void setCharAt(size_t, Char) override { assert(false); }
378 bool isLatin1() const override { return true; }
// Identifies this backend as wrapping an immutable literal.
379 bool isLiteral() const override { return true; }
380
381 // Search
// Search operations are defined out of line; see the StringData interface for the contracts.
382 size_t find(Char ch, size_t from = 0) const override;
383 size_t find(const StringData &s, size_t from = 0) const override;
384 size_t rfind(Char ch, size_t from = npos) const override;
385 size_t rfind(const StringData &s, size_t from = npos) const override;
386 StringData *createSubstr(size_t pos, size_t len) const override;
// In-place reversal would modify the literal, so it is treated like the mutators below.
387 void reverseInPlace() override { assert(false); }
388 size_t count(const StringData &substr) const override;
389
390 // Mutation (unreachable — COW always clones first)
// Each mutator only asserts. With NDEBUG defined, assert compiles away and the
// call silently does nothing, so correctness relies on callers cloning first.
391 void append(const StringData &) override { assert(false); }
392 void append(Char) override { assert(false); }
393 void insert(size_t, const StringData &) override { assert(false); }
394 void erase(size_t, size_t) override { assert(false); }
395 void clear() override { assert(false); }
396 void resize(size_t, Char) override { assert(false); }
397
398 // Byte-level
// For this Latin1 literal the byte count equals the character count.
399 size_t byteCount() const override { return _len; }
// Unchecked index into the literal, returned as an unsigned byte.
400 uint8_t byteAt(size_t idx) const override {
401 return static_cast<uint8_t>(_s[idx]);
402 }
// Returns the wrapped pointer itself.
// NOTE(review): this relies on the literal being null-terminated at _s[_len] - confirm at construction sites.
403 const char *cstr() const override { return _s; }
// Defined out of line.
// NOTE(review): presumably builds _strCache once, guarded by _strOnce - confirm.
404 const std::string &str() const override;
405
// Returns the hash stored at construction, either supplied by the caller or computed with fnv1aData.
// NOTE(review): an empty literal built without a precomputed hash keeps _hash == 0, while
// StringLatin1Data::hash() hashes zero bytes with fnv1aData - confirm both agree for empty strings.
406 uint64_t hash() const override { return _hash; }
407
408 private:
// Pointer to the wrapped literal bytes; not owned by this object.
409 const char *_s;
// Number of bytes (and characters) in the literal.
410 size_t _len;
// Hash value returned by hash().
411 uint64_t _hash;
// NOTE(review): presumably guards one-time construction of _strCache in str() - confirm.
412 mutable std::once_flag _strOnce;
// Cached std::string form; mutable so the const str() accessor can fill it.
413 mutable std::string _strCache;
414};
415
425 public:
434 StringUnicodeLiteralData(const char32_t *codepoints, size_t charCount,
435 const char *bytes, size_t byteLen,
437 : _codepoints(codepoints), _charCount(charCount),
438 _bytes(bytes), _byteLen(byteLen),
439 _hash(precomputedHash) {
440 if(_hash == 0 && _charCount > 0)
441 _hash = fnv1aData(_codepoints, _charCount * sizeof(char32_t));
443 }
444
// Creates a deep copy of this storage backend (defined out of line).
445 StringData *_promeki_clone() const override;
446
447 // Character access
// Length is the number of codepoints, not the number of UTF-8 bytes.
448 size_t length() const override { return _charCount; }
// Unchecked index into the pre-decoded codepoint array.
449 Char charAt(size_t idx) const override {
450 return Char(_codepoints[idx]);
451 }
// The literal cannot be modified: asserts in debug builds, does nothing when NDEBUG is defined.
452 void setCharAt(size_t, Char) override { assert(false); }
453 bool isLatin1() const override { return false; }
// Identifies this backend as wrapping an immutable literal.
454 bool isLiteral() const override { return true; }
455
456 // Search
// Search operations are defined out of line; see the StringData interface for the contracts.
457 size_t find(Char ch, size_t from = 0) const override;
458 size_t find(const StringData &s, size_t from = 0) const override;
459 size_t rfind(Char ch, size_t from = npos) const override;
460 size_t rfind(const StringData &s, size_t from = npos) const override;
461 StringData *createSubstr(size_t pos, size_t len) const override;
// In-place reversal would modify the literal, so it is treated like the mutators below.
462 void reverseInPlace() override { assert(false); }
463 size_t count(const StringData &substr) const override;
464
465 // Mutation (unreachable — COW always clones first)
// Each mutator only asserts. With NDEBUG defined, assert compiles away and the
// call silently does nothing, so correctness relies on callers cloning first.
466 void append(const StringData &) override { assert(false); }
467 void append(Char) override { assert(false); }
468 void insert(size_t, const StringData &) override { assert(false); }
469 void erase(size_t, size_t) override { assert(false); }
470 void clear() override { assert(false); }
471 void resize(size_t, Char) override { assert(false); }
472
473 // Byte-level — uses the original UTF-8 bytes directly
474 size_t byteCount() const override { return _byteLen; }
// Unchecked index into the UTF-8 byte buffer, returned as an unsigned byte.
475 uint8_t byteAt(size_t idx) const override {
476 return static_cast<uint8_t>(_bytes[idx]);
477 }
// Returns the wrapped byte pointer itself.
// NOTE(review): this relies on the buffer being null-terminated at _bytes[_byteLen] - confirm at construction sites.
478 const char *cstr() const override { return _bytes; }
// Defined out of line.
// NOTE(review): presumably builds _strCache once, guarded by _strOnce - confirm.
479 const std::string &str() const override;
480
// Returns the hash stored at construction. When not supplied, the constructor hashes the
// char32_t codepoint array, not the UTF-8 bytes.
// NOTE(review): confirm StringUnicodeData::hash() hashes the same representation, so equal strings hash equally.
481 uint64_t hash() const override { return _hash; }
482
483 private:
// Pre-decoded codepoints of the literal; not owned by this object.
484 const char32_t *_codepoints;
// Number of codepoints in _codepoints.
485 size_t _charCount;
// Original UTF-8 bytes of the literal; not owned by this object.
486 const char *_bytes;
// Number of bytes in _bytes.
487 size_t _byteLen;
// Hash value returned by hash().
488 uint64_t _hash;
// NOTE(review): presumably guards one-time construction of _strCache in str() - confirm.
489 mutable std::once_flag _strOnce;
// Cached std::string form; mutable so the const str() accessor can fill it.
490 mutable std::string _strCache;
491};
492
Unicode-aware character class wrapping a single codepoint.
Definition char.h:23
Dynamic array container wrapping std::vector.
Definition list.h:40
size_t size() const noexcept
Returns the number of elements in the list.
Definition list.h:301
An atomic reference count object.
Definition sharedptr.h:101
void setImmortal()
Marks this refcount as immortal. inc/dec become no-ops.
Definition sharedptr.h:149
Pure virtual interface for String storage backends.
Definition stringdata.h:29
virtual const char * cstr() const =0
Returns a null-terminated C string pointer.
virtual bool isLatin1() const =0
Returns true if the storage uses Latin1 encoding (one byte per character).
virtual size_t byteCount() const =0
Returns the byte count of the encoded string data.
virtual void setCharAt(size_t idx, Char ch)=0
Sets the character at the given index.
virtual void erase(size_t pos, size_t count)=0
Erases characters from the string.
virtual void append(Char ch)=0
Appends a single character.
virtual uint64_t hash() const =0
Returns a 64-bit hash of the string data.
virtual size_t count(const StringData &substr) const =0
Counts non-overlapping occurrences of a substring.
virtual void clear()=0
Removes all characters from the string.
virtual StringData * _promeki_clone() const =0
Creates a deep copy of this storage backend.
virtual size_t find(Char ch, size_t from=0) const =0
Finds the first occurrence of a character, searching forward from index `from`.
virtual void resize(size_t len, Char fill=Char())=0
Resizes the string to len characters.
virtual bool isLiteral() const
Returns true if the storage wraps an immutable literal.
Definition stringdata.h:60
virtual uint8_t byteAt(size_t idx) const =0
Returns the byte at the given index.
virtual void insert(size_t pos, const StringData &s)=0
Inserts a string at the given position.
virtual StringData * createSubstr(size_t pos, size_t len) const =0
Creates a new StringData containing a substring.
virtual const std::string & str() const =0
Returns a const reference to the internal std::string representation.
virtual size_t rfind(const StringData &s, size_t from=npos) const =0
Finds the last occurrence of a substring at or before index `from`.
bool isEmpty() const
Returns true if the string has zero length.
Definition stringdata.h:182
static constexpr size_t npos
Sentinel value indicating "not found".
Definition stringdata.h:188
virtual size_t find(const StringData &s, size_t from=0) const =0
Finds the first occurrence of a substring, searching forward from index `from`.
virtual ~StringData()
Virtual destructor.
virtual size_t rfind(Char ch, size_t from=npos) const =0
Finds the last occurrence of a character at or before index `from`.
virtual size_t length() const =0
Returns the number of characters in the string.
virtual void reverseInPlace()=0
Reverses the characters in place.
virtual void append(const StringData &other)=0
Appends another string's data to this storage.
RefCount _promeki_refct
Manual reference count (PROMEKI_SHARED cannot be used on abstract classes).
Definition stringdata.h:32
virtual Char charAt(size_t idx) const =0
Returns the character at the given index.
Latin1 string storage. One byte = one character.
Definition stringdata.h:197
StringLatin1Data()=default
Default constructor. Creates empty Latin1 storage.
size_t rfind(const StringData &s, size_t from=npos) const override
Finds the last occurrence of a substring at or before index `from`.
StringLatin1Data(size_t ct, char c)
Constructs a string of repeated characters.
Definition stringdata.h:233
size_t rfind(Char ch, size_t from=npos) const override
Finds the last occurrence of a character at or before index `from`.
StringLatin1Data(std::string &&s)
Constructs from a std::string (move).
Definition stringdata.h:213
size_t find(const StringData &s, size_t from=0) const override
Finds the first occurrence of a substring, searching forward from index `from`.
const std::string & str() const override
Returns a const reference to the internal std::string representation.
Definition stringdata.h:258
void resize(size_t len, Char fill=Char()) override
Resizes the string to len characters.
StringLatin1Data(const char *s, size_t len)
Constructs from a buffer with explicit length.
Definition stringdata.h:226
uint64_t hash() const override
Returns a 64-bit hash of the string data.
Definition stringdata.h:260
std::string & rawStr()
Direct access to the underlying std::string.
Definition stringdata.h:265
void clear() override
Removes all characters from the string.
Definition stringdata.h:252
Char charAt(size_t idx) const override
Returns the character at the given index.
size_t count(const StringData &substr) const override
Counts non-overlapping occurrences of a substring.
void append(const StringData &other) override
Appends another string's data to this storage.
StringData * createSubstr(size_t pos, size_t len) const override
Creates a new StringData containing a substring.
uint8_t byteAt(size_t idx) const override
Returns the byte at the given index.
Definition stringdata.h:256
void setCharAt(size_t idx, Char ch) override
Sets the character at the given index.
void reverseInPlace() override
Reverses the characters in place.
StringLatin1Data(const char *s)
Constructs from a C string.
Definition stringdata.h:219
bool isLatin1() const override
Returns true if the storage uses Latin1 encoding (one byte per character).
Definition stringdata.h:238
void insert(size_t pos, const StringData &s) override
Inserts a string at the given position.
StringLatin1Data(const std::string &s)
Constructs from a std::string (copy).
Definition stringdata.h:207
void erase(size_t pos, size_t count) override
Erases characters from the string.
const char * cstr() const override
Returns a null-terminated C string pointer.
Definition stringdata.h:257
size_t find(Char ch, size_t from=0) const override
Finds the first occurrence of a character, searching forward from index `from`.
size_t byteCount() const override
Returns the byte count of the encoded string data.
Definition stringdata.h:255
void append(Char ch) override
Appends a single character.
size_t length() const override
Returns the number of characters in the string.
Definition stringdata.h:235
Immutable string storage wrapping a string literal pointer.
Definition stringdata.h:354
void insert(size_t, const StringData &) override
Inserts a string at the given position.
Definition stringdata.h:393
StringData * _promeki_clone() const override
Creates a deep copy of this storage backend.
Definition stringdata.h:368
void reverseInPlace() override
Reverses the characters in place.
Definition stringdata.h:387
uint64_t hash() const override
Returns a 64-bit hash of the string data.
Definition stringdata.h:406
size_t byteCount() const override
Returns the byte count of the encoded string data.
Definition stringdata.h:399
uint8_t byteAt(size_t idx) const override
Returns the byte at the given index.
Definition stringdata.h:400
void append(Char) override
Appends a single character.
Definition stringdata.h:392
void erase(size_t, size_t) override
Erases characters from the string.
Definition stringdata.h:394
bool isLatin1() const override
Returns true if the storage uses Latin1 encoding (one byte per character).
Definition stringdata.h:378
size_t rfind(const StringData &s, size_t from=npos) const override
Finds the last occurrence of a substring at or before index `from`.
void resize(size_t, Char) override
Resizes the string to len characters.
Definition stringdata.h:396
StringLiteralData(const char *s, size_t len, uint64_t precomputedHash=0)
Constructs from a string literal pointer.
Definition stringdata.h:362
size_t rfind(Char ch, size_t from=npos) const override
Finds the last occurrence of a character at or before index `from`.
bool isLiteral() const override
Returns true if the storage wraps an immutable literal.
Definition stringdata.h:379
void setCharAt(size_t, Char) override
Sets the character at the given index.
Definition stringdata.h:377
size_t length() const override
Returns the number of characters in the string.
Definition stringdata.h:373
size_t find(const StringData &s, size_t from=0) const override
Finds the first occurrence of a substring, searching forward from index `from`.
Char charAt(size_t idx) const override
Returns the character at the given index.
Definition stringdata.h:374
void append(const StringData &) override
Appends another string's data to this storage.
Definition stringdata.h:391
size_t count(const StringData &substr) const override
Counts non-overlapping occurrences of a substring.
StringData * createSubstr(size_t pos, size_t len) const override
Creates a new StringData containing a substring.
void clear() override
Removes all characters from the string.
Definition stringdata.h:395
const std::string & str() const override
Returns a const reference to the internal std::string representation.
const char * cstr() const override
Returns a null-terminated C string pointer.
Definition stringdata.h:403
size_t find(Char ch, size_t from=0) const override
Finds the first occurrence of a character, searching forward from index `from`.
Unicode string storage. Decoded codepoints in a List<Char>.
Definition stringdata.h:278
Char charAt(size_t idx) const override
Returns the character at the given index.
Definition stringdata.h:312
static StringUnicodeData * fromUtf8(const char *data, size_t len)
Creates a StringUnicodeData by decoding UTF-8 bytes.
size_t byteCount() const override
Returns the byte count of the encoded string data.
void resize(size_t len, Char fill=Char()) override
Resizes the string to len characters.
size_t length() const override
Returns the number of characters in the string.
Definition stringdata.h:311
StringData * createSubstr(size_t pos, size_t len) const override
Creates a new StringData containing a substring.
StringUnicodeData(List< Char > &&chars)
Constructs from a list of codepoints (move).
Definition stringdata.h:294
void append(const StringData &other) override
Appends another string's data to this storage.
size_t find(const StringData &s, size_t from=0) const override
Finds the first occurrence of a substring, searching forward from index `from`.
size_t rfind(const StringData &s, size_t from=npos) const override
Finds the last occurrence of a substring at or before index `from`.
StringUnicodeData(const List< Char > &chars)
Constructs from a list of codepoints (copy).
Definition stringdata.h:288
void erase(size_t pos, size_t count) override
Erases characters from the string.
StringUnicodeData()
Default constructor. Creates empty Unicode storage.
Definition stringdata.h:282
uint64_t hash() const override
Returns a 64-bit hash of the string data.
size_t rfind(Char ch, size_t from=npos) const override
Finds the last occurrence of a character at or before index `from`.
void append(Char ch) override
Appends a single character.
void setCharAt(size_t idx, Char ch) override
Sets the character at the given index.
const char * cstr() const override
Returns a null-terminated C string pointer.
void insert(size_t pos, const StringData &s) override
Inserts a string at the given position.
void reverseInPlace() override
Reverses the characters in place.
uint8_t byteAt(size_t idx) const override
Returns the byte at the given index.
void clear() override
Removes all characters from the string.
const std::string & str() const override
Returns a const reference to the internal std::string representation.
bool isLatin1() const override
Returns true if the storage uses Latin1 encoding (one byte per character).
Definition stringdata.h:314
size_t count(const StringData &substr) const override
Counts non-overlapping occurrences of a substring.
size_t find(Char ch, size_t from=0) const override
Finds the first occurrence of a character, searching forward from index `from`.
static StringUnicodeData * fromLatin1(const std::string &s)
Creates a StringUnicodeData by promoting Latin1 bytes.
Immutable Unicode string storage wrapping compile-time decoded data.
Definition stringdata.h:424
bool isLatin1() const override
Returns true if the storage uses Latin1 encoding (one byte per character).
Definition stringdata.h:453
uint64_t hash() const override
Returns a 64-bit hash of the string data.
Definition stringdata.h:481
void insert(size_t, const StringData &) override
Inserts a string at the given position.
Definition stringdata.h:468
const char * cstr() const override
Returns a null-terminated C string pointer.
Definition stringdata.h:478
Char charAt(size_t idx) const override
Returns the character at the given index.
Definition stringdata.h:449
uint8_t byteAt(size_t idx) const override
Returns the byte at the given index.
Definition stringdata.h:475
void erase(size_t, size_t) override
Erases characters from the string.
Definition stringdata.h:469
size_t byteCount() const override
Returns the byte count of the encoded string data.
Definition stringdata.h:474
void clear() override
Removes all characters from the string.
Definition stringdata.h:470
void reverseInPlace() override
Reverses the characters in place.
Definition stringdata.h:462
void resize(size_t, Char) override
Resizes the string to len characters.
Definition stringdata.h:471
void append(const StringData &) override
Appends another string's data to this storage.
Definition stringdata.h:466
size_t find(const StringData &s, size_t from=0) const override
Finds the first occurrence of a substring, searching forward from index `from`.
bool isLiteral() const override
Returns true if the storage wraps an immutable literal.
Definition stringdata.h:454
size_t rfind(Char ch, size_t from=npos) const override
Finds the last occurrence of a character at or before index `from`.
StringData * createSubstr(size_t pos, size_t len) const override
Creates a new StringData containing a substring.
const std::string & str() const override
Returns a const reference to the internal std::string representation.
void append(Char) override
Appends a single character.
Definition stringdata.h:467
void setCharAt(size_t, Char) override
Sets the character at the given index.
Definition stringdata.h:452
size_t count(const StringData &substr) const override
Counts non-overlapping occurrences of a substring.
StringData * _promeki_clone() const override
Creates a deep copy of this storage backend.
size_t rfind(const StringData &s, size_t from=npos) const override
Finds the last occurrence of a substring at or before index `from`.
size_t length() const override
Returns the number of characters in the string.
Definition stringdata.h:448
size_t find(Char ch, size_t from=0) const override
Finds the first occurrence of a character, searching forward from index `from`.
StringUnicodeLiteralData(const char32_t *codepoints, size_t charCount, const char *bytes, size_t byteLen, uint64_t precomputedHash=0)
Constructs from pre-decoded codepoints and original UTF-8 bytes.
Definition stringdata.h:434
constexpr uint64_t fnv1aData(const void *data, size_t len, uint64_t seed=0xcbf29ce484222325ULL)
Computes the FNV-1a hash of a block of data.
Definition fnv1a.h:43
#define PROMEKI_NAMESPACE_BEGIN
Starts a promeki namespace block.
Definition namespace.h:14
#define PROMEKI_NAMESPACE_END
Ends a promeki namespace block.
Definition namespace.h:19
#define PROMEKI_SHARED_DERIVED(BASE, DERIVED)
Macro to simplify making a derived object into a native shared object.
Definition sharedptr.h:67