VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/ministring.cpp@ 68094

Last change on this file since 68094 was 68094, checked in by vboxsync, 8 years ago

RTCString: Added startsWithWord method.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.7 KB
Line 
1/* $Id: ministring.cpp 68094 2017-07-24 12:44:02Z vboxsync $ */
2/** @file
3 * IPRT - Mini C++ string class.
4 *
5 * This is a base for both Utf8Str and other places where IPRT may want to use
6 * a lean C++ string class.
7 */
8
9/*
10 * Copyright (C) 2007-2016 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.215389.xyz. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*********************************************************************************************************************************
32* Header Files *
33*********************************************************************************************************************************/
34#include <iprt/cpp/ministring.h>
35#include <iprt/ctype.h>
36#include <iprt/uni.h>
37
38
39/*********************************************************************************************************************************
40* Global Variables *
41*********************************************************************************************************************************/
42const size_t RTCString::npos = ~(size_t)0;
43
44
45/*********************************************************************************************************************************
46* Defined Constants And Macros *
47*********************************************************************************************************************************/
/** Byte granularity that buffer sizes are rounded up to (with RT_ALIGN_Z)
 *  before reserve() is called by the append, replace and printf code below. */
#define IPRT_MINISTRING_APPEND_ALIGNMENT    64
50
51
52RTCString &RTCString::printf(const char *pszFormat, ...)
53{
54 va_list va;
55 va_start(va, pszFormat);
56 printfV(pszFormat, va);
57 va_end(va);
58 return *this;
59}
60
61/**
62 * Callback used with RTStrFormatV by RTCString::printfV.
63 *
64 * @returns The number of bytes added (not used).
65 *
66 * @param pvArg The string object.
67 * @param pachChars The characters to append.
68 * @param cbChars The number of characters. 0 on the final callback.
69 */
70/*static*/ DECLCALLBACK(size_t)
71RTCString::printfOutputCallback(void *pvArg, const char *pachChars, size_t cbChars)
72{
73 RTCString *pThis = (RTCString *)pvArg;
74 if (cbChars)
75 {
76 size_t cchBoth = pThis->m_cch + cbChars;
77 if (cchBoth >= pThis->m_cbAllocated)
78 {
79 /* Double the buffer size, if it's less that _4M. Align sizes like
80 for append. */
81 size_t cbAlloc = RT_ALIGN_Z(pThis->m_cbAllocated, IPRT_MINISTRING_APPEND_ALIGNMENT);
82 cbAlloc += RT_MIN(cbAlloc, _4M);
83 if (cbAlloc <= cchBoth)
84 cbAlloc = RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT);
85 pThis->reserve(cbAlloc);
86#ifndef RT_EXCEPTIONS_ENABLED
87 AssertReleaseReturn(pThis->capacity() > cchBoth, 0);
88#endif
89 }
90
91 memcpy(&pThis->m_psz[pThis->m_cch], pachChars, cbChars);
92 pThis->m_cch = cchBoth;
93 pThis->m_psz[cchBoth] = '\0';
94 }
95 return cbChars;
96}
97
98RTCString &RTCString::printfV(const char *pszFormat, va_list va)
99{
100 cleanup();
101 RTStrFormatV(printfOutputCallback, this, NULL, NULL, pszFormat, va);
102 return *this;
103}
104
105RTCString &RTCString::append(const RTCString &that)
106{
107 Assert(&that != this);
108 return appendWorker(that.c_str(), that.length());
109}
110
111RTCString &RTCString::append(const char *pszThat)
112{
113 return appendWorker(pszThat, strlen(pszThat));
114}
115
116RTCString &RTCString::append(const RTCString &rThat, size_t offStart, size_t cchMax /*= RTSTR_MAX*/)
117{
118 if (offStart < rThat.length())
119 {
120 size_t cchLeft = rThat.length() - offStart;
121 return appendWorker(rThat.c_str() + offStart, RT_MIN(cchLeft, cchMax));
122 }
123 return *this;
124}
125
126RTCString &RTCString::append(const char *pszThat, size_t cchMax)
127{
128 return appendWorker(pszThat, RTStrNLen(pszThat, cchMax));
129}
130
131RTCString &RTCString::appendWorker(const char *pszSrc, size_t cchSrc)
132{
133 if (cchSrc)
134 {
135 size_t cchThis = length();
136 size_t cchBoth = cchThis + cchSrc;
137
138 if (cchBoth >= m_cbAllocated)
139 {
140 reserve(RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
141 // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
142#ifndef RT_EXCEPTIONS_ENABLED
143 AssertRelease(capacity() > cchBoth);
144#endif
145 }
146
147 memcpy(&m_psz[cchThis], pszSrc, cchSrc);
148 m_psz[cchBoth] = '\0';
149 m_cch = cchBoth;
150 }
151 return *this;
152}
153
154RTCString &RTCString::append(char ch)
155{
156 Assert((unsigned char)ch < 0x80); /* Don't create invalid UTF-8. */
157 if (ch)
158 {
159 // allocate in chunks of 20 in case this gets called several times
160 if (m_cch + 1 >= m_cbAllocated)
161 {
162 reserve(RT_ALIGN_Z(m_cch + 2, IPRT_MINISTRING_APPEND_ALIGNMENT));
163 // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
164#ifndef RT_EXCEPTIONS_ENABLED
165 AssertRelease(capacity() > m_cch + 1);
166#endif
167 }
168
169 m_psz[m_cch] = ch;
170 m_psz[++m_cch] = '\0';
171 }
172 return *this;
173}
174
175RTCString &RTCString::appendCodePoint(RTUNICP uc)
176{
177 /*
178 * Single byte encoding.
179 */
180 if (uc < 0x80)
181 return RTCString::append((char)uc);
182
183 /*
184 * Multibyte encoding.
185 * Assume max encoding length when resizing the string, that's simpler.
186 */
187 AssertReturn(uc <= UINT32_C(0x7fffffff), *this);
188
189 if (m_cch + 6 >= m_cbAllocated)
190 {
191 reserve(RT_ALIGN_Z(m_cch + 6 + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
192 // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
193#ifndef RT_EXCEPTIONS_ENABLED
194 AssertRelease(capacity() > m_cch + 6);
195#endif
196 }
197
198 char *pszNext = RTStrPutCp(&m_psz[m_cch], uc);
199 m_cch = pszNext - m_psz;
200 *pszNext = '\0';
201
202 return *this;
203}
204
205
206RTCString &RTCString::replace(size_t offStart, size_t cchLength, const RTCString &rStrReplacement)
207{
208 return replaceWorker(offStart, cchLength, rStrReplacement.c_str(), rStrReplacement.length());
209}
210
211RTCString &RTCString::replace(size_t offStart, size_t cchLength, const RTCString &rStrReplacement,
212 size_t offReplacement, size_t cchReplacement)
213{
214 Assert(this != &rStrReplacement);
215 if (cchReplacement > 0)
216 {
217 if (offReplacement < rStrReplacement.length())
218 {
219 size_t cchMaxReplacement = rStrReplacement.length() - offReplacement;
220 return replaceWorker(offStart, cchLength, rStrReplacement.c_str() + offReplacement,
221 RT_MIN(cchReplacement, cchMaxReplacement));
222 }
223 /* Our non-standard handling of out_of_range situations. */
224 AssertMsgFailed(("offReplacement=%zu (cchReplacement=%zu) rStrReplacement.length()=%zu\n",
225 offReplacement, cchReplacement, rStrReplacement.length()));
226 }
227 return replaceWorker(offStart, cchLength, "", 0);
228}
229
230RTCString &RTCString::replace(size_t offStart, size_t cchLength, const char *pszReplacement)
231{
232 return replaceWorker(offStart, cchLength, pszReplacement, strlen(pszReplacement));
233}
234
235RTCString &RTCString::replace(size_t offStart, size_t cchLength, const char *pszReplacement, size_t cchReplacement)
236{
237 return replaceWorker(offStart, cchLength, pszReplacement, RTStrNLen(pszReplacement, cchReplacement));
238}
239
240RTCString &RTCString::replaceWorker(size_t offStart, size_t cchLength, const char *pszSrc, size_t cchSrc)
241{
242 /*
243 * Our non-standard handling of out_of_range situations.
244 */
245 size_t const cchOldLength = length();
246 AssertMsgReturn(offStart < cchOldLength, ("offStart=%zu (cchLength=%zu); length()=%zu\n", offStart, cchLength, cchOldLength),
247 *this);
248
249 /*
250 * Correct the length parameter.
251 */
252 size_t cchMaxLength = cchOldLength - offStart;
253 if (cchMaxLength < cchLength)
254 cchLength = cchMaxLength;
255
256 /*
257 * Adjust string allocation if necessary.
258 */
259 size_t cchNew = cchOldLength - cchLength + cchSrc;
260 if (cchNew >= m_cbAllocated)
261 {
262 reserve(RT_ALIGN_Z(cchNew + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
263 // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
264#ifndef RT_EXCEPTIONS_ENABLED
265 AssertRelease(capacity() > cchNew);
266#endif
267 }
268
269 /*
270 * Make the change.
271 */
272 size_t cchAfter = cchOldLength - offStart - cchLength;
273 if (cchAfter > 0)
274 memmove(&m_psz[offStart + cchSrc], &m_psz[offStart + cchLength], cchAfter);
275 memcpy(&m_psz[offStart], pszSrc, cchSrc);
276 m_psz[cchNew] = '\0';
277 m_cch = cchNew;
278
279 return *this;
280}
281
282
283size_t RTCString::find(const char *pszNeedle, size_t offStart /*= 0*/) const
284{
285 if (offStart < length())
286 {
287 const char *pszThis = c_str();
288 if (pszThis)
289 {
290 if (pszNeedle && *pszNeedle != '\0')
291 {
292 const char *pszHit = strstr(pszThis + offStart, pszNeedle);
293 if (pszHit)
294 return pszHit - pszThis;
295 }
296 }
297 }
298
299 return npos;
300}
301
302size_t RTCString::find(const RTCString *pStrNeedle, size_t offStart /*= 0*/) const
303{
304 if (offStart < length())
305 {
306 const char *pszThis = c_str();
307 if (pszThis)
308 {
309 if (pStrNeedle)
310 {
311 const char *pszNeedle = pStrNeedle->c_str();
312 if (pszNeedle && *pszNeedle != '\0')
313 {
314 const char *pszHit = strstr(pszThis + offStart, pszNeedle);
315 if (pszHit)
316 return pszHit - pszThis;
317 }
318 }
319 }
320 }
321
322 return npos;
323}
324
325void RTCString::findReplace(char chFind, char chReplace)
326{
327 Assert((unsigned int)chFind < 128U);
328 Assert((unsigned int)chReplace < 128U);
329
330 for (size_t i = 0; i < length(); ++i)
331 {
332 char *p = &m_psz[i];
333 if (*p == chFind)
334 *p = chReplace;
335 }
336}
337
338size_t RTCString::count(char ch) const
339{
340 Assert((unsigned int)ch < 128U);
341
342 size_t c = 0;
343 const char *psz = m_psz;
344 if (psz)
345 {
346 char chCur;
347 while ((chCur = *psz++) != '\0')
348 if (chCur == ch)
349 c++;
350 }
351 return c;
352}
353
354#if 0 /** @todo implement these when needed. */
355size_t RTCString::count(const char *psz, CaseSensitivity cs = CaseSensitive) const
356{
357}
358
359size_t RTCString::count(const RTCString *pStr, CaseSensitivity cs = CaseSensitive) const
360{
361
362}
363#endif
364
365
366RTCString &RTCString::strip()
367{
368 stripRight();
369 return stripLeft();
370}
371
372
373RTCString &RTCString::stripLeft()
374{
375 char *psz = m_psz;
376 size_t const cch = m_cch;
377 size_t off = 0;
378 while (off < cch && RT_C_IS_SPACE(psz[off]))
379 off++;
380 if (off > 0)
381 {
382 if (off != cch)
383 {
384 memmove(psz, &psz[off], cch - off + 1);
385 m_cch = cch - off;
386 }
387 else
388 setNull();
389 }
390 return *this;
391}
392
393
394RTCString &RTCString::stripRight()
395{
396 char *psz = m_psz;
397 size_t cch = m_cch;
398 while (cch > 0 && RT_C_IS_SPACE(psz[cch - 1]))
399 cch--;
400 if (m_cch != cch)
401 {
402 m_cch = cch;
403 psz[cch] = '\0';
404 }
405 return *this;
406}
407
408
409
410RTCString RTCString::substrCP(size_t pos /*= 0*/, size_t n /*= npos*/) const
411{
412 RTCString ret;
413
414 if (n)
415 {
416 const char *psz;
417
418 if ((psz = c_str()))
419 {
420 RTUNICP cp;
421
422 // walk the UTF-8 characters until where the caller wants to start
423 size_t i = pos;
424 while (*psz && i--)
425 if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
426 return ret; // return empty string on bad encoding
427
428 const char *pFirst = psz;
429
430 if (n == npos)
431 // all the rest:
432 ret = pFirst;
433 else
434 {
435 i = n;
436 while (*psz && i--)
437 if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
438 return ret; // return empty string on bad encoding
439
440 size_t cbCopy = psz - pFirst;
441 if (cbCopy)
442 {
443 ret.reserve(cbCopy + 1); // may throw bad_alloc
444#ifndef RT_EXCEPTIONS_ENABLED
445 AssertRelease(capacity() >= cbCopy + 1);
446#endif
447 memcpy(ret.m_psz, pFirst, cbCopy);
448 ret.m_cch = cbCopy;
449 ret.m_psz[cbCopy] = '\0';
450 }
451 }
452 }
453 }
454
455 return ret;
456}
457
458bool RTCString::endsWith(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
459{
460 size_t l1 = length();
461 if (l1 == 0)
462 return false;
463
464 size_t l2 = that.length();
465 if (l1 < l2)
466 return false;
467 /** @todo r=bird: If l2 is 0, then m_psz can be NULL and we will crash. See
468 * also handling of l2 == in startsWith. */
469
470 size_t l = l1 - l2;
471 if (cs == CaseSensitive)
472 return ::RTStrCmp(&m_psz[l], that.m_psz) == 0;
473 return ::RTStrICmp(&m_psz[l], that.m_psz) == 0;
474}
475
476bool RTCString::startsWith(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
477{
478 size_t l1 = length();
479 size_t l2 = that.length();
480 if (l1 == 0 || l2 == 0) /** @todo r=bird: this differs from endsWith, and I think other IPRT code. If l2 == 0, it matches anything. */
481 return false;
482
483 if (l1 < l2)
484 return false;
485
486 if (cs == CaseSensitive)
487 return ::RTStrNCmp(m_psz, that.m_psz, l2) == 0;
488 return ::RTStrNICmp(m_psz, that.m_psz, l2) == 0;
489}
490
491bool RTCString::startsWithWord(const char *pszWord, CaseSensitivity enmCase /*= CaseSensitive*/) const
492{
493 const char *pszSrc = RTStrStripL(c_str()); /** @todo RTStrStripL doesn't use RTUniCpIsSpace (nbsp) */
494 size_t cchWord = strlen(pszWord);
495 if ( enmCase == CaseSensitive
496 ? RTStrNCmp(pszSrc, pszWord, cchWord) == 0
497 : RTStrNICmp(pszSrc, pszWord, cchWord) == 0)
498 {
499 if ( pszSrc[cchWord] == '\0'
500 || RT_C_IS_SPACE(pszSrc[cchWord])
501 || RT_C_IS_PUNCT(pszSrc[cchWord]) )
502 return true;
503 RTUNICP uc = RTStrGetCp(&pszSrc[cchWord]);
504 if (RTUniCpIsSpace(uc))
505 return true;
506 }
507 return false;
508}
509
510bool RTCString::startsWithWord(const RTCString &rThat, CaseSensitivity enmCase /*= CaseSensitive*/) const
511{
512 return startsWithWord(rThat.c_str(), enmCase);
513}
514
515bool RTCString::contains(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
516{
517 /** @todo r-bird: Not checking for NULL strings like startsWith does (and
518 * endsWith only does half way). */
519 if (cs == CaseSensitive)
520 return ::RTStrStr(m_psz, that.m_psz) != NULL;
521 return ::RTStrIStr(m_psz, that.m_psz) != NULL;
522}
523
524bool RTCString::contains(const char *pszNeedle, CaseSensitivity cs /*= CaseSensitive*/) const
525{
526 /** @todo r-bird: Not checking for NULL strings like startsWith does (and
527 * endsWith only does half way). */
528 if (cs == CaseSensitive)
529 return ::RTStrStr(m_psz, pszNeedle) != NULL;
530 return ::RTStrIStr(m_psz, pszNeedle) != NULL;
531}
532
533int RTCString::toInt(uint64_t &i) const
534{
535 if (!m_psz)
536 return VERR_NO_DIGITS;
537 return RTStrToUInt64Ex(m_psz, NULL, 0, &i);
538}
539
540int RTCString::toInt(uint32_t &i) const
541{
542 if (!m_psz)
543 return VERR_NO_DIGITS;
544 return RTStrToUInt32Ex(m_psz, NULL, 0, &i);
545}
546
547RTCList<RTCString, RTCString *>
548RTCString::split(const RTCString &a_rstrSep, SplitMode mode /* = RemoveEmptyParts */) const
549{
550 RTCList<RTCString> strRet;
551 if (!m_psz)
552 return strRet;
553 if (a_rstrSep.isEmpty())
554 {
555 strRet.append(RTCString(m_psz));
556 return strRet;
557 }
558
559 size_t cch = m_cch;
560 char const *pszTmp = m_psz;
561 while (cch > 0)
562 {
563 char const *pszNext = strstr(pszTmp, a_rstrSep.c_str());
564 if (!pszNext)
565 {
566 strRet.append(RTCString(pszTmp, cch));
567 break;
568 }
569 size_t cchNext = pszNext - pszTmp;
570 if ( cchNext > 0
571 || mode == KeepEmptyParts)
572 strRet.append(RTCString(pszTmp, cchNext));
573 pszTmp += cchNext + a_rstrSep.length();
574 cch -= cchNext + a_rstrSep.length();
575 }
576
577 return strRet;
578}
579
580/* static */
581RTCString
582RTCString::joinEx(const RTCList<RTCString, RTCString *> &a_rList,
583 const RTCString &a_rstrPrefix /* = "" */,
584 const RTCString &a_rstrSep /* = "" */)
585{
586 RTCString strRet;
587 if (a_rList.size() > 1)
588 {
589 /* calc the required size */
590 size_t cbNeeded = a_rstrSep.length() * (a_rList.size() - 1) + 1;
591 cbNeeded += a_rstrPrefix.length() * (a_rList.size() - 1) + 1;
592 for (size_t i = 0; i < a_rList.size(); ++i)
593 cbNeeded += a_rList.at(i).length();
594 strRet.reserve(cbNeeded);
595
596 /* do the appending. */
597 for (size_t i = 0; i < a_rList.size() - 1; ++i)
598 {
599 if (a_rstrPrefix.isNotEmpty())
600 strRet.append(a_rstrPrefix);
601 strRet.append(a_rList.at(i));
602 strRet.append(a_rstrSep);
603 }
604 strRet.append(a_rList.last());
605 }
606 /* special case: one list item. */
607 else if (a_rList.size() > 0)
608 {
609 if (a_rstrPrefix.isNotEmpty())
610 strRet.append(a_rstrPrefix);
611 strRet.append(a_rList.last());
612 }
613
614 return strRet;
615}
616
617/* static */
618RTCString
619RTCString::join(const RTCList<RTCString, RTCString *> &a_rList,
620 const RTCString &a_rstrSep /* = "" */)
621{
622 return RTCString::joinEx(a_rList,
623 "" /* a_rstrPrefix */, a_rstrSep);
624}
625
626const RTCString operator+(const RTCString &a_rStr1, const RTCString &a_rStr2)
627{
628 RTCString strRet(a_rStr1);
629 strRet += a_rStr2;
630 return strRet;
631}
632
633const RTCString operator+(const RTCString &a_rStr1, const char *a_pszStr2)
634{
635 RTCString strRet(a_rStr1);
636 strRet += a_pszStr2;
637 return strRet;
638}
639
640const RTCString operator+(const char *a_psz1, const RTCString &a_rStr2)
641{
642 RTCString strRet(a_psz1);
643 strRet += a_rStr2;
644 return strRet;
645}
646
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette