VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/ministring.cpp@ 33563

Last change on this file since 33563 was 33563, checked in by vboxsync, 15 years ago

iprt:ministring: Added the Java-style equals() and equalsIgnoreCase(), since equals() can optimize the comparison by first checking whether the lengths are the same (compare() cannot, as it needs to determine the ordering). Added appendCodePoint() for UTF-8. Fixed the incorrect assumption in toUpper and toLower that the string length remains unchanged - the string might shrink, as the folded code points may have a shorter encoding. Added a testcase that verifies that a code point will not grow during folding and that it has a stable encoding length after it has been changed by a folding.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1/* $Id: ministring.cpp 33563 2010-10-28 14:46:26Z vboxsync $ */
2/** @file
3 * IPRT - Mini C++ string class.
4 *
5 * This is a base for both Utf8Str and other places where IPRT may want to use
6 * a lean C++ string class.
7 */
8
9/*
10 * Copyright (C) 2007-2010 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.215389.xyz. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*******************************************************************************
32* Header Files *
33*******************************************************************************/
34#include <iprt/cpp/ministring.h>
35using namespace iprt;
36
37
38/*******************************************************************************
39* Global Variables *
40*******************************************************************************/
41const size_t MiniString::npos = ~(size_t)0;
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
46/** Allocation block alignment used when appending bytes to a string. */
47#define IPRT_MINISTRING_APPEND_ALIGNMENT 64
48
49
50MiniString &MiniString::append(const MiniString &that)
51{
52 size_t cchThat = that.length();
53 if (cchThat)
54 {
55 size_t cchThis = length();
56 size_t cchBoth = cchThis + cchThat;
57
58 if (cchBoth >= m_cbAllocated)
59 {
60 reserve(RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
61 // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
62#ifndef RT_EXCEPTIONS_ENABLED
63 AssertRelease(capacity() > cchBoth);
64#endif
65 }
66
67 memcpy(m_psz + cchThis, that.m_psz, cchThat);
68 m_psz[cchBoth] = '\0';
69 m_cch = cchBoth;
70 }
71 return *this;
72}
73
74MiniString &MiniString::append(const char *pszThat)
75{
76 size_t cchThat = strlen(pszThat);
77 if (cchThat)
78 {
79 size_t cchThis = length();
80 size_t cchBoth = cchThis + cchThat;
81
82 if (cchBoth >= m_cbAllocated)
83 {
84 reserve(RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
85 // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
86#ifndef RT_EXCEPTIONS_ENABLED
87 AssertRelease(capacity() > cchBoth);
88#endif
89 }
90
91 memcpy(&m_psz[cchThis], pszThat, cchThat);
92 m_psz[cchBoth] = '\0';
93 m_cch = cchBoth;
94 }
95 return *this;
96}
97
98MiniString& MiniString::append(char ch)
99{
100 Assert((unsigned char)ch < 0x80); /* Don't create invalid UTF-8. */
101 if (ch)
102 {
103 // allocate in chunks of 20 in case this gets called several times
104 if (m_cch + 1 >= m_cbAllocated)
105 {
106 reserve(RT_ALIGN_Z(m_cch + 2, IPRT_MINISTRING_APPEND_ALIGNMENT));
107 // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
108#ifndef RT_EXCEPTIONS_ENABLED
109 AssertRelease(capacity() > m_cch + 1);
110#endif
111 }
112
113 m_psz[m_cch] = ch;
114 m_psz[++m_cch] = '\0';
115 }
116 return *this;
117}
118
119MiniString &MiniString::appendCodePoint(RTUNICP uc)
120{
121 /*
122 * Single byte encoding.
123 */
124 if (uc < 0x80)
125 return MiniString::append((char)uc);
126
127 /*
128 * Multibyte encoding.
129 * Assume max encoding length when resizing the string, that's simpler.
130 */
131 AssertReturn(uc <= UINT32_C(0x7fffffff), *this);
132
133 if (m_cch + 6 >= m_cbAllocated)
134 {
135 reserve(RT_ALIGN_Z(m_cch + 6 + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
136 // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
137#ifndef RT_EXCEPTIONS_ENABLED
138 AssertRelease(capacity() > m_cch + 6);
139#endif
140 }
141
142 char *pszNext = RTStrPutCp(&m_psz[m_cch], uc);
143 m_cch = pszNext - m_psz;
144 *pszNext = '\0';
145
146 return *this;
147}
148
149size_t MiniString::find(const char *pcszFind, size_t pos /*= 0*/)
150 const
151{
152 const char *pszThis, *p;
153
154 if ( ((pszThis = c_str()))
155 && (pos < length())
156 && ((p = strstr(pszThis + pos, pcszFind)))
157 )
158 return p - pszThis;
159
160 return npos;
161}
162
163MiniString MiniString::substr(size_t pos /*= 0*/, size_t n /*= npos*/)
164 const
165{
166 MiniString ret;
167
168 if (n)
169 {
170 const char *psz;
171
172 if ((psz = c_str()))
173 {
174 RTUNICP cp;
175
176 // walk the UTF-8 characters until where the caller wants to start
177 size_t i = pos;
178 while (*psz && i--)
179 if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
180 return ret; // return empty string on bad encoding
181
182 const char *pFirst = psz;
183
184 if (n == npos)
185 // all the rest:
186 ret = pFirst;
187 else
188 {
189 i = n;
190 while (*psz && i--)
191 if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
192 return ret; // return empty string on bad encoding
193
194 size_t cbCopy = psz - pFirst;
195 ret.reserve(cbCopy + 1); // may throw bad_alloc
196#ifndef RT_EXCEPTIONS_ENABLED
197 AssertRelease(capacity() >= cbCopy + 1);
198#endif
199 memcpy(ret.m_psz, pFirst, cbCopy);
200 ret.m_cch = cbCopy;
201 ret.m_psz[cbCopy] = '\0';
202 }
203 }
204 }
205
206 return ret;
207}
208
209bool MiniString::endsWith(const MiniString &that, CaseSensitivity cs /*= CaseSensitive*/) const
210{
211 size_t l1 = length();
212 if (l1 == 0)
213 return false;
214
215 size_t l2 = that.length();
216 if (l1 < l2)
217 return false;
218 /** @todo r=bird: If l2 is 0, then m_psz can be NULL and we will crash. See
219 * also handling of l2 == in startsWith. */
220
221 size_t l = l1 - l2;
222 if (cs == CaseSensitive)
223 return ::RTStrCmp(&m_psz[l], that.m_psz) == 0;
224 else
225 return ::RTStrICmp(&m_psz[l], that.m_psz) == 0;
226}
227
228bool MiniString::startsWith(const MiniString &that, CaseSensitivity cs /*= CaseSensitive*/) const
229{
230 size_t l1 = length();
231 size_t l2 = that.length();
232 if (l1 == 0 || l2 == 0) /** @todo r=bird: this differs from endsWith, and I think other IPRT code. If l2 == 0, it matches anything. */
233 return false;
234
235 if (l1 < l2)
236 return false;
237
238 if (cs == CaseSensitive)
239 return ::RTStrNCmp(m_psz, that.m_psz, l2) == 0;
240 else
241 return ::RTStrNICmp(m_psz, that.m_psz, l2) == 0;
242}
243
244bool MiniString::contains(const MiniString &that, CaseSensitivity cs /*= CaseSensitive*/) const
245{
246 /** @todo r-bird: Not checking for NULL strings like startsWith does (and
247 * endsWith only does half way). */
248 if (cs == CaseSensitive)
249 return ::RTStrStr(m_psz, that.m_psz) != NULL;
250 else
251 return ::RTStrIStr(m_psz, that.m_psz) != NULL;
252}
253
254int MiniString::toInt(uint64_t &i) const
255{
256 if (!m_psz)
257 return VERR_NO_DIGITS;
258 return RTStrToUInt64Ex(m_psz, NULL, 0, &i);
259}
260
261int MiniString::toInt(uint32_t &i) const
262{
263 if (!m_psz)
264 return VERR_NO_DIGITS;
265 return RTStrToUInt32Ex(m_psz, NULL, 0, &i);
266}
267
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette