HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SYS_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: SYS_String.h (SYS Library, C++)
7  *
8  * COMMENTS:
9  * System-independent string manipulation functions.
10  */
11 
12 #ifndef __SYS_String__
13 #define __SYS_String__
14 
15 #include "SYS_API.h"
16 
17 #include "SYS_Inline.h"
18 #include "SYS_Types.h"
19 
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 
24 /// Append to a hash for a char string
25 static inline uint32
26 SYSstring_hashseed(const char *str, exint length = SYS_EXINT_MAX, uint32 hash = 0)
27 {
28  if (str != nullptr && length >= 0 && *str)
29  {
30  // A note on the magic number 37.
31  // We want to scale by SOMETHING so that order is preserved.
32  // That something should be prime and not a power of two to
33  // avoid wrapping issues.
34  // That something should be larger than our range of expected
35  // values to avoid interference between consecutive letters.
36  // 0-9a-z is 36 letters long.
37  //
38  // The real reason is that this is what Perl uses.
39  do
40  {
41  hash = (37 * hash) + (*str);
42  ++str;
43  --length;
44  } while (length >= 0 && *str);
45 
46  // Make sure we never return zero for non-zero hash, since in many
47  // cases we precompute string hashes and zero means "not initialized".
48  // This allows us conforming values across different string
49  // container implementations.
50  if (hash == 0)
51  hash = 1;
52  }
53 
54  return hash;
55 }
56 
57 /// Generate a hash for a char string
58 static inline uint32
59 SYSstring_hash(const char *str, exint len = SYS_EXINT_MAX)
60 {
61  return SYSstring_hashseed(str, len, 0);
62 }
63 
64 
65 /// A constexpr version of the above. In C++11 we can't use iteration, but
66 /// we can do recursion. Go figure.
67 namespace
68 {
69  static inline constexpr uint32
70  SYSstring_hash_inner(const char *str, uint32 h)
71  {
72  return str[0] ? SYSstring_hash_inner(str+1, str[0] + h * 37) : h;
73  }
74 }
75 
76 static inline constexpr uint32
77 SYSstring_hash_literal(const char *str)
78 {
79  return str[0]
80  ? (SYSstring_hash_inner(str, 0) == 0 ? 1 : SYSstring_hash_inner(str, 0))
81  : 0;
82 }
83 
84 
/// A standard name for a strtok that keeps its state in a caller-supplied
/// variable instead of between calls.
///
/// SYSstrtok splits a string into a sequence of tokens.
///  - 'string' is the text to split on the first call and nullptr on every
///    following call. The text *will be modified*: the delimiter that ends
///    each returned token is overwritten with a null character.
///  - 'delimit' is a null-terminated list of single-character delimiters.
///  - 'context' points to a char * that SYSstrtok uses to remember where to
///    resume. Pass the same, otherwise untouched, variable on every call.
///
/// Returns a pointer to the first non-delimiter character of the next token
/// (never an empty string), or nullptr when no tokens remain.
/// Example:
/// @code
///	char	*string = strdup(getString());
///	char	*strptr = string;
///	char	*context;
///	char	*token = SYSstrtok(string, MY_DELIMITERS, &context);
///	while (token)
///	{
///	    do_some_stuff();
///	    token = SYSstrtok(nullptr, MY_DELIMITERS, &context);
///	}
///	free(strptr);
/// @endcode
inline char *
SYSstrtok(char *string, const char *delimit, char **context)
{
#ifdef LINUX
    return strtok_r(string, delimit, context);
#else
    // Hand-written equivalent of strtok_r() for builds where LINUX is not
    // defined.

    // A null 'string' means "carry on from where the last call stopped".
    char *cursor = string ? string : *context;

    // Step over any delimiters in front of the next token.
    cursor += strspn(cursor, delimit);

    // Only delimiters (or nothing at all) were left: no more tokens.
    if (!*cursor)
    {
        *context = cursor;
        return nullptr;
    }

    char *const token = cursor;

    // Look for the delimiter that closes this token.
    char *stop = strpbrk(token, delimit);
    if (stop)
    {
        // Cut the token off at the delimiter and resume just after it.
        *stop = '\0';
        *context = stop + 1;
    }
    else
    {
        // Last token in the string: park the context on the terminator so
        // that the next call returns nullptr.
        *context = token + strlen(token);
    }

    return token;
#endif
}
158 
/// Bounded string copy modelled on OpenBSD's strlcpy().
///
/// strncpy() has two awkward properties:
///  - when the limit is hit, the result is not null terminated;
///  - when the limit is not hit, the rest of the buffer is filled with
///    nulls, which can be costly with large buffers.
/// SYSstrlcpy() instead:
///  - always null terminates the result (unless size == 0);
///  - leaves the remainder of the buffer untouched.
///
/// Return value: the number of characters copied, not counting the
/// terminator, when @c src fits in the buffer; @c size + 1 when it had to be
/// truncated. Truncation can therefore be detected with rcode >= size.
///
/// The size is the size of the buffer, not the portion of the sub-string to
/// copy. If you want to only copy a portion of a string, make sure that the
/// @c size passed in is one @b larger than the length of the string since
/// SYSstrlcpy() will always ensure the string is null terminated.
///
/// It is invalid to pass a size of 0.
///
/// Examples: @code
///	char	buf[8];
///	strncpy(buf, "dog", 8)		// buf == ['d','o','g',0,0,0,0,0]
///	SYSstrlcpy(buf, "dog", 8)	// buf == ['d','o','g',0,?,?,?,?]
///	strncpy(buf, "dog", 2)		// buf == ['d','o',?,?,?,?,?,?]
///	SYSstrlcpy(buf, "dog", 2)	// buf == ['d',0,?,?,?,?,?,?]
///	SYSstrlcpy(buf, "dog", 3)	// buf == ['d','o',0,?,?,?,?,?]
/// @endcode
inline size_t
SYSstrlcpy(char *dest, const char *src, size_t size)
{
    // memccpy() copies until it has written a null byte or 'size' bytes,
    // and returns the position just past the null if one was copied.
    void *past_null = ::memccpy(dest, src, '\0', size);
    if (!past_null)
    {
        // src did not fit: terminate what was copied by hand.
        if (size > 0)
            dest[size - 1] = '\0';

        // Return rcode >= size to indicate that we would've busted the
        // buffer.
        return size + 1;
    }

    // Characters copied, excluding the terminator.
    return static_cast<char *>(past_null) - dest - 1;
}
201 
202 /// The following implements the strlcpy() function from OpenBSD. The
203 /// differences between strlcpy() and strncpy() are:
204 /// - The buffer will not be filled with null
205 /// - The size passed in is the full length of the buffer (not
206 /// remaining length)
207 /// - The dest will always be null terminated (unless it is already larger
208 /// than the size passed in)
209 /// The function returns strln(src) + SYSmin(size, strlen(dest))
210 /// If rcode >= size, truncation occurred
211 inline size_t
212 SYSstrlcat(char *dest, const char *src, size_t size)
213 {
214  // Find the length of the dest buffer. Only check for a null within the
215  // allocated space of the buffer (i.e. we can't use strlen()).
216  size_t dlen;
217  for (dlen = 0; dlen < size; dlen++)
218  if (!dest[dlen])
219  break;
220  if (dlen == size)
221  return size + 1; // Not enough space left
222  // Now, copy the source over
223  return dlen + SYSstrlcpy(dest+dlen, src, size-dlen);
224 }
225 
/// Case-insensitive string comparison. Forwards to stricmp() when WIN32 is
/// defined and to strcasecmp() otherwise, returning that call's result.
inline int
SYSstrcasecmp(const char *a, const char *b)
{
#if !defined(WIN32)
    return ::strcasecmp(a, b);
#else
    return ::stricmp(a, b);
#endif
}
235 
/// Case-insensitive comparison of at most @c n characters. Forwards to
/// strnicmp() when WIN32 is defined and to strncasecmp() otherwise,
/// returning that call's result.
inline int
SYSstrncasecmp(const char *a, const char *b, size_t n)
{
#if !defined(WIN32)
    return ::strncasecmp(a, b, n);
#else
    return ::strnicmp(a, b, n);
#endif
}
245 
/// Replacement for strcasestr, since no equivalent exists on Win32.
///
/// Searches @c haystack for the first occurrence of @c needle, ignoring
/// case.
/// @return A pointer into @c haystack at the start of the match, or nullptr
///	    if there is no match or if either argument is nullptr. On Win32
///	    an empty needle matches at the beginning of @c haystack; on other
///	    platforms that case is left to the system strcasestr().
inline char *
SYSstrcasestr(const char *haystack, const char *needle)
{
    // Reject null arguments up front so that every platform behaves the
    // same way instead of only the Win32 implementation being guarded.
    if (!haystack || !needle)
        return nullptr;

#if defined(WIN32)
    // Designed for the normal case (small needle, large haystack).
    // Asymptotic cases will probably perform very poorly. For those, we'll
    // need: https://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm

    // Empty needle gives beginning of string.
    if (!*needle)
        return const_cast<char *>(haystack);

    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so every char is converted before being passed in. A plain
    // char holding a negative value would otherwise be undefined behaviour.
    const int first = tolower(static_cast<unsigned char>(*needle));
    for (;;)
    {
        // Find the start of the pattern in the string.
        while (*haystack
                && tolower(static_cast<unsigned char>(*haystack)) != first)
            haystack++;

        if (!*haystack)
            return nullptr;

        // Found the start of the pattern.
        const char *h = haystack, *n = needle;
        do
        {
            // End of needle? We found our man.
            if (!*++n)
                return const_cast<char *>(haystack);
            // End of haystack? Nothing more to look for.
            if (!*++h)
                return nullptr;
        } while (tolower(static_cast<unsigned char>(*h))
                    == tolower(static_cast<unsigned char>(*n)));

        haystack++;
    }
#else
    return const_cast<char*>(::strcasestr(const_cast<char*>(haystack),needle));
#endif
}
287 
/// Implementation of strndup for Windows; other platforms use the system
/// strndup().
///
/// Copies at most @c n characters of @c s into a newly malloc()'d, null
/// terminated buffer.
/// @return The new string, to be released with free(), or nullptr if the
///	    allocation failed.
inline char *
SYSstrndup(const char *s, size_t n)
{
#if defined(WIN32)
    // Look for the terminator within the first n bytes only, so that no
    // byte of s beyond n is ever read.
    const void *terminator = ::memchr(s, '\0', n);
    if (terminator)
        n = static_cast<size_t>(static_cast<const char *>(terminator) - s);

    char *r = static_cast<char *>(::malloc(n + 1));
    if (!r)
        return nullptr;	// Out of memory

    ::memcpy(r, s, n);
    r[n] = '\0';
    return r;
#else
    return ::strndup(s, n);
#endif
}
303 
// On Windows, is*() methods are badly implemented.
// Running testut -i -t SYS_String shows at least a 1.3x speed up with these
// hand-written replacements.
#ifdef _WIN32
/// True for 'A'-'Z' and 'a'-'z'.
SYS_FORCE_INLINE bool
SYSisalpha(unsigned char c)
{
    // Clearing bit 5 maps 'a'-'z' onto 'A'-'Z'. The subtraction is done in
    // unsigned arithmetic, so anything below 'A' wraps around to a large
    // value and a single comparison checks both ends of the range.
    const unsigned folded = unsigned(c) & ~0x20u;
    return (folded - 'A') <= unsigned('Z' - 'A');
}
/// True for '0'-'9'.
SYS_FORCE_INLINE bool
SYSisdigit(unsigned char c)
{
    // Interestingly, two comparisons tend to perform better than one here.
    return !(c < '0' || c > '9');
}
#endif // _WIN32
320 
321 // Windows decided in their infinite wisdom that negative values
322 // should crash their isfoo() functions, guard by only taking unsigned char
323 // arguments which get casted again to int's.
324 
325 #define SYS_IS_WRAPPER(TEST) \
326 SYS_FORCE_INLINE bool \
327 SYS##TEST(unsigned char c) \
328 { \
329  return TEST(c); \
330 } \
331 /**/
332 
333 SYS_IS_WRAPPER(isalnum)
334 #ifndef _WIN32
335 SYS_IS_WRAPPER(isalpha)
336 #endif
337 // isascii is specifically marked deprecated
338 // SYS_IS_WRAPPER(isascii)
339 // This does have a POSIX standard, but isn't in Windows.
340 // SYS_IS_WRAPPER(isblank)
341 SYS_IS_WRAPPER(iscntrl)
342 #ifndef _WIN32
343 SYS_IS_WRAPPER(isdigit)
344 #endif
345 SYS_IS_WRAPPER(isgraph)
346 SYS_IS_WRAPPER(islower)
347 SYS_IS_WRAPPER(isprint)
348 SYS_IS_WRAPPER(ispunct)
349 // isspace is rather important we get very, very, fast.
350 // SYS_IS_WRAPPER(isspace)
351 SYS_IS_WRAPPER(isupper)
352 SYS_IS_WRAPPER(isxdigit)
353 
354 #undef SYS_IS_WRAPPER
355 
356 
// Defines SYSisspace(TYPE): true for the space character and for the
// characters from tab (0x09) through carriage return (0x0d), false for
// everything else.
#define CREATE_SYSisspace(TYPE) \
inline bool \
SYSisspace(TYPE c) \
{ \
    /* Fastest exit for non-spaces. */ \
    if (c > ' ') \
        return false; \
    /* Either equal to space, or between tab and carriage return */ \
    return (c == ' ' || (c <= '\xd' && c >= '\x9')); \
}

// char, signed char and unsigned char are three distinct types. Without the
// plain char overload, a call with a char argument (e.g. SYSisspace(*str))
// is ambiguous between the other two.
CREATE_SYSisspace(char)
CREATE_SYSisspace(unsigned char)
CREATE_SYSisspace(signed char)

#undef CREATE_SYSisspace
373 
374 
375 #endif
size_t SYSstrlcat(char *dest, const char *src, size_t size)
Definition: SYS_String.h:212
#define SYS_IS_WRAPPER(TEST)
Definition: SYS_String.h:325
GLsizei const GLchar *const * string
Definition: glcorearb.h:813
#define CREATE_SYSisspace(TYPE)
Definition: SYS_String.h:357
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1221
#define SYS_EXINT_MAX
Definition: SYS_Types.h:172
GLsizeiptr size
Definition: glcorearb.h:663
GLdouble n
Definition: glcorearb.h:2007
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:237
int64 exint
Definition: SYS_Types.h:116
GLuint GLuint end
Definition: glcorearb.h:474
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:113
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:187
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1221
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2001
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:248
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:227
char * SYSstrndup(const char *s, size_t n)
Definition: SYS_String.h:290
GLboolean r
Definition: glcorearb.h:1221
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794
GLenum src
Definition: glcorearb.h:1792
unsigned int uint32
Definition: SYS_Types.h:36