HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SYS_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: SYS_String.h (SYS Library, C++)
7  *
8  * COMMENTS:
9  * System-independent string manipulation functions.
10  */
11 
12 #ifndef __SYS_String__
13 #define __SYS_String__
14 
15 #include "SYS_API.h"
16 
17 #include "SYS_Inline.h"
18 #include "SYS_Types.h"
19 
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 
24 /// Append to a hash for a char string
25 static inline uint32
26 SYSstring_hashseed(const char *str, exint length = SYS_EXINT_MAX, uint32 hash = 0)
27 {
28  if (str != nullptr && length >= 0 && *str)
29  {
30  // A note on the magic number 37.
31  // We want to scale by SOMETHING so that order is preserved.
32  // That something should be prime and not a power of two to
33  // avoid wrapping issues.
34  // That something should be larger than our range of expected
35  // values to avoid interference between consecutive letters.
36  // 0-9a-z is 36 letters long.
37  //
38  // The real reason is that this is what Perl uses.
39  do
40  {
41  hash = (37 * hash) + (*str);
42  ++str;
43  --length;
44  } while (length >= 0 && *str != '\0');
45 
46  // Make sure we never return zero for non-zero hash, since in many
47  // cases we precompute string hashes and zero means "not initialized".
48  // This allows us conforming values across different string
49  // container implementations.
50  if (hash == 0)
51  hash = 1;
52  }
53 
54  return hash;
55 }
56 
57 /// Generate a hash for a char string
58 static inline uint32
59 SYSstring_hash(const char *str, exint len = SYS_EXINT_MAX)
60 {
61  return SYSstring_hashseed(str, len, 0);
62 }
63 
64 
65 /// A constexpr version of the above. In C++11 we can't use iteration, but
66 /// we can do recursion. Go figure.
67 namespace
68 {
69  static inline constexpr uint32
70  SYSstring_hash_inner(const char *str, uint32 h)
71  {
72  return str[0] ? SYSstring_hash_inner(str+1, str[0] + h * 37) : h;
73  }
74 }
75 
76 static inline constexpr uint32
77 SYSstring_hash_literal(const char *str)
78 {
79  return str[0]
80  ? (SYSstring_hash_inner(str, 0) == 0 ? 1 : SYSstring_hash_inner(str, 0))
81  : 0;
82 }
83 
84 
85 /// A standard name for a strtok that doesn't maintain state between calls.
86 /// This version is thus both reentrant and threadsafe.
87 /// SYSstrtok parses a string into a sequence of tokens. On the first call to
88 /// SYSstrtok, the string to be parsed must be specified as the parameter
89 /// 'string'. This parameter *will be modified* (destroying your copy).
90 /// 'delimit' specifies an array of single characters that will be used
91 /// as delimiters.
92 /// 'context' is a char * variable used internally by SYSstrtok to maintain
93 /// context between calls. Subsequent calls must specify the same unchanged
94 /// context variable as the first call.
95 /// To use SYSstrtok, on the first call first pass in your string as the
96 /// parameter 'string'; on subsequent calls, pass it in as nullptr.
97 /// SYSstrtok returns non-empty strings pointing to the first non-delimiter
98 /// character of each token, or nullptr if no further tokens are available.
99 /// Example:
100 /// @code
101 /// char *string = strdup(getString());
102 /// char *strptr = string;
103 /// char *context;
104 /// char *token = SYSstrtok(string, MY_DELIMITERS, &context);
105 /// while (token)
106 /// {
107 /// do_some_stuff();
108 /// token = SYSstrtok(nullptr, MY_DELIMITERS, &context);
109 /// }
110 /// free(strptr);
111 /// @endcode
inline char *
SYSstrtok(char *string, const char *delimit, char **context)
{
#ifdef LINUX
    return strtok_r(string, delimit, context);
#else
    // Hand-written equivalent of strtok_r() for builds that do not take
    // the branch above.

    // Resume from the saved position when no new string is supplied.
    char *cursor = string ? string : *context;

    // Step over any delimiters in front of the next token.
    cursor += strspn(cursor, delimit);

    // Nothing but delimiters (or nothing at all) remains: we are done.
    if (!*cursor)
    {
        *context = cursor;
        return nullptr;
    }

    // The cursor now sits on the first character of a token.
    char *token = cursor;

    // Locate the delimiter that closes the token, if there is one.
    char *stop = strpbrk(token, delimit);
    if (stop)
    {
        // Terminate the token in place and continue just past it next time.
        *stop = '\0';
        *context = stop + 1;
    }
    else
    {
        // Last token of the string: park the context on the terminator so
        // that the next call reports nullptr.
        *context = strchr(token, '\0');
    }

    return token;
#endif
}
158 
159 /// The semantics for strncpy() leave a little to be desired
160 /// - If the buffer limit is hit, the string isn't guaranteed to be null
161 /// terminated.
162 /// - If the buffer limit isn't hit, the entire remainder of the string is
163 /// filled with nulls (which can be costly with large buffers).
164 /// The following implements the strlcpy() function from OpenBSD. The function
165 /// is very similar to strncpy() but
166 /// The return code is the length of the src string (or size + 1 if the string had to be truncated)
167 /// The resulting string is always null terminated (unless size == 0)
168 /// The remaining buffer is not touched
169 /// It's possible to check for errors by testing rcode >= size.
170 ///
171 /// The size is the size of the buffer, not the portion of the sub-string to
172 /// copy. If you want to only copy a portion of a string, make sure that the
173 /// @c size passed in is one @b larger than the length of the string since
174 /// SYSstrlcpy() will always ensure the string is null terminated.
175 ///
176 /// It is invalid to pass a size of 0.
177 ///
178 /// Examples: @code
179 /// char buf[8];
180 /// strncpy(buf, "dog", 8) // buf == ['d','o','g',0,0,0,0,0]
181 /// SYSstrlcpy(buf, "dog", 8) // buf == ['d','o','g',0,?,?,?,?]
182 /// strncpy(buf, "dog", 2) // buf == ['d','o',?,?,?,?,?,?] (not null terminated!)
183 /// SYSstrlcpy(buf, "dog", 2) // buf == ['d',0,?,?,?,?,?,?]
184 /// SYSstrlcpy(buf, "dog", 3) // buf == ['d','o',0,?,?,?,?,?]
185 /// @endcode
inline size_t
SYSstrlcpy(char *dest, const char *src, size_t size)
{
    // memccpy() copies up to and including the first null byte and returns
    // a pointer just past the copied null, or a null pointer when no null
    // byte was found within the first 'size' bytes.
    char *past_nul = (char *)::memccpy(dest, src, 0, size);
    if (!past_nul)
    {
        // The source did not fit: terminate the (non-empty) buffer
        // ourselves...
        if (size != 0)
            dest[size - 1] = 0;

        // ...and return rcode >= size to signal that we would have busted
        // the buffer.
        return size + 1;
    }

    // Everything fit; the copied length excludes the terminator.
    return past_nul - dest - 1;
}
201 
202 /// The following implements the strlcpy() function from OpenBSD. The
203 /// differences between strlcpy() and strncpy() are:
204 /// - The buffer will not be filled with null
205 /// - The size passed in is the full length of the buffer (not
206 /// remaining length)
207 /// - The dest will always be null terminated (unless it is already larger
208 /// than the size passed in)
209 /// The function returns strlen(dest) + strlen(src) when the result fits, and size + 1 otherwise.
210 /// If rcode >= size, truncation occurred
211 inline size_t
212 SYSstrlcat(char *dest, const char *src, size_t size)
213 {
214  // Find the length of the dest buffer. Only check for a null within the
215  // allocated space of the buffer (i.e. we can't use strlen()).
216  size_t dlen;
217  for (dlen = 0; dlen < size; dlen++)
218  if (!dest[dlen])
219  break;
220  if (dlen == size)
221  return size + 1; // Not enough space left
222  // Now, copy the source over
223  return dlen + SYSstrlcpy(dest+dlen, src, size-dlen);
224 }
225 
/// Case-insensitive string comparison that tolerates null pointers,
/// matching UT_String: two null pointers compare equal (0), a non-null
/// string compared against null yields 1, and null against non-null
/// yields -1.
inline int
SYSstrcasecmp(const char *a, const char *b)
{
    if (a && b)
    {
#if defined(WIN32)
        return ::stricmp(a, b);
#else
        return ::strcasecmp(a, b);
#endif
    }

    // At least one side is null from here on.
    if (!a && !b)
        return 0;
    return a ? 1 : -1;
}
242 
/// Case-sensitive string comparison that tolerates null pointers,
/// matching UT_String: two null pointers compare equal (0), a non-null
/// string compared against null yields 1, and null against non-null
/// yields -1.
inline int
SYSstrcmp(const char *a, const char *b)
{
    if (a && b)
        return ::strcmp(a, b);

    // At least one side is null from here on.
    if (!a && !b)
        return 0;
    return a ? 1 : -1;
}
255 
// Generates SYS<NAME>(s, c): a wrapper around ::<NAME>(s, c) that returns
// nullptr when the string itself is null. QUALIFIER is either empty or
// 'const', selecting which overload is produced.
#define WRAP_NULLTEST_C(NAME, QUALIFIER) \
inline QUALIFIER char * \
SYS##NAME(QUALIFIER char *s, int c) \
{ \
    return s ? ::NAME(s, c) : nullptr; \
} \
/**/

// Emits both the mutable and the const overload of a wrapper.
#define WRAP_NULLTEST(NAME) \
WRAP_NULLTEST_C(NAME, ) \
WRAP_NULLTEST_C(NAME, const) \
/**/

// The standard does not specify behaviour on null strings, but it
// is reasonable to say a search token is never inside a null string,
// thereby increasing safety.
WRAP_NULLTEST(strchr)
WRAP_NULLTEST(strrchr)

#undef WRAP_NULLTEST
#undef WRAP_NULLTEST_C
278 
/// Case-insensitive comparison of at most @c n characters that tolerates
/// null pointers: two null pointers compare equal (0), a non-null string
/// compared against null yields 1, and null against non-null yields -1.
inline int
SYSstrncasecmp(const char *a, const char *b, size_t n)
{
    if (a && b)
    {
#if defined(WIN32)
        return ::strnicmp(a, b, n);
#else
        return ::strncasecmp(a, b, n);
#endif
    }

    // At least one side is null from here on.
    if (!a && !b)
        return 0;
    return a ? 1 : -1;
}
294 
/// Replacement for strcasestr, since no equivalent exists on Win32.
///
/// @param haystack  String to search in; may be nullptr.
/// @param needle    String to search for, ignoring case; may be nullptr.
/// @return Pointer to the first case-insensitive occurrence of @c needle
///         inside @c haystack, or nullptr when there is none or when either
///         argument is nullptr.
inline char *
SYSstrcasestr(const char *haystack, const char *needle)
{
    // Reject null strings on every platform, not only in the hand-written
    // search below: the system strcasestr() does not accept them.
    if (!haystack || !needle)
        return nullptr;

#if defined(WIN32)
    // Designed for the normal case (small needle, large haystack).
    // Asymptotic cases will probably perform very poorly. For those, we'll
    // need: https://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm

    // Empty needle gives beginning of string.
    if (!*needle)
        return const_cast<char *>(haystack);

    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so every plain char is converted before being passed in.
    const int first = tolower((unsigned char)*needle);
    for (;;)
    {
        // Find the start of the pattern in the string.
        while (*haystack && tolower((unsigned char)*haystack) != first)
            haystack++;

        if (!*haystack)
            return nullptr;

        // Found the start of the pattern.
        const char *h = haystack, *n = needle;
        do
        {
            // End of needle? We found our man.
            if (!*++n)
                return const_cast<char *>(haystack);
            // End of haystack? Nothing more to look for.
            if (!*++h)
                return nullptr;
        } while (tolower((unsigned char)*h) == tolower((unsigned char)*n));

        haystack++;
    }
#else
    return const_cast<char*>(::strcasestr(const_cast<char*>(haystack),needle));
#endif
}
336 
/// Implementation of strndup for Windows; forwards to ::strndup() elsewhere.
///
/// @param s  Source characters. Only the first @c n of them are examined by
///           the Windows implementation.
/// @param n  Maximum number of characters to copy.
/// @return A newly malloc()'ed, null-terminated copy (release it with
///         free()). The Windows implementation returns nullptr when the
///         allocation fails.
inline char *
SYSstrndup(const char *s, size_t n)
{
#if defined(WIN32)
    // Measure the source without reading beyond the first n characters, so
    // that input which is not null-terminated within n is handled safely.
    size_t len = 0;
    while (len < n && s[len])
        ++len;

    char *r = (char *)::malloc(len + 1);
    if (!r)
        return nullptr;     // Out of memory: nothing to copy into.

    ::memcpy(r, s, len);
    r[len] = '\0';
    return r;
#else
    return ::strndup(s, n);
#endif
}
352 
353 // On Windows, is*() methods are badly implemented.
354 // Running testut -i -t SYS_String shows about at least a 1.3x speed up.
355 #ifdef _WIN32
356 SYS_FORCE_INLINE bool
357 SYSisalpha(unsigned char c)
358 {
359  // This test relies on promoting to unsigned integer
360  return (unsigned(c & ~(1<<5)) - 'A') <= ('Z' - 'A');
361 }
362 SYS_FORCE_INLINE bool
363 SYSisdigit(unsigned char c)
364 {
365  // Interestingly, this tends to perform better than one comparison
366  return (c >= '0' && c <= '9');
367 }
368 #endif // _WIN32
369 
370 // Windows decided in their infinite wisdom that negative values
371 // should crash their isfoo() functions, guard by only taking unsigned char
372 // arguments which get casted again to int's.
373 
374 #define SYS_IS_WRAPPER(TEST) \
375 SYS_FORCE_INLINE bool \
376 SYS##TEST(unsigned char c) \
377 { \
378  return TEST(c); \
379 } \
380 /**/
381 
382 SYS_IS_WRAPPER(isalnum)
383 #ifndef _WIN32
384 SYS_IS_WRAPPER(isalpha)
385 #endif
386 // isascii is specifically marked deprecated
387 // SYS_IS_WRAPPER(isascii)
388 // This does have a POSIX standard, but isn't in Windows.
389 // SYS_IS_WRAPPER(isblank)
390 SYS_IS_WRAPPER(iscntrl)
391 #ifndef _WIN32
392 SYS_IS_WRAPPER(isdigit)
393 #endif
394 SYS_IS_WRAPPER(isgraph)
395 SYS_IS_WRAPPER(islower)
396 SYS_IS_WRAPPER(isprint)
397 SYS_IS_WRAPPER(ispunct)
398 // isspace is rather important we get very, very, fast.
399 // SYS_IS_WRAPPER(isspace)
400 SYS_IS_WRAPPER(isupper)
401 SYS_IS_WRAPPER(isxdigit)
402 
403 #undef SYS_IS_WRAPPER
404 
405  static SYS_FORCE_INLINE const void *
406  SYSmemrchr(const void *v, int c, exint n)
407  {
408 #if defined(LINUX)
409  return ::memrchr(v, c, n);
410 #else
411  const unsigned char *beg = (const unsigned char *)v;
412  const unsigned char *full_end = (const unsigned char *)v + n;
413  const unsigned char *end = (const unsigned char *)v + (n/4)*4;
414  for (const unsigned char *s = full_end; s-->end;)
415  {
416  if (*s == c)
417  return s;
418  }
419  for (const unsigned char *s = end-1; s > beg;)
420  {
421  if (*s == c) return s;
422  --s;
423  if (*s == c) return s;
424  --s;
425  if (*s == c) return s;
426  --s;
427  if (*s == c) return s;
428  --s;
429  }
430  return nullptr;
431 #endif
432  }
433 
434 
435 
// Generates SYSisspace(TYPE c): true for a space and for the characters
// from tab (0x9) through carriage return (0xd), false for everything else.
#define CREATE_SYSisspace(TYPE) \
inline bool \
SYSisspace(TYPE c) \
{ \
    /* Fastest exit for non-spaces. */ \
    if (c > ' ') \
        return false; \
    /* Either equal to space, or between tab and carriage return */ \
    return (c == ' ' || (c <= '\xd' && c >= '\x9')); \
}

// Plain char is a distinct type from both signed and unsigned char, so it
// needs an overload of its own; without it a call such as SYSisspace('a')
// is ambiguous between the other two.
CREATE_SYSisspace(char)
CREATE_SYSisspace(unsigned char)
CREATE_SYSisspace(signed char)

#undef CREATE_SYSisspace
452 
453 
454 #endif
GLdouble s
Definition: glew.h:1390
size_t SYSstrlcat(char *dest, const char *src, size_t size)
Definition: SYS_String.h:212
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
#define SYS_IS_WRAPPER(TEST)
Definition: SYS_String.h:374
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
#define CREATE_SYSisspace(TYPE)
Definition: SYS_String.h:436
int64 exint
Definition: SYS_Types.h:125
GLdouble l
Definition: glew.h:9122
const GLdouble * v
Definition: glew.h:1391
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:280
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLuint GLuint end
Definition: glew.h:1253
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:113
GLsizei n
Definition: glew.h:4040
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
int SYSstrcmp(const char *a, const char *b)
Definition: SYS_String.h:244
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:187
GLfloat GLfloat GLfloat GLfloat h
Definition: glew.h:8011
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
GLsizei const GLchar *const * string
Definition: glew.h:1844
basic_printf_context_t< buffer >::type context
Definition: printf.h:631
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:297
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:227
GLdouble GLdouble GLdouble r
Definition: glew.h:1406
unsigned int uint32
Definition: SYS_Types.h:40
char * SYSstrndup(const char *s, size_t n)
Definition: SYS_String.h:339
#define WRAP_NULLTEST(FUNCTION)
Definition: SYS_String.h:265
GLenum GLsizei len
Definition: glew.h:7752