HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SYS_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: SYS_String.h (SYS Library, C++)
7  *
8  * COMMENTS:
9  * System-independent string manipulation functions.
10  */
11 
12 #ifndef __SYS_String__
13 #define __SYS_String__
14 
15 #include "SYS_API.h"
16 
17 #include "SYS_Inline.h"
18 #include "SYS_Types.h"
19 
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 
24 static inline constexpr uint32
25 SYSstring_hashseed(
26  const char *str,
28  uint32 hash = 0,
29  bool allow_nulls = false)
30 {
31  if (!str || length <= 0 || (!allow_nulls && *str == '\0'))
32  return hash;
33 
34  // A note on the magic number 37.
35  // We want to scale by SOMETHING so that order is preserved.
36  // That something should be prime and not a power of two to
37  // avoid wrapping issues.
38  // That something should be larger than our range of expected
39  // values to avoid interference between consecutive letters.
40  // 0-9a-z is 36 letters long.
41  //
42  // The real reason is that this is what Perl uses.
43  if (!allow_nulls)
44  {
45  for (; length > 0 && *str != '\0'; length--, str++)
46  {
47  hash = (37 * hash) + (*str);
48  }
49  }
50  else
51  {
52  for (; length > 0; length--, str++)
53  {
54  hash = (37 * hash) + (*str);
55  }
56  }
57 
58  // Make sure we never return zero for non-zero hash, since in many
59  // cases we precompute string hashes and zero means "not initialized".
60  // This allows us conforming values across different string
61  // container implementations.
62  if (hash == 0)
63  hash = 1;
64 
65  return hash;
66 }
67 
68 /// Generate a hash for a char string
69 static inline constexpr uint32
70 SYSstring_hash(
71  const char *str,
72  exint len = SYS_EXINT_MAX,
73  bool allow_nulls = false)
74 {
75  return SYSstring_hashseed(str, len, /*seed*/ 0, allow_nulls);
76 }
77 
/// A strtok variant that keeps no hidden state between calls; everything it
/// needs to resume lives in the caller-supplied @c context, which makes it
/// both reentrant and threadsafe.
///
/// SYSstrtok splits a string into a sequence of tokens.
///  - 'string' is the text to parse on the first call and nullptr on every
///    following call. The text *will be modified* (delimiters are overwritten
///    with '\0'), so pass a copy if you need the original.
///  - 'delimit' is a set of single characters, each of which separates tokens.
///  - 'context' is a char * variable owned by the caller that SYSstrtok uses
///    to remember where to resume. Pass the same, unmodified variable on
///    every call of one tokenizing pass.
///
/// Each call returns a non-empty token starting at its first non-delimiter
/// character, or nullptr once no tokens remain.
///
/// Example:
/// @code
///     char *string = strdup(getString());
///     char *context;
///     char *token = SYSstrtok(string, MY_DELIMITERS, &context);
///     while (token)
///     {
///         do_some_stuff(token);
///         token = SYSstrtok(nullptr, MY_DELIMITERS, &context);
///     }
///     free(string);
/// @endcode
inline char *
SYSstrtok(char *string, const char *delimit, char **context)
{
#ifdef LINUX
    return strtok_r(string, delimit, context);
#else
    // Hand-written equivalent of strtok_r for builds where LINUX is not
    // defined.

    // A null 'string' means "continue where the previous call stopped".
    char *cursor = string ? string : *context;

    // Step over any delimiters in front of the next token.
    cursor += strspn(cursor, delimit);

    // Nothing but delimiters (or nothing at all) was left.
    if (!*cursor)
    {
        *context = cursor;
        return nullptr;
    }

    // The token runs up to the next delimiter or the end of the text.
    char *token = cursor;
    char *stop = token + strcspn(token, delimit);
    if (*stop != '\0')
    {
        // Cut the token off at the delimiter and resume just past it.
        *stop = '\0';
        ++stop;
    }
    // When the token was the last one, 'stop' is the terminator itself, so
    // the next call returns nullptr.
    *context = stop;

    return token;
#endif
}
151 
/// Bounded string copy in the spirit of OpenBSD's strlcpy().
///
/// strncpy() has two awkward properties:
///  - when the limit is hit, the result is not null terminated;
///  - when the limit is not hit, the whole rest of the buffer is filled with
///    nulls, which is costly for large buffers.
/// SYSstrlcpy() avoids both:
///  - the result is always null terminated (unless size == 0);
///  - bytes of @c dest after the terminator are left untouched.
///
/// @c size is the size of the destination buffer, not the number of
/// characters to copy. To copy only a prefix of a string, pass a @c size one
/// @b larger than the prefix length, because room is always reserved for the
/// terminator.
///
/// It is invalid to pass a size of 0.
///
/// @return strlen(src) when the whole string fit, otherwise size + 1. Either
///         way, truncation can be detected by testing rcode >= size.
///
/// Examples: @code
///     char buf[8];
///     SYSstrlcpy(buf, "dog", 8);  // returns 3, buf == ['d','o','g',0,?,?,?,?]
///     SYSstrlcpy(buf, "dog", 4);  // returns 3, buf == ['d','o','g',0,?,?,?,?]
///     SYSstrlcpy(buf, "dog", 3);  // returns 4, buf == ['d','o',0,?,?,?,?,?]
///     SYSstrlcpy(buf, "dog", 2);  // returns 3, buf == ['d',0,?,?,?,?,?,?]
/// @endcode
inline size_t
SYSstrlcpy(char *dest, const char *src, size_t size)
{
    // memccpy() copies up to and including the first 0 byte and returns the
    // address just past it, or null when no 0 byte was seen in size bytes.
    char *past_nul = static_cast<char *>(::memccpy(dest, src, 0, size));
    if (!past_nul)
    {
        // The source did not fit: terminate what was copied.
        if (size != 0)
            dest[size - 1] = 0;

        // Return rcode >= size to signal that the buffer was too small.
        return size + 1;
    }

    // Number of characters copied, not counting the terminator.
    return past_nul - dest - 1;
}
194 
195 /// The following implements the strlcpy() function from OpenBSD. The
196 /// differences between strlcpy() and strncpy() are:
197 /// - The buffer will not be filled with null
198 /// - The size passed in is the full length of the buffer (not
199 /// remaining length)
200 /// - The dest will always be null terminated (unless it is already larger
201 /// than the size passed in)
202 /// The function returns strln(src) + SYSmin(size, strlen(dest))
203 /// If rcode >= size, truncation occurred
204 inline size_t
205 SYSstrlcat(char *dest, const char *src, size_t size)
206 {
207  // Find the length of the dest buffer. Only check for a null within the
208  // allocated space of the buffer (i.e. we can't use strlen()).
209  size_t dlen;
210  for (dlen = 0; dlen < size; dlen++)
211  if (!dest[dlen])
212  break;
213  if (dlen == size)
214  return size + 1; // Not enough space left
215  // Now, copy the source over
216  return dlen + SYSstrlcpy(dest+dlen, src, size-dlen);
217 }
218 
/// Case-insensitive string comparison that tolerates null pointers.
///
/// Two nulls compare equal and a null sorts before any non-null string
/// (intended to match UT_String). Otherwise the platform's case-insensitive
/// comparison decides.
inline int
SYSstrcasecmp(const char *a, const char *b)
{
    if (a && b)
    {
#if defined(WIN32)
        return ::stricmp(a, b);
#else
        return ::strcasecmp(a, b);
#endif
    }

    // At least one side is null: 1 if only a is set, -1 if only b is set,
    // 0 if neither is.
    return (a ? 1 : 0) - (b ? 1 : 0);
}
235 
/// String comparison that tolerates null pointers.
///
/// Two nulls compare equal and a null sorts before any non-null string
/// (intended to match UT_String). Otherwise this is plain strcmp().
inline int
SYSstrcmp(const char *a, const char *b)
{
    if (a && b)
        return ::strcmp(a, b);

    // At least one side is null: 1 if only a is set, -1 if only b is set,
    // 0 if neither is.
    return (a ? 1 : 0) - (b ? 1 : 0);
}
248 
// Generates SYS<FUNCTION>(s, c): a wrapper around the character-search
// function ::FUNCTION that reports "not found" (nullptr) for a null string
// instead of handing the null pointer to the library. CONST is either empty
// or 'const' and selects which overload is produced.
#define WRAP_NULLTEST_C(FUNCTION, CONST) \
inline CONST char * \
SYS##FUNCTION(CONST char *s, int c) \
{ \
    return s ? ::FUNCTION(s, c) : nullptr; \
} \
/**/

// Emits both the mutable and the const overload for one function.
#define WRAP_NULLTEST(FUNCTION) \
WRAP_NULLTEST_C(FUNCTION, ) \
WRAP_NULLTEST_C(FUNCTION, const) \
/**/

// The standard does not say what these do with a null string. Treating a
// null string as one that never contains the search character is a
// reasonable and safer answer.
WRAP_NULLTEST(strchr)
WRAP_NULLTEST(strrchr)

#undef WRAP_NULLTEST
#undef WRAP_NULLTEST_C
271 
/// Case-insensitive comparison of at most @c n characters that tolerates
/// null pointers.
///
/// Two nulls compare equal and a null sorts before any non-null string.
/// Otherwise the platform's bounded case-insensitive comparison decides.
inline int
SYSstrncasecmp(const char *a, const char *b, size_t n)
{
    if (a && b)
    {
#if defined(WIN32)
        return ::strnicmp(a, b, n);
#else
        return ::strncasecmp(a, b, n);
#endif
    }

    // At least one side is null: 1 if only a is set, -1 if only b is set,
    // 0 if neither is.
    return (a ? 1 : 0) - (b ? 1 : 0);
}
287 
/// Case-insensitive substring search; a replacement for strcasestr(), which
/// has no equivalent on Win32.
///
/// @param haystack  Text to search; may be null.
/// @param needle    Text to look for; may be null.
/// @return Pointer to the first case-insensitive occurrence of @c needle in
///         @c haystack, @c haystack itself when @c needle is empty, or
///         nullptr when there is no match or either argument is null.
inline char *
SYSstrcasestr(const char *haystack, const char *needle)
{
    // Nothing can be found in, or with, a null string. This is checked on
    // every platform so the result does not depend on the build target.
    if (!haystack || !needle)
        return nullptr;

#if defined(WIN32)
    // Designed for the normal case (small needle, large haystack).
    // Asymptotic cases will probably perform very poorly. For those, we'll
    // need: https://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm

    // Empty needle gives beginning of string.
    if (!*needle)
        return const_cast<char *>(haystack);

    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so every char is converted before it is passed in.
    const int first = tolower(static_cast<unsigned char>(*needle));
    for (;;)
    {
        // Find the start of the pattern in the string.
        while (*haystack
               && tolower(static_cast<unsigned char>(*haystack)) != first)
            haystack++;

        if (!*haystack)
            return nullptr;

        // Found the start of the pattern; compare the remainder.
        const char *h = haystack, *n = needle;
        do
        {
            // End of needle? The whole pattern matched.
            if (!*++n)
                return const_cast<char *>(haystack);
            // End of haystack? No later start can match either.
            if (!*++h)
                return nullptr;
        } while (tolower(static_cast<unsigned char>(*h))
                 == tolower(static_cast<unsigned char>(*n)));

        haystack++;
    }
#else
    return const_cast<char*>(::strcasestr(const_cast<char*>(haystack),needle));
#endif
}
329 
/// Duplicate at most @c n characters of @c s into newly malloc'ed memory;
/// provides strndup() on Windows, where the C runtime lacks it.
///
/// @param s  Source text; at most its first @c n bytes are read.
/// @param n  Maximum number of characters to copy.
/// @return A null-terminated copy that the caller must free(), or nullptr
///         when the Windows fallback cannot allocate memory.
inline char *
SYSstrndup(const char *s, size_t n)
{
#if defined(WIN32)
    // Measure with strnlen() rather than strlen() so that no more than n
    // bytes of the source are ever read.
    n = ::strnlen(s, n);
    char *r = static_cast<char *>(::malloc(n + 1));
    if (!r)
        return nullptr;
    ::memcpy(r, s, n);
    r[n] = '\0';
    return r;
#else
    return ::strndup(s, n);
#endif
}
345 
// The is*() routines are badly implemented on Windows, so these two are
// written by hand there.
// Running testut -i -t SYS_String shows at least a 1.3x speed up.
#ifdef _WIN32
SYS_FORCE_INLINE bool
SYSisalpha(unsigned char c)
{
    // Clearing bit 5 folds 'a'..'z' onto 'A'..'Z'. The subtraction is then
    // done in unsigned arithmetic, so anything below 'A' wraps to a large
    // value and a single comparison covers both ends of the range.
    const unsigned folded = unsigned(c & ~(1<<5));
    return (folded - 'A') <= ('Z' - 'A');
}
SYS_FORCE_INLINE bool
SYSisdigit(unsigned char c)
{
    // Interestingly, two comparisons tend to perform better here than the
    // single-comparison trick.
    return !(c < '0' || c > '9');
}
#endif // _WIN32
362 
363 // From time to time, we run into problems with the locale changing
364 // unexpectedly on us leading to things like SYSisprint(227) returning true
365 // when we're in the en_CA.UTF-8 LC_CTYPE locale on macOS. Since everything
366 // assumes we're in the C locale, hardcode the range explicitly.
367 SYS_FORCE_INLINE bool
368 SYSisprint(unsigned char c)
369 {
370  return ( c >= static_cast<unsigned char>(32)
371  && c < static_cast<unsigned char>(127));
372 }
373 
374 // Windows decided in their infinite wisdom that negative values
375 // should crash their isfoo() functions, guard by only taking unsigned char
376 // arguments which get casted again to int's.
377 
378 #define SYS_IS_WRAPPER(TEST) \
379 SYS_FORCE_INLINE bool \
380 SYS##TEST(unsigned char c) \
381 { \
382  return TEST(c); \
383 } \
384 /**/
385 
386 SYS_IS_WRAPPER(isalnum)
387 #ifndef _WIN32
389 #endif
390 // isascii is specifically marked deprecated
391 // SYS_IS_WRAPPER(isascii)
392 // This does have a POSIX standard, but isn't in Windows.
393 // SYS_IS_WRAPPER(isblank)
394 SYS_IS_WRAPPER(iscntrl)
395 #ifndef _WIN32
396 SYS_IS_WRAPPER(isdigit)
397 #endif
398 SYS_IS_WRAPPER(isgraph)
399 SYS_IS_WRAPPER(islower)
400 //SYS_IS_WRAPPER(isprint) // see above
401 SYS_IS_WRAPPER(ispunct)
402 // isspace is rather important we get very, very, fast.
403 // SYS_IS_WRAPPER(isspace)
404 SYS_IS_WRAPPER(isupper)
405 SYS_IS_WRAPPER(isxdigit)
406 
407 #undef SYS_IS_WRAPPER
408 
409  static SYS_FORCE_INLINE const void *
410  SYSmemrchr(const void *v, int c, exint n)
411  {
412 #if defined(LINUX)
413  return ::memrchr(v, c, n);
414 #else
415  const unsigned char *beg = (const unsigned char *)v;
416  const unsigned char *full_end = (const unsigned char *)v + n;
417  const unsigned char *end = (const unsigned char *)v + (n/4)*4;
418  for (const unsigned char *s = full_end; s-->end;)
419  {
420  if (*s == c)
421  return s;
422  }
423  for (const unsigned char *s = end-1; s > beg;)
424  {
425  if (*s == c) return s;
426  --s;
427  if (*s == c) return s;
428  --s;
429  if (*s == c) return s;
430  --s;
431  if (*s == c) return s;
432  --s;
433  }
434  return nullptr;
435 #endif
436  }
437 
438 static SYS_FORCE_INLINE constexpr int
439 SYSmemcmp(const void *lhs, const void *rhs, size_t count)
440 {
441  return __builtin_memcmp(lhs, rhs, count);
442 }
443 
// Generates SYSisspace(TYPE): true for ' ' and for the control characters
// from tab ('\x9') through carriage return ('\xd'), false for everything
// else. Written by hand because this test needs to be very fast.
#define CREATE_SYSisspace(TYPE) \
inline bool \
SYSisspace(TYPE c) \
{ \
    /* Fastest exit for non-spaces. */ \
    if (c > ' ') \
        return false; \
    /* Either equal to space, or between tab and carriage return */ \
    return (c == ' ' || (c <= '\xd' && c >= '\x9')); \
}

// char, unsigned char and signed char are three distinct types in C++.
// Without the plain char overload, a call with a char argument is ambiguous
// between the other two.
CREATE_SYSisspace(char)
CREATE_SYSisspace(unsigned char)
CREATE_SYSisspace(signed char)

#undef CREATE_SYSisspace
460 
461 
462 #endif
size_t SYSstrlcat(char *dest, const char *src, size_t size)
Definition: SYS_String.h:205
const GLdouble * v
Definition: glcorearb.h:837
#define SYS_IS_WRAPPER(TEST)
Definition: SYS_String.h:378
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
#define CREATE_SYSisspace(TYPE)
Definition: SYS_String.h:444
int64 exint
Definition: SYS_Types.h:125
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
GLdouble s
Definition: glad.h:3009
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
SYS_FORCE_INLINE bool SYSisprint(unsigned char c)
Definition: SYS_String.h:368
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
GLdouble n
Definition: glcorearb.h:2008
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:273
GLuint GLuint end
Definition: glcorearb.h:475
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:106
int SYSstrcmp(const char *a, const char *b)
Definition: SYS_String.h:237
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:180
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
GLsizeiptr size
Definition: glcorearb.h:664
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2002
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:290
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:220
unsigned int uint32
Definition: SYS_Types.h:40
char * SYSstrndup(const char *s, size_t n)
Definition: SYS_String.h:332
GLboolean r
Definition: glcorearb.h:1222
bool isalpha(const std::string &str)
Verify that str consists of letters only.
Definition: CLI11.h:340
GLint GLsizei count
Definition: glcorearb.h:405
#define WRAP_NULLTEST(FUNCTION)
Definition: SYS_String.h:258
GLenum src
Definition: glcorearb.h:1793