HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SYS_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: SYS_String.h (SYS Library, C++)
7  *
8  * COMMENTS:
9  * System-independent string manipulation functions.
10  */
11 
12 #ifndef __SYS_String__
13 #define __SYS_String__
14 
15 #include "SYS_API.h"
16 
17 #include "SYS_Types.h"
18 #include "SYS_Inline.h"
19 
20 #include <ctype.h>
21 #include <string.h>
22 #include <stddef.h>
23 
24 /// Append to a hash for a char string
25 static inline uint32
26 SYSstring_hashseed(const char *str, exint length = SYS_EXINT_MAX, uint32 hash = 0)
27 {
28  if (str != NULL && length >= 0 && *str)
29  {
30  // A note on the magic number 37.
31  // We want to scale by SOMETHING so that order is preserved.
32  // That something should be prime and not a power of two to
33  // avoid wrapping issues.
34  // That something should be larger than our range of expected
35  // values to avoid interference between consecutive letters.
36  // 0-9a-z is 36 letters long.
37  //
38  // The real reason is that this is what Perl uses.
39  do
40  {
41  hash = (37 * hash) + (*str);
42  ++str;
43  --length;
44  } while (length >= 0 && *str);
45  }
46 
47  // Make sure we never return zero for hash, since in many cases we
48  // precompute string hashes and zero means "not initialized". This allows
49  // us conforming values across different string container implementations.
50  if (hash == 0)
51  hash = 1;
52 
53  return hash;
54 }
55 
56 /// Generate a hash for a char string
57 static inline uint32
58 SYSstring_hash(const char *str, exint len = SYS_EXINT_MAX)
59 {
60  return SYSstring_hashseed(str, len, 0);
61 }
62 
63 
64 /// A constexpr version of the above. In C++11 we can't use iteration, but
65 /// we can do recursion. Go figure.
66 namespace
67 {
68  static inline constexpr uint32
69  SYSstring_hash_inner(const char *str, uint32 h)
70  {
71  return str[0] ? SYSstring_hash_inner(str+1, str[0] + h * 37) : h;
72  }
73 }
74 
75 static inline constexpr uint32
76 SYSstring_hash_literal(const char *str)
77 {
78  return SYSstring_hash_inner(str, 0) == 0 ? 1 : SYSstring_hash_inner(str, 0);
79 }
80 
81 
/// A strtok() under a standard name that keeps no hidden state between
/// calls, which makes it both reentrant and threadsafe.
///
/// SYSstrtok() splits a string into a sequence of tokens.
///
/// @param string   On the first call, the string to split. It *will be
///                 modified* (the delimiter that ends each token is
///                 overwritten with a NUL), destroying your copy. On every
///                 subsequent call pass NULL to continue with the same
///                 string.
/// @param delimit  An array of single characters, each of which acts as a
///                 delimiter.
/// @param context  Address of a char * variable used internally to remember
///                 the position between calls. Subsequent calls must pass
///                 the same, unchanged variable as the first call.
/// @return A non-empty string starting at the first non-delimiter character
///         of the next token, or NULL if no further tokens are available.
///
/// Example:
/// @code
///     char *string = strdup(getString());
///     char *strptr = string;
///     char *context;
///     char *token = SYSstrtok(string, MY_DELIMITERS, &context);
///     while (token)
///     {
///         do_some_stuff();
///         token = SYSstrtok(NULL, MY_DELIMITERS, &context);
///     }
///     free(strptr);
/// @endcode
inline char *
SYSstrtok(char *string, const char *delimit, char **context)
{
#ifdef LINUX
    return strtok_r(string, delimit, context);
#else
    // MSVC 2003 doesn't have strtok_r. 2005 has strtok_s, which is the same
    // as strtok_r. Until we upgrade, use this C version of strtok_r.

    // Continue from the saved position unless a fresh string was supplied.
    char *cursor = (string != NULL) ? string : *context;

    // Step over any delimiters in front of the next token.
    cursor += strspn(cursor, delimit);

    // Nothing but delimiters (or nothing at all) was left: no more tokens.
    if (*cursor == '\0')
    {
        *context = cursor;
        return NULL;
    }

    // The token starts here and runs up to the next delimiter or the
    // terminating NUL, whichever comes first.
    char *token = cursor;
    char *stop = token + strcspn(token, delimit);

    if (*stop != '\0')
    {
        // Ended by a delimiter: cut the token off there and resume right
        // behind it next time.
        *stop = '\0';
        *context = stop + 1;
    }
    else
    {
        // Last token of the string: park the context on the terminator so
        // the next call returns NULL.
        *context = stop;
    }

    return token;
#endif
}
155 
/// Bounded string copy modelled on strlcpy() from OpenBSD.
///
/// The semantics for strncpy() leave a little to be desired:
///  - If the buffer limit is hit, the string isn't guaranteed to be null
///    terminated.
///  - If the buffer limit isn't hit, the entire remainder of the buffer is
///    filled with nulls (which can be costly with large buffers).
///
/// SYSstrlcpy() is very similar to strncpy(), but:
///  - The resulting string is always null terminated (unless size == 0).
///  - The remaining buffer is not touched.
///  - The return code is the length of the src string when it fit into the
///    buffer, and size + 1 when it had to be truncated. It is therefore
///    possible to check for errors by testing rcode >= size.
///
/// The size is the size of the buffer, not the portion of the sub-string to
/// copy. If you want to only copy a portion of a string, make sure that the
/// @c size passed in is one @b larger than the length of the string since
/// SYSstrlcpy() will always ensure the string is null terminated.
///
/// It is invalid to pass a size of 0.
///
/// Examples: @code
///     char buf[8];
///     strncpy(buf, "dog", 8)      // buf == ['d','o','g',0,0,0,0,0]
///     SYSstrlcpy(buf, "dog", 8)   // buf == ['d','o','g',0,?,?,?,?]
///     strncpy(buf, "dog", 2)      // buf == ['d','o',?,?,?,?,?,?] (no null!)
///     SYSstrlcpy(buf, "dog", 2)   // buf == ['d',0,?,?,?,?,?,?]
///     SYSstrlcpy(buf, "dog", 3)   // buf == ['d','o',0,?,?,?,?,?]
/// @endcode
inline size_t
SYSstrlcpy(char *dest, const char *src, size_t size)
{
    // memccpy() copies up to and including the first null and returns the
    // address just past it in dest, or NULL if size bytes held no null.
    void *past_null = ::memccpy(dest, src, '\0', size);

    if (past_null == NULL)
    {
        // The source did not fit: terminate what was copied (if there is
        // any room at all) ...
        if (size != 0)
            dest[size - 1] = '\0';

        // ... and return rcode >= size to indicate that we would've busted
        // the buffer.
        return size + 1;
    }

    // Number of characters in front of the copied null terminator.
    return static_cast<char *>(past_null) - dest - 1;
}
198 
199 /// The following implements the strlcpy() function from OpenBSD. The
200 /// differences between strlcpy() and strncpy() are:
201 /// - The buffer will not be filled with null
202 /// - The size passed in is the full length of the buffer (not
203 /// remaining length)
204 /// - The dest will always be null terminated (unless it is already larger
205 /// than the size passed in)
206 /// The function returns strln(src) + SYSmin(size, strlen(dest))
207 /// If rcode >= size, truncation occurred
208 inline size_t
209 SYSstrlcat(char *dest, const char *src, size_t size)
210 {
211  // Find the length of the dest buffer. Only check for a null within the
212  // allocated space of the buffer (i.e. we can't use strlen()).
213  size_t dlen;
214  for (dlen = 0; dlen < size; dlen++)
215  if (!dest[dlen])
216  break;
217  if (dlen == size)
218  return size + 1; // Not enough space left
219  // Now, copy the source over
220  return dlen + SYSstrlcpy(dest+dlen, src, size-dlen);
221 }
222 
/// Case-insensitive comparison of two strings under one portable name.
/// Uses stricmp() when WIN32 is defined and strcasecmp() everywhere else;
/// the result is whatever that function returns.
inline int
SYSstrcasecmp(const char *a, const char *b)
{
#if !defined(WIN32)
    return ::strcasecmp(a, b);
#else
    return ::stricmp(a, b);
#endif
}
232 
/// Case-insensitive comparison of at most @c n characters of two strings
/// under one portable name. Uses strnicmp() when WIN32 is defined and
/// strncasecmp() everywhere else; the result is whatever that function
/// returns.
inline int
SYSstrncasecmp(const char *a, const char *b, size_t n)
{
#if !defined(WIN32)
    return ::strncasecmp(a, b, n);
#else
    return ::strnicmp(a, b, n);
#endif
}
242 
/// Replacement for strcasestr, since no equivalent exists on Win32.
///
/// Finds the first occurrence of @c needle in @c haystack, comparing
/// characters without regard to case.
///
/// @param haystack  String to search in.
/// @param needle    String to search for.
/// @return Pointer to the start of the match inside @c haystack, or NULL
///         if there is none. When WIN32 is defined, a NULL argument yields
///         NULL and an empty needle yields @c haystack; elsewhere the
///         arguments are handed to the system strcasestr() unchecked.
inline char *
SYSstrcasestr(const char *haystack, const char *needle)
{
#if defined(WIN32)
    // Designed for the normal case (small needle, large haystack).
    // Asymptotic cases will probably perform very poorly. For those, we'll
    // need: https://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
    if (!haystack || !needle)
        return NULL;

    // Empty needle gives beginning of string.
    if (!*needle)
        return const_cast<char *>(haystack);

    // Every character goes through unsigned char before reaching tolower():
    // handing it a negative value (a plain char >= 0x80 where char is
    // signed) is undefined and crashes the Windows implementation.
    const int first = tolower((unsigned char)*needle);

    for(;;)
    {
        // Find the start of the pattern in the string.
        while(*haystack && tolower((unsigned char)*haystack) != first)
            haystack++;

        if (!*haystack)
            return NULL;

        // Found the start of the pattern.
        const char *h = haystack, *n = needle;
        do
        {
            // End of needle? We found our man.
            if (!*++n)
                return const_cast<char *>(haystack);
            // End of haystack? Nothing more to look for: what is left of
            // the haystack is shorter than the needle.
            if (!*++h)
                return NULL;
        } while(tolower((unsigned char)*h) == tolower((unsigned char)*n));

        haystack++;
    }
#else
    return const_cast<char*>(::strcasestr(const_cast<char*>(haystack),needle));
#endif
}
284 
/// Implementation of strndup for Windows; forwards to the system strndup()
/// everywhere else.
///
/// @param s  String to duplicate.
/// @param n  Maximum number of characters to copy from @c s.
/// @return A newly allocated, null terminated copy of at most @c n
///         characters of @c s, or NULL if the allocation failed. In the
///         WIN32 branch the memory comes from malloc().
inline char *
SYSstrndup(const char *s, size_t n)
{
#if defined(WIN32)
    // Only the first n bytes are inspected for a terminator, so a source
    // that is not null terminated within n bytes is never read beyond them.
    const void *terminator = ::memchr(s, '\0', n);
    if (terminator != NULL)
        n = static_cast<const char *>(terminator) - s;

    char *r = (char *)::malloc(n + 1);
    if (r == NULL)
        return NULL;

    ::memcpy(r, s, n);
    r[n] = '\0';
    return r;
#else
    return ::strndup(s, n);
#endif
}
300 
301 
// Windows decided in their infinite wisdom that negative values
// should crash their isfoo() functions. So we have to guard
// against unexpected casting.
//
// CREATE_SYSis(TEST) defines three overloads of SYSis<TEST>() that wrap the
// <ctype.h> function is<TEST>():
//  - char:          the value is converted to unsigned char first, so it
//                   can never reach is<TEST>() as a negative number.
//  - unsigned char: passed straight through.
//  - int:           anything outside 0..127 yields false without calling
//                   is<TEST>() at all.

#define CREATE_SYSis(TEST) \
inline bool \
SYSis##TEST(char c) \
{ \
    return is##TEST((unsigned char)c); \
} \
 \
inline bool \
SYSis##TEST(unsigned char c) \
{ \
    return is##TEST(c); \
} \
 \
inline bool \
SYSis##TEST(int c) \
{ \
    if (c < 0 || c > 127) \
        return false; \
    else \
        return is##TEST(c); \
}

CREATE_SYSis(alnum)
CREATE_SYSis(alpha)
// isascii is specifically marked deprecated
// CREATE_SYSis(ascii)
// This does have a POSIX standard, but isn't in Windows.
// CREATE_SYSis(blank)
CREATE_SYSis(cntrl)
CREATE_SYSis(digit)
CREATE_SYSis(graph)
CREATE_SYSis(lower)
CREATE_SYSis(print)
CREATE_SYSis(punct)
// isspace is rather important we get very, very, fast.
// CREATE_SYSis(space)
CREATE_SYSis(upper)
CREATE_SYSis(xdigit)

#undef CREATE_SYSis
346 
347 
// CREATE_SYSisspace(TYPE) defines an overload of SYSisspace() for TYPE. It
// is hand-written instead of wrapping isspace() so that the common case, a
// character that is not white space, is decided by a single comparison.
// The characters accepted are ' ' and the range '\x9' through '\xd'.
#define CREATE_SYSisspace(TYPE) \
inline bool \
SYSisspace(TYPE c) \
{ \
    /* Fastest exit for non-spaces. */ \
    if (c > ' ') \
        return false; \
    /* Either equal to space, or between tab and carriage return */ \
    return (c == ' ' || (c <= '\xd' && c >= '\x9')); \
}

// Plain char needs an overload of its own: with only the unsigned and
// signed variants available, a call with a char argument is ambiguous.
CREATE_SYSisspace(char)
CREATE_SYSisspace(unsigned char)
CREATE_SYSisspace(signed char)
362 
363 #endif
size_t SYSstrlcat(char *dest, const char *src, size_t size)
Definition: SYS_String.h:209
GLsizei const GLchar *const * string
Definition: glcorearb.h:813
#define CREATE_SYSisspace(TYPE)
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1221
#define SYS_EXINT_MAX
Definition: SYS_Types.h:165
GLsizeiptr size
Definition: glcorearb.h:663
GLdouble n
Definition: glcorearb.h:2007
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:234
int64 exint
Definition: SYS_Types.h:109
GLuint GLuint end
Definition: glcorearb.h:474
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:110
GLfloat GLfloat GLfloat alpha
Definition: glcorearb.h:111
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:184
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1221
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2001
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:245
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:224
char * SYSstrndup(const char *s, size_t n)
Definition: SYS_String.h:287
GLboolean r
Definition: glcorearb.h:1221
#define CREATE_SYSis(TEST)
Definition: SYS_String.h:306
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794
GLenum src
Definition: glcorearb.h:1792
unsigned int uint32
Definition: SYS_Types.h:29