HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_Regex.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_Regex.h ( UT Library, C++)
7  *
8  * COMMENTS: A simple wrapper for hboost::regex
9  *
10  * RELATION TO THE STL:
11  *
12  * Use UT_Regex instead of std::regex
13  *
14  * Reasoning:
15  *
16  * std::regex can be much slower than other regex libraries,
17  * e.g. https://stackoverflow.com/q/70583395
18  */
19 
20 #ifndef __UT_Regex__
21 #define __UT_Regex__
22 
23 #include "UT_API.h"
24 #include "UT_String.h"
25 #include "UT_StringHolder.h"
26 #include "UT_StringView.h"
27 #include <SYS/SYS_Types.h>
28 
29 #include <hboost/xpressive/basic_regex.hpp>
30 
31 #include <utility>
32 
33 class UT_StringArray;
34 class UT_WorkBuffer;
35 template <typename T> class UT_Array;
36 
38 {
39 public:
40  struct FromGlobTag {}; // tag type selecting the glob-pattern constructor
41  static FromGlobTag FromGlob;
42  struct FromBundleGlobTag {}; // tag type selecting the bundle-glob constructor
43  static FromBundleGlobTag FromBundleGlob;
44 
45  /// Default constructor: produces an empty regular expression, which
46  /// matches nothing and for which isValid returns false.
47  UT_Regex() = default;
48 
49  /// Builds the regular expression from the expression string via init().
50  /// The parse result is not reported here; use isValid to test for validity.
51  explicit UT_Regex(const char *expr)
52  {
53  static_cast<void>(init(expr));
54  }
55 
56  /// Initializes the regular expression from a glob pattern (the unnamed tag
57  /// argument only selects this overload). Use isValid to test for validity.
58  /// @see convertGlobToExpr() for exact accepted rules
59  UT_Regex(const char *pattern, FromGlobTag)
60  {
61  initFromGlob(pattern);
62  }
63 
64  /// Initializes the regular expression from a bundle glob pattern: the
65  /// pattern is compiled case-sensitively with is_bundle_glob enabled (the
66  /// unnamed tag argument only selects this overload). Use isValid to test
66  /// for validity. @see convertGlobToExpr() for exact accepted rules
67  UT_Regex(const char *pattern, FromBundleGlobTag)
68  {
69  initFromGlob(pattern, true, true);
70  }
71 
72  /// Initializes the regular expression from the expression string. If the
73  /// expression parses correctly, this function returns true.
74  bool init(const char *expr, bool case_sensitive = true);
75 
76  /// Initializes the regular expression from the glob pattern. If the
77  /// expression parses correctly, this function returns true. See
78  /// convertGlobToExpr() for syntax.
79  bool initFromGlob(const char *pattern,
80  bool case_sensitive=true,
81  bool is_bundle_glob=false);
82 
83  /// The accepted glob pattern rules are as follows:
84  /// - a '*' matches any path component, but it stops at slashes.
85  /// - use '**' to match anything, including slashes.
86  /// - a '?' matches any character except a slash (/).
87  /// - '[',']' matches one of the enclosed characters, eg. [abc]
88  /// - '-' within '[',']' denotes a character range, eg. [a-c]
89  /// - If first character after '[' is ! or ^, then it matches anything
90  /// except the enclosed characters.
91  /// - '{','}' matches any of the comma (',') separated patterns within
92  /// - If is_bundle_glob is true, then the following rules apply as well:
93  /// - '*' matches anything, including slashes.
94  /// - '%' to match any path component, but stops at slashes.
95  /// - '(', '|', ')' for grouping.
96  /// @return false if glob is NULL or the empty string.
97  static bool convertGlobToExpr(
98  UT_WorkBuffer &expr,
99  const char *glob,
100  bool is_bundle_glob = false);
101 
102  /// Returns true if the regular expression parses correctly. If the return
103  /// value is false, use getErrorString to get a human readable error string.
104  bool isValid() const;
105  const char *getErrorString() const;
106 
107  /// @{
108  /// Returns true if the entire input string matches the regular expression,
109  /// false otherwise.
110  bool match(const char *string) const;
111  bool match(const UT_StringView &str) const;
112  bool match(const UT_String &str) const
113  { const char *cstr = str.c_str(); return match(cstr); } // forwards to the const char * overload
114  bool match(const UT_StringRef &str) const
115  { const UT_StringView view(str); return match(view); } // forwards to the UT_StringView overload
116  /// @}
117 
118 
119  /// @{
120  /// Returns true if the entire input string matches the regular expression,
121  /// false otherwise. If successful, returns the list of the captured
122  /// sub-strings through the captured string array, excluding the string
123  /// itself.
124  bool match(const char *string,
125  UT_StringArray &captured) const;
126  bool match(const UT_StringView &string,
127  UT_StringViewArray &captured) const;
128  /// @}
129  /// Returns true if the regular expression is found somewhere in the input
130  /// string, false otherwise.
131  bool search(const char *string,
132  exint start = 0, exint end = -1) const;
133 
134  /// Returns true if the regular expression is found somewhere in the input
135  /// string, false otherwise. If successful, returns the list of the
136  /// captured sub-strings through the captured string array.
137  bool search(const char *string,
138  UT_StringArray &captured,
139  exint start = 0, exint end = -1) const;
140 
141  /// Returns true if the regular expression is found somewhere in the input
142  /// string, false otherwise. If successful, returns an array of
143  /// pairs of start and end indices where the items were found.
144  bool search(const char *string,
145  UT_Array<std::pair<exint, exint>> &indicies,
146  exint start = 0,
147  exint end = -1) const;
148 
149  /// Returns true if the regular expression is found somewhere in the input
150  /// string, false otherwise. If successful, fills results array
151  /// with strings that match the entire regex expression (no capture
152  /// groups)
153  bool searchall(const char* string, UT_StringArray& results,
154  exint start = 0, exint end = -1) const;
155 
156  /// Splits the given string into substrings when the regular expression
157  /// is found. Returns true if at least one split has occurred. If no
158  /// splits occur, results will contain one entry which is the
159  /// original string.
160  /// NOTE: Does not split empty trailing components, a,b, split by ,
161  /// will only generate two elements. This differs from standard split
162  /// definitions, but results from how C++ standard has evolved.
163  /// You can append `|($(?!\s))` to your regex to get the Python-style behavior.
164  /// This will match end-of-line ($) but also use the negative lookahead
165  /// to not match embedded \n and only the end-of-string.
166  bool split(const char* string, UT_StringArray& results,
167  exint maxsplits = 0) const;
168 
169  bool split(const UT_StringView &str,
170  UT_StringViewArray &results,
171  exint maxsplits = 0) const;
172 
173  /// Replaces the first occurrence of the regular expression in the source
174  /// string with the replacement value from replace, and writes out the
175  /// result. If replace_all is set to true, all occurrences are replaced.
176  /// If there is no match, the result buffer is set to be identical to the
177  /// input string, unless copy_on_fail is false, in which case it is left
178  /// untouched.
180  const char *string,
181  const char *replace,
182  bool replace_all = false,
183  bool copy_on_fail = true
184  ) const;
185 
186  /// Same as above except a fixed number of replacements can be set
187  /// instead of just all or just one. 0 means replace all.
189  const char *string,
190  const char *replace,
191  exint num_replacements,
192  bool copy_on_fail = true
193  ) const;
194 
195 private:
196  hboost::xpressive::cregex myExpr;
197  UT_String myError;
198 };
199 
200 #endif
bool match(const UT_StringRef &str) const
Definition: UT_Regex.h:114
std::string getErrorString(int errorNum)
Return a string (possibly empty) describing the given system error code.
static FromBundleGlobTag FromBundleGlob
Definition: UT_Regex.h:43
GLuint start
Definition: glcorearb.h:475
static FromGlobTag FromGlob
Definition: UT_Regex.h:41
int64 exint
Definition: SYS_Types.h:125
#define UT_API
Definition: UT_API.h:14
**But if you need a result
Definition: thread.h:613
const char * c_str() const
Definition: UT_String.h:508
UT_Regex(const char *expr)
Definition: UT_Regex.h:51
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:39
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
GLuint GLuint end
Definition: glcorearb.h:475
UT_Regex()
Definition: UT_Regex.h:47
GLushort pattern
Definition: glad.h:2583
UT_Regex(const char *pattern, FromBundleGlobTag)
Definition: UT_Regex.h:67
auto search(const T &set, const V &val) -> std::pair< bool, decltype(std::begin(detail::smart_deref(set)))>
A search function.
Definition: CLI11.h:3170
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
UT_Regex(const char *pattern, FromGlobTag)
Definition: UT_Regex.h:59
bool match(const UT_String &str) const
Definition: UT_Regex.h:112