HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_Regex.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_Regex.h ( UT Library, C++)
7  *
8  * COMMENTS: A simple wrapper for hboost::xpressive regular expressions
9  *
10  * RELATION TO THE STL:
11  *
12  * Use UT_Regex instead of std::regex
13  *
14  * Reasoning:
15  *
16  * std::regex can be much slower than other regex libraries,
17  * e.g. https://stackoverflow.com/q/70583395
18  */
19 
20 #ifndef __UT_Regex__
21 #define __UT_Regex__
22 
23 #include "UT_API.h"
24 #include "UT_String.h"
25 #include "UT_StringHolder.h"
26 #include "UT_StringView.h"
27 #include <SYS/SYS_Types.h>
28 
29 #include <hboost/xpressive/basic_regex.hpp>
30 
31 #include <utility>
32 
33 class UT_StringArray;
34 class UT_WorkBuffer;
35 template <typename T> class UT_Array;
36 
38 {
39 public:
40  struct FromGlobTag {};
42  struct FromBundleGlobTag {};
44 
45  /// Constructs an empty regular expression. It will match nothing and
46  /// isValid will return false.
47  UT_Regex() {}
48 
49  /// Initializes the regular expression from the expression string. Use
50  /// isValid to test for validity.
51  explicit UT_Regex(const char *expr, bool case_sensitive = true)
52  {
53  init(expr, case_sensitive);
54  }
55 
56  /// Initializes the regular expression from a glob pattern. Use
57  /// isValid to test for validity.
58  /// @see convertGlobToExpr() for exact accepted rules
60  {
61  initFromGlob(pattern);
62  }
63 
64  /// Initializes the regular expression from a bundle glob pattern. Use
65  /// isValid to test for validity.
66  /// @see convertGlobToExpr() for exact accepted rules
68  {
69  initFromGlob(pattern, true, true);
70  }
71 
72  /// Initializes the regular expression from the expression string. If the
73  /// expression parses correctly, this function returns true.
74  bool init(const char *expr, bool case_sensitive = true);
75 
76  /// Initializes the regular expression from the glob pattern. If the
77  /// expression parses correctly, this function returns true. See
78  /// convertGlobToExpr() for syntax.
79  bool initFromGlob(const char *pattern,
80  bool case_sensitive=true,
81  bool is_bundle_glob=false);
82 
83  /// The accepted glob pattern rules are as follows:
84  /// - a '*' matches any path component, but it stops at slashes.
85  /// - use '**' to match anything, including slashes.
86  /// - a '?' matches any character except a slash (/).
87  /// - '[',']' matches one of the enclosed characters, eg. [abc]
88  /// - '-' within '[',']' denotes a character range, eg. [a-c]
89  /// - If first character after '[' is ! or ^, then it matches anything
90  /// except the enclosed characters.
91  /// - '{','}' matches any of the comma (',') separated patterns within
92  /// - If is_bundle_glob is true, then the following rules apply as well:
93  /// - '*' matches anything, including slashes.
94  /// - '%' to match any path component, but stops at slashes.
95  /// - '(', '|', ')' for grouping.
96  /// @return false if glob is NULL or the empty string.
97  static bool convertGlobToExpr(
98  UT_WorkBuffer &expr,
99  const char *glob,
100  bool is_bundle_glob = false);
101 
102  /// Returns true if the regular expression parses correctly. If the return
103  /// value is false, use getErrorString to get a human readable error string.
104  bool isValid() const;
105  const char *getErrorString() const;
106 
107  /// @{
108  /// Returns true if the entire input string matches the regular expression,
109  /// false otherwise.
110  bool match(const char *string) const;
111  bool match(const UT_StringView &str) const;
112  bool match(const UT_String &str) const
113  { return match(str.c_str()); }
114  bool match(const UT_StringRef &str) const
115  { return match(UT_StringView(str)); }
116  /// @}
117 
118 
119  /// @{
120  /// Returns true if the entire input string matches the regular expression,
121  /// false otherwise. If successful, returns the list of the captured
122  /// sub-strings through the captured string array, excluding the string
123  /// itself.
124  bool match(const char *string,
125  UT_StringArray &captured) const;
126  bool match(const UT_StringView &string,
127  UT_StringViewArray &captured) const;
128  /// @}
129 
130  class Result;
131 
132  /// Returns true if the entire input string matches the regular expression,
133  /// false otherwise. If successful, returns a result object that can be
134  /// queried for named matches.
135  /// NOTE: You need to include UT_RegexResult.h to use this.
136  bool match(const UT_StringView &string,
137  Result &captured) const;
138 
139  /// Returns true if the regular expression is found somewhere in the input
140  /// string, false otherwise.
141  bool search(const char *string,
142  exint start = 0, exint end = -1) const;
143 
144  /// Returns true if the regular expression is found somewhere in the input
145  /// string, false otherwise. If successful, returns the list of the
146  /// captured sub-strings through the captured string array.
147  bool search(const char *string,
148  UT_StringArray &captured,
149  exint start = 0, exint end = -1) const;
150 
151  /// Returns true if the regular expression is found somewhere in the input
152  /// string, false otherwise. If successful, returns an array of
153  /// pairs of start and end indices that the items were found at.
154  bool search(const char *string,
155  UT_Array<std::pair<exint, exint>> &indicies,
156  exint start = 0,
157  exint end = -1) const;
158 
159  /// Returns true if the regular expression is found somewhere in the input
160  /// string, false otherwise. If successful, returns a result object
161  /// that can be queried for named matches.
162  /// NOTE: You need to include UT_RegexResult.h to use this.
163  bool search(const char *string,
164  Result &result,
165  exint start = 0,
166  exint end = -1) const;
167 
168  /// Returns true if the regular expression is found somewhere in the input
169  /// string, false otherwise. If successful, fills results array
170  /// with strings that match the entire regex expression (no capture
171  /// groups)
172  bool searchall(const char* string, UT_StringArray& results,
173  exint start = 0, exint end = -1) const;
174 
175  /// Splits the given string into substrings when the regular expression
176  /// is found. Returns true if at least one split has occurred. If no
177  /// splits occur, results will contain one entry which is the
178  /// original string.
179  /// NOTE: Does not split empty trailing components: `a,b,` split by `,`
180  /// will only generate two elements. This differs from standard split
181  /// definitions, but results from how the C++ standard has evolved.
182  /// You can append `|($(?!\s))` to your regex to get the Python-style
183  /// behavior. This will match end-of-line ($) but also uses the negative
184  /// lookahead to not match an embedded \n, only the end-of-string.
185  bool split(const char* string, UT_StringArray& results,
186  exint maxsplits = 0) const;
187 
188  bool split(const UT_StringView &str,
189  UT_StringViewArray &results,
190  exint maxsplits = 0) const;
191 
192  /// Replaces the first occurrence of the regular expression in the source
193  /// string with the replacement value from replace, and writes out the
194  /// result. If replace_all is set to true, all occurrences are replaced.
195  /// If there is no match, the result buffer is set to be identical to the
196  /// input string, unless copy_on_fail is false, in which case it is left
197  /// untouched.
199  const char *string,
200  const char *replace,
201  bool replace_all = false,
202  bool copy_on_fail = true
203  ) const;
204 
205  /// Same as above except a fixed number of replacements can be set
206  /// instead of just all or just one. 0 means replace all.
208  const char *string,
209  const char *replace,
210  exint num_replacements,
211  bool copy_on_fail = true
212  ) const;
213 
214 private:
215  hboost::xpressive::cregex myExpr;
216  UT_String myError;
217 };
218 
219 #endif
bool match(const UT_StringRef &str) const
Definition: UT_Regex.h:114
std::string getErrorString(int errorNum)
Return a string (possibly empty) describing the given system error code.
static FromBundleGlobTag FromBundleGlob
Definition: UT_Regex.h:43
GLuint start
Definition: glcorearb.h:475
static FromGlobTag FromGlob
Definition: UT_Regex.h:41
int64 exint
Definition: SYS_Types.h:125
#define UT_API
Definition: UT_API.h:14
**But if you need a result
Definition: thread.h:622
const char * c_str() const
Definition: UT_String.h:515
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
GLuint GLuint end
Definition: glcorearb.h:475
UT_Regex()
Definition: UT_Regex.h:47
GLushort pattern
Definition: glad.h:2583
UT_Regex(const char *pattern, FromBundleGlobTag)
Definition: UT_Regex.h:67
auto search(const T &set, const V &val) -> std::pair< bool, decltype(std::begin(detail::smart_deref(set)))>
A search function.
Definition: CLI11.h:3170
UT_Regex(const char *expr, bool case_sensitive=true)
Definition: UT_Regex.h:51
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
UT_Regex(const char *pattern, FromGlobTag)
Definition: UT_Regex.h:59
bool match(const UT_String &str) const
Definition: UT_Regex.h:112