HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_Regex.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_Regex.h ( UT Library, C++)
7  *
8  * COMMENTS: A simple wrapper for hboost::regex
9  *
10  * RELATION TO THE STL:
11  *
12  * Use UT_Regex instead of std::regex
13  *
14  * Reasoning:
15  *
16  * std::regex can be much slower than other regex libraries,
17  * e.g. https://stackoverflow.com/q/70583395
18  */
19 
20 #ifndef __UT_Regex__
21 #define __UT_Regex__
22 
23 #include "UT_Array.h"
24 #include "UT_WorkBuffer.h"
25 #include "UT_StringArray.h"
26 
27 #include <hboost/xpressive/basic_regex.hpp>
28 
31 
33 {
34 public:
35  struct FromGlobTag {};
37  struct FromBundleGlobTag {};
39 
40  /// Constructs an empty regular expression. It will match nothing and
41  /// isValid will return false.
42  UT_Regex() {} // empty body: myExpr and myError are simply default-constructed
43 
44  /// Initializes the regular expression from the expression string. Use
45  /// isValid to test for validity.
46  explicit UT_Regex(const char *expr)
47  { // Parse immediately; the bool result of init() is intentionally dropped.
48  static_cast<void>(init(expr)); // callers check isValid() afterwards
49  }
50 
51  /// Initializes the regular expression from a glob pattern. Use
52  /// isValid to test for validity.
53  /// @see convertGlobToExpr() for exact accepted rules
55  {
56  initFromGlob(pattern);
57  }
58 
59  /// Initializes the regular expression from a bundle glob pattern (the
60  /// glob is converted with is_bundle_glob enabled). Use isValid to test for validity.
61  /// @see convertGlobToExpr() for exact accepted rules
63  {
64  initFromGlob(pattern, true, true);
65  }
66 
67  /// Initializes the regular expression from the expression string. If the
68  /// expression parses correctly, this function returns true.
69  bool init(const char *expr, bool case_sensitive = true);
70 
71  /// Initializes the regular expression from the glob pattern. If the
72  /// expression parses correctly, this function returns true. See
73  /// convertGlobToExpr() for syntax.
74  bool initFromGlob(const char *pattern,
75  bool case_sensitive=true,
76  bool is_bundle_glob=false);
77 
78  /// The accepted glob pattern rules are as follows:
79  /// - a '*' matches any path component, but it stops at slashes.
80  /// - use '**' to match anything, including slashes.
81  /// - a '?' matches any character except a slash (/).
82  /// - '[',']' matches one of the enclosed characters, eg. [abc]
83  /// - '-' within '[',']' denotes a character range, eg. [a-c]
84  /// - If the first character after '[' is ! or ^, then it matches anything
85  /// except the enclosed characters.
86  /// - '{','}' matches any of the comma (',') separated patterns within
87  /// - If is_bundle_glob is true, then the following rules apply as well:
88  /// - '*' matches anything, including slashes.
89  /// - '%' to match any path component, but stops at slashes.
90  /// - '(', '|', ')' for grouping.
91  /// @return false if glob is NULL or the empty string.
92  static bool convertGlobToExpr(
93  UT_WorkBuffer &expr,
94  const char *glob,
95  bool is_bundle_glob = false);
96 
97  /// Returns true if the regular expression parses correctly. If the return
98  /// value is false, use getErrorString to get a human readable error string.
99  bool isValid() const;
100  const char *getErrorString() const;
101 
102  /// @{
103  /// Returns true if the entire input string matches the regular expression,
104  /// false otherwise.
105  bool match(const char *string) const;
106  bool match(const UT_StringView &str) const;
107  bool match(const UT_String &str) const // convenience overload for UT_String
108  { const char *text = str.c_str(); return match(text); } // forwards to the C-string overload
109  bool match(const UT_StringRef &str) const // convenience overload for UT_StringRef
110  { const UT_StringView view(str); return match(view); } // forwards to the UT_StringView overload
111  /// @}
112 
113 
114  /// @{
115  /// Returns true if the entire input string matches the regular expression,
116  /// false otherwise. If successful, returns the list of the captured
117  /// sub-strings through the captured string array, excluding the string
118  /// itself.
119  bool match(const char *string,
120  UT_StringArray &captured) const;
121  bool match(const UT_StringView &string,
122  UT_StringViewArray &captured) const;
123  /// @}
124  /// Returns true if the regular expression is found somewhere in the input
125  /// string, false otherwise.
126  bool search(const char *string,
127  exint start = 0, exint end = -1) const;
128 
129  /// Returns true if the regular expression is found somewhere in the input
130  /// string, false otherwise. If successful, returns the list of the
131  /// captured sub-strings through the captured string array.
132  bool search(const char *string,
133  UT_StringArray &captured,
134  exint start = 0, exint end = -1) const;
135 
136  /// Returns true if the regular expression is found somewhere in the input
137  /// string, false otherwise. If successful, returns an array of
138  /// pairs of start and end indices at which the items were found.
139  bool search(const char *string,
140  UT_Array<std::pair<exint, exint>> &indicies,
141  exint start = 0,
142  exint end = -1) const;
143 
144  /// Returns true if the regular expression is found somewhere in the input
145  /// string, false otherwise. If successful, fills results array
146  /// with strings that match the entire regex expression (no capture
147  /// groups)
148  bool searchall(const char* string, UT_StringArray& results,
149  exint start = 0, exint end = -1) const;
150 
151  /// Splits the given string into substrings when the regular expression
152  /// is found. Returns true if at least one split has occurred. If no
153  /// splits occur, results will contain one entry which is the
154  /// original string.
155  /// NOTE: Does not split empty trailing components: "a,b," split by ","
156  /// will only generate two elements. This differs from standard split
157  /// definitions, but results from how the C++ standard has evolved.
158  /// You can append |($(?!\s)) to your regex to get the Python-style behavior.
159  /// This will match end-of-line ($) but also use the negative lookahead
160  /// to not match embedded \n and only the end-of-string.
161  bool split(const char* string, UT_StringArray& results,
162  exint maxsplits = 0) const;
163 
164  bool split(const UT_StringView &str,
165  UT_StringViewArray &results,
166  exint maxsplits = 0) const;
167 
168  /// Replaces the first occurrence of the regular expression in the source
169  /// string with the replacement value from replace, and writes out the
170  /// result. If replace_all is set to true, all occurrences are replaced.
171  /// If there is no match, the result buffer is set to be identical to the
172  /// input string, unless copy_on_fail is false, in which case it is left
173  /// untouched.
175  const char *string,
176  const char *replace,
177  bool replace_all = false,
178  bool copy_on_fail = true
179  ) const;
180 
181  /// Same as above except a fixed number of replacements can be set
182  /// instead of just all or just one. 0 means replace all.
184  const char *string,
185  const char *replace,
186  exint num_replacements,
187  bool copy_on_fail = true
188  ) const;
189 
190 private:
191  hboost::xpressive::cregex myExpr;
192  UT_String myError;
193 };
194 
195 #endif
bool match(const UT_StringRef &str) const
Definition: UT_Regex.h:109
std::string getErrorString(int errorNum)
Return a string (possibly empty) describing the given system error code.
static FromBundleGlobTag FromBundleGlob
Definition: UT_Regex.h:38
GLuint start
Definition: glcorearb.h:475
static FromGlobTag FromGlob
Definition: UT_Regex.h:36
int64 exint
Definition: SYS_Types.h:125
#define UT_API
Definition: UT_API.h:14
UT_Array< UT_StringView > UT_StringViewArray
Definition: UT_Regex.h:29
const char * c_str() const
Definition: UT_String.h:498
UT_Regex(const char *expr)
Definition: UT_Regex.h:46
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
GLuint64EXT * result
Definition: glew.h:14311
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
GLuint GLuint end
Definition: glcorearb.h:475
UT_Regex()
Definition: UT_Regex.h:42
UT_Regex(const char *pattern, FromBundleGlobTag)
Definition: UT_Regex.h:62
GLubyte * pattern
Definition: glew.h:5741
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
UT_Regex(const char *pattern, FromGlobTag)
Definition: UT_Regex.h:54
bool match(const UT_String &str) const
Definition: UT_Regex.h:107