HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UT_Regex.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_Regex.h ( UT Library, C++)
7  *
8  * COMMENTS: A simple wrapper for hboost::xpressive
9  */
10 
11 #ifndef __UT_Regex__
12 #define __UT_Regex__
13 
14 #include "UT_WorkBuffer.h"
15 #include "UT_StringArray.h"
16 #include "UT_StringView.h"
17 #include "UT_Pair.h"
18 
19 #include <hboost/xpressive/xpressive.hpp>
20 
22 {
23 public:
24  struct FromGlobTag {};
26  struct FromBundleGlobTag {};
28 
29  /// Constructs an empty regular expression. It will match nothing, and
30  /// isValid() will return false.
31  UT_Regex() {}
32 
33  /// Initializes the regular expression from the expression string via
34  /// init() (case-sensitive by default). Use isValid to test for validity.
35  explicit UT_Regex(const char *expr)
36  {
37  init(expr);
38  }
39 
40  /// Initializes the regular expression from a glob pattern (case-sensitive,
41  /// non-bundle rules). Use isValid to test for validity.
42  /// @see convertGlobToExpr() for exact accepted rules
43  UT_Regex(const char *pattern, FromGlobTag)
44  {
45  initFromGlob(pattern);
46  }
47 
48  /// Initializes the regular expression from a bundle glob pattern
49  /// (case-sensitive, is_bundle_glob = true). Use isValid to test for validity.
50  /// @see convertGlobToExpr() for exact accepted rules
51  UT_Regex(const char *pattern, FromBundleGlobTag)
52  {
53  initFromGlob(pattern, true, true);
54  }
55 
56  /// Initializes the regular expression from the expression string. If the
57  /// expression parses correctly, this function returns true.
58  bool init(const char *expr, bool case_sensitive = true);
59 
60  /// Initializes the regular expression from the glob pattern. If the
61  /// expression parses correctly, this function returns true. See
62  /// convertGlobToExpr() for syntax.
63  bool initFromGlob(const char *pattern,
64  bool case_sensitive=true,
65  bool is_bundle_glob=false);
66 
67  /// The accepted glob pattern rules are as follows:
68  /// - a '*' matches any path component, but it stops at slashes.
69  /// - use '**' to match anything, including slashes.
70  /// - a '?' matches any character except a slash (/).
71  /// - '[',']' matches one of the enclosed characters, eg. [abc]
72  /// - '-' within '[',']' denotes a character range, eg. [a-c]
73  /// - If first character after '[' is ! or ^, then it matches anything
74  /// except the enclosed characters.
75  /// - '{','}' matches any of the comma (',') separated patterns within
76  /// - If is_bundle_glob is true, then the following rules apply as well:
77  /// - '*' matches anything, including slashes.
78  /// - '%' to match any path component, but stops at slashes.
79  /// - '(', '|', ')' for grouping.
80  /// @return false if glob is NULL or the empty string.
81  static bool convertGlobToExpr(
82  UT_WorkBuffer &expr,
83  const char *glob,
84  bool is_bundle_glob = false);
85 
86  /// Returns true if the regular expression parses correctly. If the return
87  /// value is false, use getErrorString to get a human readable error string.
88  bool isValid() const;
89  const char *getErrorString() const;
90 
91  /// Returns true if the entire input string matches the regular expression,
92  /// false otherwise.
93  bool match(const char *string) const;
94 
95  /// Returns true if the entire input string matches the regular expression,
96  /// false otherwise. If successful, returns the list of the captured
97  /// sub-strings through the captured string array, excluding the string
98  /// itself.
99  bool match(const char *string,
100  UT_StringArray &captured) const;
101 
102  /// Returns true if the entire input string matches the regular expression,
103  /// false otherwise.
104  bool match(const UT_StringView &str) const;
105 
106  /// Returns true if the entire input string matches the regular expression,
107  /// false otherwise. If successful, returns the list of the captured
108  /// sub-strings through the captured string array, excluding the string
109  /// itself.
110  bool match(const UT_StringView &string,
111  UT_StringViewArray &captured) const;
112 
113  /// Returns true if the regular expression is found somewhere in the input
114  /// string, false otherwise.
115  bool search(const char *string,
116  exint start = 0, exint end = -1) const;
117 
118  /// Returns true if the regular expression is found somewhere in the input
119  /// string, false otherwise. If successful, returns the list of the
120  /// captured sub-strings through the captured string array.
121  bool search(const char *string,
122  UT_StringArray &captured,
123  exint start = 0, exint end = -1) const;
124 
125  /// Returns true if the regular expression is found somewhere in the input
126  /// string, false otherwise. If successful, returns an array of
127  /// pairs of start and end indices at which the matches were found.
128  bool search(const char* string, UT_Array< UT_Pair<exint, exint> >& indicies,
129  exint start = 0, exint end = -1) const;
130 
131  /// Returns true if the regular expression is found somewhere in the input
132  /// string, false otherwise. If successful, fills results array
133  /// with strings that match the entire regular expression (no capture
134  /// groups)
135  bool searchall(const char* string, UT_StringArray& results,
136  exint start = 0, exint end = -1) const;
137 
138  /// Splits the given string into substrings when the regular expression
139  /// is found. Returns true if at least one split has occurred. If no
140  /// splits occur, results will contain one entry which is the
141  /// original string.
142  bool split(const char* string, UT_StringArray& results,
143  exint maxsplits = 0) const;
144 
145  bool split(const UT_StringView &str,
146  UT_StringViewArray &results,
147  exint maxsplits = 0) const;
148 
149  /// Replaces the first occurrence of the regular expression in the source
150  /// string with the replacement value from replace, and writes out the
151  /// result. If replace_all is set to true, all occurrences are replaced.
152  /// If there is no match, the result buffer is set to be identical to the
153  /// input string, unless copy_on_fail is false, in which case it is left
154  /// untouched.
155  bool replace(UT_WorkBuffer &result,
156  const char *string,
157  const char *replace,
158  bool replace_all = false,
159  bool copy_on_fail = true
160  ) const;
161 
162  /// Same as above except a fixed number of replacements can be set
163  /// instead of just all or just one. 0 means replace all.
164  bool replace(UT_WorkBuffer &result,
165  const char *string,
166  const char *replace,
167  exint num_replacements,
168  bool copy_on_fail = true
169  ) const;
170 
171 private:
172  hboost::xpressive::cregex myExpr;
173  UT_String myError;
174 };
175 
176 #endif
std::string getErrorString(int errorNum)
Return a string (possibly empty) describing the given system error code.
static FromBundleGlobTag FromBundleGlob
Definition: UT_Regex.h:27
GLuint start
Definition: glcorearb.h:474
static FromGlobTag FromGlob
Definition: UT_Regex.h:25
#define UT_API
Definition: UT_API.h:12
UT_Regex(const char *expr)
Definition: UT_Regex.h:35
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:30
int64 exint
Definition: SYS_Types.h:109
GLuint GLuint end
Definition: glcorearb.h:474
UT_Regex()
Definition: UT_Regex.h:31
UT_Regex(const char *pattern, FromBundleGlobTag)
Definition: UT_Regex.h:51
UT_Regex(const char *pattern, FromGlobTag)
Definition: UT_Regex.h:43