00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __UT_String_h__
00022 #define __UT_String_h__
00023
00024 #include "UT_API.h"
00025 #include <iostream.h>
00026 #include <string.h>
00027 #include <malloc.h>
00028 #include <ctype.h>
00029 #include <stddef.h>
00030 #include <string>
00031 #include <SYS/SYS_String.h>
00032 #include "UT_Assert.h"
00033 #include "UT_Algorithm.h"
00034
00035 #ifdef WIN32
00036 #define strcasecmp stricmp
00037 #define strncasecmp strnicmp
00038 #endif
00039
00040 class UT_OStream;
00041 class UT_String;
00042 class UT_StringCshIO;
00043 class UT_WorkArgs;
00044 class UT_IStream;
00045 class ut_PatternRecord;
00046 class UT_StringMMPattern;
00047 class UT_StringArray;
00048
00049
00050
00051
00052 UT_API extern void UTvarLookup(const char *name, UT_String &result);
00053 UT_API extern void UTexprLookup(const char *name, UT_String &result);
00054
/// Returns true when @c s is a non-null pointer to a string holding at
/// least one character; a null pointer and "" both yield false.
inline bool UTisstring(const char *s)
{
    if (s == NULL)
        return false;
    return *s != '\0';
}
00056
00057
00058
00059
/// isdigit() wrapper for plain @c char. The value is routed through
/// unsigned char first so a negative char is never handed to isdigit().
inline bool UTisdigit(char c)
{
    const unsigned char uc = static_cast<unsigned char>(c);
    return isdigit(uc) != 0;
}
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077 class UT_API UT_String
00078 {
00079 public:
00080
00081
00082
00083 enum UT_AlwaysDeepType { ALWAYS_DEEP };
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094 UT_String(const char *str = 0, int deepCopy = 0, int len = -1);
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108 explicit UT_String(const std::string &str)
00109 : myIsReference(false),
00110 myIsAlwaysDeep(false)
00111 { myData = strdup(str.c_str()); }
00112
00113
00114 UT_String(UT_AlwaysDeepType, const char *str = 0)
00115 : myIsReference(false),
00116 myIsAlwaysDeep(true)
00117 { myData = str ? strdup(str) : 0; }
00118
00119
00120
00121 UT_String(UT_AlwaysDeepType, const std::string &str)
00122 : myIsReference(false),
00123 myIsAlwaysDeep(true)
00124 { myData = strdup(str.c_str()); }
00125
00126
00127
00128
00129
00130
00131 UT_String(const UT_String &str);
00132
00133 ~UT_String();
00134
00135
00136 void setAlwaysDeep(bool deep)
00137 {
00138 myIsAlwaysDeep = deep;
00139 if (deep && myIsReference)
00140 {
00141 if (myData != NULL)
00142 harden();
00143 else
00144 {
00145
00146
00147
00148 myIsReference = false;
00149 }
00150 }
00151 }
00152 bool isAlwaysDeep() const
00153 {
00154 return myIsAlwaysDeep;
00155 }
00156
00157 void swap( UT_String &other );
00158
00159
00160
00161 void harden();
00162 void harden(const char *s, int len = -1);
00163 void hardenIfNeeded()
00164 {
00165 if (myIsReference)
00166 {
00167 if (isstring())
00168 harden();
00169 else
00170 *this = "";
00171 }
00172 }
00173 void hardenIfNeeded(const char *s)
00174 {
00175 if (s && *s)
00176 harden(s);
00177 else
00178 *this = "";
00179 }
00180
00181
00182
00183 bool isHard() const { return !myIsReference; }
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194 char * steal(void)
00195 {
00196 if (!myIsAlwaysDeep)
00197 {
00198 if (myIsReference)
00199 myData = strdup(myData ? myData : "");
00200 myIsReference = true;
00201 return myData;
00202 }
00203 else
00204 {
00205
00206
00207 return strdup(myData ? myData : "");
00208 }
00209 }
00210
00211
00212
00213
00214
00215
00216 void adopt(char *s)
00217 {
00218 if (!myIsReference)
00219 {
00220 if (s != myData)
00221 free(myData);
00222 }
00223 myData = s;
00224 myIsReference = false;
00225 }
00226 void adopt(UT_String &str)
00227 {
00228 adopt(str.steal());
00229 }
00230
00231
00232
00233 void saveBinary(ostream &os) const { save(os, true); }
00234
00235
00236
00237 void saveAscii(ostream &os) const { save(os, false); }
00238 void saveAscii(UT_OStream &os) const { save(os, false); }
00239
00240
00241 void save(ostream &os, bool binary) const;
00242 void save(UT_OStream &os, bool binary) const;
00243
00244
00245 bool load(UT_IStream &is);
00246
00247
00248 void clear()
00249 { *this = (const char *)NULL; }
00250
00251
00252
00253 void prepend(const char *prefix);
00254 void prepend(char ch);
00255
00256
00257
00258 void append(char ch);
00259
00260 UT_String &operator=(const UT_String &str);
00261 UT_String &operator=(const char *str);
00262 UT_String &operator=(const std::string &str);
00263 UT_String &operator+=(const char *str)
00264 {
00265 if (!isstring())
00266 {
00267
00268
00269
00270 harden(str);
00271 }
00272 else
00273 {
00274 bool same = (str == myData);
00275 harden();
00276 if (str)
00277 {
00278 int mylen = (int)strlen(myData);
00279 myData = (char *)realloc(myData,
00280 mylen+strlen(str)+1);
00281 if (!same)
00282 {
00283 strcpy(&myData[mylen], str);
00284 }
00285 else
00286 {
00287 memcpy(myData + mylen, myData, mylen);
00288 myData[mylen * 2] = '\0';
00289 }
00290 }
00291 }
00292 return *this;
00293 }
00294
00295 UT_String &operator+=(const UT_String &str)
00296 {
00297 *this += (const char *)str.myData;
00298 return *this;
00299 }
00300
00301
00302 int compare(const char *str, bool case_sensitive=true) const
00303 {
00304
00305
00306 if (myData==0 || str==0)
00307 {
00308 if (myData) return 1;
00309 if(str) return -1;
00310 return 0;
00311 }
00312 if (case_sensitive)
00313 return strcmp(myData, str);
00314 return strcasecmp(myData, str);
00315 }
00316 int compare(const UT_String &str, bool case_sensitive=true) const
00317 {
00318 return compare(str.myData,case_sensitive);
00319 }
00320
00321 bool equal(const char *str, bool case_sensitive=true) const
00322 {
00323 return compare(str,case_sensitive)==0;
00324 }
00325 bool equal(const UT_String &str, bool case_sensitive=true) const
00326 {
00327 return compare(str.myData,case_sensitive)==0;
00328 }
00329
00330 bool operator==(const char *str) const
00331 {
00332 return compare(str)==0;
00333 }
00334 bool operator==(const UT_String &str) const
00335 {
00336 return compare(str.myData)==0;
00337 }
00338 bool operator!=(const char *str) const
00339 {
00340 return compare(str)!=0;
00341 }
00342 bool operator!=(const UT_String &str) const
00343 {
00344 return compare(str.myData)!=0;
00345 }
00346 bool operator<(const char *str) const
00347 {
00348 return compare(str)<0;
00349 }
00350 bool operator<(const UT_String &str) const
00351 {
00352 return compare(str.myData)<0;
00353 }
00354 bool operator<=(const char *str) const
00355 {
00356 return compare(str)<=0;
00357 }
00358 bool operator<=(const UT_String &str) const
00359 {
00360 return compare(str.myData)<=0;
00361 }
00362 bool operator>(const char *str) const
00363 {
00364 return compare(str)>0;
00365 }
00366 bool operator>(const UT_String &str) const
00367 {
00368 return compare(str.myData)>0;
00369 }
00370 bool operator>=(const char *str) const
00371 {
00372 return compare(str)>=0;
00373 }
00374 bool operator>=(const UT_String &str) const
00375 {
00376 return compare(str.myData)>=0;
00377 }
00378
00379 #ifdef METROWERKS_BUILD
00380 operator char *() const
00381 { return myData; }
00382 #else
00383 operator const char *() const
00384 { return (const char *)myData; }
00385 operator char *()
00386 { return myData; }
00387 #endif
00388 const char *buffer() const
00389 { return myData; }
00390 const char *nonNullBuffer() const
00391 { return myData ? myData : ""; }
00392 char operator()(unsigned i) const
00393 {
00394 UT_ASSERT_P( isstring() );
00395 UT_ASSERT_SLOW(i <= strlen(myData));
00396 return myData[i];
00397 }
00398 char &operator()(unsigned i)
00399 {
00400 harden();
00401 return myData[i];
00402 }
00403 int toInt() const;
00404 fpreal toFloat() const;
00405
00406
00407
00408
00409 std::string toStdString() const;
00410
00411
00412
00413
00414
00415
00416 unsigned length(void) const
00417 { return (myData) ? (unsigned)strlen(myData) : 0; }
00418
00419
00420 int64 getMemoryUsage() const
00421 {
00422 return
00423 sizeof(*this)
00424 + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
00425 }
00426
00427
00428 char *findChar(int c) const
00429 { return (myData) ? strchr(myData, c) : 0; }
00430
00431
00432
00433 char *findChar(const char *str) const
00434 { return (myData) ? strpbrk(myData, str) : NULL; }
00435 char *findChar(const UT_String &str) const
00436 { return findChar((const char *) str); }
00437
00438
00439
00440 char *lastChar(int c) const
00441 { return myData ? strrchr(myData, c):0; }
00442
00443
00444 int countChar(int c) const;
00445
00446
00447 int count(const char *str) const;
00448
00449 char *findNonSpace() const;
00450 const char *findWord(const char *word) const;
00451 bool findString(const char *str, bool fullword,
00452 bool usewildcards) const;
00453 int changeWord(const char *from, const char *to, int all=1);
00454 int changeString(const char *from, const char *to, bool fullword);
00455 int changeQuotedWord(const char *from, const char *to,
00456 int quote = '`', int all = 1);
00457
00458 int findLongestCommonSuffix( const char *with );
00459
00460
00461
00462
00463
00464
00465 int substr(UT_String &buf, int index, int len=0) const;
00466
00467
00468 unsigned isFloat(int skip_spaces = 0, int loose = 0) const;
00469
00470 unsigned isInteger(int skip_spaces = 0) const;
00471
00472 void toUpper()
00473 {
00474 char *ptr;
00475 harden();
00476 for (ptr=myData; *ptr; ptr++)
00477 *ptr = (char)toupper(*ptr);
00478 }
00479 void toLower()
00480 {
00481 char *ptr;
00482 harden();
00483 for (ptr=myData; *ptr; ptr++)
00484 *ptr = (char)tolower(*ptr);
00485 }
00486
00487
00488
00489
00490
00491
00492
00493 const char *fileName() const
00494 {
00495 const char *fname;
00496
00497 if (!myData)
00498 return 0;
00499
00500 fname = lastChar('/');
00501
00502 if (!fname)
00503 {
00504 fname = myData;
00505 }
00506 else
00507 {
00508 fname++;
00509 }
00510 return fname;
00511 }
00512
00513 const char *fileExtension( ) const
00514 {
00515 if( !isstring() )
00516 return 0;
00517
00518 const char *dot = lastChar('.');
00519 if (dot)
00520 {
00521 const char *slash = lastChar('/');
00522
00523 if (slash && slash > dot)
00524 dot = NULL;
00525 }
00526 return dot;
00527 }
00528
00529
00530
00531
00532 bool matchFileExtension(const char *match_extension) const
00533 {
00534 const char *ext = fileExtension();
00535 return ext && !SYSstrcasecmp(ext, match_extension);
00536 }
00537
00538
00539
00540 UT_String pathUpToExtension() const;
00541
00542
00543
00544
00545
00546
00547
00548 void splitPath(UT_String &dir_name, UT_String &file_name) const;
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558 int parseNumberedFilename(UT_String &prefix,
00559 UT_String &frame,
00560 UT_String &suff,
00561 bool negative = true,
00562 bool fractional = false) const;
00563
00564 int isstring() const
00565 { return (myData && *myData); }
00566
00567
00568
00569
00570 int trimSpace(bool leaveSingleSpaceBetweenWords = false);
00571
00572
00573
00574 int trimBoundingSpace();
00575
00576
00577
00578
00579 int strip(const char *chars);
00580
00581
00582
00583
00584 void protectString();
00585
00586
00587
00588 bool isQuotedString(char delimiter='\'') const;
00589
00590
00591
00592
00593
00594 UT_String makeQuotedString(char delimiter='\'') const;
00595
00596
00597
00598
00599 UT_String makeSmartQuotedString(char default_delimiter='\'') const;
00600
00601 void expandControlSequences();
00602
00603 bool hasWhiteSpace() const;
00604
00605 void removeTrailingSpace();
00606 void removeTrailingChars(char chr);
00607
00608 void removeTrailingDigits();
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620 int cshParse(char *argv[], int maxArgs,
00621 void (*vlookup)(const char *, UT_String&)=UTvarLookup,
00622 void (*elookup)(const char *, UT_String&)=UTexprLookup,
00623 int *error = 0,
00624 UT_StringCshIO *io=0);
00625
00626 int cshParse(UT_WorkArgs &argv,
00627 void (*vlookup)(const char *, UT_String&)=UTvarLookup,
00628 void (*elookup)(const char *, UT_String&)=UTexprLookup,
00629 int *error = 0,
00630 UT_StringCshIO *io=0);
00631
00632
00633
00634
00635
00636
00637
00638 int dosParse(UT_WorkArgs &argv);
00639 int dosParse(char *argv[], int maxArgs);
00640
00641
00642
00643
00644
00645
00646 int parse(char *argv[], int maxArgs)
00647 {
00648 harden();
00649 return parseInPlace(argv, maxArgs);
00650 }
00651
00652
00653 int parseInPlace(char *argv[], int maxArgs);
00654 int parse(UT_WorkArgs &argv)
00655 {
00656 harden();
00657 return parseInPlace(argv);
00658 }
00659 int parseInPlace(UT_WorkArgs &argv);
00660
00661 int tokenize(char *argv[], int maxArgs, char separator)
00662 {
00663 harden();
00664 return tokenizeInPlace(argv, maxArgs, separator);
00665 }
00666 int tokenizeInPlace(char *argv[], int maxArgs, char separator);
00667 int tokenize(UT_WorkArgs &argv, char separator)
00668 {
00669 harden();
00670 return tokenizeInPlace(argv, separator);
00671 }
00672 int tokenizeInPlace(UT_WorkArgs &argv, char separator);
00673 int tokenize(char *argv[], int maxArgs,
00674 const char *separators = " \t\n")
00675 {
00676 harden();
00677 return tokenizeInPlace(argv, maxArgs, separators);
00678 }
00679 int tokenizeInPlace(char *argv[], int maxArgs,
00680 const char *separators = " \t\n");
00681 int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
00682 {
00683 harden();
00684 return tokenizeInPlace(argv, separators);
00685 }
00686 int tokenizeInPlace(UT_WorkArgs &argv,
00687 const char *separators = " \t\n");
00688 int tokenize(UT_StringArray &list, const char *separators = " \t\n")
00689 {
00690 harden();
00691 return tokenizeInPlace(list, separators);
00692 }
00693 int tokenizeInPlace(UT_StringArray &list,
00694 const char *separators = " \t\n");
00695
00696
00697
00698 void expandVariables();
00699
00700
00701 inline unsigned hash() const
00702 {
00703 return hash(myData);
00704 }
00705
/// Multiplicative string hash: for each character @c c of @c str, in
/// order, @c code becomes @c 37*code+c. A null @c str returns @c code
/// unchanged. @c code lets a hash be continued from a previous value.
///
/// The character is added as a plain @c char, so characters above 0x7F
/// contribute differently depending on whether @c char is signed.
static inline unsigned hash(const char *str, unsigned code = 0)
{
    if (!str)
        return code;

    while (*str != '\0')
    {
        const char ch = *str++;
        code = code * 37 + ch;
    }
    return code;
}
00735
00736
00737
00738
00739
00740
00741 unsigned match(const char *pattern, int caseSensitive=1) const;
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752 unsigned pathMatch(const char *pattern, int caseSensitive=1) const;
00753
00754
00755
00756
00757
00758 unsigned multiMatch(const char *pattern,
00759 int caseSensitive, char separator) const;
00760 unsigned multiMatch(const char *pattern, int caseSensitive = 1,
00761 const char *separators = ", ",
00762 bool *explicitlyExcluded = 0,
00763 int *matchIndex = 0,
00764 ut_PatternRecord *pattern_record=NULL) const;
00765 unsigned multiMatch(const UT_StringMMPattern &pattern,
00766 bool *explicitlyExcluded = 0,
00767 int *matchIndex = 0,
00768 ut_PatternRecord *pattern_record=NULL) const;
00769
00770
00771
00772 unsigned multiMatchRecord(const char *pattern, int maxpatterns,
00773 char *singles, int &nsingles,
00774 char **words, int &nwords,
00775 int case_sensitive = 1,
00776 const char *separators = ", ") const;
00777 unsigned multiMatchRecord(const UT_StringMMPattern &pattern,
00778 int maxpatterns,
00779 char *singles, int &nsingles,
00780 char **words, int &nwords) const;
00781
00782
00783
00784
00785
00786
00787
00788 bool matchPattern(const UT_WorkArgs &pattern_args,
00789 bool assume_match=false) const;
00790
00791 static int multiMatchCheck(const char *pattern);
00792 static int wildcardMatchCheck(const char *pattern);
00793
00794
00795 unsigned contains(const char *pattern, int caseSensitive=1) const;
00796
00797 bool startsWith(const char *prefix) const;
00798
00799 bool endsWith(const char *suffix) const;
00800 bool endsWith(const char *suffix, bool case_sensitive) const;
00801
00802
00803
00804
00805
00806
00807
00808
00809 int traversePattern(int max, void *data,
00810 int (*func)(int num, int sec, void *data),
00811 unsigned int (*secfunc)(int num,void *data)=0,
00812 int offset=0) const;
00813
00814
00815 const char *fcontain(const char *pattern) const
00816 { return (myData) ? strstr(myData, pattern) : 0; }
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827 bool patternRename(const char *match_pattern, const char *replace);
00828
00829
00830
00831
00832
00833
00834 UT_String *base(void) const;
00835 const char *suffix(void) const;
00836
00837
00838
00839
00840 void incrementNumberedName();
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853 static ostream &setFormat(ostream &os, const char *fmt);
00854 ostream &setFormat(ostream &os);
00855
00856 int replacePrefix(const char *oldpref,
00857 const char *newpref);
00858 int replaceSuffix(const char *oldsuffix,
00859 const char *newsuffix);
00860
00861
00862
00863
00864
00865
00866 int expandArrays(char *names[], int max);
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876 int format(int cols);
00877
00878
00879
00880 int substitute( const char *find, const char *replacement,
00881 bool all = true );
00882
00883
00884
00885 int replace( int pos, int len, const char *str );
00886
00887
00888 int eraseHead(int len)
00889 { return replace(0, len, ""); }
00890
00891
00892 int eraseTail(int len)
00893 { return replace(length() - len, len, ""); }
00894
00895
00896 int erase(int pos = 0, int len = -1)
00897 {
00898 if (len < 0)
00899 len = length() - pos;
00900 return replace(pos, len, "");
00901 }
00902
00903
00904 int insert(int pos, const char *str)
00905 { return replace(pos, 0, str); }
00906
00907
00908
00909
00910 static int compareNumberedString(const char *s1, const char *s2,
00911 bool case_sensitive=true);
00912 static int qsortCmpNumberedString(const void *v1, const void *v2);
00913 static int qsortCmpNumberedString(char *const*v1, char *const*v2);
00914
00915
00916
00917 static int compareNumberedFilename(const char *s1, const char *s2,
00918 bool case_sensitive=false);
00919 static int qsortCmpNumberedFilename(const void *v1, const void *v2);
00920 static int qsortCmpNumberedFilename(char *const*v1, char *const*v2);
00921
00922
00923
00924
00925 static int compareVersionString(const char *s1, const char *s2);
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939 void extractProgramName(const char *path,
00940 bool strip_extension=true,
00941 bool normalize_path=true);
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953 static bool matchProgramName(const char *path, const char *expected,
00954 bool normalize_path=false);
00955
00956
00957
00958
00959 void normalizePath();
00960
00961
00962
00963
00964 static int itoa(char *str, int i);
00965 static int utoa(char *str, unsigned i);
00966
00967
00968 void itoa(int i);
00969 void utoa(int i);
00970
00971
00972
00973
00974 void itoa_pretty(int64 val);
00975
00976
00977
00978 void timeDeltaToPrettyString(fpreal time_ms);
00979
00980
00981
00982
00983 int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994 int forceValidVariableName(const char *safechars = NULL);
00995
00996
00997
00998 bool forceAlphaNumeric();
00999
01000
01001
01002
01003
01004 void getRelativePath(const char *src_fullpath,
01005 const char *dest_fullpath);
01006
01007
01008
01009
01010
01011
01012
01013
01014 static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
01015 const char *fullpath2, int len2);
01016
01017
01018
01019 bool isAbsolutePath(bool file_path=false) const;
01020
01021
01022
01023
01024
01025 bool collapseAbsolutePath(bool file_path=false);
01026
01027
01028
01029
01030
01031 bool truncateMiddle(int max_length);
01032
01033
01034
01035
01036
01037
01038
01039 static const UT_String &getEmptyString();
01040
01041
01042
01043
01044
01045 void extractModifiers(UT_String &modifiers);
01046
01047
01048
01049
01050
01051 bool applyModifiers(const UT_String &modifiers);
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061 bool applyNextModifier(const char *mod, bool all);
01062
01063 static bool isValidModifier(const char c, bool have_subst);
01064
01065
01066
01067
01068 static int findModifiers(const char *src);
01069
01070
01071
01072
01073
01074 UT_String removeRange ();
01075
01076
01077
01078 void formatByteSize(exint size, int digits=2);
01079
01080 private:
01081 template <typename OSTREAM>
01082 void saveInternal(OSTREAM &os, bool binary) const;
01083
01084 private:
01085 void freeData();
01086
01087
01088
01089
01090
01091 void doSmartCopyFrom(const char* other_string);
01092
01093 char *myData;
01094 bool myIsReference:1,
01095 myIsAlwaysDeep:1;
01096
01097
01098
01099
01100 friend UT_API ostream &operator<<(ostream &os, const UT_String &d);
01101 friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
01102 };
01103
01104 class UT_API UT_StringCshIO {
01105 public:
01106 UT_String myOut;
01107 UT_String myErr;
01108 UT_String myIn;
01109 short myDoubleOut;
01110 short myDoubleIn;
01111 };
01112
01113 UT_API ostream & do_setformat(ostream &os, const char fmt[]);
01114
01115 UT_SWAPPER_CLASS(UT_String);
01116
01117 #endif