00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __UT_String_h__
00022 #define __UT_String_h__
00023
00024 #include "UT_API.h"
00025 #include <iostream.h>
00026 #include <string.h>
00027 #include <malloc.h>
00028 #include <ctype.h>
00029 #include <stddef.h>
00030 #include <string>
00031 #include "UT_Assert.h"
00032 #include "UT_Algorithm.h"
00033
// On WIN32 builds, map the POSIX case-insensitive comparison names onto the
// stricmp/strnicmp spellings.
#ifdef WIN32
#define strcasecmp  stricmp
#define strncasecmp strnicmp
#endif
00038
// Forward declarations of types that this header only uses by reference or
// pointer; their definitions live elsewhere.
class UT_WorkBuffer;
class UT_String;
class UT_StringCshIO;
class UT_WorkArgs;
class UT_IStream;
class ut_PatternRecord;
00045
00046
00047
00048
00049 UT_API extern void UTvarLookup(const char *name, UT_String &result);
00050 UT_API extern void UTexprLookup(const char *name, UT_String &result);
00051
/// Returns true when `s` is a non-null pointer to a non-empty C string.
inline bool UTisstring(const char *s)
{
    return s != 0 && *s != '\0';
}
00053
00054
00055
00056
/// isdigit() wrapper that is safe for any `char` value: the argument is
/// converted to `unsigned char` first, so negative `char` values are never
/// passed to isdigit().
inline bool UTisdigit(char c)
{
    return isdigit((unsigned char) c) != 0;
}
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074 class UT_API UT_String
00075 {
00076 public:
00077
00078
00079
00080 enum UT_AlwaysDeepType { ALWAYS_DEEP };
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091 UT_String(const char *str = 0, int deepCopy = 0, int len = -1);
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105 explicit UT_String(const std::string &str)
00106 : myIsReference(false),
00107 myIsAlwaysDeep(false)
00108 { myData = strdup(str.c_str()); }
00109
00110
00111 UT_String(UT_AlwaysDeepType, const char *str = 0)
00112 : myIsReference(false),
00113 myIsAlwaysDeep(true)
00114 { myData = str ? strdup(str) : 0; }
00115
00116
00117
00118 UT_String(UT_AlwaysDeepType, const std::string &str)
00119 : myIsReference(false),
00120 myIsAlwaysDeep(true)
00121 { myData = strdup(str.c_str()); }
00122
00123
00124
00125
00126
00127
00128 UT_String(const UT_String &str);
00129
00130 ~UT_String();
00131
00132
00133 void setAlwaysDeep(bool deep)
00134 {
00135 myIsAlwaysDeep = deep;
00136 if (deep && myIsReference)
00137 {
00138 if (myData != NULL)
00139 harden();
00140 else
00141 {
00142
00143
00144
00145 myIsReference = false;
00146 }
00147 }
00148 }
00149 bool isAlwaysDeep() const
00150 {
00151 return myIsAlwaysDeep;
00152 }
00153
00154 void swap( UT_String &other );
00155
00156
00157
00158 void harden();
00159 void harden(const char *s, int len = -1);
00160 void hardenIfNeeded();
00161 void hardenIfNeeded(const char *s);
00162
00163
00164
00165 bool isHard() const { return !myIsReference; }
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176 char * steal(void)
00177 {
00178 if (!myIsAlwaysDeep)
00179 {
00180 if (myIsReference)
00181 myData = strdup(myData ? myData : "");
00182 myIsReference = true;
00183 return myData;
00184 }
00185 else
00186 {
00187
00188
00189 return strdup(myData ? myData : "");
00190 }
00191 }
00192
00193
00194
00195
00196
00197
00198 void adopt(char *s)
00199 {
00200 if (!myIsReference)
00201 {
00202 if (s != myData)
00203 free(myData);
00204 }
00205 myData = s;
00206 myIsReference = false;
00207 }
00208 void adopt(UT_String &str)
00209 {
00210 adopt(str.steal());
00211 }
00212
00213
00214
00215 void saveBinary(ostream &os) const { save(os, 1); }
00216
00217
00218
00219 void saveAscii(ostream &os) const { save(os, 0); }
00220
00221
00222 void save(ostream &os, int binary) const;
00223
00224
00225 bool load(UT_IStream &is);
00226
00227
00228 void clear()
00229 { *this = (const char *)NULL; }
00230
00231
00232
00233 void prepend(const char *prefix);
00234 void prepend(char ch);
00235
00236
00237
00238 void append(char ch);
00239
00240 UT_String &operator=(const UT_String &str);
00241 UT_String &operator=(const char *str);
00242 UT_String &operator=(const std::string &str);
00243 UT_String &operator+=(const char *str)
00244 {
00245 if (!isstring())
00246 {
00247
00248
00249
00250 harden(str);
00251 }
00252 else
00253 {
00254 bool same = (str == myData);
00255 harden();
00256 if (str)
00257 {
00258 int mylen = (int)strlen(myData);
00259 myData = (char *)realloc(myData,
00260 mylen+strlen(str)+1);
00261 if (!same)
00262 {
00263 strcpy(&myData[mylen], str);
00264 }
00265 else
00266 {
00267 memcpy(myData + mylen, myData, mylen);
00268 myData[mylen * 2] = '\0';
00269 }
00270 }
00271 }
00272 return *this;
00273 }
00274
00275 UT_String &operator+=(const UT_String &str)
00276 {
00277 *this += (const char *)str.myData;
00278 return *this;
00279 }
00280 unsigned operator==(const char *str) const
00281 {
00282 if (!myData || !str)
00283 return (!myData && !str) ? 1 : 0;
00284 return (strcmp(str, myData) == 0) ? 1 : 0;
00285 }
00286 unsigned operator==(const UT_String &str) const
00287 {
00288 if (!myData || !str.myData)
00289 return (!myData && !str.myData) ? 1 : 0;
00290 return (strcmp(str.myData, myData) == 0) ? 1 : 0;
00291 }
00292 unsigned operator!=(const char *str) const
00293 {
00294 unsigned rval;
00295 if (!myData || !str)
00296 {
00297 rval = (!myData && !str) ? 0U : 1U;
00298 }
00299 else rval = (strcmp(str, myData) != 0) ? 1U : 0U;
00300 return rval;
00301 }
00302 unsigned operator!=(const UT_String &str) const
00303 {
00304 return *this != (const char *)str.myData;
00305 }
00306 unsigned operator<(const char *str) const
00307 {
00308 unsigned rval;
00309 if (!myData)
00310 {
00311 if (str)
00312 rval = 1;
00313 else
00314 rval = 0;
00315 }
00316 else
00317 {
00318 if (!str)
00319 rval = 0;
00320 else
00321 rval = (strcmp(myData, str) < 0) ? 1U : 0U;
00322 }
00323 return rval;
00324 }
00325 unsigned operator<(const UT_String &str) const
00326 {
00327 return *this < (const char *)str.myData;
00328 }
00329
00330 unsigned operator<=(const char *str) const
00331 {
00332 unsigned rval;
00333 if (!myData)
00334 rval = 1;
00335 else
00336 {
00337 if (!str)
00338 rval = 0;
00339 else
00340 rval = (strcmp(myData, str) > 0) ? 0U : 1U;
00341 }
00342 return rval;
00343 }
00344 unsigned operator<=(const UT_String &str) const
00345 {
00346 return *this <= (const char *)str.myData;
00347 }
00348 unsigned operator>(const char *str) const
00349 {
00350 unsigned rval;
00351 if (!myData)
00352 rval = 0;
00353 else
00354 {
00355 if (!str)
00356 rval = 1;
00357 else
00358 rval = (strcmp(myData, str) > 0) ? 1U : 0U;
00359 }
00360 return rval;
00361 }
00362 unsigned operator>(const UT_String &str) const
00363 {
00364 return *this > (const char *)str.myData;
00365 }
00366 unsigned operator>=(const char *str) const
00367 {
00368 unsigned rval;
00369 if (!myData)
00370 {
00371 if (str)
00372 rval = 0;
00373 else
00374 rval = 1;
00375 }
00376 else
00377 {
00378 if (!str)
00379 rval = 1;
00380 else
00381 rval = (strcmp(myData, str) < 0) ? 0U : 1U;
00382 }
00383 return rval;
00384 }
00385 unsigned operator>=(const UT_String &str) const
00386 {
00387 return *this >= (const char *)str.myData;
00388 }
00389
00390 #ifdef METROWERKS_BUILD
00391 operator char *() const
00392 { return myData; }
00393 #else
00394 operator const char *() const
00395 { return (const char *)myData; }
00396 operator char *()
00397 { return myData; }
00398 #endif
00399 const char *buffer() const
00400 { return myData; }
00401 const char *nonNullBuffer() const
00402 { return myData ? myData : ""; }
00403 char operator()(unsigned i) const
00404 {
00405 UT_ASSERT_P( isstring() );
00406 UT_ASSERT_SLOW(i <= strlen(myData));
00407 return myData[i];
00408 }
00409 char &operator()(unsigned i)
00410 {
00411 harden();
00412 return myData[i];
00413 }
00414 int toInt() const;
00415 fpreal toFloat() const;
00416
00417
00418
00419
00420 std::string toStdString() const;
00421
00422
00423
00424
00425
00426
00427 unsigned length(void) const
00428 { return (myData) ? (unsigned)strlen(myData) : 0; }
00429
00430
00431 int64 getMemoryUsage() const
00432 {
00433 return
00434 sizeof(*this)
00435 + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
00436 }
00437
00438
00439 char *findChar(int c) const
00440 { return (myData) ? strchr(myData, c) : 0; }
00441
00442
00443
00444 char *findChar(const char *str) const
00445 { return (myData) ? strpbrk(myData, str) : NULL; }
00446 char *findChar(const UT_String &str) const
00447 { return findChar((const char *) str); }
00448
00449
00450
00451 char *lastChar(int c) const
00452 { return myData ? strrchr(myData, c):0; }
00453
00454
00455 int countChar(int c) const;
00456
00457 char *findNonSpace() const;
00458 const char *findWord(const char *word) const;
00459 bool findString(const char *str, bool fullword,
00460 bool usewildcards) const;
00461 int changeWord(const char *from, const char *to, int all=1);
00462 int changeString(const char *from, const char *to, bool fullword);
00463 int changeQuotedWord(const char *from, const char *to,
00464 int quote = '`', int all = 1);
00465
00466 int findLongestCommonSuffix( const char *with );
00467
00468
00469
00470
00471
00472
00473 int substr(UT_String &buf, int index, int len=0) const;
00474
00475
00476 unsigned isFloat(int skip_spaces = 0, int loose = 0) const;
00477
00478 unsigned isInteger(int skip_spaces = 0) const;
00479
00480 void toUpper()
00481 {
00482 char *ptr;
00483 harden();
00484 for (ptr=myData; *ptr; ptr++)
00485 *ptr = (char)toupper(*ptr);
00486 }
00487 void toLower()
00488 {
00489 char *ptr;
00490 harden();
00491 for (ptr=myData; *ptr; ptr++)
00492 *ptr = (char)tolower(*ptr);
00493 }
00494
00495
00496
00497
00498
00499
00500
00501 const char *fileName() const
00502 {
00503 const char *fname;
00504
00505 if (!myData) return 0;
00506
00507 fname = strrchr(myData, '/');
00508 if (!fname)
00509 fname = myData;
00510 else
00511 fname++;
00512 return fname;
00513 }
00514
00515 const char *fileExtension( ) const
00516 {
00517 const char * slash;
00518 const char * dot;
00519
00520 if( !isstring() )
00521 return 0;
00522
00523 dot = lastChar( '.' );
00524 slash = lastChar( '/' );
00525
00526 return (slash < dot) ? dot : 0;
00527 }
00528
00529
00530
00531 UT_String pathUpToExtension() const;
00532
00533
00534
00535
00536
00537
00538
00539 void splitPath(UT_String &dir_name, UT_String &file_name) const;
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549 int parseNumberedFilename(UT_String &prefix,
00550 UT_String &frame,
00551 UT_String &suff,
00552 bool negative = true,
00553 bool fractional = false) const;
00554
00555 int isstring() const
00556 { return (myData && *myData); }
00557
00558
00559
00560
00561 int trimSpace(bool leaveSingleSpaceBetweenWords = false);
00562
00563
00564
00565 int trimBoundingSpace();
00566
00567
00568
00569
00570 int strip(const char *chars);
00571
00572
00573
00574 void protectString();
00575
00576
00577
00578 bool isQuotedString(char delimiter='\'') const;
00579
00580
00581
00582
00583
00584 UT_String makeQuotedString(char delimiter='\'') const;
00585
00586
00587
00588
00589 UT_String makeSmartQuotedString(char default_delimiter='\'') const;
00590
00591 void expandControlSequences();
00592
00593 bool hasWhiteSpace() const;
00594
00595 void removeTrailingSpace();
00596
00597 void removeTrailingDigits();
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609 int cshParse(char *argv[], int maxArgs,
00610 void (*vlookup)(const char *, UT_String&)=UTvarLookup,
00611 void (*elookup)(const char *, UT_String&)=UTexprLookup,
00612 int *error = 0,
00613 UT_StringCshIO *io=0);
00614
00615 int cshParse(UT_WorkArgs &argv,
00616 void (*vlookup)(const char *, UT_String&)=UTvarLookup,
00617 void (*elookup)(const char *, UT_String&)=UTexprLookup,
00618 int *error = 0,
00619 UT_StringCshIO *io=0);
00620
00621
00622
00623
00624
00625
00626
00627 int dosParse(UT_WorkArgs &argv);
00628 int dosParse(char *argv[], int maxArgs);
00629
00630
00631
00632
00633
00634
00635 int parse(char *argv[], int maxArgs)
00636 {
00637 harden();
00638 return parseInPlace(argv, maxArgs);
00639 }
00640
00641
00642 int parseInPlace(char *argv[], int maxArgs);
00643 int parse(UT_WorkArgs &argv)
00644 {
00645 harden();
00646 return parseInPlace(argv);
00647 }
00648 int parseInPlace(UT_WorkArgs &argv);
00649
00650 int tokenize(char *argv[], int maxArgs, char separator)
00651 {
00652 harden();
00653 return tokenizeInPlace(argv, maxArgs, separator);
00654 }
00655 int tokenizeInPlace(char *argv[], int maxArgs, char separator);
00656 int tokenize(UT_WorkArgs &argv, char separator)
00657 {
00658 harden();
00659 return tokenizeInPlace(argv, separator);
00660 }
00661 int tokenizeInPlace(UT_WorkArgs &argv, char separator);
00662 int tokenize(char *argv[], int maxArgs,
00663 const char *separators = " \t\n")
00664 {
00665 harden();
00666 return tokenizeInPlace(argv, maxArgs, separators);
00667 }
00668 int tokenizeInPlace(char *argv[], int maxArgs,
00669 const char *separators = " \t\n");
00670 int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
00671 {
00672 harden();
00673 return tokenizeInPlace(argv, separators);
00674 }
00675 int tokenizeInPlace(UT_WorkArgs &argv,
00676 const char *separators = " \t\n");
00677
00678
00679 void expandVariables();
00680
00681
00682 inline unsigned hash() const
00683 {
00684 return hash(myData);
00685 }
00686
00687 static inline unsigned hash(const char *str, unsigned code = 0)
00688 {
00689 char c;
00690
00691
00692
00693
00694
00695
00696
00697
00698 if (str)
00699 {
00700 for (; c = *str; str++)
00701 {
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711 code = 37 * code + c;
00712 }
00713 }
00714 return code;
00715 }
00716
00717
00718
00719
00720
00721
00722 unsigned match(const char *pattern, int caseSensitive=1) const;
00723
00724
00725
00726
00727 unsigned multiMatch(const char *pattern,
00728 int caseSensitive, char separator) const;
00729 unsigned multiMatch(const char *pattern, int caseSensitive = 1,
00730 const char *separators = ", ",
00731 bool *explicitlyExcluded = 0,
00732 int *matchIndex = 0,
00733 ut_PatternRecord *pattern_record=NULL) const;
00734
00735
00736
00737 unsigned multiMatchRecord(const char *pattern, int maxpatterns,
00738 char *singles, int &nsingles,
00739 char **words, int &nwords,
00740 int case_sensitive = 1,
00741 const char *separators = ", ") const;
00742
00743
00744
00745
00746
00747
00748
00749 bool matchPattern(const UT_WorkArgs &pattern_args,
00750 bool assume_match=false);
00751
00752 static int multiMatchCheck(const char *pattern);
00753 static int wildcardMatchCheck(const char *pattern);
00754
00755
00756 unsigned contains(const char *pattern, int caseSensitive=1) const;
00757
00758 bool startsWith(const char *prefix) const;
00759
00760 bool endsWith(const char *suffix) const;
00761 bool endsWith(const char *suffix, bool case_sensitive) const;
00762
00763
00764
00765
00766
00767
00768
00769
00770 int traversePattern(int max, void *data,
00771 int (*func)(int num, int sec, void *data),
00772 unsigned int (*secfunc)(int num,void *data)=0,
00773 int offset=0) const;
00774
00775
00776 const char *fcontain(const char *pattern) const
00777 { return (myData) ? strstr(myData, pattern) : 0; }
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788 bool patternRename(const char *match_pattern, const char *replace);
00789
00790
00791
00792
00793
00794
00795 UT_String *base(void) const;
00796 const char *suffix(void) const;
00797
00798
00799
00800
00801 void incrementNumberedName();
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814 static ostream &setFormat(ostream &os, const char *fmt);
00815 ostream &setFormat(ostream &os);
00816
00817 int replacePrefix(const char *oldpref,
00818 const char *newpref);
00819 int replaceSuffix(const char *oldsuffix,
00820 const char *newsuffix);
00821
00822
00823
00824
00825
00826
00827 int expandArrays(char *names[], int max);
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837 int format(int cols);
00838
00839
00840
00841 int substitute( const char *find, const char *replacement,
00842 bool all = true );
00843
00844
00845
00846 int replace( int pos, int len, const char *str );
00847
00848
00849 int insert(int pos, const char *str)
00850 { return replace(pos, 0, str); }
00851
00852
00853
00854
00855 static int compareNumberedString(const char *s1, const char *s2,
00856 bool case_sensitive=true);
00857 static int qsortCmpNumberedString(const void *v1, const void *v2);
00858 static int qsortCmpNumberedString(char *const*v1, char *const*v2);
00859
00860
00861
00862 static int compareNumberedFilename(const char *s1, const char *s2,
00863 bool case_sensitive=false);
00864 static int qsortCmpNumberedFilename(const void *v1, const void *v2);
00865 static int qsortCmpNumberedFilename(char *const*v1, char *const*v2);
00866
00867
00868
00869
00870 static int itoa(char *str, int i);
00871 static int utoa(char *str, unsigned i);
00872
00873
00874 void itoa(int i);
00875 void utoa(int i);
00876
00877
00878
00879
00880 void itoa_pretty(int64 val);
00881
00882
00883
00884
00885 int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896 int forceValidVariableName(const char *safechars = NULL);
00897
00898
00899
00900 bool forceAlphaNumeric();
00901
00902
00903
00904
00905
00906 void getRelativePath(const char *src_fullpath,
00907 const char *dest_fullpath);
00908
00909
00910
00911
00912
00913
00914
00915
00916 static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
00917 const char *fullpath2, int len2);
00918
00919
00920
00921 bool isAbsolutePath(bool file_path=false) const;
00922
00923
00924
00925
00926
00927 bool collapseAbsolutePath(bool file_path=false);
00928
00929
00930
00931
00932
00933 bool truncateMiddle(int max_length);
00934
00935 static const UT_String &getEmptyString();
00936
00937
00938
00939
00940
00941 void extractModifiers(UT_String &modifiers);
00942
00943
00944
00945
00946
00947 bool applyModifiers(const UT_String &modifiers);
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957 bool applyNextModifier(const char *mod, bool all);
00958
00959 static bool isValidModifier(const char c, bool have_subst);
00960
00961
00962
00963
00964 static int findModifiers(const char *src);
00965
00966
00967
00968
00969
00970 UT_String removeRange ();
00971
00972 private:
00973 void freeData();
00974 char *myData;
00975 bool myIsReference:1,
00976 myIsAlwaysDeep:1;
00977
00978
00979
00980
00981 friend UT_API ostream &operator<<(ostream &os, const UT_String &d);
00982 };
00983
00984 class UT_API UT_StringCshIO {
00985 public:
00986 UT_String myOut;
00987 UT_String myErr;
00988 UT_String myIn;
00989 short myDoubleOut;
00990 short myDoubleIn;
00991 };
00992
00993 UT_API ostream & do_setformat(ostream &os, const char fmt[]);
00994
00995 UT_SWAPPER_CLASS(UT_String);
00996
00997 #endif