core/vul/vul_expand_path.cxx
Go to the documentation of this file.
00001 // This is core/vul/vul_expand_path.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \author fsm
00008 
00009 #include "vul_expand_path.h"
00010 #include <vcl_vector.h>
00011 
00012 #if defined(VCL_WIN32) || defined(como4301)
00013 
//: Lexically normalise \a path; the file system is never consulted.
// The path is split on '/' and its components are reduced as follows:
// - repeated slashes collapse to one, except that two or more leading
//   slashes followed by a name are kept as "//" so that Windows UNC names
//   survive;
// - a trailing slash is dropped, but the root itself is never dropped:
//   "/", "//" and "/foo/.." all yield "/";
// - a "." component is dropped unless it is the first component of a
//   relative path;
// - "name/.." cancels, where name is neither "." nor "..". Leading ".."
//   components are therefore kept ("../../x" stays as it is), and a
//   relative path that cancels completely ("foo/..") yields ".".
// An empty \a path is returned unchanged.
//
// \note This Windows version only performs some of the operations done by the Unix version.
// Only '/' is treated as a separator, and neither "~", the current
// directory nor symbolic links are expanded here.
vcl_string vul_expand_path_internal(vcl_string path)
{
  typedef vcl_string::size_type size_type;
  size_type const length = path.size();

  // The number of leading slashes decides between a relative path (0),
  // a rooted path (1) and a UNC-style path (2 or more).
  size_type leading_slashes = 0;
  while (leading_slashes < length && path[leading_slashes] == '/')
    ++leading_slashes;

  // Components that survive the reduction, in order.  Using a stack keeps
  // the separators implicit, so cancelling "name/.." can never leave a
  // stray slash behind (which used to turn "/foo/../bar" into "//bar").
  vcl_vector<vcl_string> kept;
  bool had_component = false;

  for (size_type begin = leading_slashes; begin < length; )
  {
    if (path[begin] == '/') {
      // repeated and trailing separators carry no information.
      ++begin;
      continue;
    }

    size_type end = begin;
    while (end < length && path[end] != '/')
      ++end;
    vcl_string const name(path, begin, end - begin);
    begin = end;
    had_component = true;

    if (name == ".") {
      // "/." is redundant; only a "." that opens a relative path is kept.
      if (leading_slashes == 0 && kept.empty())
        kept.push_back(name);
    }
    else if (name == ".." && !kept.empty() &&
             kept.back() != "." && kept.back() != "..") {
      // "name/.." cancels.  "." and ".." are not real names, so they must
      // not be swallowed by a following "..".
      kept.pop_back();
    }
    else
      kept.push_back(name);
  }

  // recompose the path from the surviving components.
  vcl_string result;
  if (leading_slashes >= 2 && !kept.empty())
    result = "//"; // initial '//' as used in windows UNC names
  else if (leading_slashes >= 1)
    result = "/";
  for (vcl_vector<vcl_string>::size_type k = 0; k < kept.size(); ++k) {
    if (k > 0)
      result += '/';
    result += kept[k];
  }

  // a relative path whose components all cancelled names the current
  // directory, not the empty string.
  if (result.empty() && had_component)
    result = ".";

  path = result;
#ifdef DEBUG
  vcl_cerr << "recomposed : " << path << '\n';
#endif

  // no more ideas
  return path;
}
00088 
00089 //:
00090 // Note: this Windows version in similar to the uncached Unix version
00091 vcl_string vul_expand_path(vcl_string path)
00092 {
00093   return vul_expand_path_internal(path);
00094 }
00095 
00096 
00097 #if VXL_USE_WIN_WCHAR_T
//: Lexically normalise the wide-character \a path; the file system is never consulted.
// The path is split on L'/' and its components are reduced as follows:
// - repeated slashes collapse to one, except that two or more leading
//   slashes followed by a name are kept as "//" so that Windows UNC names
//   survive;
// - a trailing slash is dropped, but the root itself is never dropped:
//   "/", "//" and "/foo/.." all yield "/";
// - a "." component is dropped unless it is the first component of a
//   relative path;
// - "name/.." cancels, where name is neither "." nor "..". Leading ".."
//   components are therefore kept ("../../x" stays as it is), and a
//   relative path that cancels completely ("foo/..") yields ".".
// An empty \a path is returned unchanged.
//
// \note This Windows version only performs some of the operations done by the Unix version.
// Only L'/' is treated as a separator, and neither "~", the current
// directory nor symbolic links are expanded here.
std::wstring vul_expand_path_internal(std::wstring path)
{
  typedef std::wstring::size_type size_type;
  size_type const length = path.size();

  // The number of leading slashes decides between a relative path (0),
  // a rooted path (1) and a UNC-style path (2 or more).
  size_type leading_slashes = 0;
  while (leading_slashes < length && path[leading_slashes] == L'/')
    ++leading_slashes;

  // Components that survive the reduction, in order.  Using a stack keeps
  // the separators implicit, so cancelling "name/.." can never leave a
  // stray slash behind (which used to turn "/foo/../bar" into "//bar").
  vcl_vector<std::wstring> kept;
  bool had_component = false;

  for (size_type begin = leading_slashes; begin < length; )
  {
    if (path[begin] == L'/') {
      // repeated and trailing separators carry no information.
      ++begin;
      continue;
    }

    size_type end = begin;
    while (end < length && path[end] != L'/')
      ++end;
    std::wstring const name(path, begin, end - begin);
    begin = end;
    had_component = true;

    if (name == L".") {
      // "/." is redundant; only a "." that opens a relative path is kept.
      if (leading_slashes == 0 && kept.empty())
        kept.push_back(name);
    }
    else if (name == L".." && !kept.empty() &&
             kept.back() != L"." && kept.back() != L"..") {
      // "name/.." cancels.  "." and ".." are not real names, so they must
      // not be swallowed by a following "..".
      kept.pop_back();
    }
    else
      kept.push_back(name);
  }

  // recompose the path from the surviving components.
  std::wstring result;
  if (leading_slashes >= 2 && !kept.empty())
    result = L"//"; // initial '//' as used in windows UNC names
  else if (leading_slashes >= 1)
    result = L"/";
  for (vcl_vector<std::wstring>::size_type k = 0; k < kept.size(); ++k) {
    if (k > 0)
      result += L'/';
    result += kept[k];
  }

  // a relative path whose components all cancelled names the current
  // directory, not the empty string.
  if (result.empty() && had_component)
    result = L".";

  path = result;
#ifdef DEBUG
  // a wide string cannot be streamed to the narrow error stream, so the
  // trace goes to the wide one.
  std::wcerr << "recomposed : " << path << '\n';
#endif

  // no more ideas
  return path;
}
00172 
00173 //:
00174 // Note: this Windows version in similar to the uncached Unix version
00175 std::wstring vul_expand_path(std::wstring path)
00176 {
00177   return vul_expand_path_internal(path);
00178 }
00179 
00180 #endif  //VXL_USE_WIN_WCHAR_T
00181 
00182 #else // #if defined(VCL_WIN32) || defined(como4301)
00183 
00184 #include <vcl_functional.h>
00185 #include <vcl_map.h>
00186 #include <vcl_cstdlib.h> // for getenv()
00187 #include <sys/types.h>
00188 #include <sys/stat.h>
00189 #include <dirent.h>
00190 #include <unistd.h>
00191 
//: Turn \a path into an absolute, normalised path with symbolic links expanded.
// Each pass of the loop below does the following, in this order:
// -# a leading "~" or "~/" is replaced by $HOME ("/HOME" if it is unset);
// -# a path that does not start with '/' is taken relative to the current
//    working directory as reported by getcwd();
// -# the path is normalised textually: repeated and trailing slashes are
//    dropped, "." components are dropped and "name/.." cancels (name being
//    neither "." nor ".."). The leading '/' is never dropped, so "/foo/.."
//    yields "/" rather than an empty string;
// -# every prefix that ends at a component boundary is passed to
//    readlink(); the first one that is a symbolic link is replaced by its
//    target and the loop starts over with the new path.
// The result is returned as soon as a pass finds no link to expand.
//
// Note that step 3 precedes step 4, so "name/.." is cancelled textually
// before name is checked for being a symbolic link.
//
// At most max_link_expansions links are expanded. A cycle of links (for
// example a link that points to itself) therefore terminates and returns
// the path with the offending link left in place; previously it recursed
// without bound.
//
// An empty \a path is returned unchanged. If getcwd() fails, the path is
// replaced by an "<error: ...>" string, which is then returned.
static
vcl_string vul_expand_path_internal(vcl_string path)
{
  // generous upper bound on the number of links expanded for one path.
  unsigned int const max_link_expansions = 64;

  for (unsigned int expansions = 0; ; ++expansions)
  {
    if (path == "/")
      return path; // the root is already in its final form.

    // expand ~/ or just ~
    if ((path.size()>=2 && path[0] == '~' && path[1] == '/') || path == "~") {
      char const *HOME = vcl_getenv("HOME");
      if (! HOME) {
        // urgh!
        HOME = "/HOME";
      }
      path = vcl_string(HOME) + path.substr(1);
    }

    // if the path doesn't begin with a / then it must be relative to the
    // current directory.
    if (!path.empty() && path[0] != '/')
      path = vcl_string("./") + path;

    // expand ./ or just .
    if ((path.size()>=2 && path[0] == '.' && path[1] == '/') || path == ".") {
      char cwd[4096];
      if (getcwd(cwd, sizeof cwd) == NULL)
        path = "<error: current working directory path > 4096 characters>";
      else
        path = vcl_string(cwd) + path.substr(1);
    }

    { // main processing and reduction goes here.
      typedef vcl_string::size_type size_type;
      bool const absolute = !path.empty() && path[0] == '/';

      // Components that survive the reduction, in order.  The separators
      // are implicit, so cancelling "name/.." cannot leave stray slashes.
      vcl_vector<vcl_string> kept;
      for (size_type begin = 0; begin < path.size(); )
      {
        if (path[begin] == '/') {
          // repeated and trailing separators carry no information.
          ++begin;
          continue;
        }

        size_type end = begin;
        while (end < path.size() && path[end] != '/')
          ++end;
        vcl_string const name(path, begin, end - begin);
        begin = end;

        if (name == ".") {
          // "/." is redundant; only a "." that opens a relative path is kept.
          if (!absolute && kept.empty())
            kept.push_back(name);
        }
        else if (name == ".." && !kept.empty() &&
                 kept.back() != "." && kept.back() != "..") {
          // "name/.." cancels.  "." and ".." are not real names, so they
          // must not be swallowed by a following "..".
          kept.pop_back();
        }
        else
          kept.push_back(name);
      }

      // recompose the path; the leading slash is always preserved.
      vcl_string result(absolute ? "/" : "");
      for (vcl_vector<vcl_string>::size_type k = 0; k < kept.size(); ++k) {
        if (k > 0)
          result += '/';
        result += kept[k];
      }
      path = result;
#ifdef DEBUG
      vcl_cerr << "recomposed : " << path << '\n';
#endif
    }

    // give up on links once the budget is spent; this is what stops a
    // cycle of symbolic links.
    if (expansions >= max_link_expansions)
      return path;

    // look for symbolic links to expand
    bool expanded = false;
    for (vcl_string::size_type i = 1; i <= path.size() && !expanded; ++i)
    {
      // only prefixes that end at a component boundary are candidates.
      if (i != path.size() && path[i] != '/')
        continue;

      vcl_string const prefix(path, 0, i);
      char buf[4096];
      // readlink() does not NUL-terminate buf; only the first len bytes
      // are meaningful.
      ssize_t const len = readlink(prefix.c_str(), buf, sizeof buf);
      if (len <= 0)
        continue; // not a symlink, or nothing usable was read.

      // it's a symlink. we should expand it and start over.
#ifdef DEBUG
      vcl_cerr << "before expansion : " << path << '\n';
#endif
      vcl_string const target(buf, buf + len);
      vcl_string const rest(path, i); // what follows the link, if anything

      if (target[0] == '/') {
        // the target of the link starts with '/' so must be an
        // absolute path : ...foo/bar/etc... => target/etc...
        path = target + rest;
      }
      else
      {
        // the target is relative to the symlink's directory, i.e. to
        // everything up to the slash in front of the link's own name.
        vcl_string::size_type const slash = path.rfind('/', i - 1);
        if (slash != vcl_string::npos) {
          // found another slash :   ...foo/bar/etc... where bar is the symlink.
          vcl_string const a(path, 0, slash + 1);
          vcl_string const b = target;
          vcl_string const c = rest;
#ifdef DEBUG
          vcl_cerr << "a = " << a << "\nb = " << b << "\nc = " << c << '\n';
#endif
          path = a + b + c;
        }
        else {
          // no slash in front of the link name : bar/etc where bar is the symlink.
          path = target + rest;
        }
      }

#ifdef DEBUG
      vcl_cerr << "after expansion : " << path << '\n';
#endif
      expanded = true;
    }

    // no more ideas
    if (!expanded)
      return path;
  }
}
00339 
00340 typedef vcl_map<vcl_string, vcl_string, vcl_less<vcl_string> > map_t;
00341 
00342 vcl_string vul_expand_path(vcl_string path)
00343 {
00344   // create the cache.
00345   static map_t the_map;
00346 
00347   // look for the given path in the map.
00348   map_t::iterator i = the_map.find(path);
00349 
00350   if (i == the_map.end()) {
00351     // not in the map, so compute it :
00352     vcl_string mapped = vul_expand_path_internal(path);
00353     // cache it :
00354     i = the_map.insert(map_t::value_type(path, mapped)).first;
00355   }
00356 
00357   //
00358   return (*i).second;
00359 }
00360 
00361 vcl_string vul_expand_path_uncached(vcl_string path)
00362 {
00363   return vul_expand_path_internal(path);
00364 }
00365 
00366 #endif // VCL_WIN32