00001
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "vul_url.h"
00015 #include <vcl_cstdio.h>
00016 #include <vcl_cstring.h>
00017 #include <vcl_cstdlib.h>
00018 #include <vcl_sstream.h>
00019 #include <vcl_cassert.h>
00020 #include <vcl_fstream.h>
00021 #include <vul/vul_file.h>
00022
00023 #if defined(unix) || defined(__unix) || defined(__unix__)
00024
00025 # include <unistd.h>
00026 # include <netdb.h>
00027 # include <sys/socket.h>
00028 # include <netinet/in.h>
00029 # ifdef __alpha
00030 # include <fp.h>
00031 # endif
00032 # define SOCKET int
00033
00034 #elif defined (VCL_WIN32) && !defined(__CYGWIN__)
00035
00036 # include <winsock2.h>
00037
00038 #endif // unix
00039
#if defined(VCL_WIN32) && !defined(__CYGWIN__)

// Winsock start-up flag: vul_http_open() and vul_http_exists() compare it
// against 0 to decide whether WSAStartup() still has to be called.
static int called_WSAStartup = 0;
#endif
00044
00045
00046 vcl_istream * vul_http_open(char const *url)
00047 {
00048
00049 vcl_string host;
00050 vcl_string path;
00051 vcl_string auth;
00052 int port = 80;
00053
00054
00055 assert (vcl_strncmp(url, "http://", 7) == 0);
00056
00057 char const *p = url + 7;
00058 while (*p && *p!='/')
00059 ++ p;
00060 host = vcl_string(url+7, p);
00061
00062
00063 if (*p)
00064 path = p+1;
00065 else
00066 path = "";
00067
00068
00069 for (unsigned int i=0; i<host.size(); ++i)
00070 if (host[i] == '@') {
00071 auth = vcl_string(host.c_str(), host.c_str()+i);
00072 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00073 break;
00074 }
00075
00076
00077 if (host.size() > 0L)
00078 for (unsigned int i=(unsigned int)(host.size()-1); i>0; --i)
00079 if (host[i] == ':') {
00080 port = vcl_atoi(host.c_str() + i + 1);
00081 host = vcl_string(host.c_str(), host.c_str() + i);
00082 break;
00083 }
00084
00085
00086 unsigned k =0;
00087 while (k < path.size())
00088 {
00089 if (path[k] == ' ')
00090 path.replace(k, 1, "%20");
00091 else if (path[k] == '%')
00092 path.replace(k, 1, "%25");
00093 ++k;
00094 }
00095
00096
00097 #ifdef DEBUG
00098 vcl_cerr << "auth = \'" << auth << "\'\n"
00099 << "host = \'" << host << "\'\n"
00100 << "path = \'" << path << "\'\n"
00101 << "port = " << port << vcl_endl;
00102 #endif
00103
00104 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00105 if (called_WSAStartup==0)
00106 {
00107 WORD wVersionRequested;
00108 WSADATA wsaData;
00109
00110 wVersionRequested = MAKEWORD( 2, 2 );
00111
00112 WSAStartup( wVersionRequested, &wsaData );
00113 }
00114 #endif
00115
00116
00117 SOCKET tcp_socket = socket(PF_INET,
00118 SOCK_STREAM,
00119
00120 PF_UNSPEC);
00121 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00122 if (tcp_socket == INVALID_SOCKET) {
00123 # ifndef NDEBUG
00124 vcl_cerr << __FILE__ "error code : " << WSAGetLastError() << '\n';
00125 # endif
00126 #else
00127 if (tcp_socket < 0) {
00128 #endif
00129 vcl_cerr << __FILE__ ": failed to create socket.\n";
00130 return 0;
00131 }
00132
00133 #ifdef DEBUG
00134 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << '\n';
00135 #endif
00136
00137
00138 hostent *hp = gethostbyname(host.c_str());
00139 if (! hp) {
00140 vcl_cerr << __FILE__ ": failed to lookup host\n";
00141
00142 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00143 closesocket(tcp_socket);
00144 #else
00145 close(tcp_socket);
00146 #endif
00147
00148 return 0;
00149 }
00150
00151
00152 sockaddr_in my_addr;
00153 my_addr.sin_family = AF_INET;
00154
00155 my_addr.sin_port = htons(port);
00156 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00157
00158
00159 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0) {
00160 vcl_cerr << __FILE__ ": failed to connect to host\n";
00161
00162
00163 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00164 closesocket(tcp_socket);
00165 #else
00166 close(tcp_socket);
00167 #endif
00168
00169 return 0;
00170 }
00171
00172
00173 char buffer[4096];
00174
00175
00176 vcl_snprintf(buffer, 4090-vcl_strlen(buffer),
00177 "GET %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00178 url, host.c_str());
00179
00180 if (auth != "")
00181 vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer),
00182 "Authorization: Basic %s\r\n",
00183 vul_url::encode_base64(auth).c_str());
00184
00185 if (vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer), "\r\n") < 0)
00186 {
00187 vcl_cerr << "ERROR: vul_http_open buffer overflow.";
00188 vcl_abort();
00189 }
00190
00191 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00192 if (send(tcp_socket, buffer, (int)vcl_strlen(buffer), 0) < 0) {
00193 #else
00194 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00195 #endif
00196 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00197
00198 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00199 closesocket(tcp_socket);
00200 #else
00201 close(tcp_socket);
00202 #endif
00203 return 0;
00204 }
00205
00206
00207
00208 vcl_string contents;
00209 {
00210 int n;
00211 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00212 while ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00213 #else
00214 while ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00215 #endif
00216 contents.append(buffer, n);
00217 #ifdef DEBUG
00218 vcl_cerr << n << " bytes\n";
00219 #endif
00220 }
00221 }
00222
00223
00224 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00225 closesocket(tcp_socket);
00226 #else
00227 close(tcp_socket);
00228 #endif
00229
00230 #ifdef DEBUG
00231 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00232 #endif
00233
00234 if (contents.find("HTTP/1.1 200") == contents.npos)
00235 {
00236 return 0;
00237 }
00238 vcl_string::size_type n = contents.find("\r\n\r\n");
00239 if (n == contents.npos)
00240 {
00241 return 0;
00242 }
00243
00244 contents.erase(0,n+4);
00245 #ifdef DEBUG
00246 vcl_cerr << "vul_url::vul_http_open() returns:\n" << contents << '\n';
00247 #endif
00248 return new vcl_istringstream(contents);
00249 }
00250
00251
00252
00253 bool vul_http_exists(char const *url)
00254 {
00255
00256 vcl_string host;
00257 vcl_string path;
00258 vcl_string auth;
00259 int port = 80;
00260 assert (vcl_strncmp(url, "http://", 7) == 0);
00261
00262 char const *p = url + 7;
00263 while (*p && *p!='/')
00264 ++ p;
00265 host = vcl_string(url+7, p);
00266
00267
00268 if (*p)
00269 path = p+1;
00270 else
00271 path = "";
00272
00273
00274 for (unsigned int i=0; i<host.size(); ++i)
00275 if (host[i] == '@') {
00276 auth = vcl_string(host.c_str(), host.c_str()+i);
00277 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00278 break;
00279 }
00280
00281
00282 for (unsigned int i=0; i<host.size(); ++i)
00283 if (host[i] == ':') {
00284 port = vcl_atoi(host.c_str() + i + 1);
00285 host = vcl_string(host.c_str(), host.c_str() + i);
00286 break;
00287 }
00288
00289
00290 unsigned k =0;
00291 while (k < path.size())
00292 {
00293 if (path[k] == ' ')
00294 path.replace(k, 1, "%20");
00295 else if (path[k] == '%')
00296 path.replace(k, 1, "%25");
00297 k++;
00298 }
00299
00300
00301 #ifdef DEBUG
00302 vcl_cerr << "auth = \'" << auth << "\'\n"
00303 << "host = \'" << host << "\'\n"
00304 << "path = \'" << path << "\'\n"
00305 << "port = " << port << vcl_endl;
00306 #endif
00307
00308 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00309 if (called_WSAStartup==0)
00310 {
00311 WORD wVersionRequested;
00312 WSADATA wsaData;
00313
00314 wVersionRequested = MAKEWORD( 2, 2 );
00315
00316 WSAStartup( wVersionRequested, &wsaData );
00317 }
00318 #endif
00319
00320
00321 SOCKET tcp_socket = socket(PF_INET,
00322 SOCK_STREAM,
00323
00324 PF_UNSPEC);
00325
00326 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00327 if (tcp_socket == INVALID_SOCKET) {
00328 # ifndef NDEBUG
00329 vcl_cerr << "error code : " << WSAGetLastError() << vcl_endl;
00330 # endif
00331 #else
00332 if (tcp_socket < 0) {
00333 #endif
00334 vcl_cerr << __FILE__ ": failed to create socket.\n";
00335 return false;
00336 }
00337
00338 #ifdef DEBUG
00339 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << vcl_endl;
00340 #endif
00341
00342
00343 hostent *hp = gethostbyname(host.c_str());
00344 if (! hp) {
00345 vcl_cerr << __FILE__ ": failed to lookup host\n";
00346 return false;
00347 }
00348
00349
00350 sockaddr_in my_addr;
00351 my_addr.sin_family = AF_INET;
00352
00353 my_addr.sin_port = htons(port);
00354 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00355
00356
00357 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0)
00358 {
00359 vcl_cerr << __FILE__ ": failed to connect to host\n";
00360
00361 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00362 closesocket(tcp_socket);
00363 #else
00364 close(tcp_socket);
00365 #endif
00366
00367 return false;
00368 }
00369
00370
00371 char buffer[4096];
00372
00373
00374 vcl_snprintf(buffer, 4090,
00375 "HEAD %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00376 url, host.c_str());
00377 if (auth != "")
00378 vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer),
00379 "Authorization: Basic %s\r\n",
00380 vul_url::encode_base64(auth).c_str() );
00381
00382 if (vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer), "\r\n") < 0)
00383 {
00384 vcl_cerr << "ERROR: vul_http_exists buffer overflow.";
00385 vcl_abort();
00386 }
00387
00388 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00389 if (send(tcp_socket, buffer, (int)vcl_strlen(buffer), 0) < 0) {
00390 #else
00391 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00392 #endif
00393 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00394
00395 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00396 closesocket(tcp_socket);
00397 #else
00398 close(tcp_socket);
00399 #endif
00400 return false;
00401 }
00402
00403
00404
00405 vcl_string contents;
00406 {
00407 int n;
00408 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00409 if ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00410 #else
00411 if ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00412 #endif
00413 contents.append(buffer, n);
00414
00415 }
00416 else
00417 {
00418 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00419 closesocket(tcp_socket);
00420 #else
00421 close(tcp_socket);
00422 #endif
00423 return false;
00424 }
00425 }
00426
00427
00428 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00429 closesocket(tcp_socket);
00430 #else
00431 close(tcp_socket);
00432 #endif
00433
00434 #ifdef DEBUG
00435 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00436 #endif
00437
00438 return contents.find("HTTP/1.1 200") != contents.npos;
00439 }
00440
00441
00442 vcl_istream * vul_url::open(const char * url, vcl_ios_openmode mode)
00443 {
00444
00445 if (!url || !*url)
00446 return 0;
00447 unsigned int l = (unsigned int)vcl_strlen(url);
00448
00449
00450 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00451 return new vcl_ifstream(url+7,mode);
00452
00453
00454 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00455 return vul_http_open(url);
00456
00457
00458 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00459 {
00460 vcl_cerr << __LINE__ << "ERROR:\n vul_read_url(const char * url)\n"
00461 "Doesn't support FTP yet, url=" << url << vcl_endl;
00462 return 0;
00463 }
00464
00465
00466 return new vcl_ifstream(url, mode);
00467 }
00468
00469
00470
00471 bool vul_url::exists(const char * url)
00472 {
00473
00474 if (!url || !*url)
00475 return false;
00476 unsigned int l = (unsigned int)vcl_strlen(url);
00477
00478
00479 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00480 return vul_file::exists(url+7);
00481
00482
00483 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00484 return vul_http_exists(url);
00485
00486
00487 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00488 {
00489 vcl_cerr << "ERROR: vul_read_url(const char * url)\n"
00490 "Doesn't support FTP yet, url=" << url << vcl_endl;
00491 return false;
00492 }
00493
00494
00495 return vul_file::exists(url);
00496 }
00497
00498
00499 bool vul_url::is_url(const char * url)
00500 {
00501
00502 if (!url || !*url)
00503 return false;
00504 unsigned int l = (unsigned int)vcl_strlen(url);
00505
00506
00507 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00508 return true;
00509
00510
00511 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00512 return true;
00513
00514
00515 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00516 return true;
00517
00518 return false;
00519 }
00520
00521
00522
00523 bool vul_url::is_file(const char * fn)
00524 {
00525 if (vul_url::is_url(fn))
00526 return vul_url::exists(fn);
00527 else
00528 return vul_file::exists(fn) && ! vul_file::is_directory(fn);
00529 }
00530
00531
00532
// Base64 alphabet, indexed by 6-bit value.
static const
char base64_encoding[]=
{
  'A','B','C','D','E','F','G','H',
  'I','J','K','L','M','N','O','P',
  'Q','R','S','T','U','V','W','X',
  'Y','Z','a','b','c','d','e','f',
  'g','h','i','j','k','l','m','n',
  'o','p','q','r','s','t','u','v',
  'w','x','y','z','0','1','2','3',
  '4','5','6','7','8','9','+','/'
};

// Scratch space for encode_triplet(); overwritten by every call.
static char out_buf[4];

// Encode up to three input bytes as four base64 characters.
//
// data  the input bytes; data[1] always contributes to the second output
//       character, so it must be 0 when n == 1, and data[2] must be 0
//       when n == 2.
// n     number of meaningful bytes in data, 1 to 3 (asserted).
//
// Returns a pointer to the file-static out_buf, which holds exactly four
// characters and is NOT NUL-terminated.  Missing input bytes are
// represented by '=' padding.
static const char * encode_triplet(char data[3], unsigned int n)
{
  assert (n>0 && n <4);

  // Work on unsigned values so that bytes above 0x7f index correctly.
  const unsigned int first  = (unsigned char)data[0];
  const unsigned int second = (unsigned char)data[1];

  out_buf[0] = base64_encoding[first >> 2];
  out_buf[1] = base64_encoding[((first & 0x03) << 4) | (second >> 4)];

  if (n == 1)
  {
    out_buf[2] = '=';
    out_buf[3] = '=';
  }
  else
  {
    const unsigned int third = (unsigned char)data[2];
    out_buf[2] = base64_encoding[((second & 0x0f) << 2) | (third >> 6)];
    out_buf[3] = (n == 2) ? '=' : base64_encoding[third & 0x3f];
  }

  return out_buf;
}
00569
00570
00571
00572 vcl_string vul_url::encode_base64(const vcl_string& in)
00573 {
00574 vcl_string out;
00575 unsigned int i = 0, line_octets = 0;
00576 const unsigned int l = (unsigned int)(in.size());
00577 char data[3];
00578 while (i <= l)
00579 {
00580 if (i == l)
00581 {
00582 out.append("=");
00583 return out;
00584 }
00585
00586 data[0] = in[i++];
00587 data[1] = data[2] = 0;
00588
00589 if (i == l)
00590 {
00591 out.append(encode_triplet(data,1),4);
00592 return out;
00593 }
00594
00595 data[1] = in[i++];
00596
00597 if (i == l)
00598 {
00599 out.append(encode_triplet(data,2),4);
00600 return out;
00601 }
00602
00603 data[2] = in[i++];
00604
00605 out.append(encode_triplet(data,3),4);
00606
00607 if (line_octets >= 68/4)
00608 {
00609 out.append("\r\n",2);
00610 line_octets = 0;
00611 }
00612 else
00613 ++line_octets;
00614 }
00615
00616 return out;
00617 }
00618
00619
00620
00621 static int get_next_char(const vcl_string &in, unsigned int *i)
00622 {
00623 while (*i < in.size())
00624 {
00625 char c;
00626 c = in[(*i)++];
00627
00628 if (c == '+')
00629 return 62;
00630
00631 if (c == '/')
00632 return 63;
00633
00634 if (c >= 'A' && c <= 'Z')
00635 return 0 + (int)c - (int)'A';
00636
00637 if (c >= 'a' && c <= 'z')
00638 return 26 + (int)c - (int)'a';
00639
00640 if (c >= '0' && c <= '9')
00641 return 52 + (int)c - (int)'0';
00642
00643 if (c == '=')
00644 return 64;
00645 }
00646 return -1;
00647 }
00648
00649
00650
00651 vcl_string vul_url::decode_base64(const vcl_string& in)
00652 {
00653 int c;
00654 char data[3];
00655
00656 unsigned int i=0;
00657 const unsigned int l = (unsigned int)(in.size());
00658 vcl_string out;
00659 while (i < l)
00660 {
00661 data[0] = data[1] = data[2] = 0;
00662
00663
00664
00665 c = get_next_char(in , &i);
00666
00667
00668 if (c == 64)
00669 return out;
00670 if (c==-1)
00671 return "";
00672
00673 data[0] = char(((c & 0x3f) << 2) | (0x3 & data[0]));
00674
00675
00676
00677 c = get_next_char(in , &i);
00678
00679
00680 if (c == 64 || c==-1)
00681 return "";
00682
00683 data[0] = char(((c & 0x30) >> 4) | (0xfc & data[0]));
00684 data[1] = char(((c & 0x0f) << 4) | (0x0f & data[1]));
00685
00686
00687
00688
00689 c = get_next_char(in , &i);
00690
00691 if (c==-1)
00692 return "";
00693 if (c == 64)
00694 {
00695
00696 out.append(data,1);
00697 return out;
00698 }
00699
00700 data[1] = char(((c & 0x3c) >> 2) | (0xf0 & data[1]));
00701 data[2] = char(((c & 0x03) << 6) | (0x3f & data[2]));
00702
00703
00704
00705 c = get_next_char(in , &i);
00706
00707 if (c==-1)
00708 return "";
00709
00710 if (c == 64)
00711 {
00712 out.append(data,2);
00713 return out;
00714 }
00715
00716 data[2] = char((c & 0x3f) | (0xc0 & data[2]));
00717
00718 out.append(data,3);
00719 }
00720
00721 return out;
00722 }