core/vul/vul_url.cxx

Go to the documentation of this file.
00001 // This is core/vul/vul_url.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \author Ian Scott
00008 // Based on vil_stream_url by fsm
00009 // \verbatim
00010 //  Modifications
00011 //   8 Nov 2002 - Peter Vanroose - corrected HTTP client request syntax
00012 // \endverbatim
00013 
00014 #include "vul_url.h"
00015 #include <vcl_cstdio.h>  // sprintf()
00016 #include <vcl_cstring.h>
00017 #include <vcl_cstdlib.h>
00018 #include <vcl_sstream.h>
00019 #include <vcl_cassert.h>
00020 #include <vcl_fstream.h>
00021 #include <vul/vul_file.h>
00022 
00023 #if defined(unix) || defined(__unix)
00024 
00025 # include <unistd.h>       // read(), write(), close()
00026 # include <netdb.h>        // gethostbyname(), sockaddr_in()
00027 # include <sys/socket.h>
00028 # include <netinet/in.h>   // htons()
00029 # ifdef __alpha
00030 #  include <fp.h>          // htons() [ on e.g. DEC alpha, htons is in machine/endian.h ]
00031 # endif
00032 # define SOCKET int
00033 
00034 #elif defined (VCL_WIN32) && !defined(__CYGWIN__)
00035 
00036 # include <winsock2.h>
00037 
00038 #endif // unix
00039 
00040 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00041 // So that we don't call WSAStartup more than we need to
00042 static int called_WSAStartup = 0;
00043 #endif
00044 
00045 //: only call this method with a correctly formatted http URL
00046 vcl_istream * vul_http_open(char const *url)
00047 {
00048   // split URL into auth, host, path and port number.
00049   vcl_string host;
00050   vcl_string path;
00051   vcl_string auth;
00052   int port = 80; // default
00053 
00054   // check it is an http URL.
00055   assert (vcl_strncmp(url, "http://", 7) == 0);
00056 
00057   char const *p = url + 7;
00058   while (*p && *p!='/')
00059     ++ p;
00060   host = vcl_string(url+7, p);
00061 
00062 
00063   if (*p)
00064     path = p+1;
00065   else
00066     path = "";
00067 
00068   //authentication
00069   for (unsigned int i=0; i<host.size(); ++i)
00070     if (host[i] == '@') {
00071       auth = vcl_string(host.c_str(), host.c_str()+i);
00072       host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00073       break;
00074     }
00075 
00076   // port?
00077   for (unsigned int i=host.size()-1; i>0; --i)
00078     if (host[i] == ':') {
00079       port = vcl_atoi(host.c_str() + i + 1);
00080       host = vcl_string(host.c_str(), host.c_str() + i);
00081       break;
00082     }
00083 
00084   // do character translation
00085   unsigned k =0;
00086   while (k < path.size())
00087   {
00088     if (path[k] == ' ')
00089       path.replace(k, 1, "%20");
00090     else if (path[k] == '%')
00091       path.replace(k, 1, "%25");
00092     k++;
00093   }
00094 
00095   // so far so good.
00096 #ifdef DEBUG
00097   vcl_cerr << "auth = \'" << auth << "\'\n"
00098            << "host = \'" << host << "\'\n"
00099            << "path = \'" << path << "\'\n"
00100            << "port = " << port << vcl_endl;
00101 #endif
00102 
00103 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00104   if (called_WSAStartup==0)
00105   {
00106     WORD wVersionRequested;
00107     WSADATA wsaData;
00108 
00109     wVersionRequested = MAKEWORD( 2, 2 );
00110 
00111     /* int err = */ WSAStartup( wVersionRequested, &wsaData );
00112   }
00113 #endif
00114 
00115   // create socket endpoint.
00116   SOCKET tcp_socket = socket(PF_INET,      // IPv4 protocols.
00117                              SOCK_STREAM,  // two-way, reliable,
00118                                            // connection-based stream socket.
00119                              PF_UNSPEC);   // protocol number.
00120 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00121   if (tcp_socket == INVALID_SOCKET) {
00122 # ifndef NDEBUG
00123     vcl_cerr << __FILE__ "error code : " << WSAGetLastError() << '\n';
00124 # endif
00125 #else
00126   if (tcp_socket < 0) {
00127 #endif
00128     vcl_cerr << __FILE__ ": failed to create socket.\n";
00129     return 0;
00130   }
00131 
00132 #ifdef DEBUG
00133   vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << '\n';
00134 #endif
00135 
00136   // get network address of server.
00137   hostent *hp = gethostbyname(host.c_str());
00138   if (! hp) {
00139     vcl_cerr << __FILE__ ": failed to lookup host\n";
00140 
00141 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00142     closesocket(tcp_socket);
00143 #else
00144     close(tcp_socket);
00145 #endif
00146 
00147     return 0;
00148   }
00149 
00150   // make socket address.
00151   sockaddr_in my_addr;
00152   my_addr.sin_family = AF_INET;
00153   // convert port number to network byte order..
00154   my_addr.sin_port = htons(port);
00155   vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00156 
00157   // connect to server.
00158   if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0) {
00159     vcl_cerr << __FILE__ ": failed to connect to host\n";
00160     //perror(__FILE__);
00161 
00162 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00163     closesocket(tcp_socket);
00164 #else
00165     close(tcp_socket);
00166 #endif
00167 
00168     return 0;
00169   }
00170 
00171   // buffer for data transfers over socket.
00172   char buffer[4096];
00173 
00174   // send HTTP 1.1 request.
00175   vcl_sprintf(buffer, "GET %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00176               url, host.c_str());
00177 
00178   if (auth != "")
00179     vcl_sprintf(buffer+vcl_strlen(buffer),
00180                 "Authorization: Basic %s\r\n",
00181                 vul_url::encode_base64(auth).c_str());
00182 
00183   vcl_sprintf(buffer+vcl_strlen(buffer), "\r\n");
00184 
00185 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00186   if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00187 #else
00188   if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00189 #endif
00190     vcl_cerr << __FILE__ ": error sending HTTP request\n";
00191 
00192 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00193     closesocket(tcp_socket);
00194 #else
00195     close(tcp_socket);
00196 #endif
00197     return 0;
00198   }
00199 
00200   // force the data to be sent.
00201 #if 1
00202   shutdown(tcp_socket, 1); // disallow further sends.
00203 #else
00204   for (int i=0; i<4096; ++i) ::write(tcp_socket, "\n\n\n\n", 4);
00205 #endif
00206 
00207   // read from socket into memory.
00208   vcl_string contents;
00209   {
00210     int n;
00211 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00212     while ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00213 #else
00214     while ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00215 #endif
00216       contents.append(buffer, n);
00217 #ifdef DEBUG
00218       vcl_cerr << n << " bytes\n";
00219 #endif
00220     }
00221   }
00222 
00223   // close connection to server.
00224 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00225   closesocket(tcp_socket);
00226 #else
00227   close(tcp_socket);
00228 #endif
00229 
00230 #ifdef DEBUG
00231   vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00232 #endif
00233 
00234   if (contents.find("HTTP/1.1 200") == contents.npos)
00235   {
00236     return 0;
00237   }
00238   vcl_string::size_type n = contents.find("\r\n\r\n");
00239   if (n == contents.npos)
00240   {
00241     return 0;
00242   }
00243 
00244   contents.erase(0,n+4);
00245 #ifdef DEBUG
00246   vcl_cerr << "vul_url::vul_http_open() returns:\n" << contents << '\n';
00247 #endif
00248   return new vcl_istringstream(contents);
00249 }
00250 
00251 
00252 //: only call this method with a correctly formatted http URL
00253 bool vul_http_exists(char const *url)
00254 {
00255   // split URL into auth, host, path and port number.
00256   vcl_string host;
00257   vcl_string path;
00258   vcl_string auth;
00259   int port = 80; // default
00260   assert (vcl_strncmp(url, "http://", 7) == 0);
00261 
00262   char const *p = url + 7;
00263   while (*p && *p!='/')
00264     ++ p;
00265   host = vcl_string(url+7, p);
00266 
00267 
00268   if (*p)
00269     path = p+1; // may be the empty string, if URL ends in a slash
00270   else
00271     path = "";
00272 
00273   //authentication
00274   for (unsigned int i=0; i<host.size(); ++i)
00275     if (host[i] == '@') {
00276       auth = vcl_string(host.c_str(), host.c_str()+i);
00277       host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00278       break;
00279     }
00280 
00281   // port?
00282   for (unsigned int i=0; i<host.size(); ++i)
00283     if (host[i] == ':') {
00284       port = vcl_atoi(host.c_str() + i + 1);
00285       host = vcl_string(host.c_str(), host.c_str() + i);
00286       break;
00287     }
00288 
00289   // do character translation
00290   unsigned k =0;
00291   while (k < path.size())
00292   {
00293     if (path[k] == ' ')
00294       path.replace(k, 1, "%20");
00295     else if (path[k] == '%')
00296       path.replace(k, 1, "%25");
00297     k++;
00298   }
00299 
00300   // so far so good.
00301 #ifdef DEBUG
00302   vcl_cerr << "auth = \'" << auth << "\'\n"
00303            << "host = \'" << host << "\'\n"
00304            << "path = \'" << path << "\'\n"
00305            << "port = " << port << vcl_endl;
00306 #endif
00307 
00308 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00309   if (called_WSAStartup==0)
00310   {
00311     WORD wVersionRequested;
00312     WSADATA wsaData;
00313 
00314     wVersionRequested = MAKEWORD( 2, 2 );
00315 
00316     /* int err = */ WSAStartup( wVersionRequested, &wsaData );
00317   }
00318 #endif
00319 
00320   // create socket endpoint.
00321   SOCKET tcp_socket = socket(PF_INET,      // IPv4 protocols.
00322                              SOCK_STREAM,  // two-way, reliable,
00323                                            // connection-based stream socket.
00324                              PF_UNSPEC);   // protocol number.
00325 
00326 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00327   if (tcp_socket == INVALID_SOCKET) {
00328 # ifndef NDEBUG
00329     vcl_cerr << "error code : " << WSAGetLastError() << vcl_endl;
00330 # endif
00331 #else
00332   if (tcp_socket < 0) {
00333 #endif
00334     vcl_cerr << __FILE__ ": failed to create socket.\n";
00335     return false;
00336   }
00337 
00338 #ifdef DEBUG
00339   vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << vcl_endl;
00340 #endif
00341 
00342   // get network address of server.
00343   hostent *hp = gethostbyname(host.c_str());
00344   if (! hp) {
00345     vcl_cerr << __FILE__ ": failed to lookup host\n";
00346     return false;
00347   }
00348 
00349   // make socket address.
00350   sockaddr_in my_addr;
00351   my_addr.sin_family = AF_INET;
00352     // convert port number to network byte order..
00353   my_addr.sin_port = htons(port);
00354   vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00355 
00356   // connect to server.
00357   if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0)
00358   {
00359     vcl_cerr << __FILE__ ": failed to connect to host\n";
00360     //perror(__FILE__);
00361 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00362     closesocket(tcp_socket);
00363 #else
00364     close(tcp_socket);
00365 #endif
00366 
00367     return false;
00368   }
00369 
00370   // buffer for data transfers over socket.
00371   char buffer[4096];
00372 
00373   // send HTTP 1.1 request.
00374   vcl_sprintf(buffer, "HEAD %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00375               url, host.c_str());
00376   if (auth != "")
00377     vcl_sprintf(buffer+vcl_strlen(buffer), "Authorization: Basic %s\r\n",
00378                 vul_url::encode_base64(auth).c_str());
00379   vcl_sprintf(buffer+vcl_strlen(buffer),"\r\n");
00380 
00381 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00382   if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00383 #else
00384   if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00385 #endif
00386     vcl_cerr << __FILE__ ": error sending HTTP request\n";
00387 
00388 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00389     closesocket(tcp_socket);
00390 #else
00391     close(tcp_socket);
00392 #endif
00393     return false;
00394   }
00395 
00396   // force the data to be sent.
00397 #if 1
00398   shutdown(tcp_socket, 1); // disallow further sends.
00399 #else
00400   for (int i=0; i<4096; ++i) ::write(tcp_socket, "\n\n\n\n", 4);
00401 #endif
00402 
00403   // read from socket into memory.
00404   vcl_string contents;
00405   {
00406     int n;
00407 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00408     if ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00409 #else
00410     if ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00411 #endif
00412       contents.append(buffer, n);
00413       //vcl_cerr << n << " bytes\n";
00414     }
00415     else
00416     {
00417 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00418       closesocket(tcp_socket);
00419 #else
00420       close(tcp_socket);
00421 #endif
00422       return false;
00423     }
00424   }
00425 
00426   // close connection to server.
00427 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00428   closesocket(tcp_socket);
00429 #else
00430   close(tcp_socket);
00431 #endif
00432 
00433 #ifdef DEBUG
00434   vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00435 #endif
00436 
00437   return contents.find("HTTP/1.1 200") != contents.npos;
00438 }
00439 
00440 
00441 vcl_istream * vul_url::open(const char * url, vcl_ios_openmode mode)
00442 {
00443   // check for null pointer or empty strings.
00444   if (!url || !*url)
00445     return 0;
00446   unsigned l = vcl_strlen(url);
00447 
00448   // check for filenames beginning "file:".
00449   if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00450     return new vcl_ifstream(url+7,mode);
00451 
00452   // maybe it's an http URL?
00453   if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00454     return vul_http_open(url);
00455 
00456   // maybe it's an ftp URL?
00457   if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00458   {
00459     vcl_cerr << __LINE__ << "ERROR:\n vul_read_url(const char * url)\n"
00460       "Doesn't support FTP yet, url=" << url << vcl_endl;
00461     return 0;
00462   }
00463 
00464   // try an ordinary filename
00465   return new vcl_ifstream(url, mode);
00466 }
00467 
00468 
00469 //: Does that URL exist
00470 bool vul_url::exists(const char * url)
00471 {
00472   // check for null pointer or empty strings.
00473   if (!url || !*url)
00474     return false;
00475   unsigned l = vcl_strlen(url);
00476 
00477   // check for filenames beginning "file:".
00478   if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00479     return vul_file::exists(url+7);
00480 
00481   // maybe it's an http URL?
00482   if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00483     return vul_http_exists(url);
00484 
00485   // maybe it's an ftp URL?
00486   if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00487   {
00488     vcl_cerr << "ERROR: vul_read_url(const char * url)\n"
00489       "Doesn't support FTP yet, url=" << url << vcl_endl;
00490     return false;
00491   }
00492 
00493   // try an ordinary filename
00494   return vul_file::exists(url);
00495 }
00496 
00497 //: Is that a URL
00498 bool vul_url::is_url(const char * url)
00499 {
00500   // check for null pointer or empty strings.
00501   if (!url || !*url)
00502     return false;
00503   unsigned l = vcl_strlen(url);
00504 
00505   // check for filenames beginning "file:".
00506   if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00507     return true;
00508 
00509   // maybe it's an http URL?
00510   if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00511     return true;
00512 
00513   // maybe it's an ftp URL?
00514   if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00515     return true;
00516 
00517   return false;
00518 }
00519 
00520 //=======================================================================
00521 
00522 bool vul_url::is_file(const char * fn)
00523 {
00524   if (vul_url::is_url(fn))
00525     return vul_url::exists(fn);
00526   else
00527     return vul_file::exists(fn) && ! vul_file::is_directory(fn);
00528 }
00529 
00530 //=======================================================================
00531 
//: The base64 alphabet: entry v is the character that encodes the 6-bit value v.
static const
int base64_encoding[]=
{
  'A','B','C','D','E','F','G','H',
  'I','J','K','L','M','N','O','P',
  'Q','R','S','T','U','V','W','X',
  'Y','Z','a','b','c','d','e','f',
  'g','h','i','j','k','l','m','n',
  'o','p','q','r','s','t','u','v',
  'w','x','y','z','0','1','2','3',
  '4','5','6','7','8','9','+','/'
};

//: Result buffer shared by every call of encode_triplet() (not null-terminated).
static char out_buf[4];

//: Encode up to three input bytes as four base64 characters.
// \param data  the input bytes; only the first n are significant, but
//              data[1] (and, for n>=2, data[2]) is still read and should be 0
//              when it is padding.
// \param n     number of significant bytes: 1, 2 or 3 (asserted).
// \return pointer to the static 4-character result, padded with '=' when
//         n<3.  The next call overwrites it.
static const char * encode_triplet(char data[3], unsigned n)
{
  assert (n>0 && n <4);

  // the first two characters are present whatever n is:
  // top 6 bits of byte 0 ...
  out_buf[0] = base64_encoding[(data[0] & 0xFC) >> 2];
  // ... then low 2 bits of byte 0 followed by the high 4 bits of byte 1.
  out_buf[1] = base64_encoding[((data[0] & 0x3) << 4) + ((data[1] & 0xf0)>>4)];

  // third character: low 4 bits of byte 1 and high 2 bits of byte 2,
  // or padding if there was only one input byte.
  if (n >= 2)
    out_buf[2] = base64_encoding[((data[1] & 0xf) << 2) + ((data[2] & 0xc0)>>6)];
  else
    out_buf[2] = '=';

  // fourth character: low 6 bits of byte 2, or padding if it is absent.
  if (n == 3)
    out_buf[3] = base64_encoding[ (data[2] & 0x3f) ];
  else
    out_buf[3] = '=';

  return out_buf;
}
00568 
00569 //=======================================================================
00570 
00571 vcl_string vul_url::encode_base64(const vcl_string& in)
00572 {
00573   vcl_string out;
00574   unsigned i = 0, line_octets = 0;
00575   const unsigned l = in.size();
00576   char data[3];
00577   while (i <= l)
00578   {
00579     if (i == l)
00580     {
00581       out.append("=");
00582       return out;
00583     }
00584 
00585     data[0] = in[i++];
00586     data[1] = data[2] = 0;
00587 
00588     if (i == l)
00589     {
00590       out.append(encode_triplet(data,1),4);
00591       return out;
00592     }
00593 
00594     data[1] = in[i++];
00595 
00596     if (i == l)
00597     {
00598       out.append(encode_triplet(data,2),4);
00599       return out;
00600     }
00601 
00602     data[2] = in[i++];
00603 
00604     out.append(encode_triplet(data,3),4);
00605 
00606     if (line_octets >= 68/4) // print carriage return
00607     {
00608       out.append("\r\n",2);
00609       line_octets = 0;
00610     }
00611     else
00612       ++line_octets;
00613   }
00614 
00615   return out;
00616 }
00617 
00618 //=======================================================================
00619 
00620 static int get_next_char(const vcl_string &in, unsigned int *i)
00621 {
00622   while (*i < in.size())
00623   {
00624     char c;
00625     c = in[(*i)++];
00626 
00627     if (c == '+')
00628       return 62;
00629 
00630     if (c == '/')
00631       return 63;
00632 
00633     if (c >= 'A' && c <= 'Z')
00634       return 0 + (int)c - (int)'A';
00635 
00636     if (c >= 'a' && c <= 'z')
00637       return 26 + (int)c - (int)'a';
00638 
00639     if (c >= '0' && c <= '9')
00640       return 52 + (int)c - (int)'0';
00641 
00642     if (c == '=')
00643       return 64;
00644   }
00645   return -1;
00646 }
00647 
00648 //=======================================================================
00649 
00650 vcl_string vul_url::decode_base64(const vcl_string& in)
00651 {
00652   int c;
00653   char data[3];
00654 
00655   unsigned i=0;
00656   const unsigned l = in.size();
00657   vcl_string out;
00658   while (i < l)
00659   {
00660     data[0] = data[1] = data[2] = 0;
00661 
00662     // -=- 0 -=-
00663     // Search next valid char...
00664     c = get_next_char(in , &i);
00665 
00666     // treat '=' as end of message
00667     if (c == 64)
00668       return out;
00669     if (c==-1)
00670       return "";
00671 
00672     data[0] = ((c & 0x3f) << 2) | (0x3 & data[0]);
00673 
00674     // -=- 1 -=-
00675     // Search next valid char...
00676     c = get_next_char(in , &i);
00677 
00678       // Error! Second character in octet can't be '='
00679     if (c == 64 || c==-1)
00680       return "";
00681 
00682     data[0] = ((c & 0x30) >> 4) | (0xfc & data[0]);
00683     data[1] = ((c & 0xf) << 4) | (0xf & data[1]);
00684 
00685 
00686     // -=- 2 -=-
00687     // Search next valid char...
00688 
00689     c = get_next_char(in , &i);
00690 
00691     if (c==-1)
00692       return "";
00693     if (c == 64)
00694     {
00695       // should really read next char and check it is '='
00696       out.append(data,1);  // write 1 byte to output
00697       return out;
00698     }
00699 
00700     data[1] = ((c & 0x3C) >> 2) | (0xf0 & data[1]);
00701     data[2] = ((c & 0x3) << 6) | (0x3f & data[2]);
00702 
00703 
00704     // -=- 3 -=-
00705     // Search next valid char...
00706     c = get_next_char(in , &i);
00707 
00708     if (c==-1)
00709       return "";
00710 
00711     if (c == 64)
00712     {
00713       out.append(data,2);  // write 2 bytes to output
00714       return out;
00715     }
00716 
00717     data[2] = (c & 0x3f) | (0xc0 & data[2]);
00718 
00719     out.append(data,3);  // write 3 bytes to output
00720   }
00721 
00722   return out;
00723 }

Generated on Tue Dec 2 05:08:48 2008 for core/vul by  doxygen 1.5.1