FireBreath  1.4.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Pages
URI.cpp
1 /**********************************************************\
2 Original Author: Dan Weatherford
3 
4 Imported into FireBreath: Oct 4, 2010
5 License: Dual license model; choose one of two:
6 New BSD License
7 http://www.opensource.org/licenses/bsd-license.php
8 - or -
9 GNU Lesser General Public License, version 2.1
10 http://www.gnu.org/licenses/lgpl-2.1.html
11 
12 Copyright 2010 Dan Weatherford and Facebook, Inc
13 \**********************************************************/
14 
15 #ifdef _WIN32
16 #include "win_targetver.h"
17 #endif
18 #include <boost/lexical_cast.hpp>
19 #include <boost/algorithm/string.hpp>
20 #include <boost/algorithm/string/trim.hpp>
21 #include <boost/logic/tribool.hpp>
22 #include <vector>
23 #include <sstream>
24 #include <boost/asio.hpp>
25 #include "precompiled_headers.h" // On windows, everything above this line in PCH
26 
27 #include "URI.h"
28 
29 using namespace boost::algorithm;
30 using namespace boost::logic;
31 using std::string;
32 using std::vector;
33 using FB::URI;
34 
// Definition of the static table of domains that may be treated as localhost.
// Keys are domain names and values are the IP strings stored by
// registerValidLocalhost(); isLocalhost() looks the URI's domain up in it.
35 URI::StringStringMap URI::m_lhMap;
36 
37 std::string URI::url_encode(const std::string& in) {
38  std::stringstream res;
39  for (size_t i = 0; i < in.size(); ++i) {
40  char c = in[i];
41  if (c > 0 && (isalnum(c) || c == '+' ||
42  c == '$' || c == '-' || c == '_' || c == '.' || c == '!' ||
43  c == '*' || c == '\''|| c == '(' || c == ')' || c == ',' || c == '/')) res << c;
44  else {
45  char buf[4];
46  sprintf(buf, "%%%.2x", c & 0xff);
47  res << buf;
48  }
49  }
50  return res.str();
51 }
52 
53 std::string URI::url_decode(const std::string& in) {
54  std::stringstream res;
55  for (size_t i = 0; i < in.size(); ++i) {
56  if (in[i] == '%' && (i + 2) < in.size() && isxdigit(in[i+1]) && isxdigit(in[i+2])) {
57  char buf[3];
58  ++i;
59  buf[0] = in[i++]; buf[1] = in[i]; buf[2] = '\0';
60  res << ((char)strtol(buf, NULL, 16));
61  } else res << in[i];
62  }
63  return res.str();
64 }
65 
66 std::string URI::toString(bool include_host_part) const {
67  std::stringstream res;
68  if (include_host_part) {
69  res << protocol << string("://");
70  if (!login.empty()) res << login << "@";
71  res << domain;
72  if (port) res << ":" << boost::lexical_cast<string>(port);
73  }
74  res << url_encode(path);
75  if (!query_data.empty()) {
76  char separator = '?';
77  for (std::map<std::string, std::string>::const_iterator it = query_data.begin(); it != query_data.end(); ++it) {
78  res << separator;
79  separator = '&';
80  res << url_encode(it->first);
81  res << '=';
82  res << url_encode(it->second);
83  }
84  }
85  if (!fragment.empty())
86  res << "#" << fragment;
87  return res.str();
88 }
89 
90 URI URI::fromString(const std::string& in_str) {
91  return URI(in_str);
92 }
93 
94 URI::URI(const std::string& in_str) : port(0) {
95  string w = in_str;
96 
97  size_t l = w.find("://");
98  if (l != std::string::npos) {
99  protocol = w.substr(0, l);
100  std::transform(protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
101  w = w.substr(l + 3);
102  }
103  // validate protocol -- should only contain [a-z0-9]
104  for (l = 0; l < protocol.size(); ++l) {
105  if (!isalnum(protocol[l])) throw std::runtime_error("URI: invalid characters in protocol part");
106  }
107 
108  if (protocol != "file") { // file has neither a domain nor a port
109  l = w.find_first_of("/\\");
110  // chomp at the '/' (if it exists) so parsing the login/domain/port is easier
111  string domain_str;
112  if (l == std::string::npos) {
113  domain_str = w;
114  w = "/";
115  } else {
116  domain_str = w.substr(0, l);
117  w = w.substr(l);
118  }
119 
120  // check for login info
121  l = domain_str.find("@");
122  if (l != std::string::npos) {
123  login = domain_str.substr(0, l);
124  domain_str = domain_str.substr(l + 1);
125  }
126 
127  // split port, if it exists
128  size_t p = domain_str.find(":");
129  if (p != std::string::npos && p < l) {
130  domain = domain_str.substr(0, p);
131  string port_str = domain_str.substr(p + 1);
132  port = boost::lexical_cast<int>(port_str);
133  } else {
134  domain = domain_str;
135  }
136  // domains are case insensitive; transform to lower case for convenience.
137  std::transform(domain.begin(), domain.end(), domain.begin(), ::tolower);
138  }
139 
140  l = w.find('#');
141  if (l != std::string::npos) {
142  fragment = w.substr(l + 1);
143  w = w.substr(0, l);
144  }
145  l = w.find('?');
146  if (l != std::string::npos) {
147  parse_query_data(w.substr(l + 1));
148  w = w.substr(0, l);
149  }
150  path = url_decode(w);
151 }
152 
153 bool URI::operator==(const URI& right) const {
154  return ( (protocol == right.protocol) &&
155  (login == right.login) &&
156  (domain == right.domain) &&
157  (port == right.port) &&
158  (path == right.path) &&
159  (fragment == right.fragment) &&
160  (query_data == right.query_data)
161  );
162 }
163 
164 void URI::appendPathComponent(const std::string& pc) {
165  if (! pc.size()) return;
166  // make sure we have exactly one '/' between the old path and new path component[s]
167  if (path.size() && path[path.size() - 1] == '/') path.resize(path.size() - 1);
168  if (pc[0] != '/') path.push_back('/');
169  path += pc;
170 }
171 
172 std::string URI::filename() const {
173  if (path.empty() || path[path.size()-1] == '/') return string();
174  size_t loc = path.rfind("/");
175  if (loc == std::string::npos) return path;
176  return path.substr(loc+1);
177 }
178 
180 {
181  m_lhMap.clear();
182 }
183 void URI::registerValidLocalhost(std::string domain, std::string ip)
184 {
185  if (ip.empty()) {
186  ip = boost::asio::ip::address_v4::loopback().to_string();
187  }
188  m_lhMap[domain] = ip;
189 }
190 
191 bool URI::isLocalhost() const {
192  // To avoid a security breach by DNS poisioning, we make sure that the allowed
193  // domain (either localhost or registered with registerValidLocalhost) actually
194  // is pointing at localhost / the correct IP.
195  StringStringMap tmp;
196  StringStringMap::const_iterator fnd(m_lhMap.find(domain));
197  if (fnd != m_lhMap.end()) {
198  if (fnd->first == boost::asio::ip::address_v4::loopback().to_string()) {
199  return true;
200  }
201  static boost::tribool lastResult(boost::indeterminate);
202  if (!boost::indeterminate(lastResult)) return lastResult;
203 
204  boost::asio::io_service io_service;
205  boost::asio::ip::tcp::resolver resolver(io_service);
206  // The resolver wants a service name, that's what the "80" is.
207  // The numeric_service flag tells it not to do anything with the service name anyway.
208  boost::asio::ip::tcp::resolver::query query(boost::asio::ip::tcp::v4(), domain, "80", boost::asio::ip::resolver_query_base::numeric_service);
209  boost::asio::ip::tcp::resolver::iterator it = resolver.resolve(query);
210  if (it == boost::asio::ip::tcp::resolver::iterator()) return false;
211  boost::asio::ip::tcp::endpoint ep = it->endpoint();
212 
213  lastResult = (bool) (ep.address() == boost::asio::ip::address_v4::loopback());
214  return lastResult;
215  }
216  return false;
217 }
218 
219 void URI::parse_query_data(const std::string& in_str) {
220  std::vector<string> parts;
221  split(parts, in_str, is_any_of("&"));
222  for (std::vector<string>::iterator it = parts.begin(); it != parts.end(); ++it) {
223  std::vector<string> kvp;
224  split(kvp, *it, is_any_of("="));
225  if (kvp.empty()) continue;
226  else if (kvp.size() == 1) query_data[url_decode(kvp[0])] = string();
227  else query_data[url_decode(kvp[0])] = url_decode(kvp[1]);
228  }
229 }
230 
231 std::string FB::URI::UrlDirectory() const
232 {
233  std::stringstream res;
234  res << protocol << string("://");
235  if (!login.empty()) res << login << "@";
236  res << domain;
237  if (port) res << ":" << boost::lexical_cast<string>(port);
238  std::string dir = res.str();
239  if (path.empty() || path[path.size()-1] == '/') return string();
240  size_t loc = path.rfind("/");
241  if (loc == std::string::npos) return path;
242  dir += path.substr(0,loc);
243  return dir;
244 }
std::string filename() const
Definition: URI.cpp:172
static void registerValidLocalhost(std::string domain, std::string ip="")
Definition: URI.cpp:183
static std::string url_decode(const std::string &in)
Decodes the given urlencoded URL.
Definition: URI.cpp:53
void appendPathComponent(const std::string &pc)
Definition: URI.cpp:164
std::string UrlDirectory() const
Returns current Directory from the url of the page.
Definition: URI.cpp:231
Data structure for dealing with URI strings.
Definition: URI.h:42
bool isLocalhost() const
Definition: URI.cpp:191
void parse_query_data(const std::string &in_str)
Parses a urlencoded QueryString and stores the results in the URI.
Definition: URI.cpp:219
static void resetValidLocalhost()
Definition: URI.cpp:179
bool operator==(const URI &right) const
Compares two FB::URI objects.
Definition: URI.cpp:153