module reurl.url;

import std.regex;
import std.conv;
import std.format;
import std.string;
import std.algorithm;

@safe:

/// Thrown by `parseURL` when the input does not contain a `scheme://` URL.
class InvalidURLException : Exception {
    /// Params: url = the text that could not be parsed; it is embedded in the message.
    this(string url) {
        super("Invalid URL: %s".format(url));
    }
}

// Shared regex fragments, so that parseURL and both branches of URL.opOpAssign
// agree on what a path, a query and a fragment look like.
// A component runs up to the next delimiter ('?' / '#') or whitespace, so
// percent-escapes, '+', '~', '.' and similar are kept instead of silently
// cutting the URL short at the first such character.
private enum absolutePathPattern = `(/[^\s?#]*)?`;
private enum queryFragmentPattern = `(\?[^\s#]*)?(#\S*)?`;

/**
 * A URL split into its textual components.
 *
 * `query` keeps its leading `?` and `fragment` keeps its leading `#`, so that
 * `toString` can re-assemble the URL by plain concatenation. Absent components
 * are empty strings.
 */
struct URL {
    string scheme;
    string username;
    string password;
    string hostname;
    string port;
    string path;
    string query;
    string fragment;

    /// Returns: `hostname`, followed by `:port` when a port is set.
    @property string host() const {
        return this.port == "" ? this.hostname : this.hostname ~ ":" ~ this.port;
    }

    /**
     * Re-assembles the URL as `scheme://[username[:password]@]host path query fragment`.
     *
     * The password is only emitted when a username is present.
     */
    string toString() const {
        string credentials;
        if (this.username.length != 0) {
            credentials = this.username
                ~ (this.password.length == 0 ? "" : ":" ~ this.password)
                ~ "@";
        }

        return this.scheme ~ "://" ~ credentials ~ this.host ~ this.path ~ this.query ~ this.fragment;
    }

    /**
     * Applies `url` on top of this URL, in place.
     *
     * $(UL
     *   $(LI `//host[:port][/path][?query][#fragment]` replaces hostname, port,
     *        path, query and fragment; scheme and credentials are kept.)
     *   $(LI `/path[?query][#fragment]` replaces path, query and fragment.)
     *   $(LI anything containing `://` is parsed with `parseURL` and replaces
     *        this URL entirely.)
     *   $(LI anything else is a relative reference: its path part, if any, is
     *        appended to the current path (a single `/` separator is inserted
     *        when needed), and query and fragment are replaced.)
     * )
     *
     * Returns: a copy of the updated URL.
     * Throws: `InvalidURLException` if an absolute `url` cannot be parsed.
     */
    URL opOpAssign(string op : "~")(in string url) {
        if (url.startsWith("/")) {
            // Either a network-path reference ("//host/...") or an absolute
            // path ("/..."): the optional first group tells them apart.
            auto splitDoubleDashPart = regex(
                `(//([\w\.\-]*)(?::(\d*))?)?` ~ absolutePathPattern ~ queryFragmentPattern);

            auto m = url.matchFirst(splitDoubleDashPart);

            with (this) {
                if (m[1].length > 0) {
                    hostname = m[2];
                    port = m[3];
                }

                path = m[4];
                query = m[5];
                fragment = m[6];
            }
        }
        else if (url.canFind("://")) {
            // The URL appended is an absolute URL - replace this one with it
            this = url.parseURL();
        }
        else {
            // The URL appended is a relative reference - extend the current
            // path and replace query and fragment
            auto splitPart = regex(`([^\s?#]*)` ~ queryFragmentPattern);
            auto m = url.matchFirst(splitPart);

            with (this) {
                // A reference made only of a query and/or fragment (or an empty
                // one) has no path part; leave the path alone rather than
                // growing a stray trailing "/".
                if (m[1].length > 0) {
                    path ~= (path.endsWith("/") ? "" : "/") ~ m[1];
                }

                query = m[2];
                fragment = m[3];
            }
        }

        return this;
    }

    /// Same as `~=`, but returns the result and leaves this URL untouched.
    URL opBinary(string op : "~")(in string url) const {
        URL newURL = this;

        newURL ~= url;
        return newURL;
    }
}

/**
 * Splits an absolute URL of the form
 * `scheme://[username[:password]@]hostname[:port][/path][?query][#fragment]`
 * into its components.
 *
 * Params:
 *   url = the text to parse
 * Returns: the populated `URL`; components that are absent are empty strings.
 * Throws: `InvalidURLException` when `url` contains no `scheme://` part.
 */
URL parseURL(in string url) {
    URL purl;

    auto splitUrl = regex(
        `(\w*)://(?:([\w\-_]*)(?::([\w\-_]*))?@)?([\w\-\.]*)(?::(\d*))?`
        ~ absolutePathPattern ~ queryFragmentPattern);

    auto m = url.matchFirst(splitUrl);
    if (m.empty) {
        throw new InvalidURLException(url);
    }

    with (purl) {
        scheme = m[1];
        username = m[2];
        password = m[3];
        hostname = m[4];
        port = m[5];
        path = m[6];
        query = m[7];
        fragment = m[8];
    }

    return purl;
}

unittest {
    auto url = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto purl = parseURL(url);

    assert(purl.scheme == "http");
    assert(purl.username == "username");
    assert(purl.password == "password");
    assert(purl.hostname == "www.host-name.com");
    assert(purl.port == "1234");
    assert(purl.path == "/path1/path2");
    assert(purl.query == "?param1=value1&param2=value2");
    assert(purl.fragment == "#fragment");
    assert(purl.host == "www.host-name.com:1234");
    assert(purl.toString() == url);
}

unittest {
    auto url = "http://www.host-name.com/path?param=value";
    auto purl = parseURL(url);

    assert(purl.scheme == "http");
    assert(purl.username == "");
    assert(purl.password == "");
    assert(purl.hostname == "www.host-name.com");
    assert(purl.port == "");
    assert(purl.path == "/path");
    assert(purl.query == "?param=value");
    assert(purl.fragment == "");
    assert(purl.host == "www.host-name.com");
    assert(purl.toString() == url);
}

unittest {
    auto url = "http://www.host-name.com/path";
    auto purl = parseURL(url);

    assert(purl.scheme == "http");
    assert(purl.username == "");
    assert(purl.password == "");
    assert(purl.hostname == "www.host-name.com");
    assert(purl.port == "");
    assert(purl.path == "/path");
    assert(purl.query == "");
    assert(purl.fragment == "");
    assert(purl.host == "www.host-name.com");
    assert(purl.toString() == url);
}

unittest {
    auto url = "http://www.host-name.com";
    auto purl = parseURL(url);

    assert(purl.scheme == "http");
    assert(purl.username == "");
    assert(purl.password == "");
    assert(purl.hostname == "www.host-name.com");
    assert(purl.port == "");
    assert(purl.path == "");
    assert(purl.query == "");
    assert(purl.fragment == "");
    assert(purl.host == "www.host-name.com");
    assert(purl.toString() == url);
}

unittest {
    auto url = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto purl = parseURL(url);
    purl ~= "//newhost.org/newpath";

    assert(purl.scheme == "http");
    assert(purl.username == "username");
    assert(purl.password == "password");
    assert(purl.hostname == "newhost.org");
    assert(purl.port == "");
    assert(purl.path == "/newpath");
    assert(purl.query == "");
    assert(purl.fragment == "");
    assert(purl.host == "newhost.org");
    assert(purl.toString() == "http://username:password@newhost.org/newpath");
}

unittest {
    auto url = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto newUrl = "newscheme://newusername:newpassword@www.newhostname.com:2345/newpath?newparam=newvalue#newfragment";
    auto purl = parseURL(url);
    purl ~= newUrl;

    assert(purl.scheme == "newscheme");
    assert(purl.username == "newusername");
    assert(purl.password == "newpassword");
    assert(purl.hostname == "www.newhostname.com");
    assert(purl.port == "2345");
    assert(purl.path == "/newpath");
    assert(purl.query == "?newparam=newvalue");
    assert(purl.fragment == "#newfragment");
    assert(purl.host == "www.newhostname.com:2345");
    assert(purl.toString() == newUrl);
}

unittest {
    auto url = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto purl = parseURL(url);
    purl ~= "/newpath?newparam=newvalue#newfragment";

    assert(purl.scheme == "http");
    assert(purl.username == "username");
    assert(purl.password == "password");
    assert(purl.hostname == "www.hostname.com");
    assert(purl.port == "1234");
    assert(purl.path == "/newpath");
    assert(purl.query == "?newparam=newvalue");
    assert(purl.fragment == "#newfragment");
    assert(purl.host == "www.hostname.com:1234");
    assert(purl.toString() == "http://username:password@www.hostname.com:1234/newpath?newparam=newvalue#newfragment");
}

unittest {
    auto url = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto purl = parseURL(url);
    purl ~= "path3?newparam=newvalue#newfragment";

    assert(purl.scheme == "http");
    assert(purl.username == "username");
    assert(purl.password == "password");
    assert(purl.hostname == "www.hostname.com");
    assert(purl.port == "1234");
    assert(purl.path == "/path1/path2/path3");
    assert(purl.query == "?newparam=newvalue");
    assert(purl.fragment == "#newfragment");
    assert(purl.host == "www.hostname.com:1234");
    assert(purl.toString() == "http://username:password@www.hostname.com:1234/path1/path2/path3?newparam=newvalue#newfragment");
}

unittest {
    auto url = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto purl = parseURL(url);
    auto purl2 = purl ~ "//newhost.org/newpath";

    assert(purl.toString() == url);
    assert(purl2.toString() == "http://username:password@newhost.org/newpath");
}

// A reference without a path part must not add a trailing "/" to the path.
unittest {
    auto purl = parseURL("http://www.hostname.com/path1/path2?param=value#fragment");
    purl ~= "?newparam=newvalue";

    assert(purl.path == "/path1/path2");
    assert(purl.query == "?newparam=newvalue");
    assert(purl.fragment == "");
    assert(purl.toString() == "http://www.hostname.com/path1/path2?newparam=newvalue");

    purl ~= "#newfragment";
    assert(purl.path == "/path1/path2");
    assert(purl.query == "");
    assert(purl.fragment == "#newfragment");
}

// Percent-escapes and other non-word characters must survive a round trip.
unittest {
    auto url = "http://www.hostname.com/a%20b/c~d?q=1.5&r=x%2Fy+z#sec-1.2";
    auto purl = parseURL(url);

    assert(purl.hostname == "www.hostname.com");
    assert(purl.path == "/a%20b/c~d");
    assert(purl.query == "?q=1.5&r=x%2Fy+z");
    assert(purl.fragment == "#sec-1.2");
    assert(purl.toString() == url);
}

// Read-only operations are available on const URLs.
unittest {
    const URL purl = parseURL("http://www.hostname.com:8080/path");
    auto purl2 = purl ~ "child";

    assert(purl.host == "www.hostname.com:8080");
    assert(purl.toString() == "http://www.hostname.com:8080/path");
    assert(purl2.toString() == "http://www.hostname.com:8080/path/child");
}