1 module reurl.url;
2 
3 import std.regex;
4 import std.conv;
5 import std.format;
6 import std.string;
7 import std.algorithm;
8 
9 @safe:
10 
/**
 * Exception raised when a string cannot be parsed as a URL
 * (thrown by `parseURL` when its pattern finds no match).
 */
class InvalidURLException : Exception {
    /**
     * Params:
     *   url  = the offending input; it is embedded in the exception message
     *   file = source file reported by the exception (defaults to the caller's file)
     *   line = source line reported by the exception (defaults to the caller's line)
     *   next = optional exception to chain after this one
     */
    this(string url, string file = __FILE__, size_t line = __LINE__, Throwable next = null) {
        // Forward file/line so the exception points at the throw site instead of
        // at this constructor.
        super("Invalid URL: %s".format(url), file, line, next);
    }
}
16 
/**
 * A URL broken down into its components.
 *
 * `path`, `query` and `fragment` are stored together with their leading
 * delimiter (`/`, `?` and `#` respectively) so that `toString` can rebuild
 * the URL by plain concatenation. Components that are absent are empty.
 */
struct URL {
    string scheme;
    string username;
    string password;
    string hostname;
    string port;
    string path;
    string query;
    string fragment;

    /// Returns: `hostname`, followed by `:port` when a port is set.
    @property string host() const {
        return port.length == 0 ? hostname : hostname ~ ":" ~ port;
    }

    /**
     * Rebuilds the textual URL from the components.
     *
     * The `username[:password]@` prefix is emitted only when `username` is
     * non-empty; the password is emitted only when it is non-empty as well.
     */
    string toString() const {
        string credentials;
        if (username.length != 0) {
            credentials = username ~ (password.length == 0 ? "" : ":" ~ password) ~ "@";
        }

        return scheme ~ "://" ~ credentials ~ host ~ path ~ query ~ fragment;
    }

    /**
     * Resolves `url` against this URL in place (`thisUrl ~= url`).
     *
     * Three forms are recognised:
     * $(UL
     *   $(LI `url` starts with `/`: an absolute path (`/path...`) or a
     *        reference with a host (`//host[:port]/path...`). Path, query and
     *        fragment are replaced; hostname and port are replaced only when
     *        the `//host` part is present.)
     *   $(LI `url` contains `://`: a complete URL, which replaces this one.)
     *   $(LI anything else: a relative path that is appended to the current
     *        path; query and fragment are replaced.)
     * )
     *
     * Returns: a copy of the updated URL.
     * Throws: InvalidURLException (from `parseURL`) in the complete-URL case.
     */
    URL opOpAssign(string op : "~")(in string url) {
        if (url.startsWith("/")) {
            // The appended URL starts with "/" (absolute path) or "//" (host + path):
            // replace path, query and fragment, and the host when one is given.
            auto splitDoubleDashPart = regex(`(//([\w\.\-]*)(?::(\d*))?)?(/[\w\-_\.\/]*)?(\?[\w\-_&=]*)?(#[\w\-_=]*)?`);
            auto m = url.matchFirst(splitDoubleDashPart);

            // Group 1 is the whole "//host[:port]" part; it is empty for a plain "/path".
            if (m[1].length > 0) {
                hostname = m[2];
                port = m[3];
            }

            path = m[4];
            query = m[5];
            fragment = m[6];
        }
        else if (url.canFind("://")) {
            // The appended URL is a complete URL - replace this one with it.
            this = url.parseURL();
        }
        else {
            // The appended URL is a relative path - append it to the current
            // path and replace query and fragment.
            auto splitPart = regex(`([\w\-_\.\/]*)?(\?[\w\-_&=]*)?(#[\w\-_=]*)?`);
            auto m = url.matchFirst(splitPart);

            // Only touch the path when there is a path part to append; otherwise
            // appending e.g. "?q=1" would add a spurious trailing "/".
            if (m[1].length > 0) {
                path ~= (path.endsWith("/") ? "" : "/") ~ m[1];
            }
            query = m[2];
            fragment = m[3];
        }

        return this;
    }

    /**
     * Non-mutating counterpart of `~=`: returns a new URL obtained by
     * resolving `url` against this one, leaving this URL untouched.
     */
    URL opBinary(string op : "~")(in string url) const {
        // All fields are strings, so a const URL copies into a mutable one.
        URL result = this;

        result ~= url;
        return result;
    }
}
82 
/**
 * Splits a URL string into its components using a regular expression.
 *
 * The pattern is not anchored, so the first portion of `url` that matches
 * is the one that gets decomposed. Optional components that do not take
 * part in the match are left empty.
 *
 * Params:
 *   url = text of the form `scheme://[user[:password]@]host[:port][/path][?query][#fragment]`
 * Returns: a `URL` populated from the capture groups.
 * Throws: InvalidURLException when the pattern does not match anywhere in `url`.
 */
URL parseURL(in string url) {
    auto urlPattern = regex(`(\w*)://(?:([\w\-_]*)(?::([\w\-_]*))?@)?([\w\-\.]*)(?::(\d*))?(/[\w\-_\.\/]*)?(\?[\w\-_&=]*)?(#[\w\-_=]*)?`);

    auto captures = matchFirst(url, urlPattern);
    if (captures.empty) {
        throw new InvalidURLException(url);
    }

    // Capture groups are numbered in the order the components appear in a URL.
    URL result;
    result.scheme = captures[1];
    result.username = captures[2];
    result.password = captures[3];
    result.hostname = captures[4];
    result.port = captures[5];
    result.path = captures[6];
    result.query = captures[7];
    result.fragment = captures[8];

    return result;
}
106 
// A URL carrying every component is split correctly and round-trips
// through toString.
unittest {
    auto input = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto parsed = input.parseURL();

    assert(parsed.scheme == "http");
    assert(parsed.username == "username");
    assert(parsed.password == "password");
    assert(parsed.hostname == "www.host-name.com");
    assert(parsed.port == "1234");
    assert(parsed.path == "/path1/path2");
    assert(parsed.query == "?param1=value1&param2=value2");
    assert(parsed.fragment == "#fragment");
    assert(parsed.host == "www.host-name.com:1234");
    assert(parsed.toString() == input);
}
122 
// Host, path and query only: credentials, port and fragment stay empty.
unittest {
    auto input = "http://www.host-name.com/path?param=value";
    auto parsed = input.parseURL();

    assert(parsed.scheme == "http");
    assert(parsed.username == "");
    assert(parsed.password == "");
    assert(parsed.hostname == "www.host-name.com");
    assert(parsed.port == "");
    assert(parsed.path == "/path");
    assert(parsed.query == "?param=value");
    assert(parsed.fragment == "");
    assert(parsed.host == "www.host-name.com");
    assert(parsed.toString() == input);
}
138 
// Host and path only: the query is empty as well.
unittest {
    auto input = "http://www.host-name.com/path";
    auto parsed = input.parseURL();

    assert(parsed.scheme == "http");
    assert(parsed.username == "");
    assert(parsed.password == "");
    assert(parsed.hostname == "www.host-name.com");
    assert(parsed.port == "");
    assert(parsed.path == "/path");
    assert(parsed.query == "");
    assert(parsed.fragment == "");
    assert(parsed.host == "www.host-name.com");
    assert(parsed.toString() == input);
}
154 
// Scheme and host only: every other component is empty.
unittest {
    auto input = "http://www.host-name.com";
    auto parsed = input.parseURL();

    assert(parsed.scheme == "http");
    assert(parsed.username == "");
    assert(parsed.password == "");
    assert(parsed.hostname == "www.host-name.com");
    assert(parsed.port == "");
    assert(parsed.path == "");
    assert(parsed.query == "");
    assert(parsed.fragment == "");
    assert(parsed.host == "www.host-name.com");
    assert(parsed.toString() == input);
}
170 
// Appending "//host/path" swaps host, port, path, query and fragment but
// keeps the scheme and the credentials.
unittest {
    auto input = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto target = input.parseURL();
    target ~= "//newhost.org/newpath";

    assert(target.scheme == "http");
    assert(target.username == "username");
    assert(target.password == "password");
    assert(target.hostname == "newhost.org");
    assert(target.port == "");
    assert(target.path == "/newpath");
    assert(target.query == "");
    assert(target.fragment == "");
    assert(target.host == "newhost.org");
    assert(target.toString() == "http://username:password@newhost.org/newpath");
}
187 
// Appending a complete URL replaces every component of the original.
unittest {
    auto input = "http://username:password@www.host-name.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto replacement = "newscheme://newusername:newpassword@www.newhostname.com:2345/newpath?newparam=newvalue#newfragment";
    auto target = input.parseURL();
    target ~= replacement;

    assert(target.scheme == "newscheme");
    assert(target.username == "newusername");
    assert(target.password == "newpassword");
    assert(target.hostname == "www.newhostname.com");
    assert(target.port == "2345");
    assert(target.path == "/newpath");
    assert(target.query == "?newparam=newvalue");
    assert(target.fragment == "#newfragment");
    assert(target.host == "www.newhostname.com:2345");
    assert(target.toString() == replacement);
}
205 
// Appending an absolute path replaces path, query and fragment while the
// scheme, credentials, host and port are kept.
unittest {
    auto input = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto target = input.parseURL();
    target ~= "/newpath?newparam=newvalue#newfragment";

    assert(target.scheme == "http");
    assert(target.username == "username");
    assert(target.password == "password");
    assert(target.hostname == "www.hostname.com");
    assert(target.port == "1234");
    assert(target.path == "/newpath");
    assert(target.query == "?newparam=newvalue");
    assert(target.fragment == "#newfragment");
    assert(target.host == "www.hostname.com:1234");
    assert(target.toString() == "http://username:password@www.hostname.com:1234/newpath?newparam=newvalue#newfragment");
}
222 
// Appending a relative path extends the existing path and replaces the
// query and fragment.
unittest {
    auto input = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto target = input.parseURL();
    target ~= "path3?newparam=newvalue#newfragment";

    assert(target.scheme == "http");
    assert(target.username == "username");
    assert(target.password == "password");
    assert(target.hostname == "www.hostname.com");
    assert(target.port == "1234");
    assert(target.path == "/path1/path2/path3");
    assert(target.query == "?newparam=newvalue");
    assert(target.fragment == "#newfragment");
    assert(target.host == "www.hostname.com:1234");
    assert(target.toString() == "http://username:password@www.hostname.com:1234/path1/path2/path3?newparam=newvalue#newfragment");
}
239 
// The binary `~` operator yields a new URL and leaves its left operand
// unchanged.
unittest {
    auto input = "http://username:password@www.hostname.com:1234/path1/path2?param1=value1&param2=value2#fragment";
    auto base = input.parseURL();
    auto derived = base ~ "//newhost.org/newpath";

    assert(base.toString() == input);
    assert(derived.toString() == "http://username:password@newhost.org/newpath");
}