NeoMutt  2024-10-02-37-gfa9146
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
url.c File Reference

Parse and identify different URL schemes. More...

#include "config.h"
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "url.h"
#include "mime.h"
+ Include dependency graph for url.c:

Go to the source code of this file.

Functions

static bool parse_query_string (struct UrlQueryList *list, char *src)
 Parse a URL query string.
 
static enum UrlScheme get_scheme (const char *src, const regmatch_t *match)
 Extract the scheme part from a matched URL.
 
static struct Url * url_new (void)
 Create a Url.
 
void url_free (struct Url **ptr)
 Free the contents of a URL.
 
void url_pct_encode (char *buf, size_t buflen, const char *src)
 Percent-encode a string.
 
int url_pct_decode (char *s)
 Decode a percent-encoded string.
 
enum UrlScheme url_check_scheme (const char *str)
 Check the protocol of a URL.
 
struct Url * url_parse (const char *src)
 Fill in Url.
 
int url_tobuffer (const struct Url *url, struct Buffer *buf, uint8_t flags)
 Output the URL string for a given Url object.
 
int url_tostring (const struct Url *url, char *dest, size_t len, uint8_t flags)
 Output the URL string for a given Url object.
 

Variables

static const struct Mapping UrlMap []
 Constants for URL protocols.
 

Detailed Description

Parse and identify different URL schemes.

Authors
  • Richard Russon
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file url.c.

Function Documentation

◆ parse_query_string()

static bool parse_query_string ( struct UrlQueryList *  list,
char *  src 
)
static

Parse a URL query string.

Parameters
list  List to store the results
src   String to parse
Return values
true   Success
false  Error

Definition at line 56 of file url.c.

57{
58 if (!src || (*src == '\0'))
59 return false;
60
61 bool again = true;
62 while (again)
63 {
64 regmatch_t *match = mutt_prex_capture(PREX_URL_QUERY_KEY_VAL, src);
65 if (!match)
66 return false;
67
68 regmatch_t *mkey = &match[PREX_URL_QUERY_KEY_VAL_MATCH_KEY];
69 regmatch_t *mval = &match[PREX_URL_QUERY_KEY_VAL_MATCH_VAL];
70
71 again = src[mutt_regmatch_end(mval)] != '\0';
72
73 char *key = src + mutt_regmatch_start(mkey);
74 char *val = src + mutt_regmatch_start(mval);
75 src[mutt_regmatch_end(mkey)] = '\0';
76 src[mutt_regmatch_end(mval)] = '\0';
77 if ((url_pct_decode(key) < 0) || (url_pct_decode(val) < 0))
78 return false;
79
80 struct UrlQuery *qs = mutt_mem_calloc(1, sizeof(struct UrlQuery));
81 qs->name = key;
82 qs->value = val;
83 STAILQ_INSERT_TAIL(list, qs, entries);
84
85 src += mutt_regmatch_end(mval) + again;
86 }
87
88 return true;
89}
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:298
@ PREX_URL_QUERY_KEY_VAL_MATCH_VAL
key=[val]
Definition: prex.h:85
@ PREX_URL_QUERY_KEY_VAL_MATCH_KEY
[key]=val
Definition: prex.h:84
@ PREX_URL_QUERY_KEY_VAL
https://example.com/?[q=foo]
Definition: prex.h:35
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:67
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:57
Parsed Query String.
Definition: url.h:58
char * name
Query name.
Definition: url.h:59
char * value
Query value.
Definition: url.h:60
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:190
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_scheme()

static enum UrlScheme get_scheme ( const char *  src,
const regmatch_t *  match 
)
static

Extract the scheme part from a matched URL.

Parameters
src    Original string that was matched
match  Result from a matched regex
Return values
enum  Scheme

Definition at line 97 of file url.c.

98{
99 enum UrlScheme rc = U_UNKNOWN;
100 if (src && match)
101 {
103 if (rc == -1)
104 rc = U_UNKNOWN;
105 }
106 return rc;
107}
int mutt_map_get_value_n(const char *name, size_t len, const struct Mapping *map)
Lookup the constant for a string.
Definition: mapping.c:62
@ PREX_URL_MATCH_SCHEME
[imaps]://...
Definition: prex.h:55
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:77
static const struct Mapping UrlMap[]
Constants for URL protocols.
Definition: url.c:41
UrlScheme
All recognised Url types.
Definition: url.h:34
@ U_UNKNOWN
Url wasn't recognised.
Definition: url.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_new()

static struct Url * url_new ( void  )
static

Create a Url.

Return values
ptr  New Url

Definition at line 113 of file url.c.

114{
115 struct Url *url = mutt_mem_calloc(1, sizeof(struct Url));
117 return url;
118}
#define STAILQ_INIT(head)
Definition: queue.h:372
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:69
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:76
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_free()

void url_free ( struct Url **  ptr)

Free the contents of a URL.

Parameters
ptr  Url to free

Definition at line 124 of file url.c.

125{
126 if (!ptr || !*ptr)
127 return;
128
129 struct Url *url = *ptr;
130
131 struct UrlQueryList *l = &url->query_strings;
132 while (!STAILQ_EMPTY(l))
133 {
134 struct UrlQuery *np = STAILQ_FIRST(l);
135 STAILQ_REMOVE_HEAD(l, entries);
136 // Don't free 'name', 'value': they are pointers into the 'src' string
137 FREE(&np);
138 }
139
140 FREE(&url->src);
141 FREE(ptr);
142}
#define FREE(x)
Definition: memory.h:45
#define STAILQ_REMOVE_HEAD(head, field)
Definition: queue.h:422
#define STAILQ_FIRST(head)
Definition: queue.h:350
#define STAILQ_EMPTY(head)
Definition: queue.h:348
char * src
Raw URL string.
Definition: url.h:77
+ Here is the caller graph for this function:

◆ url_pct_encode()

void url_pct_encode ( char *  buf,
size_t  buflen,
const char *  src 
)

Percent-encode a string.

Parameters
buf     Buffer for the result
buflen  Length of buffer
src     String to encode

e.g. turn "hello world" into "hello%20world"

Definition at line 152 of file url.c.

/**
 * url_pct_encode - Percent-encode a string
 * @param buf    Buffer for the result
 * @param buflen Length of buffer
 * @param src    String to encode
 *
 * e.g. turn "hello world" into "hello%20world"
 *
 * Only the characters in the set " /:&%=" are encoded; everything else is
 * copied verbatim.  The result is always NUL-terminated.  If @a buf is too
 * small the output is silently truncated, but never in the middle of a
 * "%XX" escape.  A NULL @a src yields an empty string.  Nothing is written
 * when @a buf is NULL or @a buflen is 0.
 */
void url_pct_encode(char *buf, size_t buflen, const char *src)
{
  static const char *hex = "0123456789ABCDEF";

  /* With no room at all, even the terminator wouldn't fit, and the
   * 'buflen--' below would wrap around to SIZE_MAX */
  if (!buf || (buflen == 0))
    return;

  *buf = '\0';
  buflen--; /* reserve space for the terminating NUL */
  while (src && *src && (buflen != 0))
  {
    if (strchr(" /:&%=", *src))
    {
      /* An escape needs three bytes; stop rather than emit a partial one */
      if (buflen < 3)
        break;

      const unsigned char ch = (unsigned char) *src;
      *buf++ = '%';
      *buf++ = hex[(ch >> 4) & 0xf];
      *buf++ = hex[ch & 0xf];
      src++;
      buflen -= 3;
      continue;
    }
    *buf++ = *src++;
    buflen--;
  }
  *buf = '\0';
}
+ Here is the caller graph for this function:

◆ url_pct_decode()

int url_pct_decode ( char *  s)

Decode a percent-encoded string.

Parameters
s  String to decode
Return values
0   Success
-1  Error

e.g. turn "hello%20world" into "hello world". The string is decoded in-place.

Definition at line 190 of file url.c.

/**
 * url_pct_decode - Decode a percent-encoded string
 * @param s String to decode
 * @retval  0 Success
 * @retval -1 Error
 *
 * e.g. turn "hello%20world" into "hello world"
 * The string is decoded in-place.
 *
 * On error (NULL input, or a '%' not followed by two hex digits) decoding
 * stops immediately; the part of the string already processed has been
 * rewritten and no new terminator is written.
 */
int url_pct_decode(char *s)
{
  if (!s)
    return -1;

  const char *in = s; /* read cursor */
  char *out = s;      /* write cursor, never ahead of the read cursor */

  while (*in != '\0')
  {
    if (*in != '%')
    {
      *out++ = *in++;
      continue;
    }

    /* A '%' must be followed by exactly two hex digits */
    if ((in[1] == '\0') || (in[2] == '\0') || !isxdigit((unsigned char) in[1]) ||
        !isxdigit((unsigned char) in[2]) || (hexval(in[1]) < 0) || (hexval(in[2]) < 0))
    {
      return -1;
    }

    *out++ = (hexval(in[1]) << 4) | (hexval(in[2]));
    in += 3;
  }

  *out = '\0';
  return 0;
}
#define hexval(ch)
Definition: mime.h:80
+ Here is the caller graph for this function:

◆ url_check_scheme()

enum UrlScheme url_check_scheme ( const char *  str)

Check the protocol of a URL.

Parameters
str  String to check
Return values
enum  UrlScheme, e.g. U_IMAPS

Definition at line 226 of file url.c.

227{
228 return get_scheme(str, mutt_prex_capture(PREX_URL, str));
229}
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:34
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:97
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_parse()

struct Url * url_parse ( const char *  src)

Fill in Url.

Parameters
src  String to parse
Return values
ptr   Parsed URL
NULL  Error
Note
Caller must free returned Url with url_free()

Definition at line 239 of file url.c.

/**
 * url_parse - Fill in Url
 * @param src String to parse
 * @retval ptr  Parsed URL
 * @retval NULL Error
 *
 * @note Caller must free returned Url with url_free()
 *
 * The input is matched against PREX_URL, then duplicated into url->src.
 * The user, pass, host, path and query fields of the result all point into
 * that copy, which is cut into separate strings by writing NULs at the ends
 * of the regex captures.  User, pass, path and query parts are
 * percent-decoded in place; the host is used as-is.
 */
240{
241 const regmatch_t *match = mutt_prex_capture(PREX_URL, src);
242 if (!match)
243 return NULL;
244
/* Give up on any scheme that get_scheme() does not recognise */
245 enum UrlScheme scheme = get_scheme(src, match);
246 if (scheme == U_UNKNOWN)
247 return NULL;
248
/* Pick out the individual captures; offsets are relative to the start of src */
249 const regmatch_t *userinfo = &match[PREX_URL_MATCH_USERINFO];
250 const regmatch_t *user = &match[PREX_URL_MATCH_USER];
251 const regmatch_t *pass = &match[PREX_URL_MATCH_PASS];
252 const regmatch_t *host = &match[PREX_URL_MATCH_HOSTNAME];
253 const regmatch_t *ipvx = &match[PREX_URL_MATCH_HOSTIPVX];
254 const regmatch_t *port = &match[PREX_URL_MATCH_PORT];
255 const regmatch_t *path = &match[PREX_URL_MATCH_PATH];
256 const regmatch_t *query = &match[PREX_URL_MATCH_QUERY];
257 const regmatch_t *pathonly = &match[PREX_URL_MATCH_PATH_ONLY];
258
/* From here on, work on a private, writable copy of the input */
259 struct Url *url = url_new();
260 url->scheme = scheme;
261 url->src = mutt_str_dup(src);
262
263 /* If the scheme is not followed by two forward slashes, then it's a simple
264 * path (see https://tools.ietf.org/html/rfc3986#section-3). */
265 if (mutt_regmatch_start(pathonly) != -1)
266 {
267 url->src[mutt_regmatch_end(pathonly)] = '\0';
268 url->path = url->src + mutt_regmatch_start(pathonly);
269 if (url_pct_decode(url->path) < 0)
270 goto err;
271 }
272
273 /* separate userinfo part */
274 if (mutt_regmatch_end(userinfo) != -1)
275 {
/* (end - 1) is the last character of the userinfo capture -- the '@' in
 * the documented example "[user:pass@]" -- so this ends the user/pass text */
276 url->src[mutt_regmatch_end(userinfo) - 1] = '\0';
277 }
278
279 /* user */
280 if (mutt_regmatch_end(user) != -1)
281 {
282 url->src[mutt_regmatch_end(user)] = '\0';
283 url->user = url->src + mutt_regmatch_start(user);
284 if (url_pct_decode(url->user) < 0)
285 goto err;
286 }
287
288 /* pass */
289 if (mutt_regmatch_end(pass) != -1)
290 {
/* No terminator is written here; presumably the password runs up to the
 * '@' already overwritten in the userinfo step.
 * NOTE(review): confirm against the PREX_URL regex */
291 url->pass = url->src + mutt_regmatch_start(pass);
292 if (url_pct_decode(url->pass) < 0)
293 goto err;
294 }
295
296 /* host */
/* Use the hostname capture if it is non-empty, otherwise fall back to the
 * bracketed IP capture.  The host is not percent-decoded. */
297 if (mutt_regmatch_len(host) != 0)
298 {
299 url->host = url->src + mutt_regmatch_start(host);
300 url->src[mutt_regmatch_end(host)] = '\0';
301 }
302 else if (mutt_regmatch_end(ipvx) != -1)
303 {
304 url->host = url->src + mutt_regmatch_start(ipvx) + 1; /* skip opening '[' */
305 url->src[mutt_regmatch_end(ipvx) - 1] = '\0'; /* skip closing ']' */
306 }
307
308 /* port */
309 if (mutt_regmatch_end(port) != -1)
310 {
311 url->src[mutt_regmatch_end(port)] = '\0';
312 const char *ports = url->src + mutt_regmatch_start(port);
313 unsigned short num;
/* mutt_str_atous_full() is defined elsewhere; a false return is treated as
 * an invalid port and aborts the whole parse */
314 if (!mutt_str_atous_full(ports, &num))
315 {
316 goto err;
317 }
318 url->port = num;
319 }
320
321 /* path */
322 if (mutt_regmatch_end(path) != -1)
323 {
324 url->src[mutt_regmatch_end(path)] = '\0';
325 url->path = url->src + mutt_regmatch_start(path);
326 if (!url->host)
327 {
328 /* If host is not provided, restore the '/': this is an absolute path */
329 *(--url->path) = '/';
330 }
331 if (url_pct_decode(url->path) < 0)
332 goto err;
333 }
334
335 /* query */
336 if (mutt_regmatch_end(query) != -1)
337 {
/* parse_query_string() splits and decodes the query in place; the list
 * entries it adds point into url->src */
338 char *squery = url->src + mutt_regmatch_start(query);
339 if (!parse_query_string(&url->query_strings, squery))
340 goto err;
341 }
342
343 return url;
344
/* Single failure exit: url_free() releases any query entries already
 * added, the src copy and the Url itself */
345err:
346 url_free(&url);
347 return NULL;
348}
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
@ PREX_URL_MATCH_USER
...//[user]:pass@...
Definition: prex.h:60
@ PREX_URL_MATCH_QUERY
...Inbox?[foo=bar&baz=value]
Definition: prex.h:72
@ PREX_URL_MATCH_HOSTNAME
imaps://...[host.com]...
Definition: prex.h:64
@ PREX_URL_MATCH_PORT
imaps://host.com:[993]/...
Definition: prex.h:67
@ PREX_URL_MATCH_PATH_ONLY
mailto:[me@example.com]?foo=bar
Definition: prex.h:70
@ PREX_URL_MATCH_USERINFO
...//[user:pass@]...
Definition: prex.h:59
@ PREX_URL_MATCH_PATH
...:993/[Inbox]
Definition: prex.h:69
@ PREX_URL_MATCH_HOSTIPVX
imaps://...[127.0.0.1]...
Definition: prex.h:65
@ PREX_URL_MATCH_PASS
...//user:[pass]@...
Definition: prex.h:62
char * user
Username.
Definition: url.h:71
unsigned short port
Port.
Definition: url.h:74
char * host
Host.
Definition: url.h:73
char * pass
Password.
Definition: url.h:72
char * path
Path.
Definition: url.h:75
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:70
static bool parse_query_string(struct UrlQueryList *list, char *src)
Parse a URL query string.
Definition: url.c:56
void url_free(struct Url **ptr)
Free the contents of a URL.
Definition: url.c:124
static struct Url * url_new(void)
Create a Url.
Definition: url.c:113
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_tobuffer()

int url_tobuffer ( const struct Url *  url,
struct Buffer *  buf,
uint8_t  flags 
)

Output the URL string for a given Url object.

Parameters
url    Url to turn into a string
buf    Buffer for the result
flags  Flags, e.g. U_PATH
Return values
0   Success
-1  Error

Definition at line 358 of file url.c.

359{
360 if (!url || !buf)
361 return -1;
362 if (url->scheme == U_UNKNOWN)
363 return -1;
364
365 buf_printf(buf, "%s:", mutt_map_get_name(url->scheme, UrlMap));
366
367 if (url->host)
368 {
369 if (!(flags & U_PATH))
370 buf_addstr(buf, "//");
371
372 if (url->user && (url->user[0] || !(flags & U_PATH)))
373 {
374 char str[256] = { 0 };
375 url_pct_encode(str, sizeof(str), url->user);
376 buf_add_printf(buf, "%s@", str);
377 }
378
379 if (strchr(url->host, ':'))
380 buf_add_printf(buf, "[%s]", url->host);
381 else
382 buf_addstr(buf, url->host);
383
384 if (url->port)
385 buf_add_printf(buf, ":%hu/", url->port);
386 else
387 buf_addstr(buf, "/");
388 }
389
390 if (url->path)
391 buf_addstr(buf, url->path);
392
393 if (STAILQ_FIRST(&url->query_strings))
394 {
395 buf_addstr(buf, "?");
396
397 char str[256] = { 0 };
398 struct UrlQuery *np = NULL;
399 STAILQ_FOREACH(np, &url->query_strings, entries)
400 {
401 url_pct_encode(str, sizeof(str), np->name);
402 buf_addstr(buf, str);
403 buf_addstr(buf, "=");
404 url_pct_encode(str, sizeof(str), np->value);
405 buf_addstr(buf, str);
406 if (STAILQ_NEXT(np, entries))
407 buf_addstr(buf, "&");
408 }
409 }
410
411 return 0;
412}
int buf_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:161
int buf_add_printf(struct Buffer *buf, const char *fmt,...)
Format a string appending a Buffer.
Definition: buffer.c:204
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:226
const char * mutt_map_get_name(int val, const struct Mapping *map)
Lookup a string for a constant.
Definition: mapping.c:42
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_NEXT(elm, field)
Definition: queue.h:400
void url_pct_encode(char *buf, size_t buflen, const char *src)
Percent-encode a string.
Definition: url.c:152
#define U_PATH
Definition: url.h:50
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_tostring()

int url_tostring ( const struct Url *  url,
char *  dest,
size_t  len,
uint8_t  flags 
)

Output the URL string for a given Url object.

Parameters
urlUrl to turn into a string
destBuffer for the result
lenLength of buffer
flagsFlags, e.g. U_PATH
Return values
0Success
-1Error

Definition at line 423 of file url.c.

/**
 * url_tostring - Output the URL string for a given Url object
 * @param url   Url to turn into a string
 * @param dest  Buffer for the result
 * @param len   Length of buffer
 * @param flags Flags, e.g. #U_PATH
 * @retval  0 Success
 * @retval -1 Error
 *
 * Builds the string in a pooled Buffer with url_tobuffer() and copies it to
 * @a dest.  @a dest is left untouched on error.
 */
int url_tostring(const struct Url *url, char *dest, size_t len, uint8_t flags)
{
  if (!url || !dest)
    return -1;

  struct Buffer *tmp = buf_pool_get();

  const int rc = url_tobuffer(url, tmp, flags);
  if (rc == 0)
  {
    mutt_str_copy(dest, buf_string(tmp), len);
  }

  /* The pooled Buffer is handed back on success and on failure alike */
  buf_pool_release(&tmp);
  return rc;
}
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition: buffer.h:96
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:581
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:81
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition: pool.c:94
String manipulation buffer.
Definition: buffer.h:36
int url_tobuffer(const struct Url *url, struct Buffer *buf, uint8_t flags)
Output the URL string for a given Url object.
Definition: url.c:358
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ UrlMap

const struct Mapping UrlMap[]
static
Initial value:
= {
{ "file", U_FILE }, { "imap", U_IMAP }, { "imaps", U_IMAPS },
{ "pop", U_POP }, { "pops", U_POPS }, { "news", U_NNTP },
{ "nntp", U_NNTP }, { "snews", U_NNTPS }, { "nntps", U_NNTPS },
{ "mailto", U_MAILTO }, { "notmuch", U_NOTMUCH }, { "smtp", U_SMTP },
{ "smtps", U_SMTPS }, { NULL, U_UNKNOWN },
}
@ U_NOTMUCH
Url is notmuch://.
Definition: url.h:46
@ U_FILE
Url is file://.
Definition: url.h:36
@ U_NNTPS
Url is nntps://.
Definition: url.h:42
@ U_MAILTO
Url is mailto://.
Definition: url.h:45
@ U_SMTPS
Url is smtps://.
Definition: url.h:44
@ U_SMTP
Url is smtp://.
Definition: url.h:43
@ U_NNTP
Url is nntp://.
Definition: url.h:41
@ U_IMAP
Url is imap://.
Definition: url.h:39
@ U_POPS
Url is pops://.
Definition: url.h:38
@ U_IMAPS
Url is imaps://.
Definition: url.h:40
@ U_POP
Url is pop://.
Definition: url.h:37

Constants for URL protocols.

Definition at line 41 of file url.c.