scorpius/lib/url.c

354 lines
6.8 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <url.h>
#ifdef TESTURLLIB
/*
 * Test driver for the URL library.
 *
 *   prog URL        parse URL and print it back
 *   prog URL PATH   parse URL, join PATH onto it and print the result
 *   prog            ask for a URL and a path on stdin, then join and print
 *
 * Returns 0 on success and 1 on any failure (bad input, parse/join error,
 * out of memory).
 */
int
main (int argc,
      char **argv)
{
  Url *url;
  char url_in[1025];
  char path_in[1025];
  const char *url_arg = NULL;
  const char *path_arg = NULL;
  char *out_str = NULL;
  size_t out_size;
  int status = 1;

  url = urllib_init ();
  if (url == NULL)
    return 1;

  if (argc >= 2)
    {
      /* The three modes are mutually exclusive: command-line arguments
         never fall through to the interactive prompts. */
      url_arg = argv[1];
      if (argc >= 3)
        path_arg = argv[2];
      if (urllib_parse (url, url_arg))
        goto out;
    }
  else
    {
      printf ("Enter a url : ");
      /* The field width keeps scanf inside the 1025-byte buffer. */
      if (scanf ("%1024s", url_in) != 1)
        goto out;
      url_arg = url_in;
      if (urllib_parse (url, url_arg))
        goto out;
      printf ("Enter a path : ");
      if (scanf ("%1024s", path_in) != 1)
        goto out;
      path_arg = path_in;
    }

  if (path_arg != NULL && urllib_join (url, path_arg))
    goto out;

  /* urllib_tostring cannot be told the buffer size, so size the buffer from
     the inputs: the printed URL is never longer than both inputs together
     plus the separators and the trailing "\r\n". */
  out_size = strlen (url_arg) + (path_arg != NULL ? strlen (path_arg) : 0) + 8;
  out_str = malloc (out_size);
  if (out_str == NULL)
    goto out;

  urllib_tostring (url, out_str);
  printf ("=> %s\n", out_str);
  status = 0;

out:
  free (out_str);
  urllib_free (url);
  return status;
}
#endif
/*
 * Allocate a new Url whose scheme, host, port and path are all NULL.
 *
 * Returns the new object, or NULL when the allocation fails.  The result
 * is released with urllib_free ().
 */
Url*
urllib_init (void)
{
  Url *url = malloc (sizeof *url);

  if (url == NULL)
    return NULL;

  url->scheme = NULL;
  url->host = NULL;
  url->port = NULL;
  url->path = NULL;
  return url;
}
/*
 * Release a Url and the four component strings it owns.
 *
 * Passing NULL is allowed and does nothing, mirroring free ().
 */
void
urllib_free (Url *url)
{
  if (url == NULL)
    return;

  free (url->scheme);
  free (url->host);
  free (url->port);
  free (url->path);
  free (url);
}
/* Copy the first LEN bytes of SRC into a newly allocated, NUL-terminated
   string.  Returns NULL when the allocation fails. */
static char *
urllib_parse_dup (const char *src,
                  size_t len)
{
  char *copy = malloc (len + 1);

  if (copy == NULL)
    return NULL;
  memcpy (copy, src, len);
  copy[len] = '\0';
  return copy;
}

/*
 * Split URL_STRING of the form  scheme://host[:port][/path]  into the
 * components of URL.
 *
 * URL must have been set up by urllib_init () (or filled by an earlier
 * call to this function): the components it already holds are freed
 * before parsing, so parsing twice into the same object does not leak.
 *
 * The host is either a bracketed IPv6 literal (stored with its brackets)
 * or a name made of letters, digits, '.' and '-'.  The port, when present,
 * is stored without the leading ':'.  Port and path stay NULL when absent.
 *
 * Returns 0 on success and 1 on a syntax error, a NULL argument or an
 * allocation failure.  On failure the components parsed before the error
 * remain set in URL; they are still released by urllib_free ().
 */
int
urllib_parse (Url *url,
              const char *url_string)
{
  const char *cursor = url_string;
  const char *end;
  size_t len, i;

  if (url == NULL || url_string == NULL)
    return 1;

  /* Drop whatever an earlier parse left behind. */
  free (url->scheme);
  free (url->host);
  free (url->port);
  free (url->path);
  url->scheme = NULL;
  url->host = NULL;
  url->port = NULL;
  url->path = NULL;

  /* Scheme: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) up to the first ':'. */
  end = strchr (cursor, ':');
  if (end == NULL)
    return 1;
  len = (size_t) (end - cursor);
  /* <ctype.h> functions need an unsigned char value, hence the casts. */
  if (!isalpha ((unsigned char) cursor[0]))
    return 1;
  for (i = 1; i < len; i++)
    {
      unsigned char c = (unsigned char) cursor[i];

      if (!isalnum (c) && c != '+' && c != '-' && c != '.')
        return 1;
    }
  url->scheme = urllib_parse_dup (cursor, len);
  if (url->scheme == NULL)
    return 1;

  /* After the ':' there must be "//" followed by at least one character. */
  cursor = end + 1;
  if (cursor[0] != '/' || cursor[1] != '/' || cursor[2] == '\0')
    return 1;
  cursor += 2;

  if (cursor[0] == '[')
    {
      /* IPv6 literal: hexadecimal digits and ':' between the brackets. */
      const char *p;

      end = strchr (cursor, ']');
      if (end == NULL)
        return 1;
      for (p = cursor + 1; p < end; p++)
        if (!isxdigit ((unsigned char) *p) && *p != ':')
          return 1;
      /* Keep the closing bracket in the host so that the cursor lands on
         the port or path that follows it. */
      len = (size_t) (end - cursor) + 1;
      if (cursor[len] != '\0' && cursor[len] != ':' && cursor[len] != '/')
        return 1;
    }
  else
    {
      /* IPv4 address or host name: it ends at the first ':' or '/',
         whichever comes first, so a ':' inside the path is harmless. */
      len = strcspn (cursor, ":/");
      for (i = 0; i < len; i++)
        {
          unsigned char c = (unsigned char) cursor[i];

          if (!isalnum (c) && c != '.' && c != '-')
            return 1;
        }
    }
  url->host = urllib_parse_dup (cursor, len);
  if (url->host == NULL)
    return 1;
  cursor += len;

  /* Optional port: digits between ':' and the next '/' (or the end). */
  if (cursor[0] == ':')
    {
      cursor++;
      len = strcspn (cursor, "/");
      for (i = 0; i < len; i++)
        if (!isdigit ((unsigned char) cursor[i]))
          return 1;
      url->port = urllib_parse_dup (cursor, len);
      if (url->port == NULL)
        return 1;
      cursor += len;
    }

  /* Whatever remains is the path. */
  if (cursor[0] != '\0')
    {
      url->path = urllib_parse_dup (cursor, strlen (cursor));
      if (url->path == NULL)
        return 1;
    }
  return 0;
}
/*
 * Write URL into DEST as  scheme://host[:port][path]  followed by "\r\n".
 *
 * The port and the path are emitted only when they are not NULL.  DEST is
 * written with sprintf, so the caller must supply a buffer large enough
 * for the whole string.
 */
void
urllib_tostring (Url *url,
                 char *dest)
{
  const int with_port = (url->port != NULL);
  const int with_path = (url->path != NULL);

  if (with_port && with_path)
    {
      sprintf (dest, "%s://%s:%s%s\r\n", url->scheme, url->host, url->port, url->path);
      return;
    }
  if (with_port)
    {
      sprintf (dest, "%s://%s:%s\r\n", url->scheme, url->host, url->port);
      return;
    }
  if (with_path)
    {
      sprintf (dest, "%s://%s%s\r\n", url->scheme, url->host, url->path);
      return;
    }
  sprintf (dest, "%s://%s\r\n", url->scheme, url->host);
}
/* Remove the last segment, together with the '/' in front of it, from the
   NUL-terminated output buffer OUT and update *OUT_LEN (RFC 3986, section
   5.2.4, step 2C).  Without any '/' the whole buffer is the last segment. */
static void
urllib_join_pop_segment (char *out,
                         size_t *out_len)
{
  char *slash = strrchr (out, '/');

  if (slash == NULL)
    slash = out;
  *slash = '\0';
  *out_len = (size_t) (slash - out);
}

/*
 * Resolve PATH against the current path of URL and store the result as the
 * new path of URL.
 *
 * An absolute PATH (starting with '/') replaces the current path.  A
 * relative PATH is appended to the directory part of the current path,
 * i.e. everything up to and including its last '/'; a NULL current path
 * counts as "/".  "." and ".." segments are then removed as described in
 * RFC 3986, section 5.2.4.
 *
 * Returns 0 on success (an empty PATH is a successful no-op), 1 on a NULL
 * argument or an allocation failure, and -1 when PATH is relative and the
 * current path contains no '/'.  URL is left untouched on failure.
 */
int
urllib_join (Url *url,
             const char *path)
{// view RFC 3986, section 5.2.4
  const char *base_path;
  char *base, *result, *in;
  size_t path_len;
  size_t dir_len = 0;
  size_t result_len = 0;

  if (url == NULL || path == NULL)
    return 1;
  path_len = strlen (path);
  if (path_len == 0)//nothing to merge
    return 0;

  /* No current path behaves like "/", without modifying URL yet. */
  base_path = (url->path != NULL) ? url->path : "/";

  if (path[0] != '/') //relative path
    {
      /* Keep the directory part of the current path; a trailing file name
         (anything after the last '/') is dropped. */
      const char *slash = strrchr (base_path, '/');

      if (slash == NULL)
        return -1;
      dir_len = (size_t) (slash - base_path) + 1;
    }

  /* The output never grows past the input, so both buffers share a size. */
  base = malloc (dir_len + path_len + 1);
  if (base == NULL)
    return 1;
  result = malloc (dir_len + path_len + 1);
  if (result == NULL)
    {
      free (base);
      return 1;
    }
  memcpy (base, base_path, dir_len);
  memcpy (base + dir_len, path, path_len + 1);
  result[0] = '\0';

  in = base;
  while (*in != '\0') //while input buffer is not empty
    {
      if (strncmp (in, "../", 3) == 0)
        in += 3;
      else if (strncmp (in, "./", 2) == 0)
        in += 2;
      else if (strncmp (in, "/./", 3) == 0)
        in += 2;
      else if (strcmp (in, "/.") == 0)
        {
          /* A final "/." becomes "/". */
          in += 1;
          in[0] = '/';
        }
      else if (strncmp (in, "/../", 4) == 0)
        {
          in += 3;
          urllib_join_pop_segment (result, &result_len);
        }
      else if (strcmp (in, "/..") == 0)
        {
          /* A final "/.." becomes "/" and drops the previous segment. */
          in += 2;
          in[0] = '/';
          urllib_join_pop_segment (result, &result_len);
        }
      else if (strcmp (in, ".") == 0 || strcmp (in, "..") == 0)
        in[0] = '\0';
      else
        {
          /* Move the first segment (its leading '/', if any, plus all
             characters up to the next '/') to the end of the output. */
          size_t seg_len = 1 + strcspn (in + 1, "/");

          memcpy (result + result_len, in, seg_len);
          result_len += seg_len;
          result[result_len] = '\0';
          in += seg_len;
        }
    }

  free (base);
  free (url->path);
  url->path = result;
  return 0;
}