/* URL parsing, serialization and relative-path joining helpers. */
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#include <url.h>
|
|
|
|
#ifdef TESTURLLIB
|
|
|
|
int
|
|
main (int argc,
|
|
char **argv)
|
|
{
|
|
Url *url;
|
|
char url_str[1025];
|
|
|
|
url = urllib_init ();
|
|
|
|
if (argc == 2)
|
|
{
|
|
if (urllib_parse (url, argv[1]))
|
|
return 1;
|
|
}
|
|
if (argc >= 3)
|
|
{
|
|
if (urllib_parse (url, argv[1]))
|
|
return 1;
|
|
|
|
if (urllib_join (url, argv[2]))
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
printf ("Enter a url : ");
|
|
scanf ("%s", url_str);
|
|
|
|
if (urllib_parse (url, url_str))
|
|
return 1;
|
|
|
|
printf ("Enter a path : ");
|
|
scanf ("%s", url_str);
|
|
|
|
if (urllib_join (url, url_str))
|
|
return 1;
|
|
}
|
|
|
|
urllib_tostring (url, url_str);
|
|
|
|
printf ("=> %s\n", url_str);
|
|
|
|
urllib_free (url);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
Url*
|
|
urllib_init (void)
|
|
{
|
|
Url *url = malloc (sizeof (Url));
|
|
|
|
url->scheme = NULL;
|
|
url->host = NULL;
|
|
url->port = NULL;
|
|
url->path = NULL;
|
|
|
|
return url;
|
|
}
|
|
|
|
void
|
|
urllib_free (Url *url)
|
|
{
|
|
free (url->scheme);
|
|
free (url->host);
|
|
free (url->port);
|
|
free (url->path);
|
|
free (url);
|
|
}
|
|
|
|
int
|
|
urllib_parse (Url *url,
|
|
const char *url_string)
|
|
{
|
|
const char *cursor = url_string;
|
|
char *tmpcursor = NULL;
|
|
int len, i;
|
|
|
|
/* get scheme */
|
|
tmpcursor = strchr (cursor, ':');
|
|
if (tmpcursor == NULL)
|
|
return 1;//error if not found
|
|
|
|
len = tmpcursor - cursor;
|
|
|
|
/* Verify scheme syntax */
|
|
if (!isalpha (cursor[0]))
|
|
return 1;
|
|
for (i = 1; i < len; i++)
|
|
if (!(isalnum(cursor[i]) || cursor[i] == '+' || cursor[i] == '-' || cursor[i] == '.'))
|
|
return 1;
|
|
|
|
url->scheme = malloc (sizeof (char) * (len + 1));
|
|
if (url->scheme == NULL)
|
|
return 1;
|
|
|
|
strncpy (url->scheme, cursor, len);
|
|
url->scheme[len] = '\0';
|
|
|
|
//Move after first ':'
|
|
cursor = tmpcursor + 1;
|
|
if (strlen (cursor) <= 2)
|
|
return 1;//There must be 2 '/' and an host
|
|
if (cursor[0] != '/' || cursor[1] != '/')
|
|
return 1;
|
|
|
|
//parse IPv6, IPv4 or hostname
|
|
|
|
cursor += 2;
|
|
if (cursor[0] == '[') // IPv6
|
|
{
|
|
tmpcursor = strchr (cursor, ']');
|
|
|
|
if (tmpcursor == NULL)
|
|
return 1;
|
|
|
|
len = tmpcursor - cursor;
|
|
|
|
for (i = 1; i < len; i++)//verify
|
|
if (!isdigit (cursor[i]) && !(tolower(cursor[i]) >= 'a' && tolower(cursor[i]) <= 'f') && !(cursor[i] == ':'))
|
|
return 1;
|
|
}
|
|
else // IPv4 or hostname
|
|
{
|
|
tmpcursor = strchr (cursor, ':');
|
|
|
|
if (tmpcursor == NULL)
|
|
tmpcursor = strchr (cursor, '/');
|
|
|
|
if (tmpcursor == NULL)
|
|
len = strlen (cursor);
|
|
else
|
|
len = tmpcursor - cursor;
|
|
|
|
for (i = 0; i < len; i++)
|
|
if (!isalnum (cursor[i]) && !(cursor[i] == '.'))
|
|
return 1;
|
|
}
|
|
|
|
url->host = malloc (sizeof (char) * (len + 1));
|
|
if (url->host == NULL)
|
|
return 1;
|
|
|
|
strncpy (url->host, cursor, len);
|
|
url->host[len] = '\0';
|
|
|
|
//parse port (or not)
|
|
|
|
cursor += len;
|
|
if (cursor[0] == ':')
|
|
{
|
|
tmpcursor = strchr (cursor, '/');
|
|
if (tmpcursor == NULL)
|
|
len = strlen (cursor);
|
|
else
|
|
len = tmpcursor - cursor;
|
|
|
|
for (i = 1; i < len; i++)
|
|
if (!isdigit (cursor[i]))
|
|
return 1;
|
|
|
|
url->port = malloc (sizeof (char) * len);
|
|
if (url->port == NULL)
|
|
return 1;
|
|
|
|
strncpy (url->port, cursor + 1, len - 1);
|
|
|
|
cursor += len;
|
|
}
|
|
|
|
//parse path
|
|
|
|
len = strlen (cursor);
|
|
|
|
if (len > 0)
|
|
{
|
|
url->path = malloc (sizeof (char) * (len + 1));
|
|
if (url->path == NULL)
|
|
return 1;
|
|
|
|
strncpy (url->path, cursor, len);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
urllib_tostring (Url *url,
|
|
char *dest)
|
|
{
|
|
if (url->port == NULL)
|
|
if (url->path == NULL)
|
|
sprintf (dest, "%s://%s\r\n", url->scheme, url->host);
|
|
else
|
|
sprintf (dest, "%s://%s%s\r\n", url->scheme, url->host, url->path);
|
|
else
|
|
if (url->path == NULL)
|
|
sprintf (dest, "%s://%s:%s\r\n", url->scheme, url->host, url->port);
|
|
else
|
|
sprintf (dest, "%s://%s:%s%s\r\n", url->scheme, url->host, url->port, url->path);
|
|
}
|
|
|
|
int
|
|
urllib_join (Url *url,
|
|
const char *path)
|
|
{// view RFC 3986, section 5.2.4
|
|
int len = strlen (path);
|
|
char *base, *result, *inputcursor;
|
|
|
|
if (len == 0)//nothing to merge
|
|
return 0;
|
|
|
|
if (url->path == NULL)//no current path, path = '/'
|
|
{
|
|
url->path = malloc (sizeof (char) * 2);
|
|
if (url->path == NULL)
|
|
return 1;
|
|
|
|
url->path[0] = '/';
|
|
url->path[1] = '\0';
|
|
}
|
|
|
|
if (path[0] != '/') //relative path
|
|
{
|
|
int url_path_len = strlen (path);
|
|
|
|
if (url->path[url_path_len - 1] != '/')//we are on a file
|
|
{
|
|
char *tmpcursor = strrchr (url->path, '/');
|
|
if (tmpcursor == NULL)
|
|
return -1;
|
|
|
|
tmpcursor[1] = '\0';
|
|
}
|
|
|
|
len += url_path_len;
|
|
base = malloc (sizeof (char) * (len + 1));
|
|
if (base == NULL)
|
|
return 1;
|
|
result = malloc (sizeof (char) * (len + 1));
|
|
if (result == NULL)
|
|
{
|
|
free (base);
|
|
return 1;
|
|
}
|
|
|
|
sprintf (base, "%s%s", url->path, path);
|
|
result[0] = '\0';
|
|
}
|
|
else //absolute path
|
|
{
|
|
base = malloc (sizeof (char) * (len + 1));
|
|
if (base == NULL)
|
|
return 1;
|
|
result = malloc (sizeof (char) * (len + 1));
|
|
if (result == NULL)
|
|
{
|
|
free (base);
|
|
return 1;
|
|
}
|
|
|
|
strncpy (base, path, len);
|
|
base[len] = '\0';
|
|
result[0] = '\0';
|
|
}
|
|
|
|
inputcursor = base;
|
|
len = strlen (inputcursor);
|
|
while (len > 0) //while input buffer is not empty
|
|
{
|
|
char *tmpcursor;
|
|
|
|
if (len >= 3 && inputcursor[0] == '.' && inputcursor[1] == '.' && inputcursor[2] == '/')
|
|
{
|
|
inputcursor += 3;
|
|
len = strlen (inputcursor);
|
|
}
|
|
else if (len >= 2 && inputcursor[0] == '.' && inputcursor[1] == '/')
|
|
{
|
|
inputcursor += 2;
|
|
len = strlen (inputcursor);
|
|
}
|
|
else if (len >= 3 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '/')
|
|
{
|
|
inputcursor += 2;
|
|
len = strlen (inputcursor);
|
|
}
|
|
else if (len == 2 && inputcursor[0] == '/' && inputcursor[1] == '.')
|
|
{
|
|
inputcursor += 1;
|
|
inputcursor[0] = '/';
|
|
len = strlen (inputcursor);
|
|
}
|
|
else if (len >= 4 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '.' && inputcursor[3] == '/')
|
|
{
|
|
inputcursor += 3;
|
|
len = strlen (inputcursor);
|
|
//remove result last segment
|
|
tmpcursor = strrchr (result, '/');
|
|
if (tmpcursor != NULL)
|
|
*tmpcursor = '\0';
|
|
}
|
|
else if (len == 3 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '.')
|
|
{
|
|
inputcursor += 2;
|
|
inputcursor[0] = '/';
|
|
len = strlen (inputcursor);
|
|
//remove result last segment
|
|
tmpcursor = strrchr (result, '/');
|
|
if (tmpcursor != NULL)
|
|
*tmpcursor = '\0';
|
|
}
|
|
else if ((len == 2 && inputcursor[0] == '.' && inputcursor[1] == '.') || (len == 1 && inputcursor[0] == '.'))
|
|
{
|
|
inputcursor[0] = '\0';
|
|
len = strlen (inputcursor);
|
|
}
|
|
else
|
|
{
|
|
//get first segment of inputcursor then append it to result and delete it
|
|
tmpcursor = strchr (inputcursor + 1, '/');
|
|
if (tmpcursor == NULL)//last segment in input buffer
|
|
{
|
|
sprintf (result, "%s%s", result, inputcursor);
|
|
*inputcursor = '\0';
|
|
}
|
|
else
|
|
{
|
|
len = tmpcursor - inputcursor;
|
|
strncpy (result + strlen (result), inputcursor, len);
|
|
inputcursor = tmpcursor;
|
|
}
|
|
|
|
len = strlen (inputcursor);
|
|
}
|
|
}
|
|
|
|
free (url->path);
|
|
url->path = result;
|
|
free (base);
|
|
|
|
return 0;
|
|
}
|