/*
 * url.c
 *
 *  Created on: 14 oct. 2009
 *  Modified on: 22 oct. 2009
 *      Author: Samuel ROZE <samuel.roze@gmail.com>
 */
#include "postgres.h"
#include <string.h>
#include <ctype.h>
#include "fmgr.h"
#include "access/heapam.h"
#include "utils/builtins.h"
#include "funcapi.h"

#include "url.h"

/* Emit the module magic block only when the included headers define the macro. */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

PG_FUNCTION_INFO_V1(url_in);
Datum url_in(PG_FUNCTION_ARGS)
{
    char       *str = PG_GETARG_CSTRING(0);
    urltype    *result;

    elog(NOTICE, "url_in: str=%s", str);

    result = parse_url_exec(str);

    elog(NOTICE, "url_in: result=%s host=%s path=%s", result, result->host, result->path);

    PG_RETURN_URL_P(result);
}

PG_FUNCTION_INFO_V1(url_out);
Datum url_out(PG_FUNCTION_ARGS)
{
    urltype    *url = PG_GETARG_URL_P(0);
    int			size = url_size(url, URL_SIZE_CONTENTS_OUT) + 1; // + '\0
    char       *result = (char *) palloc(size);

    memset(result, 0, size);

    elog(NOTICE, "url_out: size=%d url=%s host=%s path=%s", size, url, url->host, url->path);

    // Concat strings !
    if (url->scheme) {
    	strcat(result, url->scheme);
    	strcat(result, "://");
    }
    if (url->user) {
    	strcat(result, url->user);
    	if (url->pass) {
    		strcat(result, ":");
    		strcat(result, url->pass);
    	}
    	strcat(result, "@");
    }
    if (url->host) {
    	strcat(result, url->host);
    }
    if (url->path) {
    	strcat(result, url->path);
    }
    if (url->query) {
    	strcat(result, "?");
    	strcat(result, url->query);
    }
    if (url->fragment) {
    	strcat(result, "#");
    	strcat(result, url->fragment);
    }

    PG_RETURN_CSTRING(result);
}

/*
 * url_make_varlena
 *
 * Wraps a urltype in a newly palloc'd varlena whose payload length is
 * url_size(url, URL_SIZE_TOTAL). Returns NULL when url is NULL.
 *
 * NOTE(review): the memcpy reads url_size(url, URL_SIZE_TOTAL) bytes starting
 * at url, which is more than sizeof(urltype) whenever any string member is
 * set -- confirm that callers pass a buffer at least that large.
 */
static inline struct varlena *url_make_varlena(urltype *url) {
	struct varlena *packed;
	int total;

	/* Nothing to wrap. */
	if (url == NULL) {
		return NULL;
	}

	/* Payload plus the varlena header. */
	total = url_size(url, URL_SIZE_TOTAL) + VARHDRSZ;

	packed = palloc(total);
	SET_VARSIZE(packed, total);
	memcpy(VARDATA(packed), url, (total - VARHDRSZ));

	return packed;
}

/*
 * url_size
 *
 * Computes a size for `url` according to `type`:
 *
 *   URL_SIZE_EMPTY         sizeof(urltype)
 *   URL_SIZE_CONTENTS      summed length of the components
 *   URL_SIZE_TOTAL         URL_SIZE_CONTENTS + sizeof(urltype)
 *   URL_SIZE_CONTENTS_OUT  URL_SIZE_CONTENTS + one byte per separator
 *                          ("://" counts three), without a terminating '\0'
 *
 * Any other `type` yields 0.
 *
 * The host is counted like every other string component. The port is an
 * integer (it is assigned from atoi() by the parser), so its contribution is
 * the number of decimal digits needed to print it, not strlen() of the value
 * reinterpreted as a pointer.
 *
 * The password is counted even when no user is set, so the result may be
 * slightly larger than what is actually printed; it is never smaller.
 */
int url_size (urltype *url, int type)
{
	int	size = 0, size_out = 0;

	/* The urltype pointer is not a C string, so it is not passed to %s. */
	elog(NOTICE, "url_size: host=%s", url->host ? url->host : "(null)");

	if (type == URL_SIZE_EMPTY) {
		return sizeof(urltype);
	}

	/* For every other type the contents are counted. */
	if (url->scheme) {
		size += (int) strlen(url->scheme);
		size_out += 3; /* :// */
	}
	if (url->user) {
		size += (int) strlen(url->user);
		size_out += 1; /* @ */
	}
	if (url->pass) {
		size += (int) strlen(url->pass);
		size_out += 1; /* : */
	}
	if (url->host) {
		size += (int) strlen(url->host);
	}
	if (url->port) {
		unsigned int port = (unsigned int) url->port;

		/* Count the decimal digits of the port number. */
		do {
			size += 1;
			port /= 10;
		} while (port != 0);
		size_out += 1; /* : */
	}
	if (url->path) {
		size += (int) strlen(url->path);
	}
	if (url->query) {
		size += (int) strlen(url->query);
		size_out += 1; /* ? */
	}
	if (url->fragment) {
		size += (int) strlen(url->fragment);
		size_out += 1; /* # */
	}

	if (type == URL_SIZE_CONTENTS) {
		return size;
	} else if (type == URL_SIZE_TOTAL) {
		return size + (int) sizeof(urltype);
	} else if (type == URL_SIZE_CONTENTS_OUT) {
		return size + size_out;
	}

	return 0;
}

PG_FUNCTION_INFO_V1(parse_url_key);
/*
 * parse_url_key
 *
 * Returns one component of the url passed as argument 0, selected by the
 * text key passed as argument 1. Recognised keys: "scheme", "user", "pass",
 * "host", "port", "path", "query", "fragment". Any other key raises an
 * error.
 *
 * Returns SQL NULL when the selected component is not set (for the port:
 * when it is 0).
 *
 * The port is stored as an integer, so it is formatted into a local buffer
 * before being converted to text; casting the number to a char pointer would
 * make the text conversion read from a bogus address.
 */
Datum parse_url_key (PG_FUNCTION_ARGS)
{
	char *key = TextDatumGetCString(PG_GETARG_TEXT_P(1));
	char *ret = NULL;
	char port_buf[16];	/* decimal digits + '\0' */

	urltype *url_ret = PG_GETARG_URL_P(0);

	if (strcmp(key, "scheme") == 0) {
		ret = url_ret->scheme;
	} else if (strcmp(key, "user") == 0) {
		ret = url_ret->user;
	} else if (strcmp(key, "pass") == 0) {
		ret = url_ret->pass;
	} else if (strcmp(key, "host") == 0) {
		ret = url_ret->host;
	} else if (strcmp(key, "port") == 0) {
		/* A port of 0 means "not set" and leaves ret NULL. */
		if (url_ret->port) {
			snprintf(port_buf, sizeof(port_buf), "%u", (unsigned int) url_ret->port);
			ret = port_buf;
		}
	} else if (strcmp(key, "path") == 0) {
		ret = url_ret->path;
	} else if (strcmp(key, "query") == 0) {
		ret = url_ret->query;
	} else if (strcmp(key, "fragment") == 0) {
		ret = url_ret->fragment;
	} else {
		ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("Invalid part name of url")));
	}

	if (ret) {
		PG_RETURN_TEXT_P(CStringGetTextDatum(ret));
	} else {
		PG_RETURN_NULL();
	}
}

PG_FUNCTION_INFO_V1(parse_url_record);
/*
 * parse_url_record
 *
 * Returns the url passed as argument 0 as a composite value with eight
 * attributes, in this order: scheme, user, pass, host, port, path, query,
 * fragment. Components that are not set (for the port: 0) are passed as
 * NULL strings to BuildTupleFromCStrings().
 *
 * Raises an error when the call context does not provide a composite result
 * type.
 *
 * The port is stored as an integer, so it is formatted into a local buffer;
 * casting the number to a char pointer would hand BuildTupleFromCStrings()
 * a bogus address.
 */
Datum parse_url_record (PG_FUNCTION_ARGS)
{
	TupleDesc	tupdesc;
	char		*values[8];
	char		port_buf[16];	/* decimal digits + '\0' */
	HeapTuple	tuple;
	AttInMetadata *attinmeta;
	urltype 		*ret;

	/* Get pointer */
	ret = PG_GETARG_URL_P(0);

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) {
	    ereport(ERROR,
	    		(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
	             errmsg("function returning record called in context that cannot accept type record")));
	}
	attinmeta = TupleDescGetAttInMetadata(tupdesc);

	/* Render the port as text; leave the slot NULL when no port is set. */
	if (ret->port) {
		snprintf(port_buf, sizeof(port_buf), "%u", (unsigned int) ret->port);
	}

	/* One C string (or NULL) per output attribute */
	values[0] = (char *) ret->scheme;
	values[1] = (char *) ret->user;
	values[2] = (char *) ret->pass;
	values[3] = (char *) ret->host;
	values[4] = ret->port ? port_buf : NULL;
	values[5] = (char *) ret->path;
	values[6] = (char *) ret->query;
	values[7] = (char *) ret->fragment;

	/* Build the tuple from the C strings */
	tuple = BuildTupleFromCStrings(attinmeta, values);

	/* Return the composite type */
	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}

/*
 * parse_url_exec
 *
 * Splits the NUL-terminated string `str` into its URL components. Inspired
 * by PHP's parse_url function.
 *
 * Returns a palloc'd, zero-initialised urltype whose string members
 * (scheme, user, pass, host, path, query, fragment) are separate copies made
 * with _url_alloc_str(), and whose port is the integer value of the port
 * digits. Members that are absent from the input stay NULL / 0.
 *
 * Returns NULL when the string is rejected: the text after the last ':' of
 * the authority is longer than five characters, or the host is empty.
 * Everything allocated so far is released with pfree() in that case (the
 * memory comes from palloc(), so free() must not be used on it, and members
 * that were never set are skipped because they are NULL).
 */
urltype* parse_url_exec (char* str) {
	char port_buf[6];
	int length = strlen(str);
	urltype *result;
	char const *s, *e, *p, *pp, *ue;

	result = (urltype *) palloc(sizeof(urltype));
	memset(result, 0, sizeof(urltype));

	s = str;
	ue = s + length;

	/* parse scheme */
	if ((e = memchr(s, ':', length)) && (e - s)) {
		/* validate scheme */
		p = s;
		while (p < e) {
			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
			/* <ctype.h> functions need an unsigned char value */
			if (!isalpha((unsigned char) *p) && !isdigit((unsigned char) *p) && *p != '+' && *p != '.' && *p != '-') {
				if (e + 1 < ue) {
					goto parse_port;
				} else {
					goto just_path;
				}
			}
			p++;
		}

		if (*(e + 1) == '\0') { /* only scheme is available */
			result->scheme = _url_alloc_str(s, (e - s));
			goto end;
		}

		/*
		 * certain schemas like mailto: and zlib: may not have any / after them
		 * this check ensures we support those.
		 */
		if (*(e+1) != '/') {
			/* check if the data we get is a port this allows us to
			 * correctly parse things like a.com:80
			 */
			p = e + 1;
			while (isdigit((unsigned char) *p)) {
				p++;
			}

			if ((*p == '\0' || *p == '/') && (p - e) < 7) {
				goto parse_port;
			}

			result->scheme = _url_alloc_str(s, (e-s));

			length -= ++e - s;
			s = e;
			goto just_path;
		} else {
			result->scheme = _url_alloc_str(s, (e-s));

			if (*(e+2) == '/') {
				s = e + 3;
				if (!strncasecmp("file", result->scheme, sizeof("file"))) {
					if (*(e + 3) == '/') {
						/* support windows drive letters as in:
						   file:///c:/somedir/file.txt
						*/
						/* e + 4 must not be the terminator, otherwise
						   e + 5 lies outside the string */
						if (*(e + 4) != '\0' && *(e + 5) == ':') {
							s = e + 4;
						}
						goto nohost;
					}
				}
			} else {
				if (!strncasecmp("file", result->scheme, sizeof("file"))) {
					s = e + 1;
					goto nohost;
				} else {
					length -= ++e - s;
					s = e;
					goto just_path;
				}
			}
		}
	} else if (e) { /* no scheme, look for port */
		parse_port:
		p = e + 1;
		pp = p;

		while (pp-p < 6 && isdigit((unsigned char) *pp)) {
			pp++;
		}

		if (pp-p < 6 && (*pp == '/' || *pp == '\0')) {
			memcpy(port_buf, p, (pp-p));
			port_buf[pp-p] = '\0';
			result->port = atoi(port_buf);
		} else {
			goto just_path;
		}
	} else {
		just_path:
		ue = s + length;
		goto nohost;
	}

	/* e marks the end of the authority part: first '/', else '?', else '#' */
	e = ue;

	if (!(p = memchr(s, '/', (ue - s)))) {
		if ((p = memchr(s, '?', (ue - s)))) {
			e = p;
		} else if ((p = memchr(s, '#', (ue - s)))) {
			e = p;
		}
	} else {
		e = p;
	}

	/* check for login and password */
	/* splits at the first '@'; the original comment here referenced zend_memrchr */
	if ((p = memchr(s, '@', (e-s)))) {
		if ((pp = memchr(s, ':', (p-s)))) {
			if ((pp-s) > 0) {
				result->user = _url_alloc_str(s, (pp-s));
			}

			pp++;
			if (p-pp > 0) {
				result->pass = _url_alloc_str(pp, (p-pp));
			}
		} else {
			result->user = _url_alloc_str(s, (p-s));
		}

		s = p + 1;
	}

	/* check for port */
	if (*s == '[' && *(e-1) == ']') {
		/* Short circuit portscan,
		   we're dealing with an
		   IPv6 embedded address */
		p = s;
	} else {
		/* memrchr is a GNU specific extension
		   Emulate for wide compatability.
		   The bound is tested before *p is read so that the byte in
		   front of s is never dereferenced. */
		for (p = e; p >= s && *p != ':'; p--);
	}

	if (p >= s && *p == ':') {
		if (!result->port) {
			p++;
			if (e-p > 5) { /* port cannot be longer then 5 characters */
				goto reject;
			} else if (e - p > 0) {
				memcpy(port_buf, p, (e-p));
				port_buf[e-p] = '\0';
				result->port = atoi(port_buf);
			}
			p--;
		}
	} else {
		p = e;
	}

	/* check if we have a valid host, if we don't reject the string as url */
	if ((p-s) < 1) {
		goto reject;
	}

	result->host = _url_alloc_str(s, (p-s));

	if (e == ue) {
		return result;
	}

	s = e;

	nohost:

	if ((p = memchr(s, '?', (ue - s)))) {
		pp = strchr(s, '#');

		if (pp && pp < p) {
			p = pp;
			pp = strchr(pp+2, '#');
		}

		if (p - s) {
			result->path = _url_alloc_str(s, (p-s));
		}

		if (pp) {
			if (pp - ++p) {
				result->query = _url_alloc_str(p, (pp-p));
			}
			p = pp;
			goto label_parse;
		} else if (++p - ue) {
			result->query = _url_alloc_str(p, (ue-p));
		}
	} else if ((p = memchr(s, '#', (ue - s)))) {
		if (p - s) {
			result->path = _url_alloc_str(s, (p-s));
		}

		label_parse:
		p++;

		if (ue - p) {
			result->fragment = _url_alloc_str(p, (ue-p));
		}
	} else {
		result->path = _url_alloc_str(s, (ue-s));
	}
end:
	return result;

reject:
	/* Release only what was actually allocated; pfree() must not get NULL. */
	if (result->scheme) {
		_url_free_str(result->scheme);
	}
	if (result->user) {
		_url_free_str(result->user);
	}
	if (result->pass) {
		_url_free_str(result->pass);
	}
	pfree(result);
	return NULL;
}

/*
 * _url_alloc_str
 *
 * Copies the first `length` bytes of `s` into a new palloc'd buffer of
 * length + 1 bytes and appends a terminating zero byte. Returns the new
 * buffer, or the NULL pointer unchanged if palloc() handed one back.
 */
char *_url_alloc_str (const char *s, int length)
{
	char *copy = (char *) palloc(length+1);

	if (copy != NULL) {
		memcpy(copy, s, length);
		copy[length] = 0;
	}

	return copy;
}

void _url_free_str (const char *s)
{
	pfree((void *) s);
}