/*
 * url.c
 *
 * PostgreSQL "url" datatype: input/output functions, accessors that expose
 * the individual URL components, and the parser itself.
 *
 *  Created on: 14 oct. 2009
 *  Modified on: 22 oct. 2009
 *      Author: Samuel ROZE
 */

#include "postgres.h"

#include <ctype.h>
#include <string.h>

#include "fmgr.h"
#include "access/heapam.h"
#include "utils/builtins.h"
#include "funcapi.h"

#include "url.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/* Large enough for the decimal form of any int, sign and terminator included. */
#define URL_PORT_BUFLEN 16

/* Number of columns produced by parse_url_record(). */
#define URL_RECORD_NATTS 8

/* Return s, or a printable placeholder when s is NULL (for log messages). */
static const char *
url_debug_str(const char *s)
{
	return (s != NULL) ? s : "(null)";
}

/*
 * Write the decimal representation of port into buf (at most buflen bytes,
 * always NUL-terminated) and return the number of characters written.
 */
static int
url_format_port(int port, char *buf, size_t buflen)
{
	return snprintf(buf, buflen, "%d", port);
}

/*
 * Release a partially-built parse result and return NULL, so that error
 * paths in parse_url_exec() can simply "return url_parse_fail(result)".
 */
static urltype *
url_parse_fail(urltype *result)
{
	_url_free_str(result->scheme);
	_url_free_str(result->user);
	_url_free_str(result->pass);
	pfree(result);
	return NULL;
}

/*
 * url_in - input function: parse a C string into a urltype.
 *
 * Raises an error when parse_url_exec() rejects the string (it returns NULL
 * for an over-long port or a missing host).
 */
PG_FUNCTION_INFO_V1(url_in);
Datum url_in(PG_FUNCTION_ARGS)
{
	char	   *str = PG_GETARG_CSTRING(0);
	urltype    *result;

	elog(NOTICE, "url_in: str=%s", str);

	result = parse_url_exec(str);
	if (result == NULL) {
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid input syntax for url: \"%s\"", str)));
	}

	/* result is a struct pointer, not a string: log it with %p */
	elog(NOTICE, "url_in: result=%p host=%s path=%s",
		 (void *) result,
		 url_debug_str(result->host),
		 url_debug_str(result->path));

	PG_RETURN_URL_P(result);
}

/*
 * url_out - output function: rebuild the textual URL from its components.
 *
 * The buffer is sized with url_size(URL_SIZE_CONTENTS_OUT), which counts
 * every component and separator appended below, so the strcat() chain
 * cannot overrun it.
 *
 * NOTE(review): "://" is always written after the scheme, so a URL parsed
 * from "mailto:someone" is printed as "mailto://someone" - confirm whether
 * that is intended.
 */
PG_FUNCTION_INFO_V1(url_out);
Datum url_out(PG_FUNCTION_ARGS)
{
	urltype    *url = PG_GETARG_URL_P(0);
	int			size = url_size(url, URL_SIZE_CONTENTS_OUT) + 1;	/* + '\0' */
	char	   *result = (char *) palloc0(size);
	char		port_buf[URL_PORT_BUFLEN];

	elog(NOTICE, "url_out: size=%d url=%p host=%s path=%s",
		 size,
		 (void *) url,
		 url_debug_str(url->host),
		 url_debug_str(url->path));

	if (url->scheme) {
		strcat(result, url->scheme);
		strcat(result, "://");
	}

	if (url->user) {
		strcat(result, url->user);
		if (url->pass) {
			strcat(result, ":");
			strcat(result, url->pass);
		}
		strcat(result, "@");
	}

	if (url->host) {
		strcat(result, url->host);

		/* the port belongs to the authority part, so emit it with the host */
		if (url->port) {
			url_format_port((int) url->port, port_buf, sizeof(port_buf));
			strcat(result, ":");
			strcat(result, port_buf);
		}
	}

	if (url->path) {
		strcat(result, url->path);
	}

	if (url->query) {
		strcat(result, "?");
		strcat(result, url->query);
	}

	if (url->fragment) {
		strcat(result, "#");
		strcat(result, url->fragment);
	}

	PG_RETURN_CSTRING(result);
}

/*
 * url_make_varlena - wrap a urltype in a freshly palloc'd varlena.
 *
 * The varlena is allocated with url_size(URL_SIZE_TOTAL) payload bytes, but
 * only sizeof(urltype) bytes are copied from url: that is all the source
 * object holds. The remaining payload bytes are zero-filled.
 *
 * NOTE(review): the struct is copied verbatim, so the stored value still
 * contains the component pointers rather than the strings themselves -
 * verify that this is what the code reading the datum expects.
 *
 * Returns NULL when url is NULL.
 */
static inline struct varlena *url_make_varlena(urltype *url)
{
	struct varlena *vdat;
	int			size;

	if (url == NULL) {
		return NULL;
	}

	size = url_size(url, URL_SIZE_TOTAL) + VARHDRSZ;
	vdat = palloc0(size);
	SET_VARSIZE(vdat, size);
	memcpy(VARDATA(vdat), url, sizeof(urltype));

	return vdat;
}

/*
 * url_size - compute a size for url, depending on type:
 *
 *   URL_SIZE_EMPTY         sizeof(urltype)
 *   URL_SIZE_CONTENTS      summed length of all components
 *   URL_SIZE_TOTAL         URL_SIZE_CONTENTS + sizeof(urltype)
 *   URL_SIZE_CONTENTS_OUT  URL_SIZE_CONTENTS + separators used by url_out()
 *
 * Any other type yields 0. The port contributes the length of its decimal
 * representation (assumes urltype.port is an integer - it is assigned from
 * atoi() in parse_url_exec()).
 */
int url_size (urltype *url, int type)
{
	int			size = 0;
	int			size_out = 0;
	char		port_buf[URL_PORT_BUFLEN];

	elog(NOTICE, "url_size: url=%p host=%s",
		 (void *) url, url_debug_str(url->host));

	if (type == URL_SIZE_EMPTY) {
		return sizeof(urltype);
	}

	/* In the other types, the content is counted */
	if (url->scheme) {
		size += (int) strlen(url->scheme);
		size_out += 3;			/* :// */
	}

	if (url->user) {
		size += (int) strlen(url->user);
		size_out += 1;			/* @ */
	}

	if (url->pass) {
		size += (int) strlen(url->pass);
		size_out += 1;			/* : */
	}

	if (url->host) {
		size += (int) strlen(url->host);
	}

	if (url->port) {
		size += url_format_port((int) url->port, port_buf, sizeof(port_buf));
		size_out += 1;			/* : */
	}

	if (url->path) {
		size += (int) strlen(url->path);
	}

	if (url->query) {
		size += (int) strlen(url->query);
		size_out += 1;			/* ? */
	}

	if (url->fragment) {
		size += (int) strlen(url->fragment);
		size_out += 1;			/* # */
	}

	if (type == URL_SIZE_CONTENTS) {
		return size;
	} else if (type == URL_SIZE_TOTAL) {
		return size + (int) sizeof(urltype);
	} else if (type == URL_SIZE_CONTENTS_OUT) {
		return size + size_out;
	}

	return 0;
}

/*
 * parse_url_key - return one component of a url as text.
 *
 * Argument 0 is the url, argument 1 the component name: "scheme", "user",
 * "pass", "host", "port", "path", "query" or "fragment". Returns SQL NULL
 * when the component is absent (or the port is 0) and raises an error for
 * any other name.
 */
PG_FUNCTION_INFO_V1(parse_url_key);
Datum parse_url_key (PG_FUNCTION_ARGS)
{
	urltype    *url_ret = PG_GETARG_URL_P(0);
	char	   *key = text_to_cstring(PG_GETARG_TEXT_PP(1));
	char	   *ret = NULL;

	if (strcmp(key, "scheme") == 0) {
		ret = url_ret->scheme;
	} else if (strcmp(key, "user") == 0) {
		ret = url_ret->user;
	} else if (strcmp(key, "pass") == 0) {
		ret = url_ret->pass;
	} else if (strcmp(key, "host") == 0) {
		ret = url_ret->host;
	} else if (strcmp(key, "port") == 0) {
		/* the port is a number: render it instead of casting it to a pointer */
		if (url_ret->port) {
			ret = (char *) palloc(URL_PORT_BUFLEN);
			url_format_port((int) url_ret->port, ret, URL_PORT_BUFLEN);
		}
	} else if (strcmp(key, "path") == 0) {
		ret = url_ret->path;
	} else if (strcmp(key, "query") == 0) {
		ret = url_ret->query;
	} else if (strcmp(key, "fragment") == 0) {
		ret = url_ret->fragment;
	} else {
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("Invalid part name of url")));
	}

	if (ret == NULL) {
		PG_RETURN_NULL();
	}

	PG_RETURN_TEXT_P(cstring_to_text(ret));
}

/*
 * parse_url_record - return every component of a url as a composite value.
 *
 * Columns, in order: scheme, user, pass, host, port, path, query, fragment.
 * Absent components (and a port of 0) are passed to BuildTupleFromCStrings()
 * as NULL pointers. Raises an error when the call context does not provide
 * a composite result type.
 */
PG_FUNCTION_INFO_V1(parse_url_record);
Datum parse_url_record (PG_FUNCTION_ARGS)
{
	TupleDesc	tupdesc;
	AttInMetadata *attinmeta;
	HeapTuple	tuple;
	char	  **values;
	urltype    *ret;

	ret = PG_GETARG_URL_P(0);

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) {
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("function returning record called in context that cannot accept type record")));
	}

	attinmeta = TupleDescGetAttInMetadata(tupdesc);

	/* One C string per output column; entries left NULL stay NULL */
	values = (char **) palloc0(URL_RECORD_NATTS * sizeof(char *));

	values[0] = ret->scheme;
	values[1] = ret->user;
	values[2] = ret->pass;
	values[3] = ret->host;
	if (ret->port) {
		values[4] = (char *) palloc(URL_PORT_BUFLEN);
		url_format_port((int) ret->port, values[4], URL_PORT_BUFLEN);
	}
	values[5] = ret->path;
	values[6] = ret->query;
	values[7] = ret->fragment;

	/* Build the tuple from the C strings and return it as a composite datum */
	tuple = BuildTupleFromCStrings(attinmeta, values);

	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}

/*
 * parse_url_exec - split a NUL-terminated URL string into its components.
 *
 * Inspired from PHP's parse_url function.
 *
 * Returns a palloc'd, zero-initialised urltype whose non-NULL string fields
 * are individually palloc'd copies. Returns NULL (after releasing what was
 * allocated) when an explicit port is longer than 5 characters or when the
 * host part is empty.
 *
 * Pointer roles: s = start of the part being examined, e = end of the
 * authority part, ue = end of the whole string, p/pp = scratch cursors.
 */
urltype* parse_url_exec (char* str)
{
	char		port_buf[6];
	int			length = (int) strlen(str);
	urltype    *result;
	char const *s, *e, *p, *pp, *ue;

	result = (urltype *) palloc0(sizeof(urltype));

	s = str;
	ue = s + length;

	/* parse scheme */
	if ((e = memchr(s, ':', length)) && (e - s)) {
		/* validate scheme */
		p = s;
		while (p < e) {
			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
			if (!isalpha((unsigned char) *p) && !isdigit((unsigned char) *p) &&
				*p != '+' && *p != '.' && *p != '-') {
				if (e + 1 < ue) {
					goto parse_port;
				} else {
					goto just_path;
				}
			}
			p++;
		}

		if (*(e + 1) == '\0') {	/* only scheme is available */
			result->scheme = _url_alloc_str(s, (e - s));
			goto end;
		}

		/*
		 * certain schemas like mailto: and zlib: may not have any / after
		 * them; this check ensures we support those.
		 */
		if (*(e + 1) != '/') {
			/*
			 * check if the data we get is a port; this allows us to
			 * correctly parse things like a.com:80
			 */
			p = e + 1;
			while (isdigit((unsigned char) *p)) {
				p++;
			}

			if ((*p == '\0' || *p == '/') && (p - e) < 7) {
				goto parse_port;
			}

			result->scheme = _url_alloc_str(s, (e - s));

			length -= ++e - s;
			s = e;
			goto just_path;
		} else {
			result->scheme = _url_alloc_str(s, (e - s));

			if (*(e + 2) == '/') {
				s = e + 3;
				if (!strncasecmp("file", result->scheme, sizeof("file"))) {
					if (*(e + 3) == '/') {
						/*
						 * support windows drive letters as in:
						 * file:///c:/somedir/file.txt
						 * (bounds-checked: e + 5 may lie past the terminator)
						 */
						if (e + 5 < ue && *(e + 5) == ':') {
							s = e + 4;
						}
						goto nohost;
					}
				}
			} else {
				if (!strncasecmp("file", result->scheme, sizeof("file"))) {
					s = e + 1;
					goto nohost;
				} else {
					length -= ++e - s;
					s = e;
					goto just_path;
				}
			}
		}
	} else if (e) {
		/* no scheme, look for port */
parse_port:
		p = e + 1;
		pp = p;

		while (pp - p < 6 && isdigit((unsigned char) *pp)) {
			pp++;
		}

		if (pp - p < 6 && (*pp == '/' || *pp == '\0')) {
			memcpy(port_buf, p, (pp - p));
			port_buf[pp - p] = '\0';
			result->port = atoi(port_buf);
		} else {
			goto just_path;
		}
	} else {
just_path:
		ue = s + length;
		goto nohost;
	}

	/* the authority part ends at the first '/', else '?', else '#' */
	e = ue;

	if (!(p = memchr(s, '/', (ue - s)))) {
		if ((p = memchr(s, '?', (ue - s)))) {
			e = p;
		} else if ((p = memchr(s, '#', (ue - s)))) {
			e = p;
		}
	} else {
		e = p;
	}

	/* check for login and password */
	if ((p = memchr(s, '@', (e - s)))) {
		if ((pp = memchr(s, ':', (p - s)))) {
			if ((pp - s) > 0) {
				result->user = _url_alloc_str(s, (pp - s));
			}

			pp++;
			if (p - pp > 0) {
				result->pass = _url_alloc_str(pp, (p - pp));
			}
		} else {
			result->user = _url_alloc_str(s, (p - s));
		}

		s = p + 1;
	}

	/* check for port */
	if (*s == '[' && *(e - 1) == ']') {
		/* Short circuit portscan, we're dealing with an IPv6 embedded address */
		p = s;
	} else {
		/*
		 * Find the last ':' in [s, e] (memrchr is a GNU extension). The
		 * bound is tested before the dereference so nothing before s is read.
		 */
		p = e;
		while (p > s && *p != ':') {
			p--;
		}
	}

	if (*p == ':') {
		if (!result->port) {
			p++;
			if (e - p > 5) {	/* port cannot be longer than 5 characters */
				return url_parse_fail(result);
			} else if (e - p > 0) {
				memcpy(port_buf, p, (e - p));
				port_buf[e - p] = '\0';
				result->port = atoi(port_buf);
			}
			p--;
		}
	} else {
		p = e;
	}

	/* check if we have a valid host, if we don't reject the string as url */
	if ((p - s) < 1) {
		return url_parse_fail(result);
	}

	result->host = _url_alloc_str(s, (p - s));

	if (e == ue) {
		return result;
	}

	s = e;

nohost:

	if ((p = memchr(s, '?', (ue - s)))) {
		pp = memchr(s, '#', (ue - s));

		/*
		 * A '#' ahead of the '?' starts the fragment, and the '?' is then
		 * part of that fragment rather than a query separator.
		 */
		if (pp && pp < p) {
			if (pp - s) {
				result->path = _url_alloc_str(s, (pp - s));
			}
			p = pp;
			goto label_parse;
		}

		if (p - s) {
			result->path = _url_alloc_str(s, (p - s));
		}

		if (pp) {
			if (pp - ++p) {
				result->query = _url_alloc_str(p, (pp - p));
			}
			p = pp;
			goto label_parse;
		} else if (++p - ue) {
			result->query = _url_alloc_str(p, (ue - p));
		}
	} else if ((p = memchr(s, '#', (ue - s)))) {
		if (p - s) {
			result->path = _url_alloc_str(s, (p - s));
		}

label_parse:
		p++;

		if (ue - p) {
			result->fragment = _url_alloc_str(p, (ue - p));
		}
	} else {
		result->path = _url_alloc_str(s, (ue - s));
	}

end:
	return result;
}

/*
 * _url_alloc_str - return a palloc'd, NUL-terminated copy of the first
 * length bytes of s.
 */
char *_url_alloc_str (const char *s, int length)
{
	char	   *p = (char *) palloc(length + 1);

	memcpy(p, s, length);
	p[length] = '\0';

	return p;
}

/*
 * _url_free_str - release a string obtained from _url_alloc_str().
 *
 * A NULL argument is accepted and ignored, so callers can pass components
 * that were never set.
 */
void _url_free_str (const char *s)
{
	if (s != NULL) {
		pfree((void *) s);
	}
}