PgBench Functions

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 0ac40f1..38d0994 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -771,24 +771,35 @@ pgbench options dbname Sets variable varname to an integer value calculated from expression. The expression may contain integer constants such as 5432, - references to variables :variablename, + double constants such as 3.14159, + references to integer variables :variablename, and expressions composed of unary (-) or binary operators - (+, -, *, /, %) - with their usual associativity, and parentheses. + (+, -, *, /, + %) with their usual associativity, function calls and + parentheses. + The PgBench Functions table below shows the available + functions. + + + + Typing between integer and double is implicit and descendant: the type of + an operator or function depends on the expected type of the result. + For instance, if an integer is expected, exp1 + exp2 will cast + both operands to int and use the integer addition. Examples: \set ntellers 10 * :scale -\set aid (1021 * :aid) % (100000 * :scale) + 1 +\set aid (1021 * random(1, 100000 * :scale)) % (100000 * :scale) + 1 - \setrandom varname min max [ uniform | { gaussian | exponential } threshold ] + \setrandom varname min max [ uniform | { gaussian | exponential } param ] @@ -801,57 +812,35 @@ pgbench options dbname - By default, or when uniform is specified, all values in the - range are drawn with equal probability. Specifying gaussian - or exponential options modifies this behavior; each - requires a mandatory threshold which determines the precise shape of the - distribution. - + + + + \setrandom n 1 10 or \setrandom n 1 10 uniform + is equivalent to \set n random(1, 10) and uses a uniform + distribution. + + - - For a Gaussian distribution, the interval is mapped onto a standard - normal distribution (the classical bell-shaped Gaussian curve) truncated - at -threshold on the left and +threshold - on the right. 
- To be precise, if PHI(x) is the cumulative distribution - function of the standard normal distribution, with mean mu - defined as (max + min) / 2.0, then value i - between min and max inclusive is drawn - with probability: - - (PHI(2.0 * threshold * (i - min - mu + 0.5) / (max - min + 1)) - - PHI(2.0 * threshold * (i - min - mu - 0.5) / (max - min + 1))) / - (2.0 * PHI(threshold) - 1.0). - Intuitively, the larger the threshold, the more - frequently values close to the middle of the interval are drawn, and the - less frequently values close to the min and - max bounds. - About 67% of values are drawn from the middle 1.0 / threshold - and 95% in the middle 2.0 / threshold; for instance, if - threshold is 4.0, 67% of values are drawn from the middle - quarter and 95% from the middle half of the interval. - The minimum threshold is 2.0 for performance of - the Box-Muller transform. - + + + \setrandom n 1 10 exponential 3.0 is equivalent to + \set n random_exponential(1, 10, 3.0) and uses an + exponential distribution. + + - - For an exponential distribution, the threshold - parameter controls the distribution by truncating a quickly-decreasing - exponential distribution at threshold, and then - projecting onto integers between the bounds. - To be precise, value i between min and - max inclusive is drawn with probability: - (exp(-threshold*(i-min)/(max+1-min)) - - exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold)). - Intuitively, the larger the threshold, the more - frequently values close to min are accessed, and the - less frequently values close to max are accessed. - The closer to 0 the threshold, the flatter (more uniform) the access - distribution. - A crude approximation of the distribution is that the most frequent 1% - values in the range, close to min, are drawn - threshold% of the time. - The threshold value must be strictly positive. 
+ + + \setrandom n 1 10 gaussian 2.0 is equivalent to + \set n random_gaussian(1, 10, 2.0), and uses a gaussian + distribution. + + + + + See the documentation of these functions below for further information + about the precise shape of these distributions, depending on the value + of the parameter. @@ -931,18 +920,196 @@ pgbench options dbname + + + PgBench Functions + + + + Function + Return Type + Description + Example + Result + + + + + abs(a) + same as a + integer or double absolute value + abs(-17) + 17 + + + ddebug(x) + double + stderr print for debug and return argument + ddebug(5432.1) + 5432.1 + + + double(i) + double + evaluate as int and cast to double + double(5432) + 5432.0 + + + idebug(i) + integer + stderr print for debug and return argument + idebug(5432) + 5432 + + + int(x) + integer + evaluate as double and cast to int + int(5.4 + 3.8) + 9 + + + max(i, ...) + integer + maximum value + max(5, 4, 3, 2) + 5 + + + min(i, ...) + integer + minimum value + min(5, 4, 3, 2) + 2 + + + pi() + double + value of the PI constant + pi() + 3.14159265358979323846 + + + random(lb, ub) + integer + uniformly distributed random integer in [lb,ub] + random(1, 10) + an int between 1 and 10 + + + random_exponential(lb, ub, param) + integer + exponentially distributed random integer in [lb,ub], + see below + random_exponential(1, 10, 3.0) + an int between 1 and 10 + + + random_gaussian(lb, ub, param) + integer + gaussian distributed random integer in [lb,ub], + see below + random_gaussian(1, 10, 2.5) + an int between 1 and 10 + + + sqrt(x) + double + square root + sqrt(2.0) + 1.414213562 + + + +

+ + + The random function generated values are uniform, that is + all values in the specified range are drawn with equal probability. + + + + The random_exponential and random_gaussian + functions require an additional double parameter which determines the + precise shape of the distribution. + + + + + + For an exponential distribution, the param parameter + controls the distribution by truncating a quickly-decreasing + exponential distribution at param, and then + projecting onto integers between the bounds. + To be precise, with + +f(x) = exp(-param * (x-min) / (max-min+1)) / (1 - exp(-param)) + + Then value i between min and + max inclusive is drawn with probability: + f(i) - f(i+1). + + + + Intuitively, the larger the param, the more + frequently values close to min are accessed, and the + less frequently values close to max are accessed. + The closer to 0 the parameter, the flatter (more uniform) the access + distribution. + A crude approximation of the distribution is that the most frequent 1% + values in the range, close to min, are drawn + param% of the time. + The param value must be strictly positive. + + + + + + For a Gaussian distribution, the interval is mapped onto a standard + normal distribution (the classical bell-shaped Gaussian curve) truncated + at -param on the left and +param + on the right. + Values in the middle of the interval are more likely to be drawn. + To be precise, if PHI(x) is the cumulative distribution + function of the standard normal distribution, with mean mu + defined as (max+min)/2, with + +f(x) = PHI(2 * param * (x-mu) / (max-min+1)) / (2 * PHI(param) - 1) + + then value i between min and + max inclusive is drawn with probability: + f(i+0.5) - f(i-0.5). + + + Intuitively, the larger the param, the more + frequently values close to the middle of the interval are drawn, and the + less frequently values close to the min and + max bounds. 
+ About 67% of values are drawn from the middle 1/param, + that is a relative 0.5/param around the mean, + and 95% in the middle 2/param, that is + a relative 1/param around the mean; + for instance, if param is 4.0, 67% of values are drawn + from the middle quarter (1/4.0) of the interval + (i.e. from 3/8 to 5/8) + and 95% from the middle half (2/4.0) of the interval (second and third + quartiles). + The minimum param is 2.0 for performance of + the Box-Muller transform. + + + + As an example, the full definition of the built-in TPC-B-like transaction is: -\set nbranches :scale -\set ntellers 10 * :scale -\set naccounts 100000 * :scale -\setrandom aid 1 :naccounts -\setrandom bid 1 :nbranches -\setrandom tid 1 :ntellers -\setrandom delta -5000 5000 +\set aid random(1, 100000 * :scale) +\set bid random(1, 1 * :scale) +\set tid random(1, 10 * :scale) +\set delta random(-5000, 5000) BEGIN; UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; SELECT abalance FROM pgbench_accounts WHERE aid = :aid; @@ -1097,27 +1264,26 @@ starting vacuum...end. 
transaction type: TPC-B (sort of) scaling factor: 1 query mode: simple -number of clients: 10 +number of clients: 4 number of threads: 1 -number of transactions per client: 1000 -number of transactions actually processed: 10000/10000 -tps = 618.764555 (including connections establishing) -tps = 622.977698 (excluding connections establishing) +duration: 3 s +number of transactions actually processed: 1239 +latency average: 9.584 ms +latency stddev: 5.204 ms +tps = 411.913509 (including connections establishing) +tps = 413.088125 (excluding connections establishing) statement latencies in milliseconds: - 0.004386 \set nbranches 1 * :scale - 0.001343 \set ntellers 10 * :scale - 0.001212 \set naccounts 100000 * :scale - 0.001310 \setrandom aid 1 :naccounts - 0.001073 \setrandom bid 1 :nbranches - 0.001005 \setrandom tid 1 :ntellers - 0.001078 \setrandom delta -5000 5000 - 0.326152 BEGIN; - 0.603376 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; - 0.454643 SELECT abalance FROM pgbench_accounts WHERE aid = :aid; - 5.528491 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; - 7.335435 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; - 0.371851 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); - 1.212976 END; + 0.010948 \set aid random(1, 100000 * :scale) + 0.003161 \set bid random(1, 1 * :scale) + 0.002203 \set tid random(1, 10 * :scale) + 0.002266 \set delta random(-5000, 5000) + 0.144510 BEGIN; + 0.498513 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; + 0.334889 SELECT abalance FROM pgbench_accounts WHERE aid = :aid; + 1.436523 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; + 4.914786 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; + 0.338837 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); 
+ 1.877755 END; diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index e68631e..a1477f9 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -16,10 +16,14 @@ PgBenchExpr *expr_parse_result; +static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list); static PgBenchExpr *make_integer_constant(int64 ival); +static PgBenchExpr *make_double_constant(double dval); static PgBenchExpr *make_variable(char *varname); static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr); +static int find_func(const char * fname); +static PgBenchExpr *make_func(const int fnumber, PgBenchExprList *args); %} @@ -29,15 +33,19 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr, %union { int64 ival; + double dval; char *str; PgBenchExpr *expr; + PgBenchExprList *elist; } +%type elist %type expr -%type INTEGER -%type VARIABLE +%type INTEGER function +%type DOUBLE +%type VARIABLE FUNCTION -%token INTEGER VARIABLE +%token INTEGER DOUBLE VARIABLE FUNCTION %token CHAR_ERROR /* never used, will raise a syntax error */ /* Precedence: lowest to highest */ @@ -49,6 +57,11 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr, result: expr { expr_parse_result = $1; } +elist: { $$ = NULL; } + | expr { $$ = make_elist($1, NULL); } + | elist ',' expr { $$ = make_elist($3, $1); } + ; + expr: '(' expr ')' { $$ = $2; } | '+' expr %prec UMINUS { $$ = $2; } | '-' expr %prec UMINUS { $$ = make_op('-', make_integer_constant(0), $2); } @@ -58,7 +71,12 @@ expr: '(' expr ')' { $$ = $2; } | expr '/' expr { $$ = make_op('/', $1, $3); } | expr '%' expr { $$ = make_op('%', $1, $3); } | INTEGER { $$ = make_integer_constant($1); } + | DOUBLE { $$ = make_double_constant($1); } | VARIABLE { $$ = make_variable($1); } + | function '(' elist ')'{ $$ = make_func($1, $3); } + ; + +function: FUNCTION { $$ = find_func($1); pg_free($1); } ; %% @@ -74,6 +92,16 @@ make_integer_constant(int64 ival) } static PgBenchExpr * 
+make_double_constant(double dval) +{ + PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr)); + + expr->etype = ENODE_DOUBLE_CONSTANT; + expr->u.double_constant.dval = dval; + return expr; +} + +static PgBenchExpr * make_variable(char *varname) { PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr)); @@ -95,4 +123,123 @@ make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr) return expr; } +/* list of available functions + * - fname: function name + * - nargs: number of arguments (-1 is a special value for min & max) + * - tag: function identifier from PgBenchFunction enum + */ +static struct { + char * fname; + int nargs; + PgBenchFunction tag; +} PGBENCH_FUNCTIONS[] = { + { "pi", 0, PGBENCH_PI }, + { "abs", 1, PGBENCH_ABS }, + { "sqrt", 1, PGBENCH_SQRT }, + { "int", 1, PGBENCH_INT }, + { "double", 1, PGBENCH_DOUBLE }, + { "min", -1, PGBENCH_MIN }, + { "max", -1, PGBENCH_MAX }, + { "random", 2, PGBENCH_RANDOM }, + { "random_gaussian", 3, PGBENCH_RANDOM_GAUSSIAN }, + { "random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL }, + { "idebug", 1, PGBENCH_IDEBUG }, + { "ddebug", 1, PGBENCH_DDEBUG }, + + /* keep as last array element */ + { NULL, 0, 0 } +}; + +/* + * Find a function from its name + * + * return the index of the function from the PGBENCH_FUNCTIONS array + * or fail if the function is unknown. 
+ */ +static int +find_func(const char * fname) +{ + int i = 0; + + while (PGBENCH_FUNCTIONS[i].fname) + { + if (pg_strcasecmp(fname, PGBENCH_FUNCTIONS[i].fname) == 0) + return i; + i++; + } + + expr_yyerror_more("unexpected function name", fname); + + /* not reached */ + return -1; +} + +/* Expression linked list builder */ +static PgBenchExprList * +make_elist(PgBenchExpr *expr, PgBenchExprList *list) +{ + PgBenchExprList *cons = pg_malloc(sizeof(PgBenchExprList)); + cons->expr = expr; + cons->next = list; + return cons; +} + +/* + * Reverse expression linked list + * + * The list of function arguments is built in reverse order, and reversed once + * at the end so as to avoid appending repeatedly at the end of the list. + */ +static PgBenchExprList * +reverse_elist(PgBenchExprList *list) +{ + PgBenchExprList *cur = list, *prec = NULL, *next = NULL; + + while (cur != NULL) + { + next = cur->next; + cur->next = prec; + prec = cur; + cur = next; + } + + return prec; +} + +/* Return the length of an expression list */ +static int +elist_length(PgBenchExprList *list) +{ + int len = 0; + + for (; list != NULL; list = list->next) + len++; + + return len; +} + +/* Build function call expression */ +static PgBenchExpr * +make_func(const int fnumber, PgBenchExprList *args) +{ + PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr)); + + Assert(fnumber >= 0); + + if ((PGBENCH_FUNCTIONS[fnumber].nargs >= 0 && + PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args)) || + /* check at least one arg for min & max */ + (PGBENCH_FUNCTIONS[fnumber].nargs == -1 && + elist_length(args) == 0)) + expr_yyerror_more("unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + expr->etype = ENODE_FUNCTION; + expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag; + /* the argument list has been built in reverse order, it is fixed here */ + expr->u.function.args = reverse_elist(args); + + return expr; +} + #include "exprscan.c" diff --git a/src/bin/pgbench/exprscan.l 
b/src/bin/pgbench/exprscan.l index 5331ab7..1f8fc65 100644 --- a/src/bin/pgbench/exprscan.l +++ b/src/bin/pgbench/exprscan.l @@ -46,6 +46,7 @@ space [ \t\r\f] "%" { yycol += yyleng; return '%'; } "(" { yycol += yyleng; return '('; } ")" { yycol += yyleng; return ')'; } +"," { yycol += yyleng; return ','; } :[a-zA-Z0-9_]+ { yycol += yyleng; @@ -57,8 +58,19 @@ space [ \t\r\f] yylval.ival = strtoint64(yytext); return INTEGER; } +[0-9]+\.[0-9]+ { + yycol += yyleng; + yylval.dval = atof(yytext); + return DOUBLE; + } +[a-zA-Z0-9_]+ { + yycol += yyleng; + yylval.str = pg_strdup(yytext); + return FUNCTION; + } + +[\n] { yycol = 0; yyline++; /* never occurs, input on one line */ } -[\n] { yycol = 0; yyline++; } {space}+ { yycol += yyleng; /* ignore */ } . { @@ -71,10 +83,16 @@ space [ \t\r\f] %% void -yyerror(const char *message) +expr_yyerror_more(const char *message, const char *more) { syntax_error(expr_source, expr_lineno, expr_full_line, expr_command, - message, NULL, expr_col + yycol); + message, more, expr_col + yycol); +} + +void +yyerror(const char *message) +{ + expr_yyerror_more(message, NULL); } /* @@ -94,15 +112,14 @@ expr_scanner_init(const char *str, const char *source, expr_command = (char *) cmd; expr_col = (int) ecol; - /* - * Might be left over after error - */ + /* reset column count for this scan */ + yycol = 0; + + /* Might be left over after error */ if (YY_CURRENT_BUFFER) yy_delete_buffer(YY_CURRENT_BUFFER); - /* - * Make a scan buffer with special termination needed by flex. - */ + /* Make a scan buffer with special termination needed by flex. 
*/ scanbuflen = slen; scanbuf = pg_malloc(slen + 2); memcpy(scanbuf, str, slen); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index f2d435b..124dd5c 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -90,7 +90,7 @@ static int pthread_join(pthread_t th, void **thread_return); #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ -#define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold for gauss */ +#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ @@ -303,13 +303,10 @@ static int debug = 0; /* debug flag */ /* default scenario */ static char *tpc_b = { - "\\set nbranches " CppAsString2(nbranches) " * :scale\n" - "\\set ntellers " CppAsString2(ntellers) " * :scale\n" - "\\set naccounts " CppAsString2(naccounts) " * :scale\n" - "\\setrandom aid 1 :naccounts\n" - "\\setrandom bid 1 :nbranches\n" - "\\setrandom tid 1 :ntellers\n" - "\\setrandom delta -5000 5000\n" + "\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n" + "\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n" + "\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n" + "\\set delta random(-5000, 5000)\n" "BEGIN;\n" "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" @@ -321,13 +318,10 @@ static char *tpc_b = { /* -N case */ static char *simple_update = { - "\\set nbranches " CppAsString2(nbranches) " * :scale\n" - "\\set ntellers " CppAsString2(ntellers) " * :scale\n" - "\\set naccounts " CppAsString2(naccounts) " * :scale\n" - "\\setrandom aid 1 :naccounts\n" - "\\setrandom bid 1 :nbranches\n" - "\\setrandom tid 1 :ntellers\n" - "\\setrandom delta -5000 5000\n" + "\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n" + "\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n" + 
"\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n" + "\\set delta random(-5000, 5000)\n" "BEGIN;\n" "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" @@ -337,8 +331,7 @@ static char *simple_update = { /* -S case */ static char *select_only = { - "\\set naccounts " CppAsString2(naccounts) " * :scale\n" - "\\setrandom aid 1 :naccounts\n" + "\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n" "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" }; @@ -488,47 +481,47 @@ getrand(TState *thread, int64 min, int64 max) /* * random number generator: exponential distribution from min to max inclusive. - * the threshold is so that the density of probability for the last cut-off max - * value is exp(-threshold). + * the parameter is so that the density of probability for the last cut-off max + * value is exp(-param). */ static int64 -getExponentialRand(TState *thread, int64 min, int64 max, double threshold) +getExponentialRand(TState *thread, int64 min, int64 max, double param) { double cut, uniform, rand; - Assert(threshold > 0.0); - cut = exp(-threshold); + Assert(param > 0.0); + cut = exp(-param); /* erand in [0, 1), uniform in (0, 1] */ uniform = 1.0 - pg_erand48(thread->random_state); /* - * inner expresion in (cut, 1] (if threshold > 0), rand in [0, 1) + * inner expresion in (cut, 1] (if param > 0), rand in [0, 1) */ Assert((1.0 - cut) != 0.0); - rand = -log(cut + (1.0 - cut) * uniform) / threshold; + rand = -log(cut + (1.0 - cut) * uniform) / param; /* return int64 random number within between min and max */ return min + (int64) ((max - min + 1) * rand); } /* random number generator: gaussian distribution from min to max inclusive */ static int64 -getGaussianRand(TState *thread, int64 min, int64 max, double threshold) +getGaussianRand(TState *thread, int64 min, int64 max, double param) { double stdev; double rand; /* - * Get user specified random number 
from this loop, with -threshold < - * stdev <= threshold + * Get user specified random number from this loop, + * with -param < stdev <= param * * This loop is executed until the number is in the expected range. * - * As the minimum threshold is 2.0, the probability of looping is low: + * As the minimum parameter is 2.0, the probability of looping is low: * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the * average sinus multiplier as 2/pi, we have a 8.6% looping probability in - * the worst case. For a 5.0 threshold value, the looping probability is + * the worst case. For a 5.0 param value, the looping probability is * about e^{-5} * 2 / pi ~ 0.43%. */ do @@ -553,10 +546,10 @@ getGaussianRand(TState *thread, int64 min, int64 max, double threshold) * over. */ } - while (stdev < -threshold || stdev >= threshold); + while (stdev < -param || stdev >= param); - /* stdev is in [-threshold, threshold), normalization to [0,1) */ - rand = (stdev + threshold) / (threshold * 2.0); + /* stdev is in [-param, param), normalization to [0,1) */ + rand = (stdev + param) / (param * 2.0); /* return int64 random number within between min and max */ return min + (int64) ((max - min + 1) * rand); @@ -887,13 +880,191 @@ getQueryParams(CState *st, const Command *command, const char **params) } /* + * Recursive evaluation of int and double expressions + * + * Pgbench uses an implicit descendant typing, that is depending on the + * expected type (int or double) the corresponding evalInt or evalDouble + * function is called. + * + * The expected type at the highest level is always an integer, as pgbench + * only supports integer variables. Some function arguments are expected to + * be double, and thus the expected type can change during the recursion + * when evaluating those arguments. 
+ * + * The expected type can also be enforced with functions "int" which expects + * a double argument and cast the result to int, and "double" which expects an + * int argument and casts it to double. + * + * This approach departs from usual type systems which are ascendant, starting + * from the type of leaves and typing nodes while going upwards in the + * expression syntax tree. + * + * This is not perceived as an issue for the typical pgbench scripts which + * mostly deal with integers, as the double type is only used as a parameter + * for exponential and gaussian distributed random generation, so the + * difference should seldom be noticed, and can always be fixed by inserting + * an explicit conversion (int or double function) at the right place. + * + * This convention greatly simplifies the handling of types, as there is no + * need of an explicit typing/compilation phase which would insert conversions + * and resolve overloaded operators, or to put these conversions and + * resolutions in the expression evaluation which would induce repeated + * code to test types and handle conversion for each function and operators. + */ + +static bool evalInt(TState *, CState *, PgBenchExpr *, int64 *); + +/* + * Recursive evaluation of an expression in a pgbench script + * using the current state of variables. + * Returns whether the evaluation was ok, + * the value itself is returned through the retval pointer. 
+ */ +static bool +evalDouble(TState *thread, CState *st, PgBenchExpr *expr, double *retval) +{ + switch (expr->etype) + { + case ENODE_DOUBLE_CONSTANT: + { + *retval = expr->u.double_constant.dval; + return true; + } + case ENODE_OPERATOR: + { + double lval, rval; + + if (!evalDouble(thread, st, expr->u.operator.lexpr, &lval)) + return false; + if (!evalDouble(thread, st, expr->u.operator.rexpr, &rval)) + return false; + + switch (expr->u.operator.operator) + { + case '+': + *retval = lval + rval; + return true; + + case '-': + *retval = lval - rval; + return true; + + case '*': + *retval = lval * rval; + return true; + + case '/': + *retval = lval / rval; + return true; + + /* cast any int operator */ + case '%': + { + int64 ival; + if (!evalInt(thread, st, expr, &ival)) + return false; + *retval = (double) ival; + return true; + } + default: + fprintf(stderr, "unexpected operator '%c'\n", + expr->u.operator.operator); + exit(1); + } + } + case ENODE_FUNCTION: + { + PgBenchFunction func = expr->u.function.function; + PgBenchExprList *args = expr->u.function.args; + + switch (func) + { + case PGBENCH_PI: + *retval = M_PI; + return true; + case PGBENCH_ABS: /* also an integer function */ + { + if (!evalDouble(thread, st, args->expr, retval)) + return false; + + if ((*retval) < 0.0) + *retval = - *retval; + + return true; + } + case PGBENCH_SQRT: + { + double arg; + + if (!evalDouble(thread, st, args->expr, &arg)) + return false; + + *retval = sqrt(arg); + + return true; + } + case PGBENCH_DDEBUG: + { + if (!evalDouble(thread, st, args->expr, retval)) + return false; + + fprintf(stderr, "ddebug(script=%d,command=%d): %f\n", + st->use_file, st->state+1, *retval); + + return true; + } + case PGBENCH_DOUBLE: + { + int64 ival; + if (!evalInt(thread, st, args->expr, &ival)) + return false; + *retval = (double) ival; + return true; + } + /* integer-specific functions are evaluated as int and cast */ + case PGBENCH_INT: + case PGBENCH_IDEBUG: + case PGBENCH_MIN: + case 
PGBENCH_MAX: + case PGBENCH_RANDOM: + case PGBENCH_RANDOM_EXPONENTIAL: + case PGBENCH_RANDOM_GAUSSIAN: + { + int64 ival; + if (!evalInt(thread, st, expr, &ival)) + return false; + *retval = (double) ival; + return true; + } + default: + fprintf(stderr, "unexpected function tag: %d\n", func); + exit(1); + } + } + case ENODE_INTEGER_CONSTANT: + case ENODE_VARIABLE: + { + int64 ival; + if (!evalInt(thread, st, expr, &ival)) + return false; + *retval = (double) ival; + return true; + } + default: + fprintf(stderr, "unexpected enode type in double evaluation: %d\n", + expr->etype); + exit(1); + } +} + +/* * Recursive evaluation of an expression in a pgbench script * using the current state of variables. * Returns whether the evaluation was ok, * the value itself is returned through the retval pointer. */ static bool -evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval) +evalInt(TState *thread, CState *st, PgBenchExpr *expr, int64 *retval) { switch (expr->etype) { @@ -903,6 +1074,12 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval) return true; } + case ENODE_DOUBLE_CONSTANT: + { + *retval = (int64) expr->u.double_constant.dval; + return true; + } + case ENODE_VARIABLE: { char *var; @@ -922,49 +1099,180 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval) int64 lval; int64 rval; - if (!evaluateExpr(st, expr->u.operator.lexpr, &lval)) + if (!evalInt(thread, st, expr->u.operator.lexpr, &lval)) return false; - if (!evaluateExpr(st, expr->u.operator.rexpr, &rval)) + if (!evalInt(thread, st, expr->u.operator.rexpr, &rval)) return false; + switch (expr->u.operator.operator) { - case '+': - *retval = lval + rval; - return true; + case '+': + *retval = lval + rval; + return true; - case '-': - *retval = lval - rval; - return true; + case '-': + *retval = lval - rval; + return true; - case '*': - *retval = lval * rval; - return true; + case '*': + *retval = lval * rval; + return true; - case '/': - if (rval == 0) + case '/': + if (rval == 0) + { + 
fprintf(stderr, "division by zero\n"); + return false; + } + *retval = lval / rval; + return true; + + case '%': + if (rval == 0) + { + fprintf(stderr, "division by zero\n"); + return false; + } + *retval = lval % rval; + return true; + + default: + fprintf(stderr, "unexpected integer operator '%c'\n", + expr->u.operator.operator); + return false; + } + } + + case ENODE_FUNCTION: + { + PgBenchFunction func = expr->u.function.function; + PgBenchExprList *args = expr->u.function.args; + + switch (func) + { + case PGBENCH_RANDOM: + case PGBENCH_RANDOM_EXPONENTIAL: + case PGBENCH_RANDOM_GAUSSIAN: + { + int64 arg1, arg2; + + if (!evalInt(thread, st, args->expr, &arg1)) + return false; + if (!evalInt(thread, st, args->next->expr, &arg2)) + return false; + + /* check random range */ + if (arg1 > arg2) { - fprintf(stderr, "division by zero\n"); + fprintf(stderr, "empty range given to random\n"); + st->ecnt++; + return false; + } + else if (arg2 - arg1 < 0 || (arg2 - arg1) + 1 < 0) + { + /* prevent int overflows in random functions */ + fprintf(stderr, "random range is too large\n"); + st->ecnt++; + return false; } - *retval = lval / rval; - return true; - case '%': - if (rval == 0) + if (func == PGBENCH_RANDOM) + *retval = getrand(thread, arg1, arg2); + else /* gaussian & exponential */ { - fprintf(stderr, "division by zero\n"); + double param; + if (!evalDouble(thread, st, args->next->next->expr, + &param)) + return false; + if (func == PGBENCH_RANDOM_GAUSSIAN) + *retval = getGaussianRand(thread, arg1, arg2, param); + else /* exponential */ + *retval = getExponentialRand(thread, arg1, arg2, param); + } + + return true; + } + case PGBENCH_IDEBUG: /* unary functions */ + { + if (!evalInt(thread, st, args->expr, retval)) + return false; + + fprintf(stderr, "idebug(script=%d,command=%d): " + INT64_FORMAT "\n", st->use_file, st->state+1, *retval); + + return true; + } + case PGBENCH_ABS: /* both an int & double function */ + { + if (!evalInt(thread, st, args->expr, retval)) return 
false; + + if ((*retval) < 0) + *retval = - *retval; + + return true; + } + case PGBENCH_MIN: /* n-ary, at least one argument */ + case PGBENCH_MAX: + { + int64 val = -1; + bool first = true; + while (args != NULL) + { + int64 arg; + + if (!evalInt(thread, st, args->expr, &arg)) + return false; + + if (first) + val = arg; + else if (func == PGBENCH_MIN) + val = val < arg? val: arg; + else if (func == PGBENCH_MAX) + val = val > arg? val: arg; + + args = args->next; + first = false; } - *retval = lval % rval; + + *retval = val; return true; - } + } + case PGBENCH_INT: /* eval as double & cast to int */ + { + double arg; + + if (!evalDouble(thread, st, args->expr, &arg)) + return false; + + *retval = (int64) arg; + return true; + } - fprintf(stderr, "bad operator\n"); - return false; + /* cast double specific functions to int */ + case PGBENCH_PI: + case PGBENCH_DOUBLE: + case PGBENCH_DDEBUG: + case PGBENCH_SQRT: + { + double arg; + + if (!evalDouble(thread, st, expr, &arg)) + return false; + + *retval = (int64) arg; + return true; + } + default: + fprintf(stderr, "unexpected function tag %d\n", func); + exit(1); + } } - default: - break; + default: /* abort on internal error */ + fprintf(stderr, "unexpected enode type in int evaluation: %d\n", + expr->etype); + exit(1); } fprintf(stderr, "bad expression\n"); @@ -1483,7 +1791,7 @@ top: char *var; int64 min, max; - double threshold = 0; + double param = 0; char res[64]; if (*argv[2] == ':') @@ -1554,41 +1862,41 @@ top: { if ((var = getVariable(st, argv[5] + 1)) == NULL) { - fprintf(stderr, "%s: invalid threshold number: \"%s\"\n", + fprintf(stderr, "%s: invalid parameter: \"%s\"\n", argv[0], argv[5]); st->ecnt++; return true; } - threshold = strtod(var, NULL); + param = strtod(var, NULL); } else - threshold = strtod(argv[5], NULL); + param = strtod(argv[5], NULL); if (pg_strcasecmp(argv[4], "gaussian") == 0) { - if (threshold < MIN_GAUSSIAN_THRESHOLD) + if (param < MIN_GAUSSIAN_PARAM) { - fprintf(stderr, "gaussian 
threshold must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_THRESHOLD, argv[5]); + fprintf(stderr, "gaussian parameter must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_PARAM, argv[5]); st->ecnt++; return true; } #ifdef DEBUG - printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold)); + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, param)); #endif - snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold)); + snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, param)); } else if (pg_strcasecmp(argv[4], "exponential") == 0) { - if (threshold <= 0.0) + if (param <= 0.0) { - fprintf(stderr, "exponential threshold must be greater than zero (not \"%s\")\n", argv[5]); + fprintf(stderr, "exponential parameter must be greater than zero (not \"%s\")\n", argv[5]); st->ecnt++; return true; } #ifdef DEBUG - printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold)); + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, param)); #endif - snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold)); + snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, param)); } } else /* this means an error somewhere in the parsing phase... 
*/ @@ -1613,7 +1921,7 @@ top: PgBenchExpr *expr = commands[st->state]->expr; int64 result; - if (!evaluateExpr(st, expr, &result)) + if (!evalInt(thread, st, expr, &result)) { st->ecnt++; return true; @@ -2283,7 +2591,7 @@ process_commands(char *buf, const char *source, const int lineno) { /* * parsing: \setrandom variable min max [uniform] \setrandom - * variable min max (gaussian|exponential) threshold + * variable min max (gaussian|exponential) parameter */ if (my_commands->argc < 4) @@ -2308,7 +2616,7 @@ process_commands(char *buf, const char *source, const int lineno) if (my_commands->argc < 6) { syntax_error(source, lineno, my_commands->line, my_commands->argv[0], - "missing threshold argument", my_commands->argv[4], -1); + "missing parameter", my_commands->argv[4], -1); } else if (my_commands->argc > 6) { diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index 42e2aae..fffd355 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -14,11 +14,31 @@ typedef enum PgBenchExprType { ENODE_INTEGER_CONSTANT, + ENODE_DOUBLE_CONSTANT, ENODE_VARIABLE, - ENODE_OPERATOR + ENODE_OPERATOR, + ENODE_FUNCTION } PgBenchExprType; +typedef enum PgBenchFunction +{ + PGBENCH_NONE, + PGBENCH_PI, + PGBENCH_INT, + PGBENCH_DOUBLE, + PGBENCH_IDEBUG, + PGBENCH_DDEBUG, + PGBENCH_ABS, + PGBENCH_SQRT, + PGBENCH_MIN, + PGBENCH_MAX, + PGBENCH_RANDOM, + PGBENCH_RANDOM_GAUSSIAN, + PGBENCH_RANDOM_EXPONENTIAL +} PgBenchFunction; + typedef struct PgBenchExpr PgBenchExpr; +typedef struct PgBenchExprList PgBenchExprList; struct PgBenchExpr { @@ -31,6 +51,10 @@ struct PgBenchExpr } integer_constant; struct { + double dval; + } double_constant; + struct + { char *varname; } variable; struct @@ -39,14 +63,25 @@ struct PgBenchExpr PgBenchExpr *lexpr; PgBenchExpr *rexpr; } operator; + struct + { + PgBenchFunction function; + PgBenchExprList *args; + } function; } u; }; +struct PgBenchExprList { + PgBenchExpr *expr; + PgBenchExprList *next; +}; + extern PgBenchExpr 
*expr_parse_result; extern int expr_yyparse(void); extern int expr_yylex(void); extern void expr_yyerror(const char *str); +extern void expr_yyerror_more(const char *str, const char *more); extern void expr_scanner_init(const char *str, const char *source, const int lineno, const char *line, const char *cmd, const int ecol);