From 79e51be780dd733c6789e519176b26ea79282ea8 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 10 Mar 2017 17:26:26 +0300 Subject: [PATCH 3/8] Base for reversed executor. Framework for implementing reversed executor. Substitutes ExecutePlan call with RunNode, which invokes pushTuple on leaf nodes in proper order. See README for more details. --- src/backend/executor/README | 45 +++++++ src/backend/executor/execMain.c | 255 +++++++++++++++++------------------- src/backend/executor/execProcnode.c | 53 +++++++- src/include/executor/executor.h | 3 + src/include/nodes/execnodes.h | 11 ++ 5 files changed, 230 insertions(+), 137 deletions(-) diff --git a/src/backend/executor/README b/src/backend/executor/README index f1d1e4c76c..86f6e99e86 100644 --- a/src/backend/executor/README +++ b/src/backend/executor/README @@ -3,6 +3,51 @@ src/backend/executor/README The Postgres Executor ===================== +This is an attempt to implement a proof of concept of an executor with push-based +architecture like in [1]. We will call it 'reversed' executor. Right now we will +not support both reversed and original executor, because it would involve a lot +of either code copy-pasting (or time to avoid it), while our current goal is +just to implement a working proof of concept to estimate the benefits. + +Since this is a huge change, we need to outline the general strategy, things +we will start with and how we will deal with the old code, remembering that we +will reuse a great deal of it. + +Key points: +* ExecProcNode is now a stub. All nodes code (ExecSomeNode, etc) is + unreachable. However, we leave it to avoid a 19k-line removal commit and to + produce more useful diffs later; a lot of code will be reused. +* Base for implementing push model, common for all nodes, is in execMain.c and + execProcNode.c. We will substitute execProcNode with pushTuple; its interface is + described in the comment to the definition, and this is the only change to the + node's interface. 
We make necessary changes to execMain.c, namely to + ExecutorRun, to run nodes in proper order from the bottom. +* Then we are ready to implement the nodes one by one. + +At first, +* parallel execution will not be supported. +* subplans will not be supported. +* we will not support ExecReScan too for now. +* only CMD_SELECT operation will be supported. +* only forward direction will be supported. +* we will not support set returning functions either. + +In general, we try to treat the old code as follows: +* As said above, leave it even if it is dead for now. +* If it is not dead, but not yet updated for reversed executor, remove it. + Example is contents of ExecInitNode. +* Sometimes we need to make minimal changes to some existing function, but these + changes will make it incompatible with existing code which is not yet + reworked. In that case, to avoid deleting a lot of code we will just + copypaste it until some more generic solution is provided. Example is + heapgettup_pagemode and its 'reversed' analogue added for seqscan. + + +[1] Efficiently Compiling Efficient Query Plans for Modern Hardware, + http://www.vldb.org/pvldb/vol4/p539-neumann.pdf + +Below goes the original README text. + The executor processes a tree of "plan nodes". The plan tree is essentially a demand-pull pipeline of tuple processing operations. 
Each node, when called, will produce the next tuple in its output sequence, or NULL if no diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index f629f0098f..bb25a4137c 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -63,6 +63,7 @@ #include "utils/ruleutils.h" #include "utils/snapmgr.h" #include "utils/tqual.h" +#include "executor/executor.h" /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */ @@ -79,13 +80,7 @@ static void InitPlan(QueryDesc *queryDesc, int eflags); static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); static void ExecPostprocessPlan(EState *estate); static void ExecEndPlan(PlanState *planstate, EState *estate); -static void ExecutePlan(EState *estate, PlanState *planstate, - bool use_parallel_mode, - CmdType operation, - bool sendTuples, - uint64 numberTuples, - ScanDirection direction, - DestReceiver *dest); +static void RunNode(PlanState *planstate); static bool ExecCheckRTEPerms(RangeTblEntry *rte); static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols, @@ -341,18 +336,24 @@ standard_ExecutorRun(QueryDesc *queryDesc, if (sendTuples) (*dest->rStartup) (dest, operation, queryDesc->tupDesc); + /* set up state needed for sending tuples to the dest */ + estate->es_current_tuple_count = 0; + estate->es_sendTuples = sendTuples; + estate->es_numberTuplesRequested = count; + estate->es_operation = operation; + estate->es_dest = dest; + + /* + * Set the direction. 
+ */ + estate->es_direction = direction; + /* * run plan */ if (!ScanDirectionIsNoMovement(direction)) - ExecutePlan(estate, - queryDesc->planstate, - queryDesc->plannedstmt->parallelModeNeeded, - operation, - sendTuples, - count, - direction, - dest); + /* Run each leaf in right order */ + RunNode(queryDesc->planstate); /* * shutdown tuple receiver, if we started it @@ -1533,126 +1534,6 @@ ExecEndPlan(PlanState *planstate, EState *estate) } } -/* ---------------------------------------------------------------- - * ExecutePlan - * - * Processes the query plan until we have retrieved 'numberTuples' tuples, - * moving in the specified direction. - * - * Runs to completion if numberTuples is 0 - * - * Note: the ctid attribute is a 'junk' attribute that is removed before the - * user can see it - * ---------------------------------------------------------------- - */ -static void -ExecutePlan(EState *estate, - PlanState *planstate, - bool use_parallel_mode, - CmdType operation, - bool sendTuples, - uint64 numberTuples, - ScanDirection direction, - DestReceiver *dest) -{ - TupleTableSlot *slot; - uint64 current_tuple_count; - - /* - * initialize local variables - */ - current_tuple_count = 0; - - /* - * Set the direction. - */ - estate->es_direction = direction; - - /* - * If a tuple count was supplied, we must force the plan to run without - * parallelism, because we might exit early. Also disable parallelism - * when writing into a relation, because no database changes are allowed - * in parallel mode. - */ - if (numberTuples || dest->mydest == DestIntoRel) - use_parallel_mode = false; - - if (use_parallel_mode) - EnterParallelMode(); - - /* - * Loop until we've processed the proper number of tuples from the plan. 
- */ - for (;;) - { - /* Reset the per-output-tuple exprcontext */ - ResetPerTupleExprContext(estate); - - /* - * Execute the plan and obtain a tuple - */ - slot = ExecProcNode(planstate); - - /* - * if the tuple is null, then we assume there is nothing more to - * process so we just end the loop... - */ - if (TupIsNull(slot)) - { - /* Allow nodes to release or shut down resources. */ - (void) ExecShutdownNode(planstate); - break; - } - - /* - * If we have a junk filter, then project a new tuple with the junk - * removed. - * - * Store this new "clean" tuple in the junkfilter's resultSlot. - * (Formerly, we stored it back over the "dirty" tuple, which is WRONG - * because that tuple slot has the wrong descriptor.) - */ - if (estate->es_junkFilter != NULL) - slot = ExecFilterJunk(estate->es_junkFilter, slot); - - /* - * If we are supposed to send the tuple somewhere, do so. (In - * practice, this is probably always the case at this point.) - */ - if (sendTuples) - { - /* - * If we are not able to send the tuple, we assume the destination - * has closed and no more tuples can be sent. If that's the case, - * end the loop. - */ - if (!((*dest->receiveSlot) (slot, dest))) - break; - } - - /* - * Count tuples processed, if this is a SELECT. (For other operation - * types, the ModifyTable plan node must count the appropriate - * events.) - */ - if (operation == CMD_SELECT) - (estate->es_processed)++; - - /* - * check our tuple count.. if we've processed the proper number then - * quit, else loop again and process more tuples. Zero numberTuples - * means no limit. - */ - current_tuple_count++; - if (numberTuples && numberTuples == current_tuple_count) - break; - } - - if (use_parallel_mode) - ExitParallelMode(); -} - - /* * ExecRelCheck --- check that tuple meets constraints for result relation * @@ -3291,3 +3172,107 @@ ExecBuildSlotPartitionKeyDescription(Relation rel, return buf.data; } + +/* + * This function pushes the ready tuple to it's destination. 
It should + * be called by top-level PlanState. + * For now, I added the state needed for this to estate, specifically + * current_tuple_count, sendTuples, numberTuplesRequested (old numberTuples), + * cmdType, dest. + * + * slot is the tuple to push + * planstate is top-level node + * returns true, if we are ready to accept more tuples, false otherwise + */ +bool +SendReadyTuple(TupleTableSlot *slot, PlanState *planstate) +{ + EState *estate; + bool sendTuples; + CmdType operation; + DestReceiver *dest; + + estate = planstate->state; + sendTuples = estate->es_sendTuples; + operation = estate->es_operation; + dest = estate->es_dest; + + if (TupIsNull(slot)) + { + /* Allow nodes to release or shut down resources. */ + (void) ExecShutdownNode(planstate); + return false; + } + + /* + * If we have a junk filter, then project a new tuple with the junk + * removed. + * + * Store this new "clean" tuple in the junkfilter's resultSlot. + * (Formerly, we stored it back over the "dirty" tuple, which is WRONG + * because that tuple slot has the wrong descriptor.) + */ + if (estate->es_junkFilter != NULL) + slot = ExecFilterJunk(estate->es_junkFilter, slot); + + /* + * If we are supposed to send the tuple somewhere, do so. (In + * practice, this is probably always the case at this point.) + */ + if (sendTuples) + { + /* + * If we are not able to send the tuple, we assume the destination + * has closed and no more tuples can be sent. + */ + if (!((*dest->receiveSlot) (slot, dest))) + return false; + } + + /* + * Count tuples processed, if this is a SELECT. (For other operation + * types, the ModifyTable plan node must count the appropriate + * events.) + */ + if (operation == CMD_SELECT) + (estate->es_processed)++; + + /* + * check our tuple count.. if we've processed the proper number then + * quit, else process more tuples. Zero numberTuplesRequested + * means no limit. 
+ */ + estate->es_current_tuple_count++; + if (estate->es_numberTuplesRequested && + estate->es_numberTuplesRequested == estate->es_current_tuple_count) + return false; + + ResetPerTupleExprContext(estate); + return true; +} + +/* + * When pushing, we have to call pushTuple on each leaf of the tree in correct + * order: first inner sides, then outer. This function does exactly that. + */ +void +RunNode(PlanState *planstate) +{ + Assert(planstate != NULL); + + if (innerPlanState(planstate) != NULL) + { + RunNode(innerPlanState(planstate)); + /* I assume that if inner node exists, outer exists too */ + RunNode(outerPlanState(planstate)); + return; + } + if (outerPlanState(planstate) != NULL) + { + RunNode(outerPlanState(planstate)); + return; + } + + /* node has no childs, it is a leaf */ + pushTuple(NULL, planstate, NULL); +} diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 649d1e58f6..a95cfe5430 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -155,7 +155,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent) result = NULL; /* keep compiler quiet */ break; } - return NULL; /* Set up instrumentation for this node if requested */ if (estate->es_instrument) @@ -164,7 +163,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent) return result; } - /* * Unsupported, left to avoid deleting 19k lines of existing code */ @@ -175,6 +173,57 @@ ExecProcNode(PlanState *node) return NULL; } +/* + * Instead of old ExecProcNode, here we will have function pushTuple + * pushing one tuple. + * 'tuple' is a tuple to push + * 'node' is a receiver of tuple + * 'pusher' is a sender of a tuple, it's parent is 'node'. We need it to + * distinguish inner and outer pushes. + * Returns true if node is still accepting tuples, false if not. + * ReScans are not supported yet. 
+ In general, if a tuple (even NULL) was pushed into a node which returned + 'false' before, the behaviour is undefined, i.e. it is not allowed; + however, we will try to catch such situations with asserts. + If a lower node has sent a NULL tuple to an upper node, we for now will not care + to return a meaningful bool result and send just false by convention. + */ +bool +pushTuple(TupleTableSlot *slot, PlanState *node, PlanState *pusher) +{ + bool push_from_outer; + + CHECK_FOR_INTERRUPTS(); + + /* If the receiver is NULL, then pusher is top-level node, so we need + * to send the tuple to the dest + */ + if (!node) + { + return SendReadyTuple(slot, pusher); + } + + /* + * If pusher is NULL, then node is a bottom node, another special case: + * bottom nodes obviously need neither tuple nor pusher + */ + if (!pusher) + { + switch (nodeTag(node)) + { + default: + elog(ERROR, "bottom node type not supported: %d", + (int) nodeTag(node)); + return false; + } + } + + /* does push come from the outer side? 
*/ + push_from_outer = outerPlanState(node) == pusher; + + elog(ERROR, "node type not supported: %d", (int) nodeTag(node)); +} + /* ---------------------------------------------------------------- * Unsupported too; we don't need it in push model * ---------------------------------------------------------------- diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 716362970f..eb4e27ce21 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -179,6 +179,7 @@ extern void ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count); extern void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count); +extern bool SendReadyTuple(TupleTableSlot *slot, PlanState *planstate); extern void ExecutorFinish(QueryDesc *queryDesc); extern void standard_ExecutorFinish(QueryDesc *queryDesc); extern void ExecutorEnd(QueryDesc *queryDesc); @@ -240,6 +241,8 @@ extern TupleTableSlot *ExecProcNode(PlanState *node); extern Node *MultiExecProcNode(PlanState *node); extern void ExecEndNode(PlanState *node); extern bool ExecShutdownNode(PlanState *node); +extern bool pushTuple(TupleTableSlot *slot, PlanState *node, + PlanState *pusher); /* * prototypes from functions in execQual.c diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 738f098b00..da7fd9c7ac 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -28,6 +28,7 @@ #include "utils/tuplesort.h" #include "nodes/tidbitmap.h" #include "storage/condition_variable.h" +#include "tcop/dest.h" /* for DestReceiver type in EState */ /* ---------------- @@ -416,6 +417,16 @@ typedef struct EState List *es_auxmodifytables; /* List of secondary ModifyTableStates */ /* + * State needed to push tuples to dest in push model, technically it is + * local variables from old ExecutePlan + */ + uint64 es_current_tuple_count; + bool es_sendTuples; + uint64 es_numberTuplesRequested; + CmdType 
es_operation; + DestReceiver *es_dest; + + /* * this ExprContext is for per-output-tuple operations, such as constraint * checks and index-value computations. It will be reset for each output * tuple. Note that it will be created only if needed. -- 2.11.0