From 79e51be780dd733c6789e519176b26ea79282ea8 Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Fri, 10 Mar 2017 17:26:26 +0300
Subject: [PATCH 3/8] Base for reversed executor.
Framework for implementing reversed executor. Substitutes ExecutePlan call
with RunNode, which invokes pushTuple on leaf nodes in proper order.
See README for more details.
---
src/backend/executor/README | 45 +++++++
src/backend/executor/execMain.c | 255 +++++++++++++++++-------------------
src/backend/executor/execProcnode.c | 53 +++++++-
src/include/executor/executor.h | 3 +
src/include/nodes/execnodes.h | 11 ++
5 files changed, 230 insertions(+), 137 deletions(-)
diff --git a/src/backend/executor/README b/src/backend/executor/README
index f1d1e4c76c..86f6e99e86 100644
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -3,6 +3,51 @@ src/backend/executor/README
The Postgres Executor
=====================
+This is an attempt to implement a proof of concept of an executor with a push-based
+architecture like in [1]. We will call it the 'reversed' executor. Right now we will
+not support both reversed and original executor, because it would involve a lot
+of either code copy-pasting (or time to avoid it), while our current goal is
+just to implement working proof of concept to estimate the benefits.
+
+Since this is a huge change, we need to outline the general strategy, things
+we will start with and how we will deal with the old code, remembering that we
+will reuse a great deal of it.
+
+Key points:
+* ExecProcNode is now a stub. All node code (ExecSomeNode, etc.) is
+ unreachable. However, we leave it to avoid a 19k-line removal commit and to
+ produce more useful diffs later; a lot of code will be reused.
+* Base for implementing push model, common for all nodes, is in execMain.c and
+ execProcNode.c. We will substitute execProcNode with pushTuple; its interface is
+ described in the comment to the definition, and this is the only change to the
+ node's interface. We make necessary changes to execMain.c, namely to
+ ExecutorRun, to run nodes in proper order from below.
+* Then we are ready to implement the nodes one by one.
+
+At first,
+* parallel execution will not be supported.
+* subplans will not be supported.
+* we will not support ExecReScan too for now.
+* only CMD_SELECT operation will be supported.
+* only forward direction will be supported.
+* we will not support set returning functions either.
+
+In general, we try to treat the old code as follows:
+* As said above, leave it even if it is dead for now.
+* If it is not dead, but not yet updated for the reversed executor, remove it.
+ Example is contents of ExecInitNode.
+* Sometimes we need to make minimal changes to some existing function, but these
+ changes will make it incompatible with existing code which is not yet
+ reworked. In that case, to avoid deleting a lot of code we will just
+ copy-paste it until some more generic solution is provided. An example is
+ heapgettup_pagemode and its 'reversed' analogue added for seqscan.
+
+
+[1] Efficiently Compiling Efficient Query Plans for Modern Hardware,
+ http://www.vldb.org/pvldb/vol4/p539-neumann.pdf
+
+Below goes the original README text.
+
The executor processes a tree of "plan nodes". The plan tree is essentially
a demand-pull pipeline of tuple processing operations. Each node, when
called, will produce the next tuple in its output sequence, or NULL if no
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f629f0098f..bb25a4137c 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -63,6 +63,7 @@
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
+#include "executor/executor.h"
/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
@@ -79,13 +80,7 @@ static void InitPlan(QueryDesc *queryDesc, int eflags);
static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
static void ExecPostprocessPlan(EState *estate);
static void ExecEndPlan(PlanState *planstate, EState *estate);
-static void ExecutePlan(EState *estate, PlanState *planstate,
- bool use_parallel_mode,
- CmdType operation,
- bool sendTuples,
- uint64 numberTuples,
- ScanDirection direction,
- DestReceiver *dest);
+static void RunNode(PlanState *planstate);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
Bitmapset *modifiedCols,
@@ -341,18 +336,24 @@ standard_ExecutorRun(QueryDesc *queryDesc,
if (sendTuples)
(*dest->rStartup) (dest, operation, queryDesc->tupDesc);
+ /* set up state needed for sending tuples to the dest */
+ estate->es_current_tuple_count = 0;
+ estate->es_sendTuples = sendTuples;
+ estate->es_numberTuplesRequested = count;
+ estate->es_operation = operation;
+ estate->es_dest = dest;
+
+ /*
+ * Set the direction.
+ */
+ estate->es_direction = direction;
+
/*
* run plan
*/
if (!ScanDirectionIsNoMovement(direction))
- ExecutePlan(estate,
- queryDesc->planstate,
- queryDesc->plannedstmt->parallelModeNeeded,
- operation,
- sendTuples,
- count,
- direction,
- dest);
+ /* Run each leaf in right order */
+ RunNode(queryDesc->planstate);
/*
* shutdown tuple receiver, if we started it
@@ -1533,126 +1534,6 @@ ExecEndPlan(PlanState *planstate, EState *estate)
}
}
-/* ----------------------------------------------------------------
- * ExecutePlan
- *
- * Processes the query plan until we have retrieved 'numberTuples' tuples,
- * moving in the specified direction.
- *
- * Runs to completion if numberTuples is 0
- *
- * Note: the ctid attribute is a 'junk' attribute that is removed before the
- * user can see it
- * ----------------------------------------------------------------
- */
-static void
-ExecutePlan(EState *estate,
- PlanState *planstate,
- bool use_parallel_mode,
- CmdType operation,
- bool sendTuples,
- uint64 numberTuples,
- ScanDirection direction,
- DestReceiver *dest)
-{
- TupleTableSlot *slot;
- uint64 current_tuple_count;
-
- /*
- * initialize local variables
- */
- current_tuple_count = 0;
-
- /*
- * Set the direction.
- */
- estate->es_direction = direction;
-
- /*
- * If a tuple count was supplied, we must force the plan to run without
- * parallelism, because we might exit early. Also disable parallelism
- * when writing into a relation, because no database changes are allowed
- * in parallel mode.
- */
- if (numberTuples || dest->mydest == DestIntoRel)
- use_parallel_mode = false;
-
- if (use_parallel_mode)
- EnterParallelMode();
-
- /*
- * Loop until we've processed the proper number of tuples from the plan.
- */
- for (;;)
- {
- /* Reset the per-output-tuple exprcontext */
- ResetPerTupleExprContext(estate);
-
- /*
- * Execute the plan and obtain a tuple
- */
- slot = ExecProcNode(planstate);
-
- /*
- * if the tuple is null, then we assume there is nothing more to
- * process so we just end the loop...
- */
- if (TupIsNull(slot))
- {
- /* Allow nodes to release or shut down resources. */
- (void) ExecShutdownNode(planstate);
- break;
- }
-
- /*
- * If we have a junk filter, then project a new tuple with the junk
- * removed.
- *
- * Store this new "clean" tuple in the junkfilter's resultSlot.
- * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
- * because that tuple slot has the wrong descriptor.)
- */
- if (estate->es_junkFilter != NULL)
- slot = ExecFilterJunk(estate->es_junkFilter, slot);
-
- /*
- * If we are supposed to send the tuple somewhere, do so. (In
- * practice, this is probably always the case at this point.)
- */
- if (sendTuples)
- {
- /*
- * If we are not able to send the tuple, we assume the destination
- * has closed and no more tuples can be sent. If that's the case,
- * end the loop.
- */
- if (!((*dest->receiveSlot) (slot, dest)))
- break;
- }
-
- /*
- * Count tuples processed, if this is a SELECT. (For other operation
- * types, the ModifyTable plan node must count the appropriate
- * events.)
- */
- if (operation == CMD_SELECT)
- (estate->es_processed)++;
-
- /*
- * check our tuple count.. if we've processed the proper number then
- * quit, else loop again and process more tuples. Zero numberTuples
- * means no limit.
- */
- current_tuple_count++;
- if (numberTuples && numberTuples == current_tuple_count)
- break;
- }
-
- if (use_parallel_mode)
- ExitParallelMode();
-}
-
-
/*
* ExecRelCheck --- check that tuple meets constraints for result relation
*
@@ -3291,3 +3172,107 @@ ExecBuildSlotPartitionKeyDescription(Relation rel,
return buf.data;
}
+
+/*
+ * This function pushes the ready tuple to its destination. It should
+ * be called by top-level PlanState.
+ * For now, I added the state needed for this to estate, specifically
+ * current_tuple_count, sendTuples, numberTuplesRequested (old numberTuples),
+ * cmdType, dest.
+ *
+ * slot is the tuple to push
+ * planstate is top-level node
+ * returns true, if we are ready to accept more tuples, false otherwise
+ */
+bool
+SendReadyTuple(TupleTableSlot *slot, PlanState *planstate)
+{
+ EState *estate;
+ bool sendTuples;
+ CmdType operation;
+ DestReceiver *dest;
+
+ estate = planstate->state;
+ sendTuples = estate->es_sendTuples;
+ operation = estate->es_operation;
+ dest = estate->es_dest;
+
+ if (TupIsNull(slot))
+ {
+ /* Allow nodes to release or shut down resources. */
+ (void) ExecShutdownNode(planstate);
+ return false;
+ }
+
+ /*
+ * If we have a junk filter, then project a new tuple with the junk
+ * removed.
+ *
+ * Store this new "clean" tuple in the junkfilter's resultSlot.
+ * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
+ * because that tuple slot has the wrong descriptor.)
+ */
+ if (estate->es_junkFilter != NULL)
+ slot = ExecFilterJunk(estate->es_junkFilter, slot);
+
+ /*
+ * If we are supposed to send the tuple somewhere, do so. (In
+ * practice, this is probably always the case at this point.)
+ */
+ if (sendTuples)
+ {
+ /*
+ * If we are not able to send the tuple, we assume the destination
+ * has closed and no more tuples can be sent.
+ */
+ if (!((*dest->receiveSlot) (slot, dest)))
+ return false;
+ }
+
+ /*
+ * Count tuples processed, if this is a SELECT. (For other operation
+ * types, the ModifyTable plan node must count the appropriate
+ * events.)
+ */
+ if (operation == CMD_SELECT)
+ (estate->es_processed)++;
+
+ /*
+ * check our tuple count.. if we've processed the proper number then
+ * quit, else process more tuples. Zero numberTuplesRequested
+ * means no limit.
+ */
+ estate->es_current_tuple_count++;
+ if (estate->es_numberTuplesRequested &&
+ estate->es_numberTuplesRequested == estate->es_current_tuple_count)
+ return false;
+
+ ResetPerTupleExprContext(estate);
+ return true;
+}
+
+/*
+ * When pushing, we have to call pushTuple on each leaf of the tree in correct
+ * order: first inner sides, then outer. This function does exactly that.
+ */
+void
+RunNode(PlanState *planstate)
+{
+ Assert(planstate != NULL);
+
+ if (innerPlanState(planstate) != NULL)
+ {
+ RunNode(innerPlanState(planstate));
+ /* I assume that if inner node exists, outer exists too */
+ RunNode(outerPlanState(planstate));
+ return;
+ }
+ if (outerPlanState(planstate) != NULL)
+ {
+ RunNode(outerPlanState(planstate));
+ return;
+ }
+
+ /* node has no children, it is a leaf */
+ pushTuple(NULL, planstate, NULL);
+}
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 649d1e58f6..a95cfe5430 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -155,7 +155,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent)
result = NULL; /* keep compiler quiet */
break;
}
- return NULL;
/* Set up instrumentation for this node if requested */
if (estate->es_instrument)
@@ -164,7 +163,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent)
return result;
}
-
/*
* Unsupported, left to avoid deleting 19k lines of existing code
*/
@@ -175,6 +173,57 @@ ExecProcNode(PlanState *node)
return NULL;
}
+/*
+ * Instead of old ExecProcNode, here we will have function pushTuple
+ * pushing one tuple.
+ * 'tuple' is a tuple to push
+ * 'node' is a receiver of tuple
+ * 'pusher' is the sender of the tuple; its parent is 'node'. We need it to
+ * distinguish inner and outer pushes.
+ * Returns true if node is still accepting tuples, false if not.
+ * ReScans are not supported yet.
+ * In general, if a tuple (even NULL) was pushed into a node which returned
+ * 'false' before, the behaviour is undefined, i.e. it is not allowed;
+ * however, we will try to catch such situations with asserts.
+ * If a lower node has sent a NULL tuple to an upper node, for now we will not
+ * bother returning a meaningful bool result and will just send false by convention.
+ */
+bool
+pushTuple(TupleTableSlot *slot, PlanState *node, PlanState *pusher)
+{
+ bool push_from_outer;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* If the receiver is NULL, then pusher is top-level node, so we need
+ * to send the tuple to the dest
+ */
+ if (!node)
+ {
+ return SendReadyTuple(slot, pusher);
+ }
+
+ /*
+ * If pusher is NULL, then node is a bottom node, another special case:
+ * bottom nodes obviously need neither a tuple nor a pusher
+ */
+ if (!pusher)
+ {
+ switch (nodeTag(node))
+ {
+ default:
+ elog(ERROR, "bottom node type not supported: %d",
+ (int) nodeTag(node));
+ return false;
+ }
+ }
+
+ /* does push come from the outer side? */
+ push_from_outer = outerPlanState(node) == pusher;
+
+ elog(ERROR, "node type not supported: %d", (int) nodeTag(node));
+}
+
/* ----------------------------------------------------------------
* Unsupported too; we don't need it in push model
* ----------------------------------------------------------------
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 716362970f..eb4e27ce21 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -179,6 +179,7 @@ extern void ExecutorRun(QueryDesc *queryDesc,
ScanDirection direction, uint64 count);
extern void standard_ExecutorRun(QueryDesc *queryDesc,
ScanDirection direction, uint64 count);
+extern bool SendReadyTuple(TupleTableSlot *slot, PlanState *planstate);
extern void ExecutorFinish(QueryDesc *queryDesc);
extern void standard_ExecutorFinish(QueryDesc *queryDesc);
extern void ExecutorEnd(QueryDesc *queryDesc);
@@ -240,6 +241,8 @@ extern TupleTableSlot *ExecProcNode(PlanState *node);
extern Node *MultiExecProcNode(PlanState *node);
extern void ExecEndNode(PlanState *node);
extern bool ExecShutdownNode(PlanState *node);
+extern bool pushTuple(TupleTableSlot *slot, PlanState *node,
+ PlanState *pusher);
/*
* prototypes from functions in execQual.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 738f098b00..da7fd9c7ac 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -28,6 +28,7 @@
#include "utils/tuplesort.h"
#include "nodes/tidbitmap.h"
#include "storage/condition_variable.h"
+#include "tcop/dest.h" /* for DestReceiver type in EState */
/* ----------------
@@ -416,6 +417,16 @@ typedef struct EState
List *es_auxmodifytables; /* List of secondary ModifyTableStates */
/*
+ * State needed to push tuples to dest in push model, technically it is
+ * local variables from old ExecutePlan
+ */
+ uint64 es_current_tuple_count;
+ bool es_sendTuples;
+ uint64 es_numberTuplesRequested;
+ CmdType es_operation;
+ DestReceiver *es_dest;
+
+ /*
* this ExprContext is for per-output-tuple operations, such as constraint
* checks and index-value computations. It will be reset for each output
* tuple. Note that it will be created only if needed.
--
2.11.0