From 14de4960934c101e860df1249eeb3c8996617ee5 Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 30 Jul 2019 10:51:35 +0900 Subject: [PATCH v19 1/2] Revise child-to-root tuple conversion map management Transition tuple capture requires to convert child tuples to the inheritance root table format because that's the format the transition tuplestore stores tuple in. For INSERTs into partitioned tables, the conversion is handled by tuple routing code which constructs the map for a given partition only if the partition is targeted, but for UPDATE and DELETE, maps for all result relations are made and stored in an array in ModifyTableState during ExecInitModifyTable, which requires their ResultRelInfos to have been already built. During execution, map for the currently active result relation is set in TransitionCaptureState.tcs_map. This commit removes TransitionCaptureMap.tcs_map in favor a new map field in ResultRelInfo named ri_ChildToRootMap that is initialized when the ResultRelInfo for a given result relation is. This way is less confusing and less bug-prone than setting and resetting tcs_map. Also, this will also allow us to delay creating the map for a given result relation to when that relation is actually processed during execution. --- src/backend/commands/copy.c | 30 +---- src/backend/commands/trigger.c | 7 +- src/backend/executor/execPartition.c | 23 ++-- src/backend/executor/nodeModifyTable.c | 221 +++++++++------------------------ src/include/commands/trigger.h | 10 +- src/include/executor/execPartition.h | 6 - src/include/nodes/execnodes.h | 11 +- 7 files changed, 90 insertions(+), 218 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 531bd7c..eb326de 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -3105,32 +3105,14 @@ CopyFrom(CopyState cstate) } /* - * If we're capturing transition tuples, we might need to convert - * from the partition rowtype to root rowtype. + * If we're capturing transition tuples and there are no BEFORE + * triggers on the partition which may change the tuple, we can + * just remember the original unconverted tuple to avoid a + * needless round trip conversion. */ if (cstate->transition_capture != NULL) - { - if (has_before_insert_row_trig) - { - /* - * If there are any BEFORE triggers on the partition, - * we'll have to be ready to convert their result back to - * tuplestore format. - */ - cstate->transition_capture->tcs_original_insert_tuple = NULL; - cstate->transition_capture->tcs_map = - resultRelInfo->ri_PartitionInfo->pi_PartitionToRootMap; - } - else - { - /* - * Otherwise, just remember the original unconverted - * tuple, to avoid a needless round trip conversion. - */ - cstate->transition_capture->tcs_original_insert_tuple = myslot; - cstate->transition_capture->tcs_map = NULL; - } - } + cstate->transition_capture->tcs_original_insert_tuple = + !has_before_insert_row_trig ? myslot : NULL; /* * We might need to convert from the root rowtype to the partition diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 3b4fbda..e76f5d4 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -35,6 +35,7 @@ #include "commands/defrem.h" #include "commands/trigger.h" #include "executor/executor.h" +#include "executor/execPartition.h" #include "miscadmin.h" #include "nodes/bitmapset.h" #include "nodes/makefuncs.h" @@ -4293,8 +4294,8 @@ GetAfterTriggersTableData(Oid relid, CmdType cmdType) * tables, then return NULL. * * The resulting object can be passed to the ExecAR* functions. The caller - * should set tcs_map or tcs_original_insert_tuple as appropriate when dealing - * with child tables. + * should set tcs_original_insert_tuple as appropriate when dealing with child + * tables * * Note that we copy the flags from a parent table into this struct (rather * than subsequently using the relation's TriggerDesc directly) so that we can @@ -5389,7 +5390,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, if (row_trigger && transition_capture != NULL) { TupleTableSlot *original_insert_tuple = transition_capture->tcs_original_insert_tuple; - TupleConversionMap *map = transition_capture->tcs_map; + TupleConversionMap *map = relinfo->ri_ChildToRootMap; bool delete_old_table = transition_capture->tcs_delete_old_table; bool update_old_table = transition_capture->tcs_update_old_table; bool update_new_table = transition_capture->tcs_update_new_table; diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 33d2c6f..08f91e5 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -908,6 +908,15 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, } /* + * Also, if transition capture is required, store a map to convert tuples + * from partition's rowtype to the root partition table's. + */ + if (mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture) + leaf_part_rri->ri_ChildToRootMap = + convert_tuples_by_name(RelationGetDescr(leaf_part_rri->ri_RelationDesc), + RelationGetDescr(leaf_part_rri->ri_PartitionRoot)); + + /* * Since we've just initialized this ResultRelInfo, it's not in any list * attached to the estate as yet. Add it, so that it can be found later. * @@ -977,20 +986,6 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, partrouteinfo->pi_PartitionTupleSlot = NULL; /* - * Also, if transition capture is required, store a map to convert tuples - * from partition's rowtype to the root partition table's. - */ - if (mtstate && - (mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture)) - { - partrouteinfo->pi_PartitionToRootMap = - convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc), - RelationGetDescr(partRelInfo->ri_PartitionRoot)); - } - else - partrouteinfo->pi_PartitionToRootMap = NULL; - - /* * If the partition is a foreign table, let the FDW init itself for * routing tuples to the partition. */ diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0c055ed..5efbfb6 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -73,9 +73,6 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, TupleTableSlot *slot, ResultRelInfo **partRelInfo); static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node); -static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate); -static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node, - int whichplan); /* * Verify that the tuples to be produced by INSERT or UPDATE match the @@ -1087,9 +1084,7 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate, { EState *estate = mtstate->ps.state; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; - int map_index; TupleConversionMap *tupconv_map; - TupleConversionMap *saved_tcs_map = NULL; bool tuple_deleted; TupleTableSlot *epqslot = NULL; @@ -1164,38 +1159,26 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate, /* * resultRelInfo is one of the per-subplan resultRelInfos. So we should - * convert the tuple into root's tuple descriptor, since ExecInsert() - * starts the search from root. The tuple conversion map list is in the - * order of mtstate->resultRelInfo[], so to retrieve the one for this - * resultRel, we need to know the position of the resultRel in - * mtstate->resultRelInfo[]. + * convert the tuple into root's tuple descriptor if needed, since + * ExecInsert() starts the search from root. */ - map_index = resultRelInfo - mtstate->resultRelInfo; - Assert(map_index >= 0 && map_index < mtstate->mt_nplans); - tupconv_map = tupconv_map_for_subplan(mtstate, map_index); + tupconv_map = resultRelInfo->ri_ChildToRootMap; if (tupconv_map != NULL) slot = execute_attr_map_slot(tupconv_map->attrMap, slot, mtstate->mt_root_tuple_slot); - /* - * ExecInsert() may scribble on mtstate->mt_transition_capture, so save - * the currently active map. - */ - if (mtstate->mt_transition_capture) - saved_tcs_map = mtstate->mt_transition_capture->tcs_map; - /* Tuple routing starts from the root table. */ Assert(mtstate->rootResultRelInfo != NULL); *inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot, planSlot, estate, canSetTag); - /* Clear the INSERT's tuple and restore the saved map. */ + /* + * Reset the transition state that may possibly have been written + * by INSERT. + */ if (mtstate->mt_transition_capture) - { mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; - mtstate->mt_transition_capture->tcs_map = saved_tcs_map; - } /* We're done moving. */ return true; @@ -1902,28 +1885,6 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, RelationGetRelid(targetRelInfo->ri_RelationDesc), CMD_UPDATE); - - /* - * If we found that we need to collect transition tuples then we may also - * need tuple conversion maps for any children that have TupleDescs that - * aren't compatible with the tuplestores. (We can share these maps - * between the regular and ON CONFLICT cases.) - */ - if (mtstate->mt_transition_capture != NULL || - mtstate->mt_oc_transition_capture != NULL) - { - ExecSetupChildParentMapForSubplan(mtstate); - - /* - * Install the conversion map for the first plan for UPDATE and DELETE - * operations. It will be advanced each time we switch to the next - * plan. (INSERT operations set it every time, so we need not update - * mtstate->mt_oc_transition_capture here.) - */ - if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT) - mtstate->mt_transition_capture->tcs_map = - tupconv_map_for_subplan(mtstate, 0); - } } /* @@ -1947,6 +1908,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, ResultRelInfo *partrel; PartitionRoutingInfo *partrouteinfo; TupleConversionMap *map; + bool has_before_insert_row_trig; /* * Lookup the target partition's ResultRelInfo. If ExecFindPartition does @@ -1960,37 +1922,15 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, Assert(partrouteinfo != NULL); /* - * If we're capturing transition tuples, we might need to convert from the - * partition rowtype to root partitioned table's rowtype. + * If we're capturing transition tuples and there are no BEFORE triggers + * on the partition which may change the tuple, we can just remember the + * original unconverted tuple to avoid a needless round trip conversion. */ + has_before_insert_row_trig = (partrel->ri_TrigDesc && + partrel->ri_TrigDesc->trig_insert_before_row); if (mtstate->mt_transition_capture != NULL) - { - if (partrel->ri_TrigDesc && - partrel->ri_TrigDesc->trig_insert_before_row) - { - /* - * If there are any BEFORE triggers on the partition, we'll have - * to be ready to convert their result back to tuplestore format. - */ - mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; - mtstate->mt_transition_capture->tcs_map = - partrouteinfo->pi_PartitionToRootMap; - } - else - { - /* - * Otherwise, just remember the original unconverted tuple, to - * avoid a needless round trip conversion. - */ - mtstate->mt_transition_capture->tcs_original_insert_tuple = slot; - mtstate->mt_transition_capture->tcs_map = NULL; - } - } - if (mtstate->mt_oc_transition_capture != NULL) - { - mtstate->mt_oc_transition_capture->tcs_map = - partrouteinfo->pi_PartitionToRootMap; - } + mtstate->mt_transition_capture->tcs_original_insert_tuple = + !has_before_insert_row_trig ? slot : NULL; /* * Convert the tuple, if necessary. @@ -2007,58 +1947,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, return slot; } -/* - * Initialize the child-to-root tuple conversion map array for UPDATE subplans. - * - * This map array is required to convert the tuple from the subplan result rel - * to the target table descriptor. This requirement arises for two independent - * scenarios: - * 1. For update-tuple-routing. - * 2. For capturing tuples in transition tables. - */ -static void -ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) -{ - ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); - ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; - TupleDesc outdesc; - int numResultRelInfos = mtstate->mt_nplans; - int i; - - /* - * Build array of conversion maps from each child's TupleDesc to the one - * used in the target relation. The map pointers may be NULL when no - * conversion is necessary, which is hopefully a common case. - */ - - /* Get tuple descriptor of the target rel. */ - outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc); - - mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **) - palloc(sizeof(TupleConversionMap *) * numResultRelInfos); - - for (i = 0; i < numResultRelInfos; ++i) - { - mtstate->mt_per_subplan_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), - outdesc); - } -} - -/* - * For a given subplan index, get the tuple conversion map. - */ -static TupleConversionMap * -tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan) -{ - /* If nobody else set the per-subplan array of maps, do so ourselves. */ - if (mtstate->mt_per_subplan_tupconv_maps == NULL) - ExecSetupChildParentMapForSubplan(mtstate); - - Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans); - return mtstate->mt_per_subplan_tupconv_maps[whichplan]; -} - /* ---------------------------------------------------------------- * ExecModifyTable * @@ -2154,17 +2042,6 @@ ExecModifyTable(PlanState *pstate) junkfilter = resultRelInfo->ri_junkFilter; EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan, node->mt_arowmarks[node->mt_whichplan]); - /* Prepare to convert transition tuples from this child. */ - if (node->mt_transition_capture != NULL) - { - node->mt_transition_capture->tcs_map = - tupconv_map_for_subplan(node, node->mt_whichplan); - } - if (node->mt_oc_transition_capture != NULL) - { - node->mt_oc_transition_capture->tcs_map = - tupconv_map_for_subplan(node, node->mt_whichplan); - } continue; } else @@ -2334,6 +2211,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; + ResultRelInfo *rootResultRelInfo; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2355,13 +2233,24 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) palloc(nplans * sizeof(ResultRelInfo)); mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); - /* If modifying a partitioned table, initialize the root table info */ + /* + * Initialize the designated "root" result relation. When modifying + * partitioned tables, it's given by node->rootRelation, while in other + * cases, it's the first relation in node->resultRelations. We need to + * initialize this one before any others, because + * ExecSetupTransitionCaptureState() needs it. + */ if (node->rootRelation > 0) { mtstate->rootResultRelInfo = makeNode(ResultRelInfo); ExecInitResultRelation(estate, mtstate->rootResultRelInfo, node->rootRelation); } + else + ExecInitResultRelation(estate, mtstate->resultRelInfo, + linitial_int(node->resultRelations)); + + rootResultRelInfo = getTargetResultRelInfo(mtstate); mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); mtstate->mt_nplans = nplans; @@ -2371,6 +2260,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->fireBSTriggers = true; /* + * Build state for collecting transition tuples. This requires having a + * valid trigger query context, so skip it in explain-only mode. + */ + if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) + ExecSetupTransitionCaptureState(mtstate, estate); + + /* * call ExecInitNode on each of the plans to be executed and save the * results into the array "mt_plans". This is also a convenient place to * verify that the proposed target relations are valid and open their @@ -2384,8 +2280,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) subplan = (Plan *) lfirst(l1); - /* This opens the relation and fills ResultRelInfo. */ - ExecInitResultRelation(estate, resultRelInfo, resultRelation); + /* + * This opens result relation and fills ResultRelInfo. + * ("root" relation already opened.) + */ + if (resultRelInfo != rootResultRelInfo) + ExecInitResultRelation(estate, resultRelInfo, resultRelation); /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, @@ -2441,12 +2341,29 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) eflags); } + /* + * If needed, initialize a map to convert tuples in the child format + * to the format of the table mentioned in the query (root relation). + * It's needed for update tuple routing, because the routing starts + * from the root relation. It's also needed for capturing transition + * tuples, because the transition tuple store can only store tuples + * in the root table format. + * + * For INSERT, the map is only initialized for a given partition when + * the partition itself is first initialized by ExecFindPartition(). + */ + if (update_tuple_routing_needed || + (mtstate->mt_transition_capture && + mtstate->operation != CMD_INSERT)) + resultRelInfo->ri_ChildToRootMap = + convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc), + RelationGetDescr(rootResultRelInfo->ri_RelationDesc)); resultRelInfo++; i++; } /* Get the target relation */ - rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc; + rel = rootResultRelInfo->ri_RelationDesc; /* * If it's not a partitioned table after all, UPDATE tuple routing should @@ -2465,26 +2382,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ExecSetupPartitionTupleRouting(estate, mtstate, rel); /* - * Build state for collecting transition tuples. This requires having a - * valid trigger query context, so skip it in explain-only mode. - */ - if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) - ExecSetupTransitionCaptureState(mtstate, estate); - - /* - * Construct mapping from each of the per-subplan partition attnos to the - * root attno. This is required when during update row movement the tuple - * descriptor of a source partition does not match the root partitioned - * table descriptor. In such a case we need to convert tuples to the root - * tuple descriptor, because the search for destination partition starts - * from the root. We'll also need a slot to store these converted tuples. - * We can skip this setup if it's not a partition key update. + * For update row movement we'll need a dedicated slot to store the + * tuples that have been converted from partition format to the root + * table format. */ if (update_tuple_routing_needed) - { - ExecSetupChildParentMapForSubplan(mtstate); mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL); - } /* * Initialize any WITH CHECK OPTION constraints if needed. diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index a40ddf5..e38d732 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -46,7 +46,7 @@ typedef struct TriggerData * The state for capturing old and new tuples into transition tables for a * single ModifyTable node (or other operation source, e.g. copy.c). * - * This is per-caller to avoid conflicts in setting tcs_map or + * This is per-caller to avoid conflicts in setting * tcs_original_insert_tuple. Note, however, that the pointed-to * private data may be shared across multiple callers. */ @@ -66,14 +66,6 @@ typedef struct TransitionCaptureState bool tcs_insert_new_table; /* - * For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the - * new and old tuples from a child table's format to the format of the - * relation named in a query so that it is compatible with the transition - * tuplestores. The caller must store the conversion map here if so. - */ - TupleConversionMap *tcs_map; - - /* * For INSERT and COPY, it would be wasteful to convert tuples from child * format to parent format after they have already been converted in the * opposite direction during routing. In that case we bypass conversion diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 6d1b722..74c3991 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -37,12 +37,6 @@ typedef struct PartitionRoutingInfo TupleConversionMap *pi_RootToPartitionMap; /* - * Map for converting tuples in partition format into the root partitioned - * table format, or NULL if no conversion is required. - */ - TupleConversionMap *pi_PartitionToRootMap; - - /* * Slot to store tuples in partition format, or NULL when no translation * is required between root and partition. */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b7e9e5d..ce83b81 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -488,6 +488,14 @@ typedef struct ResultRelInfo /* for use by copy.c when performing multi-inserts */ struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer; + + /* + * Map to convert child result relation tuples to the format of the + * table actually mentioned in the query (called "root"). Set only + * if either transition tuple capture or update partition row + * movement is active. + */ + TupleConversionMap *ri_ChildToRootMap; } ResultRelInfo; /* ---------------- @@ -1174,9 +1182,6 @@ typedef struct ModifyTableState /* controls transition table population for INSERT...ON CONFLICT UPDATE */ struct TransitionCaptureState *mt_oc_transition_capture; - - /* Per plan map for tuple conversion from child to root */ - TupleConversionMap **mt_per_subplan_tupconv_maps; } ModifyTableState; /* ---------------- -- 1.8.3.1