diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 3395445..e89343f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -575,11 +575,18 @@ heapgettup(HeapScanDesc scan, * forward scanners. */ scan->rs_syncscan = false; + /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; + if (scan->rs_numblocks == InvalidBlockNumber) + { + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; + } else - page = scan->rs_nblocks - 1; + page = scan->rs_startblock + scan->rs_numblocks - 1; + heapgetpage(scan, page); } else @@ -876,11 +883,18 @@ heapgettup_pagemode(HeapScanDesc scan, * forward scanners. */ scan->rs_syncscan = false; + /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; + if (scan->rs_numblocks == InvalidBlockNumber) + { + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; + } else - page = scan->rs_nblocks - 1; + page = scan->rs_startblock + scan->rs_numblocks - 1; + heapgetpage(scan, page); } else diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index ed6afe7..aed7016 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -111,6 +111,7 @@ static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static void show_eval_params(Bitmapset *bms_params, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static void show_buffer_usage(ExplainState *es, const BufferUsage *usage); +static void show_scan_direction(ExplainState *es, ScanDirection direction); static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, ExplainState *es); static void ExplainScanTarget(Scan *plan, ExplainState *es); @@ -1245,7 +1246,6 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_SeqScan: case 
T_SampleScan: case T_BitmapHeapScan: - case T_TidScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -1254,6 +1254,10 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_WorkTableScan: ExplainScanTarget((Scan *) plan, es); break; + case T_TidScan: + show_scan_direction(es, ((TidScan *) plan)->direction); + ExplainScanTarget((Scan *) plan, es); + break; case T_ForeignScan: case T_CustomScan: if (((Scan *) plan)->scanrelid > 0) @@ -2867,25 +2871,21 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage) } /* - * Add some additional details about an IndexScan or IndexOnlyScan + * Show the direction of a scan. */ static void -ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, - ExplainState *es) +show_scan_direction(ExplainState *es, ScanDirection direction) { - const char *indexname = explain_get_index_name(indexid); - if (es->format == EXPLAIN_FORMAT_TEXT) { - if (ScanDirectionIsBackward(indexorderdir)) + if (ScanDirectionIsBackward(direction)) appendStringInfoString(es->str, " Backward"); - appendStringInfo(es->str, " using %s", indexname); } else { const char *scandir; - switch (indexorderdir) + switch (direction) { case BackwardScanDirection: scandir = "Backward"; @@ -2901,8 +2901,24 @@ ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, break; } ExplainPropertyText("Scan Direction", scandir, es); + } +} + +/* + * Add some additional details about an IndexScan or IndexOnlyScan + */ +static void +ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es) +{ + const char *indexname = explain_get_index_name(indexid); + + show_scan_direction(es, indexorderdir); + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " using %s", indexname); + else ExplainPropertyText("Index Name", indexname, es); - } } /* diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 0cb1946..9b455d8 100644 --- a/src/backend/executor/nodeTidscan.c 
+++ b/src/backend/executor/nodeTidscan.c @@ -22,7 +22,9 @@ */ #include "postgres.h" +#include "access/relscan.h" #include "access/sysattr.h" +#include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "executor/execdebug.h" #include "executor/nodeTidscan.h" @@ -39,21 +41,78 @@ ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ ((Var *) (node))->varlevelsup == 0) +typedef enum +{ + TIDEXPR_CURRENT_OF, + TIDEXPR_IN_ARRAY, + TIDEXPR_EQ, + TIDEXPR_LT, + TIDEXPR_GT, + TIDEXPR_BETWEEN, + TIDEXPR_ANY +} TidExprType; + /* one element in tss_tidexprs */ typedef struct TidExpr { + TidExprType type; ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ - bool isarray; /* if true, it yields tid[] not just tid */ - CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */ + ExprState *exprstate2; /* For TIDEXPR_BETWEEN */ + CurrentOfExpr *cexpr; /* For TIDEXPR_CURRENT_OF */ + bool strict; /* Indicates < rather than <=, or > rather */ + bool strict2; /* than >= */ } TidExpr; +typedef struct TidRange +{ + ItemPointerData first; + ItemPointerData last; +} TidRange; + +static ExprState *MakeTidOpExprState(OpExpr *expr, TidScanState *tidstate, bool *strict, bool *invert); static void TidExprListCreate(TidScanState *tidstate); +static TidRange * EnlargeTidRangeArray(TidRange * tidRanges, int numRanges, int *numAllocRanges); +static bool SetTidLowerBound(ItemPointer tid, bool strict, int nblocks, ItemPointer lowerBound); +static bool SetTidUpperBound(ItemPointer tid, bool strict, int nblocks, ItemPointer upperBound); static void TidListEval(TidScanState *tidstate); +static bool MergeTidRanges(TidRange * a, TidRange * b); static int itemptr_comparator(const void *a, const void *b); +static int tidrange_comparator(const void *a, const void *b); +static HeapScanDesc BeginTidRangeScan(TidScanState *node, TidRange * range); +static HeapTuple NextInTidRange(HeapScanDesc scandesc, ScanDirection direction, TidRange * range); static TupleTableSlot 
*TidNext(TidScanState *node); /* + * Create an ExprState corresponding to the value part of a TID comparison. + * If the comparison operator is > or <, strict is set. + * If the comparison is of the form VALUE op CTID, then invert is set. + */ +static ExprState * +MakeTidOpExprState(OpExpr *expr, TidScanState *tidstate, bool *strict, bool *invert) +{ + Node *arg1 = get_leftop((Expr *) expr); + Node *arg2 = get_rightop((Expr *) expr); + ExprState *exprstate = NULL; + + *invert = false; + + if (IsCTIDVar(arg1)) + exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps); + else if (IsCTIDVar(arg2)) + { + exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps); + *invert = true; + } + else + elog(ERROR, "could not identify CTID variable"); + + *strict = expr->opno == TIDLessOperator || expr->opno == TIDGreaterOperator; + + return exprstate; +} + +/* * Extract the qual subexpressions that yield TIDs to search for, * and compile them into ExprStates if they're ordinary expressions. * @@ -69,6 +128,14 @@ TidExprListCreate(TidScanState *tidstate) tidstate->tss_tidexprs = NIL; tidstate->tss_isCurrentOf = false; + if (!node->tidquals) + { + TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr)); + + tidexpr->type = TIDEXPR_ANY; + tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr); + } + foreach(l, node->tidquals) { Expr *expr = (Expr *) lfirst(l); @@ -76,20 +143,16 @@ TidExprListCreate(TidScanState *tidstate) if (is_opclause(expr)) { - Node *arg1; - Node *arg2; + OpExpr *opexpr = (OpExpr *) expr; + bool invert; - arg1 = get_leftop(expr); - arg2 = get_rightop(expr); - if (IsCTIDVar(arg1)) - tidexpr->exprstate = ExecInitExpr((Expr *) arg2, - &tidstate->ss.ps); - else if (IsCTIDVar(arg2)) - tidexpr->exprstate = ExecInitExpr((Expr *) arg1, - &tidstate->ss.ps); + tidexpr->exprstate = MakeTidOpExprState(opexpr, tidstate, &tidexpr->strict, &invert); + if (opexpr->opno == TIDLessOperator || opexpr->opno == TIDLessEqOperator) + tidexpr->type = invert ? 
TIDEXPR_GT : TIDEXPR_LT; + else if (opexpr->opno == TIDGreaterOperator || opexpr->opno == TIDGreaterEqOperator) + tidexpr->type = invert ? TIDEXPR_LT : TIDEXPR_GT; else - elog(ERROR, "could not identify CTID variable"); - tidexpr->isarray = false; + tidexpr->type = TIDEXPR_EQ; } else if (expr && IsA(expr, ScalarArrayOpExpr)) { @@ -98,15 +161,46 @@ TidExprListCreate(TidScanState *tidstate) Assert(IsCTIDVar(linitial(saex->args))); tidexpr->exprstate = ExecInitExpr(lsecond(saex->args), &tidstate->ss.ps); - tidexpr->isarray = true; + tidexpr->type = TIDEXPR_IN_ARRAY; } else if (expr && IsA(expr, CurrentOfExpr)) { CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; tidexpr->cexpr = cexpr; + tidexpr->type = TIDEXPR_CURRENT_OF; tidstate->tss_isCurrentOf = true; } + else if (and_clause((Node *) expr)) + { + OpExpr *arg1; + OpExpr *arg2; + bool invert; + bool invert2; + + Assert(list_length(((BoolExpr *) expr)->args) == 2); + arg1 = (OpExpr *) linitial(((BoolExpr *) expr)->args); + arg2 = (OpExpr *) lsecond(((BoolExpr *) expr)->args); + tidexpr->exprstate = MakeTidOpExprState(arg1, tidstate, &tidexpr->strict, &invert); + tidexpr->exprstate2 = MakeTidOpExprState(arg2, tidstate, &tidexpr->strict2, &invert2); + + /* If the LHS is not the lower bound, swap them. 
*/ + if (invert == (arg1->opno == TIDGreaterOperator || arg1->opno == TIDGreaterEqOperator)) + { + bool temp_strict; + ExprState *temp_es; + + temp_es = tidexpr->exprstate; + tidexpr->exprstate = tidexpr->exprstate2; + tidexpr->exprstate2 = temp_es; + + temp_strict = tidexpr->strict; + tidexpr->strict = tidexpr->strict2; + tidexpr->strict2 = temp_strict; + } + + tidexpr->type = TIDEXPR_BETWEEN; + } else elog(ERROR, "could not identify CTID expression"); @@ -118,6 +212,114 @@ TidExprListCreate(TidScanState *tidstate) !tidstate->tss_isCurrentOf); } +static TidRange * +EnlargeTidRangeArray(TidRange * tidRanges, int numRanges, int *numAllocRanges) +{ + if (numRanges >= *numAllocRanges) + { + *numAllocRanges *= 2; + tidRanges = (TidRange *) + repalloc(tidRanges, + *numAllocRanges * sizeof(TidRange)); + } + return tidRanges; +} + +/* + * Set a lower bound tid, taking into account the strictness of the bound. + * Return false if the lower bound is outside the size of the table. + */ +static bool +SetTidLowerBound(ItemPointer tid, bool strict, int nblocks, ItemPointer lowerBound) +{ + OffsetNumber offset; + + if (tid == NULL) + { + ItemPointerSetBlockNumber(lowerBound, 0); + ItemPointerSetOffsetNumber(lowerBound, 1); + return true; + } + + /* Valid blocks are 0 .. nblocks - 1, so block nblocks is already out of range. */ + if (ItemPointerGetBlockNumberNoCheck(tid) >= nblocks) + return false; + + *lowerBound = *tid; + offset = ItemPointerGetOffsetNumberNoCheck(tid); + + if (strict) + ItemPointerSetOffsetNumber(lowerBound, OffsetNumberNext(offset)); + else if (offset == 0) + ItemPointerSetOffsetNumber(lowerBound, 1); + + return true; +} + +/* + * Set an upper bound tid, taking into account the strictness of the bound. + * Return false if the bound excludes anything from the table. + */ +static bool +SetTidUpperBound(ItemPointer tid, bool strict, int nblocks, ItemPointer upperBound) +{ + OffsetNumber offset; + + /* If the table is empty, the range must be empty. 
*/ + if (nblocks == 0) + return false; + + if (tid == NULL) + { + ItemPointerSetBlockNumber(upperBound, nblocks - 1); + ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber); + return true; + } + + *upperBound = *tid; + offset = ItemPointerGetOffsetNumberNoCheck(tid); + + /* + * Offset 0 doesn't exist, so a non-strict bound at offset 0 is treated + * as strict, and a strict bound at offset 0 or 1 excludes its whole + * block: the range then ends at the last TID of the previous block. + */ + if (!strict && offset == 0) + strict = true; + + if (strict) + { + if (offset <= 1) + { + BlockNumber block = ItemPointerGetBlockNumberNoCheck(upperBound); + + /* + * If the upper bound was already block 0, then there is no valid + * range. + */ + if (block == 0) + return false; + + ItemPointerSetBlockNumber(upperBound, block - 1); + ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber); + } + else + ItemPointerSetOffsetNumber(upperBound, OffsetNumberPrev(offset)); + } + + /* + * If the upper bound is beyond the last block of the table, truncate it + * to the last TID of the last block. + */ + if (ItemPointerGetBlockNumberNoCheck(upperBound) >= nblocks) + { + ItemPointerSetBlockNumber(upperBound, nblocks - 1); + ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber); + } + + return true; +} + /* * Compute the list of TIDs to be visited, by evaluating the expressions * for them. @@ -129,9 +330,9 @@ TidListEval(TidScanState *tidstate) { ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; BlockNumber nblocks; - ItemPointerData *tidList; - int numAllocTids; - int numTids; + TidRange *tidRanges; + int numAllocRanges; + int numRanges; ListCell *l; /* @@ -147,10 +348,9 @@ TidListEval(TidScanState *tidstate) * are simple OpExprs or CurrentOfExprs. If there are any * ScalarArrayOpExprs, we may have to enlarge the array. 
*/ - numAllocTids = list_length(tidstate->tss_tidexprs); - tidList = (ItemPointerData *) - palloc(numAllocTids * sizeof(ItemPointerData)); - numTids = 0; + numAllocRanges = list_length(tidstate->tss_tidexprs); + tidRanges = (TidRange *) palloc0(numAllocRanges * sizeof(TidRange)); + numRanges = 0; foreach(l, tidstate->tss_tidexprs) { @@ -158,7 +358,7 @@ TidListEval(TidScanState *tidstate) ItemPointer itemptr; bool isNull; - if (tidexpr->exprstate && !tidexpr->isarray) + if (tidexpr->exprstate && tidexpr->type == TIDEXPR_EQ) { itemptr = (ItemPointer) DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, @@ -168,17 +368,78 @@ TidListEval(TidScanState *tidstate) ItemPointerIsValid(itemptr) && ItemPointerGetBlockNumber(itemptr) < nblocks) { - if (numTids >= numAllocTids) - { - numAllocTids *= 2; - tidList = (ItemPointerData *) - repalloc(tidList, - numAllocTids * sizeof(ItemPointerData)); - } - tidList[numTids++] = *itemptr; + tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges); + tidRanges[numRanges].first = *itemptr; + tidRanges[numRanges].last = *itemptr; + numRanges++; } } - else if (tidexpr->exprstate && tidexpr->isarray) + else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_LT) + { + bool upper_isNull; + ItemPointer upper_itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, + econtext, + &upper_isNull)); + + if (upper_isNull) + continue; + + tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges); + + SetTidLowerBound(NULL, false, nblocks, &tidRanges[numRanges].first); + if (SetTidUpperBound(upper_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].last)) + numRanges++; + } + else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_GT) + { + bool lower_isNull; + ItemPointer lower_itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, + econtext, + &lower_isNull)); + + if (lower_isNull) + continue; + + tidRanges = EnlargeTidRangeArray(tidRanges, 
numRanges, &numAllocRanges); + + if (SetTidLowerBound(lower_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].first) && + SetTidUpperBound(NULL, false, nblocks, &tidRanges[numRanges].last)) + numRanges++; + } + else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_BETWEEN) + { + bool lower_isNull, + upper_isNull; + ItemPointer lower_itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, + econtext, + &lower_isNull)); + ItemPointer upper_itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate2, + econtext, + &upper_isNull)); + + if (lower_isNull || upper_isNull) + continue; + + tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges); + + /* Skip inverted ranges (lower bound above upper bound): they match nothing. */ + if (SetTidLowerBound(lower_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].first) && + SetTidUpperBound(upper_itemptr, tidexpr->strict2, nblocks, &tidRanges[numRanges].last) && + itemptr_comparator(&tidRanges[numRanges].first, &tidRanges[numRanges].last) <= 0) + numRanges++; + } + else if (tidexpr->type == TIDEXPR_ANY) + { + tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges); + SetTidLowerBound(NULL, false, nblocks, &tidRanges[numRanges].first); + if (SetTidUpperBound(NULL, false, nblocks, &tidRanges[numRanges].last)) + numRanges++; + } + else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_IN_ARRAY) { Datum arraydatum; ArrayType *itemarray; @@ -196,12 +455,12 @@ TidListEval(TidScanState *tidstate) deconstruct_array(itemarray, TIDOID, sizeof(ItemPointerData), false, 's', &ipdatums, &ipnulls, &ndatums); - if (numTids + ndatums > numAllocTids) + if (numRanges + ndatums > numAllocRanges) { - numAllocTids = numTids + ndatums; - tidList = (ItemPointerData *) - repalloc(tidList, - numAllocTids * sizeof(ItemPointerData)); + numAllocRanges = numRanges + ndatums; + tidRanges = (TidRange *) + repalloc(tidRanges, + numAllocRanges * sizeof(TidRange)); } for (i = 0; i < ndatums; i++) { @@ -210,13 +469,17 @@ TidListEval(TidScanState *tidstate) itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); if 
(ItemPointerIsValid(itemptr) && ItemPointerGetBlockNumber(itemptr) < nblocks) - tidList[numTids++] = *itemptr; + { + tidRanges[numRanges].first = *itemptr; + tidRanges[numRanges].last = *itemptr; + numRanges++; + } } } pfree(ipdatums); pfree(ipnulls); } - else + else if (tidexpr->type == TIDEXPR_CURRENT_OF) { ItemPointerData cursor_tid; @@ -225,16 +486,20 @@ TidListEval(TidScanState *tidstate) RelationGetRelid(tidstate->ss.ss_currentRelation), &cursor_tid)) { - if (numTids >= numAllocTids) - { - numAllocTids *= 2; - tidList = (ItemPointerData *) - repalloc(tidList, - numAllocTids * sizeof(ItemPointerData)); - } - tidList[numTids++] = cursor_tid; + /* + * A current-of TidExpr only exists by itself, and we should + * already have allocated a tidRanges entry for it. We don't + * need to check whether the tidRanges array needs to be + * resized. + */ + Assert(numRanges < numAllocRanges); + tidRanges[numRanges].first = cursor_tid; + tidRanges[numRanges].last = cursor_tid; + numRanges++; } } + else + Assert(false); } /* @@ -243,31 +508,55 @@ TidListEval(TidScanState *tidstate) * the list. Sorting makes it easier to detect duplicates, and as a bonus * ensures that we will visit the heap in the most efficient way. 
*/ - if (numTids > 1) + if (numRanges > 1) { - int lastTid; + int lastRange; int i; /* CurrentOfExpr could never appear OR'd with something else */ Assert(!tidstate->tss_isCurrentOf); - qsort((void *) tidList, numTids, sizeof(ItemPointerData), - itemptr_comparator); - lastTid = 0; - for (i = 1; i < numTids; i++) + qsort((void *) tidRanges, numRanges, sizeof(TidRange), tidrange_comparator); + lastRange = 0; + for (i = 1; i < numRanges; i++) { - if (!ItemPointerEquals(&tidList[lastTid], &tidList[i])) - tidList[++lastTid] = tidList[i]; + if (!MergeTidRanges(&tidRanges[lastRange], &tidRanges[i])) + tidRanges[++lastRange] = tidRanges[i]; } - numTids = lastTid + 1; + numRanges = lastRange + 1; } - tidstate->tss_TidList = tidList; - tidstate->tss_NumTids = numTids; + tidstate->tss_TidRanges = tidRanges; + tidstate->tss_NumRanges = numRanges; tidstate->tss_TidPtr = -1; } /* + * If two ranges overlap, merge them into one. + * Assumes the two ranges are already ordered by (first, last). + * Returns true if they were merged. 
+ */ +static bool +MergeTidRanges(TidRange * a, TidRange * b) +{ + ItemPointerData a_last = a->last; + ItemPointerData b_last; + + if (!ItemPointerIsValid(&a_last)) + a_last = a->first; + /* Ranges are inclusive, so they are disjoint only if a ends before b starts. */ + if (itemptr_comparator(&a_last, &b->first) < 0) + return false; + + b_last = b->last; + if (!ItemPointerIsValid(&b_last)) + b_last = b->first; + /* b may lie wholly inside a; keep the larger end so a never shrinks. */ + a->last = (itemptr_comparator(&a_last, &b_last) < 0) ? b_last : a_last; + return true; +} + +/* * qsort comparator for ItemPointerData items */ static int @@ -291,6 +580,86 @@ itemptr_comparator(const void *a, const void *b) return 0; } +/* + * qsort comparator for TidRange items + */ +static int +tidrange_comparator(const void *a, const void *b) +{ + const TidRange *tra = (const TidRange *) a; + const TidRange *trb = (const TidRange *) b; + int cmp_first = itemptr_comparator(&tra->first, &trb->first); + + if (cmp_first != 0) + return cmp_first; + else + return itemptr_comparator(&tra->last, &trb->last); +} + +static HeapScanDesc +BeginTidRangeScan(TidScanState *node, TidRange * range) +{ + HeapScanDesc scandesc = node->ss.ss_currentScanDesc; + BlockNumber first_block = ItemPointerGetBlockNumberNoCheck(&range->first); + BlockNumber last_block = ItemPointerGetBlockNumberNoCheck(&range->last); + + if (!scandesc) + { + EState *estate = node->ss.ps.state; + + scandesc = heap_beginscan_strat(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, + false, false); + node->ss.ss_currentScanDesc = scandesc; + } + else + heap_rescan(scandesc, NULL); + + heap_setscanlimits(scandesc, first_block, last_block - first_block + 1); + node->tss_inScan = true; + return scandesc; +} + +static HeapTuple +NextInTidRange(HeapScanDesc scandesc, ScanDirection direction, TidRange * range) +{ + BlockNumber first_block = ItemPointerGetBlockNumber(&range->first); + OffsetNumber first_offset = ItemPointerGetOffsetNumber(&range->first); + BlockNumber last_block = ItemPointerGetBlockNumber(&range->last); + OffsetNumber last_offset = ItemPointerGetOffsetNumber(&range->last); + HeapTuple tuple; + + for (;;) + { 
+ BlockNumber block; + OffsetNumber offset; + + tuple = heap_getnext(scandesc, direction); + if (!tuple) + break; + + /* Check that the tuple is within the required range. */ + block = ItemPointerGetBlockNumber(&tuple->t_self); + offset = ItemPointerGetOffsetNumber(&tuple->t_self); + + /* + * TODO if scanning forward, can stop as soon as we see a tuple + * greater than last_offset + */ + /* similarly with backward, less than, first_offset */ + if (block == first_block && offset < first_offset) + continue; + + if (block == last_block && offset > last_offset) + continue; + + break; + } + + return tuple; +} + /* ---------------------------------------------------------------- * TidNext * @@ -302,6 +671,7 @@ itemptr_comparator(const void *a, const void *b) static TupleTableSlot * TidNext(TidScanState *node) { + HeapScanDesc scandesc; EState *estate; ScanDirection direction; Snapshot snapshot; @@ -309,105 +679,149 @@ TidNext(TidScanState *node) HeapTuple tuple; TupleTableSlot *slot; Buffer buffer = InvalidBuffer; - ItemPointerData *tidList; - int numTids; + int numRanges; bool bBackward; /* * extract necessary information from tid scan node */ + scandesc = node->ss.ss_currentScanDesc; estate = node->ss.ps.state; direction = estate->es_direction; snapshot = estate->es_snapshot; heapRelation = node->ss.ss_currentRelation; slot = node->ss.ss_ScanTupleSlot; - /* - * First time through, compute the list of TIDs to be visited - */ - if (node->tss_TidList == NULL) + /* First time through, compute the list of TID ranges to be visited */ + if (node->tss_TidRanges == NULL) + { TidListEval(node); - tidList = node->tss_TidList; - numTids = node->tss_NumTids; + node->tss_TidPtr = -1; + } - /* - * We use node->tss_htup as the tuple pointer; note this can't just be a - * local variable here, as the scan tuple slot will keep a pointer to it. - */ - tuple = &(node->tss_htup); + numRanges = node->tss_NumRanges; - /* - * Initialize or advance scan position, depending on direction. 
- */ - bBackward = ScanDirectionIsBackward(direction); - if (bBackward) + /* If the plan direction is backward, invert the direction. */ + if (ScanDirectionIsBackward(((TidScan *) node->ss.ps.plan)->direction)) { - if (node->tss_TidPtr < 0) - { - /* initialize for backward scan */ - node->tss_TidPtr = numTids - 1; - } - else - node->tss_TidPtr--; + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; } - else + + tuple = NULL; + for (;;) { - if (node->tss_TidPtr < 0) + TidRange *currentRange; + + if (!node->tss_inScan) { - /* initialize for forward scan */ - node->tss_TidPtr = 0; + /* Initialize or advance scan position, depending on direction. */ + bBackward = ScanDirectionIsBackward(direction); + if (bBackward) + { + if (node->tss_TidPtr < 0) + { + /* initialize for backward scan */ + node->tss_TidPtr = numRanges - 1; + } + else + node->tss_TidPtr--; + } + else + { + if (node->tss_TidPtr < 0) + { + /* initialize for forward scan */ + node->tss_TidPtr = 0; + } + else + node->tss_TidPtr++; + } } - else - node->tss_TidPtr++; - } - while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) - { - tuple->t_self = tidList[node->tss_TidPtr]; + if (node->tss_TidPtr >= numRanges || node->tss_TidPtr < 0) + break; + + currentRange = &node->tss_TidRanges[node->tss_TidPtr]; - /* - * For WHERE CURRENT OF, the tuple retrieved from the cursor might - * since have been updated; if so, we should fetch the version that is - * current according to our snapshot. - */ + /* TODO ranges of size 1 should also use a simple tuple fetch */ if (node->tss_isCurrentOf) - heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self); - - if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) { /* - * Store the scanned tuple in the scan tuple slot of the scan - * state. Eventually we will only do this and not return a tuple. 
+ * We use node->tss_htup as the tuple pointer; note this can't + * just be a local variable here, as the scan tuple slot will keep + * a pointer to it. */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - buffer); /* buffer associated with - * tuple */ + tuple = &(node->tss_htup); + tuple->t_self = currentRange->first; /* - * At this point we have an extra pin on the buffer, because - * ExecStoreHeapTuple incremented the pin count. Drop our local - * pin. + * For WHERE CURRENT OF, the tuple retrieved from the cursor might + * since have been updated; if so, we should fetch the version + * that is current according to our snapshot. */ - ReleaseBuffer(buffer); + if (node->tss_isCurrentOf) + heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self); - return slot; + if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) + { + /* + * Store the scanned tuple in the scan tuple slot of the scan + * state. Eventually we will only do this and not return a + * tuple. + */ + ExecStoreBufferHeapTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + buffer); /* buffer associated with + * tuple */ + + /* + * At this point we have an extra pin on the buffer, because + * ExecStoreHeapTuple incremented the pin count. Drop our + * local pin. + */ + ReleaseBuffer(buffer); + + return slot; + } + else + { + tuple = NULL; + } } - /* Bad TID or failed snapshot qual; try next */ - if (bBackward) - node->tss_TidPtr--; else - node->tss_TidPtr++; + { + if (!node->tss_inScan) + scandesc = BeginTidRangeScan(node, currentRange); - CHECK_FOR_INTERRUPTS(); + tuple = NextInTidRange(scandesc, direction, currentRange); + if (tuple) + break; + + node->tss_inScan = false; + } } /* - * if we get here it means the tid scan failed so we are at the end of the - * scan.. + * save the tuple and the buffer returned to us by the access methods in + * our scan tuple slot and return the slot. 
Note: we pass 'false' because + * tuples returned by heap_getnext() are pointers onto disk pages and were + * not created with palloc() and so should not be pfree()'d. Note also + * that ExecStoreHeapTuple will increment the refcount of the buffer; the + * refcount will not be dropped until the tuple table slot is cleared. */ - return ExecClearTuple(slot); + if (tuple) + ExecStoreBufferHeapTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + scandesc->rs_cbuf); /* buffer associated + * with this tuple */ + else + ExecClearTuple(slot); + + return slot; } /* @@ -460,11 +874,13 @@ ExecTidScan(PlanState *pstate) void ExecReScanTidScan(TidScanState *node) { - if (node->tss_TidList) - pfree(node->tss_TidList); - node->tss_TidList = NULL; - node->tss_NumTids = 0; + if (node->tss_TidRanges) + pfree(node->tss_TidRanges); + + node->tss_TidRanges = NULL; + node->tss_NumRanges = 0; node->tss_TidPtr = -1; + node->tss_inScan = false; ExecScanReScan(&node->ss); } @@ -479,6 +895,8 @@ ExecReScanTidScan(TidScanState *node) void ExecEndTidScan(TidScanState *node) { + HeapScanDesc scan = node->ss.ss_currentScanDesc; + /* * Free the exprcontext */ @@ -490,6 +908,10 @@ ExecEndTidScan(TidScanState *node) ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); ExecClearTuple(node->ss.ss_ScanTupleSlot); + /* close heap scan */ + if (scan != NULL) + heap_endscan(scan); + /* * close the heap relation. */ @@ -529,11 +951,12 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags) ExecAssignExprContext(estate, &tidstate->ss.ps); /* - * mark tid list as not computed yet + * mark tid range list as not computed yet */ - tidstate->tss_TidList = NULL; - tidstate->tss_NumTids = 0; + tidstate->tss_TidRanges = NULL; + tidstate->tss_NumRanges = 0; tidstate->tss_TidPtr = -1; + tidstate->tss_inScan = false; /* * open the base relation and acquire appropriate lock on it. 
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 7c8220c..5f84984 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -583,6 +583,7 @@ _copyTidScan(const TidScan *from) * copy remainder of node */ COPY_NODE_FIELD(tidquals); + COPY_SCALAR_FIELD(direction); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 93f1e2c..e20ef0e 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -619,6 +619,7 @@ _outTidScan(StringInfo str, const TidScan *node) _outScanInfo(str, (const Scan *) node); WRITE_NODE_FIELD(tidquals); + WRITE_ENUM_FIELD(direction, ScanDirection); } static void @@ -1895,6 +1896,7 @@ _outTidPath(StringInfo str, const TidPath *node) _outPathInfo(str, (const Path *) node); WRITE_NODE_FIELD(tidquals); + WRITE_ENUM_FIELD(direction, ScanDirection); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 519deab..79de340 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1848,6 +1848,7 @@ _readTidScan(void) ReadCommonScan(&local_node->scan); READ_NODE_FIELD(tidquals); + READ_ENUM_FIELD(direction, ScanDirection); READ_DONE(); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7bf67a0..72b4fc6 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1185,8 +1185,11 @@ cost_tidscan(Path *path, PlannerInfo *root, Cost cpu_per_tuple; QualCost tid_qual_cost; int ntuples; + int nrandompages; + int nseqpages; ListCell *l; double spc_random_page_cost; + double spc_seq_page_cost; /* Should only be applied to base relations */ Assert(baserel->relid > 0); @@ -1200,6 +1203,8 @@ cost_tidscan(Path *path, PlannerInfo *root, /* Count how many tuples we expect to retrieve */ ntuples = 0; + nrandompages = 0; + nseqpages = 0; foreach(l, tidquals) { if (IsA(lfirst(l), ScalarArrayOpExpr)) @@ -1207,19 
+1212,37 @@ cost_tidscan(Path *path, PlannerInfo *root, /* Each element of the array yields 1 tuple */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) lfirst(l); Node *arraynode = (Node *) lsecond(saop->args); + int array_len = estimate_array_length(arraynode); - ntuples += estimate_array_length(arraynode); + ntuples += array_len; + nrandompages += array_len; } else if (IsA(lfirst(l), CurrentOfExpr)) { /* CURRENT OF yields 1 tuple */ isCurrentOf = true; ntuples++; + nrandompages++; } else { - /* It's just CTID = something, count 1 tuple */ - ntuples++; + /* + * For anything else, we'll use the normal selectivity estimate. + * Count the first page as a random page, the rest as sequential. + */ + Selectivity selectivity = clause_selectivity(root, lfirst(l), + baserel->relid, + JOIN_INNER, + NULL); + BlockNumber pages = selectivity * baserel->pages; + + if (pages <= 0) + pages = 1; + + /* TODO decide what the costs should be */ + ntuples += selectivity * baserel->tuples; + nseqpages += pages - 1; + nrandompages++; } } @@ -1248,10 +1271,10 @@ cost_tidscan(Path *path, PlannerInfo *root, /* fetch estimated page cost for tablespace containing table */ get_tablespace_page_costs(baserel->reltablespace, &spc_random_page_cost, - NULL); + &spc_seq_page_cost); - /* disk costs --- assume each tuple on a different page */ - run_cost += spc_random_page_cost * ntuples; + /* disk costs: random page fetches plus sequentially read range pages */ + run_cost += spc_random_page_cost * nrandompages + spc_seq_page_cost * nseqpages; /* Add scanning CPU costs */ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ec66cb9..b847151 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -18,6 +18,9 @@ #include "postgres.h" #include "access/stratnum.h" +#include "access/sysattr.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include 
"nodes/nodeFuncs.h" #include "nodes/plannodes.h" @@ -848,6 +851,22 @@ build_join_pathkeys(PlannerInfo *root, return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); } +/* + * build_tidscan_pathkeys + * Build the path keys corresponding to ORDER BY ctid ASC|DESC. + */ +List * +build_tidscan_pathkeys(PlannerInfo *root, + RelOptInfo *rel, + ScanDirection direction) +{ + int opno = (direction == ForwardScanDirection) ? TIDLessOperator : TIDGreaterOperator; + Var *varexpr = makeVar(rel->relid, SelfItemPointerAttributeNumber, TIDOID, -1, InvalidOid, 0); + List *pathkeys = build_expression_pathkey(root, (Expr *) varexpr, NULL, opno, rel->relids, true); + + return pathkeys; +} + /**************************************************************************** * PATHKEYS AND SORT CLAUSES ****************************************************************************/ diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 3bb5b8d..8839aed 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -4,13 +4,16 @@ * Routines to determine which TID conditions are usable for scanning * a given relation, and create TidPaths accordingly. * - * What we are looking for here is WHERE conditions of the form - * "CTID = pseudoconstant", which can be implemented by just fetching - * the tuple directly via heap_fetch(). We can also handle OR'd conditions - * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr - * conditions of the form CTID = ANY(pseudoconstant_array). In particular - * this allows - * WHERE ctid IN (tid1, tid2, ...) + * What we are looking for here is WHERE conditions of the forms: + * - "CTID = c", which can be implemented by just fetching + * the tuple directly via heap_fetch(). + * - "CTID IN (pseudoconstant, ...)" or "CTID = ANY(pseudoconstant_array)" + * - "CTID > pseudoconstant", etc. for >, >=, <, and <=. 
+ * - "CTID > pseudoconstant AND CTID < pseudoconstant", etc., with up to one + * lower bound and one upper bound. + * + * We can also handle OR'd conditions of the above form, such as + * "(CTID = const1) OR (CTID >= const2) OR CTID IN (...)". * * We also support "WHERE CURRENT OF cursor" conditions (CurrentOfExpr), * which amount to "CTID = run-time-determined-TID". These could in @@ -46,32 +49,46 @@ #include "optimizer/restrictinfo.h" -static bool IsTidEqualClause(OpExpr *node, int varno); +static bool IsTidVar(Var *var, int varno); +static bool IsTidComparison(OpExpr *node, int varno, Oid expected_comparison_operator); static bool IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno); +static bool IsUsableRangeQual(Node *expr, int varno, bool want_lower_bound); +static List *MakeTidRangeQuals(List *quals); +static List *TidCompoundRangeQualFromExpr(Node *expr, int varno); static List *TidQualFromExpr(Node *expr, int varno); static List *TidQualFromBaseRestrictinfo(RelOptInfo *rel); +static bool +IsTidVar(Var *var, int varno) +{ + return (var->varattno == SelfItemPointerAttributeNumber && + var->vartype == TIDOID && + var->varno == varno && + var->varlevelsup == 0); +} + /* * Check to see if an opclause is of the form - * CTID = pseudoconstant + * CTID OP pseudoconstant * or - * pseudoconstant = CTID + * pseudoconstant OP CTID + * where OP is the expected comparison operator. * * We check that the CTID Var belongs to relation "varno". That is probably * redundant considering this is only applied to restriction clauses, but * let's be safe. 
*/ static bool -IsTidEqualClause(OpExpr *node, int varno) +IsTidComparison(OpExpr *node, int varno, Oid expected_comparison_operator) { Node *arg1, *arg2, *other; Var *var; - /* Operator must be tideq */ - if (node->opno != TIDEqualOperator) + /* Operator must be the expected one */ + if (node->opno != expected_comparison_operator) return false; if (list_length(node->args) != 2) return false; @@ -83,19 +100,13 @@ IsTidEqualClause(OpExpr *node, int varno) if (arg1 && IsA(arg1, Var)) { var = (Var *) arg1; - if (var->varattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID && - var->varno == varno && - var->varlevelsup == 0) + if (IsTidVar(var, varno)) other = arg2; } if (!other && arg2 && IsA(arg2, Var)) { var = (Var *) arg2; - if (var->varattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID && - var->varno == varno && - var->varlevelsup == 0) + if (IsTidVar(var, varno)) other = arg1; } if (!other) @@ -110,6 +121,17 @@ IsTidEqualClause(OpExpr *node, int varno) return true; /* success */ } +#define IsTidEqualClause(node, varno) IsTidComparison(node, varno, TIDEqualOperator) +#define IsTidLTClause(node, varno) IsTidComparison(node, varno, TIDLessOperator) +#define IsTidLEClause(node, varno) IsTidComparison(node, varno, TIDLessEqOperator) +#define IsTidGTClause(node, varno) IsTidComparison(node, varno, TIDGreaterOperator) +#define IsTidGEClause(node, varno) IsTidComparison(node, varno, TIDGreaterEqOperator) + +#define IsTidRangeClause(node, varno) (IsTidLTClause(node, varno) || \ + IsTidLEClause(node, varno) || \ + IsTidGTClause(node, varno) || \ + IsTidGEClause(node, varno)) + /* * Check to see if a clause is of the form * CTID = ANY (pseudoconstant_array) @@ -134,10 +156,7 @@ IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno) { Var *var = (Var *) arg1; - if (var->varattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID && - var->varno == varno && - var->varlevelsup == 0) + if (IsTidVar(var, varno)) { /* The other 
argument must be a pseudoconstant */ if (is_pseudo_constant_clause(arg2)) @@ -149,6 +168,76 @@ IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno) } /* + * IsUsableRangeQual + * Check if the expr is range qual of the expected type. + */ +static bool +IsUsableRangeQual(Node *expr, int varno, bool want_lower_bound) +{ + if (is_opclause(expr) && IsTidRangeClause((OpExpr *) expr, varno)) + { + bool is_lower_bound = IsTidGTClause((OpExpr *) expr, varno) || IsTidGEClause((OpExpr *) expr, varno); + Node *leftop = get_leftop((Expr *) expr); + + if (!IsA(leftop, Var) ||!IsTidVar((Var *) leftop, varno)) + is_lower_bound = !is_lower_bound; + + if (is_lower_bound == want_lower_bound) + return true; + } + + return false; +} + +static List * +MakeTidRangeQuals(List *quals) +{ + if (list_length(quals) == 1) + return quals; + else + return list_make1(make_andclause(quals)); +} + +/* + * TidCompoundRangeQualFromExpr + * + * Extract a compound CTID range condition from the given qual expression + */ +static List * +TidCompoundRangeQualFromExpr(Node *expr, int varno) +{ + List *rlst = NIL; + ListCell *l; + bool found_lower = false; + bool found_upper = false; + List *found_quals = NIL; + + foreach(l, ((BoolExpr *) expr)->args) + { + Node *clause = (Node *) lfirst(l); + + /* Check if this clause contains a range qual */ + if (!found_lower && IsUsableRangeQual(clause, varno, true)) + { + found_lower = true; + found_quals = lappend(found_quals, clause); + } + + if (!found_upper && IsUsableRangeQual(clause, varno, false)) + { + found_upper = true; + found_quals = lappend(found_quals, clause); + } + } + + /* If one or both range quals was specified, use them. 
*/ + if (found_quals) + rlst = MakeTidRangeQuals(found_quals); + + return rlst; +} + +/* * Extract a set of CTID conditions from the given qual expression * * Returns a List of CTID qual expressions (with implicit OR semantics @@ -174,6 +263,8 @@ TidQualFromExpr(Node *expr, int varno) /* base case: check for tideq opclause */ if (IsTidEqualClause((OpExpr *) expr, varno)) rlst = list_make1(expr); + else if (IsTidRangeClause((OpExpr *) expr, varno)) + rlst = list_make1(expr); } else if (expr && IsA(expr, ScalarArrayOpExpr)) { @@ -189,11 +280,18 @@ TidQualFromExpr(Node *expr, int varno) } else if (and_clause(expr)) { - foreach(l, ((BoolExpr *) expr)->args) + /* look for a range qual in the clause */ + rlst = TidCompoundRangeQualFromExpr(expr, varno); + + /* if no range qual was found, look for any other TID qual */ + if (!rlst) { - rlst = TidQualFromExpr((Node *) lfirst(l), varno); - if (rlst) - break; + foreach(l, ((BoolExpr *) expr)->args) + { + rlst = TidQualFromExpr((Node *) lfirst(l), varno); + if (rlst) + break; + } } } else if (or_clause(expr)) @@ -217,17 +315,28 @@ TidQualFromExpr(Node *expr, int varno) } /* - * Extract a set of CTID conditions from the rel's baserestrictinfo list + * Extract a set of CTID conditions from the rel's baserestrictinfo list + * + * Normally we just use the first RestrictInfo item with some usable quals, + * but it's also possible for a good compound range qual, such as + * "CTID > ? AND CTID < ?", to be split across two items. So we look for + * lower/upper bound range quals in all items and use them if any were found. + * In principle there might be more than one lower or upper bound, but we + * just use the first one found of each type.
*/ static List * TidQualFromBaseRestrictinfo(RelOptInfo *rel) { List *rlst = NIL; ListCell *l; + bool found_lower = false; + bool found_upper = false; + List *found_quals = NIL; foreach(l, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; /* * If clause must wait till after some lower-security-level @@ -236,10 +345,31 @@ TidQualFromBaseRestrictinfo(RelOptInfo *rel) if (!restriction_is_securely_promotable(rinfo, rel)) continue; - rlst = TidQualFromExpr((Node *) rinfo->clause, rel->relid); + /* Look for lower and upper bound range quals. */ + if (!found_lower && IsUsableRangeQual((Node *) clause, rel->relid, true)) + { + found_lower = true; + found_quals = lappend(found_quals, clause); + continue; + } + + if (!found_upper && IsUsableRangeQual((Node *) clause, rel->relid, false)) + { + found_upper = true; + found_quals = lappend(found_quals, clause); + continue; + } + + /* Look for other TID quals. */ + rlst = TidQualFromExpr((Node *) clause, rel->relid); if (rlst) break; } + + /* Use a range qual if any were found. */ + if (found_quals) + rlst = MakeTidRangeQuals(found_quals); + return rlst; } @@ -247,12 +377,16 @@ TidQualFromBaseRestrictinfo(RelOptInfo *rel) * create_tidscan_paths * Create paths corresponding to direct TID scans of the given rel. * + * Path keys and direction will be set on the scans if it looks useful. + * * Candidate paths are added to the rel's pathlist (using add_path). */ void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) { Relids required_outer; + List *pathkeys = NULL; + ScanDirection direction = ForwardScanDirection; List *tidquals; /* @@ -262,9 +396,37 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) */ required_outer = rel->lateral_relids; + /* + * Try to determine the best scan direction and create some useful + * pathkeys. 
+ */ + if (has_useful_pathkeys(root, rel)) + { + /* + * Build path keys corresponding to ORDER BY ctid ASC, and check + * whether they will be useful for this scan. If not, build path keys + * for DESC, and try that; set the direction to BackwardScanDirection + * if so. If neither of them will be useful, no path keys will be + * set. + */ + pathkeys = build_tidscan_pathkeys(root, rel, ForwardScanDirection); + if (!pathkeys_contained_in(pathkeys, root->query_pathkeys)) + { + pathkeys = build_tidscan_pathkeys(root, rel, BackwardScanDirection); + if (pathkeys_contained_in(pathkeys, root->query_pathkeys)) + direction = BackwardScanDirection; + else + pathkeys = NULL; + } + } + tidquals = TidQualFromBaseRestrictinfo(rel); - if (tidquals) - add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, - required_outer)); + /* + * If there are tidquals or some useful pathkeys were found, then it's + * worth generating a tidscan path. + */ + if (tidquals || pathkeys) + add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, pathkeys, + direction, required_outer)); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ae41c9e..5452730 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -185,7 +185,7 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, List *bitmapqualorig, Index scanrelid); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, - List *tidquals); + List *tidquals, ScanDirection direction); static SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, @@ -3086,6 +3086,21 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, } /* + * In the case of a compound range qual, the two parts may have come + * from different RestrictInfos. So remove each part separately. 
+ */ + if (list_length(tidquals) == 1) + { + Node *qual = linitial(tidquals); + + if (and_clause(qual)) + { + BoolExpr *and_qual = ((BoolExpr *) qual); + scan_clauses = list_difference(scan_clauses, and_qual->args); + } + } + + /* * Remove any clauses that are TID quals. This is a bit tricky since the * tidquals list has implicit OR semantics. */ @@ -3097,7 +3112,9 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, scan_plan = make_tidscan(tlist, scan_clauses, scan_relid, - tidquals); + tidquals, + best_path->direction + ); copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); @@ -5179,7 +5196,8 @@ static TidScan * make_tidscan(List *qptlist, List *qpqual, Index scanrelid, - List *tidquals) + List *tidquals, + ScanDirection direction) { TidScan *node = makeNode(TidScan); Plan *plan = &node->scan.plan; @@ -5190,6 +5208,7 @@ make_tidscan(List *qptlist, plan->righttree = NULL; node->scan.scanrelid = scanrelid; node->tidquals = tidquals; + node->direction = direction; return node; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c5aaaf5..e2d51a9 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1186,6 +1186,7 @@ create_bitmap_or_path(PlannerInfo *root, */ TidPath * create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, + List *pathkeys, ScanDirection direction, Relids required_outer) { TidPath *pathnode = makeNode(TidPath); @@ -1198,9 +1199,10 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; - pathnode->path.pathkeys = NIL; /* always unordered */ + pathnode->path.pathkeys = pathkeys; pathnode->tidquals = tidquals; + pathnode->direction = direction; cost_tidscan(&pathnode->path, root, rel, tidquals, pathnode->path.param_info); diff --git a/src/backend/utils/adt/selfuncs.c 
b/src/backend/utils/adt/selfuncs.c index b8c0e03..eaacab7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -572,6 +572,30 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, if (!HeapTupleIsValid(vardata->statsTuple)) { + /* + * There are no stats for system columns, but for CTID we can estimate + * based on table size. + */ + if (vardata->var && IsA(vardata->var, Var) && + ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber) + { + ItemPointer itemptr; + BlockNumber block; + + /* If the relation's empty, we're going to read all of it. */ + if (vardata->rel->pages == 0) + return 1.0; + + itemptr = (ItemPointer) DatumGetPointer(constval); + block = ItemPointerGetBlockNumberNoCheck(itemptr); + selec = block / (double) vardata->rel->pages; + if (isgt) + selec = 1.0 - selec; + + CLAMP_PROBABILITY(selec); + return selec; + } + /* no stats available, so default result */ return DEFAULT_INEQ_SEL; } @@ -1786,6 +1810,15 @@ nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, return (Selectivity) 0; /* keep compiler quiet */ } } + else if (vardata.var && IsA(vardata.var, Var) && + ((Var *) vardata.var)->varattno == SelfItemPointerAttributeNumber) + { + /* + * There are no stats for system columns, but we know CTID is never + * NULL. + */ + selec = (nulltesttype == IS_NULL) ? 
0.0 : 1.0; + } else { /* diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index d9b6bad..cdd2cd3 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -156,15 +156,15 @@ oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt', oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' }, -{ oid => '2800', descr => 'greater than', +{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than', oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt', oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' }, -{ oid => '2801', descr => 'less than or equal', +{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal', oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle', oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' }, -{ oid => '2802', descr => 'greater than or equal', +{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal', oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge', oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' }, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 03ad516..ee6a04d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1477,6 +1477,8 @@ typedef struct BitmapHeapScanState ParallelBitmapHeapState *pstate; } BitmapHeapScanState; +typedef struct TidRange TidRange; + /* ---------------- * TidScanState information * @@ -1493,10 +1495,11 @@ typedef struct TidScanState ScanState ss; /* its first field is NodeTag */ List *tss_tidexprs; bool tss_isCurrentOf; - int tss_NumTids; 
+ int tss_NumRanges; int tss_TidPtr; - ItemPointerData *tss_TidList; - HeapTupleData tss_htup; + TidRange *tss_TidRanges; + bool tss_inScan; + HeapTupleData tss_htup; /* for current-of and single TID fetches */ } TidScanState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7c2abbd..96d30aa 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -492,6 +492,7 @@ typedef struct TidScan { Scan scan; List *tidquals; /* qual(s) involving CTID = something */ + ScanDirection direction; } TidScan; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index adb4265..2fee1e1 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1229,14 +1229,24 @@ typedef struct BitmapOrPath /* * TidPath represents a scan by TID * - * tidquals is an implicitly OR'ed list of qual expressions of the form - * "CTID = pseudoconstant" or "CTID = ANY(pseudoconstant_array)". + * tidquals is an implicitly OR'ed list of qual expressions of the forms: + * - "CTID = pseudoconstant" + * - "CTID = ANY(pseudoconstant_array)" + * - "CURRENT OF cursor" + * - "CTID relop pseudoconstant" + * - "(CTID relop pseudoconstant) AND (CTID relop pseudoconstant)" + * + * It is permissible for the CTID variable to be the LHS or RHS of operator + * expressions; in the last case, there is always a lower bound and upper bound, + * in any order. If tidquals is empty, all CTIDs will match. + * * Note they are bare expressions, not RestrictInfos.
*/ typedef struct TidPath { Path path; - List *tidquals; /* qual(s) involving CTID = something */ + List *tidquals; + ScanDirection direction; } TidPath; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 7c5ff22..a0a88a5 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -63,7 +63,8 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, RelOptInfo *rel, List *bitmapquals); extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, - List *tidquals, Relids required_outer); + List *tidquals, List *pathkeys, ScanDirection direction, + Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, Relids required_outer, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index cafde30..9d0699e 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -211,6 +211,9 @@ extern List *build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys); +extern List *build_tidscan_pathkeys(PlannerInfo *root, + RelOptInfo *rel, + ScanDirection direction); extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist); diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out index 521ed1b..4b9564b 100644 --- a/src/test/regress/expected/tidscan.out +++ b/src/test/regress/expected/tidscan.out @@ -116,6 +116,39 @@ FETCH FIRST FROM c; (1 row) ROLLBACK; +-- check that ordering on a tidscan doesn't require a sort +EXPLAIN (COSTS OFF) +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid; + QUERY PLAN +--------------------------------------------------------------- + Tid Scan on tidscan + TID Cond: (ctid = ANY ('{"(0,2)","(0,1)","(0,3)"}'::tid[])) +(2 rows) + +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', 
'(0,1)', '(0,3)']::tid[]) ORDER BY ctid; + ctid | id +-------+---- + (0,1) | 1 + (0,2) | 2 + (0,3) | 3 +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC; + QUERY PLAN +--------------------------------------------------------------- + Tid Scan Backward on tidscan + TID Cond: (ctid = ANY ('{"(0,2)","(0,1)","(0,3)"}'::tid[])) +(2 rows) + +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC; + ctid | id +-------+---- + (0,3) | 3 + (0,2) | 2 + (0,1) | 1 +(3 rows) + -- tidscan via CURRENT OF BEGIN; DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan; @@ -177,3 +210,315 @@ UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; ERROR: cursor "c" is not positioned on a row ROLLBACK; DROP TABLE tidscan; +-- tests for tidrangescans +CREATE TABLE tidrangescan(id integer, data text); +INSERT INTO tidrangescan SELECT i,'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' FROM generate_series(1,1000) AS s(i); +DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR substring(ctid::text from '\((\d+),')::integer >= 10;; +VACUUM tidrangescan; +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)'; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)'; + ctid | data +--------+---------------------------------------------------------------------------------- + (0,1) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,4) | 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)'; + QUERY PLAN +------------------------------------ + Tid Scan on tidrangescan + TID Cond: (ctid <= '(1,5)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)'; + ctid | data +--------+---------------------------------------------------------------------------------- + (0,1) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,4) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (0,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (1,1) | 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (1,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (1,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (1,4) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (1,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(15 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)'; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan + TID Cond: (ctid < '(0,0)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)'; + ctid | data +------+------ +(0 rows) + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)'; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan + TID Cond: (ctid > '(9,8)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)'; + ctid | data +--------+---------------------------------------------------------------------------------- + (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan + TID Cond: ('(9,8)'::tid < ctid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid; + ctid | data +--------+---------------------------------------------------------------------------------- + (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid >= 
'(9,8)'; + QUERY PLAN +------------------------------------ + Tid Scan on tidrangescan + TID Cond: (ctid >= '(9,8)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)'; + ctid | data +--------+---------------------------------------------------------------------------------- + (9,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)'; + QUERY PLAN +-------------------------------------- + Tid Scan on tidrangescan + TID Cond: (ctid >= '(100,0)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)'; + ctid | data +------+------ +(0 rows) + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + QUERY PLAN +---------------------------------------------------------------- + Tid Scan on tidrangescan + TID Cond: ((ctid > '(4,4)'::tid) AND ('(4,7)'::tid >= ctid)) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + ctid | data +-------+---------------------------------------------------------------------------------- + (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + QUERY PLAN +---------------------------------------------------------------- + Tid Scan on tidrangescan + TID Cond: (('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) +(2 rows) + +SELECT ctid, data FROM 
tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + ctid | data +-------+---------------------------------------------------------------------------------- + (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(3 rows) + +-- combinations +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)'; + QUERY PLAN +------------------------------------------------------------------------------------------- + Tid Scan on tidrangescan + TID Cond: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR (ctid = '(2,2)'::tid)) +(2 rows) + +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)'; + ctid | data +-------+---------------------------------------------------------------------------------- + (2,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(4 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo'; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Tid Scan on tidrangescan + TID Cond: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR (ctid = '(2,2)'::tid)) + Filter: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR ((ctid = '(2,2)'::tid) AND (data = 'foo'::text))) +(3 rows) + +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = 
'(2,2)' AND data = 'foo'; + ctid | data +-------+---------------------------------------------------------------------------------- + (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +(3 rows) + +-- ordering with no quals should use tid range scan +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan ORDER BY ctid ASC; + QUERY PLAN +-------------------------- + Tid Scan on tidrangescan +(1 row) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan ORDER BY ctid DESC; + QUERY PLAN +----------------------------------- + Tid Scan Backward on tidrangescan +(1 row) + +-- min/max +EXPLAIN (COSTS OFF) +SELECT MIN(ctid) FROM tidrangescan; + QUERY PLAN +-------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Tid Scan on tidrangescan + Filter: (ctid IS NOT NULL) +(5 rows) + +SELECT MIN(ctid) FROM tidrangescan; + min +------- + (0,1) +(1 row) + +EXPLAIN (COSTS OFF) +SELECT MAX(ctid) FROM tidrangescan; + QUERY PLAN +------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Tid Scan Backward on tidrangescan + Filter: (ctid IS NOT NULL) +(5 rows) + +SELECT MAX(ctid) FROM tidrangescan; + max +-------- + (9,10) +(1 row) + +EXPLAIN (COSTS OFF) +SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)'; + QUERY PLAN +------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Tid Scan on tidrangescan + TID Cond: (ctid > '(5,0)'::tid) + Filter: (ctid IS NOT NULL) +(6 rows) + +SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)'; + min +------- + (5,1) +(1 row) + +EXPLAIN (COSTS OFF) +SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)'; + QUERY PLAN +------------------------------------------------- + Result + InitPlan 1 (returns 
$0) + -> Limit + -> Tid Scan Backward on tidrangescan + TID Cond: (ctid < '(5,0)'::tid) + Filter: (ctid IS NOT NULL) +(6 rows) + +SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)'; + max +-------- + (4,10) +(1 row) + +-- empty table +CREATE TABLE tidrangescan_empty(id integer, data text); +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan_empty + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + ctid | data +------+------ +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)'; + QUERY PLAN +----------------------------------- + Tid Scan on tidrangescan_empty + TID Cond: (ctid > '(9,0)'::tid) +(2 rows) + +SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)'; + ctid | data +------+------ +(0 rows) + diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql index a8472e0..e9519ee 100644 --- a/src/test/regress/sql/tidscan.sql +++ b/src/test/regress/sql/tidscan.sql @@ -43,6 +43,15 @@ FETCH BACKWARD 1 FROM c; FETCH FIRST FROM c; ROLLBACK; +-- check that ordering on a tidscan doesn't require a sort +EXPLAIN (COSTS OFF) +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid; +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC; +SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC; + -- tidscan via CURRENT OF BEGIN; DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan; @@ -64,3 +73,94 @@ UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; ROLLBACK; DROP TABLE tidscan; + +-- tests for tidrangescans + +CREATE TABLE tidrangescan(id integer, data 
text); + +INSERT INTO tidrangescan SELECT i,'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' FROM generate_series(1,1000) AS s(i); +DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR substring(ctid::text from '\((\d+),')::integer >= 10;; +VACUUM tidrangescan; + +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)'; +SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)'; +SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)'; +SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)'; + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)'; +SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid; +SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)'; +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)'; +SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)'; + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; +SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + +-- combinations +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)'; +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > 
'(4,4)' OR ctid = '(2,2)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo'; +SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo'; + +-- ordering with no quals should use tid range scan +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan ORDER BY ctid ASC; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan ORDER BY ctid DESC; + +-- min/max +EXPLAIN (COSTS OFF) +SELECT MIN(ctid) FROM tidrangescan; +SELECT MIN(ctid) FROM tidrangescan; + +EXPLAIN (COSTS OFF) +SELECT MAX(ctid) FROM tidrangescan; +SELECT MAX(ctid) FROM tidrangescan; + +EXPLAIN (COSTS OFF) +SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)'; +SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)'; + +EXPLAIN (COSTS OFF) +SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)'; +SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)'; + +-- empty table +CREATE TABLE tidrangescan_empty(id integer, data text); + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)'; +SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)'; +SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)';