diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3395445..e89343f 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -575,11 +575,18 @@ heapgettup(HeapScanDesc scan,
* forward scanners.
*/
scan->rs_syncscan = false;
+
/* start from last page of the scan */
- if (scan->rs_startblock > 0)
- page = scan->rs_startblock - 1;
+ if (scan->rs_numblocks == InvalidBlockNumber)
+ {
+ if (scan->rs_startblock > 0)
+ page = scan->rs_startblock - 1;
+ else
+ page = scan->rs_nblocks - 1;
+ }
else
- page = scan->rs_nblocks - 1;
+ page = scan->rs_startblock + scan->rs_numblocks - 1;
+
heapgetpage(scan, page);
}
else
@@ -876,11 +883,18 @@ heapgettup_pagemode(HeapScanDesc scan,
* forward scanners.
*/
scan->rs_syncscan = false;
+
/* start from last page of the scan */
- if (scan->rs_startblock > 0)
- page = scan->rs_startblock - 1;
+ if (scan->rs_numblocks == InvalidBlockNumber)
+ {
+ if (scan->rs_startblock > 0)
+ page = scan->rs_startblock - 1;
+ else
+ page = scan->rs_nblocks - 1;
+ }
else
- page = scan->rs_nblocks - 1;
+ page = scan->rs_startblock + scan->rs_numblocks - 1;
+
heapgetpage(scan, page);
}
else
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ed6afe7..aed7016 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -111,6 +111,7 @@ static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
static const char *explain_get_index_name(Oid indexId);
static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
+static void show_scan_direction(ExplainState *es, ScanDirection direction);
static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
ExplainState *es);
static void ExplainScanTarget(Scan *plan, ExplainState *es);
@@ -1245,7 +1246,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_SeqScan:
case T_SampleScan:
case T_BitmapHeapScan:
- case T_TidScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -1254,6 +1254,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_WorkTableScan:
ExplainScanTarget((Scan *) plan, es);
break;
+ case T_TidScan:
+ show_scan_direction(es, ((TidScan *) plan)->direction);
+ ExplainScanTarget((Scan *) plan, es);
+ break;
case T_ForeignScan:
case T_CustomScan:
if (((Scan *) plan)->scanrelid > 0)
@@ -2867,25 +2871,21 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
}
/*
- * Add some additional details about an IndexScan or IndexOnlyScan
+ * Show the direction of a scan.
*/
static void
-ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
- ExplainState *es)
+show_scan_direction(ExplainState *es, ScanDirection direction)
{
- const char *indexname = explain_get_index_name(indexid);
-
if (es->format == EXPLAIN_FORMAT_TEXT)
{
- if (ScanDirectionIsBackward(indexorderdir))
+ if (ScanDirectionIsBackward(direction))
appendStringInfoString(es->str, " Backward");
- appendStringInfo(es->str, " using %s", indexname);
}
else
{
const char *scandir;
- switch (indexorderdir)
+ switch (direction)
{
case BackwardScanDirection:
scandir = "Backward";
@@ -2901,8 +2901,24 @@ ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
break;
}
ExplainPropertyText("Scan Direction", scandir, es);
+ }
+}
+
+/*
+ * Add some additional details about an IndexScan or IndexOnlyScan
+ */
+static void
+ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
+ ExplainState *es)
+{
+ const char *indexname = explain_get_index_name(indexid);
+
+ show_scan_direction(es, indexorderdir);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ appendStringInfo(es->str, " using %s", indexname);
+ else
ExplainPropertyText("Index Name", indexname, es);
- }
}
/*
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 0cb1946..9b455d8 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -22,7 +22,9 @@
*/
#include "postgres.h"
+#include "access/relscan.h"
#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/execdebug.h"
#include "executor/nodeTidscan.h"
@@ -39,21 +41,78 @@
((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
((Var *) (node))->varlevelsup == 0)
+typedef enum
+{
+ TIDEXPR_CURRENT_OF, /* qual is a CurrentOfExpr */
+ TIDEXPR_IN_ARRAY, /* qual is a ScalarArrayOpExpr; yields tid[] */
+ TIDEXPR_EQ, /* operator clause that is none of <, <=, >, >= */
+ TIDEXPR_LT, /* upper bound only: CTID < or <= value */
+ TIDEXPR_GT, /* lower bound only: CTID > or >= value */
+ TIDEXPR_BETWEEN, /* AND of two comparisons: lower and upper bound */
+ TIDEXPR_ANY /* no tidquals at all: every TID qualifies */
+} TidExprType;
+
/* one element in tss_tidexprs */
typedef struct TidExpr
{
+ TidExprType type; /* which kind of TID qual this entry represents */
ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
- bool isarray; /* if true, it yields tid[] not just tid */
- CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */
+ ExprState *exprstate2; /* For TIDEXPR_BETWEEN: upper-bound value (exprstate holds the lower bound) */
+ CurrentOfExpr *cexpr; /* For TIDEXPR_CURRENT_OF */
+ bool strict; /* exprstate's operator is < or > (exclusive), not <= or >= */
+ bool strict2; /* same as strict, but for exprstate2 */
} TidExpr;
+typedef struct TidRange
+{
+ ItemPointerData first; /* first TID of the range (inclusive) */
+ ItemPointerData last; /* last TID of the range (inclusive) */
+} TidRange;
+
+static ExprState *MakeTidOpExprState(OpExpr *expr, TidScanState *tidstate, bool *strict, bool *invert);
static void TidExprListCreate(TidScanState *tidstate);
+static TidRange * EnlargeTidRangeArray(TidRange * tidRanges, int numRanges, int *numAllocRanges);
+static bool SetTidLowerBound(ItemPointer tid, bool strict, int nblocks, ItemPointer lowerBound);
+static bool SetTidUpperBound(ItemPointer tid, bool strict, int nblocks, ItemPointer upperBound);
static void TidListEval(TidScanState *tidstate);
+static bool MergeTidRanges(TidRange * a, TidRange * b);
static int itemptr_comparator(const void *a, const void *b);
+static int tidrange_comparator(const void *a, const void *b);
+static HeapScanDesc BeginTidRangeScan(TidScanState *node, TidRange * range);
+static HeapTuple NextInTidRange(HeapScanDesc scandesc, ScanDirection direction, TidRange * range);
static TupleTableSlot *TidNext(TidScanState *node);
/*
+ * Create an ExprState corresponding to the value part of a TID comparison.
+ * If the comparison operator is > or <, strict is set.
+ * If the comparison is of the form VALUE op CTID, then invert is set.
+ */
+static ExprState *
+MakeTidOpExprState(OpExpr *expr, TidScanState *tidstate, bool *strict, bool *invert)
+{
+	Node *leftarg = get_leftop((Expr *) expr);
+	Node *rightarg = get_rightop((Expr *) expr);
+	bool ctid_on_left = IsCTIDVar(leftarg);
+	ExprState *result;
+
+	/* One side must be the CTID column; the other supplies the TID value. */
+	*invert = false;
+	if (!ctid_on_left)
+	{
+		if (!IsCTIDVar(rightarg))
+			elog(ERROR, "could not identify CTID variable");
+		*invert = true;
+	}
+	result = ExecInitExpr((Expr *) (ctid_on_left ? rightarg : leftarg), &tidstate->ss.ps);
+
+	/* Only the exclusive comparison operators (< and >) count as strict. */
+	*strict = (expr->opno == TIDLessOperator || expr->opno == TIDGreaterOperator);
+
+	return result;
+}
+
+/*
* Extract the qual subexpressions that yield TIDs to search for,
* and compile them into ExprStates if they're ordinary expressions.
*
@@ -69,6 +128,14 @@ TidExprListCreate(TidScanState *tidstate)
tidstate->tss_tidexprs = NIL;
tidstate->tss_isCurrentOf = false;
+ if (!node->tidquals)
+ {
+ TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
+
+ tidexpr->type = TIDEXPR_ANY;
+ tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
+ }
+
foreach(l, node->tidquals)
{
Expr *expr = (Expr *) lfirst(l);
@@ -76,20 +143,16 @@ TidExprListCreate(TidScanState *tidstate)
if (is_opclause(expr))
{
- Node *arg1;
- Node *arg2;
+ OpExpr *opexpr = (OpExpr *) expr;
+ bool invert;
- arg1 = get_leftop(expr);
- arg2 = get_rightop(expr);
- if (IsCTIDVar(arg1))
- tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
- &tidstate->ss.ps);
- else if (IsCTIDVar(arg2))
- tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
- &tidstate->ss.ps);
+ tidexpr->exprstate = MakeTidOpExprState(opexpr, tidstate, &tidexpr->strict, &invert);
+ if (opexpr->opno == TIDLessOperator || opexpr->opno == TIDLessEqOperator)
+ tidexpr->type = invert ? TIDEXPR_GT : TIDEXPR_LT;
+ else if (opexpr->opno == TIDGreaterOperator || opexpr->opno == TIDGreaterEqOperator)
+ tidexpr->type = invert ? TIDEXPR_LT : TIDEXPR_GT;
else
- elog(ERROR, "could not identify CTID variable");
- tidexpr->isarray = false;
+ tidexpr->type = TIDEXPR_EQ;
}
else if (expr && IsA(expr, ScalarArrayOpExpr))
{
@@ -98,15 +161,46 @@ TidExprListCreate(TidScanState *tidstate)
Assert(IsCTIDVar(linitial(saex->args)));
tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
&tidstate->ss.ps);
- tidexpr->isarray = true;
+ tidexpr->type = TIDEXPR_IN_ARRAY;
}
else if (expr && IsA(expr, CurrentOfExpr))
{
CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
tidexpr->cexpr = cexpr;
+ tidexpr->type = TIDEXPR_CURRENT_OF;
tidstate->tss_isCurrentOf = true;
}
+ else if (and_clause((Node *) expr))
+ {
+ OpExpr *arg1;
+ OpExpr *arg2;
+ bool invert;
+ bool invert2;
+
+ Assert(list_length(((BoolExpr *) expr)->args) == 2);
+ arg1 = (OpExpr *) linitial(((BoolExpr *) expr)->args);
+ arg2 = (OpExpr *) lsecond(((BoolExpr *) expr)->args);
+ tidexpr->exprstate = MakeTidOpExprState(arg1, tidstate, &tidexpr->strict, &invert);
+ tidexpr->exprstate2 = MakeTidOpExprState(arg2, tidstate, &tidexpr->strict2, &invert2);
+
+ /* If the LHS is not the lower bound, swap them. */
+ if (invert == (arg1->opno == TIDGreaterOperator || arg1->opno == TIDGreaterEqOperator))
+ {
+ bool temp_strict;
+ ExprState *temp_es;
+
+ temp_es = tidexpr->exprstate;
+ tidexpr->exprstate = tidexpr->exprstate2;
+ tidexpr->exprstate2 = temp_es;
+
+ temp_strict = tidexpr->strict;
+ tidexpr->strict = tidexpr->strict2;
+ tidexpr->strict2 = temp_strict;
+ }
+
+ tidexpr->type = TIDEXPR_BETWEEN;
+ }
else
elog(ERROR, "could not identify CTID expression");
@@ -118,6 +212,113 @@ TidExprListCreate(TidScanState *tidstate)
!tidstate->tss_isCurrentOf);
}
+static TidRange *
+EnlargeTidRangeArray(TidRange * tidRanges, int numRanges, int *numAllocRanges)
+{
+	/* Still room for one more entry: hand the array back untouched. */
+	if (numRanges < *numAllocRanges)
+		return tidRanges;
+
+	/* Full: double the allocation; callers must adopt the returned pointer. */
+	*numAllocRanges = *numAllocRanges * 2;
+	return (TidRange *) repalloc(tidRanges,
+								 *numAllocRanges * sizeof(TidRange));
+}
+
+/*
+ * Compute the first TID of a range from a lower-bound qual value.
+ * A NULL tid means "no lower bound" and yields (0,1).  If strict, the bound
+ * value itself is excluded.  Valid blocks are 0 .. nblocks - 1, so return
+ * false (leaving *lowerBound unset) when the bound's block is >= nblocks.
+ */
+static bool
+SetTidLowerBound(ItemPointer tid, bool strict, int nblocks, ItemPointer lowerBound)
+{
+	OffsetNumber offset;
+
+	if (tid == NULL)
+	{
+		ItemPointerSetBlockNumber(lowerBound, 0);
+		ItemPointerSetOffsetNumber(lowerBound, 1);
+		return true;
+	}
+	/* Blocks are numbered from 0, so block nblocks is already past the end. */
+	if (ItemPointerGetBlockNumberNoCheck(tid) >= nblocks)
+		return false;
+
+	*lowerBound = *tid;
+	offset = ItemPointerGetOffsetNumberNoCheck(tid);
+	if (strict)
+		ItemPointerSetOffsetNumber(lowerBound, OffsetNumberNext(offset));
+	else if (offset == 0)
+		ItemPointerSetOffsetNumber(lowerBound, 1);	/* offset 0 doesn't exist */
+	return true;
+}
+
+/*
+ * Compute the last TID of a range from an upper-bound qual value; a NULL tid
+ * means "no upper bound".  Return false if the bound excludes every TID.
+ */
+static bool
+SetTidUpperBound(ItemPointer tid, bool strict, int nblocks, ItemPointer upperBound)
+{
+	OffsetNumber offset;
+
+	/* If the table is empty, the range must be empty. */
+	if (nblocks == 0)
+		return false;
+
+	if (tid == NULL)
+	{
+		ItemPointerSetBlockNumber(upperBound, nblocks - 1);
+		ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber);
+		return true;
+	}
+
+	*upperBound = *tid;
+	offset = ItemPointerGetOffsetNumberNoCheck(tid);
+
+	/*
+	 * If the expression was non-strict (<=) and the offset is 0, then just
+	 * pretend it was strict, because offset 0 doesn't exist and we may as
+	 * well exclude that block.
+	 */
+	if (!strict && offset == 0)
+		strict = true;
+
+	if (strict)
+	{
+		if (offset <= 1)
+		{
+			BlockNumber block = ItemPointerGetBlockNumberNoCheck(upperBound);
+
+			/*
+			 * Nothing on this block lies below offset 1, so the range ends
+			 * on the previous block; before block 0 there is no valid range.
+			 */
+			if (block == 0)
+				return false;
+
+			ItemPointerSetBlockNumber(upperBound, block - 1);
+			ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber);
+		}
+		else
+			ItemPointerSetOffsetNumber(upperBound, OffsetNumberPrev(offset));
+	}
+
+	/*
+	 * Valid blocks are 0 .. nblocks - 1: if the upper bound is at or beyond
+	 * block nblocks, truncate it to the last TID of the last block.
+	 */
+	if (ItemPointerGetBlockNumberNoCheck(upperBound) >= nblocks)
+	{
+		ItemPointerSetBlockNumber(upperBound, nblocks - 1);
+		ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber);
+	}
+
+	return true;
+}
+
/*
* Compute the list of TIDs to be visited, by evaluating the expressions
* for them.
@@ -129,9 +330,9 @@ TidListEval(TidScanState *tidstate)
{
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
BlockNumber nblocks;
- ItemPointerData *tidList;
- int numAllocTids;
- int numTids;
+ TidRange *tidRanges;
+ int numAllocRanges;
+ int numRanges;
ListCell *l;
/*
@@ -147,10 +348,9 @@ TidListEval(TidScanState *tidstate)
* are simple OpExprs or CurrentOfExprs. If there are any
* ScalarArrayOpExprs, we may have to enlarge the array.
*/
- numAllocTids = list_length(tidstate->tss_tidexprs);
- tidList = (ItemPointerData *)
- palloc(numAllocTids * sizeof(ItemPointerData));
- numTids = 0;
+ numAllocRanges = list_length(tidstate->tss_tidexprs);
+ tidRanges = (TidRange *) palloc0(numAllocRanges * sizeof(TidRange));
+ numRanges = 0;
foreach(l, tidstate->tss_tidexprs)
{
@@ -158,7 +358,7 @@ TidListEval(TidScanState *tidstate)
ItemPointer itemptr;
bool isNull;
- if (tidexpr->exprstate && !tidexpr->isarray)
+ if (tidexpr->exprstate && tidexpr->type == TIDEXPR_EQ)
{
itemptr = (ItemPointer)
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
@@ -168,17 +368,76 @@ TidListEval(TidScanState *tidstate)
ItemPointerIsValid(itemptr) &&
ItemPointerGetBlockNumber(itemptr) < nblocks)
{
- if (numTids >= numAllocTids)
- {
- numAllocTids *= 2;
- tidList = (ItemPointerData *)
- repalloc(tidList,
- numAllocTids * sizeof(ItemPointerData));
- }
- tidList[numTids++] = *itemptr;
+ tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges);
+ tidRanges[numRanges].first = *itemptr;
+ tidRanges[numRanges].last = *itemptr;
+ numRanges++;
}
}
- else if (tidexpr->exprstate && tidexpr->isarray)
+ else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_LT)
+ {
+ bool upper_isNull;
+ ItemPointer upper_itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
+ econtext,
+ &upper_isNull));
+
+ if (upper_isNull)
+ continue;
+
+ tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges);
+
+ SetTidLowerBound(NULL, false, nblocks, &tidRanges[numRanges].first);
+ if (SetTidUpperBound(upper_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].last))
+ numRanges++;
+ }
+ else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_GT)
+ {
+ bool lower_isNull;
+ ItemPointer lower_itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
+ econtext,
+ &lower_isNull));
+
+ if (lower_isNull)
+ continue;
+
+ tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges);
+
+ if (SetTidLowerBound(lower_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].first) &&
+ SetTidUpperBound(NULL, false, nblocks, &tidRanges[numRanges].last))
+ numRanges++;
+ }
+ else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_BETWEEN)
+ {
+ bool lower_isNull,
+ upper_isNull;
+ ItemPointer lower_itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
+ econtext,
+ &lower_isNull));
+ ItemPointer upper_itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate2,
+ econtext,
+ &upper_isNull));
+
+ if (lower_isNull || upper_isNull)
+ continue;
+
+ tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges);
+ if (SetTidLowerBound(lower_itemptr, tidexpr->strict, nblocks, &tidRanges[numRanges].first) &&
+ SetTidUpperBound(upper_itemptr, tidexpr->strict2, nblocks, &tidRanges[numRanges].last) &&
+ itemptr_comparator(&tidRanges[numRanges].first, &tidRanges[numRanges].last) <= 0) /* drop inverted (empty) ranges */
+ numRanges++;
+ }
+ else if (tidexpr->type == TIDEXPR_ANY)
+ {
+ tidRanges = EnlargeTidRangeArray(tidRanges, numRanges, &numAllocRanges);
+ SetTidLowerBound(NULL, false, nblocks, &tidRanges[numRanges].first);
+ if (SetTidUpperBound(NULL, false, nblocks, &tidRanges[numRanges].last)) /* false when the table is empty */
+ numRanges++;
+ }
+ else if (tidexpr->exprstate && tidexpr->type == TIDEXPR_IN_ARRAY)
{
Datum arraydatum;
ArrayType *itemarray;
@@ -196,12 +455,12 @@ TidListEval(TidScanState *tidstate)
deconstruct_array(itemarray,
TIDOID, sizeof(ItemPointerData), false, 's',
&ipdatums, &ipnulls, &ndatums);
- if (numTids + ndatums > numAllocTids)
+ if (numRanges + ndatums > numAllocRanges)
{
- numAllocTids = numTids + ndatums;
- tidList = (ItemPointerData *)
- repalloc(tidList,
- numAllocTids * sizeof(ItemPointerData));
+ numAllocRanges = numRanges + ndatums;
+ tidRanges = (TidRange *)
+ repalloc(tidRanges,
+ numAllocRanges * sizeof(TidRange));
}
for (i = 0; i < ndatums; i++)
{
@@ -210,13 +469,17 @@ TidListEval(TidScanState *tidstate)
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
if (ItemPointerIsValid(itemptr) &&
ItemPointerGetBlockNumber(itemptr) < nblocks)
- tidList[numTids++] = *itemptr;
+ {
+ tidRanges[numRanges].first = *itemptr;
+ tidRanges[numRanges].last = *itemptr;
+ numRanges++;
+ }
}
}
pfree(ipdatums);
pfree(ipnulls);
}
- else
+ else if (tidexpr->type == TIDEXPR_CURRENT_OF)
{
ItemPointerData cursor_tid;
@@ -225,16 +486,20 @@ TidListEval(TidScanState *tidstate)
RelationGetRelid(tidstate->ss.ss_currentRelation),
&cursor_tid))
{
- if (numTids >= numAllocTids)
- {
- numAllocTids *= 2;
- tidList = (ItemPointerData *)
- repalloc(tidList,
- numAllocTids * sizeof(ItemPointerData));
- }
- tidList[numTids++] = cursor_tid;
+ /*
+ * A current-of TidExpr only exists by itself, and we should
+ * already have allocated a tidRanges entry for it. We don't
+ * need to check whether the tidRanges array needs to be
+ * resized.
+ */
+ Assert(numRanges < numAllocRanges);
+ tidRanges[numRanges].first = cursor_tid;
+ tidRanges[numRanges].last = cursor_tid;
+ numRanges++;
}
}
+ else
+ Assert(false);
}
/*
@@ -243,31 +508,55 @@ TidListEval(TidScanState *tidstate)
* the list. Sorting makes it easier to detect duplicates, and as a bonus
* ensures that we will visit the heap in the most efficient way.
*/
- if (numTids > 1)
+ if (numRanges > 1)
{
- int lastTid;
+ int lastRange;
int i;
/* CurrentOfExpr could never appear OR'd with something else */
Assert(!tidstate->tss_isCurrentOf);
- qsort((void *) tidList, numTids, sizeof(ItemPointerData),
- itemptr_comparator);
- lastTid = 0;
- for (i = 1; i < numTids; i++)
+ qsort((void *) tidRanges, numRanges, sizeof(TidRange), tidrange_comparator);
+ lastRange = 0;
+ for (i = 1; i < numRanges; i++)
{
- if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
- tidList[++lastTid] = tidList[i];
+ if (!MergeTidRanges(&tidRanges[lastRange], &tidRanges[i]))
+ tidRanges[++lastRange] = tidRanges[i];
}
- numTids = lastTid + 1;
+ numRanges = lastRange + 1;
}
- tidstate->tss_TidList = tidList;
- tidstate->tss_NumTids = numTids;
+ tidstate->tss_TidRanges = tidRanges;
+ tidstate->tss_NumRanges = numRanges;
tidstate->tss_TidPtr = -1;
}
/*
+ * If two ranges overlap, merge them into one.
+ * Assumes the two ranges are already ordered by (first, last).
+ * Returns true if they were merged.
+ */
+static bool
+MergeTidRanges(TidRange * a, TidRange * b)
+{
+	ItemPointerData a_last = a->last;
+	ItemPointerData b_last = b->last;
+
+	/* An end point with an invalid offset is treated as the start point. */
+	if (!ItemPointerIsValid(&a_last))
+		a_last = a->first;
+	if (!ItemPointerIsValid(&b_last))
+		b_last = b->first;
+
+	/* Disjoint only if a ends strictly before b starts; sharing a TID overlaps. */
+	if (itemptr_comparator(&a_last, &b->first) < 0)
+		return false;
+	/* Keep the later end, so a range nested inside a cannot shrink it. */
+	a->last = (itemptr_comparator(&a_last, &b_last) < 0) ? b_last : a_last;
+	return true;
+}
+
+/*
* qsort comparator for ItemPointerData items
*/
static int
@@ -291,6 +580,86 @@ itemptr_comparator(const void *a, const void *b)
return 0;
}
+/*
+ * qsort comparator for TidRange items
+ */
+static int
+tidrange_comparator(const void *a, const void *b)
+{
+	const TidRange *lhs = (const TidRange *) a;
+	const TidRange *rhs = (const TidRange *) b;
+	int result = itemptr_comparator(&lhs->first, &rhs->first);
+
+	/* Equal starting TIDs: fall back to comparing the ending TIDs. */
+	if (result == 0)
+		result = itemptr_comparator(&lhs->last, &rhs->last);
+	return result;
+}
+
+static HeapScanDesc
+BeginTidRangeScan(TidScanState *node, TidRange * range)
+{
+	BlockNumber start_blk = ItemPointerGetBlockNumberNoCheck(&range->first);
+	BlockNumber end_blk = ItemPointerGetBlockNumberNoCheck(&range->last);
+	HeapScanDesc scan = node->ss.ss_currentScanDesc;
+
+	/* Reuse the node's heap scan if it has one; otherwise open it now. */
+	if (scan != NULL)
+		heap_rescan(scan, NULL);
+	else
+	{
+		scan = heap_beginscan_strat(node->ss.ss_currentRelation,
+									node->ss.ps.state->es_snapshot,
+									0, NULL,
+									false, false);
+		node->ss.ss_currentScanDesc = scan;
+	}
+
+	/* Limit the scan to the blocks from range->first through range->last. */
+	heap_setscanlimits(scan, start_blk, end_blk - start_blk + 1);
+	node->tss_inScan = true;
+	return scan;
+}
+
+/*
+ * Fetch the next tuple from scandesc, in the given direction, whose TID lies
+ * within range (both ends inclusive).  Returns NULL once heap_getnext() has
+ * no more tuples.  Presumably the caller has already limited the scan to the
+ * range's blocks, as BeginTidRangeScan does.
+ */
+static HeapTuple
+NextInTidRange(HeapScanDesc scandesc, ScanDirection direction, TidRange * range)
+{
+	BlockNumber first_block = ItemPointerGetBlockNumber(&range->first);
+	OffsetNumber first_offset = ItemPointerGetOffsetNumber(&range->first);
+	BlockNumber last_block = ItemPointerGetBlockNumber(&range->last);
+	OffsetNumber last_offset = ItemPointerGetOffsetNumber(&range->last);
+	HeapTuple tuple;
+
+	while ((tuple = heap_getnext(scandesc, direction)) != NULL)
+	{
+		BlockNumber block = ItemPointerGetBlockNumber(&tuple->t_self);
+		OffsetNumber offset = ItemPointerGetOffsetNumber(&tuple->t_self);
+		bool before_first;
+		bool after_last;
+
+		/*
+		 * Only offsets on the boundary blocks are checked; tuples on any
+		 * other block are returned as they come.
+		 *
+		 * TODO if scanning forward, can stop as soon as we see a tuple
+		 * greater than last_offset; similarly with backward, less than
+		 * first_offset.
+		 */
+		before_first = (block == first_block && offset < first_offset);
+		after_last = (block == last_block && offset > last_offset);
+		if (!before_first && !after_last)
+			break;
+	}
+
+	return tuple;
+}
+
/* ----------------------------------------------------------------
* TidNext
*
@@ -302,6 +671,7 @@ itemptr_comparator(const void *a, const void *b)
static TupleTableSlot *
TidNext(TidScanState *node)
{
+ HeapScanDesc scandesc;
EState *estate;
ScanDirection direction;
Snapshot snapshot;
@@ -309,105 +679,149 @@ TidNext(TidScanState *node)
HeapTuple tuple;
TupleTableSlot *slot;
Buffer buffer = InvalidBuffer;
- ItemPointerData *tidList;
- int numTids;
+ int numRanges;
bool bBackward;
/*
* extract necessary information from tid scan node
*/
+ scandesc = node->ss.ss_currentScanDesc;
estate = node->ss.ps.state;
direction = estate->es_direction;
snapshot = estate->es_snapshot;
heapRelation = node->ss.ss_currentRelation;
slot = node->ss.ss_ScanTupleSlot;
- /*
- * First time through, compute the list of TIDs to be visited
- */
- if (node->tss_TidList == NULL)
+ /* First time through, compute the list of TID ranges to be visited */
+ if (node->tss_TidRanges == NULL)
+ {
TidListEval(node);
- tidList = node->tss_TidList;
- numTids = node->tss_NumTids;
+ node->tss_TidPtr = -1;
+ }
- /*
- * We use node->tss_htup as the tuple pointer; note this can't just be a
- * local variable here, as the scan tuple slot will keep a pointer to it.
- */
- tuple = &(node->tss_htup);
+ numRanges = node->tss_NumRanges;
- /*
- * Initialize or advance scan position, depending on direction.
- */
- bBackward = ScanDirectionIsBackward(direction);
- if (bBackward)
+ /* If the plan direction is backward, invert the direction. */
+ if (ScanDirectionIsBackward(((TidScan *) node->ss.ps.plan)->direction))
{
- if (node->tss_TidPtr < 0)
- {
- /* initialize for backward scan */
- node->tss_TidPtr = numTids - 1;
- }
- else
- node->tss_TidPtr--;
+ if (ScanDirectionIsForward(direction))
+ direction = BackwardScanDirection;
+ else if (ScanDirectionIsBackward(direction))
+ direction = ForwardScanDirection;
}
- else
+
+ tuple = NULL;
+ for (;;)
{
- if (node->tss_TidPtr < 0)
+ TidRange *currentRange;
+
+ if (!node->tss_inScan)
{
- /* initialize for forward scan */
- node->tss_TidPtr = 0;
+ /* Initialize or advance scan position, depending on direction. */
+ bBackward = ScanDirectionIsBackward(direction);
+ if (bBackward)
+ {
+ if (node->tss_TidPtr < 0)
+ {
+ /* initialize for backward scan */
+ node->tss_TidPtr = numRanges - 1;
+ }
+ else
+ node->tss_TidPtr--;
+ }
+ else
+ {
+ if (node->tss_TidPtr < 0)
+ {
+ /* initialize for forward scan */
+ node->tss_TidPtr = 0;
+ }
+ else
+ node->tss_TidPtr++;
+ }
}
- else
- node->tss_TidPtr++;
- }
- while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
- {
- tuple->t_self = tidList[node->tss_TidPtr];
+ if (node->tss_TidPtr >= numRanges || node->tss_TidPtr < 0)
+ break;
+
+ currentRange = &node->tss_TidRanges[node->tss_TidPtr];
- /*
- * For WHERE CURRENT OF, the tuple retrieved from the cursor might
- * since have been updated; if so, we should fetch the version that is
- * current according to our snapshot.
- */
+ /* TODO ranges of size 1 should also use a simple tuple fetch */
if (node->tss_isCurrentOf)
- heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
-
- if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
{
/*
- * Store the scanned tuple in the scan tuple slot of the scan
- * state. Eventually we will only do this and not return a tuple.
+ * We use node->tss_htup as the tuple pointer; note this can't
+ * just be a local variable here, as the scan tuple slot will keep
+ * a pointer to it.
*/
- ExecStoreBufferHeapTuple(tuple, /* tuple to store */
- slot, /* slot to store in */
- buffer); /* buffer associated with
- * tuple */
+ tuple = &(node->tss_htup);
+ tuple->t_self = currentRange->first;
/*
- * At this point we have an extra pin on the buffer, because
- * ExecStoreHeapTuple incremented the pin count. Drop our local
- * pin.
+ * For WHERE CURRENT OF, the tuple retrieved from the cursor might
+ * since have been updated; if so, we should fetch the version
+ * that is current according to our snapshot.
*/
- ReleaseBuffer(buffer);
+ if (node->tss_isCurrentOf)
+ heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
- return slot;
+ if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
+ {
+ /*
+ * Store the scanned tuple in the scan tuple slot of the scan
+ * state. Eventually we will only do this and not return a
+ * tuple.
+ */
+ ExecStoreBufferHeapTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ buffer); /* buffer associated with
+ * tuple */
+
+ /*
+ * At this point we have an extra pin on the buffer, because
+ * ExecStoreHeapTuple incremented the pin count. Drop our
+ * local pin.
+ */
+ ReleaseBuffer(buffer);
+
+ return slot;
+ }
+ else
+ {
+ tuple = NULL;
+ }
}
- /* Bad TID or failed snapshot qual; try next */
- if (bBackward)
- node->tss_TidPtr--;
else
- node->tss_TidPtr++;
+ {
+ if (!node->tss_inScan)
+ scandesc = BeginTidRangeScan(node, currentRange);
- CHECK_FOR_INTERRUPTS();
+ tuple = NextInTidRange(scandesc, direction, currentRange);
+ if (tuple)
+ break;
+
+ node->tss_inScan = false;
+ }
}
/*
- * if we get here it means the tid scan failed so we are at the end of the
- * scan..
+ * Store the tuple fetched by the range scan, along with the scan's
+ * current buffer (rs_cbuf), in our scan tuple slot and return the slot.
+ * Tuples returned by heap_getnext() are pointers onto disk pages, not
+ * palloc'd copies, so they must not be pfree()'d; the slot holds its own
+ * buffer refcount until it is cleared. With no tuple left, the slot is
+ * cleared instead, which marks the end of the scan.
*/
- return ExecClearTuple(slot);
+ if (tuple)
+ ExecStoreBufferHeapTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ scandesc->rs_cbuf); /* buffer associated
+ * with this tuple */
+ else
+ ExecClearTuple(slot);
+
+ return slot;
}
/*
@@ -460,11 +874,13 @@ ExecTidScan(PlanState *pstate)
void
ExecReScanTidScan(TidScanState *node)
{
- if (node->tss_TidList)
- pfree(node->tss_TidList);
- node->tss_TidList = NULL;
- node->tss_NumTids = 0;
+ if (node->tss_TidRanges)
+ pfree(node->tss_TidRanges);
+
+ node->tss_TidRanges = NULL;
+ node->tss_NumRanges = 0;
node->tss_TidPtr = -1;
+ node->tss_inScan = false;
ExecScanReScan(&node->ss);
}
@@ -479,6 +895,8 @@ ExecReScanTidScan(TidScanState *node)
void
ExecEndTidScan(TidScanState *node)
{
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
+
/*
* Free the exprcontext
*/
@@ -490,6 +908,10 @@ ExecEndTidScan(TidScanState *node)
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
ExecClearTuple(node->ss.ss_ScanTupleSlot);
+ /* close heap scan */
+ if (scan != NULL)
+ heap_endscan(scan);
+
/*
* close the heap relation.
*/
@@ -529,11 +951,12 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
ExecAssignExprContext(estate, &tidstate->ss.ps);
/*
- * mark tid list as not computed yet
+ * mark tid range list as not computed yet
*/
- tidstate->tss_TidList = NULL;
- tidstate->tss_NumTids = 0;
+ tidstate->tss_TidRanges = NULL;
+ tidstate->tss_NumRanges = 0;
tidstate->tss_TidPtr = -1;
+ tidstate->tss_inScan = false;
/*
* open the base relation and acquire appropriate lock on it.
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 7c8220c..5f84984 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -583,6 +583,7 @@ _copyTidScan(const TidScan *from)
* copy remainder of node
*/
COPY_NODE_FIELD(tidquals);
+ COPY_SCALAR_FIELD(direction);
return newnode;
}
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 93f1e2c..e20ef0e 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -619,6 +619,7 @@ _outTidScan(StringInfo str, const TidScan *node)
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(tidquals);
+ WRITE_ENUM_FIELD(direction, ScanDirection);
}
static void
@@ -1895,6 +1896,7 @@ _outTidPath(StringInfo str, const TidPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(tidquals);
+ WRITE_ENUM_FIELD(direction, ScanDirection);
}
static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 519deab..79de340 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1848,6 +1848,7 @@ _readTidScan(void)
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(tidquals);
+ READ_ENUM_FIELD(direction, ScanDirection);
READ_DONE();
}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 7bf67a0..72b4fc6 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1185,8 +1185,11 @@ cost_tidscan(Path *path, PlannerInfo *root,
Cost cpu_per_tuple;
QualCost tid_qual_cost;
int ntuples;
+ int nrandompages;
+ int nseqpages;
ListCell *l;
double spc_random_page_cost;
+ double spc_seq_page_cost;
/* Should only be applied to base relations */
Assert(baserel->relid > 0);
@@ -1200,6 +1203,8 @@ cost_tidscan(Path *path, PlannerInfo *root,
/* Count how many tuples we expect to retrieve */
ntuples = 0;
+ nrandompages = 0;
+ nseqpages = 0;
foreach(l, tidquals)
{
if (IsA(lfirst(l), ScalarArrayOpExpr))
@@ -1207,19 +1212,37 @@ cost_tidscan(Path *path, PlannerInfo *root,
/* Each element of the array yields 1 tuple */
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) lfirst(l);
Node *arraynode = (Node *) lsecond(saop->args);
+ int array_len = estimate_array_length(arraynode);
- ntuples += estimate_array_length(arraynode);
+ ntuples += array_len;
+ nrandompages += array_len;
}
else if (IsA(lfirst(l), CurrentOfExpr))
{
/* CURRENT OF yields 1 tuple */
isCurrentOf = true;
ntuples++;
+ nrandompages++;
}
else
{
- /* It's just CTID = something, count 1 tuple */
- ntuples++;
+ /*
+ * For anything else, we'll use the normal selectivity estimate.
+ * Count the first page as a random page, the rest as sequential.
+ */
+ Selectivity selectivity = clause_selectivity(root, lfirst(l),
+ baserel->relid,
+ JOIN_INNER,
+ NULL);
+ BlockNumber pages = selectivity * baserel->pages;
+
+ if (pages <= 0)
+ pages = 1;
+
+ /* TODO decide what the costs should be */
+ ntuples += selectivity * baserel->tuples;
+ nseqpages += pages - 1;
+ nrandompages++;
}
}
@@ -1248,10 +1271,10 @@ cost_tidscan(Path *path, PlannerInfo *root,
/* fetch estimated page cost for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
&spc_random_page_cost,
- NULL);
+ &spc_seq_page_cost);
- /* disk costs --- assume each tuple on a different page */
- run_cost += spc_random_page_cost * ntuples;
+ /* disk costs: random page fetches plus sequentially-read pages, each at its per-page cost */
+ run_cost += spc_random_page_cost * nrandompages + spc_seq_page_cost * nseqpages;
/* Add scanning CPU costs */
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9..b847151 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -18,6 +18,9 @@
#include "postgres.h"
#include "access/stratnum.h"
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
@@ -848,6 +851,22 @@ build_join_pathkeys(PlannerInfo *root,
return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
}
+/*
+ * build_tidscan_pathkeys
+ * Build the path keys for ORDER BY ctid ASC (forward direction) or DESC (any other direction).
+ */
+List *
+build_tidscan_pathkeys(PlannerInfo *root,
+ RelOptInfo *rel,
+ ScanDirection direction)
+{
+ Oid opno = (direction == ForwardScanDirection) ? TIDLessOperator : TIDGreaterOperator; /* operator OIDs are Oid, not int */
+ Var *varexpr = makeVar(rel->relid, SelfItemPointerAttributeNumber, TIDOID, -1, InvalidOid, 0); /* this rel's ctid column */
+ List *pathkeys = build_expression_pathkey(root, (Expr *) varexpr, NULL, opno, rel->relids, true);
+
+ return pathkeys;
+}
+
/****************************************************************************
* PATHKEYS AND SORT CLAUSES
****************************************************************************/
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 3bb5b8d..8839aed 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -4,13 +4,16 @@
* Routines to determine which TID conditions are usable for scanning
* a given relation, and create TidPaths accordingly.
*
- * What we are looking for here is WHERE conditions of the form
- * "CTID = pseudoconstant", which can be implemented by just fetching
- * the tuple directly via heap_fetch(). We can also handle OR'd conditions
- * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr
- * conditions of the form CTID = ANY(pseudoconstant_array). In particular
- * this allows
- * WHERE ctid IN (tid1, tid2, ...)
+ * What we are looking for here is WHERE conditions of the forms:
+ * - "CTID = c", which can be implemented by just fetching
+ * the tuple directly via heap_fetch().
+ * - "CTID IN (pseudoconstant, ...)" or "CTID = ANY(pseudoconstant_array)"
+ * - "CTID > pseudoconstant", etc. for >, >=, <, and <=.
+ * - "CTID > pseudoconstant AND CTID < pseudoconstant", etc., with up to one
+ * lower bound and one upper bound.
+ *
+ * We can also handle OR'd conditions of the above form, such as
+ * "(CTID = const1) OR (CTID >= const2) OR CTID IN (...)".
*
* We also support "WHERE CURRENT OF cursor" conditions (CurrentOfExpr),
* which amount to "CTID = run-time-determined-TID". These could in
@@ -46,32 +49,46 @@
#include "optimizer/restrictinfo.h"
-static bool IsTidEqualClause(OpExpr *node, int varno);
+static bool IsTidVar(Var *var, int varno);
+static bool IsTidComparison(OpExpr *node, int varno, Oid expected_comparison_operator);
static bool IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno);
+static bool IsUsableRangeQual(Node *expr, int varno, bool want_lower_bound);
+static List *MakeTidRangeQuals(List *quals);
+static List *TidCompoundRangeQualFromExpr(Node *expr, int varno);
static List *TidQualFromExpr(Node *expr, int varno);
static List *TidQualFromBaseRestrictinfo(RelOptInfo *rel);
+static bool
+IsTidVar(Var *var, int varno)
+{
+ /* True only for the TID-typed self-item-pointer column of relation varno with varlevelsup == 0. */
+ return var->varno == varno && var->varlevelsup == 0 &&
+ var->varattno == SelfItemPointerAttributeNumber &&
+ var->vartype == TIDOID;
+}
+
/*
* Check to see if an opclause is of the form
- * CTID = pseudoconstant
+ * CTID OP pseudoconstant
* or
- * pseudoconstant = CTID
+ * pseudoconstant OP CTID
+ * where OP is the expected comparison operator.
*
* We check that the CTID Var belongs to relation "varno". That is probably
* redundant considering this is only applied to restriction clauses, but
* let's be safe.
*/
static bool
-IsTidEqualClause(OpExpr *node, int varno)
+IsTidComparison(OpExpr *node, int varno, Oid expected_comparison_operator)
{
Node *arg1,
*arg2,
*other;
Var *var;
- /* Operator must be tideq */
- if (node->opno != TIDEqualOperator)
+ /* Operator must be the expected one */
+ if (node->opno != expected_comparison_operator)
return false;
if (list_length(node->args) != 2)
return false;
@@ -83,19 +100,13 @@ IsTidEqualClause(OpExpr *node, int varno)
if (arg1 && IsA(arg1, Var))
{
var = (Var *) arg1;
- if (var->varattno == SelfItemPointerAttributeNumber &&
- var->vartype == TIDOID &&
- var->varno == varno &&
- var->varlevelsup == 0)
+ if (IsTidVar(var, varno))
other = arg2;
}
if (!other && arg2 && IsA(arg2, Var))
{
var = (Var *) arg2;
- if (var->varattno == SelfItemPointerAttributeNumber &&
- var->vartype == TIDOID &&
- var->varno == varno &&
- var->varlevelsup == 0)
+ if (IsTidVar(var, varno))
other = arg1;
}
if (!other)
@@ -110,6 +121,17 @@ IsTidEqualClause(OpExpr *node, int varno)
return true; /* success */
}
+#define IsTidEqualClause(node, varno) IsTidComparison(node, varno, TIDEqualOperator) /* per-operator shorthands for IsTidComparison */
+#define IsTidLTClause(node, varno) IsTidComparison(node, varno, TIDLessOperator)
+#define IsTidLEClause(node, varno) IsTidComparison(node, varno, TIDLessEqOperator)
+#define IsTidGTClause(node, varno) IsTidComparison(node, varno, TIDGreaterOperator)
+#define IsTidGEClause(node, varno) IsTidComparison(node, varno, TIDGreaterEqOperator)
+/* True for any of the four inequality operators; node and varno are each expanded four times. */
+#define IsTidRangeClause(node, varno) (IsTidLTClause(node, varno) || \
+ IsTidLEClause(node, varno) || \
+ IsTidGTClause(node, varno) || \
+ IsTidGEClause(node, varno))
+
/*
* Check to see if a clause is of the form
* CTID = ANY (pseudoconstant_array)
@@ -134,10 +156,7 @@ IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno)
{
Var *var = (Var *) arg1;
- if (var->varattno == SelfItemPointerAttributeNumber &&
- var->vartype == TIDOID &&
- var->varno == varno &&
- var->varlevelsup == 0)
+ if (IsTidVar(var, varno))
{
/* The other argument must be a pseudoconstant */
if (is_pseudo_constant_clause(arg2))
@@ -149,6 +168,76 @@ IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno)
}
/*
+ * IsUsableRangeQual
+ * Report whether expr is a CTID range qual bounding the wanted side of the range.
+ */
+static bool
+IsUsableRangeQual(Node *expr, int varno, bool want_lower_bound)
+{
+ OpExpr *opexpr = (OpExpr *) expr;
+ Node *lhs;
+ bool lower;
+
+ if (!is_opclause(expr) || !IsTidRangeClause(opexpr, varno))
+ return false;
+
+ /* > and >= bound from below, unless the CTID Var is not the left operand */
+ lower = IsTidGTClause(opexpr, varno) || IsTidGEClause(opexpr, varno);
+ lhs = get_leftop((Expr *) expr);
+ if (!(IsA(lhs, Var) && IsTidVar((Var *) lhs, varno)))
+ lower = !lower;
+ return lower == want_lower_bound;
+}
+
+static List *
+MakeTidRangeQuals(List *quals)
+{
+ /* A lone qual is handed back unchanged; otherwise wrap the list in one AND clause. */
+ if (list_length(quals) != 1)
+ return list_make1(make_andclause(quals));
+ return quals;
+}
+
+/*
+ * TidCompoundRangeQualFromExpr
+ *
+ * Pick out at most one lower-bound and one upper-bound CTID range qual from
+ * the arguments of the given boolean expression; NIL means none was found.
+ */
+static List *
+TidCompoundRangeQualFromExpr(Node *expr, int varno)
+{
+ BoolExpr *boolexpr = (BoolExpr *) expr;
+ List *bounds = NIL;
+ bool have_lower = false;
+ bool have_upper = false;
+ ListCell *lc;
+
+ foreach(lc, boolexpr->args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ /* Only the first qual seen for each side of the range is kept. */
+ if (!have_lower && IsUsableRangeQual(arg, varno, true))
+ {
+ have_lower = true;
+ bounds = lappend(bounds, arg);
+ }
+
+ if (!have_upper && IsUsableRangeQual(arg, varno, false))
+ {
+ have_upper = true;
+ bounds = lappend(bounds, arg);
+ }
+ }
+
+ /* Nothing usable: report NIL; otherwise package what was collected. */
+ if (bounds == NIL)
+ return NIL;
+ return MakeTidRangeQuals(bounds);
+}
+
+/*
* Extract a set of CTID conditions from the given qual expression
*
* Returns a List of CTID qual expressions (with implicit OR semantics
@@ -174,6 +263,8 @@ TidQualFromExpr(Node *expr, int varno)
/* base case: check for tideq opclause */
if (IsTidEqualClause((OpExpr *) expr, varno))
rlst = list_make1(expr);
+ else if (IsTidRangeClause((OpExpr *) expr, varno))
+ rlst = list_make1(expr);
}
else if (expr && IsA(expr, ScalarArrayOpExpr))
{
@@ -189,11 +280,18 @@ TidQualFromExpr(Node *expr, int varno)
}
else if (and_clause(expr))
{
- foreach(l, ((BoolExpr *) expr)->args)
+ /* look for a range qual in the clause */
+ rlst = TidCompoundRangeQualFromExpr(expr, varno);
+
+ /* if no range qual was found, look for any other TID qual */
+ if (!rlst)
{
- rlst = TidQualFromExpr((Node *) lfirst(l), varno);
- if (rlst)
- break;
+ foreach(l, ((BoolExpr *) expr)->args)
+ {
+ rlst = TidQualFromExpr((Node *) lfirst(l), varno);
+ if (rlst)
+ break;
+ }
}
}
else if (or_clause(expr))
@@ -217,17 +315,28 @@ TidQualFromExpr(Node *expr, int varno)
}
/*
- * Extract a set of CTID conditions from the rel's baserestrictinfo list
+ * Extract a set of CTID conditions from the rel's baserestrictinfo list
+ *
+ * Normally we just use the first RestrictInfo item with some usable quals,
+ * but it's also possible for a good compound range qual, such as
+ * "CTID > ? AND CTID < ?", to be split across two items. So we look for
+ * lower/upper bound range quals in all items and use them if any were found.
+ * In principle there might be more than one lower or upper bound, but we
+ * just use the first one found of each type.
*/
static List *
TidQualFromBaseRestrictinfo(RelOptInfo *rel)
{
List *rlst = NIL;
ListCell *l;
+ bool found_lower = false;
+ bool found_upper = false;
+ List *found_quals = NIL;
foreach(l, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Node *clause = (Node *) rinfo->clause;
/*
* If clause must wait till after some lower-security-level
@@ -236,10 +345,31 @@ TidQualFromBaseRestrictinfo(RelOptInfo *rel)
if (!restriction_is_securely_promotable(rinfo, rel))
continue;
- rlst = TidQualFromExpr((Node *) rinfo->clause, rel->relid);
+ /* Look for lower and upper bound range quals. */
+ if (!found_lower && IsUsableRangeQual((Node *) clause, rel->relid, true))
+ {
+ found_lower = true;
+ found_quals = lappend(found_quals, clause);
+ continue;
+ }
+
+ if (!found_upper && IsUsableRangeQual((Node *) clause, rel->relid, false))
+ {
+ found_upper = true;
+ found_quals = lappend(found_quals, clause);
+ continue;
+ }
+
+ /* Look for other TID quals. */
+ rlst = TidQualFromExpr((Node *) clause, rel->relid);
if (rlst)
break;
}
+
+ /* Use a range qual if any were found. */
+ if (found_quals)
+ rlst = MakeTidRangeQuals(found_quals);
+
return rlst;
}
@@ -247,12 +377,16 @@ TidQualFromBaseRestrictinfo(RelOptInfo *rel)
* create_tidscan_paths
* Create paths corresponding to direct TID scans of the given rel.
*
+ * Path keys and direction will be set on the scans if it looks useful.
+ *
* Candidate paths are added to the rel's pathlist (using add_path).
*/
void
create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
{
Relids required_outer;
+ List *pathkeys = NULL;
+ ScanDirection direction = ForwardScanDirection;
List *tidquals;
/*
@@ -262,9 +396,37 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
*/
required_outer = rel->lateral_relids;
+ /*
+ * Try to determine the best scan direction and create some useful
+ * pathkeys.
+ */
+ if (has_useful_pathkeys(root, rel))
+ {
+ /*
+ * Build path keys corresponding to ORDER BY ctid ASC, and check
+ * whether they will be useful for this scan. If not, build path keys
+ * for DESC, and try that; set the direction to BackwardScanDirection
+ * if so. If neither of them will be useful, no path keys will be
+ * set.
+ */
+ pathkeys = build_tidscan_pathkeys(root, rel, ForwardScanDirection);
+ if (!pathkeys_contained_in(pathkeys, root->query_pathkeys))
+ {
+ pathkeys = build_tidscan_pathkeys(root, rel, BackwardScanDirection);
+ if (pathkeys_contained_in(pathkeys, root->query_pathkeys))
+ direction = BackwardScanDirection;
+ else
+ pathkeys = NULL;
+ }
+ }
+
tidquals = TidQualFromBaseRestrictinfo(rel);
- if (tidquals)
- add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals,
- required_outer));
+ /*
+ * If there are tidquals or some useful pathkeys were found, then it's
+ * worth generating a tidscan path.
+ */
+ if (tidquals || pathkeys)
+ add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, pathkeys,
+ direction, required_outer));
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index ae41c9e..5452730 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -185,7 +185,7 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
List *bitmapqualorig,
Index scanrelid);
static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
- List *tidquals);
+ List *tidquals, ScanDirection direction);
static SubqueryScan *make_subqueryscan(List *qptlist,
List *qpqual,
Index scanrelid,
@@ -3086,6 +3086,21 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
}
/*
+ * In the case of a compound range qual, the two parts may have come
+ * from different RestrictInfos. So remove each part separately.
+ */
+ if (list_length(tidquals) == 1)
+ {
+ Node *qual = linitial(tidquals);
+
+ if (and_clause(qual))
+ {
+ BoolExpr *and_qual = ((BoolExpr *) qual);
+ scan_clauses = list_difference(scan_clauses, and_qual->args);
+ }
+ }
+
+ /*
* Remove any clauses that are TID quals. This is a bit tricky since the
* tidquals list has implicit OR semantics.
*/
@@ -3097,7 +3112,9 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
scan_plan = make_tidscan(tlist,
scan_clauses,
scan_relid,
- tidquals);
+ tidquals,
+ best_path->direction
+ );
copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
@@ -5179,7 +5196,8 @@ static TidScan *
make_tidscan(List *qptlist,
List *qpqual,
Index scanrelid,
- List *tidquals)
+ List *tidquals,
+ ScanDirection direction)
{
TidScan *node = makeNode(TidScan);
Plan *plan = &node->scan.plan;
@@ -5190,6 +5208,7 @@ make_tidscan(List *qptlist,
plan->righttree = NULL;
node->scan.scanrelid = scanrelid;
node->tidquals = tidquals;
+ node->direction = direction;
return node;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c5aaaf5..e2d51a9 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1186,6 +1186,7 @@ create_bitmap_or_path(PlannerInfo *root,
*/
TidPath *
create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
+ List *pathkeys, ScanDirection direction,
Relids required_outer)
{
TidPath *pathnode = makeNode(TidPath);
@@ -1198,9 +1199,10 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = 0;
- pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.pathkeys = pathkeys;
pathnode->tidquals = tidquals;
+ pathnode->direction = direction;
cost_tidscan(&pathnode->path, root, rel, tidquals,
pathnode->path.param_info);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index b8c0e03..eaacab7 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -572,6 +572,30 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
if (!HeapTupleIsValid(vardata->statsTuple))
{
+ /*
+ * There are no stats for system columns, but for CTID we can estimate
+ * based on table size.
+ */
+ if (vardata->var && IsA(vardata->var, Var) &&
+ ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
+ {
+ ItemPointer itemptr;
+ BlockNumber block;
+
+ /* If the relation's empty, we're going to read all of it. */
+ if (vardata->rel->pages == 0)
+ return 1.0;
+
+ itemptr = (ItemPointer) DatumGetPointer(constval);
+ block = ItemPointerGetBlockNumberNoCheck(itemptr);
+ selec = block / (double) vardata->rel->pages;
+ if (isgt)
+ selec = 1.0 - selec;
+
+ CLAMP_PROBABILITY(selec);
+ return selec;
+ }
+
/* no stats available, so default result */
return DEFAULT_INEQ_SEL;
}
@@ -1786,6 +1810,15 @@ nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
return (Selectivity) 0; /* keep compiler quiet */
}
}
+ else if (vardata.var && IsA(vardata.var, Var) &&
+ ((Var *) vardata.var)->varattno == SelfItemPointerAttributeNumber)
+ {
+ /*
+ * There are no stats for system columns, but we know CTID is never
+ * NULL.
+ */
+ selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
+ }
else
{
/*
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index d9b6bad..cdd2cd3 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -156,15 +156,15 @@
oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt',
oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' },
-{ oid => '2800', descr => 'greater than',
+{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than',
oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt',
oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' },
-{ oid => '2801', descr => 'less than or equal',
+{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal',
oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle',
oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' },
-{ oid => '2802', descr => 'greater than or equal',
+{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal',
oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge',
oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' },
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 03ad516..ee6a04d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1477,6 +1477,8 @@ typedef struct BitmapHeapScanState
ParallelBitmapHeapState *pstate;
} BitmapHeapScanState;
+typedef struct TidRange TidRange;
+
/* ----------------
* TidScanState information
*
@@ -1493,10 +1495,11 @@ typedef struct TidScanState
ScanState ss; /* its first field is NodeTag */
List *tss_tidexprs;
bool tss_isCurrentOf;
- int tss_NumTids;
+ int tss_NumRanges;
int tss_TidPtr;
- ItemPointerData *tss_TidList;
- HeapTupleData tss_htup;
+ TidRange *tss_TidRanges;
+ bool tss_inScan;
+ HeapTupleData tss_htup; /* for current-of and single TID fetches */
} TidScanState;
/* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 7c2abbd..96d30aa 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -492,6 +492,7 @@ typedef struct TidScan
{
Scan scan;
List *tidquals; /* qual(s) involving CTID = something */
+ ScanDirection direction;
} TidScan;
/* ----------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index adb4265..2fee1e1 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1229,14 +1229,24 @@ typedef struct BitmapOrPath
/*
* TidPath represents a scan by TID
*
- * tidquals is an implicitly OR'ed list of qual expressions of the form
- * "CTID = pseudoconstant" or "CTID = ANY(pseudoconstant_array)".
+ * tidquals is an implicitly OR'ed list of qual expressions of the forms:
+ * - "CTID = pseudoconstant"
+ * - "CTID = ANY(pseudoconstant_array)"
+ * - "CURRENT OF cursor"
+ * - "CTID relop pseudoconstant"
+ * - "(CTID relop pseudoconstant) AND (CTID relop pseudoconstant)"
+ *
+ * It is permissible for the CTID variable to be the LHS or RHS of operator
+ * expressions; in the last case, there is always a lower bound and upper bound,
+ * in any order. If tidquals is empty, all CTIDs will match.
+ *
* Note they are bare expressions, not RestrictInfos.
*/
typedef struct TidPath
{
Path path;
- List *tidquals; /* qual(s) involving CTID = something */
+ List *tidquals;
+ ScanDirection direction;
} TidPath;
/*
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 7c5ff22..a0a88a5 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -63,7 +63,8 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals);
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
- List *tidquals, Relids required_outer);
+ List *tidquals, List *pathkeys, ScanDirection direction,
+ Relids required_outer);
extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
Relids required_outer,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde30..9d0699e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -211,6 +211,9 @@ extern List *build_join_pathkeys(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
List *outer_pathkeys);
+extern List *build_tidscan_pathkeys(PlannerInfo *root,
+ RelOptInfo *rel,
+ ScanDirection direction);
extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
List *sortclauses,
List *tlist);
diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out
index 521ed1b..4b9564b 100644
--- a/src/test/regress/expected/tidscan.out
+++ b/src/test/regress/expected/tidscan.out
@@ -116,6 +116,39 @@ FETCH FIRST FROM c;
(1 row)
ROLLBACK;
+-- check that ordering on a tidscan doesn't require a sort
+EXPLAIN (COSTS OFF)
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid;
+ QUERY PLAN
+---------------------------------------------------------------
+ Tid Scan on tidscan
+ TID Cond: (ctid = ANY ('{"(0,2)","(0,1)","(0,3)"}'::tid[]))
+(2 rows)
+
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid;
+ ctid | id
+-------+----
+ (0,1) | 1
+ (0,2) | 2
+ (0,3) | 3
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC;
+ QUERY PLAN
+---------------------------------------------------------------
+ Tid Scan Backward on tidscan
+ TID Cond: (ctid = ANY ('{"(0,2)","(0,1)","(0,3)"}'::tid[]))
+(2 rows)
+
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC;
+ ctid | id
+-------+----
+ (0,3) | 3
+ (0,2) | 2
+ (0,1) | 1
+(3 rows)
+
-- tidscan via CURRENT OF
BEGIN;
DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan;
@@ -177,3 +210,315 @@ UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
ERROR: cursor "c" is not positioned on a row
ROLLBACK;
DROP TABLE tidscan;
+-- tests for tidrangescans
+CREATE TABLE tidrangescan(id integer, data text);
+INSERT INTO tidrangescan SELECT i,'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' FROM generate_series(1,1000) AS s(i);
+DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR substring(ctid::text from '\((\d+),')::integer >= 10;;
+VACUUM tidrangescan;
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)';
+ ctid | data
+--------+----------------------------------------------------------------------------------
+ (0,1) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,4) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)';
+ QUERY PLAN
+------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid <= '(1,5)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)';
+ ctid | data
+--------+----------------------------------------------------------------------------------
+ (0,1) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,4) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (0,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (1,1) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (1,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (1,3) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (1,4) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (1,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(15 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid < '(0,0)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)';
+ ctid | data
+------+------
+(0 rows)
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid > '(9,8)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)';
+ ctid | data
+--------+----------------------------------------------------------------------------------
+ (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid;
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: ('(9,8)'::tid < ctid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid;
+ ctid | data
+--------+----------------------------------------------------------------------------------
+ (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)';
+ QUERY PLAN
+------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid >= '(9,8)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)';
+ ctid | data
+--------+----------------------------------------------------------------------------------
+ (9,8) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (9,9) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (9,10) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)';
+ QUERY PLAN
+--------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (ctid >= '(100,0)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)';
+ ctid | data
+------+------
+(0 rows)
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid;
+ QUERY PLAN
+----------------------------------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: ((ctid > '(4,4)'::tid) AND ('(4,7)'::tid >= ctid))
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid;
+ ctid | data
+-------+----------------------------------------------------------------------------------
+ (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)';
+ QUERY PLAN
+----------------------------------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: (('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid))
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)';
+ ctid | data
+-------+----------------------------------------------------------------------------------
+ (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(3 rows)
+
+-- combinations
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)';
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR (ctid = '(2,2)'::tid))
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)';
+ ctid | data
+-------+----------------------------------------------------------------------------------
+ (2,2) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo';
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------
+ Tid Scan on tidrangescan
+ TID Cond: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR (ctid = '(2,2)'::tid))
+ Filter: ((('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) OR ((ctid = '(2,2)'::tid) AND (data = 'foo'::text)))
+(3 rows)
+
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo';
+ ctid | data
+-------+----------------------------------------------------------------------------------
+ (4,5) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,6) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ (4,7) | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+(3 rows)
+
+-- ordering with no quals should use tid range scan
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan ORDER BY ctid ASC;
+ QUERY PLAN
+--------------------------
+ Tid Scan on tidrangescan
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan ORDER BY ctid DESC;
+ QUERY PLAN
+-----------------------------------
+ Tid Scan Backward on tidrangescan
+(1 row)
+
+-- min/max
+EXPLAIN (COSTS OFF)
+SELECT MIN(ctid) FROM tidrangescan;
+ QUERY PLAN
+--------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Tid Scan on tidrangescan
+ Filter: (ctid IS NOT NULL)
+(5 rows)
+
+SELECT MIN(ctid) FROM tidrangescan;
+ min
+-------
+ (0,1)
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT MAX(ctid) FROM tidrangescan;
+ QUERY PLAN
+-------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Tid Scan Backward on tidrangescan
+ Filter: (ctid IS NOT NULL)
+(5 rows)
+
+SELECT MAX(ctid) FROM tidrangescan;
+ max
+--------
+ (9,10)
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)';
+ QUERY PLAN
+-------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Tid Scan on tidrangescan
+ TID Cond: (ctid > '(5,0)'::tid)
+ Filter: (ctid IS NOT NULL)
+(6 rows)
+
+SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)';
+ min
+-------
+ (5,1)
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)';
+ QUERY PLAN
+-------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Tid Scan Backward on tidrangescan
+ TID Cond: (ctid < '(5,0)'::tid)
+ Filter: (ctid IS NOT NULL)
+(6 rows)
+
+SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)';
+ max
+--------
+ (4,10)
+(1 row)
+
+-- empty table
+CREATE TABLE tidrangescan_empty(id integer, data text);
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan_empty
+ TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)';
+ ctid | data
+------+------
+(0 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on tidrangescan_empty
+ TID Cond: (ctid > '(9,0)'::tid)
+(2 rows)
+
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)';
+ ctid | data
+------+------
+(0 rows)
+
diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql
index a8472e0..e9519ee 100644
--- a/src/test/regress/sql/tidscan.sql
+++ b/src/test/regress/sql/tidscan.sql
@@ -43,6 +43,15 @@ FETCH BACKWARD 1 FROM c;
FETCH FIRST FROM c;
ROLLBACK;
+-- check that ordering on a tidscan doesn't require a sort
+EXPLAIN (COSTS OFF)
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid;
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC;
+SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,2)', '(0,1)', '(0,3)']::tid[]) ORDER BY ctid DESC;
+
-- tidscan via CURRENT OF
BEGIN;
DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan;
@@ -64,3 +73,94 @@ UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
ROLLBACK;
DROP TABLE tidscan;
+
+-- tests for tidrangescans
+
+CREATE TABLE tidrangescan(id integer, data text);
+
+INSERT INTO tidrangescan SELECT i,'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' FROM generate_series(1,1000) AS s(i);
+DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR substring(ctid::text from '\((\d+),')::integer >= 10;;
+VACUUM tidrangescan;
+
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)';
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(1,0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)';
+SELECT ctid, data FROM tidrangescan WHERE ctid <= '(1,5)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)';
+SELECT ctid, data FROM tidrangescan WHERE ctid < '(0,0)';
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)';
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(9,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid;
+SELECT ctid, data FROM tidrangescan WHERE '(9,8)' < ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)';
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(9,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)';
+SELECT ctid, data FROM tidrangescan WHERE ctid >= '(100,0)';
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid;
+SELECT ctid, data FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)';
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)';
+
+-- combinations
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)';
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo';
+SELECT ctid, data FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)' OR ctid = '(2,2)' AND data = 'foo';
+
+-- ordering with no quals should use tid range scan
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan ORDER BY ctid ASC;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan ORDER BY ctid DESC;
+
+-- min/max
+EXPLAIN (COSTS OFF)
+SELECT MIN(ctid) FROM tidrangescan;
+SELECT MIN(ctid) FROM tidrangescan;
+
+EXPLAIN (COSTS OFF)
+SELECT MAX(ctid) FROM tidrangescan;
+SELECT MAX(ctid) FROM tidrangescan;
+
+EXPLAIN (COSTS OFF)
+SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)';
+SELECT MIN(ctid) FROM tidrangescan WHERE ctid > '(5,0)';
+
+EXPLAIN (COSTS OFF)
+SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)';
+SELECT MAX(ctid) FROM tidrangescan WHERE ctid < '(5,0)';
+
+-- empty table
+CREATE TABLE tidrangescan_empty(id integer, data text);
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)';
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid < '(1, 0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)';
+SELECT ctid, data FROM tidrangescan_empty WHERE ctid > '(9, 0)';