From c679bb2fe98aa4a093a73d225b77a114d554a71b Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Sat, 3 Aug 2019 01:59:57 +0300 Subject: [PATCH 3/3] Avoid GIN full scan for non-empty ALL keys --- src/backend/access/gin/ginget.c | 3 +- src/backend/access/gin/ginscan.c | 56 ++++++++++++++++++++++++++++++++--- src/backend/utils/adt/selfuncs.c | 54 +++++++++++++++++---------------- src/include/access/gin_private.h | 2 ++ src/test/regress/expected/tsearch.out | 24 +++++++++++++++ src/test/regress/sql/tsearch.sql | 7 +++++ 6 files changed, 116 insertions(+), 30 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 65ed8b2..acc43c1 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -588,7 +588,8 @@ startScan(IndexScanDesc scan) uint32 i; for (i = 0; i < so->totalentries; i++) - startScanEntry(ginstate, so->entries[i], scan->xs_snapshot); + if (so->entries[i]->nrefs > 0) + startScanEntry(ginstate, so->entries[i], scan->xs_snapshot); if (GinFuzzySearchLimit > 0) { diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index f612e55..605cc5f 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -87,6 +87,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, queryCategory) == 0) { /* Successful match */ + prevEntry->nrefs++; return prevEntry; } } @@ -94,6 +95,9 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, /* Nope, create a new entry */ scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData)); + + scanEntry->nrefs = 1; + scanEntry->queryKey = queryKey; scanEntry->queryCategory = queryCategory; scanEntry->isPartialMatch = isPartialMatch; @@ -273,6 +277,8 @@ ginNewScanKey(IndexScanDesc scan) GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; bool hasNullQuery = false; + bool hasAllQuery = false; + bool hasNormalQuery = false; int numColsNeedNotNull = 0; bool colNeedsNotNull[INDEX_MAX_KEYS] = {0}; bool 
colImpliesNotNull[INDEX_MAX_KEYS] = {0}; @@ -390,6 +396,11 @@ ginNewScanKey(IndexScanDesc scan) if (searchMode != GIN_SEARCH_MODE_DEFAULT) hasNullQuery = true; + if (searchMode == GIN_SEARCH_MODE_ALL) + hasAllQuery = true; + else + hasNormalQuery = true; + /* * Create GinNullCategory representation. If the extractQueryFn * didn't create a nullFlags array, we assume everything is non-null. @@ -467,13 +478,50 @@ ginNewScanKey(IndexScanDesc scan) } } } - else if (numColsNeedNotNull > 0) + else { + if (numColsNeedNotNull > 0) + { + /* + * We use recheck instead of adding NOT_NULL entries to eliminate + * rows with NULL columns. + */ + so->forcedRecheck = true; + } + /* - * We use recheck instead of adding NOT_NULL entries to eliminate - * rows with NULL columns. + * If we have both ALL and normal keys, then remove ALL keys and + * force recheck. */ - so->forcedRecheck = true; + if (hasAllQuery && hasNormalQuery) + { + int nkeys = so->nkeys; + int j = 0; + + for (i = 0; i < nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->searchMode == GIN_SEARCH_MODE_ALL) + { + /* Dereference key's entries */ + for (int e = 0; e < key->nentries; e++) + key->scanEntry[e]->nrefs--; + + so->nkeys--; + } + else + { + /* Move key */ + if (i != j) + so->keys[j] = so->keys[i]; + + j++; + } + } + + so->forcedRecheck = true; + } } } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1a9d76d..dcdd636 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6245,6 +6245,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, typedef struct { bool haveFullScan; + bool haveNormalScan; double partialEntries; double exactEntries; double searchEntries; @@ -6326,40 +6327,42 @@ gincost_pattern(IndexOptInfo *index, int indexcol, return false; } - if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_ALL) + if (searchMode == GIN_SEARCH_MODE_ALL) { /* * GIN does not emit scan entries for empty 
GIN_SEARCH_MODE_ALL keys, * and it can avoid full index scan if there are entries from other * keys, so we can skip setting of 'haveFullScan' flag. */ - return true; - } + if (nentries <= 0) + return true; - for (i = 0; i < nentries; i++) + counts->haveFullScan = true; + } + else { - /* - * For partial match we haven't any information to estimate number of - * matched entries in index, so, we just estimate it as 100 - */ - if (partial_matches && partial_matches[i]) - counts->partialEntries += 100; - else - counts->exactEntries++; + counts->haveNormalScan = true; - counts->searchEntries++; - } + for (i = 0; i < nentries; i++) + { + /* + * For partial match we haven't any information to estimate number of + * matched entries in index, so, we just estimate it as 100 + */ + if (partial_matches && partial_matches[i]) + counts->partialEntries += 100; + else + counts->exactEntries++; - if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) - { - /* Treat "include empty" like an exact-match item */ - counts->exactEntries++; - counts->searchEntries++; - } - else if (searchMode != GIN_SEARCH_MODE_DEFAULT) - { - /* It's GIN_SEARCH_MODE_ALL */ - counts->haveFullScan = true; + counts->searchEntries++; + } + + if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + { + /* Treat "include empty" like an exact-match item */ + counts->exactEntries++; + counts->searchEntries++; + } } return true; @@ -6719,7 +6722,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, return; } - if (counts.haveFullScan || indexQuals == NIL || counts.searchEntries <= 0) + if ((counts.haveFullScan && !counts.haveNormalScan) || + indexQuals == NIL || counts.searchEntries <= 0) { /* * Full index scan will be required. 
We treat this as if every key in diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index b0251f7..af05964 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -313,6 +313,8 @@ typedef struct GinScanKeyData typedef struct GinScanEntryData { + /* Number of references from GinScanKeys */ + int nrefs; /* query key and other information from extractQueryFn */ Datum queryKey; GinNullCategory queryCategory; diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 7af2899..3f19620 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -337,6 +337,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; 508 (1 row) +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM test_tsvector WHERE a @@ '!qh'; + QUERY PLAN +------------------------------------------------------------- + Bitmap Heap Scan on test_tsvector (actual rows=410 loops=1) + Recheck Cond: (a @@ '!''qh'''::tsquery) + Heap Blocks: exact=25 + -> Bitmap Index Scan on wowidx (actual rows=410 loops=1) + Index Cond: (a @@ '!''qh'''::tsquery) +(5 rows) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + QUERY PLAN +------------------------------------------------------------------------------ + Bitmap Heap Scan on test_tsvector (actual rows=60 loops=1) + Recheck Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) + Rows Removed by Index Recheck: 17 + Heap Blocks: exact=24 + -> Bitmap Index Scan on wowidx (actual rows=77 loops=1) + Index Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) +(6 rows) + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10; diff --git 
a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index ece80b9..54a5eef 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -111,6 +111,13 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme'; SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM test_tsvector WHERE a @@ '!qh'; + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); -- 2.7.4