diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c new file mode 100644 index 7549542..53be8ec *** a/src/backend/utils/adt/varlena.c --- b/src/backend/utils/adt/varlena.c *************** varstr_cmp(char *arg1, int len1, char *a *** 1405,1410 **** --- 1405,1438 ---- #endif } + /* + * Fast path: Attempt an entirely opportunistic "memcmp() == 0". + * + * In general there is no reason to be optimistic about being able to + * resolve the string comparison in this way. There are many usage + * patterns in which this fast path will never be taken, and some in + * which it will frequently be taken. When it is frequently taken, + * clearly the optimization is worthwhile, since strcoll() is known to + * be very expensive. When it is not taken, the overhead is completely + * negligible, and perhaps even immeasurably small, even in the worst + * case. We have nothing to lose and everything to gain. + * + * These counter-intuitive performance characteristics are likely + * explained by the dominant memory latency cost when performing a + * strcoll(); in practice, this may give the compiler and CPU + * sufficient leeway to order things in a way that hides the overhead + * of useless memcmp() calls. Sorting is very probably bottlenecked on + * memory bandwidth, and so it's worth it to use a few arithmetic + * instructions on the off chance that it results in a saving in memory + * bandwidth. In effect, this makes use of a capacity that would + * otherwise go unused. The same memory needed to be read into CPU + * cache at this juncture anyway. Modern CPUs can execute hundreds of + * instructions in the time taken to fetch a single cache line from + * main memory. + */ + if (len1 == len2 && memcmp(arg1, arg2, len1) == 0) + return 0; + #ifdef WIN32 /* Win32 does not have utf-8, so we need to map to UTF-16 */ if (GetDatabaseEncoding() == PG_UTF8) *************** bttextfastcmp_locale(Datum x, Datum y, S *** 1842,1847 **** --- 1870,1885 ---- len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); + /* + * Fast path: Attempt an entirely opportunistic memcmp() == 0, as + * explained within varstr_cmp() + */ + if (len1 == len2 && memcmp(a1p, a2p, len1) == 0) + { + result = 0; + goto done; + } + if (len1 >= tss->buflen1) { pfree(tss->buf1); *************** bttextfastcmp_locale(Datum x, Datum y, S *** 1875,1880 **** --- 1913,1919 ---- if (result == 0) result = strcmp(tss->buf1, tss->buf2); + done: /* We can't afford to leak memory here. */ if (PointerGetDatum(arg1) != x) pfree(arg1);