From c4f919d4d15ec362f157736ea7a4bb49a890a437 Mon Sep 17 00:00:00 2001 From: "Andrey M. Borodin" Date: Wed, 20 Mar 2024 22:30:14 +0500 Subject: [PATCH v31] Implement UUID v7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds function for UUID generation. Most important function here is uuidv7() which generates new UUID according to the new standard. For code readability this commit adds alias uuidv4() to function gen_random_uuid(). Author: Andrey Borodin Reviewed-by: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch Reviewed-by: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev Reviewed-by: Peter Eisentraut, Chris Travers, Lukas Fittl Reviewed-by: Michael Paquier, Masahiko Sawada, Stepan Neretin Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com --- doc/src/sgml/datatype.sgml | 2 +- doc/src/sgml/func.sgml | 21 ++- src/backend/utils/adt/uuid.c | 165 +++++++++++++++++++++-- src/include/catalog/pg_proc.dat | 11 +- src/include/portability/instr_time.h | 22 +++ src/port/win32gettimeofday.c | 17 +++ src/test/regress/expected/opr_sanity.out | 3 + src/test/regress/expected/uuid.out | 41 +++++- src/test/regress/sql/uuid.sql | 18 ++- 9 files changed, 283 insertions(+), 17 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index e0d33f12e1..3e6751d64c 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -4380,7 +4380,7 @@ SELECT to_tsvector( 'postgraduate' ), to_tsquery( 'postgres:*' ); The data type uuid stores Universally Unique Identifiers - (UUID) as defined by RFC 4122, + (UUID) as defined by RFC 9562, ISO/IEC 9834-8:2005, and related standards. (Some systems refer to this data type as a globally unique identifier, or GUID,GUID instead.) 
This diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 73979f20ff..03161b3f87 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14213,6 +14213,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple gen_random_uuid + + uuidv4 + + + + uuidv7 + + uuid_extract_timestamp @@ -14222,12 +14230,17 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple - PostgreSQL includes one function to generate a UUID: + PostgreSQL includes several functions to generate a UUID. gen_random_uuid () uuid +uuidv4 () uuid + + These functions return a version 4 (random) UUID. + +uuidv7 () uuid - This function returns a version 4 (random) UUID. This is the most commonly - used type of UUID and is appropriate for most applications. + This function returns a version 7 UUID (UNIX timestamp with millisecond precision + + sub-millisecond timestamp fraction + random). @@ -14251,7 +14264,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple uuid_extract_version (uuid) smallint This function extracts the version from a UUID of the variant described by - RFC 4122. For + RFC 9562. For other variants, this function returns null. For example, for a UUID generated by gen_random_uuid, this function will return 4. 
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index 5284d23dcc..6c5aee2e31 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -13,10 +13,13 @@ #include "postgres.h" +#include <time.h> + #include "common/hashfn.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "port/pg_bswap.h" +#include "portability/instr_time.h" #include "utils/fmgrprotos.h" #include "utils/guc.h" #include "utils/sortsupport.h" @@ -37,6 +40,8 @@ static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); +static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version); +static inline uint64 get_real_time_ns_ascending(void); Datum uuid_in(PG_FUNCTION_ARGS) @@ -401,6 +406,24 @@ uuid_hash_extended(PG_FUNCTION_ARGS) return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); } +/* + * Set version and RFC 9562 variant (0b10) fields of a UUID, see + * https://www.rfc-editor.org/rfc/rfc9562 + */ +static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version) +{ + /* set version field, top four bits */ + uuid->data[6] = (uuid->data[6] & 0x0f) | (version << 4); + /* set variant field, top two bits are 1, 0 */ + uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; +} + +/* + * Generate UUID version 4. + * + * All UUID bytes are filled with strong random numbers except version and + * variant bits. 
+ */ Datum gen_random_uuid(PG_FUNCTION_ARGS) { @@ -413,20 +436,129 @@ gen_random_uuid(PG_FUNCTION_ARGS) /* * Set magic numbers for a "version 4" (pseudorandom) UUID, see - * http://tools.ietf.org/html/rfc4122#section-4.4 + * https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4 */ - uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */ - uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */ + uuid_set_version(uuid, 4); PG_RETURN_UUID_P(uuid); } -#define UUIDV1_EPOCH_JDATE 2299161 /* == date2j(1582,10,15) */ +/* + * Acquire nanosecond reading and ensure it is ascending (on this backend) + */ +static inline uint64 get_real_time_ns_ascending(void) +{ + static uint64 previous_ns = 0; + uint64 ns = get_real_time_ns(); + + /* minimum amount of ns that guarantees step of UUID increased clock precision */ +#define NS_PER_MS INT64CONST(1000000) +#define SUB_MILLISECOND_STEP ((NS_PER_MS / (1 << 12)) + 1) + if (previous_ns + SUB_MILLISECOND_STEP >= ns) + ns = previous_ns + SUB_MILLISECOND_STEP; + previous_ns = ns; + + return ns; +} + +/* + * Generate UUID version 7 per RFC 9562. + * + * Monotonicity (regarding generation on given backend) is ensured with method + * "Replace Leftmost Random Bits with Increased Clock Precision (Method 3)". + * We use 12 bits in "rand_a" bits to store 1/4096 fractions of millisecond. + * Usage of pg_test_timing indicates that such precision is available on most + * systems. If timestamp is not advancing between two consecutive UUID + * generations, previous timestamp is incremented and used instead of current + * timestamp. 
+ */ +static Datum +generate_uuidv7(uint64 ns) +{ + pg_uuid_t *uuid = palloc(UUID_LEN); + uint64 unix_ts_ms; + uint16 increased_clock_precision; + + unix_ts_ms = ns / NS_PER_MS; + + /* Fill in time part: 48-bit big-endian Unix timestamp in milliseconds */ + uuid->data[0] = (unsigned char) (unix_ts_ms >> 40); + uuid->data[1] = (unsigned char) (unix_ts_ms >> 32); + uuid->data[2] = (unsigned char) (unix_ts_ms >> 24); + uuid->data[3] = (unsigned char) (unix_ts_ms >> 16); + uuid->data[4] = (unsigned char) (unix_ts_ms >> 8); + uuid->data[5] = (unsigned char) unix_ts_ms; + + /* sub-millisecond timestamp fraction (12 bits) */ + increased_clock_precision = ((ns % NS_PER_MS) * (1 << 12)) / NS_PER_MS; + + uuid->data[6] = (unsigned char) (increased_clock_precision >> 8); + uuid->data[7] = (unsigned char) (increased_clock_precision); + + /* fill everything after the increased clock precision with random bytes */ + if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate random values"))); + + /* + * Set magic numbers for a "version 7" (time-ordered) UUID, see + * https://www.rfc-editor.org/rfc/rfc9562#name-version-field + */ + uuid_set_version(uuid, 7); + + PG_RETURN_UUID_P(uuid); +} + +/* + * Entry point for uuidv7() + */ +Datum +uuidv7(PG_FUNCTION_ARGS) +{ + return generate_uuidv7(get_real_time_ns_ascending()); +} + +/* + * Entry point for uuidv7(interval) + */ +Datum +uuidv7_interval(PG_FUNCTION_ARGS) +{ + uint64 ns = get_real_time_ns_ascending(); + /* + * We are given a time shift interval as an argument. + * The interval may contain days, months and years, which are not a fixed + * number of nanoseconds. To make correct computations we call + * timestamptz_pl_interval() with corresponding logic. This logic is + * implemented with microsecond precision. So we carry nanoseconds + * between computations. 
+ */ + Interval *span = PG_GETARG_INTERVAL_P(0); + /* Convert current time to TimestampTz (us since Postgres epoch) */ + TimestampTz ts = (TimestampTz) (ns / 1000) - + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC; + + /* Compute time shift */ + ts = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval, + TimestampTzGetDatum(ts), + IntervalPGetDatum(span))); + /* Convert TimestampTz back and carry nanoseconds. */ + ns = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC) + * 1000 + ns % 1000; + return generate_uuidv7(ns); +} + +/* + * Start of the Gregorian epoch == date2j(1582,10,15) + * It is a 64-bit constant because it's used in overflow-prone computations + */ +#define GREGORIAN_EPOCH_JDATE INT64CONST(2299161) /* * Extract timestamp from UUID. * - * Returns null if not RFC 4122 variant or not a version that has a timestamp. + * Returns null if not RFC 9562 variant or not a version that has a timestamp. */ Datum uuid_extract_timestamp(PG_FUNCTION_ARGS) @@ -436,7 +568,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS) uint64 tms; TimestampTz ts; - /* check if RFC 4122 variant */ + /* check if RFC 9562 variant */ if ((uuid->data[8] & 0xc0) != 0x80) PG_RETURN_NULL(); @@ -455,7 +587,22 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS) /* convert 100-ns intervals to us, then adjust */ ts = (TimestampTz) (tms / 10) - - ((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC; + ((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC; + PG_RETURN_TIMESTAMPTZ(ts); + } + + if (version == 7) + { + tms = (uuid->data[5]) + + (((uint64) uuid->data[4]) << 8) + + (((uint64) uuid->data[3]) << 16) + + (((uint64) uuid->data[2]) << 24) + + (((uint64) uuid->data[1]) << 32) + + (((uint64) uuid->data[0]) << 40); + + /* convert ms to us, then adjust */ + ts = (TimestampTz) (tms * 1000) - + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC; 
PG_RETURN_TIMESTAMPTZ(ts); } @@ -467,7 +614,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS) /* * Extract UUID version. * - * Returns null if not RFC 4122 variant. + * Returns null if not RFC 9562 variant. */ Datum uuid_extract_version(PG_FUNCTION_ARGS) @@ -475,7 +622,7 @@ uuid_extract_version(PG_FUNCTION_ARGS) pg_uuid_t *uuid = PG_GETARG_UUID_P(0); uint16 version; - /* check if RFC 4122 variant */ + /* check if RFC 9562 variant */ if ((uuid->data[8] & 0xc0) != 0x80) PG_RETURN_NULL(); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index cbbe8acd38..3353e9d6e3 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -9342,11 +9342,20 @@ { oid => '3432', descr => 'generate random UUID', proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v', prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, +{ oid => '9895', descr => 'generate UUID version 4', + proname => 'uuidv4', proleakproof => 't', provolatile => 'v', + prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, +{ oid => '9896', descr => 'generate UUID version 7', + proname => 'uuidv7', proleakproof => 't', provolatile => 'v', + prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' }, +{ oid => '9897', descr => 'generate UUID version 7 with a timestamp shifted by specified interval', + proname => 'uuidv7', proleakproof => 't', provolatile => 'v', + prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7_interval' }, { oid => '6342', descr => 'extract timestamp from UUID', proname => 'uuid_extract_timestamp', proleakproof => 't', prorettype => 'timestamptz', proargtypes => 'uuid', prosrc => 'uuid_extract_timestamp' }, -{ oid => '6343', descr => 'extract version from RFC 4122 UUID', +{ oid => '6343', descr => 'extract version from RFC 9562 UUID', proname => 'uuid_extract_version', proleakproof => 't', prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_extract_version' }, diff --git 
a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index e66ecf34cd..9acf547b64 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -194,4 +194,26 @@ GetTimerFrequency(void) #define INSTR_TIME_GET_MICROSEC(t) \ (INSTR_TIME_GET_NANOSEC(t) / NS_PER_US) +#ifndef WIN32 + +/* + * Read real time in ns (64-bit math). Trimmed to microseconds on macOS. + */ +static inline uint64 get_real_time_ns(void) +{ + struct timespec tmp; + + clock_gettime(CLOCK_REALTIME, &tmp); + return (uint64) tmp.tv_sec * UINT64CONST(1000000000) + (uint64) tmp.tv_nsec; +} +#else /* WIN32 */ + +/* + * Function to read real time with all available precision. + * Prototype-only, implementation in win32gettimeofday.c + */ +uint64 get_real_time_ns(void); + +#endif + #endif /* INSTR_TIME_H */ diff --git a/src/port/win32gettimeofday.c b/src/port/win32gettimeofday.c index 1e00f7ee14..ec46cc00fd 100644 --- a/src/port/win32gettimeofday.c +++ b/src/port/win32gettimeofday.c @@ -41,6 +41,7 @@ static const unsigned __int64 epoch = UINT64CONST(116444736000000000); */ #define FILETIME_UNITS_PER_SEC 10000000L #define FILETIME_UNITS_PER_USEC 10 +#define FILETIME_UNITS_TO_NS 100L /* @@ -73,3 +74,19 @@ gettimeofday(struct timeval *tp, void *tzp) return 0; } + +/* + * Function to read real time with all available precision. 
+ */ +uint64 +get_real_time_ns(void) +{ + FILETIME file_time; + ULARGE_INTEGER ularge; + + GetSystemTimePreciseAsFileTime(&file_time); + ularge.LowPart = file_time.dwLowDateTime; + ularge.HighPart = file_time.dwHighDateTime; + + return (ularge.QuadPart - epoch) * FILETIME_UNITS_TO_NS; +} diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 34a32bd11d..43e7180a16 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -878,6 +878,9 @@ crc32(bytea) crc32c(bytea) bytea_larger(bytea,bytea) bytea_smaller(bytea,bytea) +uuidv4() +uuidv7() +uuidv7(interval) -- restore normal output mode \a\t -- List of functions used by libpq's fe-lobj.c diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out index 6026e15ed3..aa6224e81b 100644 --- a/src/test/regress/expected/uuid.out +++ b/src/test/regress/expected/uuid.out @@ -168,6 +168,27 @@ SELECT count(DISTINCT guid_field) FROM guid1; 2 (1 row) +-- test of uuidv4() alias +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +SELECT count(DISTINCT guid_field) FROM guid1; + count +------- + 2 +(1 row) + +-- generation test for v7 +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7(INTERVAL '1 day')); +SELECT count(DISTINCT guid_field) FROM guid1; + count +------- + 3 +(1 row) + -- extract functions -- version SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111'); -- 5 @@ -188,8 +209,26 @@ SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111'); -- null (1 row) +SELECT uuid_extract_version(uuidv4()); --4 + uuid_extract_version +---------------------- + 4 +(1 row) + +SELECT uuid_extract_version(uuidv7()); --7 + uuid_extract_version +---------------------- + 7 +(1 row) + -- timestamp -SELECT 
uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector +SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1 + ?column? +---------- + t +(1 row) + +SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7 ?column? ---------- t diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql index c88f6d087a..eec7f160f8 100644 --- a/src/test/regress/sql/uuid.sql +++ b/src/test/regress/sql/uuid.sql @@ -85,6 +85,19 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid()); INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid()); SELECT count(DISTINCT guid_field) FROM guid1; +-- test of uuidv4() alias +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +SELECT count(DISTINCT guid_field) FROM guid1; + +-- generation test for v7 +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7(INTERVAL '1 day')); +SELECT count(DISTINCT guid_field) FROM guid1; + -- extract functions @@ -92,9 +105,12 @@ SELECT count(DISTINCT guid_field) FROM guid1; SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111'); -- 5 SELECT uuid_extract_version(gen_random_uuid()); -- 4 SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111'); -- null +SELECT uuid_extract_version(uuidv4()); --4 +SELECT uuid_extract_version(uuidv7()); --7 -- timestamp -SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector +SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 
2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1 +SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7 SELECT uuid_extract_timestamp(gen_random_uuid()); -- null SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111'); -- null -- 2.39.5 (Apple Git-154)