diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index b28766c..5b7e5db 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #ifdef HAVE_SYS_IPC_H #include @@ -46,6 +47,8 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ unsigned long UsedShmemSegID = 0; void *UsedShmemSegAddr = NULL; +static Size AnonymousShmemSize; /* byte length passed to munmap() for AnonymousShmem */ +static PGShmemHeader *AnonymousShmem; /* anonymous mmap'd region, or NULL if none */ static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void IpcMemoryDetach(int status, Datum shmaddr); @@ -220,6 +223,11 @@ IpcMemoryDetach(int status, Datum shmaddr) { if (shmdt(DatumGetPointer(shmaddr)) < 0) elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr)); + + /* If we mapped anonymous memory to reduce sysv shm pressure, release it. */ + if (AnonymousShmem != NULL + && munmap(AnonymousShmem, AnonymousShmemSize) < 0) + elog(LOG, "munmap(%p) failed: %m", AnonymousShmem); } /****************************************************************************/ @@ -357,10 +365,52 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) PGShmemHeader *hdr; IpcMemoryId shmid; struct stat statbuf; + Size allocsize = size; /* Room for a header? */ Assert(size > MAXALIGN(sizeof(PGShmemHeader))); + /* + * Many systems allow only very limited amounts of System V shared memory + * by default. To avoid blowing out the limit, we allocate most of our + * shared memory via anonymous mmap, keeping a small System V segment just + * as an interlock to protect the data directory. But we can't do this in + * the EXEC_BACKEND case because such a mapping can't be reattached. + */ +#ifndef EXEC_BACKEND + { + long pagesize = sysconf(_SC_PAGE_SIZE); + + /* + * pagesize will, for practical purposes, always be a power of two. + * But just in case it isn't, we do it this way instead of using + * TYPEALIGN(). 
+ */ + AnonymousShmemSize = size; + if (pagesize > 0 && size % pagesize != 0) /* sysconf() may fail and return -1 */ + AnonymousShmemSize += pagesize - (size % pagesize); + + /* + * According to the fine manual, MAP_SHARED can be used with + * MAP_ANONYMOUS only in Linux >= 2.4. What do we do about that? + */ + AnonymousShmem = mmap(NULL, AnonymousShmemSize, PROT_READ|PROT_WRITE, /* same length munmap() is later given */ +#ifdef MAP_ANONYMOUS + MAP_SHARED|MAP_ANONYMOUS, +#else + MAP_SHARED|MAP_ANON, +#endif + -1, 0); + if (AnonymousShmem == MAP_FAILED) /* mmap() reports failure as MAP_FAILED, not NULL */ + ereport(FATAL, + (errmsg("could not map %lu bytes of anonymous shared memory: %m", + (unsigned long) AnonymousShmemSize))); + + /* The System V segment now only needs to hold the header. */ + allocsize = sizeof(PGShmemHeader); + } +#endif + /* Make sure PGSharedMemoryAttach doesn't fail without need */ UsedShmemSegAddr = NULL; @@ -370,7 +420,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) for (NextShmemSegID++;; NextShmemSegID++) { /* Try to create new segment */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize); if (memAddress) break; /* successful create and attach */ @@ -409,7 +459,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) /* * Now try again to create the segment. */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize); if (memAddress) break; /* successful create and attach */ @@ -448,6 +498,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) UsedShmemSegAddr = memAddress; UsedShmemSegID = (unsigned long) NextShmemSegID; + /* + * If we're using an anonymous segment for offload purposes, we must + * return a pointer to that rather than the small, mostly fake System V + * segment. 
+ */ + if (AnonymousShmem != NULL) + { + memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader)); + return AnonymousShmem; + } + return hdr; } @@ -516,6 +577,17 @@ PGSharedMemoryDetach(void) elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr); UsedShmemSegAddr = NULL; } + + /* + * Release the anonymous mapping, if any, and clear the pointer so that a + * repeated call cannot munmap() the same region twice. + */ + if (AnonymousShmem != NULL) + { + if (munmap(AnonymousShmem, AnonymousShmemSize) < 0) + elog(LOG, "munmap(%p) failed: %m", AnonymousShmem); + AnonymousShmem = NULL; + } }