Nuetrino Nuetrino - 3 months ago 8
Linux Question

How do limits on shared memory work on Linux?

I was looking into the Linux kernel limits on the shared memory

/proc/sys/kernel/shmall


specifies the maximum number of pages that can be allocated. Taking this number as x and the page size as p, I assume that "x * p" bytes is the limit on system-wide shared memory.

Now I wrote a small program to create a shared memory segment, and I attached to that shared memory segment twice, as below:

/* Create a new private segment of 4*sizeof(int) bytes with mode 0666. */
shm_id = shmget(IPC_PRIVATE, 4*sizeof(int), IPC_CREAT | 0666);

/* shmget() reports failure with a negative id. */
if (shm_id < 0) {
printf("shmget error\n");
exit(1);
}
printf("\n The shared memory created is %d",shm_id);

/*
 * Attach the same segment twice, letting the kernel choose each address
 * (NULL address, no flags).
 * NOTE(review): neither shmat() result is checked in this excerpt.
 */
ptr = shmat(shm_id,NULL,0);
ptr_info = shmat(shm_id,NULL,0);


In the above program
ptr
and
ptr_info
were different. So the shared memory is mapped to 2 virtual addresses in my process address space.

When I do an
ipcs
it looks like this

...
0x00000000 1638416 sun 666 16000000 2
...


Now coming to the
shmall
limit
x * p
noted above in my question: is this limit applied to the sum of all the virtual memory mapped for every shared memory segment, or does it apply to physical memory?

Physical memory is only one here (shared memory) and from the program above when I do 2
shmat
's there is twice the amount of memory allocated in my process address space. So will this limit be hit soon if I do continuous
shmat
's on a single shared memory segment?

Answer

The limit only applies to physical memory, that is the real shared memory allocated for all segments, because shmat() just maps that allocated segment into process address space.

You can trace it in the kernel, there is only one place where this limit is checked — in the newseg() function that allocates new segments (ns->shm_ctlall comparison). shmat() implementation is busy with a lot of stuff, but doesn't care at all about shmall limit, so you can map one segment as many times as you want to (well, address space is also limited, but in practice you rarely care about this limit).

You can also try a test from userspace with a simple program like this one:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <unistd.h>

/*
 * Read the system-wide shared memory limit from /proc/sys/kernel/shmall.
 *
 * Returns the value found in the file, or 0 if the file cannot be opened or
 * read, or if its first line is not a plain decimal number that fits in an
 * unsigned long. 0 doubles as the failure sentinel, so a limit that is
 * genuinely set to 0 cannot be told apart from an error.
 */
unsigned long int get_shmall(void) {
        FILE *f = fopen("/proc/sys/kernel/shmall", "r");
        char buf[512];
        char *end = NULL;
        unsigned long int value = 0;

        if (f == NULL)
                return 0;

        if (fgets(buf, sizeof(buf), f) != NULL) {
                errno = 0;
                value = strtoul(buf, &end, 10);
                /*
                 * Reject input with no digits, values that overflow, and
                 * trailing garbage; a terminating newline is expected.
                 */
                if (end == buf || errno == ERANGE ||
                    (*end != '\0' && *end != '\n'))
                        value = 0;
        }
        fclose(f); /* read-only stream: no buffered output to lose */
        return value;
}

/*
 * Write a new value (followed by a newline) to /proc/sys/kernel/shmall.
 *
 * Returns 0 on success, or -1 if the file cannot be opened, the value cannot
 * be formatted, or writing/closing the stream fails.
 */
int set_shmall(unsigned long int value) {
        FILE *f = fopen("/proc/sys/kernel/shmall", "w");
        char buf[512];
        int len;
        int retval = 0;

        if (f == NULL)
                return -1;

        len = snprintf(buf, sizeof(buf), "%lu\n", value);
        if (len < 0 || (size_t)len >= sizeof(buf) ||
            fwrite(buf, 1, (size_t)len, f) != (size_t)len)
                retval = -1;

        /*
         * stdio buffers the output, so the data may only reach the file when
         * the stream is flushed; a failure at that point is reported by
         * fclose() and must not be mistaken for success.
         */
        if (fclose(f) != 0)
                retval = -1;
        return retval;
}

/*
 * Demonstration: lower shmall to one page above the number of shared memory
 * pages currently in use, create one page-sized segment, attach it twice,
 * then try to create a second page-sized segment. The original shmall value
 * is written back and any created segments are removed before exiting.
 *
 * Always returns 0; progress and failures are reported on stdout.
 */
int main(void)
{
        int shm_id1 = -1, shm_id2 = -1;
        unsigned long int shmall = 0, shmused, newshmall;
        void *ptr1, *ptr2;
        struct shm_info shminf;

        /* get_shmall() returns 0 on failure; nothing to restore in that case. */
        if ((shmall = get_shmall()) == 0) {
                printf("can't get shmall\n");
                goto out;
        }
        printf("original shmall: %lu pages\n", shmall);
        /* SHM_INFO fills a struct shm_info, passed through the shmid_ds pointer. */
        if (shmctl(0, SHM_INFO, (struct shmid_ds *)&shminf) < 0) {
                printf("can't get SHM_INFO\n");
                goto out;
        }
        shmused = shminf.shm_tot * getpagesize();
        printf("shmused: %lu pages (%lu bytes)\n", shminf.shm_tot, shmused);
        /* Leave room for exactly one more page. */
        newshmall = shminf.shm_tot + 1;
        if (set_shmall(newshmall) != 0) {
                printf("can't set shmall\n");
                goto out;
        }
        /* Read the value back to confirm the new limit is in place. */
        if (get_shmall() != newshmall) {
                printf("something went wrong with shmall setting\n");
                goto out;
        }
        printf("new shmall: %lu pages (%lu bytes)\n", newshmall, newshmall * getpagesize());
        printf("shmget() for %u bytes: ", (unsigned int) getpagesize());
        shm_id1 = shmget(IPC_PRIVATE, (size_t)getpagesize(), IPC_CREAT | 0666);
        if (shm_id1 < 0) {
                printf("failed: %s\n", strerror(errno));
                goto out;
        }
        printf("ok\nshmat 1: ");
        ptr1 = shmat(shm_id1, NULL, 0);
        /* shmat() signals failure with (void *)-1, not with a null pointer. */
        if (ptr1 == (void *)-1) {
                printf("failed\n");
                goto out;
        }
        printf("ok\nshmat 2: ");
        ptr2 = shmat(shm_id1, NULL, 0);
        if (ptr2 == (void *)-1) {
                printf("failed\n");
                goto out;
        }
        printf("ok\n");
        if (ptr1 == ptr2) {
                printf("ptr1 and ptr2 are the same with shm_id1\n");
                goto out;
        }
        /* The single spare page is taken, so this request should be refused. */
        printf("shmget() for %u bytes: ", (unsigned int) getpagesize());
        shm_id2 = shmget(IPC_PRIVATE, (size_t)getpagesize(), IPC_CREAT | 0666);
        if (shm_id2 < 0)
                printf("failed: %s\n", strerror(errno));
        else
                printf("ok, although it's wrong\n");
out:
        /* Put the original limit back whenever it was read successfully. */
        if (shmall != 0 && set_shmall(shmall) != 0)
                printf("failed to restrore shmall\n");

        if (shm_id1 >= 0 && shmctl(shm_id1, IPC_RMID, NULL) < 0)
                printf("failed to remove shm_id1\n");

        if (shm_id2 >= 0 && shmctl(shm_id2, IPC_RMID, NULL) < 0)
                printf("failed to remove shm_id2\n");

        return 0;
}

What it does is set the shmall limit just one page above what is currently used by the system, then try to get a new page-sized segment and map it twice (all successfully), and then try to get one more page-sized segment, which fails:

$ ./a.out 
original shmall: 18446744073708503040 pages
shmused: 21053 pages (86233088 bytes)
new shmall: 21054 pages (86237184 bytes)
shmget() for 4096 bytes: ok
shmat 1: ok
shmat 2: ok
shmget() for 4096 bytes: failed: No space left on device