diff --git a/debian/daos-client-tests.install b/debian/daos-client-tests.install index b1c31a4ea5c..67da4debbe9 100644 --- a/debian/daos-client-tests.install +++ b/debian/daos-client-tests.install @@ -16,6 +16,7 @@ usr/bin/daos_perf usr/bin/daos_racer usr/bin/daos_test usr/bin/dfs_test +usr/bin/shm_test usr/bin/jobtest usr/bin/crt_launch usr/bin/daos_gen_io_conf diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 0d98cae5aa2..0c044c418be 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -51,6 +51,10 @@ #include #include +#include +#include +#include + #include "hook.h" #include "pil4dfs_int.h" @@ -93,6 +97,12 @@ static int fd_dummy = -1; /* Default dir cache garbage collector time-out in seconds */ #define DCACHE_GC_PERIOD 120 +/* the pointer to global shared memory buffer across processes */ +//static struct d_shm_buf_loc *p_shm_buf; + +/* the hash table in shared memory for flock() across processes */ +static struct d_shm_ht_head *ht_head_flock; + /* the number of low fd reserved */ static uint16_t low_fd_count; /* the list of low fd reserved */ @@ -957,6 +967,8 @@ child_hdlr(void) { int rc; + shm_inc_ref(); + /* daos is not initialized yet */ if (atomic_load_relaxed(&d_daos_inited) == false) return; @@ -4562,6 +4574,8 @@ reset_daos_env_before_exec(void) { int rc; + shm_dec_ref(); + /* bash does fork(), then close opened files before exec(), * so the fd for log file probably is invalid now. 
*/ @@ -6608,7 +6622,13 @@ posix_fadvise64(int fd, off_t offset, off_t len, int advice) int flock(int fd, int operation) { - int fd_directed; + int fd_directed; + daos_obj_id_t obj_id; + /* obj_id.hi + obj_id.low + item_name */ + char key[DFS_MAX_NAME + sizeof(daos_obj_id_t)*2]; + pthread_rwlock_t *p_locks; + int rc; + struct shm_ht_rec *link; if (next_flock == NULL) { next_flock = dlsym(RTLD_NEXT, "flock"); @@ -6623,11 +6643,32 @@ flock(int fd, int operation) if (d_compatible_mode && fd < FD_FILE_BASE) return next_flock(fd, operation); - /* We output the message only if env "D_IL_REPORT" is set. */ - if (report) - DS_ERROR(ENOTSUP, "flock() is not implemented yet"); - errno = ENOTSUP; - return -1; + if (shm_inited() == false) + /* shared memory is not properly setup */ + return ENOTSUP; + + rc = dfs_obj2id(d_file_list[fd_directed - FD_FILE_BASE]->file, &obj_id); + D_ASSERT(rc == 0); + rc = snprintf(key, DFS_MAX_NAME + sizeof(daos_obj_id_t), "%lx%lx%s", obj_id.hi, obj_id.lo, + d_file_list[fd_directed - FD_FILE_BASE]->item_name); + D_ASSERT((rc < (DFS_MAX_NAME + sizeof(daos_obj_id_t)*2)) && (rc > 0)); + p_locks = (pthread_rwlock_t *)shm_ht_rec_find_insert(ht_head_flock, key, rc, + KEY_VALUE_PTHREAD_RWLOCK, sizeof(pthread_rwlock_t), &link); + + if (operation == LOCK_SH) { + /* use read lock to mimic shared file lock */ + rc = pthread_rwlock_rdlock(p_locks); + } else if (operation == LOCK_EX) { + /* use write lock to mimic exclusive file lock */ + rc = pthread_rwlock_wrlock(p_locks); + } else if (operation == LOCK_UN) { + rc = pthread_rwlock_unlock(p_locks); + } else { + errno = EINVAL; + return -1; + } + + return rc; } int @@ -7133,6 +7174,27 @@ init_myhook(void) else daos_debug_inited = true; + rc = shm_init(); + if (rc == 0) { + rc = shm_ht_create("shm_ht_flock", 8, 16, &ht_head_flock); + if (rc) { + /* decrease shared memory reference */ + shm_dec_ref(); + DS_ERROR(rc, "failed to create shm_ht_flock in shared memory"); + } + } else { + /* shared memory cache will 
not be used. */ + DS_ERROR(rc, "failed to initialize shared memory"); + } + + rc = d_agetenv_str(&env_log, "D_IL_REPORT"); + if (env_log) { + report = true; + if (strncmp(env_log, "0", 2) == 0 || strncasecmp(env_log, "false", 6) == 0) + report = false; + d_freeenv_str(&env_log); + } + d_compatible_mode = false; d_getenv_bool("D_IL_COMPATIBLE", &d_compatible_mode); @@ -7383,6 +7445,9 @@ finalize_myhook(void) int rc; d_list_t *rlink; + if (shm_inited()) + shm_dec_ref(); + if (bypass) return; diff --git a/src/gurt/SConscript b/src/gurt/SConscript index 389ec182361..4e302cd12ab 100644 --- a/src/gurt/SConscript +++ b/src/gurt/SConscript @@ -5,7 +5,8 @@ """Build libgurt""" SRC = ['debug.c', 'dlog.c', 'hash.c', 'misc.c', 'heap.c', 'errno.c', - 'fault_inject.c', 'slab.c', 'telemetry.c', 'hlc.c', 'hlct.c', 'signals.c'] + 'fault_inject.c', 'slab.c', 'telemetry.c', 'hlc.c', 'hlct.c', 'signals.c', + 'shm_alloc.c', 'shm_dict.c', 'shm_tlsf.c', 'shm_utils.c'] def scons(): @@ -19,7 +20,7 @@ def scons(): denv = env.Clone() - denv.AppendUnique(LIBS=['pthread', 'yaml', 'm', 'dl']) + denv.AppendUnique(LIBS=['pthread', 'yaml', 'm', 'dl', 'rt']) denv.require('uuid') gurt_targets = denv.SharedObject(SRC) diff --git a/src/gurt/shm_alloc.c b/src/gurt/shm_alloc.c new file mode 100644 index 00000000000..875a7773b63 --- /dev/null +++ b/src/gurt/shm_alloc.c @@ -0,0 +1,285 @@ +/** + * (C) Copyright 2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* the name of shared memory used for mmap which will be found under /dev/shm/ */ +#define daos_shm_name "daos_shm_cache" + +/* the memory allocator that will be used to handle small memory allocation */ +static __thread int idx_small = -1; + +/* the address of shared memory region */ +struct d_shm_alloc *d_shm_head; + +/* the attribute set for rwlock located inside shared memory */ +pthread_rwlockattr_t d_shm_rwlock_attr; + +/* the attribute set for mutex located inside shared memory */ +pthread_mutexattr_t d_shm_mutex_attr; + +/* the number of times to try calling shm_open() */ +#define RETRY (5) + +/** + * pid of the process who creates shared memory region. shared memory is NOT unmapped when this + * process exits to keep shm always available. shared memory is unmapped when other processes + * exit. 
+ */
+static int pid_shm_creator;
+
+#ifndef PTHREAD_MUTEXATTR_FLAG_ROBUST
+#define PTHREAD_MUTEXATTR_FLAG_ROBUST 0x40000000
+#endif
+
+#ifndef PTHREAD_MUTEXATTR_FLAG_PSHARED
+#define PTHREAD_MUTEXATTR_FLAG_PSHARED 0x80000000
+#endif
+
+/**
+ * Attach the calling process to the per-user shared memory region. The region is
+ * created and initialized first when it does not exist yet.
+ *
+ * The region is mapped at FIXED_SHM_ADDR. A process that finds an existing region
+ * waits until the creator has published DSM_MAGIC and then takes a reference.
+ *
+ * \return 0 on success (also when the region is already attached), otherwise a
+ *         positive errno value.
+ */
+int
+shm_init(void)
+{
+    int   i;
+    int   shm_ht_fd;
+    int   shmopen_perm = 0600;
+    void *shm_addr;
+    int   rc;
+    int   n_race  = 0;
+    bool  created = false;
+    char  daos_shm_name_buf[64];
+
+    /* shared memory already initialized in current process */
+    if (d_shm_head)
+        return 0;
+
+    rc = pthread_rwlockattr_init(&d_shm_rwlock_attr);
+    D_ASSERT(rc == 0);
+    rc = pthread_rwlockattr_setpshared(&d_shm_rwlock_attr, PTHREAD_PROCESS_SHARED);
+    D_ASSERT(rc == 0);
+
+    rc = pthread_mutexattr_init(&d_shm_mutex_attr);
+    D_ASSERT(rc == 0);
+    rc = pthread_mutexattr_settype(&d_shm_mutex_attr, PTHREAD_MUTEX_ERRORCHECK);
+    D_ASSERT(rc == 0);
+    rc = pthread_mutexattr_setpshared(&d_shm_mutex_attr, PTHREAD_PROCESS_SHARED);
+    D_ASSERT(rc == 0);
+    /* capture the result so that the assertion checks this call, not the previous one */
+    rc = pthread_mutexattr_setrobust(&d_shm_mutex_attr, PTHREAD_MUTEX_ROBUST);
+    D_ASSERT(rc == 0);
+
+    /* the shared memory only accessible for individual user for now */
+    snprintf(daos_shm_name_buf, sizeof(daos_shm_name_buf), "%s_%d", daos_shm_name, getuid());
+
+open_shm:
+    shm_ht_fd = shm_open(daos_shm_name_buf, O_RDWR, shmopen_perm);
+    /* failed to open */
+    if (shm_ht_fd == -1) {
+        if (errno == ENOENT)
+            goto create_shm;
+
+        DS_ERROR(errno, "unexpected error shm_open()");
+        for (i = 0; i < RETRY; i++) {
+            usleep(5);
+            shm_ht_fd = shm_open(daos_shm_name_buf, O_RDWR, shmopen_perm);
+            if (shm_ht_fd >= 0)
+                break;
+        }
+        if (i >= RETRY) {
+            /* save errno before logging; there is no descriptor to close here */
+            rc = errno;
+            DS_ERROR(rc, "failed to open shared memory after %d retries", RETRY);
+            return rc;
+        }
+    }
+
+    /* map existing shared memory */
+    shm_addr = mmap(FIXED_SHM_ADDR, SHM_SIZE_REQ, PROT_READ | PROT_WRITE,
+                    MAP_SHARED | MAP_FIXED, shm_ht_fd, 0);
+    if (shm_addr != FIXED_SHM_ADDR) {
+        rc = errno;
+        DS_ERROR(rc, "mmap failed to map at desired address");
+        goto err;
+    }
+    d_shm_head = (struct d_shm_alloc *)shm_addr;
+    /* wait until the shared memory initialization finished */
+    while (d_shm_head->magic != DSM_MAGIC)
+        usleep(1);
+    if (d_shm_head->size != SHM_SIZE_REQ) {
+        /* EBADRQC - Invalid request code */
+        rc = EBADRQC;
+        DS_ERROR(rc, "unexpected shared memory size. Multiple versions of daos?");
+        goto err_unmap;
+    }
+    atomic_fetch_add_relaxed(&(d_shm_head->ref_count), 1);
+    close(shm_ht_fd);
+    return 0;
+
+create_shm:
+    shm_ht_fd = shm_open(daos_shm_name_buf, O_RDWR | O_CREAT | O_EXCL, shmopen_perm);
+    /* failed to create */
+    if (shm_ht_fd == -1) {
+        rc = errno;
+        /* another process created the region first: attach to it instead of failing */
+        if (rc == EEXIST && n_race++ < RETRY)
+            goto open_shm;
+        DS_ERROR(rc, "shm_open() failed to create shared memory");
+        return rc;
+    }
+    /* from here on a failure must remove the half-built region, see the err label */
+    created = true;
+
+    if (ftruncate(shm_ht_fd, SHM_SIZE_REQ) != 0) {
+        rc = errno;
+        DS_ERROR(rc, "ftruncate() failed for shm_ht_fd");
+        goto err;
+    }
+    /* map the shared memory at a fixed address for now. We will remove this limit later. */
+    shm_addr = mmap(FIXED_SHM_ADDR, SHM_SIZE_REQ, PROT_READ | PROT_WRITE,
+                    MAP_SHARED | MAP_FIXED, shm_ht_fd, 0);
+    if (shm_addr != FIXED_SHM_ADDR) {
+        rc = errno;
+        DS_ERROR(rc, "mmap() failed to map at desired address");
+        goto err;
+    }
+    d_shm_head = (struct d_shm_alloc *)shm_addr;
+    for (i = 0; i < N_SHM_POOL; i++) {
+        d_shm_head->tlsf[i] =
+            tlsf_create_with_pool((char *)shm_addr + sizeof(struct d_shm_alloc) +
+                                      (i * SHM_POOL_SIZE),
+                                  SHM_POOL_SIZE);
+        if (d_shm_head->tlsf[i] == NULL) {
+            rc = ENOMEM;
+            DS_ERROR(rc, "tlsf_create_with_pool() failed");
+            goto err_unmap;
+        }
+    }
+
+    /* pthread functions return the error number and do not set errno */
+    rc = pthread_mutex_init(&(d_shm_head->g_lock), &d_shm_mutex_attr);
+    if (rc != 0) {
+        DS_ERROR(rc, "pthread_mutex_init() failed");
+        goto err_unmap;
+    }
+    for (i = 0; i < N_SHM_POOL; i++) {
+        rc = pthread_mutex_init(&(d_shm_head->mem_lock[i]), &d_shm_mutex_attr);
+        if (rc != 0) {
+            DS_ERROR(rc, "pthread_mutex_init() failed");
+            goto err_unmap;
+        }
+    }
+    rc = pthread_mutex_init(&(d_shm_head->ht_lock), &d_shm_mutex_attr);
+    if (rc != 0) {
+        DS_ERROR(rc, "pthread_mutex_init() failed");
+        goto err_unmap;
+    }
+
+    pid_shm_creator         = getpid();
+    d_shm_head->off_ht_head = INVALID_OFFSET;
+
+    atomic_store_relaxed(&(d_shm_head->ref_count), 1);
+    d_shm_head->size  = SHM_SIZE_REQ;
+    /* the magic is written last: attaching processes poll it to detect readiness */
+    d_shm_head->magic = DSM_MAGIC;
+    /* initialization is finished now; the mapping stays valid after the fd is closed */
+    close(shm_ht_fd);
+    return 0;
+
+err_unmap:
+    d_shm_head = NULL;
+    munmap(shm_addr, SHM_SIZE_REQ);
+
+err:
+    close(shm_ht_fd);
+    /* do not leave an uninitialized region behind; attachers would wait on it forever */
+    if (created)
+        shm_unlink(daos_shm_name_buf);
+    return rc;
+}
+
+/**
+ * Common implementation of shm_alloc() and shm_memalign().
+ *
+ * Requests smaller than LARGE_MEM go to a pool chosen once per thread from a hash of
+ * the thread id; larger requests are spread over the pools with a shared counter.
+ *
+ * \param[in] align  requested alignment, 0 selects a plain tlsf_malloc()
+ * \param[in] size   number of bytes requested
+ *
+ * \return the value returned by tlsf_malloc() / tlsf_memalign()
+ */
+static void *
+shm_alloc_comm(size_t align, size_t size)
+{
+    int      idx_allocator;
+    void    *buf;
+    int      tid;
+    uint32_t hash;
+    uint64_t oldref;
+
+    if (idx_small < 0) {
+        tid  = syscall(SYS_gettid);
+        hash = d_hash_string_u32((const char *)&tid, sizeof(int));
+        /* choose a memory allocator based on tid */
+        idx_small = hash % N_SHM_POOL;
+    }
+    idx_allocator = idx_small;
+    if (size >= LARGE_MEM) {
+        oldref = atomic_fetch_add_relaxed(&(d_shm_head->large_mem_count), 1);
+        /* pick the allocator for large memory request with round-robin */
+        idx_allocator = oldref % N_SHM_POOL;
+    }
+    shm_mutex_lock(&(d_shm_head->mem_lock[idx_allocator]));
+    if (align == 0)
+        buf = tlsf_malloc(d_shm_head->tlsf[idx_allocator], size);
+    else
+        buf = tlsf_memalign(d_shm_head->tlsf[idx_allocator], align, size);
+    shm_mutex_unlock(&(d_shm_head->mem_lock[idx_allocator]));
+
+    return buf;
+}
+
+/** Allocate \a size bytes from the shared memory pools. */
+void *
+shm_alloc(size_t size)
+{
+    return shm_alloc_comm(0, size);
+}
+
+/** Allocate \a size bytes aligned to \a align from the shared memory pools. */
+void *
+shm_memalign(size_t align, size_t size)
+{
+    return shm_alloc_comm(align, size);
+}
+
+/**
+ * Return a buffer to the pool it belongs to. The pool is derived from the distance
+ * between \a ptr and the first pool; pointers outside all pools are ignored with a
+ * warning.
+ */
+void
+shm_free(void *ptr)
+{
+    uint32_t idx_allocator;
+
+    /* compare with the lower bound address of shared memory pool */
+    if (ptr < d_shm_head->tlsf[0]) {
+        DS_WARN(EINVAL, "Out of range memory pointer for shm_free()\n");
+        return;
+    }
+
+    idx_allocator = ((uint64_t)ptr - (uint64_t)d_shm_head->tlsf[0]) / SHM_POOL_SIZE;
+    /* compare with the upper bound address of shared memory pool */
+    if (idx_allocator >= N_SHM_POOL) {
+        DS_WARN(EINVAL, "Out of range memory pointer for shm_free()\n");
+        return;
+    }
+
+    shm_mutex_lock(&(d_shm_head->mem_lock[idx_allocator]));
+    tlsf_free(d_shm_head->tlsf[idx_allocator], ptr);
+    shm_mutex_unlock(&(d_shm_head->mem_lock[idx_allocator]));
+}
+
+void
+shm_destroy(void)
+{
+    char daos_shm_file_name[128];
+
+    sprintf(daos_shm_file_name, "/dev/shm/%s_%d", daos_shm_name, getuid());
+ unlink(daos_shm_file_name); +} + +void +shm_dec_ref(void) +{ + D_ASSERT(d_shm_head != NULL); + atomic_fetch_add_relaxed(&(d_shm_head->ref_count), -1); + if (getpid() != pid_shm_creator) + munmap(d_shm_head, SHM_SIZE_REQ); + d_shm_head = NULL; +} + +void +shm_inc_ref(void) +{ + D_ASSERT(d_shm_head != NULL); + atomic_fetch_add_relaxed(&(d_shm_head->ref_count), 1); +} + +bool +shm_inited(void) +{ + if (d_shm_head == NULL) + return false; + if (d_shm_head->magic != DSM_MAGIC) + return false; + return true; +} diff --git a/src/gurt/shm_dict.c b/src/gurt/shm_dict.c new file mode 100644 index 00000000000..dbc6a59cc1d --- /dev/null +++ b/src/gurt/shm_dict.c @@ -0,0 +1,423 @@ +/** + * (C) Copyright 2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* the address of shared memory region */ +extern struct d_shm_alloc *d_shm_head; + +/* the attribute set for rwlock located inside shared memory */ +extern pthread_rwlockattr_t d_shm_rwlock_attr; + +/* the attribute set for mutex located inside shared memory */ +extern pthread_mutexattr_t d_shm_mutex_attr; + +int +shm_ht_create(const char name[], int bits, int n_lock, struct d_shm_ht_head **ht_head) +{ + int i; + struct d_shm_ht_head *ht_head_loc; + long int *off_next; + long int offset; + pthread_mutex_t *p_locks; + int len_name; + int n_bucket; + + *ht_head = NULL; + len_name = strnlen(name, MAX_HT_NAME_LEN); + if (len_name >= MAX_HT_NAME_LEN) { + printf("hash table name is longer than %d bytes.\n", MAX_HT_NAME_LEN - 1); + return EINVAL; + } + + n_bucket = 1 << bits; + shm_mutex_lock(&(d_shm_head->ht_lock)); + + /* loop over existing hash tables to check whether it exists or not */ + if (d_shm_head->off_ht_head != INVALID_OFFSET) { + offset = d_shm_head->off_ht_head; + while (offset > 0) { + ht_head_loc = (struct d_shm_ht_head *)((char *)d_shm_head + offset); + if ((strncmp(name, ht_head_loc->ht_name, 
MAX_HT_NAME_LEN) == 0) && + (ht_head_loc->n_bucket == n_bucket) && + (ht_head_loc->n_lock == n_lock)) { + *ht_head = ht_head_loc; + break; + } + if (ht_head_loc->next == INVALID_OFFSET) { + *ht_head = NULL; + break; + } + offset = ht_head_loc->next; + } + } + + if (*ht_head) { + shm_mutex_unlock(&(d_shm_head->ht_lock)); + return 0; + } + + /* This hash table does not exist, then create it. */ + *ht_head = shm_alloc(sizeof(struct d_shm_ht_head) + (sizeof(pthread_mutex_t) * n_lock) + + (sizeof(long int) * n_bucket)); + if (*ht_head == NULL) + return ENOMEM; + ht_head_loc = *ht_head; + + memcpy(ht_head_loc->ht_name, name, len_name + 1); + ht_head_loc->n_bucket = n_bucket; + ht_head_loc->n_lock = n_lock; + + p_locks = (pthread_mutex_t *)((char *)ht_head_loc + sizeof(struct d_shm_ht_head)); + for (i = 0; i < n_lock; i++) { + if(pthread_mutex_init(&(p_locks[i]), &d_shm_mutex_attr) != 0) { + perror("pthread_mutex_init"); + return errno; + } + } + off_next = (long int *)((char *)ht_head_loc + sizeof(struct d_shm_ht_head) + + (sizeof(pthread_mutex_t) * n_lock)); + for (i = 0; i < n_bucket; i++) + off_next[i] = INVALID_OFFSET; + /* insert the new hash table as the first one */ + ht_head_loc->next = d_shm_head->off_ht_head; + d_shm_head->off_ht_head = (long int)((char *)ht_head_loc - (char *)d_shm_head); + + shm_mutex_unlock(&(d_shm_head->ht_lock)); + return 0; +} + +int +shm_ht_rec_delete(struct d_shm_ht_head *ht_head, const char *key, const int ksize) +{ + unsigned int hash; + unsigned int idx; + unsigned int idx_lock; + pthread_mutex_t *p_ht_lock; + long int off_next; + long int *p_off_list; + struct shm_ht_rec *rec; + struct shm_ht_rec *rec_prev = NULL; + struct shm_ht_rec *rec_next = NULL; + + hash = d_hash_string_u32(key, ksize); + idx = hash & (ht_head->n_bucket - 1); + idx_lock = (unsigned int)(idx * ht_head->n_lock * 1.0f / ht_head->n_bucket); + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + p_off_list = (long int *)((char 
*)p_ht_lock + sizeof(pthread_mutex_t) * ht_head->n_lock); + shm_mutex_lock(&(p_ht_lock[idx_lock])); + if (p_off_list[idx] < 0) { + /* empty bucket */ + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + return 0; + } + + /* loop over all records in this bucket to find the key */ + off_next = p_off_list[idx]; + while (off_next) { + rec = (struct shm_ht_rec *)((char *)d_shm_head + off_next); + if (ksize == rec->len_key) { + if (memcmp(key, (char *)rec + sizeof(struct shm_ht_rec), ksize) == 0) { + /* found the record for the key, then remove it from the link + * list. + */ + if (rec->prev != INVALID_OFFSET) { + rec_prev = (struct shm_ht_rec *)((char *)d_shm_head + rec->prev); + rec_prev->next = rec->next; + } + if (rec->next != INVALID_OFFSET) { + rec_next = (struct shm_ht_rec *)((char *)d_shm_head + rec->next); + rec_next->prev = rec->prev; + } + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + shm_free(rec); + + return 1; + } + } + off_next = rec->next; + } + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + + return 0; +} + +int +shm_ht_rec_delete_at(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link) +{ + int idx_lock = link->idx_lock; + struct shm_ht_rec *rec_prev = NULL; + struct shm_ht_rec *rec_next = NULL; + pthread_mutex_t *p_ht_lock; + + assert(link != NULL); + idx_lock = link->idx_lock; + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + + shm_mutex_lock(&(p_ht_lock[idx_lock])); + if (link->prev != INVALID_OFFSET) { + rec_prev = (struct shm_ht_rec *)((char *)d_shm_head + link->prev); + rec_prev->next = link->next; + } + if (link->next != INVALID_OFFSET) { + rec_next = (struct shm_ht_rec *)((char *)d_shm_head + link->next); + rec_next->prev = link->prev; + } + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + + shm_free(link); + + return 0; +} + +void +shm_ht_rec_decref(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link) +{ + int idx_lock = link->idx_lock; + pthread_mutex_t *p_ht_lock; + + // Use atomic OP instead???? 
+ p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + shm_mutex_lock(&(p_ht_lock[idx_lock])); + link->ref_count--; + shm_mutex_unlock(&(p_ht_lock[idx_lock])); +} + +void +shm_ht_rec_addref(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link) +{ + int idx_lock = link->idx_lock; + pthread_mutex_t *p_ht_lock; + + // Use atomic OP instead???? + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + shm_mutex_lock(&(p_ht_lock[idx_lock])); + link->ref_count++; + shm_mutex_unlock(&(p_ht_lock[idx_lock])); +} + +int +shm_ht_destroy(struct d_shm_ht_head *ht_head, int force) +{ + int i; + int n_bucket = ht_head->n_bucket; + int n_lock = ht_head->n_lock; + pthread_mutex_t *p_ht_lock; + long int off_next; + long int *p_off_list; + struct shm_ht_rec *rec; + struct d_shm_ht_head *ht_head_prev; + struct d_shm_ht_head *ht_head_next; + + /* free record in buckets of hash table */ + for (i = 0; i < n_bucket; i++) { + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + p_off_list = (long int *)((char *)p_ht_lock + sizeof(pthread_mutex_t) * n_lock); + shm_mutex_lock(&(p_ht_lock[i])); + + off_next = p_off_list[i]; + while (off_next != INVALID_OFFSET) { + rec = (struct shm_ht_rec *)((char *)d_shm_head + off_next); + p_off_list[i] = rec->next; + shm_free(rec); + off_next = p_off_list[i]; + } + + shm_mutex_unlock(&(p_ht_lock[i])); + } + + /* remove the hash table from link list */ + if (ht_head->prev != INVALID_OFFSET) { + ht_head_prev = (struct d_shm_ht_head *)((char *)d_shm_head + ht_head->prev); + ht_head_prev->next = ht_head->next; + } else { + /* this is the first hash table */ + d_shm_head->off_ht_head = ht_head->next; + } + + if (ht_head->next != INVALID_OFFSET) { + ht_head_next = (struct d_shm_ht_head *)((char *)d_shm_head + ht_head->next); + ht_head_next->prev = ht_head->prev; + } + + /* free hash table buckets and locks */ + shm_free(ht_head); + + return 0; +} + +int 
+get_ht_with_name(const char *name, struct d_shm_ht_head **ht_head) +{ + long int offset; + struct d_shm_ht_head *head; + + if (ht_head == NULL) + return EINVAL; + + *ht_head = NULL; + /* no hash table in shared memory region at all */ + if (d_shm_head->off_ht_head < 0) + return 0; + + offset = d_shm_head->off_ht_head; + while (offset > 0) { + head = (struct d_shm_ht_head *)((char *)d_shm_head + offset); + if (strncmp(name, head->ht_name, MAX_HT_NAME_LEN) == 0) { + *ht_head = head; + return 0; + } + if (head->next < 0) + /* reaching the end of link list and hash table with target name not found */ + return 0; + offset = head->next; + } + + return 0; +} + +void * +shm_ht_rec_find(struct d_shm_ht_head *ht_head, const char *key, const int len_key, struct shm_ht_rec + **link) +{ + unsigned int hash; + unsigned int idx; + unsigned int idx_lock; + pthread_mutex_t *p_ht_lock; + long int off_next; + long int *p_off_list; + struct shm_ht_rec *rec; + char *value = NULL; + + if (link) + *link = NULL; + hash = d_hash_string_u32(key, len_key); + idx = hash & (ht_head->n_bucket - 1); + idx_lock = (unsigned int)(idx * ht_head->n_lock * 1.0f / ht_head->n_bucket); + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + p_off_list = (long int *)((char *)p_ht_lock + sizeof(pthread_mutex_t) * ht_head->n_lock); + shm_mutex_lock(&(p_ht_lock[idx_lock])); + if (p_off_list[idx] < 0) { + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + return NULL; + } + off_next = p_off_list[idx]; + while (off_next) { + rec = (struct shm_ht_rec *)((char *)d_shm_head + off_next); + if (len_key == rec->len_key) { + if (memcmp(key, (char *)rec + sizeof(struct shm_ht_rec), len_key) == 0) { + value = (char *)rec + sizeof(struct shm_ht_rec) + len_key + rec->len_padding; + D_ASSERT(((uint64_t)value & (SHM_MEM_ALIGN - 1)) == 0); + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + if (link) + *link = rec; + return value; + } + } + off_next = rec->next; + } + 
shm_mutex_unlock(&(p_ht_lock[idx_lock])); + return NULL; +} + +void * +shm_ht_rec_find_insert(struct d_shm_ht_head *ht_head, const char *key, const int len_key, + const char *val, const int len_value, struct shm_ht_rec **link) +{ + unsigned int hash; + unsigned int idx; + unsigned int idx_lock; + pthread_mutex_t *p_ht_lock; + long int off_next; + long int *p_off_list; + struct shm_ht_rec *rec = NULL; + struct shm_ht_rec *new_rec; + char *value = NULL; + + if (link) + *link = NULL; + hash = d_hash_string_u32(key, len_key); + idx = hash & (ht_head->n_bucket - 1); + idx_lock = (unsigned int)(idx * ht_head->n_lock * 1.0f / ht_head->n_bucket); + p_ht_lock = (pthread_mutex_t *)((char *)ht_head + sizeof(struct d_shm_ht_head)); + p_off_list = (long int *)((char *)p_ht_lock + sizeof(pthread_mutex_t) * ht_head->n_lock); + shm_mutex_lock(&(p_ht_lock[idx_lock])); + if (p_off_list[idx] != INVALID_OFFSET) { + off_next = p_off_list[idx]; + while (off_next != INVALID_OFFSET) { + rec = (struct shm_ht_rec *)((char *)d_shm_head + off_next); + if (len_key == rec->len_key) { + if (memcmp(key, (char *)rec + sizeof(struct shm_ht_rec), len_key) == 0) { + /* found the key, then return value */ + value = (char *)rec + sizeof(struct shm_ht_rec) + len_key + + rec->len_padding; + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + D_ASSERT(((uint64_t)value & (SHM_MEM_ALIGN - 1)) == 0); + if (link) + *link = rec; + return value; + } + } + off_next = rec->next; + } + } + /* record is not found. Insert it at the very beginning of the link list. */ + new_rec = (struct shm_ht_rec *)shm_memalign(SHM_MEM_ALIGN, sizeof(struct shm_ht_rec) + + len_key + len_value); + if (new_rec == NULL) { + errno = ENOMEM; + return NULL; + } + new_rec->len_key = len_key; + new_rec->len_padding = (len_key & (SHM_MEM_ALIGN - 1)) ? 
+ (SHM_MEM_ALIGN - (len_key & (SHM_MEM_ALIGN - 1))) : 0; + new_rec->len_value = len_value; + new_rec->next = INVALID_OFFSET; + memcpy((char *)new_rec + sizeof(struct shm_ht_rec), key, len_key); + value = (char *)new_rec + sizeof(struct shm_ht_rec) + len_key + new_rec->len_padding; + D_ASSERT(((uint64_t)value & (SHM_MEM_ALIGN - 1)) == 0); + + if ((strcmp(val, KEY_VALUE_PTHREAD_LOCK) == 0) && (len_value == sizeof(pthread_mutex_t))) { + /* value holds a pthread mutex lock */ + if (pthread_mutex_init((pthread_mutex_t *)value, &d_shm_mutex_attr) != 0) { + perror("pthread_mutex_init"); + return NULL; + } + } else if ((strcmp(val, KEY_VALUE_PTHREAD_RWLOCK) == 0) && (len_value == sizeof(pthread_rwlock_t))) { + /* value holds a pthread read-write mutex lock */ + if (pthread_rwlock_init((pthread_rwlock_t *)value, &d_shm_rwlock_attr) != 0) { + perror("pthread_rwlock_init"); + return NULL; + } + } else { + /* set value */ + memcpy(value, val, len_value); + } + + if (rec == NULL) { + /* bucket is empty */ + p_off_list[idx] = (long int)((char *)new_rec - (char *)d_shm_head); + } else { + /* rec-> pre = INVALID_OFFSET for the first record */ + new_rec->prev = INVALID_OFFSET; + new_rec->next = p_off_list[idx]; + p_off_list[idx] = (long int)((char *)new_rec - (char *)d_shm_head); + } + new_rec->idx_lock = idx_lock; + + shm_mutex_unlock(&(p_ht_lock[idx_lock])); + if (link) + *link = rec; + return value; +} diff --git a/src/gurt/shm_tlsf.c b/src/gurt/shm_tlsf.c new file mode 100644 index 00000000000..eec6d73eb0e --- /dev/null +++ b/src/gurt/shm_tlsf.c @@ -0,0 +1,1324 @@ +#include +#include +#include +#include +#include +#include + +#include + +#if defined(__cplusplus) +#define tlsf_decl inline +#else +#define tlsf_decl static +#endif + +/* +** Architecture-specific bit manipulation routines. 
+**
+** TLSF achieves O(1) cost for malloc and free operations by limiting
+** the search for a free block to a free list of guaranteed size
+** adequate to fulfill the request, combined with efficient free list
+** queries using bitmasks and architecture-specific bit-manipulation
+** routines.
+**
+** Most modern processors provide instructions to count leading zeroes
+** in a word, find the lowest and highest set bit, etc. These
+** specific implementations will be used when available, falling back
+** to a reasonably efficient generic implementation.
+**
+** NOTE: TLSF spec relies on ffs/fls returning value 0..31.
+** ffs/fls return 1-32 by default, returning 0 for error.
+*/
+
+/*
+** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)
+** architecture. There is no reliable portable method at compile-time.
+*/
+#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \
+    || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__)
+#define TLSF_64BIT
+#endif
+
+/* Forward declarations of the file-local helpers that are still in use. */
+static tlsf_t
+tlsf_create(void* mem);
+
+/* Add/remove memory pools. */
+static pool_t
+tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);
+
+/* Overheads/limits of internal structures. */
+static size_t
+tlsf_size(void);
+static size_t
+tlsf_pool_overhead(void);
+
+/*
+static void
+tlsf_destroy(tlsf_t tlsf);
+static void
+tlsf_remove_pool(tlsf_t tlsf, pool_t pool);
+static pool_t
+tlsf_get_pool(tlsf_t tlsf);
+// Returns internal block size, not original request size
+static size_t
+tlsf_block_size(void* ptr);
+static size_t
+tlsf_align_size(void);
+static size_t
+tlsf_block_size_min(void);
+static size_t
+tlsf_block_size_max(void);
+static size_t
+tlsf_alloc_overhead(void);
+static int
+tlsf_check_pool(pool_t pool);
+// Returns nonzero if any internal consistency check fails.
+static int
+tlsf_check(tlsf_t tlsf);
+// Debugging
+typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user);
+static void
+tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);
+*/
+
+
+/*
+** gcc 3.4 and above have builtin support, specialized for architecture.
+** Some compilers masquerade as gcc; patchlevel test filters them out.
+*/
+#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \
+    && defined (__GNUC_PATCHLEVEL__)
+
+#if defined (__SNC__)
+/* SNC for Playstation 3. */
+
+/* Isolate the lowest set bit with word & (~word + 1), then locate it via clz. */
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    const unsigned int reverse = word & (~word + 1);
+    const int bit = 32 - __builtin_clz(reverse);
+    return bit - 1;
+}
+
+#else
+
+/* Shift the 1-based result of __builtin_ffs() down by one. */
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    return __builtin_ffs(word) - 1;
+}
+
+#endif
+
+/* 0-based index of the highest set bit; word == 0 is special-cased and yields -1. */
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    const int bit = word ? 32 - __builtin_clz(word) : 0;
+    return bit - 1;
+}
+
+#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64))
+/* Microsoft Visual C++ support on x86/X64 architectures. */
+
+#include
+
+#pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanForward)
+
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    unsigned long index;
+    return _BitScanReverse(&index, word) ? index : -1;
+}
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    unsigned long index;
+    return _BitScanForward(&index, word) ? index : -1;
+}
+
+#elif defined (_MSC_VER) && defined (_M_PPC)
+/* Microsoft Visual C++ support on PowerPC architectures. */
+
+#include
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    const int bit = 32 - _CountLeadingZeros(word);
+    return bit - 1;
+}
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    const unsigned int reverse = word & (~word + 1);
+    const int bit = 32 - _CountLeadingZeros(reverse);
+    return bit - 1;
+}
+
+#elif defined (__ARMCC_VERSION)
+/* RealView Compilation Tools for ARM */
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    const unsigned int reverse = word & (~word + 1);
+    const int bit = 32 - __clz(reverse);
+    return bit - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    const int bit = word ? 32 - __clz(word) : 0;
+    return bit - 1;
+}
+
+#elif defined (__ghs__)
+/* Green Hills support for PowerPC */
+
+#include
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    const unsigned int reverse = word & (~word + 1);
+    const int bit = 32 - __CLZ32(reverse);
+    return bit - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    const int bit = word ? 32 - __CLZ32(word) : 0;
+    return bit - 1;
+}
+
+#else
+/* Fall back to generic implementation. */
+
+/*
+** Binary search for the highest set bit: each step tests the upper part of the
+** remaining window and shifts the word up when that part is empty. The result is
+** 1-based (32 for bit 31) and 0 when no bit is set.
+*/
+tlsf_decl int tlsf_fls_generic(unsigned int word)
+{
+    int bit = 32;
+
+    if (!word) bit -= 1;
+    if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; }
+    if (!(word & 0xff000000)) { word <<= 8; bit -= 8; }
+    if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; }
+    if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; }
+    if (!(word & 0x80000000)) { word <<= 1; bit -= 1; }
+
+    return bit;
+}
+
+/* Implement ffs in terms of fls. */
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+    return tlsf_fls_generic(word & (~word + 1)) - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+    return tlsf_fls_generic(word) - 1;
+}
+
+#endif
+
+/* Possibly 64-bit version of tlsf_fls. */
+#if defined (TLSF_64BIT)
+/*
+** When the upper 32 bits of size are non-zero the result is 32 plus tlsf_fls() of
+** that upper half; otherwise it is tlsf_fls() of the lower 32 bits.
+*/
+tlsf_decl int tlsf_fls_sizet(size_t size)
+{
+    int high = (int)(size >> 32);
+    int bits = 0;
+    if (high)
+    {
+        bits = 32 + tlsf_fls(high);
+    }
+    else
+    {
+        bits = tlsf_fls((int)size & 0xffffffff);
+
+    }
+    return bits;
+}
+#else
+#define tlsf_fls_sizet tlsf_fls
+#endif
+
+#undef tlsf_decl
+
+/*
+** Constants.
+*/
+
+/* Public constants: may be modified. */
+enum tlsf_public
+{
+    /* log2 of number of linear subdivisions of block sizes. Larger
+    ** values require more memory in the control structure. Values of
+    ** 4 or 5 are typical.
+    */
+    SL_INDEX_COUNT_LOG2 = 5,
+};
+
+/* Private constants: do not modify. */
+enum tlsf_private
+{
+#if defined (TLSF_64BIT)
+    /* All allocation sizes and addresses are aligned to 8 bytes. */
+    ALIGN_SIZE_LOG2 = 3,
+#else
+    /* All allocation sizes and addresses are aligned to 4 bytes. */
+    ALIGN_SIZE_LOG2 = 2,
+#endif
+    ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2),
+
+    /*
+    ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits.
+    ** However, because we linearly subdivide the second-level lists, and
+    ** our minimum size granularity is 4 bytes, it doesn't make sense to
+    ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4,
+    ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be
+    ** trying to split size ranges into more slots than we have available.
+    ** Instead, we calculate the minimum threshold size, and place all
+    ** blocks below that size into the 0th first-level list.
+    */
+
+#if defined (TLSF_64BIT)
+    /*
+    ** TODO: We can increase this to support larger sizes, at the expense
+    ** of more overhead in the TLSF structure.
+    */
+    FL_INDEX_MAX = 32,
+#else
+    FL_INDEX_MAX = 30,
+#endif
+    SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2),
+    FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2),
+    FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1),
+
+    SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT),
+};
+
+/*
+** Cast and min/max macros.
+*/
+
+#define tlsf_cast(t, exp) ((t) (exp))
+#define tlsf_min(a, b) ((a) < (b) ? (a) : (b))
+#define tlsf_max(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+** Set assert macro, if it has not been provided by the user.
+*/
+#if !defined (tlsf_assert)
+#define tlsf_assert assert
+#endif
+
+/*
+** Static assertion mechanism.
+*/
+
+/* A false expression yields an array typedef of size -1, which fails to compile. */
+#define _tlsf_glue2(x, y) x ## y
+#define _tlsf_glue(x, y) _tlsf_glue2(x, y)
+#define tlsf_static_assert(exp) \
+    typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1]
+
+/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */
+tlsf_static_assert(sizeof(int) * CHAR_BIT == 32);
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32);
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64);
+
+/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */
+tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT);
+
+/* Ensure we've properly tuned our sizes. */
+tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT);
+
+/*
+** Data structures and associated constants.
+*/
+
+/*
+** Block header structure.
+**
+** There are several implementation subtleties involved:
+** - The prev_phys_block field is only valid if the previous block is free.
+** - The prev_phys_block field is actually stored at the end of the
+** previous block. It appears at the beginning of this structure only to
+** simplify the implementation.
+** - The next_free / prev_free fields are only valid if the block is free.
+*/
+typedef struct block_header_t
+{
+    /* Points to the previous physical block. */
+    struct block_header_t* prev_phys_block;
+
+    /* The size of this block, excluding the block header. */
+    size_t size;
+
+    /* Next and previous free blocks. */
+    struct block_header_t* next_free;
+    struct block_header_t* prev_free;
+} block_header_t;
+
+/*
+** Since block sizes are always at least a multiple of 4, the two least
+** significant bits of the size field are used to store the block status:
+** - bit 0: whether block is busy or free
+** - bit 1: whether previous block is busy or free
+*/
+static const size_t block_header_free_bit = 1 << 0;
+static const size_t block_header_prev_free_bit = 1 << 1;
+
+/*
+** The size of the block header exposed to used blocks is the size field.
+** The prev_phys_block field is stored *inside* the previous free block.
+*/
+static const size_t block_header_overhead = sizeof(size_t);
+
+/* User data starts directly after the size field in a used block. */
+static const size_t block_start_offset =
+    offsetof(block_header_t, size) + sizeof(size_t);
+
+/*
+** A free block must be large enough to store its header minus the size of
+** the prev_phys_block field, and no larger than the number of addressable
+** bits for FL_INDEX.
+*/
+static const size_t block_size_min =
+    sizeof(block_header_t) - sizeof(block_header_t*);
+static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX;
+
+
+/* The TLSF control structure. */
+/*
+** NOTE(review): the free lists hold raw block_header_t pointers. For a pool shared
+** between processes this presumably requires every process to map the pool at the
+** same address -- confirm against the mapping code.
+*/
+typedef struct control_t
+{
+    /* Empty lists point at this block to indicate they are free. */
+    block_header_t block_null;
+
+    /* Bitmaps for free lists. */
+    unsigned int fl_bitmap;
+    unsigned int sl_bitmap[FL_INDEX_COUNT];
+
+    /* Head of free lists. */
+    block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT];
+} control_t;
+
+/* A type used for casting when doing pointer arithmetic. */
+typedef ptrdiff_t tlsfptr_t;
+
+/*
+** block_header_t member functions.
+*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. */ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. 
*/ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. */ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. 
In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. */ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted. */ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. 
*/ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. */ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. 
*/ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. */ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. */ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. 
*/ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. */ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. 
*/ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. */ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. + */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. 
*/ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* comment out the code used for debugging */ +#if 0 +/* +** Debugging utilities. +*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. 
*/ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} +#endif + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + + +/* +size_t +tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int +tlsf_check_pool(pool_t pool) +{ + // Check that the blocks are physically correct. 
+ integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +size_t +tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t +tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t +tlsf_block_size_max(void) +{ + return block_size_max; +} + +size_t +tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} +*/ + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. + */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. 
*/ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +/* +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} +*/ + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 
0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +/* +void +tlsf_destroy(tlsf_t tlsf) +{ +} + +pool_t +tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} +*/ + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + return block_prepare_used(control, block, adjust); +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. + ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. 
+ */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. */ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + return block_prepare_used(control, block, adjust); +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. 
+**
+** This routine handles the somewhat esoteric edge cases of realloc:
+** - a non-zero size with a null pointer will behave like malloc
+** - a zero size with a non-null pointer will behave like free
+** - a request that cannot be satisfied will leave the original buffer
+**   untouched
+** - an extended buffer size will leave the newly-allocated area with
+**   contents undefined
+**
+** Returns the (possibly moved) buffer, or null when the buffer was freed
+** or the request could not be satisfied.
+*/
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	void* p = 0;
+
+	/* Zero-size requests are treated as free. */
+	if (ptr && size == 0)
+	{
+		tlsf_free(tlsf, ptr);
+	}
+	/* Requests with NULL pointers are treated as malloc. */
+	else if (!ptr)
+	{
+		p = tlsf_malloc(tlsf, size);
+	}
+	else
+	{
+		block_header_t* block = block_from_ptr(ptr);
+		block_header_t* next = block_next(block);
+
+		const size_t cursize = block_size(block);
+		const size_t combined = cursize + block_size(next) + block_header_overhead;
+		const size_t adjust = adjust_request_size(size, ALIGN_SIZE);
+
+		tlsf_assert(!block_is_free(block) && "block already marked as free");
+
+		/*
+		** adjust_request_size() rounds the request up, so a valid result
+		** is never smaller than size. It returns 0 when the request
+		** reaches block_size_max, and a small wrapped value if rounding
+		** up overflows. Either way the request cannot be satisfied; bail
+		** out before block_trim_used() shrinks the live block to that
+		** bogus size.
+		*/
+		if (adjust < size)
+		{
+			return 0;
+		}
+
+		/*
+		** If the next block is used, or when combined with the current
+		** block, does not offer enough space, we must reallocate and copy.
+		*/
+		if (adjust > cursize && (!block_is_free(next) || adjust > combined))
+		{
+			p = tlsf_malloc(tlsf, size);
+			if (p)
+			{
+				const size_t minsize = tlsf_min(cursize, size);
+				memcpy(p, ptr, minsize);
+				tlsf_free(tlsf, ptr);
+			}
+		}
+		else
+		{
+			/* Do we need to expand to the next block? */
+			if (adjust > cursize)
+			{
+				block_merge_next(control, block);
+				block_mark_as_used(block);
+			}
+
+			/* Trim the resulting block and return the original pointer. */
+			block_trim_used(control, block, adjust);
+			p = ptr;
+		}
+	}
+
+	return p;
+}
diff --git a/src/gurt/shm_utils.c b/src/gurt/shm_utils.c
new file mode 100644
index 00000000000..e6e31a94624
--- /dev/null
+++ b/src/gurt/shm_utils.c
@@ -0,0 +1,45 @@
+/**
+ * (C) Copyright 2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#include
+#include
+
+#include
+#include
+
+/**
+ * Lock a mutex, recovering it when pthread_mutex_lock() reports EOWNERDEAD.
+ *
+ * When pthread_mutex_lock() returns EOWNERDEAD the calling thread already holds the lock, so the
+ * mutex only has to be marked consistent. It is not released and re-acquired.
+ *
+ * \param[in] mutex	the mutex to lock
+ *
+ * \return		zero with the mutex held on success. Otherwise the error code from
+ *			pthread_mutex_lock() or pthread_mutex_consistent(), with the mutex not held.
+ */
+int
+shm_mutex_lock(pthread_mutex_t *mutex)
+{
+	int rc;
+	int rc_unlock;
+
+	rc = pthread_mutex_lock(mutex);
+	if (rc != EOWNERDEAD)
+		return rc;
+
+	/* EOWNERDEAD: the lock is held by this thread, but the mutex is marked inconsistent */
+	rc = pthread_mutex_consistent(mutex);
+	if (rc == 0)
+		return 0;
+
+	DS_ERROR(rc, "pthread_mutex_consistent() failed");
+	/* never report a failure to the caller while still holding the lock */
+	rc_unlock = pthread_mutex_unlock(mutex);
+	if (rc_unlock)
+		DS_ERROR(rc_unlock,
+			 "pthread_mutex_unlock() failed after pthread_mutex_consistent()");
+	return rc;
+}
+
+/**
+ * Unlock a mutex.
+ *
+ * \param[in] mutex	the mutex to unlock
+ *
+ * \return		the return value of pthread_mutex_unlock()
+ */
+int
+shm_mutex_unlock(pthread_mutex_t *mutex)
+{
+	return pthread_mutex_unlock(mutex);
+}
diff --git a/src/gurt/tests/SConscript b/src/gurt/tests/SConscript
index a773b12812a..86b3cbb4fb4 100644
--- a/src/gurt/tests/SConscript
+++ b/src/gurt/tests/SConscript
@@ -18,7 +18,7 @@ def scons():
     test_env = env.Clone()
     test_env.require('mercury', 'uuid')
-    test_env.AppendUnique(LIBS=['pthread', 'cmocka', 'm', 'dl'])
+    test_env.AppendUnique(LIBS=['pthread', 'cmocka', 'm', 'dl', 'rt'])
     test_env.AppendUnique(CXXFLAGS=['-std=c++0x'])
 
     mocks_ld_script = f"{Dir('.').srcnode()}/mocks-gurt-ld-opts"
diff --git a/src/include/gurt/shm_alloc.h b/src/include/gurt/shm_alloc.h
new file mode 100644
index 00000000000..7ae43667350
--- /dev/null
+++ b/src/include/gurt/shm_alloc.h
@@ -0,0 +1,138 @@
+/**
+ * (C) Copyright 2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#ifndef __DAOS_SHM_ALLOC_H__
+#define __DAOS_SHM_ALLOC_H__
+
+#include
+#include
+#include
+
+/* default value for invalid offset pointer */
+#define INVALID_OFFSET (-1L)
+
+/* the magic value stored at the header of shared memory region */
+#define DSM_MAGIC (0x13577531)
+
+/* the fixed address for shared memory in all processes. We will phase this out later. */
+#define FIXED_SHM_ADDR ((void *)0x600000000000)
+
+/**
+ * the number of shared memory allocators.
Use multiple allocators to minimize lock contention + * since the allocator currently used is not thread safe. + */ +#define N_SHM_POOL (8) + +/* the size of each shm pool */ +#define SHM_POOL_SIZE (1024*1024*1024L) + +/* the combined size of all shm pools */ +#define SHM_SIZE_TOTAL (SHM_POOL_SIZE * N_SHM_POOL) + +/** + * the threshold size used to decide whether a request counts as large memory. The ways to pick memory + * allocator are different for large and small memory blocks. + */ +#define LARGE_MEM (64 * 1024) + +/* the address of shared memory region */ +extern struct d_shm_alloc *d_shm_head; + +/* Local info about a shm buffer. Each process has its own copy. */ +struct d_shm_alloc { + /* magic not equal DSM_MAGIC means shared memory is not initialized yet */ + int magic; + pthread_mutex_t g_lock; + /* the count of how many processes are mapping the shared memory region */ + _Atomic int ref_count; + /* global counter used for round robin picking memory allocator for large memory request */ + _Atomic uint64_t large_mem_count; + /* array of pointers to memory allocators */ + tlsf_t tlsf[N_SHM_POOL]; + /* lock for accessing one individual memory allocator */ + pthread_mutex_t mem_lock[N_SHM_POOL]; + + /* the lock needed when a hash table is created or destroyed */ + pthread_mutex_t ht_lock; + /* the offset to the first hash table head */ + long int off_ht_head; + + /* the total size of shared memory region */ + uint64_t size; + /* reserved for future use */ + char reserved[256]; +}; + +/* the total size of shared memory that will be allocated: all shm pools plus struct d_shm_alloc */ +#define SHM_SIZE_REQ (SHM_POOL_SIZE * N_SHM_POOL + sizeof(struct d_shm_alloc)) + + +/** + * Initialize shared memory region in current process + * + * \return zero for success. return error code otherwise. + */ +int +shm_init(void); + +/** + * Unmap and decrease the reference count of shared memory. Shared memory should not be referenced + * after shm_dec_ref() is called.
+ */ +void +shm_dec_ref(void); + +/** + * Increase the reference of shared memory. + */ +void +shm_inc_ref(void); + +/** + * Allocate memory from shared memory region + * + * \param[in] size size of memory block requested + * + * \return buffer address + */ +void * +shm_alloc(size_t size); + +/** + * Remove shared memory file under /dev/shm/ when tests finish + */ +void +shm_destroy(void); + +/** + * Allocate memory from shared memory region with alignment + * + * \param[in] align size of alignment + * \param[in] size size of memory block requested + * + * \return buffer address + */ +void * +shm_memalign(size_t align, size_t size); + +/** + * Free a memory block which was allocated from shared memory region + * + * \param[in] ptr memory block address + * + */ +void +shm_free(void *ptr); + +/** + * Query whether shared memory region is initialized properly or not + * + * \return True/False + */ +bool +shm_inited(void); + +#endif diff --git a/src/include/gurt/shm_dict.h b/src/include/gurt/shm_dict.h new file mode 100644 index 00000000000..f61ed96f087 --- /dev/null +++ b/src/include/gurt/shm_dict.h @@ -0,0 +1,147 @@ +/** + * (C) Copyright 2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include + +/* the max length allowed for a hash table name */ +#define MAX_HT_NAME_LEN 16 + +/* reserved string for pthread_rwlockattr_t as ht record value */ +#define KEY_VALUE_PTHREAD_RWLOCK "INIT_PTHREAD_RWLOCK" + +/* reserved string for pthread_mutexattr_t as ht record value */ +#define KEY_VALUE_PTHREAD_LOCK "INIT_PTHREAD_LOCK" + +/* struct of the record in the hash table stored in shared memory */ +struct shm_ht_rec { + /* length of key */ + int len_key; + /* length of value */ + int len_value; + /* length of padding. Padding may be needed when value is a mutex!!!! 
*/ + int len_padding; + /* reference count of this record */ + int ref_count; + /* the index of the mutex to be locked when updating this record */ + int idx_lock; + /* offset pointer to the previous record in record linked list */ + long int prev; + /* offset pointer to the next record in record linked list */ + long int next; + + /* char key[len_key] will be stored here */ + /* char value[len_value] will be stored here */ +}; + +/* struct of the head of the hash table stored in shared memory */ +struct d_shm_ht_head { + char ht_name[MAX_HT_NAME_LEN]; + int n_bucket; /* size of hash table */ + int n_lock; /* number of mutexes in locks[] */ + long int prev; /* offset to find the previous d_shm_ht_head */ + long int next; /* offset to find the next d_shm_ht_head */ + + /** + * pthread_mutex_t locks[n_lock] will be stored here. Multiple mutexes to alleviate lock + * contention + */ + /** + * long int off_next[n_bucket] will be stored here. The array of offset to next shm_ht_rec + */ +}; + +/* the address of shared memory region */ +extern struct d_shm_alloc *d_shm_head; + +int +shm_ht_create(const char name[], int bits, int n_lock, struct d_shm_ht_head **ht_head); + +int +shm_ht_destroy(struct d_shm_ht_head *ht_head, int force); + +int +get_ht_with_name(const char *name, struct d_shm_ht_head **ht_head); + +/** + * lookup \p key in the hash table, the value in the record is returned on success. + * + * \param[in] ht_head Pointer to the hash table + * \param[in] key The key to search + * \param[in] ksize Size of the key + + * \param[out] link The pointer to the hash table record + * + * \return value + */ +void * +shm_ht_rec_find(struct d_shm_ht_head *ht_head, const char *key, const int ksize, struct shm_ht_rec + **link); + +/** + * Lookup \p key in the hash table, if there is a matched record, it should be + * returned, otherwise a new record is inserted in the hash table.
+ * + * \param[in] ht_head Pointer to the hash table + * \param[in] key The key to be inserted + * \param[in] ksize Size of the key + * \param[in] val The value for the key + * \param[in] len_value The size of the value + + * \param[out] link The pointer to the hash table record + * + * \return value + */ +void * +shm_ht_rec_find_insert(struct d_shm_ht_head *ht_head, const char *key, const int ksize, + const char *val, const int len_value, struct shm_ht_rec **link); + +/** + * Search and delete the record identified by \p key from the hash table. + * + * \param[in] ht_head Pointer to the hash table + * \param[in] key The key of the record being deleted + * \param[in] ksize Size of the key + * + * \retval true Item with \p key has been deleted + * \retval false Can't find the record by \p key + */ +int +shm_ht_rec_delete(struct d_shm_ht_head *ht_head, const char *key, const int ksize); + +/** + * Delete the record linked by the chain \p link. + * This record will be freed if hop_rec_free() is defined and the hash table + * holds the last refcount. + * + * \param[in] ht_head Pointer to the hash table + * \param[in] link The link chain of the record + * + * \retval true Successfully deleted the record + * \retval false The record has already been unlinked + * from the hash table + */ +int +shm_ht_rec_delete_at(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link); + +/** + * Decrease the refcount of the record. + * The record will be freed if hop_decref() returns true and the EPHEMERAL bit + * is set. + * + * \param[in] ht_head Pointer to the hash table + * \param[in] link Chain link of the hash record + */ +void +shm_ht_rec_decref(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link); + +/** + * Increase the refcount of the record.
+ * + * \param[in] ht_head Pointer to the hash table + * \param[in] link The link chain of the record + */ +void +shm_ht_rec_addref(struct d_shm_ht_head *ht_head, struct shm_ht_rec *link); diff --git a/src/include/gurt/shm_tlsf.h b/src/include/gurt/shm_tlsf.h new file mode 100644 index 00000000000..577e4c940c1 --- /dev/null +++ b/src/include/gurt/shm_tlsf.h @@ -0,0 +1,70 @@ +#ifndef INCLUDED_tlsf +#define INCLUDED_tlsf + +/* +** Two Level Segregated Fit memory allocator, version 3.1. +** Written by Matthew Conte +** http://tlsf.baisoku.org +** +** Based on the original documentation by Miguel Masmano: +** http://www.gii.upv.es/tlsf/main/docs +** +** This implementation was written to the specification +** of the document, therefore no GPL restrictions apply. +** +** Copyright (c) 2006-2016, Matthew Conte +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** * Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** * Neither the name of the copyright holder nor the +** names of its contributors may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED.
IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY +** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */ +/* pool_t: a block of memory that TLSF can manage. */ +typedef void* tlsf_t; +typedef void* pool_t; + +/* Create/destroy a memory pool. */ +tlsf_t +tlsf_create_with_pool(void* mem, size_t bytes); + +/* malloc/memalign/realloc/free replacements. */ +void* +tlsf_malloc(tlsf_t tlsf, size_t bytes); +void* +tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes); +void* +tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size); +void +tlsf_free(tlsf_t tlsf, void* ptr); + +#if defined(__cplusplus) +}; +#endif + +#endif diff --git a/src/include/gurt/shm_utils.h b/src/include/gurt/shm_utils.h new file mode 100644 index 00000000000..8c665bebd70 --- /dev/null +++ b/src/include/gurt/shm_utils.h @@ -0,0 +1,34 @@ +/** + * (C) Copyright 2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DAOS_SHM_UTILS_H__ +#define __DAOS_SHM_UTILS_H__ + +/* memory block alignment in shared memory */ +#define SHM_MEM_ALIGN 4 + +/** + * wrapper of pthread_mutex_lock() for a mutex created with attribute PTHREAD_MUTEX_ROBUST + * + * \param[in] mutex pointer to mutex + * + * \return zero for success. error code otherwise. +*/ + +int +shm_mutex_lock(pthread_mutex_t *mutex); + +/** + * wrapper of pthread_mutex_unlock(). Used just for clarity. + * + * \param[in] mutex pointer to mutex + * + * \return zero for success.
error code otherwise. +*/ +int +shm_mutex_unlock(pthread_mutex_t *mutex); + +#endif diff --git a/src/tests/ftest/cart/utest/SConscript b/src/tests/ftest/cart/utest/SConscript index 9e25446f846..b5782c56cf2 100644 --- a/src/tests/ftest/cart/utest/SConscript +++ b/src/tests/ftest/cart/utest/SConscript @@ -20,7 +20,7 @@ def scons(): test_env.require('mercury', 'uuid', 'cmocka') # The test is checking that this feature works so disable the compile warnings for it. test_env.AppendIfSupported(CCFLAGS=['-Wno-gnu-designator', '-Wno-missing-field-initializers']) - test_env.AppendUnique(LIBS=['pthread', 'm', 'yaml', 'dl']) + test_env.AppendUnique(LIBS=['pthread', 'm', 'yaml', 'dl', 'rt']) test_env.AppendUnique(CXXFLAGS=['-std=c++0x']) test_env.AppendUnique(LIBPATH=LIBPATH) test_env.AppendUnique(RPATH_FULL=LIBPATH) diff --git a/src/tests/ftest/daos_test/shm.py b/src/tests/ftest/daos_test/shm.py new file mode 100644 index 00000000000..4c99143d000 --- /dev/null +++ b/src/tests/ftest/daos_test/shm.py @@ -0,0 +1,44 @@ +""" + (C) Copyright 2024 Hewlett Packard Enterprise Development LP. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" + +import os + +from apricot import TestWithServers +from cmocka_utils import CmockaUtils +from job_manager_utils import get_job_manager + + +class DaosCoreTestShm(TestWithServers): + """Runs DAOS shared memory tests. + + :avocado: recursive + """ + + def test_daos_shm_unit(self): + """Jira ID: DAOS-16877. 
+ + Test Description: + Run shm_test + + Use cases: + DAOS shared memory unit tests + + :avocado: tags=all,pr,daily_regression + :avocado: tags=vm + :avocado: tags=daos_test,shm_test,shm + :avocado: tags=DaosCoreTestShm,test_daos_shm_unit + """ + daos_test = os.path.join(self.bin, 'shm_test') + cmocka_utils = CmockaUtils( + self.hostlist_clients, "shm", self.outputdir, self.test_dir, self.log) + daos_test_env = cmocka_utils.get_cmocka_env() + job = get_job_manager(self, "Clush", cmocka_utils.get_cmocka_command(daos_test)) + job.assign_hosts(cmocka_utils.hosts) + job.assign_environment(daos_test_env) + + cmocka_utils.run_cmocka_test(self, job) + if not job.result.passed: + self.fail(f'Error running {job.command} on {job.hosts}') diff --git a/src/tests/ftest/daos_test/shm.yaml b/src/tests/ftest/daos_test/shm.yaml new file mode 100644 index 00000000000..9a3caa75175 --- /dev/null +++ b/src/tests/ftest/daos_test/shm.yaml @@ -0,0 +1,20 @@ +hosts: + test_servers: 1 +timeout: 90 +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 +pool: + scm_size: 1G +container: + type: POSIX + control_method: daos diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index a118227fabd..c13d376d226 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -13,6 +13,15 @@ def scons(): dfusetest = dfuse_env.d_program(File("dfuse_test.c"), LIBS='cmocka') denv.Install('$PREFIX/bin/', dfusetest) + shm_test_env = base_env.Clone() + shm_test_env.compiler_setup() + shm_test_env.AppendUnique(LIBPATH=[Dir('../../gurt')]) + shm_test_env.AppendUnique(LIBPATH=[Dir('../../common')]) + shm_test_env.AppendUnique(LIBPATH=[Dir('../../cart')]) + shmtest = shm_test_env.d_program(File("shm_test.c"), LIBS=['gurt', 'daos_common', 'cart', + 'cmocka', 'rt', 'pthread']) + denv.Install('$PREFIX/bin/', shmtest) + 
denv.AppendUnique(LIBPATH=[Dir('../../client/dfs')]) denv.AppendUnique(CPPPATH=[Dir('../../client/dfs').srcnode()]) denv.AppendUnique(CPPPATH=[Dir('../../mgmt').srcnode()]) diff --git a/src/tests/suite/shm_test.c b/src/tests/suite/shm_test.c new file mode 100644 index 00000000000..3597e67c5dc --- /dev/null +++ b/src/tests/suite/shm_test.c @@ -0,0 +1,425 @@ +/** + * (C) Copyright 2024 Hewlett Packard Enterprise Development LP. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * Unit test for shared memory. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Tests can be run by specifying the appropriate argument for a test or all will be run if no test + * is specified. + */ +static const char *all_tests = "hlm"; + +static void +print_usage() +{ + print_message("\n\nShared memory tests\n=============================\n"); + print_message("Tests: Use one of these arg(s) for specific test\n"); + print_message("shm_test -a|--all\n"); + print_message("shm_test -h|--hash\n"); + print_message("shm_test -l|--lock\n"); + print_message("shm_test -m|--memory\n"); + print_message("Default runs all tests\n"); + print_message("\n=============================\n"); +} + +#define N_LOOP_MEM (8) + +void +do_mem(void **state) +{ + int i; + int rc; + size_t align = 4; + size_t size; + char *buf_list[N_LOOP_MEM]; + + rc = shm_init(); + assert_true(rc == 0); + assert_true(shm_inited() == true); + + srandom(1); + /* testing allocation with alignment and deallocation */ + for (i = 0; i < N_LOOP_MEM; i++) { + size = (size_t)(random() % (120 * 1024)); + buf_list[i] = shm_memalign(align, size); + assert_non_null(buf_list[i]); + assert_true((uint64_t)buf_list[i] % align == 0); + align *= 2; + } + for (i = 0; i < N_LOOP_MEM; i++) { + shm_free(buf_list[i]); + } + + /* testing allocation without alignment 
and deallocation */ + for (i = 0; i < N_LOOP_MEM; i++) { + size = (size_t)(random() % (120 * 1024)); + buf_list[i] = shm_alloc(size); + assert_non_null(buf_list[i]); + } + for (i = 0; i < N_LOOP_MEM; i++) { + shm_free(buf_list[i]); + } +} + +#define HT_NAME "shm_ht_test" +#define KEY_1 "key_1" +#define VAL_1 "value_1" +#define KEY_2 "key_2" +#define VAL_2 "value_2" +#define KEY_3 "key_3" +#define VAL_3 "value_3" + +void +verify_hash(void) +{ + int rc; + char *value; + struct shm_ht_rec *link; + struct d_shm_ht_head *ht_head_lock; + + /* look up hash key in current process */ + rc = get_ht_with_name(HT_NAME, &ht_head_lock); + assert_true(rc == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_1, strlen(KEY_1), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_1) == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_2, strlen(KEY_2), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_2) == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_3, strlen(KEY_3), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_3) == 0); +} + +void +verify_hash_by_child(void) +{ + int rc; + char *value; + struct shm_ht_rec *link; + struct d_shm_ht_head *ht_head_lock; + + /* look up hash key in child process */ + rc = shm_init(); + assert_true(rc == 0); + assert_true(shm_inited() == true); + + rc = get_ht_with_name(HT_NAME, &ht_head_lock); + assert_true(rc == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_1, strlen(KEY_1), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_1) == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_2, strlen(KEY_2), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_2) == 0); + + value = (char *)shm_ht_rec_find(ht_head_lock, KEY_3, strlen(KEY_3), &link); + assert_non_null(value); + assert_true(strcmp(value, VAL_3) == 0); +} + + +void +do_hash(void **state) +{ + int rc; + int status; + /* the hash table in shared memory */ + struct d_shm_ht_head 
*ht_head_lock; + struct shm_ht_rec *link; + char *value; + char *argv[3] = {"shm_test", "--verifykv", NULL}; + char *exe_path; + pid_t pid; + + /* create shared memory, create a hash table, insert three keys */ + rc = shm_init(); + assert_true(rc == 0); + assert_true(shm_inited() == true); + + rc = shm_ht_create(HT_NAME, 8, 16, &ht_head_lock); + assert_true(rc == 0); + + value = shm_ht_rec_find_insert(ht_head_lock, KEY_1, strlen(KEY_1), VAL_1, sizeof(VAL_1), + &link); + assert_non_null(value); + + value = shm_ht_rec_find_insert(ht_head_lock, KEY_2, strlen(KEY_2), VAL_2, sizeof(VAL_2), + &link); + assert_non_null(value); + + value = shm_ht_rec_find_insert(ht_head_lock, KEY_3, strlen(KEY_3), VAL_3, sizeof(VAL_3), + &link); + assert_non_null(value); + + verify_hash(); + + /* start a child process and run shm_test & verify key-value pairs */ + exe_path = malloc(PATH_MAX); + assert_non_null(exe_path); + rc = readlink("/proc/self/exe", exe_path, PATH_MAX - 1); + assert_true(rc > 0); + exe_path[rc] = 0; + + pid = fork(); + if (pid == 0) + execvp(exe_path, argv); + waitpid(pid, &status, 0); + if (WIFEXITED(status)) + assert_int_equal(WEXITSTATUS(status), 0); + free(exe_path); +} + +#define TIME_SLEEP (1) + +void +do_lock_mutex_child(bool lock_only) +{ + int rc; + struct shm_ht_rec *link; + struct d_shm_ht_head *ht_head_lock; + const char ht_name[] = "shm_lock_test"; + const char key[] = "mutex"; + pthread_mutex_t *mutex; + + /* test lock a mutex in shared memory in a child process */ + rc = shm_init(); + assert_true(rc == 0); + assert_true(shm_inited() == true); + + rc = get_ht_with_name(ht_name, &ht_head_lock); + assert_true(rc == 0); + + mutex = (pthread_mutex_t *)shm_ht_rec_find(ht_head_lock, key, strlen(key), &link); + assert_true(mutex != NULL); + + shm_mutex_lock(mutex); + sleep(TIME_SLEEP); + if (!lock_only) + shm_mutex_unlock(mutex); +} + +void +do_lock(void **state) +{ + int rc; + int status; + pthread_mutex_t *mutex; + /* the hash table in shared memory */ + 
struct d_shm_ht_head *ht_head_lock; + const char ht_name[] = "shm_lock_test"; + const char key[] = "mutex"; + struct timeval tm1, tm2; + double dt; + struct shm_ht_rec *link; + char *argv[3] = {"shm_test", "--lockmutex", NULL}; + char *argv2[3] = {"shm_test", "--lockonly", NULL}; + char *exe_path; + pid_t pid; + + /** + * create shared memory, create a hash table, insert a key whose value is a struct of + * pthread_mutex_t + */ + rc = shm_init(); + assert_true(rc == 0); + assert_true(shm_inited() == true); + + rc = shm_ht_create(ht_name, 8, 16, &ht_head_lock); + assert_true(rc == 0); + + mutex = (pthread_mutex_t *)shm_ht_rec_find_insert(ht_head_lock, key, strlen(key), + KEY_VALUE_PTHREAD_LOCK, sizeof(pthread_mutex_t), &link); + assert_true(mutex != NULL); + + /* start a child process to lock this mutex */ + exe_path = malloc(PATH_MAX); + assert_non_null(exe_path); + rc = readlink("/proc/self/exe", exe_path, PATH_MAX - 1); + assert_true(rc > 0); + exe_path[rc] = 0; + + pid = fork(); + if (pid == 0) + execvp(exe_path, argv); + else + /* take a short nap to allow the child process to lock the mutex first */ + usleep(18000); + + gettimeofday(&tm1, NULL); + shm_mutex_lock(mutex); + gettimeofday(&tm2, NULL); + dt = (tm2.tv_sec - tm1.tv_sec) + (tm2.tv_usec - tm1.tv_usec) * 0.000001; + assert_true(fabs(dt - TIME_SLEEP) < 0.02); + shm_mutex_unlock(mutex); + + waitpid(pid, &status, 0); + if (WIFEXITED(status)) + assert_int_equal(WEXITSTATUS(status), 0); + + /** + * start a child process to lock this mutex and exit without unlocking this mutex to mimic + * a lock owner process crashes or is killed + */ + pid = fork(); + if (pid == 0) + execvp(exe_path, argv2); + + /* the child process should finish now with mutex unlocked */ + waitpid(pid, &status, 0); + if (WIFEXITED(status)) + assert_int_equal(WEXITSTATUS(status), 0); + + shm_mutex_lock(mutex); + shm_mutex_unlock(mutex); +} + +static int +run_specified_tests(const char *tests, int *sub_tests, int sub_tests_size) +{ + int 
nr_failed = 0; + + if (strlen(tests) == 0) + tests = all_tests; + + while (*tests != '\0') { + switch (*tests) { + case 'h': + printf("\n\n================="); + printf("shm hash table tests"); + printf("=====================\n"); + const struct CMUnitTest ht_tests[] = { + cmocka_unit_test(do_hash), + }; + nr_failed += cmocka_run_group_tests(ht_tests, NULL, NULL); + break; + + case 'l': + printf("\n\n================="); + printf("shm lock/unlock tests"); + printf("=====================\n"); + const struct CMUnitTest lock_tests[] = { + cmocka_unit_test(do_lock), + }; + nr_failed += cmocka_run_group_tests(lock_tests, NULL, NULL); + break; + + case 'm': + printf("\n\n================="); + printf("shm allocation/deallocation tests"); + printf("=====================\n"); + const struct CMUnitTest mem_tests[] = { + cmocka_unit_test(do_mem), + }; + nr_failed += cmocka_run_group_tests(mem_tests, NULL, NULL); + break; + + default: + assert_true(0); + } + + tests++; + } + + return nr_failed; +} + +int +main(int argc, char **argv) +{ + char tests[64] = {}; + int ntests = 0; + int nr_failed = 0; + int opt = 0, index = 0, rc; + + static struct option long_options[] = {{"all", no_argument, NULL, 'a'}, + {"hash", no_argument, NULL, 'h'}, + {"lock", no_argument, NULL, 'l'}, + {"lockmutex", no_argument, NULL, 'k'}, + {"memory", no_argument, NULL, 'm'}, + {"lockonly", no_argument, NULL, 'o'}, + {"verifykv", no_argument, NULL, 'v'}, + {NULL, 0, NULL, 0}}; + + rc = daos_debug_init(NULL); + assert_true(rc == 0); + + while ((opt = getopt_long(argc, argv, ":ahlkmov", long_options, &index)) != -1) { + if (strchr(all_tests, opt) != NULL) { + tests[ntests] = opt; + ntests++; + continue; + } + switch (opt) { + case 'a': + break; + case 'v': + /* only run by child process */ + verify_hash_by_child(); + goto exit_child; + case 'k': + /* only run by child process */ + do_lock_mutex_child(false); + goto exit_child; + case 'o': + /* only run by child process */ + do_lock_mutex_child(true); + 
goto exit_child; + default: + printf("Unknown Option\n"); + print_usage(); + return 1; + } + } + + nr_failed = run_specified_tests(tests, NULL, 0); + + print_message("\n============ Summary %s\n", __FILE__); + if (nr_failed == 0) + print_message("OK - NO TEST FAILURES\n"); + else + print_message("ERROR, %i TEST(S) FAILED\n", nr_failed); + + /* unlink shared memory file under /dev/shm/ */ + shm_destroy(); + daos_debug_fini(); + + return nr_failed; + +exit_child: + daos_debug_fini(); + return 0; +} diff --git a/utils/rpms/daos.rpmlintrc b/utils/rpms/daos.rpmlintrc index 9912465edf4..cddd4f4ad90 100644 --- a/utils/rpms/daos.rpmlintrc +++ b/utils/rpms/daos.rpmlintrc @@ -20,7 +20,7 @@ addFilter("daos-client\.x86_64: E: post(i|u)n-without-ldconfig \/usr\/lib64\/lib addFilter("daos-(client|server)\.x86_64: W: dangerous-command-in-%post(un)? rm") # lots of missing manpages -addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests|ddb|ddb_tests)") +addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|shm_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests|ddb|ddb_tests)") addFilter("daos-(server|firmware)\.x86_64: 
W: non-standard-(u|g)id \/.+ daos_server") diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index e9de33b41c7..61d2eabeac7 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -527,6 +527,7 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent %{_bindir}/drpc_engine_test %{_bindir}/drpc_test %{_bindir}/dfuse_test +%{_bindir}/shm_test %{_bindir}/eq_tests %{_bindir}/job_tests %{_bindir}/security_test