Skip to content

Commit

Permalink
Waste less memory on os_event structs
Browse files Browse the repository at this point in the history
Summary:
WebScaleSQL Feature: Lower Memory Footprint

This fixes: http://bugs.mysql.com/62535

Allow os_event structs to be packed into their parent structures.

Split memory-management linked-list overhead out of event struct.

Don't allocate linked-list overhead when it won't be used (when packed).

Packed event structures into mutex and rw_lock structures.

Adjust event and fast_mutex counts on exit, since rw_locks are never freed.

Trimmed down some wasted space within the event structures.

Added macros to access sub-elements in optimized packed values.

Share the pthread mutex and condvar data in a pool for packed events.

Still allocate dedicated pthread data for each non-packed os_event_t.

Added innodb_sync_pool_size sysvar to select pool size (def: 1024).

Memory usage test results (with 54G buffer pool, from 5.6.10):
61510268 KB - Without this change
60266124 KB - With this change
Space savings ~ 1.2 GB

Sysbench results (from 5.6.10):
QPS for 24 cores, 22G database, 1G buffer pool, read-only:
8	16	32	64	128	256	threads
51253	100571	164190	165912	159370	170362	without this change
51717	103426	166108	168247	159133	172756	with this change

Test Plan: Passed all unit tests, including the big and huge stress tests.

Reviewers: darnaut, inaam-rana, jeremycole, pivanof

Reviewed By: pivanof

Subscribers: jtolmer

Differential Revision: https://reviews.facebook.net/D24165
Differential Revision: https://reviews.facebook.net/D54465
  • Loading branch information
steaphangreene authored and Herman Lee committed Jan 24, 2017
1 parent 19dc9fa commit c1b8d83
Show file tree
Hide file tree
Showing 14 changed files with 402 additions and 103 deletions.
53 changes: 53 additions & 0 deletions mysql-test/suite/sys_vars/r/innodb_sync_pool_size_basic.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
'#---------------------BS_STVARS_022_01----------------------#'
SELECT @@GLOBAL.innodb_sync_pool_size;
@@GLOBAL.innodb_sync_pool_size
1024
1024 Expected
'#---------------------BS_STVARS_022_02----------------------#'
SET @@GLOBAL.innodb_sync_pool_size=1;
ERROR HY000: Variable 'innodb_sync_pool_size' is a read only variable
Expected error 'Read only variable'
SELECT @@GLOBAL.innodb_sync_pool_size;
@@GLOBAL.innodb_sync_pool_size
1024
1024 Expected
'#---------------------BS_STVARS_022_03----------------------#'
SELECT @@GLOBAL.innodb_sync_pool_size = VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_sync_pool_size';
@@GLOBAL.innodb_sync_pool_size = VARIABLE_VALUE
1
1 Expected
SELECT @@GLOBAL.innodb_sync_pool_size;
@@GLOBAL.innodb_sync_pool_size
1024
1024 Expected
SELECT VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_sync_pool_size';
VARIABLE_VALUE
1024
1024 Expected
'#---------------------BS_STVARS_022_04----------------------#'
SELECT @@innodb_sync_pool_size = @@GLOBAL.innodb_sync_pool_size;
@@innodb_sync_pool_size = @@GLOBAL.innodb_sync_pool_size
1
1 Expected
'#---------------------BS_STVARS_022_05----------------------#'
SELECT @@innodb_sync_pool_size;
@@innodb_sync_pool_size
1024
1024 Expected
SELECT @@local.innodb_sync_pool_size;
ERROR HY000: Variable 'innodb_sync_pool_size' is a GLOBAL variable
Expected error 'Variable is a GLOBAL variable'
SELECT @@SESSION.innodb_sync_pool_size;
ERROR HY000: Variable 'innodb_sync_pool_size' is a GLOBAL variable
Expected error 'Variable is a GLOBAL variable'
SELECT @@GLOBAL.innodb_sync_pool_size;
@@GLOBAL.innodb_sync_pool_size
1024
1024 Expected
SELECT innodb_sync_pool_size = @@SESSION.innodb_sync_pool_size;
ERROR 42S22: Unknown column 'innodb_sync_pool_size' in 'field list'
Expected error 'Unknown column'
93 changes: 93 additions & 0 deletions mysql-test/suite/sys_vars/t/innodb_sync_pool_size_basic.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
################## mysql-test\t\innodb_sync_pool_size_basic.test ##############
# #
# Variable Name: innodb_sync_pool_size #
# Scope: Global #
# Access Type: Static #
# Data Type: numeric #
# #
# #
# Creation Date: 2012-02-27 #
# Author : Steaphan Greene #
# #
# #
# Description: Test Cases of Static System Variable innodb_sync_pool_size #
# that checks the behavior of this variable. #
# #
###############################################################################

--source include/have_innodb.inc

--echo '#---------------------BS_STVARS_022_01----------------------#'
####################################################################
# Displaying default value #
####################################################################
SELECT @@GLOBAL.innodb_sync_pool_size;
--echo 1024 Expected


--echo '#---------------------BS_STVARS_022_02----------------------#'
####################################################################
# Check if the value can be set #
####################################################################

--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SET @@GLOBAL.innodb_sync_pool_size=1;
--echo Expected error 'Read only variable'

SELECT @@GLOBAL.innodb_sync_pool_size;
--echo 1024 Expected




--echo '#---------------------BS_STVARS_022_03----------------------#'
#################################################################
# Check if the value in GLOBAL Table matches value in variable #
#################################################################

SELECT @@GLOBAL.innodb_sync_pool_size = VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_sync_pool_size';
--echo 1 Expected

SELECT @@GLOBAL.innodb_sync_pool_size;
--echo 1024 Expected

SELECT VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_sync_pool_size';
--echo 1024 Expected



--echo '#---------------------BS_STVARS_022_04----------------------#'
################################################################################
# Check if accessing variable with and without GLOBAL point to same variable #
################################################################################
SELECT @@innodb_sync_pool_size = @@GLOBAL.innodb_sync_pool_size;
--echo 1 Expected



--echo '#---------------------BS_STVARS_022_05----------------------#'
################################################################################
# Check if innodb_sync_pool_size can be accessed with and without @@ sign #
################################################################################

SELECT @@innodb_sync_pool_size;
--echo 1024 Expected

--Error ER_INCORRECT_GLOBAL_LOCAL_VAR
SELECT @@local.innodb_sync_pool_size;
--echo Expected error 'Variable is a GLOBAL variable'

--Error ER_INCORRECT_GLOBAL_LOCAL_VAR
SELECT @@SESSION.innodb_sync_pool_size;
--echo Expected error 'Variable is a GLOBAL variable'

SELECT @@GLOBAL.innodb_sync_pool_size;
--echo 1024 Expected

--Error ER_BAD_FIELD_ERROR
SELECT innodb_sync_pool_size = @@SESSION.innodb_sync_pool_size;
--echo Expected error 'Unknown column'
9 changes: 9 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ static ulong innobase_write_io_threads;
static long innobase_buffer_pool_instances = 1;

static long long innobase_buffer_pool_size, innobase_log_file_size;
static unsigned long innobase_sync_pool_size;

/** Percentage of the buffer pool to reserve for 'old' blocks.
Connected to buf_LRU_old_ratio. */
Expand Down Expand Up @@ -3292,6 +3293,7 @@ innobase_init(
#endif /* defined(__WIN__) && !defined(_WIN64) */
}
srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
srv_sync_pool_size = (ulint) innobase_sync_pool_size;
srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;

srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
Expand Down Expand Up @@ -16375,6 +16377,12 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);

/* innodb_sync_pool_size: requested size (number of entries) of the shared
sync pool that holds the pthread mutex and condition-variable data used by
packed events. Read-only at runtime (PLUGIN_VAR_READONLY); default 1024.
The trailing space inside the first help-text literal matters: adjacent
string literals are concatenated verbatim, so without it the text would
read "system lockand condition variables." */
static MYSQL_SYSVAR_ULONG(sync_pool_size, innobase_sync_pool_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the shared sync pool buffer InnoDB uses to store system lock "
  "and condition variables.",
  NULL, NULL, 1024UL, 1UL, 1024UL * 1024UL, 1UL);

#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
Expand Down Expand Up @@ -16900,6 +16908,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
MYSQL_SYSVAR(buffer_pool_populate),
MYSQL_SYSVAR(sync_pool_size),
MYSQL_SYSVAR(buffer_pool_instances),
MYSQL_SYSVAR(buffer_pool_filename),
MYSQL_SYSVAR(buffer_pool_dump_now),
Expand Down
83 changes: 65 additions & 18 deletions storage/innobase/include/os0sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ typedef CONDITION_VARIABLE os_cond_t;
typedef pthread_mutex_t fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;

/* Accessors for the packed "stats" word of an event structure.
The most significant bit (BIT63) is the "is_set" flag; the remaining
63 bits hold the signal count. Each macro takes a pointer to a structure
that has an unsigned 64-bit member named "stats".

The statement-like macros are wrapped in do { ... } while (0) so that they
expand to exactly one statement and can be used safely in an unbraced
if/else; each use must be terminated with a semicolon. */

/** Mask selecting the "is_set" flag (most significant bit of stats). */
#define BIT63 (1ULL << 63)

/** Increments the signal count stored in the low 63 bits of stats.
NOTE(review): this is a plain increment of the whole word, so a count of
2^63 - 1 would carry into the is_set bit -- presumably unreachable in
practice; confirm if that matters. */
#define INC_SIGNAL_COUNT(ev)	do { ++(ev)->stats; } while (0)

/** @return the signal count (stats with the is_set bit masked off) */
#define SIGNAL_COUNT(ev) (static_cast<ib_int64_t>((ev)->stats & ~BIT63))

/** Marks the event as set (signaled) without touching the signal count. */
#define SET_IS_SET(ev)		do { (ev)->stats |= BIT63; } while (0)

/** Marks the event as not set without touching the signal count. */
#define CLEAR_IS_SET(ev)	do { (ev)->stats &= ~BIT63; } while (0)

/** @return nonzero iff the is_set flag is currently raised */
#define IS_SET(ev) (((ev)->stats & BIT63) != 0)

#endif

/** Structure that includes Performance Schema Probe pfs_psi
Expand All @@ -82,26 +90,50 @@ struct os_fast_mutex_t {
#endif
};

/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event wrapper */
typedef struct os_event_wrapper_struct os_event_wrapper_struct_t;
/** Operating system event handle */
typedef struct os_event* os_event_t;
typedef os_event_wrapper_struct_t* os_event_t;

typedef struct os_event_support_struct {
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
} os_event_support_t;

extern os_event_support_t* os_support;

/** An asynchronous signal sent between threads */
struct os_event {
struct os_event_struct {
#ifdef __WIN__
HANDLE handle; /*!< kernel event object, slow,
used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
ib_uint64_t stats; /*!< msb: "is_set" (boolean bit field)
This is TRUE when the event is
in the signaled state, i.e., a thread
does not stop if it tries to wait for
this event */
ib_int64_t signal_count; /*!< this is incremented each time
/*!< rest: "signal_count" (63-bit uint)
this is incremented each time
the event becomes signaled */
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_t) os_event_list;
os_event_support_t* sup; /*!< Pointer to OS-support data
For events created by os_event_create()
this will point to an allocated set of
data exclusively for this event.
For events created by os_event_create2()
this will point to one of the shared
sync data pool elements
allocated by os_sync_init()
*/
};

struct os_event_wrapper_struct {
struct os_event_struct ev; /*!< Actual event struct */
UT_LIST_NODE_T(os_event_wrapper_struct_t) os_event_list;
/*!< list of all created events */
};

Expand Down Expand Up @@ -146,33 +178,46 @@ UNIV_INTERN
os_event_t
os_event_create(void);
/*==================*/
UNIV_INTERN
void
os_event_create2(
/*============*/
os_event_struct_t* event); /*!< in: pointer to pre-allocated struct */
/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
#define os_event_set(e) os_event_set2(&((e)->ev))
UNIV_INTERN
void
os_event_set(
os_event_set2(
/*=========*/
os_event_t event); /*!< in: event to set */
os_event_struct_t* event); /*!< in: event to set */
/**********************************************************//**
Resets an event semaphore to the nonsignaled state. Waiting threads will
stop to wait for the event.
The return value should be passed to os_event_wait_low() if it is desired
that this thread should not wait in case of an intervening call to
os_event_set() between this os_event_reset() and the
os_event_wait_low() call. See comments for os_event_wait_low(). */
#define os_event_reset(e) os_event_reset2(&((e)->ev))
UNIV_INTERN
ib_int64_t
os_event_reset(
os_event_reset2(
/*===========*/
os_event_t event); /*!< in: event to reset */
os_event_struct_t* event); /*!< in: event to reset */
/**********************************************************//**
Frees an event object. */
UNIV_INTERN
void
os_event_free(
/*==========*/
os_event_t event); /*!< in: event to free */
os_event_t event); /*!< in: wrapped event to free */

UNIV_INTERN
void
os_event_free2(
/*************/
os_event_struct_t* event); /*!< in: event to free */

/**********************************************************//**
Waits for an event object until it is in the signaled state.
Expand All @@ -191,11 +236,12 @@ thread C calls os_event_wait() [infinite wait!]
Where such a scenario is possible, to avoid infinite wait, the
value returned by os_event_reset() should be passed in as
reset_sig_count. */
#define os_event_wait_low(e,c) os_event_wait_low2(&((e)->ev),c)
UNIV_INTERN
void
os_event_wait_low(
os_event_wait_low2(
/*==============*/
os_event_t event, /*!< in: event to wait */
os_event_struct_t* event, /*!< in: event to wait */
ib_int64_t reset_sig_count);/*!< in: zero or the value
returned by previous call of
os_event_reset(). */
Expand All @@ -207,11 +253,12 @@ os_event_wait_low(
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
#define os_event_wait_time_low(e,t,u) os_event_wait_time_low2(&((e)->ev),t,u)
UNIV_INTERN
ulint
os_event_wait_time_low(
os_event_wait_time_low2(
/*===================*/
os_event_t event, /*!< in: event to wait */
os_event_struct_t* event, /*!< in: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
Expand Down
1 change: 1 addition & 0 deletions storage/innobase/include/srv0srv.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ extern ulong srv_flush_neighbors; /*!< whether or not to flush
neighbors of a block */
extern ulint srv_buf_pool_old_size; /*!< previously requested size */
extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
extern ulint srv_sync_pool_size; /*!< requested size (number) */
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;

Expand Down
6 changes: 4 additions & 2 deletions storage/innobase/include/sync0rw.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ struct rw_lock_debug_t;
#endif /* UNIV_SYNC_DEBUG */

typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
extern ulint rw_lock_count;

extern rw_lock_list_t rw_lock_list;
extern ib_mutex_t rw_lock_list_mutex;
Expand Down Expand Up @@ -594,8 +595,9 @@ struct rw_lock_t {
/*!< Thread id of writer thread. Is only
guaranteed to have sane and non-stale
value iff recursive flag is set. */
os_event_t event; /*!< Used by sync0arr.cc for thread queueing */
os_event_t wait_ex_event;
os_event_struct_t event;
/*!< Used by sync0arr.cc for thread queueing */
os_event_struct_t wait_ex_event;
/*!< Event for next-writer to wait on. A thread
must decrement lock_word before waiting. */
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
Expand Down
4 changes: 2 additions & 2 deletions storage/innobase/include/sync0rw.ic
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ rw_lock_s_unlock_func(
/* wait_ex waiter exists. It may not be asleep, but we signal
anyway. We do not wake other waiters, because they can't
exist without wait_ex waiter and wait_ex waiter goes first.*/
os_event_set(lock->wait_ex_event);
os_event_set2(&lock->wait_ex_event);
sync_array_object_signalled();

}
Expand Down Expand Up @@ -517,7 +517,7 @@ rw_lock_x_unlock_func(
exist when there is a writer. */
if (lock->waiters) {
rw_lock_reset_waiter_flag(lock);
os_event_set(lock->event);
os_event_set2(&lock->event);
sync_array_object_signalled();
}
}
Expand Down
3 changes: 2 additions & 1 deletion storage/innobase/include/sync0sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,8 @@ implementation of a mutual exclusion semaphore. */

/** InnoDB mutex */
struct ib_mutex_t {
os_event_t event; /*!< Used by sync0arr.cc for the wait queue */
os_event_struct_t event;
/*!< Used by sync0arr.cc for the wait queue */
volatile lock_word_t lock_word; /*!< lock_word is the target
of the atomic test-and-set instruction when
atomic operations are enabled. */
Expand Down
Loading

0 comments on commit c1b8d83

Please sign in to comment.