Skip to content

Commit

Permalink
update to address issue where race conditions can happen when unlinki…
Browse files Browse the repository at this point in the history
…ng MEs that are actively receiving, closes #70
  • Loading branch information
regrant committed Oct 19, 2016
1 parent d37b306 commit 7669d28
Show file tree
Hide file tree
Showing 5 changed files with 311 additions and 19 deletions.
62 changes: 49 additions & 13 deletions src/ib/ptl_me.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ int _PtlMEUnlink(PPEGBL ptl_handle_me_t me_handle)
me_t *me;
int ref_cnt;
// int pt_index;
// pt_t *pt;
pt_t *pt;
// ni_t *ni;

#ifndef NO_ARG_VALIDATION
Expand All @@ -413,12 +413,20 @@ int _PtlMEUnlink(PPEGBL ptl_handle_me_t me_handle)
err = to_me(MYGBL_ me_handle, &me);
if (err)
goto err1;

if (me == NULL){
err = PTL_OK;
goto err1;
}
else{
pt = me->pt;
}
#else
me = to_obj(MYGBL_ POOL_ANY, me_handle);
#endif
//pt_index = me->pt_index;
//pt = &ni->pt[pt_index];
//me_get(me);
//me_get(me);

//If this was an overflow, it should just complete now
//there's no other busy work being done
Expand All @@ -428,35 +436,63 @@ int _PtlMEUnlink(PPEGBL ptl_handle_me_t me_handle)

/* make sure the me isn't still involved in any final
* cleanup before we unlink it */
while (atomic_read(&me->busy) == 1) {
SPINLOCK_BODY();
if (atomic_read(&me->busy) == 1){
err = PTL_IN_USE;
me_put(me);
goto err1;
}

PTL_FASTLOCK_LOCK(&me->pt->lock);
if (me != NULL && pt != NULL){
while (pthread_spin_trylock(&pt->lock) != 0){
usleep(500);
if(me == NULL ){
err = PTL_IN_USE;
goto err1;
}
}
if(me == NULL){
PTL_FASTLOCK_UNLOCK(&pt->lock);
err = PTL_IN_USE;
goto err1;
}
}
else {
err = PTL_OK;
goto err1;
}


ref_cnt = me_ref_cnt(me);

/* There should only be 2 references on the object before we can
* release it. */
if (ref_cnt > 2) {
me_put(me);
err = PTL_IN_USE;
goto err1;
} else if (ref_cnt < 2) {
goto err2;
} else if (ref_cnt < 2 && ref_cnt > 0) {
me_put(me);
err = PTL_ARG_INVALID;
goto err1;
goto err2;
}
else if (ref_cnt <= 0){
err = PTL_OK;
goto err2;
}

PTL_FASTLOCK_UNLOCK(&me->pt->lock);
le_unlink((le_t *)me, 0);
//if (me->pt != NULL)
PTL_FASTLOCK_UNLOCK(&pt->lock);
//if (me != NULL)
le_unlink((le_t *)me, 0);

err = PTL_OK;

ref_cnt = me_ref_cnt(me);
if (ref_cnt > 0)
me_put(me);
if (ref_cnt > 0)
me_put(me);
goto err1;
err2:
//if (me->pt != NULL)
PTL_FASTLOCK_UNLOCK(&pt->lock);
err1:
#ifndef NO_ARG_VALIDATION
gbl_put();
Expand Down
3 changes: 3 additions & 0 deletions src/ib/ptl_ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ static inline void ref_get(struct ref *ref)

ref_cnt = atomic_inc(&ref->ref_cnt);

#ifdef PTL_DEBUG
assert(ref_cnt >= 1);
#endif

}

/**
Expand Down
13 changes: 8 additions & 5 deletions src/ib/ptl_tgt.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,8 @@ static int tgt_get_match(buf_t *buf)
pt->state |= PT_AUTO_DISABLED;
ptl_info("dropping due to lack of unexpected headers\n");
PTL_FASTLOCK_UNLOCK(&pt->lock);
le_put(buf->le);
buf->le = NULL;
buf->ni_fail = PTL_NI_PT_DISABLED;
WARN();
return STATE_TGT_DROP;
Expand Down Expand Up @@ -1711,8 +1713,9 @@ static int tgt_send_ack(buf_t *buf)

if (buf->le && buf->le->ptl_list == PTL_PRIORITY_LIST) {
/* The LE must be released before we sent the ack. */
le_put(buf->le);
//le_put(buf->le);
atomic_set(&buf->me->busy, 0);
le_put(buf->le);
buf->le = NULL;
}

Expand Down Expand Up @@ -1807,8 +1810,8 @@ static int tgt_send_reply(buf_t *buf)

if (buf->le && buf->le->ptl_list == PTL_PRIORITY_LIST) {
/* The LE must be released before we sent the ack. */
le_put(buf->le);
atomic_set(&buf->me->busy, 0);
le_put(buf->le);
buf->le = NULL;
}

Expand Down Expand Up @@ -1863,7 +1866,7 @@ static int tgt_cleanup(buf_t *buf)
/* On the overflow list, and was already matched by an
* ME/LE. */
assert(buf->le->ptl_list == PTL_OVERFLOW_LIST);
atomic_set(&buf->le->busy, 0);
//atomic_set(&buf->le->busy, 0);
state = STATE_TGT_OVERFLOW_EVENT;
} else if (buf->le && buf->le->ptl_list == PTL_OVERFLOW_LIST) {
//if the pt hasn't run out of resources and unexpected headers are enabled
Expand Down Expand Up @@ -1924,8 +1927,8 @@ static void tgt_cleanup_2(buf_t *buf)
{
if (buf->le) {
ptl_warn("me/le cleanup \n");
le_put(buf->le);
atomic_set(&buf->me->busy, 0);
le_put(buf->le);
buf->le = NULL;
}

Expand All @@ -1937,8 +1940,8 @@ static void tgt_cleanup_2(buf_t *buf)
//it was an overflow match, so reduce the
//unexpected message count
atomic_dec(&pt->unexpected_size);
le_put(buf->matching.le);
atomic_set(&buf->matching.le->busy, 0);
le_put(buf->matching.le);
buf->matching.le = NULL;
}

Expand Down
5 changes: 4 additions & 1 deletion test/basic/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ TESTS = \
test_PA_NIInit \
test_LA_NIInit \
test_bootstrap \
test_PA_LE_put_self \
test_PA_LE_put_self \
test_PA_ME_put_self \
test_LA_LE_put_self \
test_LA_ME_put_self \
Expand Down Expand Up @@ -77,6 +77,7 @@ TESTS = \
test_ME_flowctl_nohdr \
test_LE_unlink \
test_ME_unlink \
test_unlink_race \
test_PA_LE_persistent_search \
test_PA_ME_persistent_search \
test_ct_ack \
Expand Down Expand Up @@ -298,6 +299,8 @@ test_LE_unlink_CPPFLAGS = $(AM_CPPFLAGS) -DMATCHING=0
test_ME_unlink_SOURCES = test_unlink.c
test_ME_unlink_CPPFLAGS = $(AM_CPPFLAGS) -DMATCHING=1

test_unlink_race_SOURCES = test_unlink_race.c

test_PA_LE_persistent_search_SOURCES = test_persistent_search.c
test_PA_LE_persistent_search_CPPFLAGS = $(AM_CPPFLAGS) -DMATCHING=0

Expand Down
Loading

0 comments on commit 7669d28

Please sign in to comment.