Skip to content

Commit

Permalink
Fix a notification failure on the read side and a race on the writer side.
Browse files Browse the repository at this point in the history
  • Loading branch information
eisenhauer committed Nov 3, 2019
1 parent 7610e17 commit eac29ee
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
2 changes: 2 additions & 0 deletions source/adios2/toolkit/sst/cp/cp_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ extern void ReaderConnCloseHandler(CManager cm, CMConnection ClosedConn,
CP_verbose(Stream, "Reader-side Rank received a "
"connection-close event after close, "
"not unexpected\n");
Stream->DP_Interface->notifyConnFailure(&Svcs, Stream->DP_Stream,
FailedPeerRank);
}
else
{
Expand Down
4 changes: 2 additions & 2 deletions source/adios2/toolkit/sst/cp/cp_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,6 @@ extern void WriterConnCloseHandler(CManager cm, CMConnection closed_conn,
"connection-close event in unexpected "
"state %s\n",
SSTStreamStatusStr[WSreader->ReaderStatus]);
CP_PeerFailCloseWSReader(WSreader, PeerFailed);
}
QueueMaintenance(ParentWriterStream);
PTHREAD_MUTEX_UNLOCK(&ParentWriterStream->DataLock);
Expand Down Expand Up @@ -1304,6 +1303,7 @@ static void CP_PeerFailCloseWSReader(WS_ReaderInfo CP_WSR_Stream,
}
if (NewState == PeerFailed)
{
DerefAllSentTimesteps(CP_WSR_Stream->ParentStream, CP_WSR_Stream);
CMadd_delayed_task(ParentStream->CPInfo->cm, 2, 0, CloseWSRStream,
CP_WSR_Stream);
}
Expand Down Expand Up @@ -1596,7 +1596,7 @@ static void ProcessReaderStatusList(SstStream Stream,
CP_verbose(Stream, "Adjusting reader %d status from %s to %s\n", i,
SSTStreamStatusStr[Stream->Readers[i]->ReaderStatus],
SSTStreamStatusStr[Metadata->ReaderStatus[i]]);
Stream->Readers[i]->ReaderStatus = Metadata->ReaderStatus[i];
CP_PeerFailCloseWSReader(Stream->Readers[i], Metadata->ReaderStatus[i]);
}
}
PTHREAD_MUTEX_UNLOCK(&Stream->DataLock);
Expand Down
7 changes: 6 additions & 1 deletion source/adios2/toolkit/sst/dp/evpath_dp.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,13 @@ static void EvpathReadRequestHandler(CManager cm, CMConnection conn,
* Shouldn't ever get here because we should never get a request for a
* timestep that we don't have.
*/
fprintf(stderr, "Writer rank %d - Failed to read Timestep %ld, not found\n",
fprintf(stderr, "\n\n\n\n");
fprintf(stderr, "Writer rank %d - Failed to read Timestep %ld, not found. This is an internal inconsistency\n",
WSR_Stream->WS_Stream->Rank, ReadRequestMsg->Timestep);
fprintf(stderr, "Writer rank %d - Request came from rank %d, please report this error!\n",
WSR_Stream->WS_Stream->Rank, RequestingRank);
fprintf(stderr, "\n\n\n\n");

/*
* in the interest of not failing a writer on a reader failure, don't
* assert(0) here. Probably this sort of error should close the link to
Expand Down

0 comments on commit eac29ee

Please sign in to comment.