bdunz19
asked on
Winsock IOCP WSASend Heap Memory Problem
Hello,
My problem is within an IO Completion Ports server that has worker threads waiting on GetQueuedCompletionStatus( ). The server is breaking when I attempt to delete pPerIOData which is a structure containing the OVERLAPPED and WSABUF types. This does not happen all the time. Please take a look at the following worker thread:
UINT CClientCommThread::IOCompl etionThrea d(LPVOID pParam)
{
IOCPTHREAD *pIOCPT = (IOCPTHREAD*)pParam;
HANDLE hCompletionPort = pIOCPT->hCompletionPort;
DWORD nThreadID = pIOCPT->nThreadID;
DWORD dwBytesTransfered;
OVERLAPPED *pOverlapped;
PER_HANDLE_DATA *pPerHandleData;
PER_IO_DATA *pPerIOData;
DWORD dwSendBytes, dwRecvBytes;
DWORD dwFlags;
delete pIOCPT;
while(TRUE)
{
pPerHandleData = NULL;
pOverlapped = NULL;
pPerIOData = NULL;
BOOL ret = GetQueuedCompletionStatus( hCompletio nPort, &dwBytesTransfered, (PULONG_PTR)&pPerHandleDat a, &pOverlapped, INFINITE);
if(ret == FALSE || pOverlapped == NULL)
{
DWORD dwError = GetLastError();
if((dwError == ERROR_NETNAME_DELETED) || (dwError == ERROR_IO_INCOMPLETE))
{
if(pPerHandleData)
{
::PostThreadMessage(nThrea dID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
}
}
else
{
if(pPerHandleData)
{
::PostThreadMessage(nThrea dID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
}
CString tmp;
tmp.Format("UINT CClientCommThread::IOCompl etionThrea d() -> GetQueuedCompletionStatus( ) Failed (GetLastError()=%d)!", dwError);
char *lpszError = new char[tmp.GetLength() + 2];
strcpy(lpszError, tmp);
::PostThreadMessage(nThrea dID, UWM_LOGERROR, NULL, (LPARAM)lpszError);
}
if(pOverlapped != NULL)
{
pPerIOData = CONTAINING_RECORD(pOverlap ped, PER_IO_DATA, Overlapped);
if(pPerIOData != NULL)
{
if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
delete pPerIOData;
}
}
}
continue;
}
if(pPerHandleData == 0)
{
return 0;
}
pPerIOData = CONTAINING_RECORD(pOverlap ped, PER_IO_DATA, Overlapped);
if(dwBytesTransfered == 0)
{
// 0 Bytes on a read or write means the socket was closed by the client.
shutdown(pPerHandleData->S ock, SD_BOTH);
closesocket(pPerHandleData ->Sock);
::PostThreadMessage(nThrea dID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
delete pPerIOData;
}
continue;
}
if(pPerIOData->OpCode == OP_RECV)
{
// Copy the new recieved data to the buffer...
if(pPerIOData->nBufSize)
{
char *lpszTmp = new char[pPerIOData->nBufSize+ dwBytesTra nsfered+2] ;
for(long i = 0; i < pPerIOData->nBufSize; i++)
lpszTmp[i] = pPerIOData->lpszBuf[i];
delete [] pPerIOData->lpszBuf;
pPerIOData->lpszBuf = lpszTmp;
}
else
pPerIOData->lpszBuf = new char[dwBytesTransfered];
for(int i = 0; i < dwBytesTransfered; i++)
{
(pPerIOData->lpszBuf+pPerI OData->nBu fSize)[i] = pPerIOData->wBuf.buf[i];
}
pPerIOData->nBufSize += dwBytesTransfered;
// Check to see if this CUCP packet is complete. A complete CUCP packet has 8 zero's trailing.
bool bZero = false;
if(pPerIOData->nBufSize >= 8)
{
bZero = true;
for(int i = pPerIOData->nBufSize-8; i < pPerIOData->nBufSize; i++)
if(pPerIOData->lpszBuf[i] != 0)
bZero = false;
}
if(bZero)
{
pPerIOData->nBufSize -= 8;
UINT nRes = ProcessRecievedPacket(pPer HandleData , pPerIOData->lpszBuf, pPerIOData->nBufSize, nThreadID);
if(pPerIOData)
{
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
}
}
// Reset the per io data structure for reuse...
ZeroMemory(&pPerIOData->Ov erlapped, sizeof(OVERLAPPED));
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
pPerIOData->wBuf.buf = new char[1040];
pPerIOData->wBuf.len = 1024;
pPerIOData->OpCode = OP_RECV;
DWORD dwBytes = 0;
DWORD dwFlags = 0;
WSARecv(pPerHandleData->So ck, &(pPerIOData->wBuf), 1, &dwBytes, &dwFlags, &pPerIOData->Overlapped, NULL);
}
else if(pPerIOData->OpCode == OP_SEND)
{
if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)
{
CHAR *lpszTmpBuf = new CHAR[(pPerIOData->wBuf.len -dwBytesTr ansfered) + 2];
for(int i = 0; i < (pPerIOData->wBuf.len-dwBy tesTransfe red); i++)
{
lpszTmpBuf[i] = (pPerIOData->wBuf.buf+dwBy tesTransfe red)[i];
}
delete [] pPerIOData->wBuf.buf;
pPerIOData->wBuf.buf = lpszTmpBuf;
pPerIOData->wBuf.len = (pPerIOData->wBuf.len-dwBy tesTransfe red);
ZeroMemory(&pPerIOData->Ov erlapped, sizeof(OVERLAPPED));
DWORD dwBytes = 0;
DWORD dwFlags = 0;
WSASend(pPerHandleData->So ck, &(pPerIOData->wBuf), 1, &dwBytes, dwFlags, &pPerIOData->Overlapped, NULL);
}
else
{
if(pPerIOData != NULL)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
pPerIOData->wBuf.buf = NULL;
}
ZeroMemory(&pPerIOData->Ov erlapped, sizeof(OVERLAPPED));
delete pPerIOData; // <-----Exception thrown here!
pPerIOData = NULL;
}
}
}
}
return 0;
}
Here is the PER_IO_DATA structure:
typedef struct _PER_IO_DATA
{
OVERLAPPED Overlapped;
int OpCode; // OP_RECV or OP_SEND
WSABUF wBuf; // Contains the data just recieved
DWORD dwBytes, dwFlags;
char *lpszBuf; // Stores the entire message
ULONG nBufSize; // Stores the size of the entire message
} PER_IO_DATA;
Here is an example call to WSASend (all calls to this within the program look exactly like this):
PER_IO_DATA *pPerIOData = NULL;
pPerIOData = new PER_IO_DATA;
ZeroMemory(&pPerIOData->Ov erlapped, sizeof(OVERLAPPED));
pPerIOData->wBuf.len = GetCompletePacket(pPerIODa ta->wBuf.b uf);
pPerIOData->OpCode = OP_SEND;
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
pPerIOData->dwBytes = 0;
pPerIOData->dwFlags = 0;
WSASend(pPerHandleData->So ck, &pPerIOData->wBuf, 1, &pPerIOData->dwBytes, pPerIOData->dwFlags, &pPerIOData->Overlapped, NULL);
Like I said, this server will run for about an hour with ~45 active connections in debug mode before it breaks. Any help at all is greatly appreciated!
Thanks,
Brandon
My problem is within an IO Completion Ports server that has worker threads waiting on GetQueuedCompletionStatus(
UINT CClientCommThread::IOCompl
{
IOCPTHREAD *pIOCPT = (IOCPTHREAD*)pParam;
HANDLE hCompletionPort = pIOCPT->hCompletionPort;
DWORD nThreadID = pIOCPT->nThreadID;
DWORD dwBytesTransfered;
OVERLAPPED *pOverlapped;
PER_HANDLE_DATA *pPerHandleData;
PER_IO_DATA *pPerIOData;
DWORD dwSendBytes, dwRecvBytes;
DWORD dwFlags;
delete pIOCPT;
while(TRUE)
{
pPerHandleData = NULL;
pOverlapped = NULL;
pPerIOData = NULL;
BOOL ret = GetQueuedCompletionStatus(
if(ret == FALSE || pOverlapped == NULL)
{
DWORD dwError = GetLastError();
if((dwError == ERROR_NETNAME_DELETED) || (dwError == ERROR_IO_INCOMPLETE))
{
if(pPerHandleData)
{
::PostThreadMessage(nThrea
}
}
else
{
if(pPerHandleData)
{
::PostThreadMessage(nThrea
}
CString tmp;
tmp.Format("UINT CClientCommThread::IOCompl
char *lpszError = new char[tmp.GetLength() + 2];
strcpy(lpszError, tmp);
::PostThreadMessage(nThrea
}
if(pOverlapped != NULL)
{
pPerIOData = CONTAINING_RECORD(pOverlap
if(pPerIOData != NULL)
{
if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
delete pPerIOData;
}
}
}
continue;
}
if(pPerHandleData == 0)
{
return 0;
}
pPerIOData = CONTAINING_RECORD(pOverlap
if(dwBytesTransfered == 0)
{
// 0 Bytes on a read or write means the socket was closed by the client.
shutdown(pPerHandleData->S
closesocket(pPerHandleData
::PostThreadMessage(nThrea
if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
delete pPerIOData;
}
continue;
}
if(pPerIOData->OpCode == OP_RECV)
{
// Copy the new recieved data to the buffer...
if(pPerIOData->nBufSize)
{
char *lpszTmp = new char[pPerIOData->nBufSize+
for(long i = 0; i < pPerIOData->nBufSize; i++)
lpszTmp[i] = pPerIOData->lpszBuf[i];
delete [] pPerIOData->lpszBuf;
pPerIOData->lpszBuf = lpszTmp;
}
else
pPerIOData->lpszBuf = new char[dwBytesTransfered];
for(int i = 0; i < dwBytesTransfered; i++)
{
(pPerIOData->lpszBuf+pPerI
}
pPerIOData->nBufSize += dwBytesTransfered;
// Check to see if this CUCP packet is complete. A complete CUCP packet has 8 zero's trailing.
bool bZero = false;
if(pPerIOData->nBufSize >= 8)
{
bZero = true;
for(int i = pPerIOData->nBufSize-8; i < pPerIOData->nBufSize; i++)
if(pPerIOData->lpszBuf[i] != 0)
bZero = false;
}
if(bZero)
{
pPerIOData->nBufSize -= 8;
UINT nRes = ProcessRecievedPacket(pPer
if(pPerIOData)
{
if(pPerIOData->lpszBuf != NULL)
{
delete [] pPerIOData->lpszBuf;
}
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
}
}
// Reset the per io data structure for reuse...
ZeroMemory(&pPerIOData->Ov
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
}
pPerIOData->wBuf.buf = new char[1040];
pPerIOData->wBuf.len = 1024;
pPerIOData->OpCode = OP_RECV;
DWORD dwBytes = 0;
DWORD dwFlags = 0;
WSARecv(pPerHandleData->So
}
else if(pPerIOData->OpCode == OP_SEND)
{
if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)
{
CHAR *lpszTmpBuf = new CHAR[(pPerIOData->wBuf.len
for(int i = 0; i < (pPerIOData->wBuf.len-dwBy
{
lpszTmpBuf[i] = (pPerIOData->wBuf.buf+dwBy
}
delete [] pPerIOData->wBuf.buf;
pPerIOData->wBuf.buf = lpszTmpBuf;
pPerIOData->wBuf.len = (pPerIOData->wBuf.len-dwBy
ZeroMemory(&pPerIOData->Ov
DWORD dwBytes = 0;
DWORD dwFlags = 0;
WSASend(pPerHandleData->So
}
else
{
if(pPerIOData != NULL)
{
if(pPerIOData->wBuf.buf != NULL)
{
delete [] pPerIOData->wBuf.buf;
pPerIOData->wBuf.buf = NULL;
}
ZeroMemory(&pPerIOData->Ov
delete pPerIOData; // <-----Exception thrown here!
pPerIOData = NULL;
}
}
}
}
return 0;
}
Here is the PER_IO_DATA structure:
typedef struct _PER_IO_DATA
{
OVERLAPPED Overlapped;
int OpCode; // OP_RECV or OP_SEND
WSABUF wBuf; // Contains the data just recieved
DWORD dwBytes, dwFlags;
char *lpszBuf; // Stores the entire message
ULONG nBufSize; // Stores the size of the entire message
} PER_IO_DATA;
Here is an example call to WSASend (all calls to this within the program look exactly like this):
PER_IO_DATA *pPerIOData = NULL;
pPerIOData = new PER_IO_DATA;
ZeroMemory(&pPerIOData->Ov
pPerIOData->wBuf.len = GetCompletePacket(pPerIODa
pPerIOData->OpCode = OP_SEND;
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
pPerIOData->dwBytes = 0;
pPerIOData->dwFlags = 0;
WSASend(pPerHandleData->So
Like I said, this server will run for about an hour with ~45 active connections in debug mode before it breaks. Any help at all is greatly appreciated!
Thanks,
Brandon
ASKER
Hey Alex,
Thanks so much for a reply, I was afraid no one would!
The number of concurrent threads is 1 per processor (2 total) and same with the total created threads (which I was hoping to step up to at least 4 once the code is solid).
The reason I am using IOCP is because I need this server to be able to handle as many connections a possible. I am very well versed with using the event model and have never had problems, but for this server application I need it to be able to handle thousands of simultanious conections with no problem and I have read that nothing beats the IOCP model. The current users are simply beta testers.
As for this statement: if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL), some times the WSASend will only send a portion of the buffer supplied, so it is always important to check to make sure it sent all of the buffer. If the dwBytesTransfered is less than the buffer length, then I think you should attempt to send the rest. That's what I'm doing in that part of the code: if the operation was a send that was completed, then check if it sent the entire packet, if it did then just delete the buffer (that's the "else"), if it didn't then send the remaining bytes (that's the first "if(dwBytesTransfered < pPerIOData->....").
Maybe I'm not following what you are questioning, if so then please clairify, or really just shoot anything at me that doesn't make sense to you.... It really helps me for people to question what I am doing so that I can reafirm or by thinking about it catch a problem. But yeah, another set of eyes always helps, thanks again!
Brandon
Thanks so much for a reply, I was afraid no one would!
The number of concurrent threads is 1 per processor (2 total) and same with the total created threads (which I was hoping to step up to at least 4 once the code is solid).
The reason I am using IOCP is because I need this server to be able to handle as many connections a possible. I am very well versed with using the event model and have never had problems, but for this server application I need it to be able to handle thousands of simultanious conections with no problem and I have read that nothing beats the IOCP model. The current users are simply beta testers.
As for this statement: if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL), some times the WSASend will only send a portion of the buffer supplied, so it is always important to check to make sure it sent all of the buffer. If the dwBytesTransfered is less than the buffer length, then I think you should attempt to send the rest. That's what I'm doing in that part of the code: if the operation was a send that was completed, then check if it sent the entire packet, if it did then just delete the buffer (that's the "else"), if it didn't then send the remaining bytes (that's the first "if(dwBytesTransfered < pPerIOData->....").
Maybe I'm not following what you are questioning, if so then please clairify, or really just shoot anything at me that doesn't make sense to you.... It really helps me for people to question what I am doing so that I can reafirm or by thinking about it catch a problem. But yeah, another set of eyes always helps, thanks again!
Brandon
ASKER
Wait, are you saying that if the dwBytesTransfered is less than the total buffer length to be sent that Winsock will automatically try to send the remaining buffer?
>>>> are you saying that if the dwBytesTransfered is less than the total >>>> buffer length to be sent that Winsock will automatically try to send the remaining buffer?
Actually, you have no guarantee that WinSock will send a buffer in total. It will do that - normally on a LAN - but in case it has to rerequest for lost buffers it can cut any message at any point. Normally the IoCompletion mechanism will wait for 'completion' in these cases but there are timeouts involved there to keep the pump running, so I wouldn't bet on that only full messages were transferred.
>>>> I need it to be able to handle thousands of simultanious conections with no problem
I did something similar (ok it was only some 100 connections) by using a thread-pool of 64 permanently running threads managed by a job queue. I had very good experiences with that and I hardly can think that a system relying on a maximum of 4 threads is faster.
To make the above code more stable you might consider not to delete the 'parent' struct pPerIOData but only the transfer buffer. You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.
Regards, Alex
Actually, you have no guarantee that WinSock will send a buffer in total. It will do that - normally on a LAN - but in case it has to rerequest for lost buffers it can cut any message at any point. Normally the IoCompletion mechanism will wait for 'completion' in these cases but there are timeouts involved there to keep the pump running, so I wouldn't bet on that only full messages were transferred.
>>>> I need it to be able to handle thousands of simultanious conections with no problem
I did something similar (ok it was only some 100 connections) by using a thread-pool of 64 permanently running threads managed by a job queue. I had very good experiences with that and I hardly can think that a system relying on a maximum of 4 threads is faster.
To make the above code more stable you might consider not to delete the 'parent' struct pPerIOData but only the transfer buffer. You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.
Regards, Alex
ASKER
>>You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.
Hmm... The problem with that is that this is not a simple request/response setup. There are events that occur on the server triggered by different clients, which are then updated to all other clients via WSASend. As you can see if I only have one PER_IO_DATA, then I run into the chance that it is already in use.
Is there any way to check if the OVERLAPPED pointer returned by GetQueuedCompletionStatus is really valid other than just checking if it's not NULL?
Can you figure any reason why the code errors on "delete pPerIOData;" and not on the line above: "delete [] pPerIOData->wBuf.buf;", or really on any other area of the code where it dereferences pPerIOData? Shouldn't it error out when pPerIOData is used at all?? I read that this could most definatly be caused by the corruption of the heap. If so, then why is it in the same place every time?
Anyway, I understand you probably think that IOCP is overkill and as I have found: really complex. But again, thanks for your help as I would really like to make this work and not give up on it. If you can at least help with the questions above maybe I can get somewhere with just that.
Thanks again Alex,
Brandon
Hmm... The problem with that is that this is not a simple request/response setup. There are events that occur on the server triggered by different clients, which are then updated to all other clients via WSASend. As you can see if I only have one PER_IO_DATA, then I run into the chance that it is already in use.
Is there any way to check if the OVERLAPPED pointer returned by GetQueuedCompletionStatus is really valid other than just checking if it's not NULL?
Can you figure any reason why the code errors on "delete pPerIOData;" and not on the line above: "delete [] pPerIOData->wBuf.buf;", or really on any other area of the code where it dereferences pPerIOData? Shouldn't it error out when pPerIOData is used at all?? I read that this could most definatly be caused by the corruption of the heap. If so, then why is it in the same place every time?
Anyway, I understand you probably think that IOCP is overkill and as I have found: really complex. But again, thanks for your help as I would really like to make this work and not give up on it. If you can at least help with the questions above maybe I can get somewhere with just that.
Thanks again Alex,
Brandon
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Hey Alex, I've given it a long run with taking out every statement that deletes the PER_IO_DATA structure except for where dwBytesTransfered is equal to the size of the buffer (only one delete in the program). This did not help though. It's still erroring. So after giving some thought to it and really reading up on WSASend and WSARecv, I'm pretty sure I figured out what the problem is. The calls that I have to WSASend are in the main parent thread. The calls I have to WSARecv are in the worker threads. Because winsock is creating it's own buffers to copy the send/recv buffer data from the paged memory it seems that I might be corrupting it by making these calls on the same socket from different threads.
Also I like your idea about the pPerIoData->deleter... That's real clever. Though it is more of a hack than a real solution, still it is clever!
I'm almost positive the problem is in the WSASend/WSARecv and will attempt to solve this by making all calls from the parent thread. I will let you know if this solves it.
Thanks again for your help on this solution, I will award the points to you once this is fixed, because of your effort that no one else could make and also you were able to get me thinking. Your last post really helped with your description about how invalid memory would work or even what it would look like if the PER_IO_DATA was already deleted.
Thanks again Alex, you've really been a great help!
Brandon
Also I like your idea about the pPerIoData->deleter... That's real clever. Though it is more of a hack than a real solution, still it is clever!
I'm almost positive the problem is in the WSASend/WSARecv and will attempt to solve this by making all calls from the parent thread. I will let you know if this solves it.
Thanks again for your help on this solution, I will award the points to you once this is fixed, because of your effort that no one else could make and also you were able to get me thinking. Your last post really helped with your description about how invalid memory would work or even what it would look like if the PER_IO_DATA was already deleted.
Thanks again Alex, you've really been a great help!
Brandon
ASKER
Alex, you won't believe the answer to this problem.... One of the WSASends allocated the PER_IO_DATA structure using "GlobalAlloc" instead of "new"! I just found this and I am dieing laughing... I've spent 4 weeks trying to figure this out.
Anyway, another valuable lesson learned. Thanks again Alex,
Brandon
Anyway, another valuable lesson learned. Thanks again Alex,
Brandon
Is there any need for using overlapped I/O?
>>>> if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)
Is that if statement well-defined? Why is dwBytesTransfered less than pPerIOData->wBuf.len in good case? Can you think of a case where it fails? E. g. if the pPerIOData->wBuf.len was greater than the maximum message size.
If the statement fails because the dwBytesTransfered is equal to pPerIOData->wBuf.len, then another thread waiting on the port may get the rest of the message. But the current thread has deleted the pPerIOData.
Regards, Alex