• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 2075
  • Last Modified:

Winsock IOCP WSASend Heap Memory Problem

Hello,

My problem is within an IO Completion Ports server that has worker threads waiting on GetQueuedCompletionStatus(). The server is breaking when I attempt to delete pPerIOData which is a structure containing the OVERLAPPED and WSABUF types. This does not happen all the time. Please take a look at the following worker thread:

UINT CClientCommThread::IOCompletionThread(LPVOID pParam)
{
      IOCPTHREAD *pIOCPT = (IOCPTHREAD*)pParam;
      HANDLE hCompletionPort = pIOCPT->hCompletionPort;
      DWORD nThreadID = pIOCPT->nThreadID;
      DWORD dwBytesTransfered;
      OVERLAPPED *pOverlapped;
      PER_HANDLE_DATA *pPerHandleData;
      PER_IO_DATA *pPerIOData;
      DWORD dwSendBytes, dwRecvBytes;
      DWORD dwFlags;

      delete pIOCPT;

      while(TRUE)
      {
            pPerHandleData = NULL;
            pOverlapped = NULL;
            pPerIOData = NULL;
            BOOL ret = GetQueuedCompletionStatus(hCompletionPort, &dwBytesTransfered, (PULONG_PTR)&pPerHandleData, &pOverlapped, INFINITE);


            if(ret == FALSE || pOverlapped == NULL)
            {
                  DWORD dwError = GetLastError();
                  if((dwError == ERROR_NETNAME_DELETED) || (dwError == ERROR_IO_INCOMPLETE))
                  {
                        if(pPerHandleData)
                        {
                              ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                        }
                  }
                  else
                  {
                        if(pPerHandleData)
                        {
                              ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                        }

                        CString tmp;
                        tmp.Format("UINT CClientCommThread::IOCompletionThread() -> GetQueuedCompletionStatus() Failed (GetLastError()=%d)!", dwError);
                        char *lpszError = new char[tmp.GetLength() + 2];
                        strcpy(lpszError, tmp);
                        ::PostThreadMessage(nThreadID, UWM_LOGERROR, NULL, (LPARAM)lpszError);
                  }

                  if(pOverlapped != NULL)
                  {
                        pPerIOData = CONTAINING_RECORD(pOverlapped, PER_IO_DATA, Overlapped);

                        if(pPerIOData != NULL)
                        {
                              if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
                              {
                                    if(pPerIOData->wBuf.buf != NULL)
                                    {
                                          delete [] pPerIOData->wBuf.buf;
                                    }
                                    if(pPerIOData->lpszBuf != NULL)
                                    {
                                          delete [] pPerIOData->lpszBuf;
                                    }
                                    delete pPerIOData;
                              }
                        }
                  }

                  continue;
            }

            if(pPerHandleData == 0)
            {
                  return 0;
            }

            pPerIOData = CONTAINING_RECORD(pOverlapped, PER_IO_DATA, Overlapped);

            if(dwBytesTransfered == 0)
            {
                  // 0 Bytes on a read or write means the socket was closed by the client.
                  shutdown(pPerHandleData->Sock, SD_BOTH);
                  closesocket(pPerHandleData->Sock);
                  ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                  
                  if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
                  {
                        if(pPerIOData->wBuf.buf != NULL)
                        {
                              delete [] pPerIOData->wBuf.buf;
                        }
                        if(pPerIOData->lpszBuf != NULL)
                        {
                              delete [] pPerIOData->lpszBuf;
                        }
                        delete pPerIOData;
                  }
                  continue;
            }

            if(pPerIOData->OpCode == OP_RECV)
            {
                  // Copy the new recieved data to the buffer...
                  if(pPerIOData->nBufSize)
                  {
                        char *lpszTmp = new char[pPerIOData->nBufSize+dwBytesTransfered+2];
                        for(long i = 0; i < pPerIOData->nBufSize; i++)
                              lpszTmp[i] = pPerIOData->lpszBuf[i];
                        delete [] pPerIOData->lpszBuf;
                        pPerIOData->lpszBuf = lpszTmp;
                  }
                  else
                        pPerIOData->lpszBuf = new char[dwBytesTransfered];

                  for(int i = 0; i < dwBytesTransfered; i++)
                  {
                        (pPerIOData->lpszBuf+pPerIOData->nBufSize)[i] = pPerIOData->wBuf.buf[i];
                  }

                  pPerIOData->nBufSize += dwBytesTransfered;

                  // Check to see if this CUCP packet is complete. A complete CUCP packet has 8 zero's trailing.
                  bool bZero = false;
                  if(pPerIOData->nBufSize >= 8)
                  {
                        bZero = true;
                        for(int i = pPerIOData->nBufSize-8; i < pPerIOData->nBufSize; i++)
                              if(pPerIOData->lpszBuf[i] != 0)
                                    bZero = false;
                  }

                  if(bZero)
                  {
                        pPerIOData->nBufSize -= 8;

                        UINT nRes = ProcessRecievedPacket(pPerHandleData, pPerIOData->lpszBuf, pPerIOData->nBufSize, nThreadID);

                        if(pPerIOData)
                        {
                              if(pPerIOData->lpszBuf != NULL)
                              {
                                    delete [] pPerIOData->lpszBuf;
                              }
                              pPerIOData->lpszBuf = NULL;
                              pPerIOData->nBufSize = 0;
                        }
                  }

                  // Reset the per io data structure for reuse...
                  ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));
                  if(pPerIOData->wBuf.buf != NULL)
                  {
                        delete [] pPerIOData->wBuf.buf;
                  }
                  pPerIOData->wBuf.buf = new char[1040];
                  pPerIOData->wBuf.len = 1024;
                  pPerIOData->OpCode = OP_RECV;
                  DWORD dwBytes = 0;
                  DWORD dwFlags = 0;
                  WSARecv(pPerHandleData->Sock, &(pPerIOData->wBuf), 1, &dwBytes, &dwFlags, &pPerIOData->Overlapped, NULL);
            }
            else if(pPerIOData->OpCode == OP_SEND)
            {
                  if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)
                  {
                        CHAR *lpszTmpBuf = new CHAR[(pPerIOData->wBuf.len-dwBytesTransfered) + 2];

                        for(int i = 0; i < (pPerIOData->wBuf.len-dwBytesTransfered); i++)
                        {
                              lpszTmpBuf[i] = (pPerIOData->wBuf.buf+dwBytesTransfered)[i];
                        }

                        delete [] pPerIOData->wBuf.buf;

                        pPerIOData->wBuf.buf = lpszTmpBuf;
                        pPerIOData->wBuf.len = (pPerIOData->wBuf.len-dwBytesTransfered);

                        ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));

                        DWORD dwBytes = 0;
                        DWORD dwFlags = 0;
                        WSASend(pPerHandleData->Sock, &(pPerIOData->wBuf), 1, &dwBytes, dwFlags, &pPerIOData->Overlapped, NULL);
                  }
                  else
                  {
                        if(pPerIOData != NULL)
                        {
                              if(pPerIOData->wBuf.buf != NULL)
                              {
                                    delete [] pPerIOData->wBuf.buf;
                                    pPerIOData->wBuf.buf = NULL;
                              }

                              ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));

                              delete pPerIOData; // <-----Exception thrown here!
                              pPerIOData = NULL;
                        }
                  }
            }
      }
      return 0;
}


Here is the PER_IO_DATA structure:

typedef struct _PER_IO_DATA
{
      OVERLAPPED Overlapped;
      int OpCode; // OP_RECV or OP_SEND
      WSABUF wBuf; // Contains the data just recieved
      DWORD dwBytes, dwFlags;
      char *lpszBuf; // Stores the entire message
      ULONG nBufSize; // Stores the size of the entire message
} PER_IO_DATA;



Here is an example call to WSASend (all calls to this within the program look exactly like this):

PER_IO_DATA *pPerIOData = NULL;
pPerIOData = new PER_IO_DATA;
ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));
pPerIOData->wBuf.len = GetCompletePacket(pPerIOData->wBuf.buf);
pPerIOData->OpCode = OP_SEND;
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
pPerIOData->dwBytes = 0;
pPerIOData->dwFlags = 0;
WSASend(pPerHandleData->Sock, &pPerIOData->wBuf, 1, &pPerIOData->dwBytes, pPerIOData->dwFlags, &pPerIOData->Overlapped, NULL);


Like I said, this server will run for about an hour with ~45 active connections in debug mode before it breaks. Any help at all is greatly appreciated!

Thanks,
Brandon
0
bdunz19
Asked:
bdunz19
  • 5
  • 3
1 Solution
 
itsmeandnobodyelseCommented:
How many threads are waiting on one completion port?

Is there any need for using overlapped I/O?

>>>> if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)

Is that if  statement well-defined? Why is dwBytesTransfered less than pPerIOData->wBuf.len in good case? Can you think of a case where it fails? E. g. if the pPerIOData->wBuf.len was greater than the maximum message size.

If the statement fails because the dwBytesTransfered is equal to pPerIOData->wBuf.len, then another thread waiting on the port may get the rest of the message. But the current thread has deleted the pPerIOData.

Regards, Alex
0
 
bdunz19Author Commented:
Hey Alex,

Thanks so much for a reply, I was afraid no one would!

The number of concurrent threads is 1 per processor (2 total) and same with the total created threads (which I was hoping to step up to at least 4 once the code is solid).

The reason I am using IOCP is because I need this server to be able to handle as many connections a possible. I am very well versed with using the event model and have never had problems, but for this server application I need it to be able to handle thousands of simultanious conections with no problem and I have read that nothing beats the IOCP model. The current users are simply beta testers.

As for this statement: if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL), some times the WSASend will only send a portion of the buffer supplied, so it is always important to check to make sure it sent all of the buffer. If the dwBytesTransfered is less than the buffer length, then I think you should attempt to send the rest. That's what I'm doing in that part of the code: if the operation was a send that was completed, then check if it sent the entire packet, if it did then just delete the buffer (that's the "else"), if it didn't then send the remaining bytes (that's the first "if(dwBytesTransfered < pPerIOData->....").

Maybe I'm not following what you are questioning, if so then please clairify, or really just shoot anything at me that doesn't make sense to you.... It really helps me for people to question what I am doing so that I can reafirm or by thinking about it catch a problem. But yeah, another set of eyes always helps, thanks again!

Brandon
0
 
bdunz19Author Commented:
Wait, are you saying that if the dwBytesTransfered is less than the total buffer length to be sent that Winsock will automatically try to send the remaining buffer?
0
Technology Partners: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be enter to win anyone of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

 
itsmeandnobodyelseCommented:
>>>> are you saying that if the dwBytesTransfered is less than the total >>>> buffer length to be sent that Winsock will automatically try to send the remaining buffer?
Actually, you have no guarantee that WinSock will send a buffer in total. It will do that - normally on a LAN - but in case it has to rerequest for lost buffers it can cut any message at any point. Normally the IoCompletion mechanism will wait for 'completion' in these cases but there are timeouts involved there to keep the pump running, so I wouldn't bet on that only full messages were transferred.

>>>> I need it to be able to handle thousands of simultanious conections with no problem

I did something similar (ok it was only some 100 connections) by using a thread-pool of 64 permanently running threads managed by a job queue. I had very good experiences with that and I hardly can think that a system relying on a maximum of 4 threads is faster.

To make the above code more stable you might consider not to delete the 'parent' struct pPerIOData but only the transfer buffer. You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.

Regards, Alex
0
 
bdunz19Author Commented:
>>You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.

Hmm... The problem with that is that this is not a simple request/response setup. There are events that occur on the server triggered by different clients, which are then updated to all other clients via WSASend. As you can see if I only have one PER_IO_DATA, then I run into the chance that it is already in use.

Is there any way to check if the OVERLAPPED pointer returned by GetQueuedCompletionStatus is really valid other than just checking if it's not NULL?

Can you figure any reason why the code errors on "delete pPerIOData;" and not on the line above: "delete [] pPerIOData->wBuf.buf;", or really on any other area of the code where it dereferences pPerIOData? Shouldn't it error out when pPerIOData is used at all?? I read that this could most definatly be caused by the corruption of the heap. If so, then why is it in the same place every time?

Anyway, I understand you probably think that IOCP is overkill and as I have found: really complex. But again, thanks for your help as I would really like to make this work and not give up on it. If you can at least help with the questions above maybe I can get somewhere with just that.

Thanks again Alex,
Brandon
0
 
itsmeandnobodyelseCommented:
>>>> Is there any way to check if the OVERLAPPED pointer returned by
>>>> GetQueuedCompletionStatus is really valid other than just checking
>>>> if it's not NULL?
Hmmm. I've seen code from MS where they tested a pointer by calling a function within a try block. Then, they caught the exception ("access violation") and knew the pointer was not valid. But it really looked not very convincing ...   You could put the wrong 'delete' into a try block waht would help to not crash but doesn't help to find out what goes wrong.

>>>>delete pPerIOData; // <-----Exception thrown here!

Ok, let's start from the begin. If here an exception was thrown, there were only two choices: first, it is a invalid pointer value like 0x00000004 or 0xcdcdcdcd. I would say that this is unlikely cause you already tested successfully on 'pPerIOData->OpCode == OP_SEND' and 'pPerIOData->wbuf.buf == NULL' what rarely could be valid in case of a corrupt pointer value. So, the - only - thing left is that the pointer already was deleted. Note, if a pointer was deleted all member data keep valid until the storage was used for other purposes. So, it is a good chance that 'pPerIOData->OpCode == OP_SEND' even if the pointer was deleted.

So, we have to know which part of the prog deleted the pointer when it comes to the exception. It must not be the above code itself. It also can be caused by the function that made the initial WSASend, e. g. if it has an error *after* WSASend or an error with WSASend that nevertheless didn't cancel the queuing of I/O completion. Assume it would delete the PER_IO_DATA  it newly created, and you have the crash. My assumption was - but actually I know too less about I/O completion ports to tell whether that could happen - that with the GetQueuedCompletionStatus you could get only a part of the original message sent and that you would get the remaining message by a further call to GetQueuedCompletionStatus. Then, deleting the parent pointer would make the second call return an invalid OVERLAPPED pointer pointing to already freed storage. There is something that speaks against that: The wording 'completion' port seems to indicate that only 'complete' packages were signaled. And, if the remaining would be catched by another thread it would have to use the same OVERLAPPED struct than the first thread, what rarely can be a good design.  So, my assumption only could be true, if the next call to GetQueuedCompletionStatus would get the pointers back as input  so that the function 'knows' it could safely return the remaining message.

What to do?

I would suggest to add a new member to PER_IO_DATA where you store a unique id in case of a delete.

    try
    {
         if (pPerIoData->deleter != 0)
         {
               // we got it. It already was deleted by pPerIoData->deleter
         }
         pPerIoData->deleter = 123;
         delete pPerIoData;
     }
     catch(...)
     {
         
     }

You would need to use the above sequence at all program points where you make a delete.

Regards, Alex


0
 
bdunz19Author Commented:
Hey Alex, I've given it a long run with taking out every statement that deletes the PER_IO_DATA structure except for where dwBytesTransfered is equal to the size of the buffer (only one delete in the program). This did not help though. It's still erroring. So after giving some thought to it and really reading up on WSASend and WSARecv, I'm pretty sure I figured out what the problem is. The calls that I have to WSASend are in the main parent thread. The calls I have to WSARecv are in the worker threads. Because winsock is creating it's own buffers to copy the send/recv buffer data from the paged memory it seems that I might be corrupting it by making these calls on the same socket from different threads.

Also I like your idea about the pPerIoData->deleter... That's real clever. Though it is more of a hack than a real solution, still it is clever!

I'm almost positive the problem is in the WSASend/WSARecv and will attempt to solve this by making all calls from the parent thread. I will let you know if this solves it.

Thanks again for your help on this solution, I will award the points to you once this is fixed, because of your effort that no one else could make and also you were able to get me thinking. Your last post really helped with your description about how invalid memory would work or even what it would look like if the PER_IO_DATA was already deleted.

Thanks again Alex, you've really been a great help!

Brandon
0
 
bdunz19Author Commented:
Alex, you won't believe the answer to this problem.... One of the WSASends allocated the PER_IO_DATA structure using "GlobalAlloc" instead of "new"! I just found this and I am dieing laughing... I've spent 4 weeks trying to figure this out.

Anyway, another valuable lesson learned. Thanks again Alex,
Brandon
0

Featured Post

Keep up with what's happening at Experts Exchange!

Sign up to receive Decoded, a new monthly digest with product updates, feature release info, continuing education opportunities, and more.

  • 5
  • 3
Tackle projects and never again get stuck behind a technical roadblock.
Join Now