We help IT Professionals succeed at work.

Winsock IOCP WSASend Heap Memory Problem

2,411 Views
Last Modified: 2013-11-13
Hello,

My problem is within an IO Completion Ports server that has worker threads waiting on GetQueuedCompletionStatus(). The server is breaking when I attempt to delete pPerIOData which is a structure containing the OVERLAPPED and WSABUF types. This does not happen all the time. Please take a look at the following worker thread:

UINT CClientCommThread::IOCompletionThread(LPVOID pParam)
{
      IOCPTHREAD *pIOCPT = (IOCPTHREAD*)pParam;
      HANDLE hCompletionPort = pIOCPT->hCompletionPort;
      DWORD nThreadID = pIOCPT->nThreadID;
      DWORD dwBytesTransfered;
      OVERLAPPED *pOverlapped;
      PER_HANDLE_DATA *pPerHandleData;
      PER_IO_DATA *pPerIOData;
      DWORD dwSendBytes, dwRecvBytes;
      DWORD dwFlags;

      delete pIOCPT;

      while(TRUE)
      {
            pPerHandleData = NULL;
            pOverlapped = NULL;
            pPerIOData = NULL;
            BOOL ret = GetQueuedCompletionStatus(hCompletionPort, &dwBytesTransfered, (PULONG_PTR)&pPerHandleData, &pOverlapped, INFINITE);


            if(ret == FALSE || pOverlapped == NULL)
            {
                  DWORD dwError = GetLastError();
                  if((dwError == ERROR_NETNAME_DELETED) || (dwError == ERROR_IO_INCOMPLETE))
                  {
                        if(pPerHandleData)
                        {
                              ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                        }
                  }
                  else
                  {
                        if(pPerHandleData)
                        {
                              ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                        }

                        CString tmp;
                        tmp.Format("UINT CClientCommThread::IOCompletionThread() -> GetQueuedCompletionStatus() Failed (GetLastError()=%d)!", dwError);
                        char *lpszError = new char[tmp.GetLength() + 2];
                        strcpy(lpszError, tmp);
                        ::PostThreadMessage(nThreadID, UWM_LOGERROR, NULL, (LPARAM)lpszError);
                  }

                  if(pOverlapped != NULL)
                  {
                        pPerIOData = CONTAINING_RECORD(pOverlapped, PER_IO_DATA, Overlapped);

                        if(pPerIOData != NULL)
                        {
                              if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
                              {
                                    if(pPerIOData->wBuf.buf != NULL)
                                    {
                                          delete [] pPerIOData->wBuf.buf;
                                    }
                                    if(pPerIOData->lpszBuf != NULL)
                                    {
                                          delete [] pPerIOData->lpszBuf;
                                    }
                                    delete pPerIOData;
                              }
                        }
                  }

                  continue;
            }

            if(pPerHandleData == 0)
            {
                  return 0;
            }

            pPerIOData = CONTAINING_RECORD(pOverlapped, PER_IO_DATA, Overlapped);

            if(dwBytesTransfered == 0)
            {
                  // 0 Bytes on a read or write means the socket was closed by the client.
                  shutdown(pPerHandleData->Sock, SD_BOTH);
                  closesocket(pPerHandleData->Sock);
                  ::PostThreadMessage(nThreadID, UWM_FREEPERHANDLE, NULL, (LPARAM)pPerHandleData);
                  
                  if(pPerIOData->OpCode == OP_RECV || pPerIOData->OpCode == OP_SEND)
                  {
                        if(pPerIOData->wBuf.buf != NULL)
                        {
                              delete [] pPerIOData->wBuf.buf;
                        }
                        if(pPerIOData->lpszBuf != NULL)
                        {
                              delete [] pPerIOData->lpszBuf;
                        }
                        delete pPerIOData;
                  }
                  continue;
            }

            if(pPerIOData->OpCode == OP_RECV)
            {
                  // Copy the new recieved data to the buffer...
                  if(pPerIOData->nBufSize)
                  {
                        char *lpszTmp = new char[pPerIOData->nBufSize+dwBytesTransfered+2];
                        for(long i = 0; i < pPerIOData->nBufSize; i++)
                              lpszTmp[i] = pPerIOData->lpszBuf[i];
                        delete [] pPerIOData->lpszBuf;
                        pPerIOData->lpszBuf = lpszTmp;
                  }
                  else
                        pPerIOData->lpszBuf = new char[dwBytesTransfered];

                  for(int i = 0; i < dwBytesTransfered; i++)
                  {
                        (pPerIOData->lpszBuf+pPerIOData->nBufSize)[i] = pPerIOData->wBuf.buf[i];
                  }

                  pPerIOData->nBufSize += dwBytesTransfered;

                  // Check to see if this CUCP packet is complete. A complete CUCP packet has 8 zero's trailing.
                  bool bZero = false;
                  if(pPerIOData->nBufSize >= 8)
                  {
                        bZero = true;
                        for(int i = pPerIOData->nBufSize-8; i < pPerIOData->nBufSize; i++)
                              if(pPerIOData->lpszBuf[i] != 0)
                                    bZero = false;
                  }

                  if(bZero)
                  {
                        pPerIOData->nBufSize -= 8;

                        UINT nRes = ProcessRecievedPacket(pPerHandleData, pPerIOData->lpszBuf, pPerIOData->nBufSize, nThreadID);

                        if(pPerIOData)
                        {
                              if(pPerIOData->lpszBuf != NULL)
                              {
                                    delete [] pPerIOData->lpszBuf;
                              }
                              pPerIOData->lpszBuf = NULL;
                              pPerIOData->nBufSize = 0;
                        }
                  }

                  // Reset the per io data structure for reuse...
                  ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));
                  if(pPerIOData->wBuf.buf != NULL)
                  {
                        delete [] pPerIOData->wBuf.buf;
                  }
                  pPerIOData->wBuf.buf = new char[1040];
                  pPerIOData->wBuf.len = 1024;
                  pPerIOData->OpCode = OP_RECV;
                  DWORD dwBytes = 0;
                  DWORD dwFlags = 0;
                  WSARecv(pPerHandleData->Sock, &(pPerIOData->wBuf), 1, &dwBytes, &dwFlags, &pPerIOData->Overlapped, NULL);
            }
            else if(pPerIOData->OpCode == OP_SEND)
            {
                  if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)
                  {
                        CHAR *lpszTmpBuf = new CHAR[(pPerIOData->wBuf.len-dwBytesTransfered) + 2];

                        for(int i = 0; i < (pPerIOData->wBuf.len-dwBytesTransfered); i++)
                        {
                              lpszTmpBuf[i] = (pPerIOData->wBuf.buf+dwBytesTransfered)[i];
                        }

                        delete [] pPerIOData->wBuf.buf;

                        pPerIOData->wBuf.buf = lpszTmpBuf;
                        pPerIOData->wBuf.len = (pPerIOData->wBuf.len-dwBytesTransfered);

                        ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));

                        DWORD dwBytes = 0;
                        DWORD dwFlags = 0;
                        WSASend(pPerHandleData->Sock, &(pPerIOData->wBuf), 1, &dwBytes, dwFlags, &pPerIOData->Overlapped, NULL);
                  }
                  else
                  {
                        if(pPerIOData != NULL)
                        {
                              if(pPerIOData->wBuf.buf != NULL)
                              {
                                    delete [] pPerIOData->wBuf.buf;
                                    pPerIOData->wBuf.buf = NULL;
                              }

                              ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));

                              delete pPerIOData; // <-----Exception thrown here!
                              pPerIOData = NULL;
                        }
                  }
            }
      }
      return 0;
}


Here is the PER_IO_DATA structure:

typedef struct _PER_IO_DATA
{
      OVERLAPPED Overlapped;
      int OpCode; // OP_RECV or OP_SEND
      WSABUF wBuf; // Contains the data just recieved
      DWORD dwBytes, dwFlags;
      char *lpszBuf; // Stores the entire message
      ULONG nBufSize; // Stores the size of the entire message
} PER_IO_DATA;



Here is an example call to WSASend (all calls to this within the program look exactly like this):

PER_IO_DATA *pPerIOData = NULL;
pPerIOData = new PER_IO_DATA;
ZeroMemory(&pPerIOData->Overlapped, sizeof(OVERLAPPED));
pPerIOData->wBuf.len = GetCompletePacket(pPerIOData->wBuf.buf);
pPerIOData->OpCode = OP_SEND;
pPerIOData->lpszBuf = NULL;
pPerIOData->nBufSize = 0;
pPerIOData->dwBytes = 0;
pPerIOData->dwFlags = 0;
WSASend(pPerHandleData->Sock, &pPerIOData->wBuf, 1, &pPerIOData->dwBytes, pPerIOData->dwFlags, &pPerIOData->Overlapped, NULL);


Like I said, this server will run for about an hour with ~45 active connections in debug mode before it breaks. Any help at all is greatly appreciated!

Thanks,
Brandon
Comment
Watch Question

How many threads are waiting on one completion port?

Is there any need for using overlapped I/O?

>>>> if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL)

Is that if  statement well-defined? Why is dwBytesTransfered less than pPerIOData->wBuf.len in good case? Can you think of a case where it fails? E. g. if the pPerIOData->wBuf.len was greater than the maximum message size.

If the statement fails because the dwBytesTransfered is equal to pPerIOData->wBuf.len, then another thread waiting on the port may get the rest of the message. But the current thread has deleted the pPerIOData.

Regards, Alex

Author

Commented:
Hey Alex,

Thanks so much for a reply, I was afraid no one would!

The number of concurrent threads is 1 per processor (2 total) and same with the total created threads (which I was hoping to step up to at least 4 once the code is solid).

The reason I am using IOCP is because I need this server to be able to handle as many connections a possible. I am very well versed with using the event model and have never had problems, but for this server application I need it to be able to handle thousands of simultanious conections with no problem and I have read that nothing beats the IOCP model. The current users are simply beta testers.

As for this statement: if(dwBytesTransfered < pPerIOData->wBuf.len && pPerIOData->wBuf.buf != NULL), some times the WSASend will only send a portion of the buffer supplied, so it is always important to check to make sure it sent all of the buffer. If the dwBytesTransfered is less than the buffer length, then I think you should attempt to send the rest. That's what I'm doing in that part of the code: if the operation was a send that was completed, then check if it sent the entire packet, if it did then just delete the buffer (that's the "else"), if it didn't then send the remaining bytes (that's the first "if(dwBytesTransfered < pPerIOData->....").

Maybe I'm not following what you are questioning, if so then please clairify, or really just shoot anything at me that doesn't make sense to you.... It really helps me for people to question what I am doing so that I can reafirm or by thinking about it catch a problem. But yeah, another set of eyes always helps, thanks again!

Brandon

Author

Commented:
Wait, are you saying that if the dwBytesTransfered is less than the total buffer length to be sent that Winsock will automatically try to send the remaining buffer?
>>>> are you saying that if the dwBytesTransfered is less than the total >>>> buffer length to be sent that Winsock will automatically try to send the remaining buffer?
Actually, you have no guarantee that WinSock will send a buffer in total. It will do that - normally on a LAN - but in case it has to rerequest for lost buffers it can cut any message at any point. Normally the IoCompletion mechanism will wait for 'completion' in these cases but there are timeouts involved there to keep the pump running, so I wouldn't bet on that only full messages were transferred.

>>>> I need it to be able to handle thousands of simultanious conections with no problem

I did something similar (ok it was only some 100 connections) by using a thread-pool of 64 permanently running threads managed by a job queue. I had very good experiences with that and I hardly can think that a system relying on a maximum of 4 threads is faster.

To make the above code more stable you might consider not to delete the 'parent' struct pPerIOData but only the transfer buffer. You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.

Regards, Alex

Author

Commented:
>>You might use 1 PER_IO_DATA for any connected socket that was deleted only when the connection was closed.

Hmm... The problem with that is that this is not a simple request/response setup. There are events that occur on the server triggered by different clients, which are then updated to all other clients via WSASend. As you can see if I only have one PER_IO_DATA, then I run into the chance that it is already in use.

Is there any way to check if the OVERLAPPED pointer returned by GetQueuedCompletionStatus is really valid other than just checking if it's not NULL?

Can you figure any reason why the code errors on "delete pPerIOData;" and not on the line above: "delete [] pPerIOData->wBuf.buf;", or really on any other area of the code where it dereferences pPerIOData? Shouldn't it error out when pPerIOData is used at all?? I read that this could most definatly be caused by the corruption of the heap. If so, then why is it in the same place every time?

Anyway, I understand you probably think that IOCP is overkill and as I have found: really complex. But again, thanks for your help as I would really like to make this work and not give up on it. If you can at least help with the questions above maybe I can get somewhere with just that.

Thanks again Alex,
Brandon
Unlock this solution and get a sample of our free trial.
(No credit card required)
UNLOCK SOLUTION

Author

Commented:
Hey Alex, I've given it a long run with taking out every statement that deletes the PER_IO_DATA structure except for where dwBytesTransfered is equal to the size of the buffer (only one delete in the program). This did not help though. It's still erroring. So after giving some thought to it and really reading up on WSASend and WSARecv, I'm pretty sure I figured out what the problem is. The calls that I have to WSASend are in the main parent thread. The calls I have to WSARecv are in the worker threads. Because winsock is creating it's own buffers to copy the send/recv buffer data from the paged memory it seems that I might be corrupting it by making these calls on the same socket from different threads.

Also I like your idea about the pPerIoData->deleter... That's real clever. Though it is more of a hack than a real solution, still it is clever!

I'm almost positive the problem is in the WSASend/WSARecv and will attempt to solve this by making all calls from the parent thread. I will let you know if this solves it.

Thanks again for your help on this solution, I will award the points to you once this is fixed, because of your effort that no one else could make and also you were able to get me thinking. Your last post really helped with your description about how invalid memory would work or even what it would look like if the PER_IO_DATA was already deleted.

Thanks again Alex, you've really been a great help!

Brandon

Author

Commented:
Alex, you won't believe the answer to this problem.... One of the WSASends allocated the PER_IO_DATA structure using "GlobalAlloc" instead of "new"! I just found this and I am dieing laughing... I've spent 4 weeks trying to figure this out.

Anyway, another valuable lesson learned. Thanks again Alex,
Brandon

Gain unlimited access to on-demand training courses with an Experts Exchange subscription.

Get Access
Why Experts Exchange?

Experts Exchange always has the answer, or at the least points me in the correct direction! It is like having another employee that is extremely experienced.

Jim Murphy
Programmer at Smart IT Solutions

When asked, what has been your best career decision?

Deciding to stick with EE.

Mohamed Asif
Technical Department Head

Being involved with EE helped me to grow personally and professionally.

Carl Webster
CTP, Sr Infrastructure Consultant
Empower Your Career
Did You Know?

We've partnered with two important charities to provide clean water and computer science education to those who need it most. READ MORE

Ask ANY Question

Connect with Certified Experts to gain insight and support on specific technology challenges including:

  • Troubleshooting
  • Research
  • Professional Opinions
Unlock the solution to this question.
Thanks for using Experts Exchange.

Please provide your email to receive a sample view!

*This site is protected by reCAPTCHA and the Google Privacy Policy and Terms of Service apply.

OR

Please enter a first name

Please enter a last name

8+ characters (letters, numbers, and a symbol)

By clicking, you agree to the Terms of Use and Privacy Policy.