Invalid instruction run-time error

Sandra-24 asked
Medium Priority
Last Modified: 2008-02-26
In release build only, I get an invalid instruction error on the flagged line:

00401503  push        eax  
00401504  push        offset string "x" (40C128h)
00401509  push        190h
0040150E  push        ebx  
0040150F  call          memmem (401000h)
*00401514  add         esp,10h

memmem is a function written in asm. I can't make head nor tail of this thing. I've been working at it for two hours now and it's just frustrating me. I suspect I might be doing something wrong with the stack, which I don't fully understand. Curiously, in debug builds memchr produces the error "Access violation reading location 0x00000000", which again may be related to me messing up some of the registers or the stack for memchr. The source is as follows:

__declspec(naked) void * __cdecl memmem(const void * buf, size_type count, const void * needle, size_type needle_len)
            ;//Prologue, preserve registers
            push      ebp
            push    ebx  
            push    esi  
            push    edi

            mov            edi, [esp+14h]                  ;// Put buffer in destination register
            mov            ecx, [esp+18h]                  ;// Put count in ecx register
            mov            esi, [esp+1Ch]                  ;// Put needle in source register
            mov            ebx, [esp+20h]                  ;// Put needle length in ebx register            

            test      ebx, ebx                        ;// Is needle empty?
            jz            found                              ;// Return buffer

            xor            edx, edx                        ;//      Clear edx, because we may use the whole thing later (in to_memchr)

            mov            dl, [esi]                        ;// Put first byte of needle into dl (we know it has atleast 1 byte

            sub            ebx, 2                              ;// Subtract two from needle length (since we have two bytes of it in registers already)
            jb            to_memchr                        ;// Needle length is one byte, call memchr instead

            inc            esi
            mov            dh, [esi]                        ;// Put second byte in dh
            inc            esi

            jmp            main_loop                        ;// Needle is greater than or equal to 2 bytes

            cmp            [edi], dl                        ;// Compare first byte of needle with byte of buffer
            je            get_second_byte                  ;// Match, check the rest of needle against the rest of buffer

            dec            ecx                                    ;// Decrement counter (doesn't set carry flag)
            jz            not_found                        ;// Counter went to zero, needle wasn't found (atleast 2 bytes)

            inc            edi                                    ;// Increment buffer pointer by one byte

            jmp            main_loop                        ;// Back to main loop

            dec            ecx                                    ;// Decrement counter (doesn't set carry flag)
            jz            not_found                        ;// Counter went to zero, needle wasn't found (needle is atleast length 2)

            inc            edi                                    ;// Increment buffer pointer
            cmp            [edi], dh                        ;// Check if second byte matches
            je            compare_init                  ;// Match, compare rest of needle against buffer

            jmp            main_loop                        ;// We've fetched the next byte, decremented the counter, just continue on as normal, comparing it with the start of the pattern

            cmp            ecx,ebx                              ;// ebx must be greater than or equal to length of needle - 2
            jb            not_found                        ;// If count is < needle length, needle cannot be in the buffer

            mov            eax, ebx                        ;// Take a new copy of needle length for use as a counter

            push      edi                                    ;// Preserve buffer pointer
            push      esi                                    ;// Preserve needle

            inc            edi

            jmp            compare_loop                  ;// Loop

            pop            esi                                    ;// Restore needle
            pop            edi                                    ;// Restore buffer

            jmp            main_loop                        ;// Continue looking

            test      eax, eax                        ;// Zero, we're compared this byte already, we've found it!
            jz            found

            cmpsb                                          ;// Compare last characters in each
            jne            compare_cleanup                  ;// No match, keep looking

            dec            eax                                    ;// Decrement counter (checked on next iteration for zero)

            jmp            compare_loop                  ;// So far, so good, keep checking the needle against the buffer

            pop            esi                                    ;// Restore needle
            pop            edi                                    ;// Restore buffer

            dec            edi                                    ;// We're scrolled one ahead (by compare second byte)

            mov            eax, edi                        ;// Return current location in buffer
            jmp            finished

            xor            eax, eax                        ;// Return zero
            jmp            finished

            push      ecx                                    ;// Count
            push      edx                                    ;// Push needle (must be passed as an int)
            push      edi                                    ;// Buffer
            call      memchr                              ;// Call memchr(buffer,value,count)

            ;// Epilogue
            pop     edi  
            pop     esi  
            pop     ebx  
            pop     ebp  

Yes Dancie, that's exactly correct. I just realized that now, looking over the docs for __cdecl. I had misunderstood the fact that the calling function has to clean up the stack. I think add esp, 0Ch is probably more efficient than three pop instructions, though. At least that's what the compiler does after my code. Not very 64-bit compatible, perhaps.

Anyway you did give the right answer, thank you very much. Just wish it'd been an hour sooner :) If you want more points, I'm going to create a topic about optimizing the above function.

