Want to protect your cyber security and still get fast solutions? Ask a secure question today.Go Premium

x
  • Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 246
  • Last Modified:

How to duplicate values in memory fast?

Dear all,

This is my code:

//Duplicate points
__asm  {
      mov      esi, offsetCount
      mov      edx, memLocation1
      mov      eax, memLocation2

      mov ecx,DWORD PTR[eax+esi*4]  //last value1
      mov edi,DWORD PTR[edx+esi*4]  //last value2
     
               //duplicate the last values 3 times
      mov DWORD PTR[eax+esi*4+4], ecx
                mov DWORD PTR[eax+esi*4+8], ecx
      mov DWORD PTR[eax+esi*4+12], ecx
      mov DWORD PTR[edx+esi*4+4], edi
      mov DWORD PTR[edx+esi*4+8], edi
      mov DWORD PTR[edx+esi*4+12], edi

      add      esi, 3
      shr      esi, 2 //make divisable by 4
      mov      offsetCount, esi
}

This code seems to be a bit slow. Is there a better way of doing it?

thank you.
0
hengck23
Asked:
hengck23
  • 2
  • 2
1 Solution
 
grg99Commented:
That's about as fast as you can store into memory.  Why do you think it's slow?

0
 
_Katka_Commented:
Hi, how about:

Case A:

__asm {
     mov ecx, 1 // transfer size divided by 4 in this case SizeOf(DWORD)=4 div 1=1
     mov ebx, offsetCount // original offset
     shl ebx,2 // original offset multiplied by 4
     mov eax, ebx // store original offset
     add ebx, memLocation1 // shift to [memLocation1+Offset]
     mov esi, ebx // setup source offset to [memLocation1+Offset]
     inc ebx, 4 // shift to [memLocation1+Offset+4]
     mov edi, ebx // setup destiny offset to [memLocation1+Offset+4]
     movsd // 1st duplicate of 1st value
     inc edi,4 // shift destiny offset to [memLocation1+Offset+8]
     movsd // 2nd duplicate of 1st value
     inc edi,4 // shift destiny offset to [memLocation1+Offset+12]
     movsd // 3rd duplicate of 1st value
     mov ebx, eax // restore original offset
     add ebx, memLocation2 // shift to [memLocation2+Offset]
     mov esi, ebx // setup source offset to [memLocation2+Offset]
     inc ebx, 4 // shift to [memLocation2+Offset+4]
     mov edi, ebx // setup destiny offset to [memLocation2+Offset+4]
     movsd // 1st duplicate of 2nd value
     inc edi, 4 // shift destiny offset to [memLocation2+Offset+8]
     movsd // 2nd duplicate of 2nd value
     inc edi, 4 // shift destiny offset to [memLocation2+Offset+12]
     movsd // 3rd duplicate of 2nd value
     shr eax, 2 // restore original offsetCount
     inc eax, 3 // add 3 offset steps in advance
     mov offsetCount, eax // store to offsetCount
}

or without comments:

__asm {
     mov ecx, 1
     mov ebx, offsetCount
     shl ebx,2
     mov eax, ebx
     add ebx, memLocation1
     mov esi, ebx
     inc ebx, 4
     mov edi, ebx
     movsd
     inc edi,4
     movsd
     inc edi,4
     movsd
     mov ebx, eax
     add ebx, memLocation2
     mov esi, ebx
     inc ebx, 4
     mov edi, ebx
     movsd
     inc edi, 4
     movsd
     inc edi, 4
     movsd
     shr eax, 2
     inc eax, 3
     mov offsetCount, eax
}

Case B (if you would rather keep your register-based approach, or if it turns out to be faster):

__asm  {
     mov esi, offsetCount // original offsetCount
     mov edi, esi // store original offsetCount
     shl esi, 2 // shift offset so it wouldn't be done later multiple times
     mov eax, memLocation1 // setup source offset to [memLocation1]
     inc eax, esi // shift source offset to [memLocation1+Offset]
     mov ebx, memLocation2 // setup destiny offset to [memLocation2]
     inc ebx, esi // shift destiny offset to [memLocation2+Offset]

     mov ecx,DWORD PTR[eax]  // load last value1
     mov edx,DWORD PTR[ebx]  // load last value2
     
     // 1st duplicate
     inc eax, 4
     inc ebx, 4
     mov DWORD PTR[eax], ecx
     mov DWORD PTR[ebx], edx
   
     // 2nd duplicate
     inc eax, 4
     inc ebx, 4
     mov DWORD PTR[eax], ecx
     mov DWORD PTR[ebx], edx

     // 3rd duplicate
     inc eax, 4
     inc ebx, 4
     mov DWORD PTR[eax], ecx
     mov DWORD PTR[ebx], edx

     inc edi, 3 // increase original offset by 3
     mov offsetCount, edi // store new offsetCount
}

I hope at least one of the solutions is faster :)

regards,
Kate
0
 
grg99Commented:
Most PCs nowadays have CPUs that are far faster than memory, so it usually doesn't matter which memory-fill code you use: you're limited by the CPU-to-memory bandwidth, even with a 533MHz memory bus.

0
 
_Katka_Commented:
Anyway, the Case B I posted has the potential to be about 40% faster than the original code :)
Mainly because of faster execution on CPU :))

regards,
Kate
0
 
mbizupCommented:
No comment has been added to this question in more than 21 days, so it is now classified as abandoned.

I will leave the following recommendation for this question in the Cleanup topic area:
    Accept: _Katka_ {http:#13052527}

Any objections should be posted here in the next 4 days. After that time, the question will be closed.

mbizup
EE Cleanup Volunteer
0

Featured Post

Free Tool: Path Explorer

An intuitive utility to help find the CSS path to UI elements on a webpage. These paths are used frequently in a variety of front-end development and QA automation tasks.

One of a set of tools we're offering as a way of saying thank you for being a part of the community.

  • 2
  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now