?
Solved

Reading in two floating point arrays from a C++ driver using a subprogram in assembly.

Posted on 2006-06-05
5
Medium Priority
?
550 Views
Last Modified: 2008-02-01
Hi all, I am in need of some help with a subprogram that will read in two floating point arrays. These arrays are matrices. The dimensions of the matrices are as follows: X is n1 * n2, Y is n2 * n3, and Z, which is going to be the computed matrix, is X * Y (so n1 * n3). The C++ driver program is below. It displays both matrices and their expected product, along with the floating point matrix that the subprogram is supposed to compute. I have tried to read in the arrays. From what I have read, these arrays are passed by reference (pointer or address). So the first argument is the address of an array of addresses. Each address in the array is the address of a row of the matrix. Well, here is the driver.

#include <iostream>

using namespace std;

// asm_matmult prototype
extern "C"
  void asm_matmult(double **X, double **Y, double **Z, int n1, int n2, int n3);

int main()
{
   int i, j;
   const int N1 = 3, N2 = 2, N3 = 4;
   double AI[N1][N2] = { 1, 2, 3, 4, 5, 6 };
   double BI[N2][N3] = { 1, 2, 3, 4, 5, 6, 7, 8 };
   // Result from Matlab
   double CI[N1][N3] = {11, 14, 17, 20, 23, 30, 37, 44, 35, 46, 57, 68 };
   double DI[N1][N3];

   // AI, BI and DI (above) are 2D array types.

   // In C++ it is preferrable to work with pointers to pointers than
   // 2D array types (see Stroustrup C.7.2 & C.7.3 for problems with
   // 2D arrays).
   // A, B, and D are corresponding pointers to arrays of pointers.
   // Each pointer in an array points to a row in the corresponding
   // AI, BI, or DI 2D array.

   double **A = new double*[N1];
   for(i=0; i<N1; i++)
      A[i] = AI[i];

   double **B = new double*[N2];
   for(i=0; i<N2; i++)
      B[i] = BI[i];

   double **D = new double*[N1];
   for(i=0; i<N1; i++)
      D[i] = DI[i];

   cout << "A =\n";
   for (i=0; i<N1; i++) {
      for (j=0; j<N2; j++) {
         // printf("%10.4f    ", A[i][j]);
         cout << A[i][j] << "   ";
      }
      cout << '\n';
   }
   cout << endl;

   cout << "B =\n";
   for (i=0; i<N2; i++) {
      for (j=0; j<N3; j++) {
         cout << B[i][j] << "   ";
      }
      cout << '\n';
   }
   cout << endl;

   cout << "A*B is equal to\n";
   for (i=0; i<N1; i++) {
      for (j=0; j<N3; j++) {
         cout << CI[i][j] << "   ";
      }
      cout << '\n';
   }
   cout << endl;

   asm_matmult(A, B, D, N1, N2, N3);

   cout << "Value computed by asm_matmult() is\n";
   for (i=0; i<N1; i++) {
      for (j=0; j<N3; j++) {
         cout << D[i][j] << "   ";
      }
      cout << '\n';
   }
   cout << endl;

   return 0;
}

And here is what I have so far for the assembly subprogram. It's not much, because I cannot get the array passed to the FPU stack where I can manipulate it.

.globl _asm_matmult

.section .text

# Skeleton of asm_matmult: builds a stack frame, fetches the first
# argument, resets the FPU, and returns without computing anything.
_asm_matmult:
mat:
      pushl   %ebp                # save the caller's frame pointer
      movl    %esp, %ebp          # establish this routine's frame
      movl    8(%ebp), %eax       # EAX = first stack argument

      finit                       # initialize the FPU

      leave                       # same as: movl %ebp, %esp ; popl %ebp
      ret

Thanks for any help that can be provided.
0
Comment
Question by:00transam
3 Comments
 

Author Comment

by:00transam
ID: 16837418
I have a subprogram now that will read the arrays in one at a time. I can't figure out how to load them onto the FPU stack row by column so that I can multiply them. Here is the subprogram:

#  X at   8(%ebp) # Y at 12(%ebp) Z at 16(%ebp) # n1 at 20(%ebp) # n2 at 24(%ebp) # n3 at 28(%ebp)
#  i at  -4(%ebp) #   j at -8(%ebp)
# X = 1,2,3,4,5,6
# Y = 1,2,3,4,5,6,7,8
# N1 = 3
# N2 = 2
# N3 = 4
#
# Walks every element X[i][j] (i < n1, j < n2), loading each one onto the
# FPU stack and popping it again. Nothing is stored; this only demonstrates
# how to address the elements through the array of row pointers.
.globl _asm_matmult

.section .text

_asm_matmult:

      pushl     %ebp                    # set up stack frame reg
      movl      %esp,%ebp
      subl      $104,%esp               # making room for local variables

      # for(int i=0; i<n1; i++) {
      movl      $0, -4(%ebp)            # i = 0
begfor_i:
      movl      -4(%ebp),%eax           # EAX = i
      cmpl      20(%ebp),%eax           # leave the loop once i >= n1
      jge       endfor_i

      # for(int j=0; j<n2; j++) {
      movl      $0, -8(%ebp)            # j = 0
begfor_j:
      movl      -8(%ebp),%eax           # EAX = j
      cmpl      24(%ebp),%eax           # leave the loop once j >= n2
      jge       endfor_j

      movl      8(%ebp),%eax            # EAX = X (pointer to the row pointers)
      movl      -4(%ebp),%ecx           # ECX = i
      movl      (%eax,%ecx,4),%eax      # EAX = X[i] (4-byte row pointer)
      movl      -8(%ebp),%ecx           # ECX = j
      fldl      (%eax,%ecx,8)           # FPU: push X[i][j] (8-byte double)
      #call      print_double
      fstp      %st(0)                  # FPU: pop it again (ffree would mark the
                                        # register empty without popping the stack)
      #movl      $' ,%eax
      #call      print_char

      incl      -8(%ebp)                # j++
      jmp       begfor_j
endfor_j:
      #call      print_nl

      incl      -4(%ebp)                # i++
      jmp       begfor_i
endfor_i:
      movl      %ebp, %esp              # restore previous stack frame ptr
      popl      %ebp
      ret

The above part reads in the X array, and the part below will read in the Y array. I just need to read in one row of the X array and multiply it by the first column of the Y array.

For example 1*1 + 2*5 so the first element of the resultant array would be 11.

# Walks every element Y[i][j] (i < n2, j < n3), loading each one onto the
# FPU stack and popping it again. Nothing is stored.
#
# NOTE(review): there is no prologue here, yet the code addresses the
# arguments and the locals i/j through %ebp and ends with a frame-restoring
# epilogue -- presumably it is meant to run inside _asm_matmult's existing
# stack frame rather than be called on its own; confirm how it is reached.
readY:

      # for(int i=0; i<n2; i++) {
      movl      $0, -4(%ebp)            # i = 0
begfor1_i:
      movl      -4(%ebp),%eax           # EAX = i
      cmpl      24(%ebp),%eax           # leave the loop once i >= n2
      jge       endfor1_i

      # for(int j=0; j<n3; j++) {
      movl      $0, -8(%ebp)            # j = 0
begfor1_j:
      movl      -8(%ebp),%eax           # EAX = j
      cmpl      28(%ebp),%eax           # leave the loop once j >= n3
      jge       endfor1_j

      movl      12(%ebp),%eax           # EAX = Y (pointer to the row pointers)
      movl      -4(%ebp),%ecx           # ECX = i
      movl      (%eax,%ecx,4),%eax      # EAX = Y[i] (4-byte row pointer)
      movl      -8(%ebp),%ecx           # ECX = j
      fldl      (%eax,%ecx,8)           # FPU: push Y[i][j] (8-byte double)
      #call      print_double
      fstp      %st(0)                  # FPU: pop it again
      #movl      $' ,%eax
      #call      print_char

      incl      -8(%ebp)                # j++
      # The former "jg readY" test was removed from here: it jumped back to
      # the top (resetting i and j) whenever n2 > j, so the loops never ended.
      jmp       begfor1_j
endfor1_j:
      #call      print_nl

      incl      -4(%ebp)                # i++
      jmp       begfor1_i
endfor1_i:

      movl      %ebp, %esp              # restore previous stack frame ptr
      popl      %ebp
      ret
0
 
LVL 3

Accepted Solution

by:
PeterdLo earned 1000 total points
ID: 16842096
Hi 00transam,

The following could be one possible solution. The only change you need to make is to replace all floating point instructions with your own IA32 assembler equivalents.



// C implementation for matrix multiplication.
//
// Computes D = A * B, where A is N1 x N2, B is N2 x N3 and D is N1 x N3.
// Each matrix is passed as an array of row pointers: M[r] points to row r,
// and M[r][c] is the element in row r, column c.
//
// D's previous contents are ignored and overwritten, so the caller does not
// need to clear it first. If N2 is 0, every D[i][j] is set to 0.
void asm_matmult(double **A, double **B, double **D, int N1, int N2, int N3)
{
   int i, j, k;

   for (i = 0; i < N1; i++)          // each row of A (and of D)
   {
      for (j = 0; j < N3; j++)       // each column of B (and of D)
      {
         // Accumulate in a local so the result never depends on whatever
         // was stored in D[i][j] before the call.
         double sum = 0.0;
         for (k = 0; k < N2; k++)    // dot product of A's row i and B's column j
         {
            sum += A[i][k] * B[k][j];
         }
         D[i][j] = sum;
      }
   }
}

Your assembly code would then be as follows:


#  A at   8(%ebp) # B at 12(%ebp) D at 16(%ebp) # N1 at 20(%ebp) # N2 at 24(%ebp) # N3 at 28(%ebp)
#  i at  -4(%ebp) #  j at -8(%ebp) # k at -12(%ebp)
# X = 1,2,3,4,5,6
# Y = 1,2,3,4,5,6,7,8
# N1 = 3
# N2 = 2
# N3 = 4
#
# Computes D = A * B for matrices of doubles passed as arrays of 4-byte row
# pointers (A is N1 x N2, B is N2 x N3, D is N1 x N3).
# For every (i, j) the dot product is accumulated on the FPU stack starting
# from 0.0 and stored once, so D's previous contents are never read.
# The FPU stack is left at the same depth it had on entry.
.globl _asm_matmult

.section .text

_asm_matmult:

     pushl    %ebp                   # set up stack frame reg
     movl     %esp,%ebp
     subl     $104,%esp              # making room for local variables

     pushl    %ecx                   # scratch registers used below
     pushl    %edx

     # for(int i=0; i<n1; i++) {
     movl     $0, -4(%ebp)           # i = 0
begfor_i:
     movl     -4(%ebp),%eax          # EAX = i
     cmpl     20(%ebp),%eax          # leave the loop once i >= N1
     jge      endfor_i

     # for(int j=0; j<n3; j++) {
     movl     $0, -8(%ebp)           # j = 0
begfor_j:
     movl     -8(%ebp),%eax          # EAX = j
     cmpl     28(%ebp),%eax          # leave the loop once j >= N3
     jge      endfor_j

     fldz                            # st(0) = 0.0, running sum for D[i][j]

     # for(int k=0; k<n2; k++) {
     movl     $0, -12(%ebp)          # k = 0
begfor_k:
     movl     -12(%ebp),%eax         # EAX = k
     cmpl     24(%ebp),%eax          # leave the loop once k >= N2
     jge      endfor_k

     movl     -4(%ebp),%ecx          # i
     movl     8(%ebp),%edx           # A base addr
     movl     (%edx,%ecx,4),%edx     # A[i]
     movl     -12(%ebp),%eax         # k
     fldl     (%edx,%eax,8)          # st(0) = A[i][k], st(1) = sum

     movl     -12(%ebp),%ecx         # k
     movl     12(%ebp),%edx          # B base addr
     movl     (%edx,%ecx,4),%edx     # B[k]
     movl     -8(%ebp),%eax          # j
     fmull    (%edx,%eax,8)          # st(0) = A[i][k] * B[k][j] ('l' = 64-bit operand)

     faddp    %st, %st(1)            # sum += product, pop -> st(0) = sum

     incl     -12(%ebp)              # k = k + 1
     jmp      begfor_k

endfor_k:

     movl     -4(%ebp),%ecx          # i
     movl     16(%ebp),%edx          # D base addr
     movl     (%edx,%ecx,4),%edx     # D[i]
     movl     -8(%ebp),%eax          # j
     fstpl    (%edx,%eax,8)          # D[i][j] = sum, pop (FPU stack back to entry depth)

     incl     -8(%ebp)               # j = j + 1
     jmp      begfor_j

endfor_j:

     incl     -4(%ebp)               # i = i + 1
     jmp      begfor_i

endfor_i:

     popl     %edx
     popl     %ecx

     movl     %ebp, %esp             # restore previous stack frame ptr
     popl     %ebp
     ret

#Note: in above, I am using the Intel x86 floating point instructions, i.e., fldl, fmul, fadd, fstp.
You need to replace them for IA32.
0
 
LVL 18

Assisted Solution

by:Jose Parrot
Jose Parrot earned 1000 total points
ID: 16976757
Hi,

Your code is very long; one will spend an entire hour just reading it. Let me suggest shortening the question. I know a teacher of Algorithm Analysis who expects algorithms from us, and when we show him code he always reminds us that he is a person, not a computer...

I can advance two things I have noticed.
First, the code assumes 4 bytes for int and 8 bytes for float. Are you sure these are the right sizes? Probably yes, but a good practice is to use sizeof(type). Maybe pass the sizes to the routine too.

Second, there are two other approaches to matrix multiplication: one is divide & conquer and the other is FFT. The latter is around 100 times faster than the raw algorithm. Hints at Wikipedia.

Jose
0

Featured Post

Keep up with what's happening at Experts Exchange!

Sign up to receive Decoded, a new monthly digest with product updates, feature release info, continuing education opportunities, and more.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

If anyone asked you to network diagram of the internet, it was drawn in the form of a fluffy cloud which further became known as cloud computing. Popularly cloud computing is defined as workloads that run over the internet in a commercial provider’s…
Most folks would know the basics of how Dropbox works, so that’s not the purpose of this article. Security is what it’s all about, so here I’ll share how I choose to secure my Dropbox Account and the Data it contains.
Exchange organizations may use the Journaling Agent of the Transport Service to archive messages going through Exchange. However, if the Transport Service is integrated with some email content management application (such as an anti-spam), the admin…
Loops Section Overview
Suggested Courses
Course of the Month13 days, 9 hours left to enroll

750 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question