Solved

# Reading in two floating point arrays from a C++ driver using a subprogram in assembly.

Posted on 2006-06-05
528 Views
Hi all, I need some help with a subprogram that will read in two floating-point arrays. These arrays are matrices. The dimensions of the matrices are as follows: X is n1 * n2, Y is n2 * n3, and Z, which is going to be the computed matrix, is X * Y (so n1 * n3). The C++ driver program is below. It displays both matrices and their expected product, along with the floating-point matrix that the subprogram is supposed to compute. I have tried to read in the arrays. From what I have read, these arrays are passed by reference (by pointer or address), so the first argument is the address of an array of addresses, and each address in that array is the address of a row of the matrix. Here is the driver.

#include <iostream>

using namespace std;

// asm_matmult prototype
extern "C"
void asm_matmult(double **X, double **Y, double **Z, int n1, int n2, int n3);

// Driver for asm_matmult().
// Builds row-pointer views (A, B, D) over three fixed-size 2D arrays, prints
// the two inputs and the expected product, calls asm_matmult(A, B, D, ...),
// and prints the matrix the routine wrote into D. Always returns 0.
int main()
{
    int i, j;
    const int N1 = 3, N2 = 2, N3 = 4;
    double AI[N1][N2] = { 1, 2, 3, 4, 5, 6 };
    double BI[N2][N3] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    // Result from Matlab
    double CI[N1][N3] = {11, 14, 17, 20, 23, 30, 37, 44, 35, 46, 57, 68 };
    // Zero-initialised so an implementation that accumulates into D
    // (D[i][j] += ...) does not start from indeterminate values.
    double DI[N1][N3] = { 0 };

    // AI, BI and DI (above) are 2D array types.

    // In C++ it is preferable to work with pointers to pointers than
    // 2D array types (see Stroustrup C.7.2 & C.7.3 for problems with
    // 2D arrays).
    // A, B, and D are corresponding pointers to arrays of pointers.
    // Each pointer in an array points to a row in the corresponding
    // AI, BI, or DI 2D array.

    double **A = new double*[N1];
    for (i = 0; i < N1; i++)
        A[i] = AI[i];

    double **B = new double*[N2];
    for (i = 0; i < N2; i++)
        B[i] = BI[i];

    double **D = new double*[N1];
    for (i = 0; i < N1; i++)
        D[i] = DI[i];

    cout << "A =\n";
    for (i = 0; i < N1; i++) {
        for (j = 0; j < N2; j++) {
            // printf("%10.4f    ", A[i][j]);
            cout << A[i][j] << "   ";
        }
        cout << '\n';
    }
    cout << endl;

    cout << "B =\n";
    for (i = 0; i < N2; i++) {
        for (j = 0; j < N3; j++) {
            cout << B[i][j] << "   ";
        }
        cout << '\n';
    }
    cout << endl;

    cout << "A*B is equal to\n";
    for (i = 0; i < N1; i++) {
        for (j = 0; j < N3; j++) {
            cout << CI[i][j] << "   ";
        }
        cout << '\n';
    }
    cout << endl;

    asm_matmult(A, B, D, N1, N2, N3);

    cout << "Value computed by asm_matmult() is\n";
    for (i = 0; i < N1; i++) {
        for (j = 0; j < N3; j++) {
            cout << D[i][j] << "   ";
        }
        cout << '\n';
    }
    cout << endl;

    // Only the row-pointer tables were heap-allocated; the rows themselves
    // live in AI/BI/DI on the stack and must not be deleted.
    delete[] A;
    delete[] B;
    delete[] D;

    return 0;
}

And here is what I have so far for the assembly subprogram. It's not much, because I cannot get the array loaded onto the FPU stack where I can manipulate it.

# Stub for: void asm_matmult(double **X, double **Y, double **Z,
#                            int n1, int n2, int n3)
# Syntax: GAS / AT&T, 32-bit x86; arguments are read from the stack via %ebp.
# Sets up a frame, resets the FPU, fetches the first argument (X) into %eax
# and returns. No matrix data is read or written yet.
        .global _asm_matmult

        .text

_asm_matmult:
mat:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish our own frame

        finit                           # reset the x87 FPU (empties its stack)
        movl    8(%ebp), %eax           # EAX = X, the first argument

        leave                           # same as: movl %ebp,%esp ; popl %ebp
        ret

Thanks for any help that can be provided.
0
Question by:00transam

Author Comment

I have a subprogram now that will read the arrays in one at a time. I can't figure out how to load them onto the FPU stack row by column so that I can multiply them. Here is the subprogram:

# void asm_matmult(double **X, double **Y, double **Z, int n1, int n2, int n3)
# Syntax: GAS / AT&T, 32-bit x86; arguments are read from the stack via %ebp.
#
# Work-in-progress version: visits every element X[i][j] (n1 rows, n2 columns),
# loads it onto the x87 stack and pops it again. Nothing is written to Z.
#
#  X at   8(%ebp) # Y at 12(%ebp) Z at 16(%ebp) # n1 at 20(%ebp) # n2 at 24(%ebp) # n3 at 28(%ebp)
#  i at  -4(%ebp) #   j at -8(%ebp)
# Sample data from the driver:
# X = 1,2,3,4,5,6
# Y = 1,2,3,4,5,6,7,8
# N1 = 3
# N2 = 2
# N3 = 4
.globl _asm_matmult

.section .text

_asm_matmult:

        pushl   %ebp                    # set up stack frame reg
        movl    %esp, %ebp
        subl    $104, %esp              # making room for local variables

# for(int i=0; i<n1; i++) {
        movl    $0, -4(%ebp)            # i = 0
begfor_i:
        movl    -4(%ebp), %eax          # EAX = i
        cmpl    20(%ebp), %eax          # leave the loop once i >= n1
        jge     endfor_i

# for(int j=0; j<n2; j++) {
        movl    $0, -8(%ebp)            # j = 0
begfor_j:
        movl    -8(%ebp), %eax          # EAX = j
        cmpl    24(%ebp), %eax          # leave the loop once j >= n2
        jge     endfor_j

        movl    8(%ebp), %eax           # EAX = X (array of row pointers)
        movl    -4(%ebp), %ecx          # ECX = i
        movl    (%eax,%ecx,4), %eax     # EAX = X[i] (4-byte row pointer)
        movl    -8(%ebp), %ecx          # ECX = j
        fldl    (%eax,%ecx,8)           # FPU: X[i][j] (8-byte double)
        #call   print_double
        fstp    %st(0)                  # pop it again; ffree would clear the
                                        # tag but leave the stack top moved
        #movl   $' ,%eax
        #call   print_char

        incl    -8(%ebp)                # j++
        jmp     begfor_j
endfor_j:
        #call   print_nl

        incl    -4(%ebp)                # i++
        jmp     begfor_i
endfor_i:
        movl    %ebp, %esp              # restore previous stack frame ptr
        popl    %ebp
        ret

The above part reads in the X array, and the part below will read in the Y array. I just need to read in one row of the X array and multiply it by the first column of the Y array.

For example 1*1 + 2*5 so the first element of the resultant array would be 11.

# Fragment (GAS / AT&T, 32-bit x86): visits every element Y[i][j], where Y has
# n2 rows and n3 columns, loading each onto the x87 stack and popping it again.
# The prologue is commented out, so this relies on a frame that is already set
# up with the same argument and local layout as the X-reading code:
#   Y at 12(%ebp), n2 at 24(%ebp), n3 at 28(%ebp), i at -4(%ebp), j at -8(%ebp)
#pushl      %ebp                        # set up stack frame reg
#movl      %esp,%ebp
#subl      $104,%esp                # making room for local variables

# for(int i=0; i<n2; i++) {
        movl    $0, -4(%ebp)            # i = 0
begfor1_i:
        movl    -4(%ebp), %eax          # EAX = i
        cmpl    24(%ebp), %eax          # leave the loop once i >= n2 (rows of Y)
        jge     endfor1_i

# for(int j=0; j<n3; j++) {
        movl    $0, -8(%ebp)            # j = 0
begfor1_j:
        movl    -8(%ebp), %eax          # EAX = j
        cmpl    28(%ebp), %eax          # leave the loop once j >= n3 (columns of Y)
        jge     endfor1_j

        movl    12(%ebp), %eax          # EAX = Y (array of row pointers)
        movl    -4(%ebp), %ecx          # ECX = i
        movl    (%eax,%ecx,4), %eax     # EAX = Y[i] (4-byte row pointer)
        movl    -8(%ebp), %ecx          # ECX = j
        fldl    (%eax,%ecx,8)           # FPU: Y[i][j] (8-byte double)
        #call   print_double
        fstp    %st(0)                  # pop it again; ffree would clear the
                                        # tag but leave the stack top moved
        #movl   $' ,%eax
        #call   print_char

        incl    -8(%ebp)                # j++
        jmp     begfor1_j               # the bound is re-checked at begfor1_j
endfor1_j:
        #call   print_nl

        incl    -4(%ebp)                # i++
        jmp     begfor1_i
endfor1_i:

        movl    %ebp, %esp              # restore previous stack frame ptr
        popl    %ebp
        ret
0

LVL 3

Accepted Solution

Hi 00transam,

The following could be one solution. The only change you may need to make is to replace the floating-point instructions with the forms your own IA-32 assembler accepts.

// C implementation for matrix multiplication: D = A * B.
//   A : N1 rows of N2 doubles (array of row pointers)
//   B : N2 rows of N3 doubles (array of row pointers)
//   D : N1 rows of N3 doubles (array of row pointers); every element is
//       overwritten, so the caller does not need to zero it first.
// With N2 == 0 every D[i][j] is set to 0.0.
void asm_matmult(double **A, double **B, double **D, int N1, int N2, int N3)
{
    int i, j, k;

    for (i = 0; i < N1; i++)          // each row of A / D
    {
        for (j = 0; j < N3; j++)      // each column of B / D
        {
            // Accumulate in a local so the result does not depend on
            // whatever D[i][j] held on entry.
            double sum = 0.0;

            for (k = 0; k < N2; k++)  // dot product of A's row i and B's column j
            {
                sum += A[i][k] * B[k][j];
            }
            D[i][j] = sum;
        }
    }
}

# void asm_matmult(double **A, double **B, double **D, int N1, int N2, int N3)
# Syntax: GAS / AT&T, 32-bit x86; arguments are read from the stack via %ebp.
#
# Computes D = A * B, where A is N1 x N2, B is N2 x N3 and D is N1 x N3.
# Each matrix is an array of 4-byte row pointers to rows of 8-byte doubles.
# Every D[i][j] is overwritten; with N2 <= 0 it is set to 0.0.
#
#  A at   8(%ebp) # B at 12(%ebp) D at 16(%ebp) # N1 at 20(%ebp) # N2 at 24(%ebp) # N3 at 28(%ebp)
#  i at  -4(%ebp) #  j at -8(%ebp) # k at -12(%ebp)
# Registers used as scratch: %eax, %ecx, %edx.
# x87 stack: holds the running sum (plus one temporary) inside the k loop and
# is back at its entry depth when the routine returns.
# Sample data from the driver:
# X = 1,2,3,4,5,6
# Y = 1,2,3,4,5,6,7,8
# N1 = 3
# N2 = 2
# N3 = 4
.globl _asm_matmult

.section .text

_asm_matmult:

        pushl   %ebp                    # set up stack frame reg
        movl    %esp, %ebp
        subl    $12, %esp               # room for the locals i, j, k

# for(int i=0; i<n1; i++) {
        movl    $0, -4(%ebp)            # i = 0
begfor_i:
        movl    -4(%ebp), %eax          # EAX = i
        cmpl    20(%ebp), %eax          # leave the loop once i >= N1
        jge     endfor_i

# for(int j=0; j<n3; j++) {
        movl    $0, -8(%ebp)            # j = 0
begfor_j:
        movl    -8(%ebp), %eax          # EAX = j
        cmpl    28(%ebp), %eax          # leave the loop once j >= N3
        jge     endfor_j

        fldz                            # FPU: sum = 0.0

# for(int k=0; k<n2; k++) {
        movl    $0, -12(%ebp)           # k = 0
begfor_k:
        movl    -12(%ebp), %eax         # EAX = k
        cmpl    24(%ebp), %eax          # leave the loop once k >= N2
        jge     endfor_k

        movl    8(%ebp), %edx           # EDX = A
        movl    -4(%ebp), %ecx          # ECX = i
        movl    (%edx,%ecx,4), %edx     # EDX = A[i]
        movl    -12(%ebp), %ecx         # ECX = k
        fldl    (%edx,%ecx,8)           # FPU: A[i][k], sum

        movl    12(%ebp), %edx          # EDX = B
        movl    (%edx,%ecx,4), %edx     # EDX = B[k] (ECX still holds k)
        movl    -8(%ebp), %eax          # EAX = j
        fmull   (%edx,%eax,8)           # FPU: A[i][k]*B[k][j], sum
        faddp   %st, %st(1)             # FPU: sum + product (temporary popped)

        incl    -12(%ebp)               # k++
        jmp     begfor_k

endfor_k:
        movl    16(%ebp), %edx          # EDX = D
        movl    -4(%ebp), %ecx          # ECX = i
        movl    (%edx,%ecx,4), %edx     # EDX = D[i]
        movl    -8(%ebp), %eax          # EAX = j
        fstpl   (%edx,%eax,8)           # D[i][j] = sum; FPU stack popped

        incl    -8(%ebp)                # j++
        jmp     begfor_j

endfor_j:

        incl    -4(%ebp)                # i++
        jmp     begfor_i

endfor_i:

        movl    %ebp, %esp              # restore previous stack frame ptr
        popl    %ebp
        ret

#Note: in above, I am using the Intel x86 floating point instructions, i.e., fldl, fmul, fadd, fstp.
You need to replace them for IA32.
0

LVL 18

Assisted Solution

Hi,

Your code is very long; one would spend an entire hour just reading it. Let me suggest shortening the question. I know a teacher of Algorithm Analysis who expects algorithms from us, and when we show him code he always reminds us that he is a person, not a computer...

I can advance two things I have noticed.
First, the code assumes 4 bytes for an int (and for a pointer) and 8 bytes for a double. Are you sure these are the right sizes? Probably yes, but a good practice is to use sizeof(type). Maybe pass it to the routine too.

Second, there are two other approaches to matrix multiplication: one is divide and conquer, and the other is FFT. The latter is around 100 times faster than the naive algorithm. Hints are on Wikipedia.

Jose
0

## Featured Post

### Suggested Solutions

Binary Bomb: Phase 5 3 1,644
core dump analysis 26 2,565
How to diplay the real thread ID 21 1,133
C++ bitwise shift issue 12 605
Real-time is more about the business, not the technology. In day-to-day life, to make real-time decisions like buying or investing, business needs the latest information(e.g. Gold Rate/Stock Rate). Unlike traditional days, you need not wait for a fe…
Synchronize a new Active Directory domain with an existing Office 365 tenant
how to add IIS SMTP to handle application/Scanner relays into office 365.
This demo shows you how to set up the containerized NetScaler CPX with NetScaler Management and Analytics System in a non-routable Mesos/Marathon environment for use with Micro-Services applications.