Minimizing MIPS code so that it doesn't use the stack

The code is below. How can I minimize/simplify it so that I don't have to store registers to and load them from the stack?
vbsme:  addi	$sp, $sp, -20		# create space on the stack pointer
	sw	$ra, 0($sp)		# save return address	

	li	$v0, 0			# reset $v0 
	li	$v1, 0			# reset $v1
	li 	$s0, 1			# i(row) = 1 
	li 	$s1, 1			# j(col) = 1
	li	$s5, 0			# element = 0
	li 	$t3, 1000000		# least sad = 1000000 (initialize it to a large value at first)
	lw 	$s2, 0($a0)		# frame row size
	lw 	$s3, 4($a0)		# frame col size
	lw 	$t6, 8($a0)		# window row size
	lw 	$t7, 12($a0)		# window col size
	
	
for:	mul 	$s4, $s2, $s3		# row * col
	bge 	$s5, $s4, exit		# if element >= row * col then exit
	
	addi	$a3, $s0, -1		# 4th parameter: i-1
	addi	$t0, $s1, -1		# $t0 = j - 1
	
	add 	$t1, $a3, $t6		# $t1 = current row position + frame row size
	add	$t2, $t0, $t7		# $t2 = current col position + frame col size
	bgt	$t1, $s2, cont		# if current row position + window row size > frame row size then don't calculate sad  
	bgt	$t2, $s3, cont		# if current col position + window col size > frame col size then don't calculate sad 
	
	sw	$t0, 4($sp)		# 5th parameter: j-1
	sw	$t5, 8($sp)		# save the temporary x coordinate to stack
	
	jal 	sad			# calculate the sum of absolute difference using the frame starting from row a3 and col 4($sp)
	
	lw	$t0, 4($sp)		# restore 5th parameter: j-1
	lw	$t5, 8($sp)		# restore the temporary x coordinate to stack
		
	bgt	$v0, $t3, cont		# compare with the least sad on the window
	add	$t3, $v0, $zero		# update the least sad with the smallest one that we just compute
	addi 	$t4, $s0, -1		# set $t4 to have temporary y coordinate
	addi	$t5, $s1, -1		# set $t5 to have temporary x coordinate
	
cont:	add $s6, $s0, $s1		# i + j
	andi $s7, $s6, 0x1		# (i + j) & 1 (to check if it's even)
if:	bne $s7, $zero, else		# if (i+j) % 2 != 0 go to else
inif:	bge $s1, $s2, inelse		# if (j < size)
	addi $s1, $s1, 1		# j ++
	j inif1				
inelse: addi $s0, $s0, 2		# else i+=2	
inif1:	addi $s7, $s0, -1  		# $s7 = i - 1
	beqz $s7, loopback		# if (i <= 1) go to loopback
	addi $s0, $s0, -1		# i--
	j loopback
else:  	bge $s0, $s2, inelse1		# if (i < size)
	addi $s0, $s0, 1		# i++
	j inif2
inelse1:addi $s1, $s1, 2		# else j+=2
inif2:  addi $s7, $s1, -1  		# $s7 = j - 1
	beqz $s7, loopback		# if (j <= 1) go to loopback
	addi $s1, $s1, -1		# j--
loopback: addi $s5, $s5, 1		# element++
	  j for
exit:	add $v1, $t5, $zero		# (v1) x coordinate of the block in the frame with the minimum SAD
	add $v0, $t4, $zero		# (v0) y coordinate of the block in the frame with the minimum SAD
	lw	$ra, 0($sp)		# restore return address
	addi	$sp, $sp, 20		# restore stack pointer   
	jr $ra				# return	

.globl	sad
sad:	subu $sp, $sp, 32		# allocate stack space for largest function	
	sw $s2, 8($sp)			# save $s2 value
	sw $s1, 4($sp)			# save $s1 value
	sw $s0, 0($sp)			# save $s0 value
	
	li  $v0, 0			# reset $v0 
	add $s0, $a3, $zero		# reset counter for the row to 0
	li $t1, 0			# index for the window array starting at 0
	lw $t0, 36($sp)			# 5th parameter stored in $t0
			
	add $s6, $a3, $t6		# $s6 = row size	
	add $s7, $t0, $t7		# $s7 = col size
	
	
outer:	bge $s0, $s6, end		# if row counter > frame row size then exit
	lw  $t0, 36($sp)		# reset counter for the column to 0
inner:	bge $t0, $s7, pre		# if col counter > frame col size then go to pre

	mul $s2, $s0, $s3		# perform the following formula (row *num_columns) + col
	add $s2, $s2, $t0
	sll $t2, $s2, 2			# shift by 2 to get the right offset of the frame
	sll $t5, $t1, 2			# shift by 2 to get the right offset of the window
	
	add $t2, $t2, $a1
	add $t5, $t5, $a2
	
	lw  $t2, 0($t2)			# load the corresponding element value at the frame
	lw  $t5, 0($t5)			# load the corresponding element value at the window
	sub $s4, $t2, $t5		# subtract the frame and window size element difference
	bgez $s4, calc 			# if the frame and window difference is positive jump to calc
	sub $s4, $zero, $s4		# otherwise take the absolute value
	
calc:	add $v0, $v0, $s4		# sum the absolute value with $v0
	addi $t1, $t1, 1		# add the window index by 1
	addi $t0, $t0, 1		# add the column counter by 1	
	j inner
	
pre:	addi $s0, $s0, 1		# increment row's counter
	j outer				# jump to outer loop

end:	
	
	lw $s2, 8($sp)			# restore original value of $s2 for caller
	lw $s1, 4($sp)			# restore original value of $s1 for caller
	lw $s0, 0($sp)			# restore original value of $s0 for caller
	addiu $sp, $sp, 32		# restore the caller's stack pointer
	jr $ra				# return to caller's code

Open in new window

kuntilanakAsked:
Who is Participating?

[Webinar] Streamline your web hosting managementRegister Today

x
 
Infinity08Connect With a Mentor Commented:
The basic idea is to:

(a) minimize the number of registers (variables) you need by optimizing/modifying your algorithm
(b) re-use registers whenever possible

Have a read through this Wikipedia article, for example:

        http://en.wikipedia.org/wiki/Register_allocation
0
All Courses

From novice to tech pro — start learning today.