/*
 * avg - Returns the averaged pixel value at (i, j).
 *
 * Accumulates every pixel of the 3x3 window centred on (i, j) that lies
 * inside the dim x dim image and converts the running sum to a pixel.
 * NOTE(review): assumes initialize_pixel_sum() zeroes all fields of sum --
 * confirm against its definition.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);

    /* max/min clamp the window so edge and corner pixels stay in bounds. */
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * naive_smooth - Reference smoothing kernel: fills every element of the
 * dim x dim destination image with avg() of the matching source position,
 * walking the image row by row.
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    for (int row = 0; row < dim; row++) {
        for (int col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
    }
}
/* Running per-channel totals used while averaging a group of pixels. */
typedef struct {
    int red, green, blue;   /* accumulated colour channel values */
    int num;                /* how many pixels have been accumulated */
} pixel_sum;
/* One image pixel: three colour channels, each an unsigned short. */
typedef struct {
    unsigned short red;
    unsigned short green;
    unsigned short blue;
} pixel;
/*
 * accumulate_sum - Folds one pixel into a running total: each colour
 * channel of p is added to the matching field of sum, and the count of
 * accumulated pixels is increased by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    const int r = (int) p.red;
    const int g = (int) p.green;
    const int b = (int) p.blue;

    sum->red   += r;
    sum->green += g;
    sum->blue  += b;
    sum->num   += 1;
}
/*
 * assign_sum_to_pixel - Writes the per-channel average of sum into
 * *current_pixel: every channel total is divided (integer division) by
 * the number of accumulated pixels, sum.num.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    const int count = sum.num;

    current_pixel->red   = (unsigned short) (sum.red   / count);
    current_pixel->green = (unsigned short) (sum.green / count);
    current_pixel->blue  = (unsigned short) (sum.blue  / count);
}
/*
 * smooth - Smoothing kernel with the column loop unrolled eight times.
 *
 * Each destination pixel is avg() of the matching source position. The
 * unrolled loop only runs while a full group of eight columns fits in the
 * row; any remaining columns are handled one at a time so that a dim that
 * is not a multiple of 8 never indexes past the end of a row.
 */
void smooth(int dim, pixel *src, pixel *dst)
{
    int i, j;

    for (i = 0; i < dim; i++) {
        /* Main body: eight columns per iteration. */
        for (j = 0; j + 7 < dim; j += 8) {
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
            dst[RIDX(i, (j+1), dim)] = avg(dim, i, (j+1), src);
            dst[RIDX(i, (j+2), dim)] = avg(dim, i, (j+2), src);
            dst[RIDX(i, (j+3), dim)] = avg(dim, i, (j+3), src);
            dst[RIDX(i, (j+4), dim)] = avg(dim, i, (j+4), src);
            dst[RIDX(i, (j+5), dim)] = avg(dim, i, (j+5), src);
            dst[RIDX(i, (j+6), dim)] = avg(dim, i, (j+6), src);
            dst[RIDX(i, (j+7), dim)] = avg(dim, i, (j+7), src);
        }
        /* Tail: leftover columns when dim is not a multiple of 8. */
        for (; j < dim; j++)
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
    }
}
/*
 * smooth - Row-sum formulation of the smoothing kernel (sketch).
 *
 * Keeps three rows of per-column sums in a rotating buffer (source row k
 * lives in slot k % 3), so each source row is passed to do_row_sum once and
 * then reused for the output rows above, at and below it. The divisors are
 * the number of pixels in the clamped 3x3 window: 4 at the corners, 6 along
 * the edges and 9 in the interior.
 *
 * If the scratch buffer cannot be allocated the function returns without
 * writing dst.
 *
 * NOTE(review): presumably do_row_sum(r, dim, row) stores in r[k] the sum of
 * row[k-1..k+1] clamped to the row -- confirm against its definition.
 * NOTE(review): `+` and `/` applied to pixel_sum values are not valid C
 * (pixel_sum is a struct); the expressions below are shorthand for
 * per-channel arithmetic and must be spelled out before this compiles.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums;
    pixel_sum *r0, *r1, *r2;
    int i, j;

    /* The algorithm needs at least two rows; a 1x1 image smooths to itself. */
    if (dim < 2) {
        if (dim == 1)
            *dst = *src;
        return;
    }

    row_sums = malloc(3 * (size_t)dim * sizeof *row_sums);
    if (row_sums == NULL)
        return;

    r0 = row_sums;
    r1 = r0+dim;
    do_row_sum(r0, dim, src);
    do_row_sum(r1, dim, src+dim);   /* second source row */

    // calculate row 0
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst++ = (*r0 + *r1)/4;

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* Overwrites the slot of row i-2, which is no longer needed. */
        do_row_sum(r2, dim, src+(i+1)*dim);
        *dst++ = (*r0++ + *r1++ + *r2++)/6;
        for (j=1; j<dim-1; j++)
            *dst++ = (*r0++ + *r1++ + *r2++)/9;
        *dst++ = (*r0 + *r1 + *r2)/6;
    }

    // calculate the last row (i == dim-1 here)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst = (*r0 + *r1)/4;

    free(row_sums);
}
# Toolchain and flags used to build the driver program.
CC = gcc
CFLAGS = -Wall -O2
LIBS = -lm

OBJS = driver.o kernels.o fcyc.o clock.o

# These targets are commands, not files produced by their recipes.
.PHONY: all handin clean

all: driver

# Relink the driver whenever an object file or one of the listed headers changes.
driver: $(OBJS) fcyc.h clock.h defs.h config.h
	$(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver

# Copy kernels.c to the hand-in directory.
# HANDINDIR, TEAM and VERSION are not defined in this fragment; they must be
# supplied on the command line or elsewhere in the Makefile.
handin:
	cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c

# The leading '-' lets make continue even if rm reports an error.
clean:
	-rm -f $(OBJS) driver core *~ *.o
#include <stdlib.h>
/*
 * add_pixel_2 - Stores the per-channel sum of pixels *a and *b in *sum.
 * Only the three colour fields are written; sum->num is left untouched.
 */
static inline void add_pixel_2(pixel_sum *sum, pixel *a, pixel *b) {
    const pixel lhs = *a;
    const pixel rhs = *b;

    sum->red   = lhs.red   + rhs.red;
    sum->green = lhs.green + rhs.green;
    sum->blue  = lhs.blue  + rhs.blue;
}
/*
 * add_pixel_3 - Stores the per-channel sum of pixels *a, *b and *c in *sum.
 * Only the three colour fields are written; sum->num is left untouched.
 */
static inline void add_pixel_3(pixel_sum *sum, pixel *a, pixel *b, pixel *c) {
    const pixel first = *a;
    const pixel second = *b;
    const pixel third = *c;

    sum->red   = first.red   + second.red   + third.red;
    sum->green = first.green + second.green + third.green;
    sum->blue  = first.blue  + second.blue  + third.blue;
}
/*
 * avg_pixel_sums_2 - Adds the colour totals of *a and *b channel by channel,
 * divides each by num (integer division) and stores the result in *dst.
 * The num fields of a and b are not consulted.
 */
static inline void avg_pixel_sums_2(pixel *dst, pixel_sum *a, pixel_sum *b, int num) {
    const int red_total = a->red + b->red;
    dst->red = red_total / num;

    const int green_total = a->green + b->green;
    dst->green = green_total / num;

    const int blue_total = a->blue + b->blue;
    dst->blue = blue_total / num;
}
/*
 * avg_pixel_sums_3 - Adds the colour totals of *a, *b and *c channel by
 * channel, divides each by num (integer division) and stores the result in
 * *dst. The num fields of the inputs are not consulted.
 */
static inline void avg_pixel_sums_3(pixel *dst, pixel_sum *a, pixel_sum *b, pixel_sum *c, int num) {
    const int red_total = a->red + b->red + c->red;
    dst->red = red_total / num;

    const int green_total = a->green + b->green + c->green;
    dst->green = green_total / num;

    const int blue_total = a->blue + b->blue + c->blue;
    dst->blue = blue_total / num;
}
/*
 * smooth - Row-sum formulation of the smoothing kernel.
 *
 * Keeps three rows of per-column sums in a rotating buffer (source row k
 * lives in slot k % 3), so each source row is passed to do_row_sum once and
 * then reused for the output rows above, at and below it. The divisors
 * handed to the avg_pixel_sums_* helpers are the number of pixels in the
 * clamped 3x3 window: 4 at the corners, 6 along the edges and 9 in the
 * interior.
 *
 * If the scratch buffer cannot be allocated the function returns without
 * writing dst.
 *
 * NOTE(review): presumably do_row_sum(r, dim, row) stores in r[k] the sum of
 * row[k-1..k+1] clamped to the row -- confirm against its definition.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums;
    pixel_sum *r0, *r1, *r2;
    int i, j;

    /* The algorithm needs at least two rows; a 1x1 image smooths to itself. */
    if (dim < 2) {
        if (dim == 1)
            *dst = *src;
        return;
    }

    row_sums = malloc(3 * (size_t)dim * sizeof *row_sums);
    if (row_sums == NULL)
        return;

    r0 = row_sums;
    r1 = r0+dim;
    do_row_sum(r0, dim, src);
    do_row_sum(r1, dim, src+dim);   /* second source row */

    // calculate row 0
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j=1; j<dim-1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst++, r0, r1, 4);

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* Overwrites the slot of row i-2, which is no longer needed. */
        do_row_sum(r2, dim, src+(i+1)*dim);
        avg_pixel_sums_3(dst++, r0++, r1++, r2++, 6);
        for (j=1; j<dim-1; j++)
            avg_pixel_sums_3(dst++, r0++, r1++, r2++, 9);
        avg_pixel_sums_3(dst++, r0, r1, r2, 6);
    }

    // calculate the last row (i == dim-1 here)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j=1; j<dim-1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst, r0, r1, 4);

    free(row_sums);
}
void smooth(int dim, pixel *src, pixel *dst) {
pixel_sum *row_sums = malloc(3*dim*sizeof(pixel_sum));
pixel_sum *r0 = row_sums;
pixel_sum *r1 = r0+dim;
pixel_sum *r2;
int i, j;
do_row_sum(r0, dim, src);
do_row_sum(r1, dim, src+dim);
Smooth: Version = smooth: Current working version:
Dim 32 64 128 256 512 Mean
Your CPEs 40.9 150.4 164.4 169.0 219.1
Baseline CPEs 379.5 401.6 403.2 398.8 397.2
Speedup 9.3 2.7 2.5 2.4 1.8 3.0
