/*
 * avg - Returns the averaged pixel for position (i, j).
 *
 * Accumulates every src pixel whose row lies in [max(i-1,0), min(i+1,dim-1)]
 * and whose column lies in [max(j-1,0), min(j+1,dim-1)], then divides the
 * per-channel totals by the number of pixels accumulated.
 * max, min and RIDX are defined outside this block.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    /* sum is an automatic variable, so it must be reset before accumulating */
    initialize_pixel_sum(&sum);
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * naive_smooth - Baseline smoothing pass.
 *
 * Visits every (row, col) position of the dim x dim image and stores
 * the result of avg() for that position into dst.
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row;
    int col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
    }
}
/* Running per-channel totals plus the count of pixels folded into them. */
typedef struct {
    int red, green, blue;   /* accumulated channel values */
    int num;                /* number of pixels accumulated */
} pixel_sum;
/* One image pixel: three unsigned 16-bit-or-wider colour channels. */
typedef struct {
    unsigned short red;
    unsigned short green;
    unsigned short blue;
} pixel;
/*
 * accumulate_sum - Folds one pixel into a running total.
 *
 * Each colour channel of p is added to the matching field of *sum,
 * and the pixel counter is bumped by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    const int r = (int) p.red;
    const int g = (int) p.green;
    const int b = (int) p.blue;

    sum->red   = sum->red   + r;
    sum->green = sum->green + g;
    sum->blue  = sum->blue  + b;
    sum->num  += 1;
}
/*
 * assign_sum_to_pixel - Writes the per-channel mean into *current_pixel.
 *
 * Every channel total in sum is divided (integer division) by sum.num
 * and narrowed to unsigned short.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    const int count = sum.num;

    current_pixel->red   = (unsigned short) (sum.red / count);
    current_pixel->green = (unsigned short) (sum.green / count);
    current_pixel->blue  = (unsigned short) (sum.blue / count);
}
/*
 * avg - Returns the averaged pixel for position (i, j).
 *
 * Accumulates every src pixel whose row lies in [max(i-1,0), min(i+1,dim-1)]
 * and whose column lies in [max(j-1,0), min(j+1,dim-1)], then divides the
 * per-channel totals by the number of pixels accumulated.
 * max, min and RIDX are defined outside this block.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    /*
     * This call is required: sum is an uninitialized automatic variable and
     * accumulate_sum() only adds to its fields, so skipping the reset would
     * accumulate onto indeterminate values.
     */
    initialize_pixel_sum(&sum);
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * smooth - Smoothing pass with the column loop unrolled eight times.
 *
 * For every row, columns are processed in groups of eight calls to avg().
 * Columns left over when dim is not a multiple of eight are handled by a
 * plain loop, so no position outside the dim x dim image is touched.
 */
void smooth(int dim, pixel *src, pixel *dst)
{
    int i, j;

    for (i = 0; i < dim; i++) {
        /* unrolled body: only entered while a full group of 8 columns fits */
        for (j = 0; j + 7 < dim; j += 8) {
            dst[RIDX(i, j, dim)]       = avg(dim, i, j, src);
            dst[RIDX(i, (j+1), dim)]   = avg(dim, i, (j+1), src);
            dst[RIDX(i, (j+2), dim)]   = avg(dim, i, (j+2), src);
            dst[RIDX(i, (j+3), dim)]   = avg(dim, i, (j+3), src);
            dst[RIDX(i, (j+4), dim)]   = avg(dim, i, (j+4), src);
            dst[RIDX(i, (j+5), dim)]   = avg(dim, i, (j+5), src);
            dst[RIDX(i, (j+6), dim)]   = avg(dim, i, (j+6), src);
            dst[RIDX(i, (j+7), dim)]   = avg(dim, i, (j+7), src);
        }
        /* remainder columns (none when dim is a multiple of 8) */
        for (; j < dim; j++)
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
    }
}
/*
 * do_row_sum - Computes horizontal neighbourhood sums for one image row.
 *
 * sums[k] receives, per colour channel, the total of row[k] and its
 * left/right neighbours that exist inside the row: two pixels at either
 * end, three everywhere else. The num field of each entry is not written.
 *
 * Requires dim >= 2; sums and row must each hold dim elements.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    int i;

    /* left edge: the pixel and its right neighbour */
    sums[0].red   = row[0].red   + row[1].red;
    sums[0].green = row[0].green + row[1].green;
    sums[0].blue  = row[0].blue  + row[1].blue;

    /* interior: left neighbour, the pixel, right neighbour */
    for (i = 1; i < dim - 1; i++) {
        sums[i].red   = row[i-1].red   + row[i].red   + row[i+1].red;
        sums[i].green = row[i-1].green + row[i].green + row[i+1].green;
        sums[i].blue  = row[i-1].blue  + row[i].blue  + row[i+1].blue;
    }

    /* right edge: left neighbour and the pixel */
    sums[dim-1].red   = row[dim-2].red   + row[dim-1].red;
    sums[dim-1].green = row[dim-2].green + row[dim-1].green;
    sums[dim-1].blue  = row[dim-2].blue  + row[dim-1].blue;
}
/*
 * smooth - Smoothing pass built on per-row horizontal sums.
 *
 * Three rows of horizontal sums are kept in a buffer of 3*dim entries that
 * is reused cyclically (slot = row index mod 3). Each output pixel is the
 * vertical total of the two or three relevant row sums divided by the
 * number of contributing pixels: 4 at corners, 6 on edges, 9 inside.
 *
 * NOTE(review): `+` and `/` applied to pixel_sum values below are not valid
 * C for struct operands; read them as per-channel shorthand.
 * Requires dim >= 2. If the scratch buffer cannot be allocated the function
 * returns without writing dst.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums = malloc(3*dim*sizeof(pixel_sum));
    pixel_sum *r0 = row_sums;
    pixel_sum *r1 = r0+dim;
    pixel_sum *r2;
    int i, j;

    if (!row_sums)
        return;

    do_row_sum(r0, dim, src);
    /* second image row; the loop index is not initialized yet at this point */
    do_row_sum(r1, dim, src+dim);

    // calculate row 0
    *dst++ = (*r0++ + *r1++)/4;
    for (i=1; i<dim-1; i++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst++ = (*r0 + *r1)/4;

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* overwrite the slot of the row that is no longer needed */
        do_row_sum(r2, dim, src+(i+1)*dim);
        *dst++ = (*r0++ + *r1++ + *r2++)/6;
        for (j=1; j<dim-1; j++)
            *dst++ = (*r0++ + *r1++ + *r2++)/9;
        *dst++ = (*r0 + *r1 + *r2)/6;
    }

    // calculate the last row (i == dim-1 here)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    *dst++ = (*r0++ + *r1++)/4;
    for (i=1; i<dim-1; i++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst = (*r0 + *r1)/4;

    free(row_sums);
}
/*
 * do_row_sum - Computes horizontal neighbourhood sums for one image row.
 *
 * For each column k, sums[k] gets the per-channel total of row[k] plus the
 * neighbours immediately left and right of it that lie inside the row
 * (two pixels at the ends, three in the interior). The num field is left
 * untouched.
 *
 * Requires dim >= 2; sums and row must each hold dim elements.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    int k;
    const int last = dim - 1;

    /* first column has no left neighbour */
    sums[0].red   = row[0].red   + row[1].red;
    sums[0].green = row[0].green + row[1].green;
    sums[0].blue  = row[0].blue  + row[1].blue;

    for (k = 1; k < last; k++) {
        sums[k].red   = row[k-1].red   + row[k].red   + row[k+1].red;
        sums[k].green = row[k-1].green + row[k].green + row[k+1].green;
        sums[k].blue  = row[k-1].blue  + row[k].blue  + row[k+1].blue;
    }

    /* last column has no right neighbour */
    sums[last].red   = row[last-1].red   + row[last].red;
    sums[last].green = row[last-1].green + row[last].green;
    sums[last].blue  = row[last-1].blue  + row[last].blue;
}
/*
 * smooth - Smoothing pass built on per-row horizontal sums.
 *
 * A scratch buffer of 3*dim row-sum entries is used as a three-slot ring
 * indexed by (row mod 3). Output pixels are the vertical total of the two
 * or three relevant row sums divided by 4 (corners), 6 (edges) or
 * 9 (interior). The scratch buffer is released before returning.
 *
 * NOTE(review): `+` and `/` applied to pixel_sum values below are not valid
 * C for struct operands; read them as per-channel shorthand.
 * Requires dim >= 2. If the scratch buffer cannot be allocated the function
 * returns without writing dst.
 */
void smooth(int dim, pixel *src, pixel *dst)
{
    pixel_sum *row_sums = malloc(3*dim*sizeof(pixel_sum));
    pixel_sum *r0 = row_sums;
    pixel_sum *r1 = r0+dim;
    pixel_sum *r2;
    int i, j;

    if (!row_sums)
        return;

    do_row_sum(r0, dim, src);
    /* row 1 starts dim pixels into src; i holds no value yet */
    do_row_sum(r1, dim, src+dim);

    // calculate row 0
    *dst++ = (*r0++ + *r1++)/4;
    for (i=1; i<dim-1; i++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst++ = (*r0 + *r1)/4;

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* the slot for row i+1 previously held row i-2, which is done */
        do_row_sum(r2, dim, src+(i+1)*dim);
        *dst++ = (*r0++ + *r1++ + *r2++)/6;
        for (j=1; j<dim-1; j++)
            *dst++ = (*r0++ + *r1++ + *r2++)/9;
        *dst++ = (*r0 + *r1 + *r2)/6;
    }

    // calculate the last row (i == dim-1 here)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    *dst++ = (*r0++ + *r1++)/4;
    for (i=1; i<dim-1; i++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst = (*r0 + *r1)/4;

    free(row_sums);
}
# Toolchain and flags used for every compile/link step.
CC = gcc
CFLAGS = -Wall -O2
LIBS = -lm

# Object files linked into the driver executable.
OBJS = driver.o kernels.o fcyc.o clock.o

# These targets name actions rather than files on disk.
.PHONY: all handin clean

all: driver

# Recipe lines must begin with a TAB character.
driver: $(OBJS) fcyc.h clock.h defs.h config.h
	$(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver

# HANDINDIR, TEAM and VERSION are not set here; presumably supplied by the
# environment or the make command line.
handin:
	cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c

# The leading '-' lets make continue even if rm reports an error.
clean:
	-rm -f $(OBJS) driver core *~ *.o
/* Accumulator used while averaging: channel totals and a pixel counter. */
typedef struct {
    int red, green, blue;   /* per-channel running totals */
    int num;                /* how many pixels were added */
} pixel_sum;
/*
 * initialize_pixel_sum - Resets every field of *sum to zero.
 */
static void initialize_pixel_sum(pixel_sum *sum)
{
    sum->num   = 0;
    sum->blue  = 0;
    sum->green = 0;
    sum->red   = 0;
}
/*
 * accumulate_sum - Adds the channels of pixel p to the totals in *sum
 * and increments the count of pixels accumulated.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    sum->num  = sum->num + 1;
    sum->red   += (int) p.red;
    sum->green += (int) p.green;
    sum->blue  += (int) p.blue;
}
#include <stdlib.h>
/* Integer channel totals; wide enough to hold sums of several pixels. */
typedef struct {
    int red, green, blue;   /* summed colour channels */
    int num;                /* pixel count field */
} pixel_sum;
/* One image pixel with three unsigned short colour channels. */
typedef struct {
    unsigned short red, green, blue;
} pixel;
/*
 * add_pixel_2 - Stores the per-channel total of pixels *a and *b in *sum.
 * The num field of *sum is not written.
 */
static inline void add_pixel_2(pixel_sum *sum, pixel *a, pixel *b) {
    const int r = a->red + b->red;
    const int g = a->green + b->green;
    const int bl = a->blue + b->blue;

    sum->red = r;
    sum->green = g;
    sum->blue = bl;
}
/*
 * add_pixel_3 - Stores the per-channel total of pixels *a, *b and *c
 * in *sum. The num field of *sum is not written.
 */
static inline void add_pixel_3(pixel_sum *sum, pixel *a, pixel *b, pixel *c) {
    const int r = a->red + b->red + c->red;
    const int g = a->green + b->green + c->green;
    const int bl = a->blue + b->blue + c->blue;

    sum->red = r;
    sum->green = g;
    sum->blue = bl;
}
/*
 * do_row_sum - Computes horizontal neighbourhood sums for one image row.
 *
 * sums[k] receives the total of row[k] and its in-row left/right
 * neighbours: two pixels at either end (add_pixel_2), three in the
 * interior (add_pixel_3).
 *
 * Requires dim >= 2; sums and row must each hold dim elements.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    int i;

    /*
     * Indexing is used instead of row++ inside the argument lists: the
     * evaluation order of function arguments is unspecified, so mixing
     * row++ with other uses of row in one call is undefined behaviour.
     */
    add_pixel_2(&sums[0], &row[0], &row[1]);
    for (i = 1; i < dim - 1; i++)
        add_pixel_3(&sums[i], &row[i-1], &row[i], &row[i+1]);
    add_pixel_2(&sums[dim-1], &row[dim-2], &row[dim-1]);
}
/*
 * avg_pixel_sums_2 - Adds two row sums channel by channel, divides each
 * total by num (integer division) and stores the result in *dst.
 */
static inline void avg_pixel_sums_2(pixel *dst, pixel_sum *a, pixel_sum *b, int num) {
    const int red_total = a->red + b->red;
    const int green_total = a->green + b->green;
    const int blue_total = a->blue + b->blue;

    dst->red = red_total / num;
    dst->green = green_total / num;
    dst->blue = blue_total / num;
}
/*
 * avg_pixel_sums_3 - Adds three row sums channel by channel, divides each
 * total by num (integer division) and stores the result in *dst.
 */
static inline void avg_pixel_sums_3(pixel *dst, pixel_sum *a, pixel_sum *b, pixel_sum *c, int num) {
    const int red_total = a->red + b->red + c->red;
    const int green_total = a->green + b->green + c->green;
    const int blue_total = a->blue + b->blue + c->blue;

    dst->red = red_total / num;
    dst->green = green_total / num;
    dst->blue = blue_total / num;
}
/*
 * smooth - Smoothing pass built on per-row horizontal sums.
 *
 * do_row_sum() produces, for one image row, the horizontal 2- or 3-pixel
 * totals. Three such rows are kept in a scratch buffer of 3*dim entries
 * used as a ring (slot = row index mod 3). Each output pixel is the
 * vertical total of the two or three relevant row sums divided by the
 * number of contributing pixels: 4 at corners, 6 on edges, 9 inside.
 *
 * dim: side length of the square image.
 * src: input image, dim*dim pixels, rows stored one after another.
 * dst: output image, same layout.
 *
 * A 1x1 image is copied unchanged; dim <= 0 does nothing. If the scratch
 * buffer cannot be allocated the function returns without writing dst.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums;
    pixel_sum *r0;
    pixel_sum *r1;
    pixel_sum *r2;
    int i, j;

    /* the row-sum scheme needs at least two rows and two columns */
    if (dim < 2) {
        if (dim == 1)
            *dst = *src;
        return;
    }

    row_sums = malloc((size_t)3 * dim * sizeof *row_sums);
    if (!row_sums)
        return;

    r0 = row_sums;
    r1 = r0 + dim;
    do_row_sum(r0, dim, src);
    /* second image row; i is not initialized yet, so it must not be used */
    do_row_sum(r1, dim, src + dim);

    // calculate row 0
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j = 1; j < dim - 1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst++, r0, r1, 4);

    // calculate all the middle rows
    for (i = 1; i < dim - 1; i++) {
        r0 = row_sums + ((i - 1) % 3) * dim;
        r1 = row_sums + (i % 3) * dim;
        r2 = row_sums + ((i + 1) % 3) * dim;
        /* the slot for row i+1 held row i-2, which is no longer needed */
        do_row_sum(r2, dim, src + (i + 1) * dim);
        avg_pixel_sums_3(dst++, r0++, r1++, r2++, 6);
        for (j = 1; j < dim - 1; j++)
            avg_pixel_sums_3(dst++, r0++, r1++, r2++, 9);
        avg_pixel_sums_3(dst++, r0, r1, r2, 6);
    }

    // calculate the last row (i == dim-1 after the loop)
    r0 = row_sums + ((i - 1) % 3) * dim;
    r1 = row_sums + (i % 3) * dim;
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j = 1; j < dim - 1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst, r0, r1, 4);

    free(row_sums);
}
/*
 * do_row_sum - Fills sums[0..dim-1] with horizontal neighbourhood totals
 * of one image row: two-pixel totals at the ends via add_pixel_2 and
 * three-pixel totals in between via add_pixel_3.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    const int last = dim - 1;
    int col;

    /* left end: pixel 0 and pixel 1 */
    add_pixel_2(&sums[0], &row[0], &row[1]);

    /* interior columns use both neighbours */
    for (col = 1; col < last; col++) {
        add_pixel_3(&sums[col], &row[col - 1], &row[col], &row[col + 1]);
    }

    /* right end: the final pixel and the one before it */
    add_pixel_2(&sums[last], &row[last - 1], &row[last]);
}
void smooth(int dim, pixel *src, pixel *dst) {
pixel_sum *row_sums = malloc(3*dim*sizeof(pixel_sum));
pixel_sum *r0 = row_sums;
pixel_sum *r1 = r0+dim;
pixel_sum *r2;
int i, j;
do_row_sum(r0, dim, src);
do_row_sum(r1, dim, src+dim);
Smooth: Version = smooth: Current working version:
Dim 32 64 128 256 512 Mean
Your CPEs 40.9 150.4 164.4 169.0 219.1
Baseline CPEs 379.5 401.6 403.2 398.8 397.2
Speedup 9.3 2.7 2.5 2.4 1.8 3.0
Join the community of 500,000 technology professionals and ask your questions.
Connect with top rated Experts
19 Experts available now in Live!