/*
 * avg - Returns the averaged pixel value at (i, j).
 *
 * Every pixel whose coordinates lie within one step of (i, j) and inside
 * [0, dim-1] on both axes is accumulated, then the totals are turned into
 * a single pixel by assign_sum_to_pixel().
 *
 * dim: upper bound (exclusive) for both coordinates
 * i, j: coordinates of the pixel being averaged
 * src: source image, addressed through RIDX(ii, jj, dim)
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    /* sum is an automatic variable: it must be zeroed before accumulating. */
    initialize_pixel_sum(&sum);
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * naive_smooth - Fills dst with avg() of every (row, col) position of src,
 * for row and col each running from 0 to dim-1.
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row = 0;

    while (row < dim) {
        int col = 0;

        while (col < dim) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
            col++;
        }
        row++;
    }
}
/* Running totals used while averaging pixels. */
typedef struct {
    int red, green, blue;   /* per-channel totals */
    int num;                /* how many pixels have been added */
} pixel_sum;
/* One image pixel: three unsigned 16-bit-or-wider colour channels. */
typedef struct {
    unsigned short red;
    unsigned short green;
    unsigned short blue;
} pixel;    /* the terminating semicolon is required */
/*
 * accumulate_sum - Adds each colour channel of p to the matching total in
 * sum and increases the pixel count by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    sum->red   = sum->red   + (int) p.red;
    sum->green = sum->green + (int) p.green;
    sum->blue  = sum->blue  + (int) p.blue;
    sum->num  += 1;
}
/*
 * assign_sum_to_pixel - Stores in current_pixel the per-channel totals of
 * sum divided (integer division) by sum.num. sum.num must be non-zero.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    const int count = sum.num;

    current_pixel->red   = (unsigned short) (sum.red / count);
    current_pixel->green = (unsigned short) (sum.green / count);
    current_pixel->blue  = (unsigned short) (sum.blue / count);
}
/*
 * avg - Returns the averaged pixel value at (i, j).
 *
 * Accumulates every pixel whose coordinates lie within one step of (i, j)
 * and inside [0, dim-1] on both axes, then converts the totals into one
 * pixel with assign_sum_to_pixel().
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    /*
     * Do we need this?  Yes: sum is an uninitialised automatic variable and
     * accumulate_sum() only adds to its fields, so they must start at zero.
     */
    initialize_pixel_sum(&sum);
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * smooth - Same result as calling avg() for every position, with the inner
 * loop unrolled eight times.
 *
 * The unrolled loop only covers the largest multiple of 8 that fits in dim;
 * a plain loop finishes the remaining columns so nothing is written past
 * the end of a row when dim is not a multiple of 8.
 */
void smooth(int dim, pixel *src, pixel *dst)
{
    int i, j;
    const int unrolled_end = dim - dim % 8;

    for (i = 0; i < dim; i++) {
        for (j = 0; j < unrolled_end; j += 8) {
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
            dst[RIDX(i, (j+1), dim)] = avg(dim, i, (j+1), src);
            dst[RIDX(i, (j+2), dim)] = avg(dim, i, (j+2), src);
            dst[RIDX(i, (j+3), dim)] = avg(dim, i, (j+3), src);
            dst[RIDX(i, (j+4), dim)] = avg(dim, i, (j+4), src);
            dst[RIDX(i, (j+5), dim)] = avg(dim, i, (j+5), src);
            dst[RIDX(i, (j+6), dim)] = avg(dim, i, (j+6), src);
            dst[RIDX(i, (j+7), dim)] = avg(dim, i, (j+7), src);
        }
        /* Leftover columns (at most seven). */
        for (; j < dim; j++)
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
    }
}
/*
 * do_row_sum - For every column of one image row, stores in sums[] the
 * per-channel total of that pixel and whichever of its left/right
 * neighbours exist (two pixels at each end of the row, three elsewhere).
 *
 * Requires dim >= 2. Only red, green and blue of each entry are written.
 *
 * C has no '+' for struct operands, and an expression such as
 * "*row++ + *row" modifies and reads row without sequencing, so the sums
 * are spelled out per channel with plain indexing.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    int i;

    /* Left edge: no left neighbour. */
    sums[0].red   = row[0].red   + row[1].red;
    sums[0].green = row[0].green + row[1].green;
    sums[0].blue  = row[0].blue  + row[1].blue;

    for (i = 1; i < dim-1; i++) {
        sums[i].red   = row[i-1].red   + row[i].red   + row[i+1].red;
        sums[i].green = row[i-1].green + row[i].green + row[i+1].green;
        sums[i].blue  = row[i-1].blue  + row[i].blue  + row[i+1].blue;
    }

    /* Right edge: no right neighbour. */
    sums[dim-1].red   = row[dim-2].red   + row[dim-1].red;
    sums[dim-1].green = row[dim-2].green + row[dim-1].green;
    sums[dim-1].blue  = row[dim-2].blue  + row[dim-1].blue;
}
/*
 * smooth - Sliding-window smoothing built on do_row_sum().
 *
 * row_sums holds three rows of horizontal sums used as a ring: image row k
 * lives in slot k % 3. Each output pixel is the total of the two or three
 * relevant row sums divided by the number of pixels covered (4 at corners,
 * 6 on edges, 9 in the interior).
 *
 * NOTE: the expressions that add pixel_sum values and divide by a count
 * are a sketch. C has no '+' or '/' for struct operands, so each one has
 * to be expanded per channel (red, green, blue) before this compiles.
 *
 * If the scratch buffer cannot be allocated, dst is left untouched.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums = malloc(3 * (size_t)dim * sizeof *row_sums);
    pixel_sum *r0;
    pixel_sum *r1;
    pixel_sum *r2;
    int i, j;

    if (!row_sums)
        return;
    r0 = row_sums;
    r1 = r0+dim;

    do_row_sum(r0, dim, src);
    /* Row 1 starts dim pixels in; i is not yet initialised at this point. */
    do_row_sum(r1, dim, src+dim);

    // calculate row 0
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst++ = (*r0 + *r1)/4;

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* Overwrites the slot of row i-2, which is no longer needed. */
        do_row_sum(r2, dim, src+(i+1)*dim);
        *dst++ = (*r0++ + *r1++ + *r2++)/6;
        for (j=1; j<dim-1; j++)
            *dst++ = (*r0++ + *r1++ + *r2++)/9;
        *dst++ = (*r0 + *r1 + *r2)/6;
    }

    // calculate the last row (i == dim-1 after the loop above)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst = (*r0 + *r1)/4;

    free(row_sums);
}
/*
 * do_row_sum - Writes, for each column of one image row, the per-channel
 * total of the pixel and its existing horizontal neighbours into sums[]:
 * two pixels at the first and last column, three everywhere else.
 *
 * Requires dim >= 2. The num field of the entries is not touched.
 *
 * Struct operands cannot be added with '+' in C, and "*row++ + *row"
 * reads row while modifying it without sequencing, so each channel is
 * summed explicitly through array indexing.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    const int last = dim - 1;
    int i;

    /* First column: pixel plus right neighbour. */
    sums[0].red   = row[0].red   + row[1].red;
    sums[0].green = row[0].green + row[1].green;
    sums[0].blue  = row[0].blue  + row[1].blue;

    /* Interior columns: left neighbour, pixel, right neighbour. */
    for (i = 1; i < last; i++) {
        sums[i].red   = row[i-1].red   + row[i].red   + row[i+1].red;
        sums[i].green = row[i-1].green + row[i].green + row[i+1].green;
        sums[i].blue  = row[i-1].blue  + row[i].blue  + row[i+1].blue;
    }

    /* Last column: left neighbour plus pixel. */
    sums[last].red   = row[last-1].red   + row[last].red;
    sums[last].green = row[last-1].green + row[last].green;
    sums[last].blue  = row[last-1].blue  + row[last].blue;
}
/*
 * smooth - Sliding-window smoothing built on do_row_sum().
 *
 * Three rows of horizontal sums are kept in row_sums and reused as a ring
 * (image row k occupies slot k % 3). Each output pixel is the total of the
 * relevant row sums divided by the number of pixels covered: 4 at corners,
 * 6 along edges, 9 in the interior.
 *
 * NOTE: adding pixel_sum values with '+' and dividing by a count is a
 * sketch; C defines neither operator for structs, so every such expression
 * must be expanded per channel before this compiles.
 *
 * If the scratch buffer cannot be allocated, dst is left untouched.
 */
void smooth(int dim, pixel *src, pixel *dst)
{
    pixel_sum *row_sums = malloc(3 * (size_t)dim * sizeof *row_sums);
    pixel_sum *r0;
    pixel_sum *r1;
    pixel_sum *r2;
    int i, j;

    if (!row_sums)
        return;
    r0 = row_sums;
    r1 = r0+dim;

    do_row_sum(r0, dim, src);
    /* Second image row; i holds no value yet, so the offset is just dim. */
    do_row_sum(r1, dim, src+dim);

    // calculate row 0
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst++ = (*r0 + *r1)/4;

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* Reuses the slot that held row i-2. */
        do_row_sum(r2, dim, src+(i+1)*dim);
        *dst++ = (*r0++ + *r1++ + *r2++)/6;
        for (j=1; j<dim-1; j++)
            *dst++ = (*r0++ + *r1++ + *r2++)/9;
        *dst++ = (*r0 + *r1 + *r2)/6;
    }

    // calculate the last row (i == dim-1 once the loop above has ended)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    *dst++ = (*r0++ + *r1++)/4;
    for (j=1; j<dim-1; j++)
        *dst++ = (*r0++ + *r1++)/6;
    *dst = (*r0 + *r1)/4;

    free(row_sums);
}
# Build settings for the driver program.
CC = gcc
CFLAGS = -Wall -O2
LIBS = -lm
OBJS = driver.o kernels.o fcyc.o clock.o

# These targets name actions rather than files, so make must always run them.
.PHONY: all handin clean

all: driver

# Recipe lines must begin with a tab character.
driver: $(OBJS) fcyc.h clock.h defs.h config.h
	$(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver

# HANDINDIR, TEAM and VERSION are not set in this fragment; they have to be
# supplied on the command line or by the environment.
handin:
	cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c

# The leading '-' lets make continue even if rm reports an error.
clean:
	-rm -f $(OBJS) driver core *~ *.o
/* Accumulator for pixel averaging: channel totals and a pixel counter. */
typedef struct {
    int red, green, blue;   /* totals of the three colour channels */
    int num;                /* number of pixels accumulated */
} pixel_sum;
/*
 * initialize_pixel_sum - Resets every field of sum to zero.
 */
static void initialize_pixel_sum(pixel_sum *sum)
{
    sum->red = 0;
    sum->green = 0;
    sum->blue = 0;
    sum->num = 0;
}
/*
 * accumulate_sum - Folds pixel p into sum: each colour channel is added to
 * its total and the pixel counter goes up by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    sum->red   = sum->red   + (int) p.red;
    sum->green = sum->green + (int) p.green;
    sum->blue  = sum->blue  + (int) p.blue;
    sum->num  += 1;
}
#include <stdlib.h>
/* Per-channel totals of several pixels, with room for a pixel count. */
typedef struct {
    int red, green, blue;   /* channel totals */
    int num;                /* pixel count */
} pixel_sum;
/* One image pixel with three unsigned short colour channels. */
typedef struct {
    unsigned short red, green, blue;
} pixel;
/*
 * add_pixel_2 - Stores in sum the per-channel total of pixels a and b.
 * sum->num is left as it was.
 */
static inline void add_pixel_2(pixel_sum *sum, pixel *a, pixel *b) {
    const int red_total   = a->red   + b->red;
    const int green_total = a->green + b->green;
    const int blue_total  = a->blue  + b->blue;

    sum->red   = red_total;
    sum->green = green_total;
    sum->blue  = blue_total;
}
/*
 * add_pixel_3 - Stores in sum the per-channel total of pixels a, b and c.
 * sum->num is left as it was.
 */
static inline void add_pixel_3(pixel_sum *sum, pixel *a, pixel *b, pixel *c) {
    const int red_total   = a->red   + b->red   + c->red;
    const int green_total = a->green + b->green + c->green;
    const int blue_total  = a->blue  + b->blue  + c->blue;

    sum->red   = red_total;
    sum->green = green_total;
    sum->blue  = blue_total;
}
/*
 * do_row_sum - For each column of one image row, stores in sums[] the
 * per-channel total of the pixel and its existing horizontal neighbours:
 * add_pixel_2() at the first and last column, add_pixel_3() in between.
 *
 * Requires dim >= 2.
 *
 * Arguments are passed by index instead of "row++, row": the order in
 * which function arguments are evaluated is unspecified, so modifying row
 * in one argument while reading it in another is undefined behaviour.
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    const int last = dim - 1;
    int i;

    add_pixel_2(&sums[0], &row[0], &row[1]);
    for (i = 1; i < last; i++)
        add_pixel_3(&sums[i], &row[i-1], &row[i], &row[i+1]);
    add_pixel_2(&sums[last], &row[last-1], &row[last]);
}
/*
 * avg_pixel_sums_2 - Sets each channel of dst to the total of that channel
 * in a and b, divided (integer division) by num. num must be non-zero.
 */
static inline void avg_pixel_sums_2(pixel *dst, pixel_sum *a, pixel_sum *b, int num) {
    const int red_total   = a->red   + b->red;
    const int green_total = a->green + b->green;
    const int blue_total  = a->blue  + b->blue;

    dst->red   = red_total / num;
    dst->green = green_total / num;
    dst->blue  = blue_total / num;
}
/*
 * avg_pixel_sums_3 - Sets each channel of dst to the total of that channel
 * in a, b and c, divided (integer division) by num. num must be non-zero.
 */
static inline void avg_pixel_sums_3(pixel *dst, pixel_sum *a, pixel_sum *b, pixel_sum *c, int num) {
    const int red_total   = a->red   + b->red   + c->red;
    const int green_total = a->green + b->green + c->green;
    const int blue_total  = a->blue  + b->blue  + c->blue;

    dst->red   = red_total / num;
    dst->green = green_total / num;
    dst->blue  = blue_total / num;
}
/*
 * smooth - Sliding-window smoothing built on do_row_sum().
 *
 * row_sums holds three rows of horizontal sums used as a ring: image row k
 * lives in slot k % 3. Each output pixel is produced by avg_pixel_sums_2()
 * or avg_pixel_sums_3() from the relevant row sums, dividing by the number
 * of pixels covered (4 at corners, 6 along edges, 9 in the interior).
 *
 * If the scratch buffer cannot be allocated, dst is left untouched.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    pixel_sum *row_sums = malloc(3 * (size_t)dim * sizeof *row_sums);
    pixel_sum *r0;
    pixel_sum *r1;
    pixel_sum *r2;
    int i, j;

    if (!row_sums)
        return;
    r0 = row_sums;
    r1 = r0+dim;

    do_row_sum(r0, dim, src);
    /* Second image row; i holds no value yet, so the offset is just dim. */
    do_row_sum(r1, dim, src+dim);

    // calculate row 0
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j=1; j<dim-1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst++, r0, r1, 4);

    // calculate all the middle rows
    for (i=1; i<dim-1; i++) {
        r0 = row_sums+((i-1)%3)*dim;
        r1 = row_sums+(i%3)*dim;
        r2 = row_sums+((i+1)%3)*dim;
        /* Reuses the slot that held row i-2. */
        do_row_sum(r2, dim, src+(i+1)*dim);
        avg_pixel_sums_3(dst++, r0++, r1++, r2++, 6);
        for (j=1; j<dim-1; j++)
            avg_pixel_sums_3(dst++, r0++, r1++, r2++, 9);
        avg_pixel_sums_3(dst++, r0, r1, r2, 6);
    }

    // calculate the last row (i == dim-1 once the loop above has ended)
    r0 = row_sums+((i-1)%3)*dim;
    r1 = row_sums+(i%3)*dim;
    avg_pixel_sums_2(dst++, r0++, r1++, 4);
    for (j=1; j<dim-1; j++)
        avg_pixel_sums_2(dst++, r0++, r1++, 6);
    avg_pixel_sums_2(dst, r0, r1, 4);

    /* The scratch rows are only needed inside this call. */
    free(row_sums);
}
/*
 * do_row_sum - Fills sums[] with horizontal neighbour totals for one row:
 * the first entry and the final entry combine two pixels via add_pixel_2(),
 * every entry in between combines three via add_pixel_3().
 */
static inline void do_row_sum(pixel_sum *sums, int dim, pixel *row) {
    int col;

    add_pixel_2(&sums[0], &row[0], &row[1]);
    for (col = 1; col < dim - 1; col++)
        add_pixel_3(&sums[col], &row[col - 1], &row[col], &row[col + 1]);
    /* col is now the index of the final entry. */
    add_pixel_2(&sums[col], &row[col - 1], &row[col]);
}
/*
 * smooth (fragment) - Only the setup is visible here; the listing stops
 * after the first two row sums, before the function body is closed.
 */
void smooth(int dim, pixel *src, pixel *dst) {
/* Scratch space for three rows of dim pixel_sum entries each. */
pixel_sum *row_sums = malloc(3*dim*sizeof(pixel_sum));
/* r0 and r1 point at the first and second scratch rows. */
pixel_sum *r0 = row_sums;
pixel_sum *r1 = r0+dim;
pixel_sum *r2;
int i, j;
/* Row sums for the image rows starting at src and at src+dim. */
do_row_sum(r0, dim, src);
do_row_sum(r1, dim, src+dim);
Smooth: Version = smooth: Current working version:
Dim 32 64 128 256 512 Mean
Your CPEs 40.9 150.4 164.4 169.0 219.1
Baseline CPEs 379.5 401.6 403.2 398.8 397.2
Speedup 9.3 2.7 2.5 2.4 1.8 3.0
If you are experiencing a similar issue, please ask a related question
Title | # Comments | Views | Activity |
---|---|---|---|
In desperate need of help | 8 | 132 | |
Describe order of operation while copying text | 11 | 115 | |
What technology should I use for communication Raspberry Pi 2b and LapTop java application | 2 | 113 | |
Windows Updates failing due to Diskpart not configured correctly | 8 | 106 |
Join the community of 500,000 technology professionals and ask your questions.
Connect with top rated Experts
21 Experts available now in Live!