• C

Burrows–Wheeler transform in c

Im implementing Burrows–Wheeler transform i know the code is somewhat correct but it gives me a error on
"fopen" on the writetofile function, i really, really need help.

any doubt about the code just ask.

Thanks in advance :)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char* readfromfile(char *nome);
void writetofile(char *cod, char *file);
char *codificar(char* string);
int compare(const void * a, const void * b);
char *descodificar(char *encod);

int main(int argc, char** argv) {
    char op = '\0';
    char *texto = readfromfile("texto.txt");
    printf("CODIFICAR STRING-> (S):\n'%s'", texto);
    char *string_codificada = codificar(texto);
    printf("Resultado BWT.codificador (ultima coluna da matriz) (S1):\n\"%s\"\n", string_codificada);
    writetofile(string_codificada, "codificada.txt");
    char *texto2 = readfromfile("codificada.txt");
    char *texto3 = descodificar(texto2);
    printf("String de volta ao original (S):\n\"%s\"\n\n", texto3);
    return 0;

char* readfromfile(char *nome) {
    char c;
    int num_chars = 0;
    char aux[1000];

    FILE *file;
    file = fopen(nome, "r");
    if (!file) {
        printf("\nFicheiro inexistente! [para leitura]\n");

    while (!feof(file)) {
        c = fgetc(file);
        if (c == '\n' || c == '\0' || c == ' ') {

        } else {
            aux[num_chars] = c;

    aux[num_chars - 1] = '\0';
    char *string = malloc(strlen(aux));
    strcpy(string, aux);
    return string;

void writetofile(char *codificada, char *file) {
    FILE *f;
    f = fopen(file, "w+");
    if (f == NULL) {
        printf("Ficheiro inexistente!\n");
    fputs(codificada, f);

char *codificar(char* string) {
    int size = strlen(string), l = 0, c = 0, k = 0, i = 0, j = 0, linha = 0;
    char Matrix[size][size];
    char vector[size];
    char** linhas;
    char* temp = (char*) malloc(50 * sizeof (char));
    linhas = (char**) malloc(size * sizeof (char*));

    //Cria a matriz
    for (l = 0; l < size; l++) {
        k = l;
        for (c = 0; c < size; c++) {
            if (k == size) k = 0;
            Matrix[l][c] = string[k];

    printf("\nMatriz de Codificacao:\n");

    for (l = 0; l < size; l++) {
        for (k = 0; k < size; k++) {
            printf("'%c'", Matrix[l][k]);
    //le as linhas da matriz e ordena-as
    for (i = 0; i < size; i++) {
        char* linha_aux_cod = malloc(size);
        for (j = 0; j < size; j++) {

            linha_aux_cod[j] = Matrix[i][j];
        linha_aux_cod[j] = '\0';
        linhas[i] = (char*) malloc(50 * sizeof (char));
        strcpy(linhas[i], linha_aux_cod);

    // Ordenar todas as linhas da matriz, lexicograficamente
    for (i = 1; i < size; i++)
        for (j = 0; j < size - 1; j++)
            if (strcmp(linhas[j], linhas[j + 1]) > 0) {
                strcpy(temp, linhas[j]);
                strcpy(linhas[j], linhas[j + 1]);
                strcpy(linhas[j + 1], temp);


    printf("Matriz de codificacao ordenada lexicograficamente:\n");

    for (l = 0; l < size; l++) {
        printf("[%d]'%s'", l, linhas[l]);
    //Descobre a linha do bloco original
    for (i = 0; i < size; ++i) {
        // printf("'%s'\n",linhas[i]);
        if (strcmp(linhas[i], string) == 0) {
            linha = i;
    FILE *line;
    line = fopen("string.txt", "w+");
    if (line == NULL) {
        fprintf(stderr, "Erro ao abrir ficheiro!\n");
    fprintf(line, "%d", linha);

    //Linha original I
    printf("\n* Linha com a string original (S): [%d] *\n", linha);

    //Cria o vector resultante
    for (i = 0; i < size; ++i) {
        strcpy(temp, linhas[i]); //copia a linha toda para temp
        vector[i] = temp[size - 1]; //copia o último caracter de temp para o vector[i]
        vector[i + 1] = '\0';
    char* cod = malloc(size);
    strcpy(cod, vector);
    return cod;

int compare(const void * a, const void * b) {
    return ( *(char*) a - *(char*) b);

char *descodificar(char *string_codificada) {

    char *decod = malloc(strlen(string_codificada));
    int tamanho_string_codificada = strlen(string_codificada);
    char *vector_L = malloc(strlen(string_codificada));

    int i = 0;

    int vector_T [tamanho_string_codificada];
    char *ptr;

    //copiar para L a string vinda do codificador
    strcpy(vector_L, string_codificada);

    //Ordena o novo vector L
    qsort(vector_L, tamanho_string_codificada, sizeof (char), compare);

    printf("String vinda da codificacao (S1):\n\"%s\"\n\n", string_codificada);
    printf("String,vinda da codificacao, lexicograficamente ordenada (S2):\n");
    for (i = 0; i < tamanho_string_codificada; i++)
        printf("%c", vector_L[i]);
    //Cria o vector T de indices
    for (i = 0; i < tamanho_string_codificada; i++) {
        //ptr aponta para a 1ª ocorrência do conteúdo de [i] em L
        ptr = strchr(vector_L, string_codificada[i]);
        //printf("%s", ptr);
        //vai tirando o caracter já encontrado
        vector_L[ptr - vector_L] = ' ';
        //coloca em T[i] o índice do caracter encontrado
        vector_T[i] = ptr - vector_L;
        printf("\nO caracter '%c' de S1, foi encontrado na posicao [%d] de S2", string_codificada[i], (ptr - vector_L));
          printf("\nIndices guardados em vector_T:\n");
        printf("%d ", vector_T[i]);

    int indice_T = 0;

    FILE *f;
    f = fopen("string.txt", "r");
    if (!f) {
        printf("\nErro na abertura do arquivo\n");
    char c = fgetc(f);
    indice_T = atoi(&c);
    printf("\n* Ler do ficheiro linha.txt, linha com a string original (S): [%d] *\n", indice_T);
    printf("\n* Obter a string original (S) de volta: *\n");
    //obter a string original
    for (i = 0; i < tamanho_string_codificada; i++) {
        //começa a preencher do fim da string decod para o inicio o que está na string
        //codificada na posicao linha, que vai sendo alterada ao ir buscar
        //os indices dos caracteres ao vector_T

        //sabe que na string codificada, na posicao com indice da linha com a string original
        //possui o último caracter da string original
        decod[tamanho_string_codificada - 1 - i] = string_codificada[indice_T];
        printf("Caracter '%c' encontrado em S1[%d]\n", string_codificada[indice_T],indice_T);
        //vai buscar o indice de T[linha] que contém um caracter e guarda em linha
        printf("\nIndice a pesquisar em vector_T[%d]: %d\n",indice_T, vector_T[indice_T]);
        indice_T = vector_T[indice_T];

    decod[tamanho_string_codificada] = '\0';
    return decod;

Open in new window

Who is Participating?
phoffricConnect With a Mentor Commented:
Here are some comments on readfromfile:
char aux[1000]; // what if you fill in more than 1000 chars because the file is too big?

    aux[num_chars - 1] = '\0';
    char *string = malloc(strlen(aux) + 1); // make room for the null byte
    strcpy(string, aux);

Open in new window

VasconcelosAuthor Commented:
i just change the size to 5000 still got a segmentation fault :(

im kinda lost on how to solve this bug lol
>> char* linha_aux_cod = malloc(size);
>> linha_aux_cod[j] = '\0';
Here j should be size (after the loop). But, valid index for linha_aux_cod is from 0 .. size-1.
Improve Your Query Performance Tuning

In this FREE six-day email course, you'll learn from Janis Griffin, Database Performance Evangelist. She'll teach 12 steps that you can use to optimize your queries as much as possible and see measurable results in your work. Get started today!

if size >= 50, you may be writing past the end of temp or linhas[ i ]

you are also seem to be writing past the end of vector and string
So far there are two general problems. One is that you are not creating enough space for a terminating null byte, and the other is that your strcpy is overwriting. There is this magic number 50 that has nothing, in general, to do with the size of the reduced file.

You could start using strncpy to avoid overwriting the destination buffer.
You free temp but then use temp in a strcpy - not good.
You are executing malloc more times than free. To avoid memory leaks, there should be a free for each malloc executed. Back tonight or tomorrow for follow-up.
Kent OlsenConnect With a Mentor Data Warehouse Architect / DBACommented:
Hi Vasconcelos,

All good advice so far.  Here's a bit more.

Your code will be easier to read if you segregate the declarations and the code.  (Group the variable declarations near the top of the function.)

Use strdup() to create a copy of a string instead of calling malloc() and strcpy().  You seem to have been tripped up by allocating memory that is equal in length to the visible portion of the string.  The length of the string in memory is actually 1 byte larger than the visible portion due to the string terminator (NULL or zero byte).  (Lines 60 and 166.)  Function descodificar() has this issue in several places.

The function readfromfile() reads only the first word in the file.  Is this what you intend?

The function codificar() has several places that need examination.
  -- The dimensions of Maxrix[][] arte based on the size of a string that is passed to the function.  If you intend to use the items that are stored in the matrix as anything other than character data, the dimensions should be strlen(string)+1.  (That's not a requirement for the transformation.)
  --  The string buffer in readfromfile() is 1,000 characters.  The string buffer in codificar is 50.

The function compare() (passed to qsort) compares only on the first character of the two objects.  Is that what you want or do you need to do a string comparison?

The biggest issue does seem to be that all of your dynamic strings are 1 character too short.  Again, use strdup() to generate them and this problem solves easily.

Good Luck,
to add to above comments:

if you got error on fopen in writetofile you should output before exit

- the 'file' argument
- the errno variable (which is available when you include <errno.h>)

i wonder why you use option "w+" for fopen. do you really need to read from the currently opened file?

VasconcelosAuthor Commented:
thanks :)
Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

All Courses

From novice to tech pro — start learning today.