Burrows–Wheeler transform in c

Posted on 2011-05-03
Last Modified: 2012-08-13
Im implementing Burrows–Wheeler transform i know the code is somewhat correct but it gives me a error on
"fopen" on the writetofile function, i really, really need help.

any doubt about the code just ask.

Thanks in advance :)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char* readfromfile(char *nome);
void writetofile(char *cod, char *file);
char *codificar(char* string);
int compare(const void * a, const void * b);
char *descodificar(char *encod);

int main(int argc, char** argv) {
    char op = '\0';
    char *texto = readfromfile("texto.txt");
    printf("CODIFICAR STRING-> (S):\n'%s'", texto);
    char *string_codificada = codificar(texto);
    printf("Resultado BWT.codificador (ultima coluna da matriz) (S1):\n\"%s\"\n", string_codificada);
    writetofile(string_codificada, "codificada.txt");
    char *texto2 = readfromfile("codificada.txt");
    char *texto3 = descodificar(texto2);
    printf("String de volta ao original (S):\n\"%s\"\n\n", texto3);
    return 0;

char* readfromfile(char *nome) {
    char c;
    int num_chars = 0;
    char aux[1000];

    FILE *file;
    file = fopen(nome, "r");
    if (!file) {
        printf("\nFicheiro inexistente! [para leitura]\n");

    while (!feof(file)) {
        c = fgetc(file);
        if (c == '\n' || c == '\0' || c == ' ') {

        } else {
            aux[num_chars] = c;

    aux[num_chars - 1] = '\0';
    char *string = malloc(strlen(aux));
    strcpy(string, aux);
    return string;

void writetofile(char *codificada, char *file) {
    FILE *f;
    f = fopen(file, "w+");
    if (f == NULL) {
        printf("Ficheiro inexistente!\n");
    fputs(codificada, f);

char *codificar(char* string) {
    int size = strlen(string), l = 0, c = 0, k = 0, i = 0, j = 0, linha = 0;
    char Matrix[size][size];
    char vector[size];
    char** linhas;
    char* temp = (char*) malloc(50 * sizeof (char));
    linhas = (char**) malloc(size * sizeof (char*));

    //Cria a matriz
    for (l = 0; l < size; l++) {
        k = l;
        for (c = 0; c < size; c++) {
            if (k == size) k = 0;
            Matrix[l][c] = string[k];

    printf("\nMatriz de Codificacao:\n");

    for (l = 0; l < size; l++) {
        for (k = 0; k < size; k++) {
            printf("'%c'", Matrix[l][k]);
    //le as linhas da matriz e ordena-as
    for (i = 0; i < size; i++) {
        char* linha_aux_cod = malloc(size);
        for (j = 0; j < size; j++) {

            linha_aux_cod[j] = Matrix[i][j];
        linha_aux_cod[j] = '\0';
        linhas[i] = (char*) malloc(50 * sizeof (char));
        strcpy(linhas[i], linha_aux_cod);

    // Ordenar todas as linhas da matriz, lexicograficamente
    for (i = 1; i < size; i++)
        for (j = 0; j < size - 1; j++)
            if (strcmp(linhas[j], linhas[j + 1]) > 0) {
                strcpy(temp, linhas[j]);
                strcpy(linhas[j], linhas[j + 1]);
                strcpy(linhas[j + 1], temp);


    printf("Matriz de codificacao ordenada lexicograficamente:\n");

    for (l = 0; l < size; l++) {
        printf("[%d]'%s'", l, linhas[l]);
    //Descobre a linha do bloco original
    for (i = 0; i < size; ++i) {
        // printf("'%s'\n",linhas[i]);
        if (strcmp(linhas[i], string) == 0) {
            linha = i;
    FILE *line;
    line = fopen("string.txt", "w+");
    if (line == NULL) {
        fprintf(stderr, "Erro ao abrir ficheiro!\n");
    fprintf(line, "%d", linha);

    //Linha original I
    printf("\n* Linha com a string original (S): [%d] *\n", linha);

    //Cria o vector resultante
    for (i = 0; i < size; ++i) {
        strcpy(temp, linhas[i]); //copia a linha toda para temp
        vector[i] = temp[size - 1]; //copia o último caracter de temp para o vector[i]
        vector[i + 1] = '\0';
    char* cod = malloc(size);
    strcpy(cod, vector);
    return cod;

int compare(const void * a, const void * b) {
    return ( *(char*) a - *(char*) b);

char *descodificar(char *string_codificada) {

    char *decod = malloc(strlen(string_codificada));
    int tamanho_string_codificada = strlen(string_codificada);
    char *vector_L = malloc(strlen(string_codificada));

    int i = 0;

    int vector_T [tamanho_string_codificada];
    char *ptr;

    //copiar para L a string vinda do codificador
    strcpy(vector_L, string_codificada);

    //Ordena o novo vector L
    qsort(vector_L, tamanho_string_codificada, sizeof (char), compare);

    printf("String vinda da codificacao (S1):\n\"%s\"\n\n", string_codificada);
    printf("String,vinda da codificacao, lexicograficamente ordenada (S2):\n");
    for (i = 0; i < tamanho_string_codificada; i++)
        printf("%c", vector_L[i]);
    //Cria o vector T de indices
    for (i = 0; i < tamanho_string_codificada; i++) {
        //ptr aponta para a 1ª ocorrência do conteúdo de [i] em L
        ptr = strchr(vector_L, string_codificada[i]);
        //printf("%s", ptr);
        //vai tirando o caracter já encontrado
        vector_L[ptr - vector_L] = ' ';
        //coloca em T[i] o índice do caracter encontrado
        vector_T[i] = ptr - vector_L;
        printf("\nO caracter '%c' de S1, foi encontrado na posicao [%d] de S2", string_codificada[i], (ptr - vector_L));
          printf("\nIndices guardados em vector_T:\n");
        printf("%d ", vector_T[i]);

    int indice_T = 0;

    FILE *f;
    f = fopen("string.txt", "r");
    if (!f) {
        printf("\nErro na abertura do arquivo\n");
    char c = fgetc(f);
    indice_T = atoi(&c);
    printf("\n* Ler do ficheiro linha.txt, linha com a string original (S): [%d] *\n", indice_T);
    printf("\n* Obter a string original (S) de volta: *\n");
    //obter a string original
    for (i = 0; i < tamanho_string_codificada; i++) {
        //começa a preencher do fim da string decod para o inicio o que está na string
        //codificada na posicao linha, que vai sendo alterada ao ir buscar
        //os indices dos caracteres ao vector_T

        //sabe que na string codificada, na posicao com indice da linha com a string original
        //possui o último caracter da string original
        decod[tamanho_string_codificada - 1 - i] = string_codificada[indice_T];
        printf("Caracter '%c' encontrado em S1[%d]\n", string_codificada[indice_T],indice_T);
        //vai buscar o indice de T[linha] que contém um caracter e guarda em linha
        printf("\nIndice a pesquisar em vector_T[%d]: %d\n",indice_T, vector_T[indice_T]);
        indice_T = vector_T[indice_T];

    decod[tamanho_string_codificada] = '\0';
    return decod;

Open in new window

Question by:Vasconcelos
    LVL 31

    Accepted Solution

    Here are some comments on readfromfile:
    char aux[1000]; // what if you fill in more than 1000 chars because the file is too big?

        aux[num_chars - 1] = '\0';
        char *string = malloc(strlen(aux) + 1); // make room for the null byte
        strcpy(string, aux);

    Open in new window


    Author Comment

    i just change the size to 5000 still got a segmentation fault :(

    im kinda lost on how to solve this bug lol
    LVL 31

    Expert Comment

    >> char* linha_aux_cod = malloc(size);
    >> linha_aux_cod[j] = '\0';
    Here j should be size (after the loop). But, valid index for linha_aux_cod is from 0 .. size-1.
    LVL 84

    Expert Comment

    if size >= 50, you may be writing past the end of temp or linhas[ i ]

    you are also seem to be writing past the end of vector and string
    LVL 31

    Expert Comment

    So far there are two general problems. One is that you are not creating enough space for a terminating null byte, and the other is that your strcpy is overwriting. There is this magic number 50 that has nothing, in general, to do with the size of the reduced file.

    You could start using strncpy to avoid overwriting the destination buffer.
    LVL 31

    Expert Comment

    You free temp but then use temp in a strcpy - not good.
    You are executing malloc more times than free. To avoid memory leaks, there should be a free for each malloc executed. Back tonight or tomorrow for follow-up.
    LVL 45

    Assisted Solution

    Hi Vasconcelos,

    All good advice so far.  Here's a bit more.

    Your code will be easier to read if you segregate the declarations and the code.  (Group the variable declarations near the top of the function.)

    Use strdup() to create a copy of a string instead of calling malloc() and strcpy().  You seem to have been tripped up by allocating memory that is equal in length to the visible portion of the string.  The length of the string in memory is actually 1 byte larger than the visible portion due to the string terminator (NULL or zero byte).  (Lines 60 and 166.)  Function descodificar() has this issue in several places.

    The function readfromfile() reads only the first word in the file.  Is this what you intend?

    The function codificar() has several places that need examination.
      -- The dimensions of Maxrix[][] arte based on the size of a string that is passed to the function.  If you intend to use the items that are stored in the matrix as anything other than character data, the dimensions should be strlen(string)+1.  (That's not a requirement for the transformation.)
      --  The string buffer in readfromfile() is 1,000 characters.  The string buffer in codificar is 50.

    The function compare() (passed to qsort) compares only on the first character of the two objects.  Is that what you want or do you need to do a string comparison?

    The biggest issue does seem to be that all of your dynamic strings are 1 character too short.  Again, use strdup() to generate them and this problem solves easily.

    Good Luck,
    LVL 32

    Expert Comment

    to add to above comments:

    if you got error on fopen in writetofile you should output before exit

    - the 'file' argument
    - the errno variable (which is available when you include <errno.h>)

    i wonder why you use option "w+" for fopen. do you really need to read from the currently opened file?


    Author Closing Comment

    thanks :)

    Featured Post

    Free Trending Threat Insights Every Day

    Enhance your security with threat intelligence from the web. Get trending threat insights on hackers, exploits, and suspicious IP addresses delivered to your inbox with our free Cyber Daily.

    Join & Write a Comment

    Suggested Solutions

    An Outlet in Cocoa is a persistent reference to a GUI control; it connects a property (a variable) to a control.  For example, it is common to create an Outlet for the text field GUI control and change the text that appears in this field via that Ou…
    Preface I don't like visual development tools that are supposed to write a program for me. Even if it is Xcode and I can use Interface Builder. Yes, it is a perfect tool and has helped me a lot, mainly, in the beginning, when my programs were small…
    Video by: Grant
    The goal of this video is to provide viewers with basic examples to understand and use nested-loops in the C programming language.
    Video by: Grant
    The goal of this video is to provide viewers with basic examples to understand and use while-loops in the C programming language.

    733 members asked questions and received personalized solutions in the past 7 days.

    Join the community of 500,000 technology professionals and ask your questions.

    Join & Ask a Question

    Need Help in Real-Time?

    Connect with top rated Experts

    23 Experts available now in Live!

    Get 1:1 Help Now