• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 444
  • Last Modified:

Sorting sub routine

I have the following sub-routine:


#########################################################################
#                                                                                                            #
# subroutine sort_data                                                                              #
# Subroutine that does the actual sort.                                                 #
# Accepts 4 params viz.                                                                         #
# 1. The column number to sort. Column no. start from 0.                         #
# 2. The type of sort numeric or alphabetic. Default is Alphabetic.       #
# 3. The order of sort. Default order is ascending                                    #
# 4. The referrence of the array that needs to be sorted.                         #
#########################################################################

sub sort_data {
                  
      my ($row,$type,$sort_order,$r_data) = @_;
      my (@array);
      
      if ($type eq "n") {
            if ($sort_order eq "d") {
                  @array = map { (split ('<->', $_))[1] }
                        reverse sort {$a <=> $b}
                        map { join ('<->', lc ((split('\|', $_))[$row]) , $_) }
                        @{$r_data};
            }
            else {
                  @array = map { (split ('<->', $_))[1] }
                        sort {$a <=> $b}
                        map { join ('<->', lc ((split('\|', $_))[$row]) , $_) }
                        @{$r_data};
            }
      }
      else {
            if ($sort_order eq "d") {
                  @array = map { (split ('<->', $_))[1] }
                        reverse sort {$a cmp $b}
                        map { join ('<->', lc ((split('\|', $_))[$row]) , $_) }
                        @{$r_data};
            }
            else {
                  @array = map { (split ('<->', $_))[1] }
                        sort {$a cmp $b}
                        map { join ('<->', lc ((split('\|', $_))[$row]) , $_) }
                        @{$r_data};
            }
      }

      return (\@array)
}

Someone rewrote it for me to make it more efficient and to make the warning that did showup in the error log file the moment I added the -W option to the perl command.

#########################################################################
#                                                                                                            #
# subroutine sort_data                                                                              #
# Subroutine that does the actual sort.                                                 #
# Accepts 4 params viz.                                                                         #
# 1. The column number to sort. Column no. start from 0.                         #
# 2. The type of sort numeric or alphabetic. Default is Alphabetic.       #
# 3. The order of sort. Default order is ascending                                    #
# 4. The referrence of the array that needs to be sorted.                         #
#########################################################################

sub sort_data {
                  
      my ($row,$type,$sort_order,$r_data) = @_;
      my (@array);
      
      if ($type eq "n") {
            if ($sort_order eq "d") {
                  @array = map { $_->[1] }
                        sort {$b->[0] <=> $a->[0]}
                        map { [ lc ((split('|', $_))[$row]) , $_] }
                        @{$r_data};
            }
            else {
                  @array = map { $_->[1] }
                        sort {$a->[0] <=> $b->[0]}
                        map { [ lc ((split('|', $_))[$row]) , $_] }
                        @{$r_data};
            }
      }
      else {
            if ($sort_order eq "d") {
                  @array = map { $_->[1] }
                        sort {$b->[0] cmp $a->[0]}
                        map { [ lc ((split('|', $_))[$row]) , $_] }
                        @{$r_data};
            }
            else {
                  @array = map { $_->[1] }
                        sort {$a->[0] cmp $b->[0]}
                        map { [ lc ((split('|', $_))[$row]) , $_] }
                        @{$r_data};
            }
      }

      return (\@array)
}

But it isn't working like the old version is! It isn't the same. What is different?

Forexample:

$row = 0;
$type = "n";
$sort_order = "a";

will work in the old version but not in the new version. Trying to learn perl by learning what is wrong and why. Can someone help me to fix the new code?
0
weversbv
Asked:
weversbv
  • 3
  • 2
  • 2
  • +1
1 Solution
 
ozoCommented:
 split('|', $_)
should be
  split('\|', $_)
0
 
kanduraCommented:
It doesn't work with numerical sorting, because the Schwartzian Transform uses '|' to split the rows,
but this is interpreted as an 'or' regexp. The first element in the anonymous array will always contain
only the $row-th character in the data.
I'm pretty sure your alphabetical sort would show the same problem!

You could write the whole routine a lot prettier by using the ? : operator to select two alternatives,
and by realizing that <=> and cmp return 1,0,-1 to tell how the two elements compare.

So I would write this:

sub sort_data
{
      my ($row,$type,$sort_order,$r_data) = @_;

      # $order will contain 1 for ascending and -1 for descending sort
      my $order = $sort_order eq 'd' ? -1 : 1;                    
      
      my @array = map { $_->[1] }
            sort { ( $type eq 'n' ?                              # is it a numerical sort?
                        $a->[0] <=> $b->[0] :                  # then use <=> operator
                        $a->[0] cmp $b->[0] )                  # else do lexical sort
                          * $order                        # multiply by $order to change the direction
                                                        # of the sort
                  }  map {
                        [ lc ((split('\|', $_))[$row]) , $_]      # use a Schwartzian Transform on the input.
                  } @$r_data;
      
      return \@array;
}
0
 
jmcgOwnerCommented:
One good thing about the two versions of your code is that they keep the sort comparison routine short and simple. Kandura undoes much of the virtue of the Schwartzian transform by calculating too much in the compare routine.

Ozo has spotted the essential flaw in the rewriting. Although split will take strings as its first argument, the only string that is supposed to be given to it is one containing a single space. Other than that, you should be passing a regex to match. So all the places where your rewrite has

   split('|', $_)

you should have

   split( /\|/, $_)

or, just a slight performance improvement, since you only need to split enough times to get to the field (why are you calling it a "row"???) of interest.

  split( /\|/, $_, $row+2)

In a similar vein, you could forego the call to the 'lc' operator when doing numeric searches.

0
Cloud Class® Course: Certified Penetration Testing

This CPTE Certified Penetration Testing Engineer course covers everything you need to know about becoming a Certified Penetration Testing Engineer. Career Path: Professional roles include Ethical Hackers, Security Consultants, System Administrators, and Chief Security Officers.

 
weversbvAuthor Commented:
jmcg:

or, just a slight performance improvement, since you only need to split enough times to get to the field (why are you calling it a "row"???) of interest.

  split( /\|/, $_, $row+2)

In a similar vein, you could forego the call to the 'lc' operator when doing numeric searches.

???

Could you help me to an example of what you mean?
0
 
kanduraCommented:
jmcg: you are right. In an attempt to write prettier code, I sacrified performance. I didn't think 1 'if' and 1 multiplication would matter much, but of course it adds up.

weversbv: what jmcg means is that:
1.  you don't have to split every row completely, because you only want the one column. So there's no need to split the remaining columns after that;
2.  you don't have to convert the column you're looking for to lowercase when you're sorting numerically.

To put it together, here's your new routine:

sub sort_data {
               
     my ($row,$type,$sort_order,$r_data) = @_;
     my (@array);
     
     if ($type eq "n") {
          if ($sort_order eq "d") {
               @array = map { $_->[1] }
                    sort {$b->[0] <=> $a->[0]}
                    map { [ (split(/\|/, $_, $row+2))[$row] , $_] }          #  <-- leave out the call to lc( )
                    @{$r_data};
          }
          else {
               @array = map { $_->[1] }
                    sort {$a->[0] <=> $b->[0]}
                    map { [ (split(/\|/, $_, $row+2))[$row] , $_] }          #  <-- same here
                    @{$r_data};
          }
     }
     else {
          if ($sort_order eq "d") {
               @array = map { $_->[1] }
                    sort {$b->[0] cmp $a->[0]}
                    map { [ lc ((split(/\|/, $_, $row+2))[$row]) , $_] }
                    @{$r_data};
          }
          else {
               @array = map { $_->[1] }
                    sort {$a->[0] cmp $b->[0]}
                    map { [ lc ((split(/\|/, $_, $row+2))[$row]) , $_] }
                    @{$r_data};
          }
     }

     return (\@array)
}
0
 
weversbvAuthor Commented:
I do know understand what you mean.

Is it possible to sort nummerical whenever it is possible.
So if the row to sort contains only nummerical values I woudl like to sort nummerical.
If not sort alphabetic. Is this possible?
0
 
jmcgOwnerCommented:
If your array isn't very large (how large is that? I'm not sure: 1000? 5000?), you can combine numerical and lexical sort this way:

    sort { ($a->[0] <=> $b->[0]) || ($a->[0] cmp $b->[0]) } ...

As soon as you start to do something which requires examining each term and making decisions in the comparison routine, you slow things down. As the array being sorted gets larger, it becomes increasingly important to keep the comparison routine simple. And, eventually, you may need to use an external sort program.

For a combined sort such as you are asking for, on something sufficiently large, it may be worthwhile to convert the numbers into zero-filled fields large enough to represent the largest number present. Zero-filled fields, ie, 000123 instead of just 123, allow you to use a lexical sort for numbers.
0
 
kanduraCommented:
Yes, that's possible. It means you have to investigate the data in the column before sorting.
That is, loop through the requested column. If you encounter a non-numerical value, do lexical sort, otherwise it'll be a numerical sort.

sub sort_data_check {
      my ($row,$type,$sort_order,$r_data) = @_;
      my @array = ();
      my $sort_type =  'n';      # I use this to get the sort type from the data

        # here I loop through the data, extracting the requested column
        # and investigating its type
        # note that this takes care of the first step in the Schwartzian Transform
      foreach (@$r_data)
      {
            my $k = (split(/\|/, $_, $row+2))[$row];     # this gets your column
            if($k==0 && $k ne '0') {
                  $sort_type = 'a';                                # it's not a number
            }
            push @array, [$k, $_];
      }

      if($sort_type ne 'n')                                               # convert the data to lower case
      {
            @array = map { lc($_->[0]); $_ } @array;
      }

      if ($sort_type eq "n") {
            if ($sort_order eq "d") {
                  @array = map { $_->[1] }
                        sort {$b->[0] <=> $a->[0]}
                        @array;
            } else {
                  @array = map { $_->[1] }
                        sort {$a->[0] <=> $b->[0]}
                        @array;
            }
      } else {
            if ($sort_order eq "d") {
                  @array = map { $_->[1] }
                        sort {$b->[0] cmp $a->[0]}
                                @array;
            } else {
                  @array = map { $_->[1] }
                        sort {$a->[0] cmp $b->[0]}
                        @array;
            }
      }

      return (\@array)
}



 
0
Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

Join & Write a Comment

Featured Post

Get your problem seen by more experts

Be seen. Boost your question’s priority for more expert views and faster solutions

  • 3
  • 2
  • 2
  • +1
Tackle projects and never again get stuck behind a technical roadblock.
Join Now