# *%) $Id: Comparison.pm,v 1.1.1.1 2004/05/25 01:34:52 scottz Exp $
#
# Copyright (c) 2004 Scott Zuyderduyn <scottz@bccrc.ca>.
# All rights reserved. This program is free software; you
# can redistribute it and/or modify it under the same
# terms as Perl itself.

package Bio::SAGE::Comparison;

=pod

=head1 NAME

Bio::SAGE::Comparison - Compares data from serial analysis of gene expression (SAGE) libraries.

=head1 SYNOPSIS

  use Bio::SAGE::Comparison;
  $sage = Bio::SAGE::Comparison->new();

=head1 DESCRIPTION

This module provides several tools for comparing
data generated from serial analysis of gene
expression (SAGE) libraries.

=head1 README

B<BACKGROUND>

Serial analysis of gene expression (SAGE) is a molecular 
technique for generating a near-global snapshot of a 
cell populations transcriptome.  Briefly, the technique 
extracts short sequences at defined positions of 
transcribed mRNA.  These short sequences are then paired 
to form ditags.  The ditags are concatamerized to form 
long sequences that are then cloned.  The cloned DNA is 
then sequenced.  Bioinformatic techniques are then 
employed to determine the original short tag sequences, 
and to derive their progenitor mRNA.  The number of times
a particular tag is observed can be used to quantitate
the amount of a particular transcript.  The original 
technique was described by Velculescu et al. (1995) and 
utilized an ~14bp sequence tag.  A modified protocol 
was introduced by Saha et al. (2002) that produced ~21bp 
tags.

B<PURPOSE>

This module facilitates the comparison of SAGE libraries.
Specifically:

  1. Calculations for determining the statistical
     significance of expression differences.
  2. Dynamically convert longer-tag libraries to
     a shorter type for comparison (e.g. comparing
     a LongSAGE vs. a regular SAGE library).

Both regular SAGE (14mer tag) and LongSAGE (21mer tag) 
are supported by this module.

Statistical significance in library comparisons is
calculated using the method described by Audic and 
Claverie (1997).  Code was generated by directly
porting the authors' original C source.

B<REFERENCES>

  Velculescu V, Zhang L, Vogelstein B, Kinzler KW. (1995) 
  Serial analysis of gene expression. Science. 270:484-487.

  Saha S, Sparks AB, Rago C, Akmaev V, Wang CJ, Vogelstein B, 
  Kinzler KW, Velculescu V. (2002) Using the transcriptome 
  to annotate the genome. Nat. Biotechnol. 20:508-512.

  Audic S, Claverie JM. (1997) The significance of digital
  gene expression profiles. Genome Res. 7:986-995.

=head1 INSTALLATION

Follow the usual steps for installing any Perl module:

  perl Makefile.PL
  make test
  make install

=head1 PREREQUISITES

None.

=head1 CHANGES

  1.00 2004.05.24 - Initial release.

=cut

use strict;
use diagnostics;
use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $DEBUG );

require Exporter;
require AutoLoader;

@ISA = qw( Exporter AutoLoader );
@EXPORT = qw();

$VERSION = "1.00";

my $PACKAGE = "Bio::SAGE::Comparison";

=pod

=head1 VARIABLES

B<Settings>

=over 2

I<$DEBUG>

  Prints debugging output if value if >= 1.

=back

=cut

$DEBUG = 0; # set this flag to non-zero to enable debugging messages

=pod

=head1 CLASS METHODS

=cut

#######################################################
sub new {
#######################################################
=pod

=head2 new

Constructor for a new Bio::SAGE::Comparison object.

B<Arguments>

  None.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();

=cut

    my $this = shift;
    my $class = ref( $this ) || $this;
    my $self = {};
    bless( $self, $class );

    # set instance variables

    return $self;
    
}

#######################################################
sub calculate_significance {
#######################################################
=pod

=head2 calculate_significance $x, $y, $Nx, $Ny, <$signedValue>

Determines the statistical significance of the difference
in tag count (expression) between two libraries.  This
function uses the method described by Audic and 
Claverie (1997).  This method can be called on an
instantiated object, as well as statically.

B<Arguments>

I<$x,$y>

  The number of tags in the x- and y-axis 
  libraries, respectively.

I<$Nx,$Ny>

  The total number of tags in the x- and y-axis
  libraries, respectively.

I<$signedValue> (optional)

  A boolean value (>=1 is FALSE).  If this flag is
  set to TRUE, downregulated comparisons will return
  a both p-value and either +1, -1, or 0 to indicate
  up/down/same-regulation (i.e. -1 if the expression 
  ratio of tags in the x-axis library(s) is greater 
  than that of the y-axis library(s)).  This flag
  is FALSE by default.

B<Returns>

  The p-value for the observation.  A lower number is
  more significant.  Typically, observations with
  p <= 0.05 are considered statistically significant.

  If $signedValue is set to TRUE, the function also
  returns a 0, -1 or +1 to indicate same/down/up-regulation.

B<Usage>

  # the function is static, so it can be accessed directly
  my $p = Bio::SAGE::Comparison::calculate_significance( 3, 10, 50000, 60000 );

  # or:
  my ( $p, $sign ) = Bio::SAGE::Comparison::calculate_significance( 3, 10, 50000, 60000, 1 );
  if( $p <= 0.05 ) {
    if( $sign == +1 ) { print "Significantly upregulated.\n"; }
    if( $sign == -1 ) { print "Significantly downregulated.\n"; }
    if( $sign == 0 ) { die( "Same expression should never be significant!" ); }
  }

=cut

    my $x = shift;
    if( !defined( $x ) ) { die( $PACKAGE . "::calculate_significance no arguments provided." ); }
    # was this called from object?
    if( !ref( $x ) && ref( $x ) eq $PACKAGE ) {
        $x = shift; # next argument is actually what we're after
    }
    my $y = shift;
    my $M = shift; # cf n1
    my $N = shift; # cf n2
    my $bSign = shift || 0;

    my $p = $M / ( $M+$N );

    my $thisproba = __betai( $x+1, $y+1, $p );
    my $thisproba2 = __betai( $y+1, $x+1, 1.0-$p );

    if( $bSign >= 1 ) {
        my $ratio1 = $x / $M;
        my $ratio2 = $y / $N;
        my $sign = 0;
        if( $ratio1 > $ratio2 ) { $sign = -1; }
        if( $ratio1 < $ratio2 ) { $sign = +1; }
        return ( ( $thisproba < $thisproba2 ? ( 2*$thisproba ) : ( 2*$thisproba2 ) ), $sign );
    }

    return ( $thisproba < $thisproba2 ? ( 2*$thisproba ) : ( 2*$thisproba2 ) );

}

=pod

=head1 INSTANCE METHODS

=cut

#######################################################
sub load_library {
#######################################################
=pod

=head2 load_library $handle
    
Takes a Perl handle to SAGE library data and prepares 
a tag data hash (format: [tag]<whitespace>[count]).

B<Arguments>

I<$handle>
    
  A Perl handle (ie. filehandle, STDIN, etc.) that
  can be used to read in SAGE library data.
    
B<Returns>

  A hashref containing tag sequences
  as keys and the number of times that tag was
  observed as the value.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  my %data = %{$sage->load_library( *STDIN )};
  # print data in descending order of tag count
  map { print join( "\t", $_, $data{$_} ) . "\n"; } sort { $data{$b} <=> {$a} } keys %data;

=cut
    
    my $this = shift;
    my $handle = shift;

    my %data;
    while( my $line = <$handle> ) {
        chomp( $line ); $line =~ s/\r//g;
        my( $tag, $cnt ) = split( /[\s\t]+/, $line );
        $data{$tag} = $cnt;
    }

    return \%data;

}


#######################################################
sub add_library {
#######################################################
=pod

=head2 add_library $label, \%tagData

Adds a library to this object.

B<Arguments>

I<$label>

  A unique label for this library data. This label can
  then be used to refer to the data.

I<\%tagData>

  A hashref containing the library data. The keys
  are tag sequences, the values are tag counts. A 
  properly formatted hash can be created using
  load_library.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  my %data = %{$sage->load_library( *STDIN )};
  # or alternatively:
  my %data = ( 'AACGACTGTT' => 100,
               'CAGATACAAG' => 23,
               'AGATAAAGAC' => 45 );
  $sage->add_library( 'MYLIB', \%data );

=cut

    my $this = shift;
    my $label = shift || die( $PACKAGE . "::add_library no library label was specified." );
    my $pHash = shift || die( $PACKAGE . "::add_library no tag data was specified." );

    my $tagLength = -1;

    my $total_tags = 0;
    foreach my $tag ( keys %$pHash ) {
        my $len = length( $tag );
        if( $tagLength == -1 ) { 
            $tagLength = $len; 
        } else {
            if( $tagLength != $len ) { die( $PACKAGE . "::add_library tag length not consistent ($tagLength != $len)." ); }
        }
        $this->{'libraries'}->{$label}->{$tag} = $$pHash{$tag};
        $total_tags += $$pHash{$tag};
    }
    $this->{'tag_length'}->{$label} = $tagLength;

    $this->{'totals'}->{$label} = $total_tags;

}

#######################################################
sub get_library_labels {
#######################################################
=pod

=head2 get_library_labels

Gets the labels that identify the currently added
libraries.

B<Arguments>

None.

B<Returns>

  An array of library labels.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  my %data = %{$sage->load_library( *STDIN )};
  $sage->add_library( 'MYLIB', \%data );
  print "LIBRARY_NAMES: " . join( ", " , $sage->get_library_labels() ) . "\n";

=cut

    my $this = shift;

    my @labels;
    push( @labels, keys %{$this->{'libraries'}} );

    return @labels;

}

#######################################################
sub get_library_size {
#######################################################
=pod

=head2 get_library_size $label

Gets the total number of tags (the sum of all observed
tag counts for a library(s)).

B<Arguments>

I<$label>

  This can be: a) a string denoting the library label,
  b) a reference to a string denoting the library
  label, or c) an arrayref containing several
  library labels that are pooled to calculate total
  size.

B<Returns>

  The total number of observed tags in the library(s)
  specified.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  my %data = %{$sage->load_library( *STDIN )};
  $sage->add_library( 'MYLIB', \%data );
  print "LIBRARY_SIZE: " . $sage->get_library_size( 'MYLIB' ) . "\n";

=cut

    my $this = shift;
    my $label = shift || die( $PACKAGE . "::get_library_size no library specified." );

    my @labels;

    # make sure the library names variables are in array ref format
    if( !ref( $label ) ) { # scalar (single library name)
        push( @labels, $label );
    } else {
        my $type = ref( $label );
        if( $type eq 'SCALAR' ) {
            push( @labels, $$label );
        }
        elsif( $type eq 'ARRAY' ) {
            push( @labels, @$label );
        }
        else {
            die( $PACKAGE . "::get_library_size libraries cannot be reference to type " . $type );
        }
    }

    if( scalar( @labels ) == 1 ) {
        if( !defined( $this->{'totals'}->{$labels[0]} ) ) {
            die( $PACKAGE . "::get_library_size library $labels[0] not found." );
        }
        return $this->{'totals'}->{$labels[0]};
    }

    my $total = 0;
    foreach my $id ( @labels ) {
        if( !defined( $this->{'totals'}->{$id} ) ) {
            die( $PACKAGE . "::get_library_size library $id not found." );
        }
        $total += $this->{'totals'}->{$id};
    }

    return $total;

}

#######################################################
sub get_number_tag_sequences {
#######################################################
=pod

=head2 get_number_tag_sequences $label

Gets the number of discrete tag sequences present in
the specified library(s).

B<Arguments>

I<$label>

  This can be: a) a string denoting the library label,
  b) a reference to a string denoting the library
  label, or c) an arrayref containing several
  library labels that are pooled to calculate the
  number of tag sequences.

B<Returns>

  The number of different tags in the library(s)
  specified.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  my %data = %{$sage->load_library( *STDIN )};
  $sage->add_library( 'MYLIB', \%data );
  print "TAG_SEQUENCES: " . $sage->get_number_tag_sequences( 'MYLIB' ) . "\n";

=cut

    my $this = shift;
    my $label = shift || die( $PACKAGE . "::get_number_tag_sequences no library specified." );

    my @labels;

    # make sure the library names variables are in array ref format
    if( !ref( $label ) ) { # scalar (single library name)
        push( @labels, $label );
    } else {
        my $type = ref( $label );
        if( $type eq 'SCALAR' ) {
            push( @labels, $$label );
        }
        elsif( $type eq 'ARRAY' ) {
            push( @labels, @$label );
        }
        else {
            die( $PACKAGE . "::get_number_tag_sequences libraries cannot be reference to type " . $type );
        }
    }

    if( scalar( @labels ) == 1 ) {
        if( !defined( $this->{'libraries'}->{$labels[0]} ) ) {
            die( $PACKAGE . "::get_number_tag_sequences library $labels[0] not found." );
        }
        return scalar( keys( %{$this->{'libraries'}->{$label}} ) );
    }

    my %combo;
    foreach my $id ( @labels ) {
        if( !defined( $this->{'libraries'}->{$id} ) ) {
            die( $PACKAGE . "::get_number_tag_sequences library $id not found." );
        }
        map { $combo{$_}++ } keys %{$this->{'libraries'}->{$id}};
    }

    return scalar( keys( %combo ) );

}


#######################################################
sub get_library_comparison {
#######################################################
=pod

=head2 get_library_comparison $x_axis_libraries, $y_axis_libraries

Creates a comparison between two libraries.  This method
returns a hash reference containing the library data and
a p-value for determining statistical signifance.

If the libraries contain tags of different sizes (i.e.
comparing a regular SAGE library vs. a LongSAGE library)
the data is converted to the shortest tag length of
the libraries specified prior to comparison.

B<Arguments>

I<$x_axis_libraries,$y_axis_libraries>

  Library labels for the x- and y-axis, respectively.

  This can be: a) a string denoting the library label,
  b) a reference to a string denoting the library
  label, or c) an arrayref containing several
  library labels that are pooled in the comparison.

B<Returns>

  A hashref is returned where the keys are tag sequences, 
  and the values are hashrefs with the keys 'x' 
  (tags in x-axis library(s)), 'y' (tags in y-axis 
  library(s)), 'reg' (0,-1,+1 denoting same/down/up-regulation), 
  and 'p' (statistical significance).

  i.e. $HASH{$tag}->{'p'} = 0.05;

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  open( LIB1, "lib1.tags.txt" );
  $sage->add_library( 'LIB1', $sage->load_library( *LIB1 ) );
  close( LIB1 );
  open( LIB2, "lib2.tags.txt" );
  $sage->add_library( 'LIB2', $sage->load_library( *LIB2 ) );
  close( LIB2 );
  my %comparison = %{$sage->get_library_comparison( 'LIB1', 'LIB2' );
  # or alternatively:
  my %comparison = %{$sage->get_library_comparison( ['LIB1'], ['LIB2'] );
  # print results in ascending order of p-value (more significant first)
  foreach my $tag ( sort { $comparison{$a}->{'p'} <=> $comparison{$b}->{'p'} } keys %comparison ) {
       print join( "\t", $tag, map { $comparison{$tag}->{$_} } ( 'x','y','reg','p' ) ) . "\n";
  }

=cut

    my $this = shift;
    my $pX = shift || die( $PACKAGE . "::get_library_comparison no x-axis libraries were specified." );
    my $pY = shift || die( $PACKAGE . "::get_library_comparison no y-axis libraries were specified." );

    # make sure the library names variables are in array ref format
    if( !ref( $pX ) ) { # scalar (single library name)
        $pX = [ $pX ];
    } else {
        my $type = ref( $pX );
        if( $type eq 'SCALAR' ) {
            $pX = [ $$pX ];
        }
        elsif( $type eq 'ARRAY' ) {
            # good
        }
        else {
            die( $PACKAGE . "::get_library_comparison x-axis libraries cannot be reference to type " . $type );
        }
    }
    if( !ref( $pY ) ) { # scalar (single library name)
        $pY = [ $pY ];
    } else {
        my $type = ref( $pY );
        if( $type eq 'SCALAR' ) {
            $pY = [ $$pY ];
        }
        elsif( $type eq 'ARRAY' ) {
            # good
        }
        else {
            die( $PACKAGE . "::get_library_comparison y-axis libraries cannot be reference to type " . $type );
        }
    }

    # what's the shortest tag length?
    my $tag_length = -1;
    foreach my $label ( @$pX ) {
        if( !defined( $this->{'libraries'}->{$label} ) ) {
            die( $PACKAGE . "::get_library_comparison unrecognized library name '" . $label . "'" );
        }
        my $len = $this->{'tag_length'}->{$label};
        if( $tag_length == -1 ) { $tag_length = $len; next; }
        if( $tag_length != $len ) {
            # library has different size tags, will have to truncate
            $tag_length = ( $tag_length < $len ) ? $tag_length : $len;
        }
    }
    foreach my $label ( @$pY ) {
        if( !defined( $this->{'libraries'}->{$label} ) ) {
            die( $PACKAGE . "::get_library_comparison unrecognized library name '" . $label . "'" );
        }
        my $len = $this->{'tag_length'}->{$label};
        if( $tag_length == -1 ) { $tag_length = $len; next; }
        if( $tag_length != $len ) {
            # library has different size tags, will have to truncate
            $tag_length = ( $tag_length < $len ) ? $tag_length : $len;
        }
    }

    # prepare properly formatted tag data
    my %data;
    foreach my $label ( @$pX ) {
        my $len = $this->{'tag_length'}->{$label};
        if( $len != $tag_length ) {
            # must be shorter - create truncated version
            $data{$label} = $this->__truncateLibrary( $label, $tag_length );
        } else {
            $data{$label} = $this->{'libraries'}->{$label};
        }
    }
    foreach my $label ( @$pY ) {
        my $len = $this->{'tag_length'}->{$label};
        if( $len != $tag_length ) {
            # must be shorter - create truncated version
            $data{$label} = $this->__truncateLibrary( $label, $tag_length );
        } else {
            $data{$label} = $this->{'libraries'}->{$label};
        }
    }

    # combine tags for all libraries
    my %tags;
    foreach my $label ( @$pX ) {
        foreach my $tag ( keys %{$data{$label}} ) {
            $tags{$tag}++;
        }
    }
    foreach my $label ( @$pY ) {
        foreach my $tag ( keys %{$data{$label}} ) {
            $tags{$tag}++;
        }
    }

    # get total tags for each axis
    my $M = 0;
    my $N = 0;
    foreach my $label ( @$pX ) { $M += $this->{'totals'}->{$label}; }
    foreach my $label ( @$pY ) { $N += $this->{'totals'}->{$label}; }

    # now get values for each tag
    my %RESULTS;

    my %SIGCACHE;

    foreach my $tag ( keys %tags ) {

        my $x = 0;
        foreach my $label( @$pX ) { $x += $data{$label}->{$tag} || 0; }
        my $y = 0;
        foreach my $label( @$pY ) { $y += $data{$label}->{$tag} || 0; }

        # get significance value
        my $p = 0;
        my $reg = 0;
        if( defined( $SIGCACHE{$x.'-'.$y} ) ) { 
            $p = $SIGCACHE{$x.'-'.$y}->{'p'};
            $reg = $SIGCACHE{$x.'-'.$y}->{'reg'};
        } else {
            ( $p, $reg ) = calculate_significance( $x, $y, $M, $N, 1 );
            $SIGCACHE{$x.'-'.$y}->{'p'} = $p;
            $SIGCACHE{$x.'-'.$y}->{'reg'} = $reg;
        }

        $RESULTS{$tag}->{'x'} = $x;
        $RESULTS{$tag}->{'y'} = $y;
        $RESULTS{$tag}->{'p'} = $p;
        $RESULTS{$tag}->{'reg'} = $reg;

    }

    return \%RESULTS;

}

#######################################################
sub print_library_comparison {
#######################################################
=pod

=head2 print_library_comparison \%comparison

Prints a report based on the supplied comparison result
hash.

B<Arguments>

I<\%comparison>

  A properly formed hashref containing library
  comparison results.  This structure can be created
  with get_library_comparison.

  A sample output looks like:

  Tag         N(x)    N(y)   reg  p
  AGATCAAGAT  3388    50     -1   0
  GATAACACTT  11481   186    -1   0
  TATAACACCA  4       607    1    4.1136713480649e-306
  ... etc.

B<Usage>

  my $sage = Bio::SAGE::Comparison->new();
  # load library data
  # ...
  $sage->print_library_comparison( $sage->get_library_comparison( 'LIB1','LIB2' ) );

=cut

    my $this = shift;
    my $pResult = shift || die( $PACKAGE . "::print_library_comparison no comparison result hash was specified." );

    my %data = %{$pResult};

    print join( "\t", "Tag", "N(x)", "N(y)", "reg", "p" ) . "\n";

    foreach my $tag ( sort { $data{$a}->{'p'} <=> $data{$b}->{'p'} } keys %data ) {
         print join( "\t", $tag, map { $data{$tag}->{$_} } ( 'x','y','reg','p' ) ) . "\n";
    }

}

###################################
# Audic and Claverie C->Perl Port #
###################################

my $MAXIT = 500;
my $EPS   = 3.0E-30;
my $FPMIN = 1.0E-30;

my @COF = ( 76.18009172947146, -86.50532032941677,
            24.01409824083091, -1.231739572450155,
            0.1208650973866179E-2,-0.5395239384953E-5 );

sub __gammln {

    my $xx = shift;

    my $x = $xx;
    my $y = $xx;

    my $tmp = $x + 5.5;
    $tmp -= ( $x + 0.5 ) * log( $tmp );
    my $ser = 1.000000000190015;
    for( my $j = 0; $j <= 5; $j++ ) { $ser += $COF[$j] / ++$y; }

    return -$tmp + log( 2.5066282746310005 * $ser / $x );

}

sub __betai {

    my $a = shift;
    my $b = shift;
    my $x = shift;

    if( $x < 0.0 || $x > 1.0 ) {
        die( "Bad x in routine betai." );
    }

    my $bt;

    if( $x == 0.0 || $x == 1.0 ) { 
        $bt = 0.0; 
    } else {
        $bt = exp( __gammln( $a+$b ) - __gammln( $a ) - __gammln( $b ) + $a*log( $x ) + $b*log( 1.0-$x ) );
    }

    if( $x < ( $a+1.0 )/( $a+$b+2.0 ) ) {
        return $bt * __betacf( $a, $b, $x ) / $a;
    }

    return 1.0 - $bt * __betacf( $b, $a, 1.0-$x ) / $b;

}

sub __fabs {
    my $x = shift;
    return ( $x < 0 ? -$x : $x );
}

sub __betacf {

    my $a = shift;
    my $b = shift;
    my $x = shift;

    my $qab = $a + $b;
    my $qap = $a + 1.0;
    my $qam = $a - 1.0;
    my $c = 1.0;
    my $d = 1.0 - $qab * $x / $qap;

    if( __fabs( $d ) < $FPMIN ) { $d = $FPMIN; }

    $d = 1.0 / $d; # inverse d
    my $h = $d;
    my $m;
    for( $m = 1; $m <= $MAXIT; $m++ ) {
        my $m2 = 2 * $m;
        my $aa = $m * ( $b-$m ) * $x / ( ( $qam + $m2 ) * ( $a + $m2 ) );

        $d = 1.0 + $aa*$d;
        if( __fabs( $d ) < $FPMIN ) { $d = $FPMIN; }

        $c = 1.0 + $aa/$c;
        if( __fabs( $c ) < $FPMIN ) { $c = $FPMIN; }

        $d = 1.0 / $d;  # inverse d

        $h *= $d*$c;

        $aa = -($a+$m)*($qab+$m)*$x/(($a+$m2)*($qap+$m2));

        $d = 1.0 + $aa * $d;
        if( __fabs( $d ) < $FPMIN ) { $d = $FPMIN; }

        $c = 1.0 + $aa / $c;
        if( __fabs( $c ) < $FPMIN ) { $c = $FPMIN; }

        $d = 1.0 / $d; # inverse d;

        my $del = $d*$c;
        $h *= $del;
        if( __fabs( $del-1.0 ) < $EPS ) { last; }
    }

    if( $m > $MAXIT ) {
        die( "a or b too big, or MAXIT too small in __betacf" );
    }

    return $h;

}

sub __truncateLibrary {

    my $this = shift;
    my $label = shift;
    my $length = shift;

    my %truncated;
    my %tags = %{$this->{'libraries'}->{$label}};
    foreach my $tag ( keys %tags ) {
        my $trunctag = substr( $tag, 0, $length );
        $truncated{$trunctag} += $tags{$tag};
    }

    return \%truncated;

}

1;

__END__

=pod

=head1 COPYRIGHT

Copyright(c)2004 Scott Zuyderduyn <scottz@bccrc.ca>. All rights reserved.

This program is free software; you can redistribute it and/or modify it 
under the same terms as Perl itself.

=head1 AUTHOR

Scott Zuyderduyn <scottz@bccrc.ca>
BC Cancer Research Centre

=head1 VERSION

  1.00

=head1 SEE ALSO

perl(1).

=head1 TODO

  Nothing yet.

=cut
