#!/usr/bin/perl -w
use strict;
use DB_File;
use File::Basename qw(basename);

# Print the help text and exit when no output filename was given or when help
# was requested explicitly. The argument is tested for presence/length rather
# than truthiness so that an output file literally named "0" remains usable.
if (   !@ARGV
    or !length($ARGV[0])
    or $ARGV[0] eq "help"
    or $ARGV[0] eq "-h"
    or $ARGV[0] eq "--help") {
  print <<EOF;

  This program expects a list of files on the standard input which are 
  chromatogram files. This program will (re)compress the files using bzip2
  and concatenate them into a Berkeley database file.

  The intended usage is for collecting an entire plate (or other logical 
  group) of chromatograms into a single file for ease of storage, archiving, 
  retrieval, etc. 

  This is better than tar or zip files, since single chromatograms can be
  retrieved efficiently, even if the archive is large. Storing the files
  individually on the disk leads to inode bleed as well as trouble managing
  large directories.

  NOTE: Path information is lost by this program. If paths are specified with
        the filename list on STDIN, they will be used to read the 
        chromatograms but only the filename itself will be stored in the 
        database file.

  NOTE: Expects files to be already compressed with gzip (.gz) or
        compress (.Z). We will uncompress and recompress with bzip2.

  NOTE: This program will abort if output file exists.

  Usage: $0 <output database filename> < <list of chromatogram files>

EOF
  exit(0);
}

# The first command-line argument names the database file to create.
my $outputfile = shift @ARGV;

# Never clobber an existing archive: report it and stop with a failure status.
if (-f $outputfile) {
  print "\n",
        "  Output file \"$outputfile\" exists. This program will not overwrite it.\n",
        "\n";
  exit(-1);
}

# %cgrams is the on-disk hash (filename => bzip2-compressed chromatogram).
# The object returned by tie is kept in $database so that sync() can be
# called on it once all entries have been stored.
my %cgrams;
my $database = tie(%cgrams, 'DB_File', $outputfile, O_CREAT | O_RDWR, 0666, $DB_HASH);
die "Failed bind database file \"$outputfile\" ($!)" unless $database;

# Read one chromatogram path per line from STDIN, recompress each file with
# bzip2 and store the result in %cgrams under the file's base name.
# Dies on the first input that cannot be processed.
while (my $path = <STDIN>) {
  chomp $path;

  # Require a real ".Z" / ".gz" extension (literal dot), as the message says.
  $path =~ m/\.(?:Z|gz)\z/
    or die ".Z or .gz extension expected but got \"$path\"";
  -f $path
    or die "Input file \"$path\" does not exist or is not a regular file";

  # Quote the path for the shell so that spaces or metacharacters in a
  # filename cannot break or hijack the pipeline. A leading "-" is hidden
  # behind "./" so zcat does not mistake the name for an option.
  my $quoted = $path =~ /^-/ ? "./$path" : $path;
  $quoted =~ s/'/'\\''/g;

  # NOTE: the pipeline's exit status is that of bzip2 (the last command), so
  # a failure inside zcat itself is not detected by the close() check below.
  open my $pipe, '-|', "zcat '$quoted' | bzip2 -c9 -"
    or die "Cannot start compression pipe for \"$path\" ($!)";
  binmode $pipe;

  # Key is the filename without directories; only a ".gz" suffix is removed
  # (a ".Z" suffix is kept, matching the keys written by earlier versions).
  my $name = basename($path, '.gz');

  # Collect the whole compressed stream; read() returns undef on error and
  # 0 at end of file, so the two cases are told apart explicitly.
  my $data = "";
  while (1) {
    my $got = read($pipe, $data, 65536, length $data);
    defined $got
      or die "Error reading compression pipe for \"$path\" ($!)";
    last if $got == 0;
  }

  close $pipe
    or die "Error closing compression pipe ($! - $?)";

  # Paths are dropped, so two inputs can share a key; the later one wins.
  warn "Duplicate entry \"$name\" (from \"$path\") replaces an earlier one\n"
    if exists $cgrams{$name};

  $cgrams{$name} = $data;
}

# Flush pending writes to disk and fail loudly if that does not succeed;
# DB_File methods return 0 on success and set $! on error.
$database->sync() == 0
  or die "Failed to sync database file ($!)";

# Release the object reference before untie so that no inner reference to
# the tied hash is left when it is untied.
undef $database;
untie %cgrams;
