--- temp_patchgen	2006-11-13 17:25:20.000000000 -0800
+++ validate_submission.pl	2006-11-13 17:25:06.000000000 -0800
@@ -11,8 +11,6 @@
 
 #use Data::Dumper;
 
-use CXGN::BACSubmission;
-
 sub usage {
   my $message = shift || '';
   $message = "Error: $message\n" if $message;
@@ -115,18 +113,6 @@
 
 =cut
 
-use base qw/Exporter/;
-
-BEGIN {
-  our @EXPORT_OK = qw/ every_other_elem
-		       any  all none notall true false
-		       max min flatten
-		       collate
-		       str_in
-                       distinct
-		     /;
-}
-our @EXPORT_OK;
 
 sub every_other_elem(@) {
   my $last = 0;
@@ -284,8 +270,6 @@
   } @_;
 }
 
-
-
 =head1 SEE ALSO
 
 L<List::Util>
@@ -299,6 +283,8 @@
 ###
 1;#do not remove
 ###
+
+
 package CXGN::Genomic::CloneIdentifiers;
 use strict;
 no strict 'refs'; #using symbolic refs
@@ -306,10 +292,6 @@
 use English;
 use Carp;
 
-use Bio::DB::GenBank;
-
-use CXGN::Tools::List qw/str_in/;
-
 =head1 NAME
 
 CXGN::Genomic::CloneIdentifiers - functions for parsing and generating
@@ -329,18 +311,6 @@
 
 =cut
 
-use base qw/Exporter/;
-
-BEGIN {
-  our @EXPORT_OK = qw(
-		      guess_clone_ident_type
-		      parse_clone_ident
-		      assemble_clone_ident
-		     );
-}
-our @EXPORT_OK;
-
-
 =head2 guess_clone_ident_type
 
   Usage: my $type = CXGN::Genomic::Clone->guess_clone_ident_type('C02HBa0011A02')
@@ -431,7 +401,7 @@
 
   #validate the types we've been given or guessed
   foreach my $type (@types) {
-    str_in($type,NAME_TYPES)
+    CXGN::Tools::List::str_in($type,NAME_TYPES)
       or croak "invalid clone_ident type '$type' passed to parse_clone_ident";
   }
 
@@ -484,8 +454,10 @@
 ###### INDIVIDUAL CLONE NAME PARSERS AND ASSEMBLERS  ######
 # a sub-parser should return nothing if the name it's given is not of
 # its type
-
-our $sep = '[^a-z\d\/]?';
+BEGIN {
+  our $sep = '[^a-z\d\/]?';
+}
+our $sep;
 
 sub _parse_clone_ident_old_cornell {
   my ($name) = @_;
@@ -824,6 +796,7 @@
 ###
 1;#do not remove
 ###
+
 use strict;
 use warnings;
 
@@ -843,38 +816,68 @@
 
 use LWP::Simple;
 
-use XML::LibXML;
-
-use Bio::SeqIO;
-use Bio::SeqUtils;
-use Bio::Tools::RepeatMasker;
-use Bio::FeatureIO;
-
-use CXGN::Genomic::Clone;
-use CXGN::Genomic::CloneIdentifiers qw/parse_clone_ident assemble_clone_ident/;
-
-use CXGN::Tools::Run;
-use CXGN::Tools::File qw/file_contents/;
-
-use CXGN::Annotation::GAMEXML::Combine qw/combine_game_xml_files/;
-
-use CXGN::Publish qw/parse_versioned_filepath/;
-
 use constant GENBANK_ACC_PATTERN => qr/^[A-Z_]{2,4}\d+$/;
 use constant GENBANK_VER_PATTERN => qr/^[A-Z_]{2,4}\d+\.\d+$/;
 
-use Class::MethodMaker
-  [ new    => [qw/ -init open /],
-    scalar => [qw/
-		  _version
-		  _tempdir
-		  _bacname
-		  _tarfile
-		  _tarfile_path
-		  _renamed_sequences_file
-		  _vector_screened_sequences_file
-	       /],
-  ];
+#### begin generated accessors ####
+sub _tempdir { # get/set accessor for this object's _tempdir slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_tempdir} = $new;
+  }
+  return $self->{_tempdir}; # always returns the current (possibly just-set) value
+}
+sub _bacname { # get/set accessor for this object's _bacname slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_bacname} = $new;
+  }
+  return $self->{_bacname}; # always returns the current (possibly just-set) value
+}
+sub _tarfile { # get/set accessor for this object's _tarfile slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_tarfile} = $new;
+  }
+  return $self->{_tarfile}; # always returns the current (possibly just-set) value
+}
+sub _tarfile_path { # get/set accessor for this object's _tarfile_path slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_tarfile_path} = $new;
+  }
+  return $self->{_tarfile_path}; # always returns the current (possibly just-set) value
+}
+sub _renamed_sequences_file { # get/set accessor for the _renamed_sequences_file slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_renamed_sequences_file} = $new;
+  }
+  return $self->{_renamed_sequences_file}; # current (possibly just-set) value
+}
+sub _vector_screened_sequences_file { # get/set accessor for the slot of the same name
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_vector_screened_sequences_file} = $new;
+  }
+  return $self->{_vector_screened_sequences_file}; # current (possibly just-set) value
+}
+sub _version { # get/set accessor for this object's _version slot
+  my ($self,$new) = @_;
+  if(@_ > 1) { # set whenever a value is passed, even a false one (0, '', undef)
+    $self->{_version} = $new;
+  }
+  return $self->{_version}; # always returns the current (possibly just-set) value
+}
+##### end generated accessors ###
+
+sub open { # constructor: bless an empty hash into $class, then run init()
+  my ($class,@init_args) = @_;
+  my $self = bless {}, $class;
+  # hand every argument after the class name to init()
+  $self->init(@init_args);
+  return $self;
+}
 
 #debugging utils
 use constant DEBUG => $ENV{CXGNBACSUBMISSIONDEBUG} ? 1 : 0;
@@ -958,10 +961,8 @@
 				      )
 		 );
 #  dbp "tar -xzf $tarfile -C ".$self->_tempdir."\n";
-  CXGN::Tools::Run->run('tar',
-			-xzf => $tarfile,
-			-C   => $self->_tempdir,
-		       );
+  system "gzip -d -c $tarfile | tar xf - -C ".$self->_tempdir;
+  die "Failed to decompress $tarfile: $!" if $CHILD_ERROR;
 
   #open the sequences file and initialize the version of this object
   #if the identifier(s) in the file have versions
@@ -1717,9 +1718,24 @@
   our @EXPORT_OK = @errnames;
   our %EXPORT_TAGS = (errors => [@errnames]);
 }
-use base qw/Exporter/;
 
-use enum 'DUMMY',our @errnames;
+use constant E_BAD_FILENAME  => 1;
+use constant E_BAD_BACNAME   => 2;
+use constant E_BAD_LIBNAME   => 3;
+use constant E_BAC_PARSE     => 4;
+use constant E_NO_TOP_DIR    => 5;
+use constant E_NO_MAIN_SEQ   => 6;
+use constant E_NO_MAIN_QUAL  => 7;
+use constant E_BAD_SEQ_VER   => 8;
+use constant E_UNK_CLONE     => 9;
+use constant E_BAD_DATA      => 10;
+use constant E_MULT_SEQS     => 11;
+use constant E_GB_ACC        => 12;
+use constant E_GB_REC        => 13;
+use constant E_GB_SEQ        => 14;
+#use constant E_BAD_DATA      => 15;
+#use constant E_BAD_DATA      => 16;
+
 sub error_string {
   my ($self,$errnum) = @_;
 
@@ -1780,15 +1796,9 @@
   }
 
   #check if the clone name is correctly formatted
-  my $parsed = parse_clone_ident($bacname,'agi_bac_with_chrom')
+  my $parsed = CXGN::Genomic::CloneIdentifiers::parse_clone_ident($bacname,'agi_bac_with_chrom')
     or push @errors,E_BAC_PARSE;#"file's basename '$bacname' is not a parsable BAC name";
 
-  #check if the clone name exists in the database
-  if($parsed) {
-    CXGN::Genomic::Clone->retrieve_from_parsed_name($parsed)
-	or push @errors,E_UNK_CLONE;
-  }
-
   #fatal if no subdirectory
   my $maindir = $self->main_submission_dir;
   unless( -d $maindir ) {
@@ -1914,7 +1924,7 @@
   #check for the clone name somewhere in the DEFINITION field
   #and keywords ITAG and TOMGEN somewhere in the COMMENT field
   push @errors, E_GB_REC
-    unless index($entry->{DEFINITION},$self->clone_object->clone_name_with_chromosome) != -1
+    unless index($entry->{DEFINITION},$self->bac_name) != -1
       && $comment
 	&& $comment =~ /\bTOMGEN\b/;
 
@@ -2094,6 +2104,28 @@
   return assemble_clone_ident(($self->version ? 'versioned_bac_seq' : 'agi_bac_with_chrom'), $parsed_bac_name);
 }
 
+
+=head2 file_contents
+
+  Desc: get the entire contents of a file as a string
+  Args: filename
+  Ret : string containing the entire contents of the file, or nothing if the file cannot be opened
+  Side Effects: reads the file from the filesystem
+
+  Be careful with this function.  If the file is too big, this will
+  take a LOT of memory.
+
+=cut
+
+sub file_contents {
+    my ($filename) = @_;
+    # 3-arg open: $filename is taken literally, never parsed for a mode or pipe
+    CORE::open(my $fh,'<',$filename) or return; # nothing returned if unreadable
+    my $file_contents = do { local $/; <$fh> }; # undef $/ slurps the whole file
+    CORE::close $fh;
+    return $file_contents;
+}
+
 =head2 genbank_accession
 
   Usage: my $gbacc = $submission->genbank_accession
@@ -2143,28 +2175,6 @@
   print $f $acc,"\n";
 }
 
-=head2 clone_object
-
-  Usage: my $clone = $submission->clone_object
-  Desc : get the CXGN::Genomic::Clone object corresponding to this BAC
-         submission, gets info from the database
-         NOTE: THIS IS ONLY RELIABLE IF THE SUBMISSION VALIDATES
-  Ret  : the clone object, or undef if it could not be found
-  Args : none
-  Side Effects: none
-
-=cut
-
-sub clone_object {
-  my ($self) = @_;
-
-  #parse this submission's BAC name
-  my $parsed_name = parse_clone_ident( $self->bac_name, 'agi_bac_with_chrom' )
-    or return undef;
-
-  return CXGN::Genomic::Clone->retrieve_from_parsed_name($parsed_name);
-}
-
 =head2 tar_file
 
   Usage: my $submission = BACSubmission->open($tarfile);
@@ -2207,7 +2217,6 @@
   return $idents == 1;
 }
 
-
 #the filename prefix that precedes all files generated by this object
 sub _generated_file_prefix {
   'temp-cxgn-bac-submit'
@@ -2241,1486 +2250,3 @@
 ###
 1;#do not remove
 ###
-
-
-##############################################################################
-##############################################################################
-#######################     BAC SUBMISSION ANALYSIS   ########################
-##############################################################################
-##############################################################################
-
-#ANALYSIS SUPERCLASS
-package CXGN::BACSubmission::Analysis;
-use Carp qw/confess croak/;
-
-use Bio::Annotation::SimpleValue;
-use Bio::SeqFeature::Annotated;
-
-use Class::MethodMaker
-  [ new      => [qw/ -init new /],
-    abstract => [qw/ run /],
-  ];
-
-sub init {
-}
-
-#maintains list of analysis packages to run at submission
-our %run_at_submission;
-sub run_for_new_submission {
-  my ($class,$newval) = @_;
-  my ($name) =  ( split '::', $class )[-1];
-  $run_at_submission{$name} = $newval if defined $newval;
-}
-
-#return list of analysis packages to run, sorted by their run_at_submission values
-sub analyses_to_run {
-  sort { $run_at_submission{$a} <=> $run_at_submission{$b} || $a cmp $b}
-    grep { $run_at_submission{$_} }
-      keys %run_at_submission
-}
-
-#the last part of the package name is the name of the analysis
-sub analysis_name {
-  ( split '::', ref shift )[-1];
-}
-
-sub check_ok_to_run {
-  return 1;
-}
-
-sub list_params {
-  return ();
-}
-
-sub analysis_generated_file {
-  my ($self,$submission,$file) = @_;
-  @_ == 3 or confess "analysis_generated_file takes 2 arguments";
-
-  my $analysis_dir = File::Spec->catdir($submission->_tempdir,$self->analysis_name);
-  -d $analysis_dir or mkdir $analysis_dir
-    or die "Could not mkdir $analysis_dir: $!";
-
-  my %valid_names =
-    (  GeneSeqer => [qw(
-			out
-			err
-			game_xml
-			gff3
-		       )],
-       GenomeThreader => [qw( un_xed_seqs
-			      out
-			      err
-			      game_xml
-			      gff3
-			    )],
-       tRNAscanSE => [qw( out err game_xml gff3 )],
-       RepeatMasker => [qw( out err game_xml gff3)],
-    );
-
-  #TODO: verify that the requested filename is valid for the analysis
-
-  return File::Spec->catfile($analysis_dir,$file);
-}
-
-
-#given a stem, make a ID that's unique to this analysis
-#by appending a number to the stem
-sub _unique_bio_annotation_id {
-  my ($self,$idstem)  = @_;
-  $self->{uniq_id_ctrs} ||= {};
-  return Bio::Annotation::SimpleValue->new(-value => $idstem.'_'.++$self->{uniq_id_ctrs}{$idstem});
-}
-
-#take a feature hierarchy, manufacture ID and Parent tags to encode
-#the hierarchical relationships, adding them to the features
-sub _make_gff3_id_and_parent {
-  my ($self,$feat,$parent_ID) = @_;
-
-  $feat->add_Annotation('Parent',Bio::Annotation::SimpleValue->new(-value => $parent_ID))
-    if defined $parent_ID;
-
-  #make a unique id for this thing, keeping our id counters on a
-  #per-analysis level
-  $self->{uniq_id_ctr} ||= {};
-  if(my $idstem = $self->_feature_id($feat,$parent_ID)) {
-    my $uniqid = $self->_unique_bio_annotation_id($idstem);
-    $feat->add_Annotation('ID',Bio::Annotation::SimpleValue->new(-value => $uniqid));
-    #recursively ID and Parent all the subfeatures, if any
-    $self->_make_gff3_id_and_parent($_,$uniqid) for $feat->get_SeqFeatures;
-  }
-
-}
-
-#take a self,a feature, and an optional ID of its parent feature,
-#return a string that's the new unique ID the feature should have
-sub _feature_id {} #just a stub, returning nothing.  implement in subclasses
-
-#return the string name of the sequence database(s) this analysis is
-#using.  used mostly for giving the database_name to gamexml
-#generation
-sub _dbname {''}
-
-#recursively set the source on a feature and its subfeatures
-sub _recursive_source {
-  my ($self,$feature,$newsource) = @_;
-  $feature->source($newsource);
-  $self->_recursive_source($_,$newsource) for $feature->get_SeqFeatures;
-}
-
-#make a gff3-compliant feature start, end, and strand
-#from a gamexml-style start and end that might be backwards
-sub _start_end_strand(@) {
-  my ($start,$end) = @_;
-  if($start > $end) {
-    return ($end,$start,-1);
-  } else {
-    return ($start,$end,1);
-  }
-}
-
-#object method to create a new feature object, with some defaults and
-#automation of the more repetitive bits (like adding targets and
-#subfeatures)
-sub new_feature(@) {
-  my ($self,%a) = @_;
-
-  UNIVERSAL::isa($self,__PACKAGE__)
-      or croak('_new_feature is an object method, silly');
-
-  #replace spaces in source with underscores
-  $a{-source} ||= 'GenomeThreader';
-  $a{-source} =~ s/\s+/_/g;
-
-  #if no strand is given, make the proper strand and flip start and
-  #end if necessary
-  @a{qw/-start -end -strand/} = _start_end_strand(@a{qw/-start -end/}) unless $a{-strand};
-
-  #now make the feature and add all its targets and subfeatures and annotations
-  return Bio::SeqFeature::Annotated->new(%a);
-}
-
-
-###
-1;#do not remove
-###
-
-
-########### ADD NEW ANALYSIS DOWN HERE ####################
-
-=head1 AVAILABLE ANALYSES
-
-=cut
-
-#### TO ADD A NEW ANALYSIS
-# 1. make a new analysis package
-# 2. add it to the analysis_list below
-# 3. fill in its run() subroutine
-# 4. optionally, fill in its check_ok_to_run() subroutine
-
-package CXGN::BACSubmission;
-
-#add all analyses to this list
-our @analysis_list = qw(
-			GeneSeqer_SGN_E_tomato
-			GeneSeqer_SGN_U_tomato
-			RepeatMasker
-			GenomeThreader_SGN_E_tomato
-			GenomeThreader_SGN_E_tomato_potato
-			GenomeThreader_SGN_U_tomato
-			GenomeThreader_SGN_markers
-			tRNAscanSE
-			BLAST_tomato_bac_ends
-			BLAST_nr
-			BLAST_ath_pep
-			BLAST_E_coli_K12
-			BLAST_tomato_chloroplast
-			BLAST_tomato_bacs
-			Cross_match_vector
-			FGENESH_Tomato
-			FGENESH_Nicotiana
-		       );
-
-=head2 list_analyses
-
-  Usage: my @a = CXGN::BACSubmission->list_analyses;
-  Desc : class method to get a list of available analyses.
-  Ret  : list of strings, which are names of available analyses
-  Args : none
-
-=cut
-
-sub list_analyses {
-  return @analysis_list;
-}
-
-#############################################################################
-#######################  GENESEQER  #########################################
-#############################################################################
-
-package CXGN::BACSubmission::Analysis::GeneSeqer;
-use base qw/CXGN::BACSubmission::Analysis/;
-use Carp;
-use English;
-use File::Spec;
-use File::Temp;
-
-use CXGN::Tools::File qw/ executable_is_in_path file_contents /;
-use CXGN::Annotation::GAMEXML::FromFile qw/geneseqer_to_game_xml/;
-use CXGN::Annotation::GAMEXML::Combine qw/combine_game_xml_files/;
-
-sub list_params {
-  return ( geneseqer_binary  => '(optional) path to the GeneSeqer executable',
-	   geneseqer_est_seq_file => <<EOD,
-path to the file containing EST sequences in FASTA format that we should
-annotate this BAC submission against
-EOD
-	   geneseqer_ug_seq_file => <<EOD,
-path to the file containing unigene sequences in FASTA format that we should
-annotate this BAC submission against
-EOD
-	 );
-}
-
-#check that we have everything we need to run
-sub check_ok_to_run {
-  my $self = shift;
-  my $submission = shift;
-  my $aux_input = shift; #hash ref of auxiliary inputs
-
-  my @fileset = $self->_fileset($aux_input,$submission);
-
-  croak "Specified geneseqer_est/ug_seq_file '$fileset[0]' could not be found or was not readable"
-    unless $fileset[0] && -r $fileset[0];
-
-  croak "Could not find GeneSeqer executable.  Do you need to set 'geneseqer_binary' analysis option?"
-    unless find_geneseqer($aux_input);
-
-  return 1;
-}
-
-sub _fileset {
-  confess 'abstract, not implemented!';
-}
-
-#figure out where our geneseqer executable is
-sub find_geneseqer {
-  my $aux = shift;
-
-  return ($aux->{geneseqer_binary} && -x $aux->{geneseqer_binary})
-    || (executable_is_in_path 'GeneSeqer' && 'GeneSeqer')
-    || croak 'Cannot find GeneSeqer binary';
-}
-
-#run geneseqer analysis
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  #decide all the places where our various files are or should go
-  my $gs_exec              = find_geneseqer($aux_inputs);
-  my $vector_screened_seqs = $submission->vector_screened_sequences_file;
-
-  my ($ests_file,$gs_est_outfile,$gs_est_errfile,$geneseqer_game_xml_file,$gs_gff3_out_file) =
-    $self->_fileset($aux_inputs,$submission);
-
-  my @gs_options = ( #command-line options for geneseqer
-		    -s => 'Arabidopsis',
-		    -m => 50000,
-		    -x => 16,
-		    -y => 30,
-		    -z => 50,
-		    -L => $vector_screened_seqs,
-		   );
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-    my $gs_est_job = CXGN::Tools::Run->run( $gs_exec,
-					    @gs_options,
-					    -E => $ests_file,
-					    { out_file => $gs_est_outfile,
-					      err_file    => $gs_est_errfile,
-					    }
-					  );
-
-    #convert the geneseqer output to gamexml
-    if($submission->is_finished) {
-      geneseqer_to_game_xml( $vector_screened_seqs, $gs_est_outfile, $geneseqer_game_xml_file );
-    }
-    else {
-      $submission->_write_unfinished_bac_xml_stub($geneseqer_game_xml_file);
-    }
-
-    #convert the geneseqer output to gff3
-
-    my $gs_in = Bio::FeatureIO->new( -format => 'geneseqer', -file => $gs_est_outfile, -mode => 'both_merged' );
-    my $gff3_out = Bio::FeatureIO->new( -format => 'gff', -file => ">$gs_gff3_out_file", -version => 3 );
-    while ( my $f = $gs_in->next_feature ) {
-
-      #set each feature's source to the name of the geneseqer subclass that's running this
-      $self->_recursive_source($f,$self->analysis_name);
-
-      #make some ID and Parent tags in the subfeatures
-      $self->_make_gff3_id_and_parent($f);
-      $gff3_out->write_feature($f);
-    }
-  } else {
-    warn "look at me, I'm faking running '$gs_exec ".join(' ',@gs_options)."'\n";
-    `touch $geneseqer_game_xml_file $gs_gff3_out_file $gs_est_outfile`;
-  }
-
-  return ($geneseqer_game_xml_file, $gs_gff3_out_file, $gs_est_outfile);
-}
-
-sub _feature_id {
-  my ($self,$feat,$parent_ID)  = @_;
-  if($feat->type->name eq 'mRNA') {
-    "${parent_ID}_AGS"
-  } elsif ( $feat->type->name eq 'match') {
-    #get the target name of the first subfeature's target
-    my ($target_id) = (($feat->get_SeqFeatures)[0]->get_Annotations('Target'))[0]->target_id;
-    $target_id.'_alignment'
-  } elsif ( $feat->type->name eq 'region') {
-    'PGL'
-  } else {			#just name the feature for its source and type
-    $feat->source.'_'.$feat->type->name;
-  }
-}
-
-=head2 GeneSeqer_SGN_E_tomato
-
-  Secondary input parameters:
-    geneseqer_binary       - (optional) path to the GeneSeqer executable
-    geneseqer_est_seq_file - path to the file containing EST sequences in FASTA
-                             format that we should annotate this BAC submission
-                             against
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GeneSeqer_SGN_E_tomato;
-use base 'CXGN::BACSubmission::Analysis::GeneSeqer';
-__PACKAGE__->run_for_new_submission(0);
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-
-  return ($aux_inputs->{geneseqer_est_seq_file},
-	  (map {$self->analysis_generated_file($submission,$_)} qw/out err game_xml gff3/),
-	 );
-}
-
-=head2 GeneSeqer_SGN_U_tomato
-
-  Secondary input parameters:
-    geneseqer_binary       - (optional) path to the GeneSeqer executable
-    geneseqer_ug_seq_file  - path to the file containing unigene sequences in FASTA
-                             format that we should annotate this BAC submission
-                             against
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GeneSeqer_SGN_U_tomato;
-use base 'CXGN::BACSubmission::Analysis::GeneSeqer';
-__PACKAGE__->run_for_new_submission(0); #this is kind of obsolete, so don't run it by default
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-  return ($aux_inputs->{geneseqer_ug_seq_file},
-	  (map {$self->analysis_generated_file($submission,$_)} qw/out err game_xml gff3/),
-	 );
-}
-
-
-#############################################################################
-#####################  REPEATMASKER  ########################################
-#############################################################################
-
-=head2 RepeatMasker
-
-  Secondary input parameters:
-    repeatmasker_lib_file - library of repetitive sequences to use
-    repeatmasker_binary   - (optional) full path to RepeatMasker executable script
-
-=cut
-
-package CXGN::BACSubmission::Analysis::RepeatMasker;
-use base qw/CXGN::BACSubmission::Analysis/;
-use Carp;
-use English;
-use File::Spec;
-use File::Basename;
-
-use POSIX;
-
-use CXGN::Tools::File qw/ executable_is_in_path file_contents /;
-use CXGN::Annotation::GAMEXML::FromFile qw/gff_to_game_xml/;
-use CXGN::BACPublish qw/resource_file/;
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( repeatmasker_lib_file => 'full path to fasta file of repeats to use',
-	   repeatmasker_binary   => '(optional) full path to RepeatMasker executable',
-	 );
-}
-
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  #find all the various files we need to run repeatmasker
-  my $repeatmasker_bin     = find_repeatmasker($aux_inputs);
-  my $repeat_lib_file      = $aux_inputs->{repeatmasker_lib_file} || resource_file('repeats_master');
-  my $vector_screened_seqs = $submission->vector_screened_sequences_file;
-  my ($outfile,$errfile,$gff3file,$gamefile) =
-    map {$self->analysis_generated_file($submission,$_)} qw/out err gff3 game_xml/;
-  my $repeatmasker_native  = "$vector_screened_seqs.out"; #and it always writes to this too
-  my $repeatmasker_gff2  = "$vector_screened_seqs.out.gff"; #and it always writes to this too
-  my $tempdir = $submission->_tempdir;
-  -w $tempdir or confess "Cannot write to temp dir '$tempdir'";
-
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-  #run repeatmasker
-    my $rm = CXGN::Tools::Run->run( $repeatmasker_bin,
-				    '-q',
-				    '-nolow',
-				    '-gff',
-				    -lib     => $repeat_lib_file,
-				    -parallel => 2, #use 2 processors
-				    $vector_screened_seqs,
-				    { working_dir => $tempdir,
-				      out_file    => $outfile,
-				      err_file    => $errfile,
-				    }
-				  );
-
-    #convert the repeatmasker to gff3
-    do { my $fi = Bio::Tools::RepeatMasker->new( -file => $repeatmasker_native );
-	 my $fo = Bio::FeatureIO->new(-file => ">$gff3file", -format => 'gff', -version => 3);
-	 while (my $feature_pair = $fi->next_result() ) {
-	   $feature_pair->primary_tag('nucleotide_motif');
-	   my $old = $feature_pair->feature1;
-	   my $f = Bio::SeqFeature::Annotated->new( -feature => $old );
-	   my ($target_id) = ($f->get_Annotations('Target'))[0]->target_id;
-	   $f->add_Annotation('ID',$self->_unique_bio_annotation_id("${target_id}_alignment"));
-	   $fo->write_feature( $f );
-	 }
-       };
-
-    #convert the GFF3 to GAME XML if this is a finished bac
-    if( $submission->is_finished ) {
-      gff_to_game_xml($vector_screened_seqs, $gff3file, $gamefile,
-		      program_name   => 'RepeatMasker',
-		      database_name  => $self->_dbname,
-		      program_date   => asctime(gmtime).' GMT',
-		      gff_version    => 3,
-		     );
-    }
-    else {
-      $submission->_write_unfinished_bac_xml_stub($gamefile);
-    }
-  } else {
-    warn "look at me, I'm not really running RepeatMasker\n";
-    `touch $gamefile $repeatmasker_native $gff3file $repeatmasker_gff2`;
-  }
-
-  #return the result files
-  return ($gamefile, $gff3file, $repeatmasker_gff2, $repeatmasker_native, $submission->repeat_masked_sequences_file);
-}
-sub _dbname {
-  'tomato repeats master'
-}
-
-#figure out where our geneseqer executable is
-sub find_repeatmasker {
-  my $aux = shift;
-
-  return ($aux->{repeatmasker_binary} && -x $aux->{repeatmasker_binary})
-      || (executable_is_in_path 'RepeatMasker' && 'RepeatMasker')
-      || croak 'Cannot find RepeatMasker binary';
-}
-
-sub check_ok_to_run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  no warnings;
-
-#   croak "Specified RepeatMasker library (repeatmasker_lib_file='$aux_inputs->{repeatmasker_lib_file}') could not be found or was not readable"
-#     unless $aux_inputs->{repeatmasker_lib_file} && -r $aux_inputs->{repeatmasker_lib_file};
-
-  croak "Could not find RepeatMasker executable.  Do you need to set the 'repeatmasker_binary' analysis option?"
-    unless find_repeatmasker($aux_inputs);
-
-  return 1;
-}
-
-#############################################################################
-####################  GENOMETHREADER  #######################################
-#############################################################################
-
-package CXGN::BACSubmission::Analysis::GenomeThreader;
-use base qw/CXGN::BACSubmission::Analysis/;
-use Carp;
-use English;
-use CXGN::Tools::File qw/executable_is_in_path/;
-use CXGN::Annotation::GAMEXML::FromFile qw/gthxml_to_game_xml/;
-
-sub un_xed_genomic_seqs {
-  my ($self,$submission) = @_;
-
-  #make the un-xed seqs file if necessary
-  unless ( $self->{un_xed_seqs_file} && -s $self->{un_xed_seqs_file} ) {
-    my $un_xed_seqs = $self->analysis_generated_file($submission,'un_xed_seqs');
-    my $vector_screened_seqs = $submission->vector_screened_sequences_file;
-    open my $xfile, $vector_screened_seqs
-      or confess "could not open '$vector_screened_seqs' for reading";
-    open my $nfile, ">$un_xed_seqs"
-      or confess "could not open un-xed-seqs file '$un_xed_seqs' for writing";
-    while (my $line = <$xfile>) {
-      unless($line =~ /^\s*[>#]/) {   #don't munge identifier or comment (comment?) lines
-	$line =~ tr/X/N/;
-      }
-      print $nfile $line;
-    }
-    $self->{un_xed_seqs_file} = $un_xed_seqs;
-  }
-  return $self->{un_xed_seqs_file};
-}
-
-#run genomethreader analysis
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  #decide all the places where our various files are or should go
-  my $gth_exec             = find_gth($aux_inputs);
-  my $un_xed_seqs          = $self->un_xed_genomic_seqs($submission);
-
-  my ($cdna_file,$outfile,$errfile,$game_xml_file,$gff3_out_file) =
-    $self->_fileset($aux_inputs,$submission);
-
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-    my $gs_est_job = CXGN::Tools::Run->run( $gth_exec,
-					    '-xmlout',
-					    -minalignmentscore => '0.90',
-					    -mincoverage       => '0.90',
-					    -seedlength        => 16,
-					    -species => 'arabidopsis',
-					    -cdna    => $cdna_file,
-					    -genomic => $un_xed_seqs,
-					    { out_file => $outfile,
-					      err_file => $errfile,
-					    }
-					  );
-
-    #convert the geneseqer output to gamexml if it's a finished bac,
-    #otherwise write a 'not supported' comment into the file and leave
-    #it at that
-    if($submission->is_finished) {
-      gthxml_to_game_xml( $submission->vector_screened_sequences_file,$outfile,$game_xml_file,
-			  program_name  => $self->analysis_name,
-			  database_name => $self->_dbname,
-			);
-    }
-    else {
-      $submission->_write_unfinished_bac_xml_stub($game_xml_file);
-    }
-
-    #now convert the gthxml to gff3
-    my $gth_in = Bio::FeatureIO->new( -format => 'gthxml', -file => $outfile, -mode => $self->_parse_mode );
-    my $gff3_out = Bio::FeatureIO->new( -format => 'gff', -file => ">$gff3_out_file", -version => 3 );
-    while ( my $f = $gth_in->next_feature ) {
-
-      #set each feature's source to the name of the gth subclass that's running this
-      $self->_recursive_source($f,$self->analysis_name);
-
-      #make some ID and Parent tags in the subfeatures
-      $self->_make_gff3_id_and_parent($f);
-      $gff3_out->write_feature($f);
-    }
-  } else {
-    warn "look at me, I'm faking running '$gth_exec'\n";
-    `touch $game_xml_file $gff3_out_file $outfile`;
-  }
-
-  return ($game_xml_file, $gff3_out_file, $outfile);
-}
-
-sub _parse_mode {
-  'both_merged';
-}
-
-sub _feature_id {
-  my ($self,$feat,$parent_ID)  = @_;
-  if($feat->type->name eq 'mRNA') {
-    "${parent_ID}_AGS"
-  } elsif ( $feat->type->name eq 'match') {
-    #get the target name of the first subfeature's target
-    my ($target_id) = (($feat->get_SeqFeatures)[0]->get_Annotations('Target'))[0]->target_id;
-    $target_id.'_alignment'
-  } elsif ( $feat->type->name eq 'region') {
-    'PGL'
-  } else {			#just name the feature for its source and type
-    $feat->source.'_'.$feat->type->name;
-  }
-}
-
-#figure out where our executable is
-sub find_gth {
-  my $aux = shift;
-
-  return ($aux->{gth_binary} && -x $aux->{gth_binary})
-      || (executable_is_in_path 'gth' && 'gth')
-      || croak 'Cannot find GenomeThreader binary';
-}
-
-#check that we have everything we need to run
-sub check_ok_to_run {
-  my ($self,$submission,$aux_input) = @_;
-
-  my @fileset = $self->_fileset($aux_input,$submission);
-
-  croak "Specified gth_sgne/u_seq_file '$fileset[0]' could not be found or was not readable"
-    unless $fileset[0] && -r $fileset[0];
-
-  croak "Could not find gth executable.  Do you need to set the 'gth_binary' analysis option?"
-    unless find_gth($aux_input);
-
-  return 1;
-}
-
-=head2 GenomeThreader_SGN_U_tomato
-
-  Secondary input parameters:
-    gth_sgnu_tomato_seqs_file - library of SGN unigene sequences to use
-    gth_binary   - (optional) full path to gth executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GenomeThreader_SGN_U_tomato;
-use base 'CXGN::BACSubmission::Analysis::GenomeThreader';
-use CXGN::BACPublish qw/resource_file/;
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( gth_sgnu_tomato_seq_file => '(optional) full path to fasta file of SGN unigenes to use',
-	   gth_binary   => '(optional) full path to gth executable',
-	 );
-}
-
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-  return ($aux_inputs->{gth_sgnu_tomato_seq_file} || resource_file('lycopersicum_combined_unigene_seqs'),
-	  (map { $self->analysis_generated_file($submission,$_) } qw /out err game_xml gff3/),
-	 );
-}
-
-sub _dbname {
-  return 'SGN Tomato Unigenes';
-}
-
-
-=head2 GenomeThreader_SGN_E_tomato
-
-  Secondary input parameters:
-    gth_sgne_tomato_seq_file - library of SGN EST sequences to use
-    gth_binary   - (optional) full path to gth executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GenomeThreader_SGN_E_tomato;
-use base 'CXGN::BACSubmission::Analysis::GenomeThreader';
-use CXGN::BACPublish qw/resource_file/;
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( gth_sgne_tomato_seq_file => '(optional) full path to fasta file of SGN ESTs to use',
-	   gth_binary   => '(optional) full path to gth executable',
-	 );
-}
-
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-  return ($aux_inputs->{gth_sgne_tomato_seq_file} || resource_file('sgn_ests_tomato'),
-	  (map { $self->analysis_generated_file($submission,$_) } qw /out err game_xml gff3/),
-	 );
-}
-sub _dbname {
-  return 'SGN Tomato ESTs';
-}
-
-=head2 GenomeThreader_SGN_E_tomato_potato
-
-  Secondary input parameters:
-    gth_sgne_tomato_potato_seq_file - library of SGN EST sequences to use
-    gth_binary   - (optional) full path to gth executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GenomeThreader_SGN_E_tomato_potato;
-use base 'CXGN::BACSubmission::Analysis::GenomeThreader';
-use CXGN::BACPublish qw/resource_file/;
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( gth_sgne_tomato_potato_seq_file => '(optional) full path to fasta file of SGN ESTs to use',
-	   gth_binary   => '(optional) full path to gth executable',
-	 );
-}
-
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-  return ($aux_inputs->{gth_sgne_tomato_potato_seq_file} || resource_file('sgn_ests_tomato_potato'),
-	  (map { $self->analysis_generated_file($submission,$_) } qw /out err game_xml gff3/),
-	 );
-}
-sub _dbname {
-  return 'SGN Combined Tomato and Potato ESTs';
-}
-
-
-=head2 GenomeThreader_SGN_markers
-
-  Secondary input parameters:
-    gth_sgn_marker_seqs - (optional) full path to fasta file of SGN marker sequences to use
-    gth_binary   - (optional) full path to gth executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::GenomeThreader_SGN_markers;
-use base 'CXGN::BACSubmission::Analysis::GenomeThreader';
-use CXGN::BACPublish qw/resource_file/;
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( gth_sgn_marker_seqs => '(optional) full path to fasta file of SGN marker sequences to use',
-	   gth_binary   => '(optional) full path to gth executable',
-	 );
-}
-
-sub _fileset {
-  my ($self,$aux_inputs,$submission) = @_;
-  return ($aux_inputs->{gth_sgn_marker_seqs} || resource_file('sgn_marker_seqs'),
-	  (map { $self->analysis_generated_file($submission,$_) } qw /out err game_xml gff3/),
-	 );
-}
-sub _dbname {
-  return 'SGN marker sequences';
-}
-sub _parse_mode {
-  'alignments_merged'
-}
-
-
-=head2 tRNAscanSE
-
-  Secondary input parameters:
-    trnascanse_binary   - (optional) full path to tRNAscan-SE executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::tRNAscanSE;
-use base qw/CXGN::BACSubmission::Analysis/;
-use Carp;
-use English;
-use File::Spec;
-use File::Basename;
-
-use POSIX;
-
-use Bio::Tools::tRNAscanSE;
-
-use CXGN::Tools::File qw/ executable_is_in_path file_contents /;
-use CXGN::Annotation::GAMEXML::FromFile qw/gff_to_game_xml/;
-
-
-__PACKAGE__->run_for_new_submission(1); #set this to be run on new BAC submissions
-
-sub list_params {
-  return ( trnascanse_binary => 'optional full path to tRNAscan-SE executable',
-	 );
-}
-
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  #find all the various files we need to run repeatmasker
-  my $executable = find_trnascanse($aux_inputs);
-  my $vector_screened_seqs = $submission->vector_screened_sequences_file;
-  my ($outfile,$errfile,$gff3_file,$game_xml_file) =
-    map { $self->analysis_generated_file($submission,$_) }
-      qw/out err gff3 game_xml/;
-  my $tempdir = $submission->_tempdir;
-  -w $tempdir or confess "Cannot write to temp dir '$tempdir'";
-
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-    my $run = CXGN::Tools::Run->run( $executable,
-				     $vector_screened_seqs,
-				     { working_dir => $tempdir,
-				       out_file    => $outfile,
-				       err_file    => $errfile,
-				     }
-				   );
-    #convert the output to gff3
-    do { my $fi = Bio::Tools::tRNAscanSE->new(-file => $outfile);
-	 my $fo = Bio::FeatureIO->new(-file => ">$gff3_file", -format => 'gff', -version => 3);
-	 while (my $feature = $fi->next_prediction() ) {
-	   $feature->primary_tag('tRNA');
-	   my $f = Bio::SeqFeature::Annotated->new( -feature => $feature );
-	   $fo->write_feature( $f );
-	 }
-       };
-
-    #convert the GFF3 to GAME XML if this is a finished bac
-    if( $submission->is_finished ) {
-      gff_to_game_xml($vector_screened_seqs, $gff3_file, $game_xml_file,
-		      program_name   => 'tRNAscan-SE',
-		      program_date   => asctime(gmtime).' GMT',
-		      gff_version    => 3,
-		     );
-    }
-    else {
-      $submission->_write_unfinished_bac_xml_stub($game_xml_file);
-    }
-  } else {
-    warn "look at me, I'm not really running tRNAscan-SE\n";
-    `touch $game_xml_file $outfile $gff3_file`;
-  }
-
-  #return the result files
-  return ($game_xml_file, $gff3_file, $outfile);
-}
-
-#figure out where our geneseqer executable is
-sub find_trnascanse {
-  my $aux = shift;
-
-  return ($aux->{trnascanse_binary} && -x $aux->{trnascanse_binary})
-      || (executable_is_in_path 'tRNAscan-SE' && 'tRNAscan-SE')
-      || croak 'Cannot find tRNAscan-SE binary';
-}
-
-sub check_ok_to_run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  no warnings;
-
-  croak "Could not find tRNAscan-SE executable.  Do you need to set the 'trnascanse_binary' analysis option?"
-    unless find_trnascanse($aux_inputs);
-
-  return 1;
-}
-
-
-################ BLASTing ################
-
-package CXGN::BACSubmission::Analysis::BLAST;
-use base qw/CXGN::BACSubmission::Analysis/;
-use Carp;
-use English;
-use File::Spec;
-use File::Basename;
-use POSIX;
-
-use CXGN::BlastDB;
-
-use CXGN::Tools::File qw/ executable_is_in_path file_contents /;
-use CXGN::Annotation::GAMEXML::FromFile qw/gff_to_game_xml/;
-
-
-sub list_params {
-  return ( blastall_binary => 'optional full path to blastall executable',
-	 );
-}
-
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  #find all the various files we need
-  my $executable = find_blastall($aux_inputs);
-  my $vector_screened_seqs = $submission->vector_screened_sequences_file;
-  my @fileset = $self->_fileset($aux_inputs);
-  my ($bdb) = CXGN::BlastDB->search(file_base => $fileset[0])
-    or croak "Cannot find blastdb with file_base '$fileset[0]'.  Is it in the ".CXGN::BlastDB->table." table?";
-
-  my ($outfile,$errfile,$gff3_file,$game_xml_file) =
-    map { $self->analysis_generated_file($submission,$_) }
-      qw/out err gff3 game_xml/;
-  my $tempdir = $submission->_tempdir;
-  -w $tempdir or confess "Cannot write to temp dir '$tempdir'";
-
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-    my $run = CXGN::Tools::Run->run( $executable,
-				     -i => $vector_screened_seqs,
-				     -d => $bdb->full_file_basename,
-				     $self->_blastparams,
-				     -m => 8,
-				     { working_dir => $tempdir,
-				       out_file    => $outfile,
-				       err_file    => $errfile,
-				     }
-				   );
-    #convert the blast report to gff3
-    do { open my $out_fh,$outfile or die "Could not open blast output file $outfile: $!";
-	 my $fo = Bio::FeatureIO->new(-file => ">$gff3_file", -format => 'gff', -version => 3);
-	 while (my $line = <$out_fh> ) {
-	   next if $line =~ /^\#/ || $line =~ /^\s+$/;
-	   next unless $self->_use_line($line);
-	   my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-	       $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-	   my $feature = $self->new_feature( -start => $qstart,
-					     -end   => $qend,
-					     -score => $bits,
-					     -type  => 'match',
-					     -source => $self->analysis_name,
-					     -seq_id => $qname,
-					     -target => { -start => $hstart,
-							  -end   => $hend,
-							  -target_id => $hname,
-						       },
-					     -annots => { ID => $self->_unique_bio_annotation_id("${hname}_alignment"),
-							  blast_percent_identity => $percent_id,
-							  blast_mismatches => $mismatches,
-							  blast_gaps => $gapsm,
-							  blast_evalue => $evalue,
-							},
-					   );
-	   $fo->write_feature( $feature );
-	 }
-       };
-
-    #convert the GFF3 to GAME XML if this is a finished bac
-    if( $submission->is_finished ) {
-      gff_to_game_xml($vector_screened_seqs, $gff3_file, $game_xml_file,
-		      program_name   => $self->analysis_name,
-		      program_date   => asctime(gmtime).' GMT',
-		      database_name  => $bdb->title,
-		      database_date  => $bdb->format_time,
-		      gff_version    => 3,
-		     );
-    }
-    else {
-      $submission->_write_unfinished_bac_xml_stub($game_xml_file);
-    }
-  } else {
-    warn "look at me, I'm not really running BLAST\n";
-    `touch $game_xml_file $outfile $gff3_file`;
-  }
-
-  #return the result files
-  return ($game_xml_file, $gff3_file, $outfile);
-}
-
-#figure out where our blastall executable is
-sub find_blastall {
-  my $aux = shift;
-
-  return ($aux->{blastall_binary} && -x $aux->{blastall_binary})
-      || (executable_is_in_path 'blastall' && 'blastall')
-      || croak 'Cannot find blastall binary';
-}
-
-sub check_ok_to_run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  no warnings;
-
-  croak "Could not find blastall executable.  Do you need to set the 'blastall_binary' analysis option?"
-    unless find_blastall($aux_inputs);
-
-  my @fileset = $self->_fileset($aux_inputs,$submission);
-
-  my ($bdb) = CXGN::BlastDB->search(file_base => $fileset[0]);
-
-  croak "Specified blast database '$fileset[0]' could not be found or was not readable"
-    unless $bdb;
-
-  return 1;
-}
-
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  croak "not implemented";
-}
-sub _blastparams {
-  -e => '1e-10', -p => 'blastn'
-}
-sub _use_line {
-  1;
-}
-
-=head2 BLAST_tomato_bac_ends
-
-  BLAST versus BAC ends
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    bac_ends_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_tomato_bac_ends;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-__PACKAGE__->run_for_new_submission(1);
-sub list_params {
-  return ( blastall_binary => 'optional full path to blastall executable',
-	   bac_ends_blast_db => 'optional file_base of the CXGN::BlastDB bac ends blast database to use',
-	 );
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{bac_ends_blast_db} || 'bacs/tomato_bac_ends');
-}
-
-sub _use_line {
-  my ($self,$line) = @_;
-  my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-      $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-  return $percent_id > 98 && $mismatches < 10 && $gapsm <= 1;
-}
-sub _blastparams {
-  -e => '1e-60', -p => 'blastn'
-}
-
-=head2 BLAST_nr
-
-  BLAST versus Genbank NR
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    nr_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-  NOTE: This analysis takes 4 hours on a 3GHz opteron machine, so
-  it is not automatically run for new bacs
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_nr;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-#it takes 4 hours on a 3Ghz opteron to blastx a bac against
-#nr, so don't run this for every new submission
-__PACKAGE__->run_for_new_submission(0);
-sub list_params {
-  return ( blastall_binary => 'optional full path to blastall executable',
-	   nr_blast_db => 'optional file_base of the CXGN::BlastDB genbank nr blast database to use',
-	 );
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{nr_blast_db} || 'genbank/nr');
-}
-
-# sub _use_line {
-#   my ($self,$line) = @_;
-#   my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-#       $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-#   return $percent_id > 98 && $mismatches < 10 && $gapsm <= 1;
-# }
-sub _blastparams {
-  -e => '1e-20', -p => 'blastx'
-}
-
-=head2 BLAST_ath_pep
-
-  BLAST versus TAIR Arabidopsis peptides.
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    ath_pep_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_ath_pep;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-__PACKAGE__->run_for_new_submission(1);
-sub list_params {
-  return ( blastall_binary => 'optional full path to blastall executable',
-	   ath_pep_blast_db => 'optional file_base of the CXGN::BlastDB arabidopsis peptides blast database to use',
-	 );
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{ath_pep_blast_db} || 'ath1/ATH1_pep');
-}
-
-# sub _use_line {
-#   my ($self,$line) = @_;
-#   my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-#       $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-#   return $percent_id > 98 && $mismatches < 10 && $gapsm <= 1;
-# }
-sub _blastparams {
-  -e => '1e-20', -p => 'blastx'
-}
-
-=head2 BLAST_E_coli_K12
-
-  BLAST versus E. coli genome (genbank: NC_000913.2)
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    e_coli_genome_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_E_coli_K12;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-__PACKAGE__->run_for_new_submission(1);
-sub list_params {
-  return ( blastall_binary => 'optional full path to blastall executable',
-	   e_coli_genome_blast_db => 'optional file_base of the CXGN::BlastDB E. coli genome blast database to use',
-	 );
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{e_coli_genome_blast_db} || 'E.coli_K12/Ecoli_genome');
-}
-
-sub _blastparams {
-  -e => '1e-20', -p => 'blastn'
-}
-
-sub _use_line {
-  my ($self,$line) = @_;
-  my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-      $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-  return $percent_id > 90 && $hsp_len >= 300;
-}
-
-=head2 BLAST_tomato_chloroplast
-
-  BLAST versus tomato chloroplast genome (genbank: AM087200)
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    tomato_chloroplast_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_tomato_chloroplast;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-__PACKAGE__->run_for_new_submission(1);
-sub list_params {
-  blastall_binary => 'optional full path to blastall executable',
-  tomato_chloroplast_blast_db => 'optional file_base of the CXGN::BlastDB E. coli genome blast database to use'
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{tomato_chloroplast_blast_db} || 'screening/organelle/tomato_chloroplast');
-}
-
-sub _blastparams {
-  -e => '1e-4', -p => 'blastn'
-}
-
-sub _use_line {
-  my ($self,$line) = @_;
-  my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-      $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-  return $percent_id > 90 && $hsp_len >= 300;
-}
-
-=head2 BLAST_tomato_bacs
-
-  BLAST versus the other submitted tomato bacs
-
-  Secondary input parameters:
-    blastall_binary   - (optional) full path to blastall executable
-    tomato_bacs_blast_db - (optional) file_base of the L<CXGN::BlastDB> to use
-
-=cut
-
-package CXGN::BACSubmission::Analysis::BLAST_tomato_bacs;
-use base 'CXGN::BACSubmission::Analysis::BLAST';
-
-use Carp;
-use CXGN::Genomic::CloneIdentifiers qw/parse_clone_ident/;
-
-__PACKAGE__->run_for_new_submission(1);
-sub list_params {
-  blastall_binary => 'optional full path to blastall executable',
-  tomato_bacs_blast_db => 'optional file_base of the CXGN::BlastDB E. coli genome blast database to use'
-}
-sub _fileset {
-  my ($self,$aux_inputs) = @_;
-  return ($aux_inputs->{tomato_bacs_blast_db} || 'bacs/tomato_bacs');
-}
-
-sub _blastparams {
-  -e => '1e-3', -p => 'blastn'
-}
-
-sub _use_line {
-  my ($self,$line) = @_;
-  my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
-      $qstart,$qend,$hstart,$hend,$evalue,$bits) = split /\s+/,$line;
-
-  return unless $percent_id > 70 && $hsp_len >= 1000;
-
-  #FIXME: this munges BAC names without using CXGN::Genomic::CloneIdentifiers
-  my $hbac = parse_clone_ident($hname,'versioned_bac_seq')
-    or confess "can't parse bac sequence identifier '$hname'";
-  my $qbac = parse_clone_ident($qname,'versioned_bac_seq')
-    or confess "can't parse bac sequence identifier '$qname'";
-
-  foreach (qw/col row plate lib clonetype/) {
-    if($qbac->{$_} ne $hbac->{$_}) {
-#      warn "$qbac->{$_} eq $hbac->{$_}\n";
-      return 1;
-    }
-  }
-
-  return; #the bac idents must have been equal
-}
-
-=head2 Cross_match_vector
-
-  Parses the cross_match vector screen results into annotations.
-
-  Secondary input parameters:
-     none
-
-=cut
-
-package CXGN::BACSubmission::Analysis::Cross_match_vector;
-use English;
-use POSIX;
-use base 'CXGN::BACSubmission::Analysis';
-use CXGN::Annotation::GAMEXML::FromFile qw/gff_to_game_xml/;
-
-__PACKAGE__->run_for_new_submission(1);
-
-sub run {
-  my $self = shift;
-  my $submission = shift; #BACSubmission object
-  my $aux_inputs = shift; #hash ref of auxiliary inputs
-
-  my ($outfile,$errfile,$gff3_file,$game_xml_file) =
-    map { $self->analysis_generated_file($submission,$_) }
-      qw/out err gff3 game_xml/;
-
-  system "touch $outfile $errfile $gff3_file $game_xml_file";
-
-  open my $out_fh, ">$outfile"
-    or die "Could not open '$outfile' for writing: $!";
-  my $gff3_out = Bio::FeatureIO->new( -format => 'gff', -version => 3,
-				      -file => ">$gff3_file",
-				    );
-
-  #go through each sequence, find the X's, and make features where the
-  #vector probably is
-  my @vector_features;
-  foreach my $seq ($submission->vector_screened_sequences) {
-    my $seq_string = $seq->seq;
-    print $out_fh $seq->primary_id.": sequence length is ".length($seq_string)." bases\n";
-    while($seq_string =~ /X+/g) {
-      my $end = pos($seq_string);
-      my $start = $end - length($MATCH) + 1;
-      my $vector_name = $submission->clone_object->library_object->cloning_vector_object->name;
-      print $out_fh $seq->primary_id.": cross_match masked $vector_name vector sequence from base $start to base $end\n";
-      my $feat = $self->new_feature( -start  => $start,
-				     -end    => $end,
-				     -type   => 'match',
-				     -seq_id => $seq->primary_id,
-				     -source => $self->analysis_name,
-				     -annots => {ID => $self->_unique_bio_annotation_id("${vector_name}_vector_match")},
-				   );
-      $gff3_out->write_feature($feat);
-    }
-  }
-
-  $gff3_out = undef; #make sure to close the gff3 output
-
-  if ($submission->is_finished) {
-    gff_to_game_xml($submission->vector_screened_sequences_file,
-		    $gff3_file,
-		    $game_xml_file,
-		    program_name   => $self->analysis_name,
-		    program_date   => asctime(gmtime).' GMT',
-		    database_name  => 'vector sequence',
-		    gff_version    => 3,
-		   );
-  } else {
-    $submission->_write_unfinished_bac_xml_stub($game_xml_file);
-  }
-
-  #return the result files
-  return ($game_xml_file, $gff3_file, $outfile);
-}
-
-
-##### FGENESH BASE CLASS ####
-
-package CXGN::BACSubmission::Analysis::FGENESH;
-use English;
-use POSIX;
-use Carp;
-use base 'CXGN::BACSubmission::Analysis';
-
-use File::Temp qw/tempfile/;
-
-use Bio::FeatureIO;
-
-use CXGN::Annotation::GAMEXML::FromFile qw/gff_to_game_xml/;
-use CXGN::Tools::Run;
-use CXGN::Tools::File qw/ executable_is_in_path file_contents /;
-
-__PACKAGE__->run_for_new_submission(0);
-
-sub list_params {
-  return ( fgenesh_binary => 'optional full path to fgenesh executable',
-	 );
-}
-
-sub run {
-  my ($self,$submission,$aux_inputs) = @_;
-
-  my $fgenesh = find_executable($aux_inputs);
-  my ($outfile,$errfile,$gff3_file,$gff2_file,$game_xml_file) =
-    map { $self->analysis_generated_file($submission,$_) }
-      qw/out err gff3 gff2 game_xml/;
-
-  unless($ENV{CXGNBACSUBMISSIONFAKEANNOT}) {
-
-    #for each sequence in our sequence file, split it out into its own file, run fgenesh on it,
-    #and write its results to a gff3 file we're building
-
-    my $seqio = Bio::SeqIO->newFh(-file => $submission->repeat_masked_sequences_file,
-				  -format => 'Fasta'
-				 );
-    my $gff3_out = Bio::FeatureIO->new(-file => ">$gff3_file",
-				       -format => 'gff', -version => 3,
-				      );
-    my $gff2_out = Bio::FeatureIO->new(-file => ">$gff2_file",
-				       -format => 'gff', -version => 2,
-				      );
-    my $tempseq = $self->analysis_generated_file($submission,'tempseq');
-    while(my $seq = <$seqio>) {
-      #write the one seq to a temp file
-      Bio::SeqIO->new(-format => 'fasta', -file => ">$tempseq")->write_seq($seq);
-
-      my $run = CXGN::Tools::Run->run($fgenesh,
-				      $self->_params_name,
-				      '-scip_prom',
-				      $tempseq,
-				      { out_file => $outfile,
-				      }
-				     );
-      #convert output to gff3 and gff2
-      my $fgenesh_in = Bio::FeatureIO->new(-format => 'fgenesh',
-					   -file => $outfile);
-      while(my $f = $fgenesh_in->next_feature) {
-	$gff3_out->write_feature($f);
-	$gff2_out->write_feature($f);
-      }
-    }
-    $gff3_out = undef; #close the gff3 output
-
-    #convert the gff3 to gamexml
-    if ($submission->is_finished) {
-      gff_to_game_xml($submission->repeat_masked_sequences_file,
-		      $gff3_file,
-		      $game_xml_file,
-		      program_name   => $self->analysis_name,
-		      program_date   => asctime(gmtime).' GMT',
-		      database_name  => 'de novo',
-		      gff_version    => 3,
-		      render_as_annotation => 0,
-		     );
-    } else {
-      $submission->_write_unfinished_bac_xml_stub($game_xml_file);
-    }
-  } else {
-    warn "look at me, I'm not really running fgenesh\n";
-    `touch $game_xml_file $outfile $gff3_file`;
-  }
-  return ($game_xml_file,$gff3_file,$gff2_file,$outfile);
-}
-
-#figure out where our blastall executable is
-sub find_executable {
-  my $aux = shift;
-
-  return ($aux->{fgenesh_binary} && -x $aux->{fgenesh_binary})
-      || (executable_is_in_path 'fgenesh' && 'fgenesh')
-      || croak 'Cannot find fgenesh executable';
-}
-
-sub check_ok_to_run {
-  my ($self,$submission,$aux) = @_;
-  my $fgenesh = find_executable($aux);
-  my $tempfile = $self->analysis_generated_file($submission,'tempseq');
-  open my $tempfh,">$tempfile" or die "Could not open '$tempfile' for writing: $!";
-  print $tempfh <<EOS;
->testseq
-ACTAGCACGATCATGATAGTAGTACCATCATGCTAGCATATGATAGCTGCATCATATGATCGATCGATAGTAGCATCGATCATGCATCGAT
-EOS
-  close $tempfh;
-  my $testrun = CXGN::Tools::Run->run($fgenesh, 'Tomato', $tempfile);
-  unlink $tempfile;
-  return 1 if $testrun && $testrun->out =~ /Number of predicted genes/;
-}
-
-sub _params_name {
-  confess '_params_name not implemented in base FGENESH class';
-}
-
-
-=head2 FGENESH_Tomato
-
-  Run FGENESH with the Tomato parameters file.
-
-  Secondary input parameters:
-     fgenesh_binary - optional full path to fgenesh executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::FGENESH_Tomato;
-use base 'CXGN::BACSubmission::Analysis::FGENESH';
-
-__PACKAGE__->run_for_new_submission(1);
-
-sub list_params {
-  fgenesh_binary => 'optional full path to fgenesh executable'
-}
-sub _params_name { 'Tomato' }
-
-=head2 FGENESH_Nicotiana
-
-  Run FGENESH with the Nicotiana parameters file.
-
-  Secondary input parameters:
-     fgenesh_binary - optional full path to fgenesh executable
-
-=cut
-
-package CXGN::BACSubmission::Analysis::FGENESH_Nicotiana;
-use base 'CXGN::BACSubmission::Analysis::FGENESH';
-__PACKAGE__->run_for_new_submission(1);
-
-sub list_params {
-  fgenesh_binary => 'optional full path to fgenesh executable'
-}
-sub _params_name { 'Nicotiana' }
-
-
-###
-1;#do not remove
-###
