#!/usr/bin/perl -w

#******************************************************************#
#* Required modules                                               *#
#******************************************************************#
use 5.006_001;
use strict;
use English qw( -no_match_vars );

use Getopt::Long;
use IO::File;

# Command-line state. Each variable stays undef unless the matching
# option is given on the command line.
my $help = undef;

# Usage text, printed for --help and when the options cannot be parsed.
# The output files are always named from a base name plus the sample ID,
# so --output is described as a base name rather than a full file name.
my $helpstring = <<END;
Usage: ./ndv-converter.pl --file example.ndv --output example

Converts ndv files into csv format for use in spectral analysis and plotting software
One csv file is written per sample, named after the base name and the sample ID (BASE_SAMPLEID.csv)
Options:
--help    Displays usage information
--file    The ndv file to convert, which may include wildcards. If unspecified converts all .ndv files in the current directory
--output  The base name of the csv files to write. If unspecified uses the same base name as the ndv file

END

my $filename = undef;
my $outfile = undef;

# Read options from the command line (@ARGV)
my $result = GetOptions ("help" => \$help,
                         "file=s"   => \$filename,
                         "output=s" => \$outfile);

if (!$result)
{
	# GetOptions has already warned about the offending option. Show the
	# usage text and stop instead of converting with a mistyped option
	# silently ignored.
	print STDERR $helpstring;
	exit(1);
}

# Main program: either show the usage text, or convert every matching
# ndv file into one csv file per sample. Each csv line has the form
# "wavelength,value".
if (defined($help))
{
	# Display help message
	print $helpstring;
}
else
{
	# Index of the first spectral column. The columns before it are
	# skipped in both the header row and the data rows.
	my $first_data_col = 14;

	if (!defined($filename))
	{
		# If left blank, default input file to all ndv files in cwd
		$filename = "*.ndv";
	}

	# Expand wildcards.
	# NOTE(review): glob() also splits its pattern on whitespace, so a
	# file name containing spaces is not matched as a single name.
	my @files = glob($filename);

	if (scalar(@files) == 0)
	{
		# If there are no input files, give up
		print "No valid .ndv input file found, or file specified does ".
		      "not exist\n";
	}
	else
	{
		# Process each ndv file
		foreach my $file (@files)
		{
			open my $fh, '<', $file or die "Could not open $file $!.\n";

			# Wavelength labels are collected afresh for every file so
			# that a later file is never paired with the header row of
			# an earlier one.
			my @wavelengths;
			my $found_header = 0;

			# Look for the header row, which starts with "Sample ID"
			while (my $row = <$fh>)
			{
				# Drop the line ending, including the carriage return
				# of Windows files, then split on tabs.
				# NOTE(review): runs of tabs count as one separator, so
				# an empty cell would shift the later columns - confirm
				# ndv files never contain empty cells.
				chomp($row);
				$row =~ tr/\r//d;
				my @rowvals = split(/\t+/, $row);

				# A blank line splits into an empty list
				next if (!@rowvals);

				if ($rowvals[0] eq "Sample ID")
				{
					# An empty range (too few columns) yields no labels
					@wavelengths =
						@rowvals[$first_data_col .. $#rowvals];
					$found_header = 1;
					last;
				}
			}

			if (!$found_header)
			{
				# Without the header there are no wavelengths to pair
				# the values with, so say so instead of silently
				# producing nothing.
				close $fh;
				warn "No \"Sample ID\" header row found in $file, ".
				     "skipping\n";
				next;
			}

			my %samples;    # Sample key => values, one per wavelength
			my %times_seen; # Sample ID  => number of rows read so far

			# Get the data from the remaining rows
			while (my $datarow = <$fh>)
			{
				# Same line-ending handling as the header row, so the
				# last value of a row never carries a carriage return
				chomp($datarow);
				$datarow =~ tr/\r//d;
				my @rowvals = split(/\t+/, $datarow);
				next if (!@rowvals);

				# More than one spectrum may have been taken for a given
				# sample ID, in which case the second and later ones get
				# an index appended (ID_2, ID_3, ...)
				my $name = $rowvals[0];
				my $count = ++$times_seen{$name};
				if ($count > 1)
				{
					$name .= "_$count";
				}

				# Rows without any spectral columns produce no sample
				next if ($#rowvals < $first_data_col);

				push (@{$samples{$name}},
				      @rowvals[$first_data_col .. $#rowvals]);
			}
			close $fh;

			# Choose the base name for this file's output files.
			# NOTE(review): when --output is given and several input
			# files match, they all share this base name, so samples
			# with the same ID overwrite each other.
			my $outfile_pre;
			if (defined($outfile))
			{
				$outfile_pre = $outfile;
			}
			elsif ($file =~ m/(.*)\.ndv$/)
			{
				$outfile_pre = $1;
			}
			else
			{
				# The input name has no .ndv extension to strip
				$outfile_pre = $file;
			}

			# Print out a file for each sample in csv format
			foreach my $sample (keys(%samples))
			{
				my $outname = $outfile_pre . "_" . $sample . ".csv";

				# Three-argument open keeps characters in the sample ID
				# from being read as part of the open mode
				open (my $out, '>', $outname)
					  or die "$outname could not be opened for writing: $!";

				my @values = @{$samples{$sample}};
				for (my $i = 0; $i < scalar(@values); $i++)
				{
					print {$out} $wavelengths[$i] . "," .
					             $values[$i] . "\n";
				}

				# Buffered write errors only surface at close
				close ($out)
					  or die "$outname could not be written: $!";
			}
		}
	}

}

exit(0);
