#!/usr/bin/perl
# @(#) $Id$
# vim: set tabstop=4 shiftwidth=4 expandtab scrolloff=10:

# ----------------------------------------------------------------------------
# environment
# ----------------------------------------------------------------------------

use strict;

use Getopt::Long;

# ----------------------------------------------------------------------------
# setup
# ----------------------------------------------------------------------------

our $PROGID     = 'tag2tsv';
our $VERSION    = '1.2';
our $CVSID      = '$Id$';

our $file_version = 0.0;
our $physical_line_count = 0;

our @valid_fields = ();
our $num_fields = 0;
our %serial_fields = ();
our %skip_fields = ();
our $start_field = undef;

our %current_values = ();
our %prev_values = ();

# ----------------------------------------------------------------------------
# process command line
# ----------------------------------------------------------------------------

our $optsts         = undef;    # returned status of GetOptions()
our @optkwds        = ();       # used by debug and help routines
# our @cmd_args       = ();       # remaining command line arguments

our $opt_help       = undef;    push @optkwds, 'help';
our $opt_version    = undef;    push @optkwds, 'version';
our $opt_tags       = undef;    push @optkwds, 'tags';
our $opt_type       = undef;    push @optkwds, 'type';

$optsts = GetOptions(

    "help!"         => \$opt_help,
    "version!"      => \$opt_version,

    "tags=s"        => \$opt_tags,
    "type=s"        => \$opt_type,

) or die "?Please check your command-line options.\n";

# ----- handle simple options -----

if ($opt_version)   { do_version(); exit; }
if ($opt_help)      { do_help();    exit; }

# ----- defaults -----

$opt_tags ||= "";
$opt_type ||= "tsv";    # "tsv", "csv", "block"

# ----- preliminary argument processing -----

# @cmd_args = ( @ARGV );
# if (0 == @cmd_args ) {
#     unshift @cmd_args, "help";
# }

# ----------------------------------------------------------------------------
# main
# ----------------------------------------------------------------------------

our $first = 1;
while (my $line = get_logical_line()) {

    if ($line =~ m/^%/) {

        my ($directive, @dirargs) = split(/\s+/, $line);

        if ($directive eq '%FORMAT')    { dx_format(@dirargs); next; }
        if ($directive eq '%FIELDS')    { dx_fields(@dirargs); next; }
        if ($directive eq '%SERIAL')    { dx_serial(@dirargs); next; }
        if ($directive eq '%SKIP')      { dx_skip(@dirargs);   next; }

        dx_unknown($directive);
        next;
    }

    if ($line =~ m/^(\w+)\s*:\s*(.*)/) {

        my $tag = $1;
        my $val = $2;

        if (! grep(m/$tag/, @valid_fields) ) {
            print STDERR "[e] $physical_line_count : Unknown tag: '$tag'\n";
            exit 1;
        }

        if ($tag eq $start_field) {
            print_current_values($opt_type) unless $first;
            $first = 0;
            %prev_values = %current_values;
            %current_values = ();
        }

        $current_values{$tag} = $val;
        if ($val eq "*") {
            if (defined $serial_fields{$tag}) {
                $current_values{$tag} = $prev_values{$tag} + 1;
            } else {
                $current_values{$tag} = $prev_values{$tag};
            }
        }

        next;
    }

    print STDERR "[e] $physical_line_count : Unknown line format: '$line'\n";
    exit 1;
}

print_current_values($opt_type);

exit;

# ----------------------------------------------------------------------------
# subroutines
# ----------------------------------------------------------------------------

sub do_version {

    print STDERR "[i] Program $PROGID, version $VERSION\n";
    print STDERR "[i] $CVSID\n";

    return;
}

# ----------------------------------------------------------------------------

sub do_help {

    my @foo = ( @optkwds );
    map { s/_/-/ } @foo;

    print "Options:\n";

    for my $k (sort @foo) {
        printf "    %s\n", $k;
    }

    return;
}

# ----------------------------------------------------------------------------

sub dx_unknown {
  my $unknown = shift;

    print STDERR "[e] $physical_line_count : unknown directive '$unknown'\n";
    exit 1;

    return;
}

# ----------------------------------------------------------------------------

sub dx_format {

    $file_version = shift;

    # print STDERR "[i] $physical_line_count : File format version = $file_version\n";

    return;
}

# ----------------------------------------------------------------------------

sub dx_serial {

    foreach my $field (@_) {
        $serial_fields{$field} = 1;
    }

    # print STDERR "[i] $physical_line_count : Serial fields '", join(' ', keys %serial_fields), "'\n";

    return;
}

# ----------------------------------------------------------------------------

sub dx_skip {

    foreach my $field (@_) {
        $skip_fields{$field} = 1;
    }


    # print STDERR "[i] $physical_line_count : Skip fields '", join(' ', keys %skip_fields), "'\n";

    return;
}

# ----------------------------------------------------------------------------

sub dx_fields {

    foreach my $field (@_) {
        push @valid_fields, $field;
    }

    $num_fields  = scalar(@valid_fields);
    $start_field = $valid_fields[0];

    # print STDERR "[i] $physical_line_count : Start field '$start_field'\n";
    # print STDERR "[i] $physical_line_count : Fields '", join(' ',@valid_fields), "'\n";

    return;
}

# ----------------------------------------------------------------------------

sub print_current_values {
  my $type = shift;

    my $fmt = "%-15s : %s\n";

    if ( $num_fields != (keys %current_values) ) {
        my @missing = ();
        foreach my $f (@valid_fields) {
            push @missing, $f unless (grep m/$f/, keys %current_values);
        }
        print STDERR "[e] $physical_line_count : Missing field(s) '",
            join(' ',@missing), "' in record preceeding line $physical_line_count\n";
        exit 1;
    }

    foreach my $k (@valid_fields) {
        if ( defined($skip_fields{$k}) ) {
            next;
        }
        if ($type eq "block") {
            printf $fmt, $k, $current_values{$k};
        } elsif ($type eq "tsv") {
            print "$current_values{$k}\t";
        } elsif ($type eq "csv") {
            print "$current_values{$k},";
        } else {
            print STDERR "[e] Unknown output type '$type' - aborting\n";
            exit 1;
        }
    }

    if ($type eq "block") {
        printf $fmt, 'Tags', $opt_tags;
        print "\n"; # blank line afterward
    } else {
        print "$opt_tags\n";
    }
}

# ----------------------------------------------------------------------------

sub clean_text {
  my $text = shift;

    chomp $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    return $text;
}

# ----------------------------------------------------------------------------

sub remove_comment {
  my $text = shift;

    $text =~ s/\s*#.*$//;

    return $text;
}

# ----------------------------------------------------------------------------

sub get_logical_line {

    my $part;
    my $line = undef;

    while ($part = <>) {

        $physical_line_count++;

        $part = clean_text($part);
        $part = remove_comment($part);

        next if ($part =~ m/^$/);
        $line .= $part;

        # continued?
        if ($line =~ s/\\$//) {
            $line =~ s/\s+$/ /;
            next unless eof();
        }

        last;
    }

    return $line;
}

# ----------------------------------------------------------------------------
# end
# ----------------------------------------------------------------------------
__END__
