#!/usr/local/bin/perl -w
#
# $Header: /tmp_mnt/local.fp/haskell/report/report-1.0/RCS/splitAndIndexPgm,v 1.0 90/03/28 15:00:06 partain Exp $
#
# This script reads a Haskell program and inserts LaTeX/verbatim
#   magic to indicate where a page break may occur.
#   A break is allowed only after a blank line that is followed by
#   code or a comment that begins in column 1.
#   Also, if the number of lines emitted since the last allowed break
#   exceeds $maxLines, then the LaTeX magic is inserted anyway.
#
#   Makes up \index (or \indextt) entries from type signatures.
#
#   Will Partain, partain@cs.glasgow.ac.uk
#
# ----------------------------------------------------------------------
# Driver: copy the Haskell program from the input files (or stdin) to
# stdout, wrapped in \bprogB ... \eprogB chunks.  A new chunk is started
#   (a) at a top-level line that directly follows a blank line, or
#   (b) forcibly, once more than $maxLines lines have been printed in
#       the current chunk.
# Index entries harvested from type signatures are printed between
# chunks, i.e. outside the verbatim ("@" ... "@") region.
#
# $prevState and @indexentries are deliberately left as package globals:
# the subroutines newState, grokForIndexEntries and printIndexEntries
# read and update them directly.
# ----------------------------------------------------------------------
$prevState    = 'toplevCode';   # classification of the previous line
@indexentries = ();             # names collected for the next \index dump

my $pgm         = $0;           # program name, used in diagnostics
my $currState   = '';           # classification of the current line
my $savedBlanks = 0;            # blank lines seen but not yet printed
my $maxLines    = 42;           # before there must be a break
my $lineCnt     = 0;            # lines printed in the current chunk

# Leading options.  Only -m<digits> (sets $maxLines) is understood; any
# other argument starting with "-" is consumed and ignored.
# The @ARGV test matters: without it, running the script as a plain
# filter (no arguments) matched against an undefined $ARGV[0] and
# produced an "uninitialized value" warning under -w.
while (@ARGV && $ARGV[0] =~ /^-/) {
    my $opt = shift(@ARGV);
    if ($opt =~ /^-m(\d+)/) {
        $maxLines = $1;
    }
}

print "\\noindent\\bprogB\n\@\n";
while (<>) {
    # Classify the raw line first; only then double the "@"s, because
    # a single "@" toggles verbatim mode in the output.
    $currState = newState();
    s/@/@@/g; # for verbatim...

    if ($prevState eq 'blankLine' && $currState =~ /^toplev/) {
        # A break is OK here: close the chunk, dump the index entries
        # gathered so far, then open a new chunk starting with this
        # line.  The pending blank lines are dropped; the break itself
        # takes their place.
        print "\@\n";
        printIndexEntries();
        grokForIndexEntries();
        print "\\eprogB\\noindent\\bprogB\n\@\n$_";
        # The line just printed is the first line of the new chunk, so
        # it is counted (as the forced-split path below also does).
        $lineCnt     = 1;
        $savedBlanks = 0;

    } elsif ($currState eq 'blankLine') {
        # Save these up; they are either replaced by a break or
        # collapsed into one blank line when the next line is printed.
        $savedBlanks++;

    } else {
        if ($lineCnt > $maxLines) {
            print STDERR "$pgm: Forced split after $maxLines lines\n";
            print "\@\n";
            # Dump before harvesting, so that names declared on the
            # current line are indexed with the chunk that holds them.
            printIndexEntries();
            grokForIndexEntries();
            print "\\eprogB\\noindent\\bprogB\n\@\n";
            $lineCnt = 0;
        } else {
            grokForIndexEntries();
        }
        if ($savedBlanks > 0) {
            # Any run of saved blank lines comes out as a single one.
            print "\n";
            $lineCnt++;
        }
        print $_;
        $lineCnt++;
        $savedBlanks = 0;
    }
    $prevState = $currState;
}

# Close the final chunk and flush whatever index entries remain.
print "\@\n";
printIndexEntries();
print "\\eprogB\n";

sub newState {
    # Classify the input line held in $_ and return one of
    #   'toplevComment', 'blankLine', 'toplevCode' or 'localCode'.
    # Reads the global $prevState (classification of the previous line).

    # A "--" comment starting in column 1.
    return 'toplevComment' if /^--/;

    if (/^\s*$/) {
        # The gimmick: an empty line right after a top-level comment is
        # reported as part of that comment rather than as a blank line.
        return $prevState eq 'toplevComment' ? 'toplevComment' : 'blankLine';
    }

    # Something in column 1 is top-level code; an indented line is not.
    return /^[^ \t]/ ? 'toplevCode' : 'localCode';
}

sub grokForIndexEntries { # in $_
    # Harvest the names declared by a type signature on the line in $_
    # and append them to the global @indexentries.
    #
    # A line qualifies when it starts in column 1 with a lower-case
    # letter or "(" and contains "::".  Everything before the FIRST
    # "::" is taken as a comma-separated list of names; spaces and tabs
    # are removed before splitting.  Lines that do not qualify leave
    # @indexentries untouched.  No value is returned.

    # next version: allow for leading whitespace?  I think so...

    # The match is non-greedy on purpose: a greedy ".*" runs on to the
    # LAST "::" of the line, so a signature followed by a comment or
    # annotation that itself contains "::" would yield one garbage
    # entry instead of the declared names.
    if (/^([a-z\(].*?)::/) {
        my $goodies = $1;
        $goodies =~ s/[ \t]//g;
        push(@indexentries, split(/,/, $goodies));
    }
    return;
}

sub printIndexEntries { # also re-sets @indexentries
    # Print one LaTeX index command on stdout for every name in the
    # global @indexentries, then empty the list.
    #   * plain identifiers become \indextt{name}, except those that
    #     start with "prim" followed by a capital letter;
    #   * parenthesised operators become \index{op@@typeset-op};
    #   * anything else is reported on stderr and skipped.

    # How to typeset the operators that cannot be handed to TeX as they
    # stand.  Operators not listed here are printed unchanged.
    # (this is really really ugly...)
    my %texFor = (
        '||'   => q[{\tt {\char'174}{\char'174}}],
        '^^'   => q[{\tt {\char'136}{\char'136}}],
        '_'    => q[{\tt {\char'137}}],
        '^'    => q[{\tt {\char'136}}],
        '\\\\' => q[{\tt {\char'134}{\char'134}}],
        '\\='  => q[{\tt {\char'134}=}],
        '%'    => q[{\tt {\char'045}}],
        '!!'   => q[{\tt {\char'041}{\char'041}}],
        '!'    => q[{\tt {\char'041}}],
        '&&'   => q[{\tt \&\&}],
        '++'   => q[{\tt ++}],
        '//'   => q[{\ptt //}],
        '.'    => q[{\ptt .}],
    );

    foreach my $entry (@indexentries) {
        if ($entry =~ /^[a-z][A-Za-z0-9]*$/) {
            # no primitives, we're British
            print "\\indextt\{$entry\}%\n" unless $entry =~ /^prim[A-Z]/;
            next;
        }

        # Strip the surrounding parentheses of an operator section;
        # $raw stays undefined when the entry is not of that form.
        my ($raw) = $entry =~ /^\((.*)\)$/;
        unless (defined $raw) {
            print STDERR "index proto-entry in unexpected form: $entry\n";
            next;
        }

        my $processed = exists $texFor{$raw} ? $texFor{$raw} : $raw;
        print "\\index\{$raw\@\@$processed\}%\n";
    }
    @indexentries = (); # re-set
}
