#!/usr/bin/perl
# Filename:	scurvy
# Author:	David Ljung Madison <DaveSource.com>
# From:		http://MarginalHacks.com/Hacks/scurvy/
# See License:	http://MarginalHacks.com/License/
  my $VERSION=  '1.01';
# Description:	Screenplay/screenwriting tool: txt->script formatter
# Also See:	http://screenplay.sourceforge.net/
use strict;

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

my $TABSIZE = 5;

##################################################
# Usage
##################################################
sub fatal {
  foreach my $msg (@_) { print STDERR "[$PROGNAME] ERROR:  $msg\n"; }
  exit(-1);
}

sub usage {
  foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }
  print STDERR <<USAGE;

Usage:\t$PROGNAME [-d] [options] <file> [-o <out>]
\tFormats a script
\t-d                 Set debug mode
\t-o <file>          Output file (default is STDOUT)
\t-c                 Count headings
\t-C                 Show \"Continued\" page breaks
\t-i                 Add initial indent
\t-indent <tabs>     Add initial indent [default 2 tabs]
\t-in_indent <tabs>  Input indent (for non-scurvy formats)
\t-n                 Show page/line numbers
\t-I <fmt>           Set input format [default scurvy]
\t-O <fmt>           Set output format [default script]

\tFormats:
\t  scurvy   Our simple input format (described in docs)
\t  text     The default text script output
\t  final    Final Draft format (text-with-layout)
\t  rtf      RTF (input only) format - using Final Draft RTF format

USAGE
  exit -1;
}

sub parse_args {
  my $opt = {};
  $opt->{infmt}='scurvy';
  $opt->{outfmt}='text';

  # Defaults
  $opt->{per_page} = 53;

  my @formats = qw(scurvy text final rtf);

  while (my $arg=shift(@ARGV)) {
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; }
    if ($arg =~ /^-o$/) { $opt->{out}=shift @ARGV; next; }
    if ($arg =~ /^-c$/) { $opt->{count_head}=1; next; }
    if ($arg =~ /^-C$/) { $opt->{page_breaks}=1; next; }
    if ($arg =~ /^-i$/) { $opt->{indent}=2; next; }
    if ($arg =~ /^-indent$/) { $opt->{indent}=shift @ARGV; next; }
    if ($arg =~ /^-in_indent$/) { $opt->{in_indent}=shift @ARGV; next; }
    if ($arg =~ /^-I$/) { $opt->{infmt}=shift @ARGV; next; }
    if ($arg =~ /^-O$/) { $opt->{outfmt}=shift @ARGV; next; }
    if ($arg =~ /^-n$/) { $opt->{num}=1; next; }
    if ($arg =~ /^-/) { usage("Unknown option: $arg"); }
    usage("Too many files specified [$arg and $opt->{in}]") if $opt->{in};
    $opt->{in}=$arg;
  }
  usage("No file defined") unless $opt->{in};

  usage("Unknown input format: $opt->{infmt}")
    unless grep($opt->{infmt} eq $_, @formats);
  usage("Unknown output format: $opt->{outfmt}")
    unless grep($opt->{outfmt} eq $_, @formats);
  usage("Can't output rtf format: $opt->{outfmt}") if $opt->{outfmt} eq 'rtf';

  $opt;
}

sub debug {
  return unless $MAIN::DEBUG;
  foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; }
}

##################################################
# Main code
##################################################
my $HEADING	= 0;
my $ACTION	= 2;
my $DIALOGUE	= 3;	# array of [who,parenthetical,dialogue]
my $TRANSITION	= 4;
my $GENERAL	= 5;

sub add {
  my ($script,$type,@what) = @_;
  push(@$script, [$type, @what]);
}

sub read_scurvy {
  my ($opt) = @_;

  # Read in a scurvy format file
  my $script = [];
  my %alias;
  while(<IN>) {
    chomp;
    next unless /\S/;
    next if /^#/;	# "post-notes" for comments
    last if /^ZZSTOP$/;	# hook for debugging scripts

    # Handle {aliases}     # ignore:{
    s/{([^}\s]+)}/$alias{$1} || "{$1}"/eg;

    if (/^(\S+):=(\S[^\t]*)(\t.*)?$/) {
      $alias{$1}=$2;
    } elsif (/^(ext|int)/i) {
      add($script, $HEADING, uc($_));
    } elsif (/([^\t]+?)(\s*\(.+\))?:\t(?:\((.+)\)\s)?(\S.+)/) {
      my ($name, $vo, $paren, $txt) = ($1, $2, $3, $4);
      $name = uc($alias{$name} || $name);
      add($script, $DIALOGUE, "$name$vo", $paren, $txt);
    } elsif (/^\t(\S.*)/) {
      add($script, $ACTION, $1);
    } elsif (/^\t\t(\S.*)/) {
      add($script, $TRANSITION, uc($1));
    } else {
      add($script,$GENERAL, $_);
    }
  }
  $script;
}

my $saveline = undef;
sub getline {
  my ($opt) = @_;
  if (defined $saveline) {
    $_ = $saveline;
    undef $saveline;
    return $_;
  }
  $_ = scalar <IN>;
  return $_;
}
sub pushline {
  my ($opt,$line) = @_;
  die("INTERNAL ERROR: Can't pushline twice without calling getline!\n")
    if defined $saveline;
  $saveline = $line;
}

sub cutline {
  my ($opt) = @_;
  s/[\r\n]$//g;	# Chomp, but handles DOS format better
  s/^[ \d]\d /   /g;		# Remove line numbering
  next if /^PAGE \d+:$/;	# Remove page numbering
  next if /^\s+\(CONTINUED\)$/;	# Remove continuation lines
  next if /^CONTINUED( PAGE \d+:)?$/;

  # Figure out how many tabs of indentation we have
  /^(\s*)(.*)/;
  my ($indent,$txt) = (length($1),$2);
  $indent++ if $txt =~ /^\(/;	# Final Draft puts the '(' in the indent
  my $tabs = int($indent/$TABSIZE) - $opt->{in_indent};
  $tabs=0 if $tabs<0;
  ($tabs,$txt);
}

# Keep getting lines as long as they have the right number of tabs
sub continuetabs {
  my ($opt,$from,$to) = @_;
  $to = $to || $from;
  my @ret;
  while(getline($opt)) {
    my $line = $_;
    my ($tabs,$txt) = cutline($opt);
    if ($tabs<$from || $tabs>$to) {
      pushline($opt,$line);
      last;
    }
    $txt =~ s/\s$//;
    push(@ret,$txt);
  }
  join(' ',@ret);
}

sub read_text {
  my ($opt) = @_;
  my $script;

  # Read in a plaintext script
  my $script = [];
  while(getline($opt)) {
    next unless /\S/;
    my ($tabs,$txt) = cutline($opt);

    if ($tabs==0) {
      # Heading, Action or General
      if ($txt =~ /^(ext|int)/i) {
        add($script, $HEADING, $txt);
        next;
      }
      # Could either be action or general..
      # go with action since it has a shorter right margin
      # We have no way of telling if the next lines are a general
      # or a continuation of an action..
      add($script, $ACTION, $txt);
    } elsif ($tabs==4) {
      my ($name,$paren,$dialogue) = $txt;
      my $paren = continuetabs($opt,3);
      $paren =~ s/^\(//;  $paren =~ s/\)$//;
      my $dialogue = continuetabs($opt,2);
      add($script, $DIALOGUE, $name, $paren, $dialogue);
    } elsif ($tabs>=8) {
      my $and = continuetabs($opt,8,10);
      $txt .= " $and" if $and;
      $txt =~ s/:$//;
      add($script, $TRANSITION, $txt);
    } else {
      usage("Saw an unexpected spacing [$tabs tabs] in input file.\n\tConsider using -in_indent option");
    }

  }

  $script;
}

sub read_rtf {
  my ($opt) = @_;
  my $script;

  my @styles = qw(GENERAL SCENE_HEADING ACTION CHARACTER_NAME PARENTHETICAL DIALOG TRANSITION SHOT);

  # Read in a Final Draft RTF file
  my $script = [];
  my %style;
  my ($name,$paren);
  while (<IN>) {
    chomp;
    if (/^{\\s(\d+)[^}]+?\s+([^\\][^\}]+);\s*}/) {
      my ($type,$num) = (uc($2),$1);
      $type =~ s/\s+/_/g;
      $style{$num} = $type;
      print STDERR "Unknown style type: $type\n" unless grep($type eq $_, @styles);
    }
    if (/{\\pard.*\\s(\d+)[^}]+?\s+([^\\][^\}]+)\\par\s*}/) {
      my ($style,$stylenum,$txt) = ($style{$1},$1,$2);
      if (!$style) {
        print STDERR "Unknown style: \\s$stylenum\n";
        next;
      }
      # These are some RTF text codes I know.  I doubt script writers
      # even use all of these.  Does anyone know how you put { or } in text?
      $txt =~ s/\\tab/\t/g;
      $txt =~ s/\\emdash/--/g;
      $txt =~ s/\\endash/-/g;
      $txt =~ s/\\e(m|n)space/ /g;
      $txt =~ s/\\\~/ /g;
      $txt =~ s/\\_/-/g;
      $txt =~ s/\\'([0-9a-f]{2})/chr(hex("0x$1"))/eig;
      $txt =~ s/\\(l|r)quote/'/g;
      $txt =~ s/\\(l|r)dblquote/"/g;
      my @txt = split(/\\(?:par|sect|page)\s*/, $txt);
      my $simple;
      $simple = $HEADING if $style eq 'SCENE_HEADING';
      $simple = $ACTION if $style eq 'ACTION';
      $simple = $GENERAL if $style eq 'GENERAL';
      $simple = $TRANSITION if $style eq 'TRANSITION';
# Not sure what to do with this, we don't handle shots.
      $simple = $TRANSITION if $style eq 'SHOT';
      if (defined $simple) {
        map add($script, $simple, $_), @txt;
        next;
      }
      if ($style eq 'CHARACTER_NAME') {
        $name = $txt;
        next;
      }
      if ($style eq 'PARENTHETICAL') {
        $paren = $txt;
        $paren =~ s/^\(//;
        $paren =~ s/\)$//;
        next;
      }
      if ($style eq 'DIALOG') {
        foreach my $t ( @txt ) {
          add($script, $DIALOGUE, $name, $paren, $t);
          undef $paren;
        }
        undef $name;
        next;
      }
      print STDERR "Unused/unknown style? [$style,$stylenum]\n";
    }
  }

  $script;
}

sub read_input {
  my ($opt) = @_;

  open(IN,"<$opt->{in}") || usage("Couldn't read file: $opt->{in}");

  my $script;
  if ($opt->{infmt} eq 'scurvy') {
    $script = read_scurvy($opt);
  } elsif ($opt->{infmt} eq 'text') {
    $script = read_text($opt);
  } elsif ($opt->{infmt} eq 'final') {
    $opt->{in_indent} = 3 if !defined $opt->{in_indent};
    $script = read_text($opt);
  } elsif ($opt->{infmt} eq 'rtf') {
    $script = read_rtf($opt);
  }
  close(IN);
  $script;
}


##################################################
# Output
##################################################
sub fold {
  my ($cols, $txt, $pre, $pre2) = @_;
  $pre2 = $pre2 || $pre;

  my @fold;
  my $at = 0;
  my $line;
  while ($txt && $txt =~ s/^(\S*)(\s*)//) {
    my ($next,$space) = ($1,$2);
    my $l = length($next);
    my $ls = length($space);
    if ($at+$l+$ls < $cols) {
      $line .= $next.$space;
      $at+=$l+$ls;
    } elsif ($at+$l < $cols) {
      push(@fold, $line.$next);
      $line=""; $at=0;
    } elsif ($l > $cols) {
      push(@fold, $line.substr($next,0,$cols-$at));
      while (length($next) > $cols) {
        push(@fold, substr($next,0,$cols, ""));
      }
      $line = $next;
      $at = length($next);
      if ($at+$ls < $cols) {
        $line.=$space;
        $at+=$ls;
      } else {
        push(@fold, $line);
        $line=""; $at=0;
      }
    } elsif ($l+$ls < $cols) {
      push(@fold, $line);
      $line=$next.$space;
      $at=$l+$ls;
    } else {
      push(@fold, $line, $next);
      $line=""; $at=0;
    }
  }
  push(@fold, $line) if $line;
  my $ret = $pre.join("\n$pre2",@fold);
  split("\n", $ret);
}

sub write_scurvy {
  my ($opt,$script) = @_;

  my $t = "\t";

  foreach my $set ( @$script ) {
    my $what = shift @$set;
    if ($what == $DIALOGUE) {
      my ($name,$paren,$txt) = (@$set);
      $txt = "($paren) $txt" if $paren;
      print OUT "\n";
      print OUT "${name}:\t$txt\n";
      next;
    }
    my $tabs=0;
    $tabs=0 if $what == $HEADING;
    $tabs=1 if $what == $ACTION;
    $tabs=0 if $what == $GENERAL;
    $tabs=2 if $what == $TRANSITION;
    print OUT ${t}x$tabs.join('',@$set)."\n";
  }
  print OUT "\n";
}

sub write_text {
  my ($opt,$script) = @_;

  my $head = 1;
  my $tabsize = $TABSIZE;
  my $t = " "x$tabsize;
  # Indent is 2 tabs
  my $indent = $opt->{indent} ? $opt->{indent}*$tabsize : 0;
  $indent -= 3 if $indent && $opt->{num};
  $indent = " "x$indent;

  my $line = 1;
  my $page = 1;
  my @add;

  print OUT "PAGE $page:\n" if $opt->{num};

  foreach my $set ( @$script ) {
    my $what = shift @$set;
    if ($what == $HEADING) {
      my $txt = $set->[0];
      $txt = "$head $txt" if $opt->{count_head};
      $head++;
      @add = ("", fold(61,$txt));
    } elsif ($what == $ACTION) {
      @add = ("", fold(61,$set->[0]));
    } elsif ($what == $GENERAL) {
      @add = fold(78,$set->[0]);
    } elsif ($what == $TRANSITION) {
      @add = fold(16,$set->[0],"$t"x8);
    } elsif ($what == $DIALOGUE) {
      my ($name,$paren,$txt) = (@$set);
      @add = ("", fold(38,$name,"$t"x4));
      push(@add, fold(24,"$paren)","$t$t$t(","$t$t$t ")) if $paren;
      push(@add, fold(35,$txt,"$t$t")) if $txt;
    }

    if ($opt->{page_breaks} && $line + $#add+1 > $opt->{per_page}) {
      print OUT " "x50,"(CONTINUED)\n\nCONTINUED";
      print OUT " PAGE $page" if $opt->{num};
      print OUT ":\n";
      $line = 1;
      $page++;
    }

    foreach ( @add ) {
      printf OUT "%2d ",$line if $opt->{num} && /\S/;
      printf OUT "",$line if $opt->{num};
      print OUT "$indent$_\n";
      $line++;
    }
  }
}

sub write_output {
  my ($opt,$script) = @_;

  my $out = $opt->{out} || '&STDOUT';
  open(OUT, ">$out") || usage("Couldn't write output? [$out]");

  if ($opt->{outfmt} eq 'scurvy') {
    write_scurvy($opt,$script);
  } elsif ($opt->{outfmt} eq 'text') {
    write_text($opt,$script);
  } elsif ($opt->{outfmt} eq 'final') {
    $opt->{indent} = 3 if !defined $opt->{indent};
    write_text($opt,$script);
  }
  close OUT;
}

sub main {
  my $opt = parse_args();

  debug("Version: $VERSION\n");

  my $script = read_input($opt);
  write_output($opt,$script);
}
main();

##################################################
# POD/man
##################################################

__END__

=pod
=head1 NAME

scurvy - Format scripts / screenplays

=head1 SYNOPSIS

B<scurvy> [S<options>] E<lt>I<file>E<gt> [S<I<-o E<lt>I<file>E<gt>>>]

=head1 DESCRIPTION

scurvy converts text files in a simple format into proper screenplay
format.  It's something I wrote because I hate using snifty GUI editors
when I believe a text editor is all you need.

  "If you can't vi it, it sucks"

It takes a text file as input and outputs a screenplay.  More formats
may occur someday..

=head1 OPTIONS


=over 4


=item B<-out> I<file>

Set the output file (otherwise write to standard out)

=item B<-c>

Number the scene headings (INT/EXT)

=item B<-C>

Show the "CONTINUED" page breaks

=item B<-i>

Add the left margin indentation.  (Good for final print)

=item B<-indent> I<tabs>

Add the left margin indentation with a specific number of tabs.

=item B<-in_indent> I<tabs>

Specify the extra number of indent tabs in the input file.
(Defaults to three for 'final' input format)

=item B<-n>

Show page/line numbers

=item B<-I> I<fmt>

Specify the input format:

B<scurvy>   Our simple input format (described below)

B<text>     The default text script output (can be an input format also)

B<final>    Final Draft format (text-with-layout)

B<rtf>      RTF (input only) format
  Using the Final Draft style sheet with styles:
    GENERAL
    SCENE HEADING
    ACTION
    CHARACTER NAME
    PARENTHETICAL
    DIALOG
    TRANSITION
    SHOT

If you want to convert a Final Draft document so you can edit it in scurvy,
then first save it as text-with-layout - we'll call this file 'mymovie.fdr'

Then you can use scurvy to convert this to scurvy format:

% scurvy -I final mymovie.fdr -O scurvy -o mymovie.scr

It's possible you'll get an error if your margins are different - the
default margins should be read correctly, but if not try adjusting -in_indent.

You can also try saving as RTF and then reading in the RTF, though
this isn't guaranteed to work with all versions of Final Draft.
If you have a different script writer that does RTF output, I'd love
to see a sample copy.  To convert rtf we would save as rtf and:

% scurvy -I rtf mymovie.rtf -O scurvy -o mymovie.scr

Then you can edit mymovie.scr and get the final output with scurvy:

% scurvy mymovie.scr -o mymovie.txt

You will, of course, lose any aliases you may have had.  Try a simple
search and replace.  For example, to convert the name "Dave" to be
an alias "D" you can add the alias line to the top:

  Dv:=Dave Madison

And then do the search and replace (example using vi/vim):

  :%s/^Dave Madison/Dv:/
  :%s/^Dave Madison (/Dv (/

(The second line is for quotes with parenthesis)

=item B<-O> I<fmt>

Specify the output format.  (Same formats as input)

=back

=head1 SCURVY FORMAT

There are five types of line formats: heading, action, dialogue, transition, general.
Each type B<must> be on it's own line.  (Use I<:set wrap> in vi/vim to make it easier to edit)

=over 4

=item B<scene heading>

Scene headings are automatically recognized since they start with INT or EXT.

=item B<action>

Action lines start after one tab.

=item B<transition>

Transition lines start after two tabs

=item B<dialogue>

Dialogue follows the characters name, a colon and a tab.  Some examples:

  Dave:	I think we should go shopping!
  God (V.O.):	That's a bad idea, Dave
  Dave:	(pondering)	You're probably right.

Parentheticals go after the colon, but V.O., O.S. go before.

=item B<general>

Generals are just regular text not prefaced by tabs.

=item B<comments>

Any line that starts with a '#' character is ignored.

=back

=head1 ALIASES

Aliases for characters can be defined on any line:

  D:=Dave

And then they can be used as the character speaking dialogue:

  D:	I think we should go shopping!

Or in any line of text if inside {curly braces}

  God (V.O.):	That's a bad idea, {D}

=head1 EXAMPLE

Here's an example input file:

  D:=Dave (aliases for characters look like this)
  INT. SCENE HEADING - DAY
  	Actions have one tab
  		Transitions have two tabs
  General text is just plain text.
  Dave:	dialogue follows the ":<tab>"
  John (V.O.):	voice overs go before the :
  D:	(using an alias!)	And parentheticals go after!

=head1 BUGS

Garbage in, garbage out.

When reading 'final' or 'text' formats, it's impossible to
differentiate between a new 'GENERAL' line or a continuation
of an 'ACTION' line since they look the same.  Sad but true.
Hence I assume them to all be ACTIONs.

The different format parsing/output is somewhat beta - if you find
any bugs please send me an example script and the problem.

=head1 AUTHOR

David Ljung Madison <http://MarginalHacks.com/>

=cut

