#!/usr/bin/perl
# Filename:	html2jpg
# Author:	David Ljung Madison <DaveSource.com>
  my $VERSION=  1.00;	# Script version; printed by version() with a %4.2f format
# See License:	http://MarginalHacks.com/License
# Description:	Takes a screenshot of an HTML page (uses opera)
use strict;

##################################################
# Setup the variables
##################################################
# Name this script was invoked as, with any leading directory removed
(my $PROGNAME = $0) =~ s|.*/||;

# External commands.  The %id and %out placeholders in $GRAB are filled in
# by grab() just before the pipeline is run.
my $GRAB    = 'xwd -silent -nobdrs -id %id | convert -quality 85 - %out';
my $XINFO   = 'xwininfo -tree -root';
my $BROWSER = 'opera';	# Required by find_window() - see usage()

# Default snapshot type: whatever follows the first "2" in the program name
# (html2<type>), falling back to jpg when the name has no such suffix
my ($TYPE) = $PROGNAME =~ /2(.+)$/;
$TYPE = "jpg" unless defined $TYPE;

##################################################
# Usage
##################################################
# Print any error messages followed by the help text to STDERR, then exit.
#
# Arguments: zero or more error strings; each is printed on its own line
#            prefixed with "ERROR:  ".
# Never returns: always calls exit -1, including when invoked for -h.
sub usage {
  foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }

# The synopsis and option list below must stay in step with parse_args()
print STDERR <<USAGE;

Usage:	$PROGNAME [-d] [-o out] [-s sleep] [-g geom] <URL>
	Takes a screenshot of an HTML page and saves to an image

	-o <out>	postfix determines image type [$PROGNAME.$TYPE]
	-s <sleep>	Time to sleep before window dump
	-g <geom>	Browser geometry
	-d		Set debug mode
	-h		Show this help
	-v		Show version

Kludges:
	- We don't know when the page is finished loading
	- Rule of thumb for finding subwindow is guesswork!
	  If you're getting the wrong window, comment out call to subwindow()
	- Browser geometry ignored if browser doesn't create a new window
	  (Such as opera in "mdi" mode)
	- Dependent on xwd command and output of xwininfo and ...
	- Browser can't be iconified or partially off-screen
	- Only gets portion of html displayed in browser
	- Requires "opera" browser - update find_window() for other browsers
	- Opens up a bunch o' windows in your browser and leaves them there

Examples:
% $PROGNAME MarginalHacks.com -o MH.gif		# Snapshot of MarginalHacks
% $PROGNAME GetDave.com -o gif:- | xv -		# Pipe output, type gif

Heck - I just needed something to automate screenshots of HTML output.
Ask <your browser> to add '-remote SaveAsImage(file)' if you don't like it.  :)

Author:      David Ljung Madison
License:     http://MarginalHacks.com/License
Please see!  http://MarginalHacks.com/Pay

USAGE

  exit -1;
}

# Print the program name, version and copyright notice to STDOUT, then exit.
#
# Never returns: always calls exit -1 (same exit status as usage()).
sub version {
  print "\n";
  # Pass the program name as a printf argument rather than interpolating it
  # into the format string, so a '%' in the script's file name cannot be
  # mistaken for a conversion specifier.
  printf "This is %s version %4.2f\n", $PROGNAME, $VERSION;
  print "\n";
  print "Copyright (c) 2002 David Ljung Madison <MarginalHacks.com>\n";
  print "\n";
  exit -1;
}

# Parse @ARGV (consuming it) into the settings for one screenshot.
#
# Recognised options: -h (help), -v (version), -d (debug), -o <out>,
# -s <sleep>, -g <geom>.  Exactly one non-option argument, the URL, is
# required.
#
# Returns the list ($out, $url, $sleep, $geom).  Defaults: $out is
# "$PROGNAME.$TYPE", $sleep is 5 and $geom is "800x600".
# Any problem is reported through usage(), which exits the program.
sub parse_args {
  my $url;
  my ($out,$sleep,$geom) = ("$PROGNAME.$TYPE",5,"800x600");

  # Fetch the value belonging to an option, or bail out through usage()
  # when the command line ends right after the option.
  my $value_for = sub {
    my ($opt) = @_;
    usage("Option $opt requires a value") unless @ARGV && defined $ARGV[0];
    return shift(@ARGV);
  };

  # Test for definedness, not truth: an argument of "0" or "" must not
  # silently end option processing and drop the rest of the command line.
  while (defined(my $arg = shift(@ARGV))) {
    if ($arg eq '-h') { usage(); }
    if ($arg eq '-v') { version(); }
    if ($arg eq '-d') { $MAIN::DEBUG=1; next; }
    if ($arg eq '-o') { $out   = $value_for->($arg); next; }
    if ($arg eq '-s') { $sleep = $value_for->($arg); next; }
    if ($arg eq '-g') { $geom  = $value_for->($arg); next; }
    if ($arg =~ /^-.+/) { usage("Unknown option: $arg"); }
    usage("Too many URLs specified [$arg and $url]") if defined $url;
    $url=$arg;
  }
  usage("No URLs specified!") unless defined $url && length $url;

  return ($out,$url,$sleep,$geom);
}

# Write each argument to STDERR on its own line, tagged with the program
# name.  Does nothing unless debug mode ($MAIN::DEBUG, set by -d) is on.
sub debug {
  return if !$MAIN::DEBUG;
  print STDERR join('', map { "[$PROGNAME] $_\n" } @_);
}

##################################################
# Main code
##################################################
# Ask the browser to open a URL in a new window.
#
# Arguments: $url  - page to load
#            $geom - geometry string handed to the browser's -geometry option
# Returns the status from system(); callers in this file ignore it.
sub load {
  my ($url,$geom) = @_;

  # -newwindow doesn't seem to work on my MDI opera setup
  #
  # List-form system() runs the browser directly instead of through a shell,
  # so quotes, spaces or shell metacharacters in the URL or geometry reach
  # the browser verbatim and cannot break or extend the command line.
  return system($BROWSER, '-geometry', $geom,
                '-remote', "openURL($url,new-window)");
}

# Geometry regexp for xwininfo tree lines.  Kept as a plain string so it can
# be interpolated into larger patterns.  It contributes four capture groups:
# width, height, and the first X and Y offsets.
my $GEOM_RE = '\s(\d+)x(\d+)'          # WIDTHxHEIGHT, after one whitespace char
            . '\+(\-?\d+)\+(\-?\d+)'   # first +X+Y pair (captured)
            . '\s+\+\-?\d+\+\-?\d+$';  # second +X+Y pair (not captured), at end of line

# Find the smallest descendant window that is still at least 80% of the
# starting window in both width and height.
# (We're trying to get rid of the scrollbars, menubars, etc...)
#
# Keeps reading lines from the already-open XINFO handle (find_window()
# opens it), starting wherever the caller stopped.
#
# Arguments: $spacing - leading whitespace of the starting window's line
#            $window  - id of the starting window
#            $x, $y   - width and height of the starting window
# Returns the id of the best candidate, or $window itself if none qualified.
sub subwindow {
  my ($spacing,$window,$x,$y) = @_;

  my $best_area = $x*$y;
  my ($min_w,$min_h) = ($x*.8, $y*.8);
  my $depth = length($spacing);

  while (<XINFO>) {
    # A window line indented no deeper than the starting window means we
    # have walked out of its subtree, so whatever we have is the answer.
    if (/^(\s+)0x[0-9a-f]+/ && length($1) <= $depth) {
      return $window;
    }

    # Skip anything that is not a window line with a geometry
    my ($id,$w,$h) = /^\s+(0x[0-9a-f]+).*$GEOM_RE/ or next;

    # Candidate must be big enough in both dimensions, yet strictly
    # smaller in area than the best one found so far
    next unless $w >= $min_w && $h >= $min_h;
    next unless $w*$h < $best_area;

    $window    = $id;
    $best_area = $w*$h;
    debug("Smaller subwindow: $window ${w}x$h",$_);
  }

  # Ran out of xwininfo output
  return $window;
}

# Locate the X window id of the page area inside the running Opera browser.
#
# Runs the $XINFO command and scans its output in three stages:
#   1. find the top-level Opera window line, taking the current page title
#      (the text inside [...]) and the window's width/height from it;
#   2. find the descendant window whose quoted name is that title;
#   3. hand over to subwindow() to narrow that down to the smallest
#      sufficiently large descendant.
#
# Returns the chosen window id string.
# Dies if the command cannot be started, or if either the Opera window or
# the titled subwindow cannot be found.
sub find_window {
  # The bareword XINFO handle is kept on purpose: subwindow() continues
  # reading from it.
  open(XINFO, '-|', $XINFO) || die("Couldn't run: [$XINFO]\n");

  # Find the opera window (and the current title of the top window)
  my ($spacing,$title,$x,$y);
  while(<XINFO>) {
    # This could easily break and is very opera specific (works on 6.03)
    last if (($spacing,$title,$x,$y) = (/^(\s+)0x[0-9a-f]+ "Opera .*\[(.+)\]": \("opera" "opera"\)\s*$GEOM_RE$/));
  }
  die("Couldn't find window [Opera] in [$XINFO]\n") unless $title && $x && $y;

  # The title is arbitrary page text; escape it so characters such as
  # ( ) [ ] + ? * are matched literally instead of being read as regex
  # syntax (which could fail to match or even abort with a syntax error).
  my $title_re = quotemeta($title);

  # Now find the subwindow with the same title
  my $window;
  while (<XINFO>) {
    # A window line indented no deeper than the Opera window means we have
    # left Opera's subtree without finding the title.
    die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
      if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing));
    if (/^(\s+)(0x[0-9a-f]+)\s+"$title_re".*$GEOM_RE/) {
      ($spacing,$window,$x,$y) = ($1,$2,$3,$4);
      last;
    }
  }
  die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
    unless $window;
  debug("Found: $window [$title] ${x}x$y");

  $window = subwindow($spacing,$window,$x,$y);
  debug("Final window: $window");

  close XINFO;
  return $window;
}

# Dump one X window to an image by running the $GRAB pipeline.
#
# Arguments: $id  - X window id, substituted for %id
#            $out - output target, substituted for %out
# Dies (showing the command and $?) if the pipeline exits non-zero.
sub grab {
  my ($id,$out) = @_;

  # $GRAB needs a shell for its pipe, so each substituted value is wrapped
  # in single quotes (embedded single quotes escaped) to reach the commands
  # as exactly one word.  Output names with spaces or shell metacharacters
  # then work instead of breaking or altering the command line.
  my $shell_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
  };
  my %value_for = (
    id  => $shell_quote->($id),
    out => $shell_quote->($out),
  );

  # Single pass, so text inserted for one placeholder is never rescanned
  # for the other.
  (my $grab = $GRAB) =~ s/%(id|out)/$value_for{$1}/g;

  system($grab);
  die("Trouble with grab [$?]:\n  $grab\n") if $?;
}

# Program entry point: parse the command line, load the page in the
# browser, wait for it to render, then locate and dump the page window.
sub main {
  my @settings = parse_args();
  my ($image,$page,$delay,$size) = @settings;

  load($page,$size);

  # There is no way to know when the page has finished loading, so just wait
  sleep($delay);

  my $target = find_window();
  grab($target,$image);
}
main();
