#!/usr/bin/perl -w
use strict;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use File::Spec;
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use LaTeXML;
use LaTeXML::Post;
use LaTeXML::Util::Pathname;

#======================================================================
# Parse command line.
#======================================================================
my $identity = "latexmlpost (LaTeXML version $LaTeXML::VERSION)";

# Switches that start out with a definite value.
my $help        = 0;
my $showversion = 0;
my $verbosity   = 0;
my $validate    = 1;

# Everything below is deliberately left undefined, so that later stages can
# tell "not specified" (undef) apart from an explicit NO (0) or YES (1).
my($sourcedir,$destination);
my($format,$parallelmath,$mathimages,$svg,$keepXMath,$stylesheet,$css);
my($dographics,$destdir);

# Math formats requested so far (in request order),
# and the formats that were explicitly switched off.
my @math_formats         = ();
my %removed_math_formats = ();

# The table of recognized command line options.
my @option_spec =
  ( # --- Input, output and general control.
    "destination=s" => sub {
      my(undef,$value) = @_;
      $destination = $value;
      # Unless a format was already chosen, guess it from the file extension.
      unless(defined $format){
        if   ($destination =~ /\.xhtml$/){ $format = 'xhtml'; }
        elsif($destination =~ /\.html$/) { $format = 'html'; }
      }
    },
    "sourcedirectory=s" => \$sourcedir,
    "verbose+"          => \$verbosity,
    "stylesheet=s"      => \$stylesheet,
    "css=s"             => \$css,
    "format=s"          => \$format,
    "validate!"         => \$validate,

    # --- Choices for math processing.
    # Design notes: one could want OpenMath embedded in a MathML annotation,
    # several math representations side by side within <Math>, representations
    # combined through <mml:semantics>, or yet other parallel means (om, omdoc ...).
    # Hence the individual transformations are kept separate from their combination;
    # if they are combined, an id/ref mechanism will be needed.
    # (A former "mathml|mml!" option, bound to $mathml, is disabled.)
    "parallelmath!"               => \$parallelmath,
    "mathimages!"                 => \$mathimages,
    "presentationmathml|pmml"     => sub { addMathFormat('pmml'); },
    "contentmathml|cmml"          => sub { addMathFormat('cmml'); },
    "openmath|om"                 => sub { addMathFormat('om'); },
    "nopresentationmathml|nopmml" => sub { removeMathFormat('pmml'); },
    "nocontentmathml|nocmml"      => sub { removeMathFormat('cmml'); },
    "noopenmath|noom"             => sub { removeMathFormat('om'); },
    "keepXMath!"                  => \$keepXMath,

    # --- Graphics: similar issues as for math, but more limited.
    # Images from includegraphics (eg. ps) can be converted to web images (eg. png);
    # picture/pstricks material can be converted to png or possibly svg.
    "graphicimages!" => \$dographics,
    "svg!"           => \$svg,

    # --- Information requests.
    "VERSION" => \$showversion,
    "help|?"  => \$help,
  );

# Parse the arguments; on a malformed command line, show brief usage and quit.
GetOptions(@option_spec)
  or pod2usage(-message => $identity, -exitval => 1, -verbose => 0, -output => \*STDERR);

# --help prints the full manual on standard output.
if($help){
  pod2usage(-message => $identity, -exitval => 1, -verbose => 2, -output => \*STDOUT); }
# --VERSION only reports the identity string.
if($showversion){
  print STDERR "$identity\n";
  exit(1); }

# Get the requested XML file; '-' stands for standard input.
if(!@ARGV){
  Error("Missing input xmlfile"); }
my $xmlfile = shift(@ARGV);
unless($xmlfile eq '-'){
  # The .xml extension may be omitted on the command line.
  if(!-f $xmlfile){ $xmlfile .= '.xml'; }
  if(!-f $xmlfile){ Error("No input file \"$xmlfile\" found"); }
}

#======================================================================
# Sanity check and Completion of options.
#======================================================================
# Default the source directory to the directory holding the input file
# (or the current directory when reading from standard input).
if(!defined $sourcedir){
  if($xmlfile eq '-'){
    $sourcedir = '.'; }
  else {
    # Recombine volume and directory, so that a volume component
    # (eg. a Windows drive letter) is not lost; the file name is dropped.
    my($vol,$dir) = File::Spec->splitpath($xmlfile);
    $sourcedir = File::Spec->catpath($vol,$dir,'') || '.'; }}
# The destination directory is the directory part of the destination file.
if((defined $destination) && (!defined $destdir)){
  my($vol,$dir) = File::Spec->splitpath($destination);
  $destdir = File::Spec->catpath($vol,$dir,''); }
# An empty directory part means the current directory.
$sourcedir = '.' unless $sourcedir;
$destdir   = '.' unless $destdir;

# Sanity check & option completion for known output formats.
# With no format given, nothing is defaulted here.
if(!defined $format){}
elsif($format eq 'html'){
  # The default html stylesheet can only show math as images, and has no SVG support.
  Error("Default html stylesheet only supports math images")
    if (!defined $stylesheet) && ($parallelmath || scalar(@math_formats));
  Error("Default html stylesheet does not support SVG") if $svg;

  $mathimages = 1 unless defined $mathimages;
  $dographics = 1 unless defined $dographics;
  if(!defined $stylesheet){
    $stylesheet = "LaTeXML-html.xsl";
    $css = "LaTeXML.css" unless defined $css; }}
elsif($format eq 'xhtml'){
  Error("Default xhtml stylesheet does not support math images")
    if (!defined $stylesheet) && $mathimages;
  # Several math formats imply parallel markup, unless explicitly decided otherwise.
  $parallelmath = 1 if (!defined $parallelmath) && (scalar(@math_formats)>1);
  Error("Default xhtml stylesheet only supports OpenMath in parallel with MathML")
    if (!defined $stylesheet) && !$parallelmath && grep($_ eq 'om',@math_formats);
  if(!@math_formats){		# No math format specified?
    # Default to Presentation MathML.  Content MathML is added for parallel markup,
    # or as the fallback when pmml was explicitly disabled (the list is then still empty).
    addMathFormat('pmml');
    addMathFormat('cmml') if $parallelmath || !@math_formats; }
  $svg        = 1 unless defined $svg;
  $dographics = 1 unless defined $dographics;
  if(!defined $stylesheet){
    $stylesheet = "LaTeXML-xhtml.xsl";
    $css = "LaTeXML.css" unless defined $css; }}

# Checks that apply whatever the output format is.
# (Error exits, so the second check never sees an empty format list.)
Error("Parallel math markup needs at least two formats")
  if $parallelmath && (scalar(@math_formats) < 2);
Error("Parallel math markup can only be based on MathML") 
  if $parallelmath && ($math_formats[0] !~ /^(cmml|pmml)$/);

#======================================================================
# Do the processing.
#======================================================================

# Emit diagnostics and document output as UTF-8.
binmode(STDERR,":utf8");
binmode(STDOUT,":utf8");
print STDERR "$identity\n" if $verbosity > 0;

# Create destination directory if needed,
# (mkdir creates a single directory level; missing parents are reported as an error).
if($destdir && !-d $destdir){
    (mkdir $destdir
     or die "Couldn't create destination dir \"$destdir\": $!"); }

# Create the processors; they are collected in @procs in this order:
# math images, graphics, SVG, math conversions, XMath purge, XSLT.
# Each processor module is loaded only when it is actually requested.
my @procs = ();
if($mathimages){
  require 'LaTeXML/Post/MathImages.pm';
  push(@procs,LaTeXML::Post::MathImages->new()); }
if($dographics){
  require 'LaTeXML/Post/Graphics.pm';
  push(@procs,LaTeXML::Post::Graphics->new()); }
if($svg){
  require 'LaTeXML/Post/SVG.pm';
  push(@procs,LaTeXML::Post::SVG->new()); }
if(@math_formats){
  # When math is converted, the intermediate XMath is dropped unless asked for.
  $keepXMath  = 0 unless defined $keepXMath;
  my @mprocs=();
  foreach my $fmt (@math_formats){
    if($fmt eq 'pmml'){
      require 'LaTeXML/Post/MathML.pm';
      push(@mprocs,LaTeXML::Post::MathML::Presentation->new()); }
    elsif($fmt eq 'cmml'){
      require 'LaTeXML/Post/MathML.pm';
      push(@mprocs,LaTeXML::Post::MathML::Content->new()); }
    elsif($fmt eq 'om'){
      require 'LaTeXML/Post/OpenMath.pm';
      push(@mprocs,LaTeXML::Post::OpenMath->new()); }}
  if($parallelmath){
    # A single Parallel processor wraps all the individual math processors.
    require 'LaTeXML/Post/MathML.pm';
    push(@procs,LaTeXML::Post::MathML::Parallel->new(math_processors=>[@mprocs])); }
  else {
    push(@procs,@mprocs); }}
else {
    # No math conversion requested: keep XMath unless told otherwise.
    $keepXMath  = 1 unless defined $keepXMath;
  }
if(!$keepXMath){
  require 'LaTeXML/Post/PurgeXMath.pm';
  push(@procs,LaTeXML::Post::PurgeXMath->new()); }
if($stylesheet){
  require 'LaTeXML/Post/XSLT.pm'; 
  push(@procs,LaTeXML::Post::XSLT->new(stylesheet=>$stylesheet)); }

# Read the input document, either from standard input or from the named file.
my $POST = LaTeXML::Post->new();
my $indoc;
if($xmlfile eq '-'){
    # Slurp the whole document.  STDIN is read explicitly: the magic <> would
    # instead read any leftover command line arguments as input files.
    my $content;
    { local $/ = undef; $content = <STDIN>; }
    $indoc = $POST->readDocumentFromString($content,validate=>$validate); }
else {
    $indoc = $POST->readDocument($xmlfile,validate=>$validate); }

# Run the document through the collected processors.
my $doc = $POST->process($indoc, 
			 processors     => [@procs],
			 sourceDirectory=> $sourcedir,
			 destinationDirectory=>$destdir,
			 format         => $format,
			 stylesheet     => $stylesheet,
			 CSS            => $css,
			 verbosity      => $verbosity);
if(!$stylesheet){      # If no XSL, we'll munge our own DTD for now...
  # Collect the extra markup families that the math formats and SVG option call for.
  my @addns=( (grep(/^(pmml|cmml)$/,@math_formats) ? ('MathML'):()),
	      (grep(/^(om)$/,       @math_formats) ? ('OpenMath'):()),
	      ($svg ? ('SVG'):()) );
  $POST->adjust_latexml_doctype($doc,@addns) if @addns; }

# Write the result to the destination file, or print it on standard output.
if($destination){ 
  $POST->writeDocument($doc,$destination,$format); 
  # When the default css is in use, copy it next to the output if not already there.
  # $css is undefined when neither a format default nor --css set it, so test
  # definedness first to avoid an uninitialized-value warning under -w.
  if(defined $css && ($css eq 'LaTeXML.css') && !-f "$destdir/LaTeXML.css"){
    foreach my $dir (@INC){	# Find installed stylesheet.
      next unless -f "$dir/LaTeXML/dtd/LaTeXML.css";
      pathname_copy("$dir/LaTeXML/dtd/LaTeXML.css", "$destdir/LaTeXML.css"); 
      last; }}
}
else {
  print $POST->toString($doc,$format); }

#======================================================================
# helpers
#======================================================================
# Report a usage error: prints the program identity followed by $message
# and the brief usage text on STDERR, then exits with status 1 (via pod2usage).
sub Error {
  my $message = shift;
  my $report  = $identity . "\n" . $message;
  pod2usage(-output  => \*STDERR,
            -verbose => 0,
            -exitval => 1,
            -message => $report); }

# Append $fmt to the list of requested math formats, unless it is already
# in the list or has been explicitly disabled by removeMathFormat.
sub addMathFormat {
  my $fmt = shift;
  my $already_listed = grep { $_ eq $fmt } @math_formats;
  push(@math_formats, $fmt)
    unless $already_listed || $removed_math_formats{$fmt}; }
# Drop $fmt from the list of requested math formats, and record it as
# disabled so that a later addMathFormat for it has no effect.
sub removeMathFormat {
  my $fmt = shift;
  my @remaining = grep { $_ ne $fmt } @math_formats;
  @math_formats = @remaining;
  $removed_math_formats{$fmt} = 1; }

#**********************************************************************
__END__

=head1 NAME

C<latexmlpost> postprocesses an xml file generated by latexml for common tasks,
such as converting math to images and processing graphics inclusions for the web.

=head1 SYNOPSIS

latexmlpost [options] xmlfile

  Options:
   --format=html|xhtml|xml    requests the output format.
   --stylesheet=xslfile       requests the XSL transform of the document using the given
                              xslfile as stylesheet.
   --css=cssfile              use the cssfile in html/xhtml (when using default stylesheet)
   --mathml                   requests conversion of the math to Presentation MathML
   --presentationmathml       requests conversion of the math to Presentation MathML
   --nopresentationmathml     requests no conversion of the math to Presentation MathML
   --contentmathml            requests conversion of the math to Content MathML
   --nocontentmathml          requests no conversion of the math to Content MathML
   --openmath                 requests conversion of the math to OpenMath
   --parallelmath             requests parallel math markup for MathML
   --noparallelmath           requests no parallel math markup for MathML
   --svg                      requests conversion of the pictures to SVG
   --keepXMath                preserves the intermediate XMath representation
   --verbose                  prints progress messages during processing.
   --VERSION                  show version number.
   --source=sourcedir         specifies directory of source TeX file.
   --destination=outputfile   specifies output file (and directory).
   --help                     shows help message.

If xmlfile is '-', latexmlpost reads the XML from standard input.

=head1 OPTIONS AND ARGUMENTS

latexmlpost postprocesses an xml file, C<xmlfile>, generated by latexml
to perform common tasks, such as converting math to images and processing
graphics inclusions for the web.  If C<-> is given for C<xmlfile>, the
xml data is read from standard input.

=over 4

=item B<--format>=I<html|xhtml|xml>

Specifies the output format for post processing. 
html format converts the material to html and the mathematics to png images.
xhtml format converts to xhtml and uses presentation MathML (after attempting
to parse the mathematics) for representing the math.  In both cases, any
graphics will be converted to web-friendly formats and/or copied to the
destination directory.  By default, the output is left in LaTeXML's xml,
but the math is parsed and converted to presentation MathML.
For html and xhtml, a default stylesheet is provided, but see
the B<--stylesheet> option.

=item B<--stylesheet>=I<stylesheet>

Specifies an XSLT stylesheet for transforming the xml.

=item B<--mathml>

=item B<--presentationmathml>

=item B<--contentmathml>

=item B<--nopresentationmathml>

=item B<--nocontentmathml>

Requests that the mathematical material be converted to MathML, either
Presentation MathML, Content MathML or both.  If both presentation and
content are requested, the first request will be the primary one,
with the second being an C<annotation>.
If a value is given to the option it must be one
of: presentation, content, presentation+content (presentation annotated with content)
or content+presentation (content annotated with presentation).

=item B<--openmath>

Requests that the mathematical material be parsed, and converted to
OpenMath.  This is currently a very naive conversion.

=item B<--parallelmath>

=item B<--noparallelmath>

Requests (or disables) parallel math markup using the MathML semantics element.
The first math markup should be either Presentation or Content MathML markup;
additional markup models will be added as annotations to the semantics element.

=item B<--svg>

Requests that the picture and pstricks material be parsed and converted to SVG.

=item B<--keepXMath>

By default, when either of the C<--mathml> or C<--openmath> conversions
is used, the intermediate math representation will be removed;  
this option preserves it.

=item B<--source>=I<source>

Specifies the directory where the original latex source is located.
Unless latexmlpost is run from that directory, it may be necessary
to specify this option in order to find graphics and style files.

=item B<--destination=>I<destination>

Specifies the destination file and directory.  The directory is needed for
mathimages and graphics processing.


=item B<--stylesheet>=I<xslfile>

Requests the XSL transformation of the document using the given xslfile as stylesheet.
If the stylesheet is omitted, a `standard' one will be used (C<dtd/LaTeXML.xsl>).

=item B<--css>=I<cssfile>

Requests that the css file will be used in the transformed html/xhtml.
It is passed as a parameter to the stylesheet.
By default, the default stylesheets will use the included LaTeXML.css.

=item B<--VERSION>

Shows the version number of the LaTeXML package.

=item B<--help>

Shows this help message.

=back

=head1 SEE ALSO

L<latexml>, L<LaTeXML>

=cut
#**********************************************************************

