Linux ip-172-26-7-228 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64
Your IP : 18.119.119.119
#!/usr/bin/perl -w
# $Id: exceltex 155 2006-04-30 11:25:16Z pez $
#
# helper script for the exceltex latex package. reads contents of
# M$ Excel files using the Spreadsheet::ParseExcel perl module.
#
# (c) 2004-2006 by Hans-Peter Doerr <doerr@cip.physik.uni-freiburg.de>
#
# exceltex is free software. you can redistribute or modify it under
# the terms of the GNU GENERAL PUBLIC LICENSE Version 2. See COPYING for
# details.
use strict;
my $VERSION = "0.5.1";
my $DEBUG = 0;
# this is the default encoding for all files written by exceltex
my $ENCODING = "latin1";
# some lookup tables
my %L2N;
{ my $i = 1; for ('A' .. 'Z') { $L2N{$_} = $i; ++$i } }
my @N2L;
{ for ('A' .. 'Z') { push(@N2L, $_) } }
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
package util;
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub DEBUG
{
my ($msg) = @_;
if ($DEBUG)
{
chomp($msg);
print STDERR "DEBUG: $msg\n";
}
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub cell2xy
{
my ($cell) = @_;
$cell = uc($cell);
return undef if (! ($cell =~ m/^([A-Z]+)([0-9]+)$/));
# return (column, row)
return (col2x($1), $2-1);
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub xy2cell
{
my ($x, $y) = @_;
# is is likely to be slow as hell
return undef if (! ($x =~ m/^[0-9]+$/g && $y =~ m/^[0-9]+$/g));
++$y;
return x2col($x) . "$y";
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub col2x
{
my ($l) = @_;
my $len = length($l);
my @S = split //, $l;
my $n = 0;
if ($len == 1) { $n = $L2N{$l} - 1 }
if ($len == 2) { $n = 26 * $L2N{$S[0]} + $L2N{$S[1]} - 1 }
return $n;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub x2col
{
my ($n) = @_;
return undef if (! $n =~ m/[0-9]+/);
my $l;
if ($n > 25) { $l = $N2L[int($n/26)-1] . $N2L[$n % 26] }
else { $l = $N2L[$n] }
return $l;
}
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
package main;
use strict;
use Getopt::Long;
use Encode;
my $Workbooks = {}; # workbook cache
my $nItems = 0; # number of processed items
my $nCells = 0; # number of processed cells
my $nTabs = 0; # number of processed tabs
my $Warnings = 0; # number of warnings
my $Jobname = ""; # jobname of .tex file
my $inputDir = ""; # data goes here
my $indexFile = "";
my $optFormat = 1; # use formatting?
my $optColor = 1; # use colors?
my $optComma = 0; # use comma for decimals
my $optFmtSci = 0; # reformat scientific numbers?
my $ignWarn = 0; # ignore warnings?
my $euroSymbol = "EUR"; # use this for the EUR currency symbol
my $cleanup = 0; # cleanup inputDir
my $EXTNS = "excltx";
# read global and local config files
# will be overwritten by commandline options
readConfigFile("$ENV{HOME}/.exceltexrc") if (-f "$ENV{HOME}/.exceltexrc");
readConfigFile(".exceltexrc") if (-f ".exceltexrc");
# handle commandline switches
GetOptions('format!' => \$optFormat,
'color!' => \$optColor,
'comma!' => \$optComma,
'reformat-sn!' => \$optFmtSci,
'p|plain' => sub{$optFormat=0; $optColor=0; $optFmtSci=0;},
'd|debug' => \$DEBUG,
'c|cleanup' => \$cleanup,
'e|encoding=s' => \$ENCODING,
'o|euro-symbol=s' => \$euroSymbol,
'h|help' => sub{ usage(); exit 0;},
'v|version' => sub{ print "exceltex, version $VERSION\n"; exit 0},
'w|ignore-warnings' => \$ignWarn,
) || exit 2;
if (@ARGV == 0) {
usage();
exit 2;
}
# encoding of output files
$ENCODING = lc($ENCODING);
if (! ($ENCODING eq "latin1" ||
$ENCODING eq "iso-8859-1" ||
$ENCODING eq "latin9" ||
$ENCODING eq "iso-8859-15" ||
$ENCODING eq "utf8"))
{
print STDERR "Unsupported encoding: $ENCODING\n";
print STDERR "currently supported: iso-8859-1 (latin1), iso-8859-15 "
. "(latin9), utf8\n";
exit_error();
}
print "exceltex helper script, version $VERSION\n";
# get jobname from first argument
if ($ARGV[0] =~ m/(\w+)\.tex/g) {
$Jobname = $1;
}
else { $Jobname = $ARGV[0]; }
if (! defined($Jobname))
{
print STDERR "can't determine jobname.\n";
exit_error();
}
util::DEBUG("jobname is: $Jobname\n");
# data from spreadsheet goes here
$inputDir = $Jobname . '-' . $EXTNS;
$indexFile = $Jobname . '.' . $EXTNS;
# cleanup?
if ($cleanup)
{
cleanup();
exit 0;
}
if (! -f $indexFile)
{
print STDERR "can't read index '$indexFile: $!.\n";
print STDERR "run latex first to create the index.\n";
exit_error();
}
# create data dir
if (! -d $inputDir)
{
if (! mkdir $inputDir)
{
print STDERR "can't create data directory '$inputDir': $!\n";
exit_error();
}
}
# rock'n'roll
processIndex($indexFile);
# exiting message
if ($Warnings)
{
print STDERR "exceltex finished with $nItems items ($nCells cells "
. "and $nTabs tabulars)\n";
print STDERR "*** not all items proccessed fine, there were "
. "$Warnings Warnings\n";
exit 0 if ($ignWarn);
exit 1;
}
else
{
print "exceltex successfully finished with $nItems items ($nCells cells "
. "and $nTabs tabulars)\n";
}
exit 0;
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub exit_error
{
print STDERR "aborting.\n";
exit 2;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub usage
{
print <<__EOF;
exceltex version $VERSION
usage: exceltex [options] file[.tex]
options:
-h|--help show this help
-v|--version show program version and exit
-c|--cleanup remove temporary files created by previous runs
-w|--ignore-warnings exit with status zero, even on warnings
-o|--euro-symbol=sym use sym for displaying the euro currency symbol [EUR]
-e|--encoding=enc set encoding to enc. Currently supported
encodings are: latin1, latin9, utf8 [latin1]
--[no]reformat-sn (dont) reformat scientific numbers to A X 10^B notation
--[no]comma (dont) use comma for decimal numbers
--[no]format (dont) use formatting
--[no]nocolor (dont) use colors
-p|--plain shorthand for --noformat --nocolor --noreformat-sn
__EOF
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub readConfigFile
{
my ($file) = @_;
open(I, "<$file") || return;
while(<I>)
{
chomp();
my ($key, $val) = split('=', $_);
if ($key eq "encoding") { $ENCODING = $val }
elsif ($key eq "reformat-sn") { $optFmtSci = $val }
elsif ($key eq "color") { $optColor = $val }
elsif ($key eq "format") { $optFormat = $val }
elsif ($key eq "comma") { $optComma = $val }
elsif ($key eq "euro-symbol") { $euroSymbol = $val }
elsif ($key eq "plain") { $optColor=0; $optFormat=0}
else { print STDERR "unsupported config option in '$file': "
. "$key, ignoring\n" }
}
close(I);
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub cleanup
{
if (-f $indexFile)
{
print "removing $indexFile\n";
unlink($indexFile);
}
return if (! defined($inputDir));
return if (! -d $inputDir);
print "cleaning up $inputDir/\n";
unlink <$inputDir/c-*>;
unlink <$inputDir/t-*>;
}
# read index file, extract data, write to files
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub processIndex
{
my ($index) = @_;
if (! open(I, "< $index"))
{
print STDERR "can't read index '$index': $!. Run latex first?\n";
exit_error();
}
my $cellrefs = 0;
while(<I>)
{
chomp($_);
# if cellrefs is switched on, we store data in files with name
# inputDir/[c,t]-file!sheet!cell
if ($. == 1 && $_ eq ";cellrefs")
{
print "using cell referencing by name\n";
$cellrefs = 1;
}
# ignore lines beginning with ;
next if ($_ =~ m/^;/);
my $type;
my $idx = undef;
my $source;
my @flags;
if ($cellrefs == 1) {($type, $source, @flags) = split(':', $_)}
else {($type, $idx, $source, @flags) = split(':', $_)}
# parse flags
# not used yet
if (@flags != 0)
{
my $ok = 0;
foreach (@flags)
{
$ok = 1 if ($_ eq 'plain');
}
print STDERR "Index '$indexFile' corrupt at line $.: bad flag\n";
exit_error();
}
if (! defined($source))
{
print STDERR "Index '$indexFile' corrupt at line $.\n";
exit_error();
}
my ($f, $s, $c1, $c2) = parseSource($source, $type);
if (! $s)
{
print STDERR "Index '$indexFile' corrupt at line $.\n";
exit_error();
}
my $string = "";
if ($type eq 't')
{
$string = getTabString($f, $s, $c1, $c2);
}
elsif ($type eq 'c')
{
$string = getCellString($f, $s, $c1);
}
else
{
print STDERR "Index '$indexFile' corrupt at line $.: "
. "unknown $type '$type'\n";
exit_error();
}
# no need to write empty data
next if (! defined($string));
$string = decode_utf8($string, 0);
$string = encode($ENCODING, $string, 0);
if (! writeBuf($type, $idx, $string, $source))
{
print STDERR "can't write cell data: $!.\n";
exit_error();
}
}
close(I);
return 1;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub getFile
{
my $file = $Jobname . ".xls";
return $file if (-f $file);
return undef;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub parseSource
{
my ($string, $type) = @_;
my @tokens = split('!', $string);
my $file;
my $sheet;
my $cel1;
my $cel2;
if ($type eq 'c')
{
if (@tokens == 2)
{
$file = getFile();
$sheet = $tokens[0];
$cel1 = $tokens[1];
}
elsif (@tokens == 3)
{
$file = $tokens[0];
$sheet = $tokens[1];
$cel1 = $tokens[2];
}
else { return undef; }
}
elsif ($type eq 't')
{
if (@tokens == 3)
{
$file = getFile();
$sheet = $tokens[0];
$cel1 = $tokens[1];
$cel2 = $tokens[2];
}
elsif (@tokens == 4)
{
$file = $tokens[0];
$sheet = $tokens[1];
$cel1 = $tokens[2];
$cel2 = $tokens[3];
}
else { return undef; }
}
else { return (undef, undef, undef, undef); }
return ($file, $sheet, $cel1, $cel2);
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub writeBuf
{
my ($type, $idx, $string, $source) = @_;
return 1 if ($string eq "");
my $wfile;
if (! defined($idx)) { $wfile = $inputDir . "/" . $type . "-" . $source }
else { $wfile = $inputDir . "/" . $type . "-" . $idx }
return 0 if (! open (O, ">$wfile"));
return 0 if (! print O $string);
return 0 if (! close(O));
return 1;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub getWorkbook
{
my ($file) = @_;
if (! -f $file) {
print STDERR "can't read '$file': it does not exists.\n";
exit_error();
}
if (! defined($Workbooks->{$file}))
{
my $type = "xls";
if ($file =~ m/^\w+\.(\w+)$/g) { $type = $1 };
if ($type eq "xls")
{
print("reading '$file'\n");
# readExcel->new() will exit, if anything bad happens
$Workbooks->{$file} = readExcel->new($file);
}
}
return $Workbooks->{$file};
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub texifyCell
{
my ($cel) = @_;
my $MATH = 0; # wether to enclose string in $$ for math mode
my $string = $cel->{value};
return "" if (! defined($string) || $string eq "");
# escape latex control characters
$string =~ s/\{/\\\{/g;
$string =~ s/\}/\\\}/g;
# escape backslash if not followed by { or }
$string =~ s/\\(?!({|}))/\\ensuremath\{\\backslash\}/g;
$string =~ s/#/\\#/g;
$string =~ s/\$/\\\$/g;
$string =~ s/%/\\%/g;
$string =~ s/&/\\&/g;
$string =~ s/~/\\~/g;
$string =~ s/_/\\_/g;
$string =~ s/\^/\{\\verb ^ \}/g;
# FIXME: needs more testing!
# hack for EUR sign, works for me but is definetly ugly :(
# will need some information on how exactly this
# user-entered formatting stuff works (in excel)
$string =~ s/\[.+\xac-{0,1}\d{1,3}\]/$euroSymbol/g; # EUR in currency cells
$string =~ s/\[.+EUR\]/$euroSymbol/g; # "
$string =~ s/.{1,1}\xac/$euroSymbol/g; # single EUR sign
# use decimal comma if requested
$string =~ s/(\d)\.(\d)/$1,$2/g if ($optComma == 1);
# reformat scientific numbers if requested
if ($optFmtSci == 1)
{
$MATH = 1 if ($string =~ s/([0-9]+)[eE]([+-][0-9]+)/
simplifyScientificNumber($1, $2)/ge);
}
# apply cell formatting and colors if not requested otherweise
$string = applyFormatting($string, $cel, $MATH) if($optFormat == 1);
# same goes for colors
$string = applyColor($string, $cel) if ($optColor == 1);
util::DEBUG " texified => '$string'";
return "\$$string\$" if ($MATH);
return $string;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub applyFormatting
{
my ($s, $c, $m) = @_;
# font bold?
if ($c->{bold} && $m) { $s = "\\mathbf{$s}"; }
elsif ($c->{bold}) { $s = "\\textbf{$s}"; }
# font italic?
if ($c->{italic} && $m) { $s = "\\mathit{$s}"; }
elsif ($c->{italic}) { $s = "\\textit{$s}"; }
# underlined? striked out?
$s = "\\uline{$s}" if ($c->{underline});
$s = "\\sout{$s}" if ($c->{strikeout});
return $s;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub applyColor
{
my ($s, $c) = @_;
# only evaluate color if it is not black anyway
if ($c->{color} != 8)
{
# convert rrggbb hex color triplet to float rgb
$s = "\\textcolor[rgb]{" . colormap($c->{color}) . "}{$s}";
}
return $s;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub colormap
{
my ($color) = @_;
util::DEBUG(" color => $color\n");
# get rgb triplet as floats
return join(', ', map {hex($_)/255.0}
unpack('a2a2a2', Spreadsheet::ParseExcel->ColorIdxToRGB($color)));
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub simplifyScientificNumber
{
my ($f, $e) = @_;
# extract signum
my $s = substr($e, 0, 1);
$s = '' if ($s eq "+");
# remove signum & trailing zeros
$e =~ s/^[+-]*//;
$e =~ s/^0*(\d+)/$1/;
return $f if ($e == 0);
return sprintf("%s \\times 10^{%s%s}", $f, $s, $e);
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub warning
{
my ($msg) = @_;
chomp($msg);
print STDERR "warning: " . $msg . "\n";
++$Warnings;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub getCellString
{
my ($file, $sheet, $cell) = @_;
util::DEBUG("get $file -> $sheet -> $cell\n");
my $w = getWorkbook($file);
return undef if (! defined($w));
my ($x, $y) = util::cell2xy($cell);
if (! defined($x) || ! defined($y)) {
warning("cell '$cell' is invalid\n");
return undef;
}
my $c = $w->getCell($sheet, $x, $y);
warning($w->error()) if (! defined($c->{value}));
++$nItems;
++$nCells;
return texifyCell($c);
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub getTabString
{
my ($file, $sheet, $cell1, $cell2) = @_;
# cell1 is upper-left and cell2 is lower-right corner
# of the selected cell-range
my ($lfx, $upy) = util::cell2xy($cell1);
my ($rtx, $loy) = util::cell2xy($cell2);
if (! defined($lfx))
{
warning "cell '$cell1' is invalid\n";
return undef;
}
if (! defined($rtx))
{
warning "cell '$cell2' is invlaid\n";
return undef;
}
my $book = getWorkbook($file);
my $buf;
my $empty = 1;
for (my $y = $upy; $y <= $loy; ++$y)
{
for (my $x = $lfx; $x <= $rtx; ++$x)
{
my $c = $book->getCell($sheet, $x, $y);
warning($book->error()) if (! defined($c));
$empty = 0 if(defined($c->{value}));
$buf .= texifyCell($c);
$buf .= " & " if ($x < $rtx);
}
$buf .= " \\\\\n";
}
warning("table $cell1!$cell2 is empty\n") if ($empty);
++$nItems;
++$nTabs;
return $buf;
}
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
package readExcel;
use strict;
use Spreadsheet::ParseExcel;
use Encode;
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub new
{
my ($class, $file) = @_;
my $self = {};
$self->{ERROR} = "";
if (! -f $file)
{
print STDERR "can't read '$file', it does not exists.\n";
exit_error();
}
$self->{book} =
Spreadsheet::ParseExcel::Workbook->Parse($file);
if (! defined($self->{book}))
{
print STDERR "SpreadSheet::ParseExcel Error: "
. "can't parse '$file' : $!\n";
exit_error();
}
$self->{file} = $file;
bless $self, $class;
return $self;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub getCell
{
my ($self, $sheet, $x, $y) = @_;
my $ws = $self->{book}->Worksheet($sheet);
if (! defined($ws))
{
$self->_setError("sheet '$sheet' does not exist");
return undef;
}
my $cell = $ws->Cell($y, $x);
if(! $cell)
{
$self->_setError("cell '$sheet!" . util::xy2cell($x, $y) . "' is emtpy "
. "or out of range");
return { value => undef }
}
# we're internally operating with utf8, for now
my $value = encode_utf8($cell->Value());
util::DEBUG " value => '$value'";
# return hash containing value and formatting
return { value => $value,
bold => $cell->{Format}->{Font}->{Bold},
italic => $cell->{Format}->{Font}->{Italic},
color => $cell->{Format}->{Font}->{Color},
underline => $cell->{Format}->{Font}->{Underline},
strikeout => $cell->{Format}->{Font}->{Strikeout}
};
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub _setError
{
my ($self, $err) = @_;
$self->{ERROR} = $err;
}
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
sub error
{
my $self = shift;
my $err = $self->{ERROR};
$self->{ERROR} = undef;
return $err;
}
|