Linux ip-172-26-7-228 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64
Your IP : 3.12.163.23
#! /usr/bin/env perl
=begin COPYRIGHT
----------------------------------------------------------------------------
Copyright (C) 2005-2013 Marc Penninga.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to
Free Software Foundation, Inc.,
59 Temple Place,
Suite 330,
Boston, MA 02111-1307,
USA
----------------------------------------------------------------------------
=end COPYRIGHT
=cut
use strict;
use warnings;
use integer;
use List::Util @List::Util::EXPORT_OK;
use Pod::Usage;
our ($NUM_GLYPHS, $UNITS_PER_EM, %KPX);
sub main {
pod2usage(-verbose => 0) if @ARGV != 1;
my %font = OTF::get_tables($ARGV[0]);
$NUM_GLYPHS = unpack '@4n', $font{maxp};
$UNITS_PER_EM = unpack '@18n', $font{head};
my @glyph_name = map { sprintf "index%d", $_ } 0 .. $NUM_GLYPHS - 1;
if (defined $font{CFF}) {
OTF::CFF::get_glyph_names($font{CFF}, \@glyph_name);
}
if (defined $font{kern}) {
OTF::Kern::parse_kerntable($font{kern});
}
if (defined $font{GPOS}) {
my @lookup_list_indices
= OTF::GPOS::get_lookup_list_indices($font{GPOS});
my @all_lookups = OTF::GPOS::get_lookups($font{GPOS});
my @lookups = @all_lookups[@lookup_list_indices];
my @subtables = map { OTF::GPOS::get_subtables($_) } @lookups;
for my $subtable (@subtables) {
my $pos_format = unpack 'n', $subtable;
if ($pos_format == 1) {
OTF::GPOS::parse_pos_format_1($subtable);
}
elsif ($pos_format == 2) {
OTF::GPOS::parse_pos_format_2($subtable);
}
else {
warn "[ERROR] invalid PosFormat $pos_format ignored";
}
}
}
my $num_kernpairs = sum map { scalar keys %{$KPX{$_}} } keys %KPX;
print "StartKernData\nStartKernPairs $num_kernpairs\n";
for my $first (sort { $a <=> $b } keys %KPX) {
my $first_glyph = $glyph_name[$first];
for my $second (sort { $a <=> $b } keys %{$KPX{$first}}) {
print "KPX $first_glyph $glyph_name[$second] ",
"$KPX{$first}{$second}\n";
}
}
print "EndKernPairs\nEndKernData\n";
return;
}
########################################################################
package OTF;
#-----------------------------------------------------------------------
# Return a hash with all tables (as strings) from the font
#-----------------------------------------------------------------------
sub get_tables {
my $filename = shift;
open my $fontfile, '<:raw', $filename
or die "[ERROR] can't open $filename: $!";
my $fontdata = do { local $/; <$fontfile> };
close $fontfile;
my ($sfnt_version, @tables) = unpack 'a4nx6/(a16)', $fontdata;
die "[ERROR] $filename: unknown font type"
unless $sfnt_version eq "\x00\x01\x00\x00"
|| $sfnt_version eq 'OTTO';
my %table = map { my ($tag, $offset, $length) = unpack 'A4@8N@12N', $_;
($tag => substr $fontdata, $offset, $length);
}
@tables;
return %table;
}
########################################################################
package OTF::CFF;
my @STANDARD_STRINGS = qw(
.notdef space exclam quotedbl
numbersign dollar percent ampersand
quoteright parenleft parenright asterisk
plus comma hyphen period
slash zero one two
three four five six
seven eight nine colon
semicolon less equal greater
question at A B
C D E F
G H I J
K L M N
O P Q R
S T U V
W X Y Z
bracketleft backslash bracketright asciicircum
underscore quoteleft a b
c d e f
g h i j
k l m n
o p q r
s t u v
w x y z
braceleft bar braceright asciitilde
exclamdown cent sterling fraction
yen florin section currency
quotesingle quotedblleft guillemotleft guilsinglleft
guilsinglright fi fl endash
dagger daggerdbl periodcentered paragraph
bullet quotesinglbase quotedblbase quotedblright
guillemotright ellipsis perthousand questiondown
grave acute circumflex tilde
macron breve dotaccent dieresis
ring cedilla hungarumlaut ogonek
caron emdash AE ordfeminine
Lslash Oslash OE ordmasculine
ae dotlessi lslash oslash
oe germandbls onesuperior logicalnot
mu trademark Eth onehalf
plusminus Thorn onequarter divide
brokenbar degree thorn threequarters
twosuperior registered minus eth
multiply threesuperior copyright Aacute
Acircumflex Adieresis Agrave Aring
Atilde Ccedilla Eacute Ecircumflex
Edieresis Egrave Iacute Icircumflex
Idieresis Igrave Ntilde Oacute
Ocircumflex Odieresis Ograve Otilde
Scaron Uacute Ucircumflex Udieresis
Ugrave Yacute Ydieresis Zcaron
aacute acircumflex adieresis agrave
aring atilde ccedilla eacute
ecircumflex edieresis egrave iacute
icircumflex idieresis igrave ntilde
oacute ocircumflex odieresis ograve
otilde scaron uacute ucircumflex
udieresis ugrave yacute ydieresis
zcaron exclamsmall Hungarumlautsmall dollaroldstyle
dollarsuperior ampersandsmall Acutesmall parenleftsuperior
parenrightsuperior twodotenleader onedotenleader zerooldstyle
oneoldstyle twooldstyle threeoldstyle fouroldstyle
fiveoldstyle sixoldstyle sevenoldstyle eightoldstyle
nineoldstyle commasuperior threequartersemdash periodsuperior
questionsmall asuperior bsuperior centsuperior
dsuperior esuperior isuperior lsuperior
msuperior nsuperior osuperior rsuperior
ssuperior tsuperior ff ffi
ffl parenleftinferior parenrightinferior Circumflexsmall
hyphensuperior Gravesmall Asmall Bsmall
Csmall Dsmall Esmall Fsmall
Gsmall Hsmall Ismall Jsmall
Ksmall Lsmall Msmall Nsmall
Osmall Psmall Qsmall Rsmall
Ssmall Tsmall Usmall Vsmall
Wsmall Xsmall Ysmall Zsmall
colonmonetary onefitted rupiah Tildesmall
exclamdownsmall centoldstyle Lslashsmall Scaronsmall
Zcaronsmall Dieresissmall Brevesmall Caronsmall
Dotaccentsmall Macronsmall figuredash hypheninferior
Ogoneksmall Ringsmall Cedillasmall questiondownsmall
oneeighth threeeighths fiveeighths seveneighths
onethird twothirds zerosuperior foursuperior
fivesuperior sixsuperior sevensuperior eightsuperior
ninesuperior zeroinferior oneinferior twoinferior
threeinferior fourinferior fiveinferior sixinferior
seveninferior eightinferior nineinferior centinferior
dollarinferior periodinferior commainferior Agravesmall
Aacutesmall Acircumflexsmall Atildesmall Adieresissmall
Aringsmall AEsmall Ccedillasmall Egravesmall
Eacutesmall Ecircumflexsmall Edieresissmall Igravesmall
Iacutesmall Icircumflexsmall Idieresissmall Ethsmall
Ntildesmall Ogravesmall Oacutesmall Ocircumflexsmall
Otildesmall Odieresissmall OEsmall Oslashsmall
Ugravesmall Uacutesmall Ucircumflexsmall Udieresissmall
Yacutesmall Thornsmall Ydieresissmall 001.000
001.001 001.002 001.003 Black
Bold Book Light Medium
Regular Roman Semibold
);
#-----------------------------------------------------------------------
#
#-----------------------------------------------------------------------
sub get_glyph_names {
my ($CFF, $glyph_name) = @_;
# skip header; parse (and ignore) NAME index
my ($rest) = _parse_index(substr $CFF, unpack '@2C', $CFF);
# parse TopDict index
($rest, my @top_dict) = _parse_index($rest);
if (@top_dict > 1) {
warn "[WARNING] multiple fonts in single file not supported";
}
my %top_dict = _parse_dict($top_dict[0]);
if ($NUM_GLYPHS != unpack 'n', substr($CFF, $top_dict{17}, 2)) {
die "[ERROR] NumGlyphs different in 'maxp' and 'CFF' tables";
}
# parse String index
($rest, my @strings) = _parse_index($rest);
@strings = (@STANDARD_STRINGS, @strings);
my $charset = substr $CFF, $top_dict{15};
my $format = unpack 'C', $charset;
if ($format == 0) {
my @codes = unpack "\@1n@{[ $NUM_GLYPHS - 1 ]}", $charset;
@$glyph_name = @strings[0, @codes];
}
elsif ($format == 1) {
my $i = 0;
$glyph_name->[$i++] = $strings[0];
for (my $j = 0; $i < $NUM_GLYPHS; $j++) {
my ($first, $n_left)
= unpack "\@@{[ 1 + 3*$j ]}nC", $charset;
$glyph_name->[$i + $_] = $strings[$first + $_]
for 0 .. $n_left;
$i += 1 + $n_left;
}
}
elsif ($format == 2) {
my $i = 0;
$glyph_name->[$i++] = $strings[0];
for (my $j = 0; $i < $NUM_GLYPHS; $j++) {
my ($first, $n_left)
= unpack "\@@{[ 1 + 4*$j ]}nn", $charset;
$glyph_name->[$i + $_] = $strings[$first + $_]
for 0 .. $n_left;
$i += 1 + $n_left;
}
}
else {
warn "[ERROR] invalid CharsetFormat '$format' ignored";
}
return;
}
#-----------------------------------------------------------------------
# Parse Adobe's INDEX format (see CFF spec); return "rest" plus data array
#-----------------------------------------------------------------------
sub _parse_index {
my $index = shift;
my ($count, $off_size) = unpack 'nC', $index;
my @offsets = unpack "\@3(a$off_size)@{[ $count + 1 ]}", $index;
if ($off_size == 1) { $_ = unpack 'C', $_ for @offsets }
elsif ($off_size == 2) { $_ = unpack 'n', $_ for @offsets }
elsif ($off_size == 4) { $_ = unpack 'N', $_ for @offsets }
elsif ($off_size == 3) {
for my $offset (@offsets) {
my @bytes = map 'C3', $offset;
$offset = ($bytes[0] << 16) + ($bytes[1] << 8) + $bytes[2];
}
}
else {
warn "[ERROR] invalid OffSize $off_size ignored";
}
$_ += 2 + $off_size * ($count + 1) for @offsets;
my @data;
for my $i (0 .. $count - 1) {
$data[$i]
= substr $index, $offsets[$i], $offsets[$i + 1] - $offsets[$i];
}
return substr($index, $offsets[$count]), @data;
}
#-----------------------------------------------------------------------
# Parse Adobe's DICT format (see CFF spec); return data hash
#-----------------------------------------------------------------------
sub _parse_dict {
my $dict = shift;
my @bytes = unpack 'C*', $dict;
my (@operands, %dict);
while (@bytes) {
if (( 0 <= $bytes[0] && $bytes[0] <= 11)
|| (13 <= $bytes[0] && $bytes[0] <= 22))
{
my $operator = shift @bytes;
($dict{$operator} = join q{ }, @operands) =~ s/^\s+//xms;
@operands = ();
}
elsif ($bytes[0] == 12) {
my $operator = join q{ }, splice @bytes, 0, 2;
($dict{$operator} = join q{ }, @operands) =~ s/^\s+//xms;
@operands = ();
}
elsif ($bytes[0] == 28
|| $bytes[0] == 29
|| $bytes[0] == 30
|| (32 <= $bytes[0] && $bytes[0] <= 254))
{
push @operands, _get_number(\@bytes);
}
else {
warn "[ERROR] invalid byte $bytes[0] in DICT ignored";
shift @bytes;
}
}
return %dict;
}
#-----------------------------------------------------------------------
# Parse Adobe's variable-length numbers (see CFF spec); shorten arg array
#-----------------------------------------------------------------------
sub _get_number {
my $bytes = shift;
my $b0 = shift @$bytes;
if ($b0 == 28) {
my ($b1, $b2) = splice @$bytes, 0, 2;
return ($b1 << 8) | $b2;
}
elsif ($b0 == 29) {
my ($b1, $b2, $b3, $b4) = splice @$bytes, 0, 4;
return ((((($b1 << 8) + $b2) << 8) + $b3) << 8) + $b4;
}
elsif ($b0 == 30) {
# Reals not implemented; just dump appropriate number of bytes
until (($b0 & 0xf0) == 0xf0 || ($b0 & 0xf) == 0xf) {
$b0 = shift @$bytes;
}
}
elsif (32 <= $b0 && $b0 <= 246) {
return $b0 - 139;
}
elsif (247 <= $b0 && $b0 <= 250) {
my $b1 = shift @$bytes;
return 256 * ($b0 - 247) + $b1 + 108;
}
elsif (251 <= $b0 && $b0 <= 254) {
my $b1 = shift @$bytes;
return -256 * ($b0 - 251) - $b1 - 108;
}
}
########################################################################
package OTF::GPOS;
#-----------------------------------------------------------------------
# Return list of all Lookups from the GPOS table
#-----------------------------------------------------------------------
sub get_lookups {
my $GPOS = shift;
my $lookup_list = substr $GPOS, unpack '@8n', $GPOS;
my @lookup_offsets = unpack 'n/n', $lookup_list;
return map { substr $lookup_list, $_ } @lookup_offsets;
}
#-----------------------------------------------------------------------
# Return list of all LookupListIndices pointed to by 'kern' feature
#-----------------------------------------------------------------------
sub get_lookup_list_indices {
my $GPOS = shift;
my $feature_list = substr $GPOS, unpack '@6n', $GPOS;
my %lookup_list_index;
for my $feature_record (unpack 'n/(a6)', $feature_list) {
my ($tag, $offset) = unpack 'a4n', $feature_record;
next if $tag ne 'kern';
my $feature = substr $feature_list, $offset;
my @lookup_list_indices = unpack '@2n/n', $feature;
$lookup_list_index{$_} = 1 for @lookup_list_indices;
}
return keys %lookup_list_index;
}
#-----------------------------------------------------------------------
# Return list of all subtables in Lookup table
#-----------------------------------------------------------------------
sub get_subtables {
my $lookup = shift;
my ($lookup_type, @subtable_offsets) = unpack 'n@4n/n', $lookup;
if ($lookup_type != 2) {
warn "[WARNING] LookupType $lookup_type not implemented";
return;
}
return map { substr $lookup, $_ } @subtable_offsets;
}
#-----------------------------------------------------------------------
# Parse subtable in PairPosFormat 1, store kern pairs in global %KPX
#-----------------------------------------------------------------------
sub parse_pos_format_1 {
my $subtable = shift;
my ($coverage_offset, $value_format_1, $value_format_2, @pair_set_offsets)
= unpack '@2nnnn/n', $subtable;
my @coverage = _get_coverage(substr $subtable, $coverage_offset);
my @pair_sets = map { substr $subtable, $_ } @pair_set_offsets;
if (@coverage != @pair_sets) {
warn "[ERROR] Coverage table doesn't match PairSet table";
return;
}
my ($record_size, $value_offset)
= _parse_value_formats($value_format_1, $value_format_2, 2, 2);
while (1) {
my $pair_set = shift @pair_sets or last;
my $first = shift @coverage;
my @pair_value_records = unpack "n/(a$record_size)", $pair_set;
for my $pair_value_record (@pair_value_records) {
my ($second, $value)
= unpack "n\@${value_offset}s>", $pair_value_record;
next if $value == 0;
$KPX{$first}{$second} ||= 1000 * $value / $UNITS_PER_EM;
}
}
return;
}
#-----------------------------------------------------------------------
# Parse subtable in PairPosFormat 2, store kern pairs in global %KPX
#-----------------------------------------------------------------------
sub parse_pos_format_2 {
my $subtable = shift;
my ($coverage_offset, $value_format_1, $value_format_2,
$class_def_1, $class_def_2, $class_1_count, $class_2_count)
= unpack '@2nnnnnnn', $subtable;
my @coverage = _get_coverage(substr $subtable, $coverage_offset);
my ($class_2_record_size, $value_offset)
= _parse_value_formats($value_format_1, $value_format_2, 0, 0);
my @class_1 = _get_class(substr $subtable, $class_def_1);
my @class_2 = _get_class(substr $subtable, $class_def_2);
my $class_1_record_size = $class_2_count * $class_2_record_size;
my @class_1_records
= unpack "\@16(a$class_1_record_size)$class_1_count", $subtable;
for my $class_1 (0 .. $class_1_count - 1) {
my $class_1_record = $class_1_records[$class_1];
my @class_2_records
= unpack "(a$class_2_record_size)$class_2_count", $class_1_record;
for my $class_2 (0 .. $class_2_count - 1) {
my $class_2_record = $class_2_records[$class_2];
my $value = unpack "\@${value_offset}s>", $class_2_record;
next if $value == 0;
$value = 1000 * $value / $UNITS_PER_EM;
my @first = grep { $class_1[$_] == $class_1 } @coverage;
my @second = grep { $class_2[$_] == $class_2 } 0 .. $#class_2;
for my $first (@first) {
for my $second (@second) {
$KPX{$first}{$second} ||= $value;
}
}
}
}
return;
}
#-----------------------------------------------------------------------
# Get class number for all glyphs
#-----------------------------------------------------------------------
sub _get_class {
my $class = shift;
my @class = (0) x $NUM_GLYPHS;
my $class_format = unpack 'n', $class;
if ($class_format == 1) {
my ($start_glyph, @class_value) = unpack '@2nn/n', $class;
@class[$start_glyph .. $start_glyph + $#class_value]
= @class_value;
}
elsif ($class_format == 2) {
my @class_ranges = unpack '@2n/(a6)', $class;
for my $class_range (@class_ranges) {
my ($start, $end, $class) = unpack 'nnn', $class_range;
@class[$start .. $end] = ($class) x ($end - $start + 1);
}
}
else {
warn "[ERROR] invalid ClassFormat '$class_format' ignored";
return;
}
return @class;
}
#-----------------------------------------------------------------------
# Determine size of ValueRecord and offset of XAdvance value in it
#-----------------------------------------------------------------------
sub _parse_value_formats {
my ($value_format_1, $value_format_2, $record_size, $value_offset) = @_;
unless ($value_format_1 & 0x4 && $value_format_2 == 0) {
warn "[WARNING] ValueFormat ($value_format_1, ",
"$value_format_2) not implemented";
return;
}
for my $bit ((0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80)) {
$record_size += 2 if $value_format_1 & $bit;
}
for my $bit ((0x1, 0x2)) {
$value_offset += 2 if $value_format_1 & $bit;
}
return ($record_size, $value_offset);
}
#-----------------------------------------------------------------------
# Return a list of GlyphIDs in a Coverage table
#-----------------------------------------------------------------------
sub _get_coverage {
my $coverage = shift;
my $coverage_format = unpack 'n', $coverage;
if ($coverage_format == 1) {
return unpack '@2n/n', $coverage;
}
elsif ($coverage_format == 2) {
my @range_records = unpack '@2n/(a6)', $coverage;
return map { my ($start, $end) = unpack 'nn', $_;
$start .. $end;
}
@range_records;
}
else {
warn "[WARNING] unknown CoverageFormat $coverage_format ignored";
}
}
########################################################################
package OTF::Kern;
#-----------------------------------------------------------------------
# Parse "kern"table, store kern pairs in global %KPX
#-----------------------------------------------------------------------
sub parse_kerntable {
my $kern = shift;
my $num_tables = unpack '@2n', $kern;
my $start_subtable = 4;
for my $i (0 .. $num_tables - 1) {
my $subtable = substr $kern, $start_subtable;
my ($length, $coverage) = unpack '@2nn', $subtable;
$start_subtable += $length;
if ($coverage != 0x1) {
warn "[WARNING] kern table format '$coverage' not implemented";
next;
}
my @kern_pairs = unpack '@6nx6/(a6)', $subtable;
for my $kern_pair (@kern_pairs) {
my ($first, $second, $value) = unpack 'nns>', $kern_pair;
next if $value == 0;
$KPX{$first}{$second} ||= 1000 * $value / $UNITS_PER_EM;
}
}
return;
}
########################################################################
package main;
main();
__END__
########################################################################
To create the documentation:
pod2man --center="Marc Penninga" --release="fontools" --section=1 \
ot2kpx - | groff -Tps -man - | ps2pdf - ot2kpx.pdf
=pod
=head1 NAME
ot2kpx - extract kerning information from OpenType fonts
=head1 SYNOPSIS
=over 8
=item B<ot2kpx>
B<fontfile>
=back
=head1 DESCRIPTION
B<ot2kpx> extract the kerning data from OpenType fonts (both F<otf>
and F<ttf> formats) and prints it (in F<afm> format) to C<stdout>.
=head1 OPTIONS AND ARGUMENTS
=over 4
=item B<fontfile>
The OpenType font (in either F<otf> or F<ttf> format).
=back
=head1 RESTRICTIONS
=over 4
=item B<->
B<ot2kpx> doesn't implement all of the OpenType specification.
Things that are missing include: support for font files containing
multiple fonts, LookupTables with LookupTypes other than 2,
"kern" tables with format other than 0 and ValueRecords with
other types of data than just XAdvance data.
Most of these limitations won't matter, since the missing features
are rare (the only fonts I know of that use them are the non-western fonts
that come with Adobe Reader). Furthermore, many of these features define
(according to the OpenType specification) I<"subtle, device-dependent
adjustments at specific font sizes or device resolutions">;
since there's no way to express such adjustments in F<afm> format,
ignoring them seems to be the only option anyway.
=item B<->
B<ot2kpx> collects kerning data first from the "kern" table, then from
all LookupTables associated with the "kern" feature; if a kerning pair
occurs multiple times, the first value seen is chosen.
There are (or may be) several issues with this approach:
=over 4
=item -
The OpenType specification says that fonts in F<otf> format shouldn't
use the "kern" table at all, just the lookups from the "GPOS" table.
Many such fonts do, however, contain a "kern" table, but no "GPOS" table;
so we use the "kern" table anyway.
=item -
Instead of reading all LookupTables, it might be better to let the user
specify a script and language and process only the LookupTables for
those values.
However, at least in the fonts I checked, all script/language combinations
eventually point to the I<same> "kern" LookupTables, so this approach
wouldn't make any difference (apart from further complicating the code).
=back
=back
=head1 AUTHOR
Marc Penninga <marcpenninga@gmail.com>
=head1 COPYRIGHT
Copyright (C) 2005-2013 Marc Penninga.
=head1 LICENSE
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation, either version 2 of the License,
or (at your option) any later version.
A copy of the GNU General Public License is included with B<ot2kpx>;
see the file F<GPLv2.txt>.
=head1 DISCLAIMER
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
=head1 RECENT CHANGES
(See the source code for the rest of the story.)
=over 12
=item I<2013-08-07>
Replaced all C<given/when> constructions in the source code by C<if>'s,
to avoid warnings about experimental features in Perl 5.18 and later.
=item I<2012-02-01>
Refactored the code, and fixed a number of bugs in the process.
Updated the documentation.
=back
=begin Really_old_history
=over 12
=item I<2005-01-10>
First version
=item I<2005-02-18>
Rewrote some of the code
=item I<2005-03-08>
Input files searched via B<kpsewhich> (where available)
=item I<2005-03-15>
Input files searched using B<kpsewhich> or B<findtexmf>
=item I<2005-03-21>
Test if GPOS table is present before trying to read it
=item I<2005-04-29>
Improved the documentation
=item I<2005-05-25>
Changed warning that's given when the font contains no GPOS table, to an
informational message.
=item I<2005-07-29>
A few updates to the documentation
=back
=cut
|