#!/usr/bin/perl
# Program Name: PDF Metadata Editor
# Program URL: http://www.arilabs.com/software/pdfmeta/pdfmeta.pl
# Filename: pdfmeta.pl
# Version: 1.3
# Last Modified: 2005-06-02
# Author: Brian High <bkh AT arilabs DOT com>
# Copyright: Analytical Resources, Inc. (2005)
# License: GNU GPL version 2 or greater. See LICENSE below.
# Requires: pdftk version 1.12 or greater, Perl/Tk
# pdftk must be in your environment's PATH
# Tested under: Mandrake 10.1.0 (KDE 3.2.3-99, Perl 5.8.5-3, Perl-Tk # 804.027-2), Debian unstable (Kanotix 2005-01, KDE 3.3.2-1, # Perl 5.8.4-6, Perl-Tk 800.025-2), Win2K Pro, and # WinXP Pro (SP2) with ActivePerl 5.8.6.811.
# Known Issues: (1) Under Windows XP Pro SP2 and ActivePerl 5.8.6.811,
# Windows will not let me drop files onto Perl script icons,
# so you can use the drag-and-drop built into the script
# to select the PDF file (by dropping the file onto the # listbox widget) or you can simply run the script
# from the command line. This issue is a function of
# how Windows works and may be addressed by some sort
# or "registry hack"[1] or "power toy". Anyway, if you
# really want to be able to drop PDFs right onto your
# script icon, you can simply create a DOS batch file
# or WSH/VBS script which will run pdfmeta.pl. Example:
# Type this one-liner into a file <pdfmeta.bat>:
# @perl pdfmeta.pl %1
# And save the file in the same folder as pdftk.exe and
# pdfmeta.pl. Then make a shortcut to pdfmeta.bat and
# place that shortcut on your desktop. Modify the shortcut
# properties so that the console window it brings up will
# be "minimized".
# Right-Click -> Properties -> Run: Minimized
# Rename the shortcut: PDF Metadata Editor
# (2) While the drag-and-drop (XDND, remote) works fine
# under Mandrake's KDE, it does not under Debian's (Kanotix) # KDE. This may have something to do with window manager
# settings. I just get a cursor of a circle with a line
# through it. I will have to look into this...
#
# [1] For a discussion of your registry hacking options, see:
# http://www.perlmonks.org/?node=122205
# This program provides a simple and limited graphical interface for
# pdftk. It only provides the functionality to modify PDF metadata # (document properties) fields.
# Usage: [ perl ] pdfmeta.pl [ <filename> ]
#
# You can either run from the command line, with or without a filename,
# or you can drag a file onto the script icon, if your desktop supports it,
# or you can execute the script by clicking the icon and you will be # offered a drag and drop interface.
# NOTE: You must have write permissions for the directory which
# contains the original PDF file, as well as for the original
# PDF file itself.
# NOTE: Modify the @fieldnames array to use a different set of document
# properties (metadata fields). Use only standard PDF field names:
# Title, Author, Subject, Keywords, Creator, Producer, CreationDate,
# ModDate, and Trapped.
# PDFTK can be found here: http://www.accesspdf.com/pdftk/
# See also: http://hacks.oreilly.com/pub/h/2422
# Here is the pdftk man page section on the update_info feature:
#
# update_info <info data filename | - | PROMPT>
# Changes the metadata stored in a single PDF's
# Info dictionary to match the input data file.
# The input data file uses the same syntax as the
# output from dump_data. This does not change the
# metadata stored in the PDF's XMP stream, if it
# has one. For example:
#
# pdftk in.pdf update_info in.info output out.pdf
# Also, the author of pdftk (Sid Steward) has this to say about the # XMP stream:
#
# PDFs store this metadata is two places: the Info dictionary and
# the XMP (RDF/XML) stream. Pdftk updates only the Info dictionary, # but newer versions of Acrobat/Reader defer to the XMP stream.
#
# I am currently working on new features for updating both the Info # dictionary and the XMP stream.
#
# One workaround might be to remove the PDF's XMP stream. You can do # this using pdftk, but it also removes bookmarks and other PDF # features. Run:
#
# pdftk mydoc.xmp.pdf cat output mydoc.no_xmp.pdf
#
# to burn of the XMP stream. Then maybe the viewer will fall back to # the Info dictionary with your updated data.
# # ( From: http://www.accesspdf.com/comment.php?mode=view&cid=153 )
# ====================================================================
# LICENSE: GNU GPL v2 or greater: http://www.gnu.org/licenses/gpl.txt
# ====================================================================
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
use strict;
use warnings;
use File::Basename;
use File::Spec::Functions;
use File::Copy;
use POSIX;
use Tk;
use Tk::DropSite;
use constant SUCCESS => 1;
use constant FAILURE => 0;
use constant TRUE => 1;
use constant FALSE => 0;
use constant PROG_NAME => "PDF Metadata Editor";
use constant PROG_VERS => "1.3";
use constant FILE_SUFF => "-meta";
my (%fields, $err_msg, $main);
my ($input_fn, $output_fn, $meta_fn);
#----------------------------------------------------------------------
# Configuration
#----------------------------------------------------------------------
# PDFTK command to use. Include explicit path if necessary.
my $pdftk = "pdftk";
# You may modify the @fieldnames array to use a different set of document
# properties (metadata fields). Use only standard PDF field names:
# Title, Author, Subject, Keywords, Creator, Producer, CreationDate, # ModDate, and Trapped. The order of field names in this array is the # same as the order of the fields as listed on the user interface.
my @fieldnames = qw( Title Subject Author Keywords );
#----------------------------------------------------------------------
# Main Routine
#----------------------------------------------------------------------
&create_main_window();
&set_fonts();
&create_field_hash();
&get_input_file();
&MainLoop();
#----------------------------------------------------------------------
# Subroutines
#----------------------------------------------------------------------
sub create_main_window {
$main = MainWindow->new();
$main->title( PROG_NAME . " " . PROG_VERS );
}
sub set_fonts {
my $font_family = 'Helvetica';
my $large_size = 12;
my $small_size = 10;
$main->fontCreate( 'title',
-size => $large_size, -weight => 'bold', -family => $font_family,
);
$main->fontCreate( 'header', -size => $large_size, -family => $font_family,
);
$main->fontCreate( 'label',
-size => $small_size, -weight => 'bold', -family => $font_family,
);
$main->fontCreate( 'button', -size => $small_size, -family => $font_family,
);
$main->fontCreate( 'input',
-size => $small_size, -family => $font_family,
);
}
sub create_field_hash {
# Create hash to store metadata fields and values
%fields = ();
foreach my $field ( @fieldnames ) {
$fields{$field} = '';
}
}
sub get_input_file {
# If a filename was given as an argument, use it
if ( $ARGV[0] ) {
$input_fn = $ARGV[0];
&get_metadata() && &complete_gui();
}
else {
# Otherwise offer drag and drop interface
&create_drop_widgets();
}
}
sub create_drop_widgets {
my $drop_label = $main->Label ( -text => "Drag your PDF file into the box below:", -font => 'title',
)
->pack (
-ipadx => 12, -ipady => 4, -padx => 8, -pady => 8,
);
# Define a DropSite (source side) for Drag and Drop functionality
my $drop = $main->Scrolled ( 'Listbox',
-scrollbars => "osoe", -height => 1,
)
->pack (
-pady => 8,
);
# Tell Tk that $drop should accept drops.
# When dropping occurs, execute the accept_drop callback.
$drop->DropSite (
-dropcommand => [\&accept_drop, $drop],
-droptypes => ( $^O eq 'MSWin32' ? 'Win32' : 'XDND' )
);
}
sub accept_drop {
my( $widget, $selection ) = @_;
eval {
$input_fn = $widget->SelectionGet (
-selection => $selection, 'STRING' );
$input_fn =~ s/^file:(.*)/$1/;
};
if ( defined $input_fn ) {
$widget->insert( 0, $input_fn );
}
# After the file is dropped, hide the widgets, and present new widgets
&clean_gui(); &get_metadata() && &complete_gui();
}
sub clean_gui {
# Remove (hide) widgets on the form, if any
my @w = $main->packSlaves;
foreach (@w) { $_->packForget; }
}
sub complete_gui {
&clean_gui();
# Finish defining MainWindow attributes and add widgets
$main->Label ( -justify => 'left', -text => "Filename: \n$input_fn", -font => 'label',
)
->pack ( -anchor => 'w', -padx => 8, -pady => 8,
);
foreach my $field ( @fieldnames ) {
$main->Label ( -justify => 'left', -text => "$field: ", -font => 'label',
)
->pack ( -anchor => 'w', -padx => 8,
);
$main->Entry ( -textvariable => \$fields{$field}, -font => 'input',
)
->pack ( -fill => 'x', -padx => 8,
);
}
$main->Button ( -text => "Save Changes and Exit", -font => 'button', -command => sub { &save_and_exit(); },
)
->pack ( -side => 'left', -ipadx => 12, -ipady => 4, -pady => 8, -expand => TRUE,
);
$main->Button ( -text => "Close", -font => 'button',
-command => sub { exit },
)
->pack ( -side => 'right', -ipadx => 12, -ipady => 4, -pady => 8, -expand => TRUE,
);
}
sub get_metadata {
my ( $input_fn_base, $input_fn_path, $input_fn_type );
$err_msg = "Input file must be a (single) PDF. Please try again.";
# If more than one file is selected, then show abort error
if ( defined( $output_fn ) ) {
undef $output_fn;
&show_msg() && &create_drop_widgets() && return FAILURE; }
# Parse the file path, abort if not a PDF, create new file names
($input_fn_base, $input_fn_path, $input_fn_type) = fileparse($input_fn, qr{\.pdf}i);
$input_fn_type =~ /\.pdf/i || &abort_me() && return FAILURE;
$output_fn = $input_fn_path . $input_fn_base . FILE_SUFF . $input_fn_type;
$meta_fn = $input_fn_path . $input_fn_base . '.mta';
# Check to make sure we can read/write to files and directories
$err_msg = "Cannot read from and/or write to input file!";
(-r $input_fn && -w $input_fn) || &abort_me() && return FAILURE;
$err_msg = "Cannot write to $input_fn_path directory!\n\n" . "You must place the original file in a writable directory\n" .
"before running this program.";
-w $input_fn_path || &abort_me() && return FAILURE;
# Dump the PDF's metadata to an ASCII text file and check for errors
&dump_meta_data() || &abort_me() && return FAILURE;
&check_for_dict() || &abort_me() && return FAILURE;
# Read metadata text file into %fields hash
$err_msg = "Error opening temporary metadata file for reading!";
open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE;
while ( <METADATA> ) { foreach my $field ( @fieldnames ) {
if ( /^InfoKey: $field$/ ) {
$_ = <METADATA>;
chomp;
s/^InfoValue: (.*)$/$1/;
$fields{$field} = $_;
}
}
}
&close_and_delete_metadata_file();
}
sub check_for_dict {
my $no_dict = FALSE;
# Read metadata text file to check for "no dictionary" error
$err_msg = "Error opening temporary metadata file for reading!";
open ( METADATA, "<", $meta_fn ) || return FAILURE;
while ( <METADATA> ) { if ( /no info dictionary found/ ) {
$no_dict = TRUE;
last;
}
}
close METADATA;
# If there was no info dictionary, then create one and save metadata
if ( $no_dict ) {
# Delete metadata file
unlink $meta_fn;
# Use PDFTK to add a new info dictionary using the 'cat' feature
&cat_pdf() || return FAILURE;
# Move repaired file to orig. file, get metadata, then store in file
my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; $err_msg = "Error running pdftk dump_data command!";
move ( $output_fn, $input_fn ) && &dump_meta_data() || return FAILURE;
}
return SUCCESS;
}
sub dump_meta_data {
my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; $err_msg = "Error running dump_data command!\n\n";
system ( $cmd ) == 0 || ( &report_metadata_errors() && return FAILURE );
return SUCCESS
}
sub report_metadata_errors {
open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE; local $/ = undef;
$err_msg .= <METADATA>;
&close_and_delete_metadata_file();
}
sub close_and_delete_metadata_file {
close METADATA;
unlink $meta_fn;
}
sub cat_pdf {
# Get current metadata info from input file and store in text file
$err_msg = "Error running pdftk cat command!";
system ( "$pdftk \"$input_fn\" cat output \"$output_fn\" " . "dont_ask 2>&1" ) == 0 || return FAILURE; return SUCCESS;
}
sub abort_me {
&clean_gui();
# Show an error message and an exit button
$main->Label ( -text => $err_msg, -font => 'header',
)
->pack (
-ipadx => 12, -ipady => 4, -padx => 8, -pady => 8,
);
$main->Button ( -text => "Exit",
-command => sub { exit }, -font => 'button',
)
->pack ( -ipadx => 12, -ipady => 4, -padx => 8, -pady => 8,
);
}
sub show_msg {
&clean_gui();
# Show an error message and an exit button
$main->Label ( -text => $err_msg, -font => 'header',
)
->pack (
-ipadx => 12, -ipady => 4, -padx => 8, -pady => 8,
);
}
sub save_and_exit {
# Save new metadata to metadata file
$err_msg = "Error opening temporary metadata file for writing!";
open ( METADATA, ">", $meta_fn ) || &abort_me() && return FAILURE;
foreach my $field ( @fieldnames ) {
print METADATA "InfoKey: $field\n";
print METADATA "InfoValue: $fields{$field}\n";
}
# Write new metadata into new PDF file
system ( "$pdftk \"$input_fn\" " . "update_info \"$meta_fn\" " . "output \"$output_fn\" dont_ask" );
# Replace orig. PDF with new one, so that the old one has new metadata
$err_msg = "Error updating PDF! Changes are in:\n" . $output_fn;
move ( $output_fn, $input_fn ) || &abort_me() && return FAILURE;
# Delete metadata file. (The delete does not work without opening first.)
$err_msg = "Error opening temporary metadata file for reading!";
open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE;
&close_and_delete_metadata_file();
exit;
}
__END__
|