Further to this question: PDF::FromHTML - Corrupt file and no output
The code in question is 'working' in that it produces a PDF document just fine, but NONE of the HTML anchors are being translated, and on larger documents the processing ceases at Page 11 of the PDF - with no error; it closes the document just fine!
Edit: To save looking at the Question Link:
# Excerpt of the PDF-generation code from output_pdf in the full script below.
# The paste appears to have dropped some lines, so this fragment is not
# runnable as shown.
# print "<p>".$textblob."</p>";
# $textblob = decode('UTF-8', $textblob);
my $output;
my $pdf = PDF::FromHTML->new( encoding => 'utf-8' );
# With PDF::API2, font names such as 'traditional' also works
# NOTE(review): the three options below look like arguments to a conversion
# call whose opening and closing lines are not shown -- confirm against the
# original script.
Font => 'Arial',
LineHeight => 10,
Landscape => 0,
# Emits $output; no visible line in this excerpt assigns anything to it.
print $output;
When the line that prints $textblob is uncommented and the PDF section is commented out, the full 400-reference adventure is displayed in HTML, links included, just fine...
Update: In desperation, here is the entire script (it's not TOO long...)
use cPanelUserConfig;
use CGI::Carp qw(fatalsToBrowser);
use CGI;
use List::Util qw(shuffle);
use PDF::FromHTML;
require "authenticate.pl";
# Main request handling. Several control-flow lines (the opening `if` of the
# chain, closing braces, the calls made in each branch) are not visible in
# this paste, so the comments below describe only what is shown.

# Request object; declared without `my`, so the subs below reach it as a global.
$query = new CGI;
# NOTE(review): header() is called twice, which prints two separate header
# blocks. The second block would be emitted after the blank line that ends the
# first, i.e. inside the response body -- for an application/pdf response that
# would corrupt the document. Confirm and merge into a single header() call.
print $query->header(-type=>'application/pdf');
print $query->header(-charset=>'utf-8');
# NOTE(review): as shown, the HTML comment and notice below are printed
# unconditionally, so they would also be sent ahead of any PDF bytes -- verify
# whether a guarding condition was lost from the paste.
print "\n\n\n\n<!-- -------------------------- BEGIN: ff.net Script generated text ------------------------------------------- -->";
print "Randomise working? Let me know if you find a bug.<br />";
# Global match in list context: every match contributes its two captures
# (a leading reference number, then the text that follows it up to the next
# numbered line or the literal "EOF"), and the flattened list is assigned to
# %refhash as number => text pairs. $doc is not assigned in the visible lines.
%refhash = $doc =~ /^[\n\s\t\.\#]*(\d+)[\s\t\.\#\n]+(?!\n*^[\n\s\t\.\#]*\d+[\s\t\.\#\n]+)(.+?)(?=^[\s\t\.\#\n]*\d+[\s\t\.\#\n]+|EOF)/smcgi; # refhash{key}=content, where key==refnumber and content==well, ref content
# Rebuild the reference hash from the submitted form; %anchors is passed by
# reference so the sub can work with it.
my %anchors;
my $refhashref=&recreate_refhash($query->param('references'),\%anchors);
# The condition guarding this first branch is not visible in the paste.
print "Your adventure looks like this: <br /><br />";
# Branch taken when the 'Auto-HTML Tag' submit button was pressed.
elsif(defined($query->param('Auto-HTML Tag'))){
print "Your adventure looks like this: <br /><br />";
# Branch taken when the 'Auto-ABML Tag' submit button was pressed.
elsif(defined($query->param('Auto-ABML Tag'))){
print "Your adventure looks like this: <br /><br />";
print "undefined function call";
# Fallback: show the initial form with a textarea named 'doc'.
else{ # output form to input doc content
print "Please input your document text into the textarea below (copy and paste should do it):";
print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';
print $query->textarea(-name=>'doc',-rows=>20,-cols=>100, -style=>"font-family:arial;width:98%");
print $query->submit('Go!');
print '</form>';
# print "<!-- -------------------------- END: ff.net Script generated text ------------------------------------------- -->";
# recreate_refhash($references, $anchors_ref)
#   $references  - number of reference rows to walk (used as the loop bound)
#   $anchors_ref - second argument; the caller passes a reference to its
#                  %anchors hash
# Returns a reference to the local %refhash.
# NOTE(review): the lines that read the form fields and populate %refhash
# (and the closing braces) are not visible in this paste.
sub recreate_refhash{
my %refhash;
my $references=shift;
my $anchors_ref=shift;
# Visit row indices 0 .. $references-1.
for(my $x=0;$x<$references;$x++){
# Per-row field names; they match the names generated in display_refhash.
my $referencekey="reference"."$x";
my $referencecontent="reftext"."$x";
my $anchorname="anchor"."$x";
my $deletename="delete"."$x";
return \%refhash;
# randomise($refhashref, $anchor_ref)
#   $refhashref - reference to the reference-number => content hash
#   $anchor_ref - reference to the anchors hash
# Shuffles the reference numbers and returns a reference to
# %randomisedrefhash.
# NOTE(review): loop headers, closing braces and the statements that fill
# %randomisedrefhash / %Xrefhash are not visible in this paste.
sub randomise{
my $refhashref=shift;
my $anchor_ref=shift;
# Work on a local copy of the caller's hash.
my %refhash=%$refhashref;
# NOTE(review): without parentheses, `my` declares only %randomisedrefhash;
# %Xrefhash, @refstack and $ref are then package globals. Probably meant
# `my (%randomisedrefhash, %Xrefhash, @refstack, $ref);` -- confirm.
my %randomisedrefhash, %Xrefhash, @refstack, $ref;
my %anchors=%$anchor_ref;
# randomise the list
# Keys are sorted numerically first, then shuffled with List::Util::shuffle.
@refstack=shuffle sort {$a <=> $b} keys %refhash; # inflict an order on the pre-shuffle (therefore we can xref predicatably?) not sure this makes ANY sense i'm melting....
## transpose anchors back to their required location
# NOTE(review): $x is not declared in the visible lines; presumably the index
# of a loop over @refstack whose header was lost.
# $anchor refers to the @refstack slot whose index is the anchor value stored
# for the reference currently at position $x.
my $anchor=\$refstack[$anchors{$refstack[$x]}];
my $temp=$refstack[$x];
print "---Swapping $temp with ".$$anchor;
if($refstack[$anchors{$refstack[$x]}] ne $$anchor){
## randomise the refs and the content associations, and create the cross-ref hash
# Walk the original keys in numeric order, taking the next shuffled key for each.
foreach $ref(sort {$a <=> $b} keys %refhash){
$key=shift @refstack;
## now do the content link substitutions
foreach $ref(keys %randomisedrefhash){
# Debug output: prints every value stored in %anchors.
print "You asked for the following anchors:";
foreach $key(keys %anchors){
print $anchors{$key};
return \%randomisedrefhash;
# substitute_xref($pretext1, $pretext2, $pretext3, $pretext4, $link, $Xrefhashref)
#   $pretext1..4 - four text fragments that are passed through unchanged
#   $link        - key to look up in the cross-reference hash
#   $Xrefhashref - reference to the cross-reference hash
# Returns the four fragments concatenated, followed by the value stored for
# $link in the cross-reference hash.
sub substitute_xref{ ## not sure that this is necessary but the verboseness was easier to work out
my $pretext1=shift;
my $pretext2=shift;
my $pretext3=shift;
my $pretext4=shift;
my $link=shift;
my $Xrefhashref=shift;
# Copies the whole hash on every call; only a single lookup is made on it.
my %Xrefhash=%$Xrefhashref;
my $newlink=$Xrefhash{$link};
return "$pretext1$pretext2$pretext3$pretext4$newlink";
# save()
# Placeholder: the visible body only prints a notice.
sub save{
print "Will Save soon";
# display_refhash($refhashref)
#   $refhashref - reference to the reference-number => content hash
# Prints an editable HTML form with one group of fields per reference, then a
# rendered copy of all references built up in $textblob. Reads the global
# $query to decide which tagging to apply.
# NOTE(review): closing braces and, apparently, the line that increments $x
# are not visible in this paste.
sub display_refhash{
my $refhashref=shift;
my %refhash=%$refhashref;
print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';
my $x=0;
# NOTE(review): without parentheses, `my` declares only $ref; $textblob is a
# package global here. Probably meant `my ($ref, $textblob);` -- confirm.
my $ref,$textblob;
# References are processed in ascending numeric order.
foreach $ref (sort {$a <=> $b} keys %refhash){
# Field names carry the row index $x so every row posts distinct parameters.
my $reference="reference"."$x";
my $reftext="reftext"."$x";
my $anchor="anchor"."$x";
my $delete="delete"."$x";
my $default=$refhash{$ref};
print "Reference is: ".$query->textfield(-name=>$reference,-value=>$ref, -override=>1)."<br />";
print $query->checkbox_group(-name=>$delete,-values=>$ref,-labels=>{$ref=>'Delete Me'})."<br />";
print $query->checkbox_group(-name=>$anchor,-values=>$ref, -labels=>{$ref=>'Anchor Me (Will NOT get Randomised)'})."<br />";
print "Content is: ".$query->textarea(-name=>$reftext, -default=>$default, -rows=>5, -override=>1, -cols=>100, -style=>"font-family:arial;width:98%")."<br />";
print "<br /><br />";
# HTML tagging: applied when either the 'Auto-HTML Tag' or the 'PDF' button
# was submitted.
if((defined($query->param('Auto-HTML Tag'))) or (defined($query->param('PDF')))){
# Wrap the reference number in <a id="N">N</a> so it can be a link target.
$ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
# Turn "return to / go to / turn to [page|paragraph|reference|section] N"
# into a link to #N; the captured pieces are re-emitted separated by spaces.
$default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<a href\=\"\#$5\"\>$1 $2 $3 $4 $5\<\/a\>/gi;
# ABML tagging: applied when the 'Auto-ABML Tag' button was submitted.
if(defined($query->param('Auto-ABML Tag'))){
# $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
# NOTE(review): the `\<\;` and `\>\;` sequences in the replacement look like
# HTML entities (&lt; / &gt;) that were decoded when the code was pasted --
# confirm against the original script.
$default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<\;tt ref\=\"$5\"\>\;$1 $2 $3 $4 $5\<\;\/tt\>\;/gi;
# Accumulate the rendered reference for the preview printed at the end.
$textblob.=$ref." ".$default."<br /><br />";
# Hidden field 'references' carries the value of $x back to the script.
print $query->hidden(-name=>'references',-value=>$x,override=>1);
# print $query->submit(-name=>'Save');
print $query->submit('Randomise');
print $query->submit('Auto-ABML Tag');
print $query->submit('Auto-HTML Tag');
print $query->submit('PDF');
print "</form><br /><br /><br />";
# Convert remaining newlines to <br /> before printing the preview.
$textblob=~s/\n/\<br \/\>/gi;
print "<p>".$textblob."</p>";
# output_pdf($refhashref)
#   $refhashref - reference to the reference-number => content hash
# Builds the same tagged HTML text as display_refhash (without the form) in
# $textblob, creates a PDF::FromHTML object and prints $output.
# NOTE(review): closing braces and the calls that would load $textblob into
# the converter, run the conversion and write the result to $output are not
# visible in this paste -- confirm against the original script.
sub output_pdf{
my $refhashref=shift;
my %refhash=%$refhashref;
my $x=0;
# NOTE(review): without parentheses, `my` declares only $ref; $textblob is a
# package global here. Probably meant `my ($ref, $textblob);` -- confirm.
my $ref,$textblob;
# References are processed in ascending numeric order.
foreach $ref (sort {$a <=> $b} keys %refhash){
# These four names are built but not used in the visible lines of this sub.
my $reference="reference"."$x";
my $reftext="reftext"."$x";
my $anchor="anchor"."$x";
my $delete="delete"."$x";
my $default=$refhash{$ref};
# HTML tagging: applied when either the 'Auto-HTML Tag' or the 'PDF' button
# was submitted.
if((defined($query->param('Auto-HTML Tag'))) or (defined($query->param('PDF')))){
# Wrap the reference number in <a id="N">N</a> so it can be a link target.
$ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
# Turn "return to / go to / turn to [page|paragraph|reference|section] N"
# into a link to #N; the captured pieces are re-emitted separated by spaces.
$default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<a href\=\"\#$5\"\>$1 $2 $3 $4 $5\<\/a\>/gi;
# ABML tagging: applied when the 'Auto-ABML Tag' button was submitted.
if(defined($query->param('Auto-ABML Tag'))){
# $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
# NOTE(review): the `\<\;` and `\>\;` sequences in the replacement look like
# HTML entities (&lt; / &gt;) that were decoded when the code was pasted --
# confirm against the original script.
$default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<\;tt ref\=\"$5\"\>\;$1 $2 $3 $4 $5\<\;\/tt\>\;/gi;
# Accumulate the rendered reference into the HTML that is to be converted.
$textblob.=$ref." ".$default."<br /><br />";
# Convert remaining newlines to <br />.
$textblob=~s/\n/\<br \/\>/gi;
# print "<p>".$textblob."</p>";
my $output;
my $pdf = PDF::FromHTML->new( encoding => 'utf-8' );
# With PDF::API2, font names such as 'traditional' also works
# NOTE(review): the three options below look like arguments to a conversion
# call whose opening and closing lines are not shown.
Font => 'Arial',
LineHeight => 10,
Landscape => 0,
# Sends $output to STDOUT; no visible line assigns anything to it.
print $output;
# html_header() / html_footer()
# Only the opening lines of these two subs appear in the paste; their bodies
# are not shown.
sub html_header{
sub html_footer{
If you want sample data, let me know and I'll upload it somewhere.
"... and on larger documents the processing ceases at Page 11 of the PDF..."
This seems to be due to a bug in PDF::FromHTML::Template::Container::PageDef. Notice the line:
last if $::x++ > 10;
It means it will never create more than 11 pages. I have filed a bug report.