Philip Hendry's Blog

December 23, 2011

Create a contents page with page numbers from HTML input using WebSupergoo ABCpdf

Filed under: ASP.NET — philiphendry @ 9:25 am

I’ve created a report PDF from an HTML page output by our ASP.NET-based product, but I needed to change a contents list that was rendered as a list of hyperlinks on the page into a list of section headings and page numbers, since the PDF was primarily for printing. The solution wasn’t immediately obvious, but I’ve come up with the following, which I was running in a unit test for quick prototyping. The key is the HtmlOptions.AddTags property in ABCpdf, which allows areas of the HTML to be retrieved during PDF rendering and then modified.

 

   using System;
   using System.Collections.Generic;
   using System.Diagnostics;
   using System.IO;
   using System.Linq;
   using Microsoft.VisualStudio.TestTools.UnitTesting;
   using WebSupergoo.ABCpdf8;

   namespace TestPdfFormFields
   {
      /// <summary>
      /// Prototype showing how to turn an HTML contents list into a printed contents page
      /// with page numbers, using the tag ids and rectangles that ABCpdf records while
      /// rendering HTML with <c>HtmlOptions.AddTags</c> enabled.
      /// </summary>
      [TestClass]
      public class PdfPrototyping
      {
         // Id prefix of the placeholder spans in the contents list ('contents1', 'contents2', ...).
         private const string ContentsPrefix = "contents";

         // Id prefix of the section headings ('heading1', 'heading2', ...). A contents placeholder
         // and its heading share the same suffix.
         private const string HeadingPrefix = "heading";

         /// <summary>
         /// One tagged HTML element as reported by ABCpdf after rendering a page of the chain.
         /// </summary>
         struct Tag
         {
            // The html id of the tagged element.
            public string name;

            // The element's rectangle in the string form exposed by the rect's String property.
            public string rectString;

            // The 1-based page (within this document) the element was rendered on.
            public int pagenumber;
         }

         /// <summary>
         /// Renders a small HTML document into a PDF, back-fills the contents placeholders with
         /// the page number of the matching section heading, stamps "Page x of y" on every page,
         /// saves the result to c:\temp and opens it.
         /// </summary>
         /// <remarks>
         /// This is a manual prototype rather than an automated check: it writes to a fixed path
         /// and launches the saved file with its associated viewer.
         /// </remarks>
         [TestMethod]
         public void testCreatingAPdfWithContentsPageFromHtml()
         {
            // Doc is disposable; the using block releases it even when rendering throws.
            using (var theDoc = new Doc())
            {
               // Set up the page area the HTML will be rendered into.
               theDoc.Rect.Inset(100, 100);
               theDoc.Rect.Top = 700;

               // Instructs ABCpdf to honour the 'abcpdf-tag-visible' style and to record, for each
               // tagged element, its id from the html and the rectangle it was rendered into.
               theDoc.HtmlOptions.AddTags = true;

               // The contents page contains empty placeholder spans where the page numbers will go.
               // They are back-filled once we know where the sections have been rendered, which is
               // not known until ABCpdf has chained the pages together.
               var theID = theDoc.AddImageHtml(
                  @"
                     <h1>Contents</h1>
                     <ul>
                        <li><span id='contents1' style='abcpdf-tag-visible: true; width: 20px;'></span>.......First paragraph</li>
                        <li><span id='contents2' style='abcpdf-tag-visible: true; width: 20px;'></span>.......Second paragraph</li>
                        <li><span id='contents3' style='abcpdf-tag-visible: true; width: 20px;'></span>.......Third paragraph</li>
                     </ul>
                     <h1 id='heading1' style='abcpdf-tag-visible: true; page-break-before:always;'>Section One</h1>
                     <h1 id='heading2' style='abcpdf-tag-visible: true; page-break-before:always;'>Section Two</h1>
                     <h1 id='heading3' style='abcpdf-tag-visible: true; page-break-before:always;'>Section Three</h1>
                  ");

               var tagCache = new List<Tag>();
               var chainPage = 1;
               while (true)
               {
                  // Collect the tags and rectangles reported for the current theID. Each chained
                  // page yields a new theID with its own tags.
                  var tags = theDoc.HtmlOptions.GetTagIDs(theID);
                  var tagRects = theDoc.HtmlOptions.GetTagRects(theID);
                  tagCache.AddRange(tags.Select((t, i) => new Tag
                  {
                     name = t,
                     pagenumber = chainPage,
                     rectString = tagRects[i].String
                  }));

                  if (!theDoc.Chainable(theID))
                  {
                     break;
                  }

                  theDoc.Page = theDoc.AddPage();
                  theID = theDoc.AddImageToChain(theID);
                  chainPage++;
               }

               // With every contents and heading tag cached, write into each contents placeholder
               // the smallest page number on which the corresponding heading was rendered.
               theDoc.HPos = 1.0;   // Right justify
               theDoc.VPos = 0.5;   // Centre vertically
               theDoc.FontSize = 8;
               foreach (var tag in tagCache.Where(t => t.name.StartsWith(ContentsPrefix, StringComparison.Ordinal)))
               {
                  var headingName = HeadingPrefix + tag.name.Substring(ContentsPrefix.Length);
                  var headingPages = tagCache
                     .Where(t => t.name == headingName)
                     .Select(t => t.pagenumber)
                     .ToList();

                  // Min() throws an uninformative InvalidOperationException on an empty sequence,
                  // so fail with a message naming the missing heading instead.
                  Assert.IsTrue(
                     headingPages.Count > 0,
                     "No heading tag '" + headingName + "' was found for contents entry '" + tag.name + "'.");

                  theDoc.PageNumber = tag.pagenumber;
                  theDoc.Rect.String = tag.rectString;
                  theDoc.AddText(headingPages.Min().ToString());
               }

               // Now iterate through all the pages, add page numbers and flatten the layers.
               theDoc.Rect.String = "100 70 500 150";
               var pageCount = theDoc.PageCount;
               for (var footerPage = 1; footerPage <= pageCount; footerPage++)
               {
                  theDoc.PageNumber = footerPage;
                  theDoc.AddText("Page " + footerPage + " of " + pageCount);
                  theDoc.Flatten();
               }

               const string testFilename = @"c:\temp\HtmlOptionsGetTagRects.pdf";

               // Make sure the target folder exists so saving does not fail on a clean machine.
               Directory.CreateDirectory(Path.GetDirectoryName(testFilename));

               // File.Delete is a no-op when the file is absent, so no existence check is needed.
               File.Delete(testFilename);
               theDoc.Save(testFilename);
            }

            // Open the result for visual inspection (prototype convenience; not suitable for CI).
            Process.Start(@"c:\temp\HtmlOptionsGetTagRects.pdf");
         }
      }
   }

Advertisement

Leave a Comment »

No comments yet.

RSS feed for comments on this post. TrackBack URI

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s

Theme: Shocking Blue Green. Blog at WordPress.com.

Follow

Get every new post delivered to your Inbox.