I’ve created a report PDF from an HTML page output by our ASP.NET-based product, but I needed to change a contents list that was rendered as a hyperlink list on the page into a list of section headings and page numbers, since the PDF was primarily for printing. The solution wasn’t immediately obvious, but I’ve come up with the following, which I was running in a unit test for simple, quick prototyping. The key is using the HtmlOptions.AddTags property in ABCpdf, which allows areas of the HTML to be retrieved during PDF rendering and modified.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using WebSupergoo.ABCpdf8;

namespace TestPdfFormFields
{
    /// <summary>
    /// Prototype that renders HTML to a PDF with ABCpdf and back-fills a printed
    /// contents list with the page number on which each section heading was rendered.
    /// </summary>
    [TestClass]
    public class PdfPrototyping
    {
        // Element ids in the HTML are paired by suffix: 'contents1' is the placeholder
        // that receives the page number of the element tagged 'heading1', and so on.
        private const string ContentsTagPrefix = "contents";
        private const string HeadingTagPrefix = "heading";

        // Elements styled with 'abcpdf-tag-visible: true' are reported back by ABCpdf
        // (id + rectangle) once HtmlOptions.AddTags is switched on.
        private const string ReportHtml = @"
            <h1>Contents</h1>
            <ul>
                <li><span id='contents1' style='abcpdf-tag-visible: true; width: 20px;'></span>.......First paragraph</li>
                <li><span id='contents2' style='abcpdf-tag-visible: true; width: 20px;'></span>.......Second paragraph</li>
                <li><span id='contents3' style='abcpdf-tag-visible: true; width: 20px;'></span>.......Third paragraph</li>
            </ul>
            <h1 id='heading1' style='abcpdf-tag-visible: true; page-break-before:always;'>Section One</h1>
            <h1 id='heading2' style='abcpdf-tag-visible: true; page-break-before:always;'>Section Two</h1>
            <h1 id='heading3' style='abcpdf-tag-visible: true; page-break-before:always;'>Section Three</h1>
            ";

        /// <summary>
        /// A tagged HTML element as collected during rendering: its id, the rectangle
        /// it occupies (in ABCpdf's rect string form) and the page it was found on.
        /// </summary>
        struct Tag
        {
            public string name;
            public string rectString;
            public int pagenumber;
        }

        /// <summary>
        /// Renders the sample HTML, writes each section's page number into its contents
        /// placeholder, adds a "Page x of y" line to every page and saves the PDF.
        /// </summary>
        [TestMethod]
        public void testCreatingAPdfWithContentsPageFromHtml()
        {
            // Dispose the document when done so it is released even if a step throws.
            using (var theDoc = new Doc())
            {
                // Set up the area of the page that the HTML is rendered into.
                theDoc.Rect.Inset(100, 100);
                theDoc.Rect.Top = 700;

                // Ask ABCpdf to record the ids and rectangles of every element styled
                // with 'abcpdf-tag-visible' so they can be queried after rendering.
                theDoc.HtmlOptions.AddTags = true;

                // The contents list is rendered with empty placeholders first; the page
                // numbers are only known once the HTML has been chained across pages.
                var theID = theDoc.AddImageHtml(ReportHtml);
                var tagCache = CollectTags(theDoc, theID);

                FillContentsPageNumbers(theDoc, tagCache);
                AddPageFooters(theDoc);

                const string testFilename = @"c:\temp\HtmlOptionsGetTagRects.pdf";
                // Make sure the target folder exists; File.Delete is a no-op when the
                // file itself is absent, so no separate existence check is needed.
                Directory.CreateDirectory(Path.GetDirectoryName(testFilename));
                File.Delete(testFilename);
                theDoc.Save(testFilename);

                // Opens the result in the default viewer - handy while prototyping
                // interactively, but unsuitable for an unattended test run.
                Process.Start(testFilename);
            }
        }

        /// <summary>
        /// Chains the HTML across as many pages as it needs and gathers the tags found
        /// on each page, labelling them with a 1-based page counter.
        /// </summary>
        /// <param name="theDoc">Document the HTML is being rendered into.</param>
        /// <param name="theID">Id returned by the initial AddImageHtml call.</param>
        /// <returns>All tags from every chained page, in page order.</returns>
        private static List<Tag> CollectTags(Doc theDoc, int theID)
        {
            var tagCache = new List<Tag>();
            var pagenumber = 1;
            while (true)
            {
                // Tags are reported per rendered chunk; each chained chunk has its own id.
                var tags = theDoc.HtmlOptions.GetTagIDs(theID);
                var tagRects = theDoc.HtmlOptions.GetTagRects(theID);
                for (var i = 0; i < tags.Length; i++)
                {
                    tagCache.Add(new Tag
                    {
                        name = tags[i],
                        pagenumber = pagenumber,
                        rectString = tagRects[i].String
                    });
                }

                if (!theDoc.Chainable(theID))
                {
                    break;
                }

                theDoc.Page = theDoc.AddPage();
                theID = theDoc.AddImageToChain(theID);
                pagenumber++;
            }

            return tagCache;
        }

        /// <summary>
        /// Writes, into every 'contents*' placeholder, the lowest page number on which
        /// the matching 'heading*' element was rendered. Fails the test if a placeholder
        /// has no matching heading tag.
        /// </summary>
        /// <param name="theDoc">Document to draw the page numbers on.</param>
        /// <param name="tagCache">Tags collected from all pages.</param>
        private static void FillContentsPageNumbers(Doc theDoc, List<Tag> tagCache)
        {
            // Index the lowest page per tag name once, rather than rescanning the whole
            // cache for every contents entry.
            var firstPageByName = new Dictionary<string, int>();
            foreach (var tag in tagCache)
            {
                int knownPage;
                if (!firstPageByName.TryGetValue(tag.name, out knownPage) || tag.pagenumber < knownPage)
                {
                    firstPageByName[tag.name] = tag.pagenumber;
                }
            }

            theDoc.HPos = 1.0; // Right justify
            theDoc.VPos = 0.5; // Centre vertically
            theDoc.FontSize = 8;
            foreach (var tag in tagCache)
            {
                // Ids are identifiers, not linguistic text, so compare ordinally.
                if (!tag.name.StartsWith(ContentsTagPrefix, StringComparison.Ordinal))
                {
                    continue;
                }

                var headingName = HeadingTagPrefix + tag.name.Substring(ContentsTagPrefix.Length);
                int headingPage;
                if (!firstPageByName.TryGetValue(headingName, out headingPage))
                {
                    Assert.Fail("No rendered element is tagged '" + headingName +
                                "' for contents placeholder '" + tag.name + "'.");
                }

                // Draw the number inside the placeholder's own rectangle on its page.
                theDoc.PageNumber = tag.pagenumber;
                theDoc.Rect.String = tag.rectString;
                theDoc.AddText(headingPage.ToString());
            }
        }

        /// <summary>
        /// Adds a "Page x of y" line to every page and flattens each page's layers.
        /// </summary>
        /// <param name="theDoc">Document whose pages are to be numbered.</param>
        private static void AddPageFooters(Doc theDoc)
        {
            theDoc.Rect.String = "100 70 500 150";
            var pageCount = theDoc.PageCount;
            for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                theDoc.PageNumber = pageNumber;
                theDoc.AddText("Page " + pageNumber + " of " + pageCount);
                theDoc.Flatten();
            }
        }
    }
}
