DevLost

A developer lost in the mountains

Assembling pdf files from different sources - part 2

In Assembling pdf files from different sources - part 1 we started tackling the challenge of assembling a pdf file from different sources through an automated process.

We have seen how to add sections and how to add images and bookmarks to the sections.

When you create sections you can also create headers and footers for these sections, like in the code snippet below where even and odd headers and footers are differentiated:


        /// <summary>
        /// Adds page-anchored header and footer artwork, plus breadcrumb text frames, to the
        /// last section of the document. The primary and the even-page header/footer get
        /// separate images, paragraph styles and text positions.
        /// </summary>
        /// <param name="headerText">Text placed in both the primary and the even-page header.</param>
        /// <param name="footerText">Text placed in both the primary and the even-page footer.</param>
        void CreateHeaders(string headerText, string footerText)
        {
            // Resolve the target section once; every header/footer below belongs to it.
            Section sec = document.LastSection;

            // PRIMARY HEADER
            // "holes.png" is added to the primary header only; the even-page header does not get it.
            var headerPrimary = sec.Headers.Primary;
            var holesImg = headerPrimary.AddImage("holes.png");
            holesImg.Height = "9cm";
            holesImg.LockAspectRatio = true;
            holesImg.RelativeVertical = RelativeVertical.Page;
            holesImg.RelativeHorizontal = RelativeHorizontal.Page;
            holesImg.Top = "6.5cm";
            holesImg.Left = "8mm";

            var headerPrimaryImg = headerPrimary.AddImage("headers\\header_right.png");
            headerPrimaryImg.Height = "1.2cm";
            headerPrimaryImg.LockAspectRatio = true;
            headerPrimaryImg.RelativeVertical = RelativeVertical.Page;
            headerPrimaryImg.RelativeHorizontal = RelativeHorizontal.Page;
            headerPrimaryImg.Top = "7mm";
            headerPrimaryImg.Left = "2cm";

            var tfHeaderPrimary = headerPrimary.Elements.AddTextFrame();
            tfHeaderPrimary.AddParagraph(headerText).Style = "BreadcrumbsRight";
            tfHeaderPrimary.Width = "6cm";
            tfHeaderPrimary.RelativeVertical = RelativeVertical.Page;
            tfHeaderPrimary.RelativeHorizontal = RelativeHorizontal.Page;
            tfHeaderPrimary.Top = "1.3cm";
            tfHeaderPrimary.Left = "2cm";

            // EVEN-PAGE HEADER
            var headerEven = sec.Headers.EvenPage;
            var headerEvenImg = headerEven.AddImage("headers\\header_left.png");
            headerEvenImg.Height = "1.2cm";
            headerEvenImg.LockAspectRatio = true;
            headerEvenImg.RelativeVertical = RelativeVertical.Page;
            headerEvenImg.RelativeHorizontal = RelativeHorizontal.Page;
            headerEvenImg.Top = "7mm";
            headerEvenImg.Left = "2cm";

            var tfHeaderEven = headerEven.Elements.AddTextFrame();
            tfHeaderEven.AddParagraph(headerText).Style = "BreadcrumbsLeft";
            tfHeaderEven.Width = "6cm";
            tfHeaderEven.RelativeVertical = RelativeVertical.Page;
            tfHeaderEven.RelativeHorizontal = RelativeHorizontal.Page;
            tfHeaderEven.Top = "1.3cm";
            tfHeaderEven.Left = "22cm";

            // PRIMARY FOOTER
            var footerPrimary = sec.Footers.Primary;
            var footerPrimaryImg = footerPrimary.AddImage("headers\\footer_right.png");
            footerPrimaryImg.Height = "2.5mm";
            footerPrimaryImg.LockAspectRatio = true;
            footerPrimaryImg.RelativeVertical = RelativeVertical.Page;
            footerPrimaryImg.RelativeHorizontal = RelativeHorizontal.Page;
            footerPrimaryImg.Top = "19cm";
            footerPrimaryImg.Left = "2cm";

            var tfFooterPrimary = footerPrimary.Elements.AddTextFrame();
            tfFooterPrimary.AddParagraph(footerText).Style = "BreadcrumbsRight";
            tfFooterPrimary.Width = "6cm";
            tfFooterPrimary.RelativeVertical = RelativeVertical.Page;
            tfFooterPrimary.RelativeHorizontal = RelativeHorizontal.Page;
            tfFooterPrimary.Top = "18.9cm";
            tfFooterPrimary.Left = "25.5cm";

            // EVEN-PAGE FOOTER
            var footerEven = sec.Footers.EvenPage;
            var footerEvenImg = footerEven.AddImage("headers\\footer_left.png");
            footerEvenImg.Height = "2.5mm";
            footerEvenImg.LockAspectRatio = true;
            footerEvenImg.RelativeVertical = RelativeVertical.Page;
            footerEvenImg.RelativeHorizontal = RelativeHorizontal.Page;
            footerEvenImg.Top = "19cm";
            footerEvenImg.Left = "2cm";

            var tfFooterEven = footerEven.Elements.AddTextFrame();
            tfFooterEven.AddParagraph(footerText).Style = "BreadcrumbsLeft";
            tfFooterEven.Width = "6cm";
            tfFooterEven.RelativeVertical = RelativeVertical.Page;
            tfFooterEven.RelativeHorizontal = RelativeHorizontal.Page;
            tfFooterEven.Top = "18.9cm";
            tfFooterEven.Left = "20.5cm";
        }

As already explained in part 1, it is important to create room for the external pdf files before they are concatenated to the main pdf. MigraDoc does not include an API to import external files, so you have to go through PDFsharp; but in that case you lose the abstraction layer of sections, paragraphs, etc. In other words, once you leverage the pdf importing capability of PDFsharp you lose the logical structure of the MigraDoc document, so concatenating the pdf files is the last thing to do (almost the last: you still have to add the page numbers at the end).

So, to create room for the external pdf files and let their content be bookmarked, you can take a look at the trick below, where blank sections are created for the pages of the pdf files listed in a declist.txt file:

        /// <summary>
        /// Reserves room in the MigraDoc document for the external pdf files: adds the
        /// bookmarked title page of part 6, reads the list of pdf files from dec\declist.txt
        /// and adds one bookmarked (transparent) placeholder paragraph per external pdf page.
        /// Updates <c>declist</c>, <c>decPagesCounter</c> and <c>pageAdded</c>.
        /// </summary>
        public void PreFillPart6Section()
        {
            Section section = GetSection("Part6Section");
            CreateHeaders("Decisions and Regulations", "");
            string titleSection = "PART 6: DECISIONS AND REGULATIONS";
            // The title is transparent: it is only there to carry the bookmark.
            var parT = section.AddParagraph(titleSection);
            parT.Format.Font.Color = Colors.Transparent;
            parT.Format.SpaceBefore = "10cm";
            bCreator.AddBookmarkMigraDoc(parT, titleSection, titleSection, typeOfBookmark.part);
            section.AddPageBreak();

            // section for Decisions and Regulations
            AddDefaultSection("DecRegsSection");

            // read list of pdf files and save names in a list
            using (StreamReader sr = new StreamReader("dec\\declist.txt"))
            {
                try
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // A blank line would turn into the bogus path "rfu\" and fail later on.
                        if (string.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }
                        declist.Add("rfu\\" + line);
                    }
                }
                catch (IOException ex)
                {
                    // Best effort, as before: keep the entries read so far, but leave a trace
                    // instead of silently hiding the failure.
                    System.Diagnostics.Trace.TraceWarning("Reading dec\\declist.txt failed: " + ex.Message);
                }
            }

            // Create blank sections pages
            foreach (string dec in declist)
            {
                // Open each pdf once; the page count cannot change while placeholders are added.
                int pageCount;
                using (var decForm = XPdfForm.FromFile(dec))
                {
                    pageCount = decForm.PageCount;
                }

                // NOTE(review): entries are prefixed with "rfu\\" above, yet "dec\\" is stripped
                // here - confirm which folder prefix is meant to be removed from the bookmark.
                string bookmarktext = dec.Replace(".pdf", "").Replace("dec\\", "");

                for (int i = 1; i <= pageCount; i++)
                {
                    // NB each page contains 2 dec
                    string sectionName = dec + "_" + i.ToString();

                    if (IsOdd(decPagesCounter))
                    {
                        AddDefaultSection(sectionName);
                    }
                    var par = document.LastSection.AddParagraph(sectionName);
                    par.Format.Font.Color = Colors.Transparent;
                    bCreator.AddBookmarkMigraDoc(par, bookmarktext, bookmarktext, typeOfBookmark.part);
                    decPagesCounter += 1;
                }
            }
            pageAdded = decPagesCounter / 2;
        }

Then you can add all the data needed to the other sections. In my case the data came mainly from Sharepoint lists, plus some text from external txt files; in this case I leveraged the MigraDoc DDL (MigraDoc Document Description Language) to add formatted text in the following way:

        /// <summary>
        /// Copies every element of the first section of intro.mdddl into the section of the
        /// main document located through <c>GetSectionIndex("IntroSection")</c>.
        /// </summary>
        public void FillIntroSection()
        {
            var introDdl = MigraDoc.DocumentObjectModel.IO.DdlReader.DocumentFromFile("intro.mdddl");

            int indexIntroSection = GetSectionIndex("IntroSection");
            var targetElements = document.Sections[indexIntroSection].Elements;

            // Only the first section of the DDL file is imported.
            foreach (DocumentObject el in introDdl.Sections[0].Elements)
            {
                // Each element is cloned before it is added to the main document.
                targetElements.Add((DocumentObject)el.Clone());
            }
        }

It is important to note that we imported just the elements included in the first section of the intro.mdddl file and that they need to be cloned before being added to the relevant section of the main pdf.

A portion of the intro.mdddl file is shown below:

\document
{
  \styles
  {
    Normal
    {
       Font.Name = "Swis721 Lt BT"
       Font.Color = Black
    }
  
    Heading1
    {
      Font.Name = "Swis721 BT"
      Font.Size = 16
      Font.Bold = true
      ParagraphFormat.SpaceBefore = 6
      ParagraphFormat.SpaceAfter = 6
      ParagraphFormat.KeepWithNext = true
      ParagraphFormat.Alignment = Left
      ParagraphFormat.OutlineLevel = BodyText
    }
    
    Heading2
    {
      Font.Size = 11
      ParagraphFormat.SpaceBefore = 12
      ParagraphFormat.SpaceAfter = 6
      ParagraphFormat.KeepWithNext = true
    }
    
    DirChapter : Heading1
    {
        ParagraphFormat.Alignment = Left
        ParagraphFormat.OutlineLevel = BodyText
        Font.Color = Black
        Font.Bold = true    
    }
    
    DirParagraph : Normal
    {
        Font.Bold = false
        Font.Size = 11
        ParagraphFormat.Alignment = Justify
        ParagraphFormat.SpaceAfter = 1
        ParagraphFormat.OutlineLevel = BodyText
    }
  
  }

  \section [
  
  
  PageSetup
    {
        StartingNumber = 1
        Orientation    = Landscape
        TopMargin      = "20mm"
        BottomMargin   = "35mm"
        LeftMargin     = "15mm"
        RightMargin    = "25mm"
        HeaderDistance = "20mm"
        FooterDistance = "15mm"
        MirrorMargins  = false
        OddAndEvenPagesHeaderFooter = true
        //DifferentFirstPageHeaderFooter = true
    }
    ]
  {
    \header
    {
      
    }
    \footer [Format.Alignment = Center]
    {
      
    }

    // -------------------------------------------------------------------------
    \paragraph [Style = "DirChapter"]
    {
      Disclaimer
    }
    \paragraph [Style = "DirParagraph"]
    {
      This document has been prepared for guidance only ...
    }
    
    \paragraph
    
    \paragraph [Style = "DirChapter"]
    {
      About
    }
    \paragraph [Style = "DirParagraph"]
    {
      The Guidelines are prepared to ...
    }
     \paragraph [Style = "DirParagraph"]
    {

As you can see, the syntax is very clear and in some ways similar to the LaTeX language.


Now it's time to add the external pdf files; in the code snippet below they are added by injecting two pages of the external files into one existing (and bookmarked) page of the main pdf:

      /// <summary>
      /// Renders the MigraDoc document to PDF and then draws the pages of the external pdf
      /// files listed in <c>decList</c> onto the rendered pages, two external pages side by
      /// side per page, starting at page index (page count - <c>pageAdded</c>).
      /// </summary>
      void AddExternalPdfs()
        {
            // Associate the MigraDoc document with a renderer.
            pdfRenderer.Document = document;

            // Layout and render document to PDF.
            pdfRenderer.RenderDocument();

            // The renderer already knows the laid-out page count; a second DocumentRenderer
            // would lay the whole document out again just to read this number.
            var pages = pdfRenderer.PdfDocument.Pages;
            int currentNumpage = pdfRenderer.PageCount - pageAdded;

            // false: the next external page goes on the left half; true: on the right half.
            bool leftHalfFilled = false;

            // NOTE(review): PreFillPart6Section fills `declist` (lower-case l) - confirm that
            // both names refer to the same list.
            foreach (string dec in decList)
            {
                var decForm = XPdfForm.FromFile(dec);

                // draw two pages into one page of the main pdf
                for (var idx = 0; idx < decForm.PageCount; idx++)
                {
                    // Set the page number (which is one-based).
                    decForm.PageNumber = idx + 1;

                    // Fetch the target page only when there is something to draw on it, so no
                    // trailing blank page is appended after the last external page.
                    if (currentNumpage == pages.Count)
                        pages.Add();
                    var page1 = pages[currentNumpage];

                    // Size the target page when a new file starts and whenever a fresh page
                    // is started. Point units are used for both the page size and the drawing
                    // rectangle, so the two halves line up exactly.
                    if (idx == 0 || !leftHalfFilled)
                    {
                        page1.Width = 2 * decForm.PointWidth;
                        page1.Height = decForm.PointHeight;
                    }

                    double left = leftHalfFilled ? page1.Width / 2 : 0;

                    // Draw the page identified by the page number; `using` releases the
                    // graphics object even if drawing throws.
                    using (XGraphics gfx = XGraphics.FromPdfPage(page1))
                    {
                        XRect decRect = new XRect(left, 0, decForm.PointWidth, decForm.PointHeight);
                        gfx.DrawImage(decForm, decRect);
                    }

                    if (leftHalfFilled)
                    {
                        // Both halves are done: move on to the next page of the main pdf.
                        currentNumpage += 1;
                    }
                    leftHalfFilled = !leftHalfFilled;
                }
            }
        }


If you want to add a cover to the main pdf file by importing an external pdf file, you can do it this way:

        /// <summary>
        /// Removes the first page of the rendered pdf and inserts a page imported from
        /// covers\cover0.pdf.
        /// </summary>
        /// <param name="coverNum">Index of the page to take from the cover pdf.</param>
        /// <param name="numPage">Index at which the cover page is inserted, evaluated after the first page has been removed.</param>
        public void AddCover(int coverNum, int numPage)
        {
            // Let PDFsharp open the file itself: a StreamReader is a text reader and was
            // never disposed, which kept the cover file open.
            var coverDocument = PdfReader.Open("covers\\cover0.pdf", PdfDocumentOpenMode.Import);
            var pageCover = coverDocument.Pages[coverNum];

            pdfRenderer.PdfDocument.Pages.RemoveAt(0);
            pdfRenderer.PdfDocument.InsertPage(numPage, pageCover);
        }

At the end, if you want to add page numbers, you can leverage PDFSharp capabilities:

        /// <summary>
        /// Draws "Page X of N" in the bottom-right area of every page of the rendered pdf
        /// except the first one (the cover). N is the total page count, cover included.
        /// </summary>
        private void AddPageNumbers()
        {
            // Make a font and a brush to draw the page counter.
            XFont font = new XFont("Swis 721 Th BT", 6);
            XBrush brush = XBrushes.Gray;

            var pages = pdfRenderer.PdfDocument.Pages;
            int pageCount = pages.Count;
            string noPages = pageCount.ToString();

            // Start at 1: page 0 is the cover and gets no page number.
            for (int i = 1; i < pageCount; ++i)
            {
                PdfPage page = pages[i];

                // Make a layout rectangle.
                XRect layoutRectangle = new XRect(0/*X*/, page.Height - font.Height - 50/*Y*/, page.Width - 25/*Width*/, font.Height/*Height*/);

                // `using` releases the graphics object even if DrawString throws.
                using (XGraphics gfx = XGraphics.FromPdfPage(page))
                {
                    gfx.DrawString(
                        "Page " + (i + 1).ToString() + " of " + noPages,
                        font,
                        brush,
                        layoutRectangle,
                        XStringFormats.BottomRight);
                }
            }
        }



Assembling pdf files from different sources - part 1

Let’s suppose you want to create a pdf as a result of a mixture of data and information coming from different sources, and suppose you want to automate the creation process.


In my case I had to face a similar challenge when I was asked to assemble a pdf based on the following sources of information:

  • Sharepoint lists
  • Existing pdf files
  • Existing images

Features to be provided included:

  • Easy look & feel upgradability (covers, header and footer images)
  • Text modifications of some parts without coding
  • Automatic generation of internal bookmarks
  • Automatic generation of the table of contents

What to do then?

After some investigation I got just the thing: PDFsharp & Migradoc library.
It is a two-sided C# library, in the sense that it provides a low-level library (PDFsharp) to interact with pdf objects and a high-level library (MigraDoc) which adds an abstraction layer that allows you to think in terms of sections, headers, chapters, paragraphs etc.

The solution to the scenario described above involved the use of both:

  • PDFsharp to concatenate two or more pdf files
  • MigraDoc to create various sections of the document
  • MigraDoc Document Description Language to modify some of the formatted text included in the document
  • PDFsharp to add page numbers to the whole document
  • MigraDoc to add internal bookmarks
  • MigraDoc to create the table of contents

The strategy adopted was the following:

  1. Create all the sections needed for the document, including those which will be populated by external pdf files.
  2. Add headers and footers to the sections created above
  3. Create all the bookmarks needed when creating the sections above; this step is important for the internal links to be correctly resolved and the table of contents to be built accordingly. In fact, if you add content with references to parts of the document which are not present at that time, you get an unresolved link error. It does not matter whether they are filled with content or are just blank pages at the moment; what matters is that they exist and are bookmarked.
  4. Fill the remaining parts of the document importing Sharepoint data and mdddl files.
  5. Create the table of contents.
  6. Inject the external pdf files in the sections foreseen above
  7. Add page numbers. This is done after all the other operations, to be sure that no page is left out.

Implementation

Let me skip the “getting started” part of PDFsharp & MigraDoc (you can find exhaustive tutorials on the product web site) and jump to the interesting parts.

First of all, prepare the MigraDoc document:

/// <summary>
/// Creates the MigraDoc document, fills in its metadata and registers the custom styles.
/// </summary>
void PrepareDocument()
{
    // Create a new MigraDoc document.
    document = new Document();
    document.Info.Title = "GUIDELINES";
    document.Info.Subject = "Guidelines for working group";
    document.Info.Author = "Ab";

    // No page setup is touched here: the previous code changed the orientation on a
    // clone of DefaultPageSetup that was then discarded, so it had no effect.
    // AddDefaultSection sets Landscape on every section it creates.
    Guide_Creator.Classes.Styles.DefineStyles(document);
}


Then create all default sections you need:

/// <summary>
/// Appends a new landscape section to the document, tags it with the given name and
/// applies the default margins, header/footer distances and odd/even page settings.
/// </summary>
/// <param name="sectionName">Value stored in the section's Tag.</param>
public void AddDefaultSection(string sectionName)
{
    var newSection = document.AddSection();
    newSection.Tag = sectionName;

    var setup = newSection.PageSetup;
    setup.Orientation = Orientation.Landscape;

    // 25mm margin on all four sides
    setup.TopMargin = "25mm";
    setup.BottomMargin = "25mm";
    setup.LeftMargin = "25mm";
    setup.RightMargin = "25mm";

    setup.HeaderDistance = "10mm";
    setup.FooterDistance = "15mm";

    setup.MirrorMargins = true;
    setup.OddAndEvenPagesHeaderFooter = true;
}


For instance, you can add an "Introduction" section with a cover image and a bookmark in this way:

        /// <summary>
        /// Adds the "RetroCoverSection" section (an empty paragraph followed by a page
        /// break) and the "IntroSection" section, which gets a page-anchored cover image,
        /// a page break and a transparent, bookmarked "INTRODUCTION" title paragraph.
        /// </summary>
        public void AddIntroSection()
        {
            // Back-of-cover section: an empty paragraph, then a page break.
            AddDefaultSection("RetroCoverSection");
            var retroCover = document.LastSection;
            retroCover.AddParagraph();
            retroCover.AddPageBreak();

            // The "intro" section itself.
            AddDefaultSection("IntroSection");
            var intro = document.LastSection;

            // Cover image, positioned relative to the page.
            var cover = intro.AddImage("covers\\intro.png");
            cover.Height = "21cm";
            cover.Width = "29.7cm";
            cover.RelativeVertical = RelativeVertical.Page;
            cover.RelativeHorizontal = RelativeHorizontal.Page;
            intro.AddPageBreak();

            // Transparent title paragraph that carries the bookmark.
            string titleSection = "INTRODUCTION";
            var titlePar = intro.AddParagraph(titleSection);
            titlePar.Format.Font.Color = Colors.Transparent;
            titlePar.Format.SpaceBefore = "1mm";

            bCreator.AddBookmarkMigraDoc(titlePar, titleSection, titleSection, typeOfBookmark.part);
        }
        
        /// <summary>
        /// Sets the paragraph's outline level from the bookmark type, adds the bookmark to
        /// the paragraph and records it in the bookmark list.
        /// </summary>
        /// <param name="par">Paragraph that receives the bookmark.</param>
        /// <param name="bookmarkName">Name (key) of the bookmark.</param>
        /// <param name="bookmarkTOC">Name of the paragraph as it appears in the table of contents.</param>
        /// <param name="bkType">Bookmark type: part maps to Level1, chapter to Level2, anything else to BodyText.</param>
        public void AddBookmarkMigraDoc(Paragraph par, string bookmarkName, string bookmarkTOC, typeOfBookmark bkType)
        {
            par.Format.OutlineLevel =
                bkType == typeOfBookmark.part ? OutlineLevel.Level1
                : bkType == typeOfBookmark.chapter ? OutlineLevel.Level2
                : OutlineLevel.BodyText;

            par.AddBookmark(bookmarkName);

            // Keep the bookmark list up to date; it is needed when adding internal links.
            var entry = new Bookmark(bookmarkName, bookmarkTOC, bkType);
            bookmarkList.Add(entry);
        }

Continue on part 2