iText pdf library
Website search

pdfHTML: Accessible PDF Creation

A simple example that shows how to create a Tagged PDF with pdfHTML and how to configure the metadata necessary to obtain an Accessible PDF.

CreateAccessiblePDF.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2020 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfViewerPreferences;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.samples.sandbox.pdfhtml.headertagging.AccessibilityTagWorkerFactory;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
 
public class CreateAccessiblePDF {
    public static final String SRC = "./src/main/resources/pdfhtml/AccessiblePDF/";
    public static final String DEST = "./target/sandbox/pdfhtml/Accessibility.pdf";
 
    public static void main(String[] args) throws IOException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        String htmlSource = SRC + "Accessibility.html";
 
        new CreateAccessiblePDF().manipulatePdf(htmlSource, DEST);
    }
 
    public void manipulatePdf(String src, String dest) throws IOException {
        FileOutputStream outputStream = new FileOutputStream(dest);
        WriterProperties writerProperties = new WriterProperties();
        writerProperties.addXmpMetadata();
 
        PdfWriter pdfWriter = new PdfWriter(outputStream, writerProperties);
        PdfDocument pdfDoc = new PdfDocument(pdfWriter);
        pdfDoc.getCatalog().setLang(new PdfString("en-US"));
 
        pdfDoc.setTagged();
        pdfDoc.getCatalog().setViewerPreferences(new PdfViewerPreferences().setDisplayDocTitle(true));
 
        PdfDocumentInfo pdfMetaData = pdfDoc.getDocumentInfo();
        pdfMetaData.setAuthor("Samuel Huylebroeck");
        pdfMetaData.addCreationDate();
        pdfMetaData.getProducer();
        pdfMetaData.setCreator("iText Software");
        pdfMetaData.setKeywords("example, accessibility");
        pdfMetaData.setSubject("PDF accessibility");
        // Title is derived from html
 
        // pdf conversion
        FontProvider fontProvider = new FontProvider();
        fontProvider.addStandardPdfFonts();
        // The noto-nashk font file (.ttf extension) is placed in the resources
        fontProvider.addDirectory(SRC);
 
        ConverterProperties props = new ConverterProperties();
        props.setFontProvider(fontProvider);
        // Base URI is required to resolve the path to source files
        props.setBaseUri(SRC);
 
        // Setup custom tagworker factory for better tagging of headers
        DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
        props.setTagWorkerFactory(tagWorkerFactory);
 
        HtmlConverter.convertToPdf(new FileInputStream(src), pdfDoc, props);
 
        pdfDoc.close();
    }
}
AccessibilityTagWorkerFactory.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2020 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;
 
import com.itextpdf.html2pdf.attach.ITagWorker;
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.styledxmlparser.node.IElementNode;
 
public class AccessibilityTagWorkerFactory extends DefaultTagWorkerFactory {
 
    @Override
    public ITagWorker getCustomTagWorker(IElementNode tag, ProcessorContext context) {
        switch (tag.name()) {
            case "h1":
                return new CustomHTagWorker(tag, context, 1);
            case "h2":
                return new CustomHTagWorker(tag, context, 2);
            case "h3":
                return new CustomHTagWorker(tag, context, 3);
            case "h4":
                return new CustomHTagWorker(tag, context, 4);
            case "h5":
                return new CustomHTagWorker(tag, context, 5);
            case "h6":
                return new CustomHTagWorker(tag, context, 6);
            case "th":
                return new CustomThTagWorker(tag, context);
            default:
                return null;
        }
    }
}
CustomHTagWorker.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2020 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;
 
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.tags.DivTagWorker;
import com.itextpdf.layout.IPropertyContainer;
import com.itextpdf.layout.element.Div;
import com.itextpdf.styledxmlparser.node.IElementNode;
 
 
public class CustomHTagWorker extends DivTagWorker {
    private int i;
 
    public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) {
        super(element, context);
        this.i = i;
    }
 
    @Override
    public IPropertyContainer getElementResult() {
        Div div = (Div) super.getElementResult();
        div.getAccessibilityProperties().setRole("H" + i);
        return super.getElementResult();
    }
}
CreateAccessiblePDF.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2020 iText Group NV
Authors: iText Software.
 
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
 
using System.IO;
using iText.Html2pdf;
using iText.Html2pdf.Attach.Impl;
using iText.Kernel.Pdf;
using iText.Layout.Font;
using iText.Samples.Sandbox.Pdfhtml.Headertagging;
 
namespace iText.Samples.Sandbox.Pdfhtml
{
    public class CreateAccessiblePDF
    {
        public static readonly string SRC = "../../../resources/pdfhtml/AccessiblePDF/";
        public static readonly string DEST = "results/sandbox/pdfhtml/Accessibility.pdf";
 
        public static void Main(string[] args)
        {
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();
            string htmlSource = SRC + "Accessibility.html";
 
            new CreateAccessiblePDF().ManipulatePdf(htmlSource, DEST);
        }
 
        public void ManipulatePdf(string src, string dest)
        {
            WriterProperties writerProperties = new WriterProperties();
            writerProperties.AddXmpMetadata();
 
            PdfWriter pdfWriter = new PdfWriter(dest, writerProperties);
            PdfDocument pdfDoc = new PdfDocument(pdfWriter);
            pdfDoc.GetCatalog().SetLang(new PdfString("en-US"));
 
            pdfDoc.SetTagged();
            pdfDoc.GetCatalog().SetViewerPreferences(new PdfViewerPreferences().SetDisplayDocTitle(true));
 
            PdfDocumentInfo pdfMetaData = pdfDoc.GetDocumentInfo();
            pdfMetaData.SetAuthor("Samuel Huylebroeck");
            pdfMetaData.AddCreationDate();
            pdfMetaData.GetProducer();
            pdfMetaData.SetCreator("iText Software");
            pdfMetaData.SetKeywords("example, accessibility");
            pdfMetaData.SetSubject("PDF accessibility");
 
            // Title is derived from html
 
            // pdf conversion
            ConverterProperties props = new ConverterProperties();
            FontProvider fontProvider = new FontProvider();
            fontProvider.AddStandardPdfFonts();
            fontProvider.AddDirectory(SRC);
 
            // The noto-nashk font file (.ttf extension) is placed in the resources
            props.SetFontProvider(fontProvider);
            // Base URI is required to resolve the path to source files
            props.SetBaseUri(SRC);
 
            // Setup custom tagworker factory for better tagging of headers
            DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
            props.SetTagWorkerFactory(tagWorkerFactory);
            
            HtmlConverter.ConvertToPdf(new FileStream(src, FileMode.Open), pdfDoc, props);
 
            pdfDoc.Close();
        }
    }
}
AccessibilityTagWorkerFactory.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2020 iText Group NV
Authors: iText Software.
 
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
 
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl;
using iText.StyledXmlParser.Node;
 
namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    public class AccessibilityTagWorkerFactory : DefaultTagWorkerFactory
    {
        public override ITagWorker GetCustomTagWorker(IElementNode tag, ProcessorContext context)
        {
            switch (tag.Name())
            {
                case "h1":
                    return new CustomHTagWorker(tag, context, 1);
                case "h2":
                    return new CustomHTagWorker(tag, context, 2);
                case "h3":
                    return new CustomHTagWorker(tag, context, 3);
                case "h4":
                    return new CustomHTagWorker(tag, context, 4);
                case "h5":
                    return new CustomHTagWorker(tag, context, 5);
                case "h6":
                    return new CustomHTagWorker(tag, context, 6);
                case "th":
                    return new CustomThTagWorker(tag, context);
                default:
                    return null;
            }
        }
    }
}
CustomHTagWorker.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2020 iText Group NV
Authors: iText Software.
 
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
 
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl.Tags;
using iText.Layout;
using iText.Layout.Element;
using iText.StyledXmlParser.Node;
 
namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    public class CustomHTagWorker : DivTagWorker
    {
        private int i;
        public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) : base(element, context)
        {
            this.i = i;
        }
 
        public override IPropertyContainer GetElementResult()
        {
            Div div = (Div) base.GetElementResult();
            div.GetAccessibilityProperties().SetRole("H" + i);
            return base.GetElementResult();
        }
    }
}


Contact

Still have questions? 

We're happy to answer your questions. Reach out to us and we'll get back to you shortly.

Contact us
Stay updated

Join 11,000+ subscribers and become an iText PDF expert by staying up to date with our new products, updates, tips, technical solutions and happenings.

Subscribe Now