XML Worker examples

This is a code example of iText PDF, discover more.

24th October 2015
admin-marketing

Switch code for this example

D00_XHTML.java
  1. package sandbox.xmlworker;
  2.  
  3. import java.io.File;
  4. import java.io.FileOutputStream;
  5. import java.io.IOException;
  6.  
  7. import org.jsoup.Jsoup;
  8.  
  9. /**
  10.  * Converts an HTML file into an XTHML file.
  11.  */
  12. public class D00_XHTML {
  13.  
  14.     /** The name of a HTML file */
  15.     public static final String WALDEN = "resources/html/walden.html";
  16.     /** The name of a HTML file */
  17.     public static final String THOREAU = "resources/html/thoreau.html";
  18.    
  19.     /** The main method. */
  20.     public static void main(String[] args) throws IOException {
  21.         tidyUp(WALDEN);
  22.         tidyUp(THOREAU);
  23.     }
  24.    
  25.     public static void tidyUp(String path) throws IOException {
  26.         File html = new File(path);
  27.         byte[] xhtml = Jsoup.parse(html, "US-ASCII").html().getBytes();
  28.         File dir = new File("results/xml");
  29.         dir.mkdirs();
  30.         FileOutputStream fos = new FileOutputStream(new File(dir, html.getName()));
  31.         fos.write(xhtml);
  32.         fos.close();
  33.     }
  34. }
D01_CustomElementHandler.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Element;
  4. import com.itextpdf.tool.xml.ElementHandler;
  5. import com.itextpdf.tool.xml.Writable;
  6. import com.itextpdf.tool.xml.XMLWorkerHelper;
  7. import com.itextpdf.tool.xml.pipeline.WritableElement;
  8. import sandbox.WrapToTest;
  9.  
  10. import java.io.FileInputStream;
  11. import java.io.IOException;
  12. import java.util.List;
  13.  
  14. //without @WrapToTest annotation, because this test only illustrated custom element handler
  15. public class D01_CustomElementHandler {
  16.  
  17.     public static final String SRC = "resources/xml/walden.html";
  18.  
  19.     public static void main(String[] args) throws IOException {
  20.         XMLWorkerHelper.getInstance().parseXHtml(new ElementHandler() {
  21.             public void add(final Writable w) {
  22.                 if (w instanceof WritableElement) {
  23.                     List elements = ((WritableElement) w).elements();
  24.                     for (Element element : elements) {
  25.                         System.out.println(element.getClass().getName());
  26.                     }
  27.                 }
  28.  
  29.             }
  30.         }, new FileInputStream(SRC), null);
  31.     }
  32.  
  33.  
  34.  
  35. }
D02_ParseHtml.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorkerHelper;
  7.  
  8. import java.io.File;
  9. import java.io.FileInputStream;
  10. import java.io.FileOutputStream;
  11. import java.io.IOException;
  12.  
  13. import sandbox.WrapToTest;
  14.  
  15. @WrapToTest
  16. public class D02_ParseHtml {
  17.  
  18.     public static final String HTML = "resources/xml/walden.html";
  19.     public static final String DEST = "results/xmlworker/walden1.pdf";
  20.    
  21.     /**
  22.      * Html to pdf conversion example.
  23.      * @param file
  24.      * @throws IOException
  25.      * @throws DocumentException
  26.      */
  27.     public void createPdf(String file) throws IOException, DocumentException {
  28.         // step 1
  29.         Document document = new Document();
  30.         // step 2
  31.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  32.         // step 3
  33.         document.open();
  34.         // step 4
  35.         XMLWorkerHelper.getInstance().parseXHtml(writer, document,
  36.                 new FileInputStream(HTML));
  37.         // step 5
  38.         document.close();
  39.     }
  40.    
  41.     /**
  42.      * Main method
  43.      */
  44.     public static void main(String[] args) throws IOException, DocumentException {
  45.         File file = new File(DEST);
  46.         file.getParentFile().mkdirs();
  47.         new D02_ParseHtml().createPdf(DEST);
  48.     }
  49. }
D03_ParseHtmlPipelines.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerHelper;
  8. import com.itextpdf.tool.xml.html.Tags;
  9. import com.itextpdf.tool.xml.parser.XMLParser;
  10. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  11. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  12. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  13. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  14. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  15.  
  16. import java.io.File;
  17. import java.io.FileInputStream;
  18. import java.io.FileOutputStream;
  19. import java.io.IOException;
  20.  
  21. import sandbox.WrapToTest;
  22.  
  23. @WrapToTest
  24. public class D03_ParseHtmlPipelines {
  25.  
  26.     public static final String HTML = "resources/xml/walden.html";
  27.     public static final String DEST = "results/xmlworker/walden2.pdf";
  28.  
  29.     public void createPdf(String file) throws IOException, DocumentException {
  30.         // step 1
  31.         Document document = new Document();
  32.        
  33.         // step 2
  34.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  35.         writer.setInitialLeading(12.5f);
  36.        
  37.         // step 3
  38.         document.open();
  39.        
  40.         // step 4
  41.        
  42.         // CSS
  43.         CSSResolver cssResolver =
  44.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(false);
  45.        
  46.         // HTML
  47.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  48.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  49.         htmlContext.autoBookmark(false);
  50.        
  51.         // Pipelines
  52.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  53.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  54.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  55.        
  56.         // XML Worker
  57.         XMLWorker worker = new XMLWorker(css, true);
  58.         XMLParser p = new XMLParser(worker);
  59.         p.parse(new FileInputStream(HTML));
  60.        
  61.         // step 5
  62.         document.close();
  63.     }
  64.    
  65.     /**
  66.      * Main method
  67.      */
  68.     public static void main(String[] args) throws IOException, DocumentException {
  69.         File file = new File(DEST);
  70.         file.getParentFile().mkdirs();
  71.         new D03_ParseHtmlPipelines().createPdf(DEST);
  72.     }
  73. }
D04_ParseHtmlCss.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerHelper;
  8. import com.itextpdf.tool.xml.css.CssFile;
  9. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  10. import com.itextpdf.tool.xml.html.Tags;
  11. import com.itextpdf.tool.xml.parser.XMLParser;
  12. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  13. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  14. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  15. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  16. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  17.  
  18. import java.io.File;
  19. import java.io.FileInputStream;
  20. import java.io.FileOutputStream;
  21. import java.io.IOException;
  22.  
  23. import sandbox.WrapToTest;
  24.  
  25. @WrapToTest
  26. public class D04_ParseHtmlCss {
  27.  
  28.     public static final String HTML = "resources/xml/walden.html";
  29.     public static final String CSS = "resources/xml/walden.css";
  30.     public static final String DEST = "results/xmlworker/walden3.pdf";
  31.  
  32.     public void createPdf(String file) throws IOException, DocumentException {
  33.         // step 1
  34.         Document document = new Document();
  35.        
  36.         // step 2
  37.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  38.         writer.setInitialLeading(12.5f);
  39.        
  40.         // step 3
  41.         document.open();
  42.        
  43.         // step 4
  44.        
  45.         // CSS
  46.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  47.         CssFile cssFile = XMLWorkerHelper.getCSS(new FileInputStream(CSS));
  48.         cssResolver.addCss(cssFile);
  49.        
  50.         // HTML
  51.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  52.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  53.        
  54.         // Pipelines
  55.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  56.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  57.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  58.        
  59.         // XML Worker
  60.         XMLWorker worker = new XMLWorker(css, true);
  61.         XMLParser p = new XMLParser(worker);
  62.         p.parse(new FileInputStream(HTML));
  63.        
  64.         // step 5
  65.         document.close();
  66.     }
  67.    
  68.     /**
  69.      * Main method
  70.      */
  71.     public static void main(String[] args) throws IOException, DocumentException {
  72.         File file = new File(DEST);
  73.         file.getParentFile().mkdirs();
  74.         new D04_ParseHtmlCss().createPdf(DEST);
  75.     }
  76. }
D05_ParseHtmlCssLink.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerHelper;
  8. import com.itextpdf.tool.xml.html.Tags;
  9. import com.itextpdf.tool.xml.net.FileRetrieve;
  10. import com.itextpdf.tool.xml.net.FileRetrieveImpl;
  11. import com.itextpdf.tool.xml.parser.XMLParser;
  12. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  13. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  14. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  15. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  16. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  17.  
  18. import java.io.File;
  19. import java.io.FileInputStream;
  20. import java.io.FileOutputStream;
  21. import java.io.IOException;
  22.  
  23. import sandbox.WrapToTest;
  24.  
  25. @WrapToTest
  26. public class D05_ParseHtmlCssLink {
  27.  
  28.     public static final String HTML = "resources/xml/test.html";
  29.     public static final String CSS_DIR = "resources/xml/";
  30.     public static final String DEST = "results/xmlworker/test.pdf";
  31.    
  32.     /**
  33.      * @param file
  34.      * @throws IOException
  35.      * @throws DocumentException
  36.      */
  37.     public void createPdf(String file) throws IOException, DocumentException {
  38.         // step 1
  39.         Document document = new Document();
  40.         // step 2
  41.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  42.         writer.setInitialLeading(12.5f);
  43.         // step 3
  44.         document.open();
  45.         // step 4
  46.        
  47.         // CSS
  48.         CSSResolver cssResolver =
  49.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(false);
  50.         FileRetrieve retrieve = new FileRetrieveImpl(CSS_DIR);
  51.         cssResolver.setFileRetrieve(retrieve);
  52.        
  53.         // HTML
  54.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  55.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  56.         htmlContext.autoBookmark(false);
  57.        
  58.         // Pipelines
  59.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  60.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  61.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  62.        
  63.         // XML Worker
  64.         XMLWorker worker = new XMLWorker(css, true);
  65.         XMLParser p = new XMLParser(worker);
  66.         p.parse(new FileInputStream(HTML));
  67.        
  68.         // step 5
  69.         document.close();
  70.     }
  71.    
  72.     /**
  73.      * Main method
  74.      */
  75.     public static void main(String[] args) throws IOException, DocumentException {
  76.         File file = new File(DEST);
  77.         file.getParentFile().mkdirs();
  78.         new D05_ParseHtmlCssLink().createPdf(DEST);
  79.     }
  80. }
D06_ParseHtmlFonts.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  8. import com.itextpdf.tool.xml.XMLWorkerHelper;
  9. import com.itextpdf.tool.xml.css.CssFile;
  10. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  11. import com.itextpdf.tool.xml.html.CssAppliers;
  12. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  13. import com.itextpdf.tool.xml.html.Tags;
  14. import com.itextpdf.tool.xml.parser.XMLParser;
  15. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  16. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  17. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  18. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  19. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  20.  
  21. import java.io.File;
  22. import java.io.FileInputStream;
  23. import java.io.FileOutputStream;
  24. import java.io.IOException;
  25.  
  26. import sandbox.WrapToTest;
  27.  
  28. @WrapToTest
  29. public class D06_ParseHtmlFonts {
  30.  
  31.     public static final String HTML = "resources/xml/walden.html";
  32.     public static final String CSS = "resources/xml/walden.css";
  33.     public static final String DEST = "results/xmlworker/walden4.pdf";
  34.  
  35.     public void createPdf(String file) throws IOException, DocumentException {
  36.         // step 1
  37.         Document document = new Document();
  38.        
  39.         // step 2
  40.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  41.         writer.setInitialLeading(12.5f);
  42.        
  43.         // step 3
  44.         document.open();
  45.        
  46.         // step 4
  47.  
  48.         // CSS
  49.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  50.         CssFile cssFile = XMLWorkerHelper.getCSS(new FileInputStream(CSS));
  51.         cssResolver.addCss(cssFile);
  52.        
  53.         // HTML
  54.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  55.         fontProvider.register("resources/fonts/Cardo-Regular.ttf");
  56.         fontProvider.register("resources/fonts/Cardo-Bold.ttf");
  57.         fontProvider.register("resources/fonts/Cardo-Italic.ttf");
  58.         fontProvider.addFontSubstitute("lowagie", "cardo");
  59.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  60.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  61.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  62.        
  63.         // Pipelines
  64.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  65.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  66.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  67.        
  68.         // XML Worker
  69.         XMLWorker worker = new XMLWorker(css, true);
  70.         XMLParser p = new XMLParser(worker);
  71.         p.parse(new FileInputStream(HTML));
  72.        
  73.         // step 5
  74.         document.close();
  75.     }
  76.    
  77.     /**
  78.      * Main method
  79.      */
  80.     public static void main(String[] args) throws IOException, DocumentException {
  81.         File file = new File(DEST);
  82.         file.getParentFile().mkdirs();
  83.         new D06_ParseHtmlFonts().createPdf(DEST);
  84.     }
  85. }
D07_ParseHtmlAsian.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorkerHelper;
  7.  
  8. import java.io.File;
  9. import java.io.FileInputStream;
  10. import java.io.FileOutputStream;
  11. import java.io.IOException;
  12. import java.nio.charset.Charset;
  13.  
  14. public class D07_ParseHtmlAsian {
  15.  
  16.     public static final String HTML = "resources/xml/hero.html";
  17.     public static final String DEST = "results/xmlworker/hero.pdf";
  18.  
  19.     /**
  20.      * Creates a PDF with the words "Hello World"
  21.      * @param file
  22.      * @throws IOException
  23.      * @throws DocumentException
  24.      */
  25.     public void createPdf(String file) throws IOException, DocumentException {
  26.         // step 1
  27.         Document document = new Document();
  28.         // step 2
  29.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  30.         // step 3
  31.         document.open();
  32.         // step 4
  33.         XMLWorkerHelper.getInstance().parseXHtml(writer, document,
  34.                 new FileInputStream(HTML), Charset.forName("UTF-8"));
  35.         // step 5
  36.         document.close();
  37.     }
  38.    
  39.     /**
  40.      * Main method
  41.      */
  42.     public static void main(String[] args) throws IOException, DocumentException {
  43.         File file = new File(DEST);
  44.         file.getParentFile().mkdirs();
  45.         new D07_ParseHtmlAsian().createPdf(DEST);
  46.     }
  47. }
D07bis_ParseHtmlAsian.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  8. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  9. import com.itextpdf.tool.xml.html.CssAppliers;
  10. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  11. import com.itextpdf.tool.xml.html.Tags;
  12. import com.itextpdf.tool.xml.parser.XMLParser;
  13. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  14. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  15. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  16. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  17. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  18.  
  19. import java.io.File;
  20. import java.io.FileInputStream;
  21. import java.io.FileOutputStream;
  22. import java.io.IOException;
  23. import java.nio.charset.Charset;
  24.  
  25. import sandbox.WrapToTest;
  26.  
  27. @WrapToTest
  28. public class D07bis_ParseHtmlAsian {
  29.  
  30.     public static final String HTML = "resources/xml/hero.html";
  31.     public static final String DEST = "results/xmlworker/asian.pdf";
  32.  
  33.     /**
  34.      * Creates a PDF with the words "Hello World"
  35.      * @param file
  36.      * @throws IOException
  37.      * @throws DocumentException
  38.      */
  39.     public void createPdf(String file) throws IOException, DocumentException {
  40.         // step 1
  41.         Document document = new Document();
  42.         // step 2
  43.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  44.         // step 3
  45.         document.open();
  46.         // step 4
  47.         // CSS
  48.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  49.        
  50.         // HTML
  51.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  52.         fontProvider.register("resources/fonts/cfmingeb.ttf", "MS Mincho");
  53.         fontProvider.register("resources/fonts/PT_Serif-Web-Regular.ttf", "Serif");
  54.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  55.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  56.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  57.        
  58.         // Pipelines
  59.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  60.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  61.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  62.        
  63.         // XML Worker
  64.         XMLWorker worker = new XMLWorker(css, true);
  65.         XMLParser p = new XMLParser(worker);
  66.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));
  67.         // step 5
  68.         document.close();
  69.     }
  70.    
  71.     /**
  72.      * Main method
  73.      */
  74.     public static void main(String[] args) throws IOException, DocumentException {
  75.         File file = new File(DEST);
  76.         file.getParentFile().mkdirs();
  77.         new D07bis_ParseHtmlAsian().createPdf(DEST);
  78.     }
  79. }
D07tris_ParseHtmlAsian.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  8. import com.itextpdf.tool.xml.XMLWorkerHelper;
  9. import com.itextpdf.tool.xml.css.CssFile;
  10. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  11. import com.itextpdf.tool.xml.html.CssAppliers;
  12. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  13. import com.itextpdf.tool.xml.html.Tags;
  14. import com.itextpdf.tool.xml.parser.XMLParser;
  15. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  16. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  17. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  18. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  19. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  20.  
  21. import java.io.ByteArrayInputStream;
  22. import java.io.File;
  23. import java.io.FileInputStream;
  24. import java.io.FileOutputStream;
  25. import java.io.IOException;
  26. import java.nio.charset.Charset;
  27.  
  28. import sandbox.WrapToTest;
  29.  
  30. @WrapToTest
  31. public class D07tris_ParseHtmlAsian {
  32.  
  33.     public static final String HTML = "resources/xml/hero2.html";
  34.     public static final String DEST = "results/xmlworker/asian2.pdf";
  35.  
  36.     /**
  37.      * Creates a PDF with the words "Hello World"
  38.      * @param file
  39.      * @throws IOException
  40.      * @throws DocumentException
  41.      */
  42.     public void createPdf(String file) throws IOException, DocumentException {
  43.         // step 1
  44.         Document document = new Document();
  45.         // step 2
  46.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  47.         // step 3
  48.         document.open();
  49.         // step 4
  50.         // CSS
  51.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  52.         CssFile cssFile = XMLWorkerHelper.getCSS(new ByteArrayInputStream("body {font-family:tsc fming s tt}".getBytes()));
  53.         cssResolver.addCss(cssFile);
  54.        
  55.         // HTML
  56.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  57.         fontProvider.register("resources/fonts/cfmingeb.ttf");
  58.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  59.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  60.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  61.        
  62.         // Pipelines
  63.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  64.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  65.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  66.        
  67.         // XML Worker
  68.         XMLWorker worker = new XMLWorker(css, true);
  69.         XMLParser p = new XMLParser(worker);
  70.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));
  71.         // step 5
  72.         document.close();
  73.     }
  74.    
  75.     /**
  76.      * Main method
  77.      */
  78.     public static void main(String[] args) throws IOException, DocumentException {
  79.         File file = new File(DEST);
  80.         file.getParentFile().mkdirs();
  81.         new D07tris_ParseHtmlAsian().createPdf(DEST);
  82.     }
  83. }
D08_ParseHtmlImagesLinksOops.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorkerHelper;
  7.  
  8. import java.io.File;
  9. import java.io.FileInputStream;
  10. import java.io.FileOutputStream;
  11. import java.io.IOException;
  12. import java.nio.charset.Charset;
  13.  
  14. import sandbox.WrapToTest;
  15.  
  16. @WrapToTest
  17. public class D08_ParseHtmlImagesLinksOops {
  18.  
  19.     public static final String HTML = "resources/xml/thoreau.html";
  20.     public static final String DEST = "results/xmlworker/thoreau_oops.pdf";
  21.  
  22.     /**
  23.      * Creates a PDF with the words "Hello World"
  24.      * @param file
  25.      * @throws IOException
  26.      * @throws DocumentException
  27.      */
  28.     public void createPdf(String file) throws IOException, DocumentException {
  29.         // step 1
  30.         Document document = new Document();
  31.         // step 2
  32.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  33.         // step 3
  34.         document.open();
  35.         // step 4
  36.         XMLWorkerHelper.getInstance().parseXHtml(writer, document,
  37.                 new FileInputStream(HTML), Charset.forName("UTF-8"));
  38.         // step 5
  39.         document.close();
  40.     }
  41.  
  42.     /**
  43.      * Main method
  44.      */
  45.     public static void main(String[] args) throws IOException, DocumentException {
  46.         File file = new File(DEST);
  47.         file.getParentFile().mkdirs();
  48.         new D08_ParseHtmlImagesLinksOops().createPdf(DEST);
  49.     }
  50. }
D09_ParseHtmlImagesLinks.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.XMLWorker;
  7. import com.itextpdf.tool.xml.XMLWorkerHelper;
  8. import com.itextpdf.tool.xml.html.Tags;
  9. import com.itextpdf.tool.xml.parser.XMLParser;
  10. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  11. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  12. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  13. import com.itextpdf.tool.xml.pipeline.html.AbstractImageProvider;
  14. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  15. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  16. import com.itextpdf.tool.xml.pipeline.html.LinkProvider;
  17.  
  18. import java.io.File;
  19. import java.io.FileInputStream;
  20. import java.io.FileOutputStream;
  21. import java.io.IOException;
  22.  
  23. import sandbox.WrapToTest;
  24.  
  25. @WrapToTest
  26. public class D09_ParseHtmlImagesLinks {
  27.  
  28.     public static final String HTML = "resources/xml/thoreau.html";
  29.     public static final String DEST = "results/xmlworker/thoreau.pdf";
  30.     public static final String IMG_PATH = "resources/xml/";
  31.     public static final String RELATIVE_PATH = "../../resources/xml/";
  32.  
  33.     /**
  34.      * Creates a PDF with the words "Hello World"
  35.      * @param file
  36.      * @throws IOException
  37.      * @throws DocumentException
  38.      */
  39.     public void createPdf(String file) throws IOException, DocumentException {
  40.         // step 1
  41.         Document document = new Document();
  42.         // step 2
  43.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  44.         // step 3
  45.         document.open();
  46.         // step 4
  47.  
  48.         // CSS
  49.         CSSResolver cssResolver =
  50.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
  51.        
  52.         // HTML
  53.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  54.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  55.         htmlContext.setImageProvider(new AbstractImageProvider() {
  56.             public String getImageRootPath() {
  57.                 return IMG_PATH;
  58.             }
  59.         });
  60.         htmlContext.setLinkProvider(new LinkProvider() {
  61.             public String getLinkRoot() {
  62.                 return RELATIVE_PATH;
  63.             }
  64.         });
  65.        
  66.         // Pipelines
  67.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  68.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  69.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  70.        
  71.         // XML Worker
  72.         XMLWorker worker = new XMLWorker(css, true);
  73.         XMLParser p = new XMLParser(worker);
  74.         p.parse(new FileInputStream(HTML));
  75.        
  76.         // step 5
  77.         document.close();
  78.     }
  79.    
  80.    
  81.     /**
  82.      * Main method
  83.      */
  84.     public static void main(String[] args) throws IOException, DocumentException {
  85.         File file = new File(DEST);
  86.         file.getParentFile().mkdirs();
  87.         new D09_ParseHtmlImagesLinks().createPdf(DEST);
  88.     }
  89. }
D10_ParseCustomTag.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.Element;
  6. import com.itextpdf.text.Paragraph;
  7. import com.itextpdf.text.pdf.PdfWriter;
  8. import com.itextpdf.tool.xml.Tag;
  9. import com.itextpdf.tool.xml.WorkerContext;
  10. import com.itextpdf.tool.xml.XMLWorker;
  11. import com.itextpdf.tool.xml.XMLWorkerHelper;
  12. import com.itextpdf.tool.xml.html.Div;
  13. import com.itextpdf.tool.xml.html.TagProcessorFactory;
  14. import com.itextpdf.tool.xml.html.Tags;
  15. import com.itextpdf.tool.xml.net.FileRetrieve;
  16. import com.itextpdf.tool.xml.net.FileRetrieveImpl;
  17. import com.itextpdf.tool.xml.parser.XMLParser;
  18. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  19. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  20. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  21. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  22. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  23.  
  24. import java.io.File;
  25. import java.io.FileInputStream;
  26. import java.io.FileOutputStream;
  27. import java.io.IOException;
  28. import java.text.DateFormat;
  29. import java.util.ArrayList;
  30. import java.util.Date;
  31. import java.util.List;
  32. import java.util.Locale;
  33.  
  34. public class D10_ParseCustomTag {
  35.  
  36.     public static final String HTML = "resources/xml/test.html";
  37.     public static final String DEST = "results/xmlworker/date.pdf";
  38.     public static final String CSS_DIR = "resources/xml/";
  39.    
  40.     /**
  41.      * Creates a PDF with the words "Hello World"
  42.      * @param file
  43.      * @throws IOException
  44.      * @throws DocumentException
  45.      */
  46.     public void createPdf(String file) throws IOException, DocumentException {
  47.         // step 1
  48.         Document document = new Document();
  49.         // step 2
  50.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  51.         writer.setInitialLeading(12.5f);
  52.         // step 3
  53.         document.open();
  54.         // step 4
  55.        
  56.         // CSS
  57.         CSSResolver cssResolver =
  58.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(false);
  59.         FileRetrieve retrieve = new FileRetrieveImpl(CSS_DIR);
  60.         cssResolver.setFileRetrieve(retrieve);
  61.        
  62.         // HTML
  63.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  64.         TagProcessorFactory factory = Tags.getHtmlTagProcessorFactory();
  65.         factory.addProcessor(
  66.             new Div(){
  67.                 public List end(WorkerContext ctx, Tag tag, List l) {
  68.                     List list = new ArrayList(1);
  69.                     String date = DateFormat.getDateInstance(DateFormat.LONG, Locale.US).format(new Date());
  70.                     list.add(new Paragraph(date));
  71.                     return list;
  72.                 }
  73.             },
  74.             "date");
  75.         htmlContext.setTagFactory(factory);
  76.         htmlContext.autoBookmark(false);
  77.        
  78.         // Pipelines
  79.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  80.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  81.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  82.        
  83.         // XML Worker
  84.         XMLWorker worker = new XMLWorker(css, true);
  85.         XMLParser p = new XMLParser(worker);
  86.         p.parse(new FileInputStream(HTML));
  87.        
  88.         // step 5
  89.         document.close();
  90.     }
  91.    
  92.     /**
  93.      * Main method
  94.      */
  95.     public static void main(String[] args) throws IOException, DocumentException {
  96.         File file = new File(DEST);
  97.         file.getParentFile().mkdirs();
  98.         new D10_ParseCustomTag().createPdf(DEST);
  99.     }
  100. }
D11_ParseHtmlObjects.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.Element;
  6. import com.itextpdf.text.PageSize;
  7. import com.itextpdf.text.Rectangle;
  8. import com.itextpdf.text.pdf.ColumnText;
  9. import com.itextpdf.text.pdf.PdfWriter;
  10. import com.itextpdf.tool.xml.ElementList;
  11. import com.itextpdf.tool.xml.XMLWorker;
  12. import com.itextpdf.tool.xml.XMLWorkerHelper;
  13. import com.itextpdf.tool.xml.html.Tags;
  14. import com.itextpdf.tool.xml.parser.XMLParser;
  15. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  16. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  17. import com.itextpdf.tool.xml.pipeline.end.ElementHandlerPipeline;
  18. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  19. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  20.  
  21. import java.io.File;
  22. import java.io.FileInputStream;
  23. import java.io.FileOutputStream;
  24. import java.io.IOException;
  25.  
  26. import sandbox.WrapToTest;
  27.  
  28. @WrapToTest
  29. public class D11_ParseHtmlObjects {
  30.  
  31.     public static final String HTML = "resources/xml/walden.html";
  32.     public static final String DEST = "results/xmlworker/walden5.pdf";
  33.  
  34.     public void createPdf(String file) throws IOException, DocumentException {
  35.        
  36.         // Parse HTML into Element list
  37.        
  38.         // CSS
  39.         CSSResolver cssResolver =
  40.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
  41.        
  42.         // HTML
  43.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  44.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  45.         htmlContext.autoBookmark(false);
  46.        
  47.         // Pipelines
  48.         ElementList elements = new ElementList();
  49.         ElementHandlerPipeline end = new ElementHandlerPipeline(elements, null);
  50.         HtmlPipeline html = new HtmlPipeline(htmlContext, end);
  51.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  52.        
  53.         // XML Worker
  54.         XMLWorker worker = new XMLWorker(css, true);
  55.         XMLParser p = new XMLParser(worker);
  56.         p.parse(new FileInputStream(HTML));
  57.        
  58.         // step 1
  59.         Document document = new Document(PageSize.LEGAL.rotate());
  60.        
  61.         // step 2
  62.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  63.         writer.setInitialLeading(12.5f);
  64.        
  65.         // step 3
  66.         document.open();
  67.        
  68.         // step 4
  69.         Rectangle left = new Rectangle(36, 36, 486, 586);
  70.         Rectangle right = new Rectangle(522, 36, 972, 586);
  71.         ColumnText column = new ColumnText(writer.getDirectContent());
  72.         column.setSimpleColumn(left);
  73.         boolean leftside = true;
  74.         int status = ColumnText.START_COLUMN;
  75.         for (Element e : elements) {
  76.             if (ColumnText.isAllowedElement(e)) {
  77.                 column.addElement(e);
  78.                 status = column.go();
  79.                 while (ColumnText.hasMoreText(status)) {
  80.                     if (leftside) {
  81.                         leftside = false;
  82.                         column.setSimpleColumn(right);
  83.                     }
  84.                     else {
  85.                         document.newPage();
  86.                         leftside = true;
  87.                         column.setSimpleColumn(left);
  88.                     }
  89.                     status = column.go();
  90.                 }
  91.             }
  92.         }
  93.        
  94.         // step 5
  95.         document.close();
  96.     }
  97.    
  98.     /**
  99.      * Main method
  100.      */
  101.     public static void main(String[] args) throws IOException, DocumentException {
  102.         File file = new File(DEST);
  103.         file.getParentFile().mkdirs();
  104.         new D11_ParseHtmlObjects().createPdf(DEST);
  105.     }
  106. }
D12_ParseHtmlCustomPipeline.java
  1. package sandbox.xmlworker;
  2.  
  3. import com.itextpdf.text.Document;
  4. import com.itextpdf.text.DocumentException;
  5. import com.itextpdf.text.pdf.PdfWriter;
  6. import com.itextpdf.tool.xml.CustomContext;
  7. import com.itextpdf.tool.xml.Pipeline;
  8. import com.itextpdf.tool.xml.PipelineException;
  9. import com.itextpdf.tool.xml.ProcessObject;
  10. import com.itextpdf.tool.xml.Tag;
  11. import com.itextpdf.tool.xml.WorkerContext;
  12. import com.itextpdf.tool.xml.XMLWorker;
  13. import com.itextpdf.tool.xml.XMLWorkerHelper;
  14. import com.itextpdf.tool.xml.html.Tags;
  15. import com.itextpdf.tool.xml.parser.XMLParser;
  16. import com.itextpdf.tool.xml.pipeline.AbstractPipeline;
  17. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  18. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  19. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  20. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  21. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  22.  
  23. import java.io.File;
  24. import java.io.FileInputStream;
  25. import java.io.FileOutputStream;
  26. import java.io.IOException;
  27.  
  28. import sandbox.WrapToTest;
  29.  
  30. @WrapToTest
  31. public class D12_ParseHtmlCustomPipeline {
  32.  
  33.     public static final String HTML = "resources/xml/walden.html";
  34.     public static final String DEST = "results/xmlworker/walden6.pdf";
  35.  
  36.     class CustomPipeline extends AbstractPipeline {
  37.  
  38.         private int indent = -1;
  39.        
  40.         /* (non-Javadoc)
  41.          * @see com.itextpdf.tool.xml.pipeline.AbstractPipeline#open(com.itextpdf.tool.xml.WorkerContext, com.itextpdf.tool.xml.Tag, com.itextpdf.tool.xml.ProcessObject)
  42.          */
  43.         @Override
  44.         public Pipeline open(WorkerContext context, Tag t, ProcessObject po)
  45.                 throws PipelineException {
  46.             indent++;
  47.             for (int i = 0; i < indent; i++)
  48.                 System.out.print("\t");
  49.             System.out.println("");
  50.             return super.open(context, t, po);
  51.         }
  52.  
  53.         /* (non-Javadoc)
  54.          * @see com.itextpdf.tool.xml.pipeline.AbstractPipeline#close(com.itextpdf.tool.xml.WorkerContext, com.itextpdf.tool.xml.Tag, com.itextpdf.tool.xml.ProcessObject)
  55.          */
  56.         @Override
  57.         public Pipeline close(WorkerContext context, Tag t, ProcessObject po)
  58.                 throws PipelineException {
  59.             for (int i = 0; i < indent; i++)
  60.                 System.out.print("\t");
  61.             System.out.println("");
  62.             indent--;
  63.             return super.close(context, t, po);
  64.         }
  65.  
  66.         public CustomPipeline(Pipeline next) {
  67.             super(next);
  68.         }
  69.        
  70.     }
  71.    
  72.     public void createPdf(String file) throws IOException, DocumentException {
  73.         // step 1
  74.         Document document = new Document();
  75.        
  76.         // step 2
  77.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  78.         writer.setInitialLeading(12.5f);
  79.        
  80.         // step 3
  81.         document.open();
  82.        
  83.         // step 4
  84.        
  85.         // CSS
  86.         CSSResolver cssResolver =
  87.                 XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
  88.        
  89.         // HTML
  90.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
  91.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  92.         htmlContext.autoBookmark(false);
  93.        
  94.         // Pipelines
  95.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  96.         CustomPipeline custom = new CustomPipeline(pdf);
  97.         HtmlPipeline html = new HtmlPipeline(htmlContext, custom);
  98.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  99.        
  100.         // XML Worker
  101.         XMLWorker worker = new XMLWorker(css, true);
  102.         XMLParser p = new XMLParser(worker);
  103.         p.parse(new FileInputStream(HTML));
  104.        
  105.         // step 5
  106.         document.close();
  107.     }
  108.    
  109.     /**
  110.      * Main method
  111.      */
  112.     public static void main(String[] args) throws IOException, DocumentException {
  113.         File file = new File(DEST);
  114.         file.getParentFile().mkdirs();
  115.         new D12_ParseHtmlCustomPipeline().createPdf(DEST);
  116.     }
  117. }
Contact

Still have questions? 

We're happy to answer your questions. Reach out to us and we'll get back to you shortly.

Contact us
Stay updated

Join 11,000+ subscribers and become an iText PDF expert by staying up to date with our new products, updates, tips, technical solutions and happenings.

Subscribe Now