Parsing multiple HTML files into a single PDF

This is a code example for iText PDF. Discover more.

5th November 2015
admin-marketing

Switch code for this example

ParseMultipleHtmlFiles1.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/27814701/itextsharp-how-to-use-htmlworker-to-covert-html-to-pdf-with-pagination
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.pdf.PdfCopy;
  10. import com.itextpdf.text.pdf.PdfReader;
  11. import com.itextpdf.text.pdf.PdfWriter;
  12. import com.itextpdf.tool.xml.XMLWorkerHelper;
  13.  
  14. import java.io.ByteArrayOutputStream;
  15. import java.io.File;
  16. import java.io.FileInputStream;
  17. import java.io.FileOutputStream;
  18. import java.io.IOException;
  19. import sandbox.WrapToTest;
  20.  
  21. /**
  22.  *
  23.  * @author iText
  24.  */
  25. @WrapToTest
  26. public class ParseMultipleHtmlFiles1 {
  27.     public static final String[] HTML = {
  28.         "resources/xml/page1.html",
  29.         "resources/xml/page2.html",
  30.         "resources/xml/page3.html"
  31.     };
  32.     public static final String DEST = "results/xmlworker/multiple_html_pages1.pdf";
  33.  
  34.     public static void main(String[] args) throws IOException, DocumentException {
  35.         File file = new File(DEST);
  36.         file.getParentFile().mkdirs();
  37.         new ParseMultipleHtmlFiles1().createPdf(DEST);
  38.     }
  39.        
  40.     /**
  41.      * Creates a PDF with the words "Hello World"
  42.      * @param file
  43.      * @throws IOException
  44.      * @throws DocumentException
  45.      */
  46.     public void createPdf(String file) throws IOException, DocumentException {
  47.         Document document = new Document();
  48.         PdfCopy copy = new PdfCopy(document, new FileOutputStream(file));
  49.         document.open();
  50.         PdfReader reader;
  51.         for (String html : HTML) {
  52.             reader = new PdfReader(parseHtml(html));
  53.             copy.addDocument(reader);
  54.             reader.close();
  55.         }
  56.         document.close();
  57.     }
  58.    
  59.     public byte[] parseHtml(String html) throws DocumentException, IOException {
  60.         ByteArrayOutputStream baos = new ByteArrayOutputStream();
  61.         // step 1
  62.         Document document = new Document();
  63.         // step 2
  64.         PdfWriter writer = PdfWriter.getInstance(document, baos);
  65.         // step 3
  66.         document.open();
  67.         // step 4
  68.         XMLWorkerHelper.getInstance().parseXHtml(writer, document,
  69.                 new FileInputStream(html));
  70.         // step 5
  71.         document.close();
  72.         // return the bytes of the PDF
  73.         return baos.toByteArray();
  74.     }
  75. }
ParseMultipleHtmlFiles2.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/27814701/itextsharp-how-to-use-htmlworker-to-covert-html-to-pdf-with-pagination
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.Element;
  10. import com.itextpdf.text.Utilities;
  11. import com.itextpdf.text.pdf.PdfWriter;
  12. import com.itextpdf.tool.xml.ElementList;
  13. import com.itextpdf.tool.xml.XMLWorkerHelper;
  14.  
  15. import java.io.ByteArrayOutputStream;
  16. import java.io.File;
  17. import java.io.FileOutputStream;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import sandbox.WrapToTest;
  21.  
  22. /**
  23.  *
  24.  * @author iText
  25.  */
  26. @WrapToTest
  27. public class ParseMultipleHtmlFiles2 {
  28.     public static final String[] HTML = {
  29.         "resources/xml/page1.html",
  30.         "resources/xml/page2.html",
  31.         "resources/xml/page3.html"
  32.     };
  33.     public static final String DEST = "results/xmlworker/multiple_html_pages2.pdf";
  34.  
  35.     public static void main(String[] args) throws IOException, DocumentException {
  36.         File file = new File(DEST);
  37.         file.getParentFile().mkdirs();
  38.         new ParseMultipleHtmlFiles2().createPdf(DEST);
  39.     }
  40.        
  41.     /**
  42.      * Creates a PDF with the words "Hello World"
  43.      * @param file
  44.      * @throws IOException
  45.      * @throws DocumentException
  46.      */
  47.     public void createPdf(String file) throws IOException, DocumentException {
  48.         Document document = new Document();
  49.         PdfWriter.getInstance(document, new FileOutputStream(file));
  50.         document.open();
  51.         String css = readCSS();
  52.         for (String htmlfile : HTML) {
  53.             String html = Utilities.readFileToString(htmlfile);
  54.             ElementList list = XMLWorkerHelper.parseToElementList(html, css);
  55.             for (Element e : list) {
  56.                 document.add(e);
  57.             }
  58.             document.newPage();
  59.         }
  60.         document.close();
  61.     }
  62.    
  63.     private String readCSS() throws IOException {
  64.         ByteArrayOutputStream baos = new ByteArrayOutputStream();
  65.         byte[] buffer = new byte[1024];
  66.         int length;
  67.         InputStream is = XMLWorkerHelper.class.getResourceAsStream("/default.css");
  68.         while ((length = is.read(buffer)) != -1) {
  69.             baos.write(buffer, 0, length);
  70.         }
  71.         return new String(baos.toByteArray());
  72.     }
  73. }
Contact

Still have questions? 

We're happy to answer your questions. Reach out to us and we'll get back to you shortly.

Contact us
Stay updated

Join 11,000+ subscribers and become an iText PDF expert by staying up to date with our new products, updates, tips, technical solutions and happenings.

Subscribe Now