HTML and right-to-left writing systems

This is a code example of iText PDF, discover more.

5th November 2015
admin-marketing

Switch code for this example

ParseHtml7.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/30214147/arabic-characters-from-html-content-to-pdf-using-itext
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.Element;
  10. import com.itextpdf.text.pdf.PdfPCell;
  11. import com.itextpdf.text.pdf.PdfPTable;
  12. import com.itextpdf.text.pdf.PdfWriter;
  13. import com.itextpdf.tool.xml.ElementList;
  14. import com.itextpdf.tool.xml.XMLWorker;
  15. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  16. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  17. import com.itextpdf.tool.xml.html.CssAppliers;
  18. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  19. import com.itextpdf.tool.xml.html.Tags;
  20. import com.itextpdf.tool.xml.parser.XMLParser;
  21. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  22. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  23. import com.itextpdf.tool.xml.pipeline.end.ElementHandlerPipeline;
  24. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  25. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  26.  
  27. import java.io.File;
  28. import java.io.FileInputStream;
  29. import java.io.FileOutputStream;
  30. import java.io.IOException;
  31. import java.nio.charset.Charset;
  32. import sandbox.WrapToTest;
  33.  
  34. /**
  35.  *
  36.  * @author iText
  37.  */
  38. @WrapToTest
  39. public class ParseHtml7 {
  40.     public static final String DEST = "results/xmlworker/arabic.pdf";
  41.     public static final String HTML = "resources/xml/arabic.html";
  42.    
  43.     public static void main(String[] args) throws IOException, DocumentException {
  44.         File file = new File(DEST);
  45.         file.getParentFile().mkdirs();
  46.         new ParseHtml7().createPdf(DEST);
  47.     }
  48.        
  49.     /**
  50.      * Creates a PDF with the words "Hello World"
  51.      * @param file
  52.      * @throws IOException
  53.      * @throws DocumentException
  54.      */
  55.     public void createPdf(String file) throws IOException, DocumentException {
  56.         // step 1
  57.         Document document = new Document();
  58.         // step 2
  59.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  60.         // step 3
  61.         document.open();
  62.         // step 4
  63.         // Styles
  64.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  65.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  66.         fontProvider.register("resources/fonts/NotoNaskhArabic-Regular.ttf");
  67.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  68.         // HTML
  69.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  70.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  71.         // Pipelines
  72.         ElementList elements = new ElementList();
  73.         ElementHandlerPipeline pdf = new ElementHandlerPipeline(elements, null);
  74.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  75.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  76.        
  77.         // XML Worker
  78.         XMLWorker worker = new XMLWorker(css, true);
  79.         XMLParser p = new XMLParser(worker);
  80.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));
  81.        
  82.         PdfPTable table = new PdfPTable(1);
  83.         PdfPCell cell = new PdfPCell();
  84.         cell.setRunDirection(PdfWriter.RUN_DIRECTION_RTL);
  85.         for (Element e : elements) {
  86.             cell.addElement(e);
  87.         }
  88.         table.addCell(cell);
  89.         document.add(table);
  90.         // step 5
  91.         document.close();
  92.     }
  93. }
ParseHtml8.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/30214147/arabic-characters-from-html-content-to-pdf-using-itext
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.pdf.PdfWriter;
  10. import com.itextpdf.tool.xml.XMLWorker;
  11. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  12. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  13. import com.itextpdf.tool.xml.html.CssAppliers;
  14. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  15. import com.itextpdf.tool.xml.html.Tags;
  16. import com.itextpdf.tool.xml.parser.XMLParser;
  17. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  18. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  19. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  20. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  21. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  22.  
  23. import java.io.File;
  24. import java.io.FileInputStream;
  25. import java.io.FileOutputStream;
  26. import java.io.IOException;
  27. import java.nio.charset.Charset;
  28. import sandbox.WrapToTest;
  29.  
  30. /**
  31.  *
  32.  * @author iText
  33.  */
  34. @WrapToTest
  35. public class ParseHtml8 {
  36.     public static final String DEST = "results/xmlworker/arabic2.pdf";
  37.     public static final String HTML = "resources/xml/arabic2.html";
  38.    
  39.     public static void main(String[] args) throws IOException, DocumentException {
  40.         File file = new File(DEST);
  41.         file.getParentFile().mkdirs();
  42.         new ParseHtml8().createPdf(DEST);
  43.     }
  44.        
  45.     /**
  46.      * Creates a PDF with the words "Hello World"
  47.      * @param file
  48.      * @throws IOException
  49.      * @throws DocumentException
  50.      */
  51.     public void createPdf(String file) throws IOException, DocumentException {
  52.         // step 1
  53.         Document document = new Document();
  54.         // step 2
  55.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  56.         // step 3
  57.         document.open();
  58.         // step 4
  59.         // Styles
  60.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  61.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  62.         fontProvider.register("resources/fonts/NotoNaskhArabic-Regular.ttf");
  63.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  64.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  65.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  66.        
  67.         // Pipelines
  68.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  69.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  70.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  71.        
  72.         // XML Worker
  73.         XMLWorker worker = new XMLWorker(css, true);
  74.         XMLParser p = new XMLParser(worker);
  75.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));;
  76.         // step 5
  77.         document.close();
  78.     }
  79. }
ParseHtml9.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/30214147/arabic-characters-from-html-content-to-pdf-using-itext
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.pdf.PdfWriter;
  10. import com.itextpdf.tool.xml.XMLWorker;
  11. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  12. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  13. import com.itextpdf.tool.xml.html.CssAppliers;
  14. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  15. import com.itextpdf.tool.xml.html.Tags;
  16. import com.itextpdf.tool.xml.parser.XMLParser;
  17. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  18. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  19. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  20. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  21. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  22.  
  23. import java.io.File;
  24. import java.io.FileInputStream;
  25. import java.io.FileOutputStream;
  26. import java.io.IOException;
  27. import java.nio.charset.Charset;
  28. import sandbox.WrapToTest;
  29.  
  30. /**
  31.  *
  32.  * @author iText
  33.  */
  34. @WrapToTest
  35. public class ParseHtml9 {
  36.     public static final String DEST = "results/xmlworker/arabic3.pdf";
  37.     public static final String HTML = "resources/xml/arabic3.html";
  38.    
  39.     public static void main(String[] args) throws IOException, DocumentException {
  40.         File file = new File(DEST);
  41.         file.getParentFile().mkdirs();
  42.         new ParseHtml9().createPdf(DEST);
  43.     }
  44.        
  45.     /**
  46.      * Creates a PDF with the words "Hello World"
  47.      * @param file
  48.      * @throws IOException
  49.      * @throws DocumentException
  50.      */
  51.     public void createPdf(String file) throws IOException, DocumentException {
  52.         // step 1
  53.         Document document = new Document();
  54.         // step 2
  55.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  56.         // step 3
  57.         document.open();
  58.         // step 4
  59.         // Styles
  60.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  61.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  62.         fontProvider.register("resources/fonts/NotoNaskhArabic-Regular.ttf");
  63.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  64.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  65.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  66.        
  67.         // Pipelines
  68.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  69.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  70.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  71.        
  72.         // XML Worker
  73.         XMLWorker worker = new XMLWorker(css, true);
  74.         XMLParser p = new XMLParser(worker);
  75.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));;
  76.         // step 5
  77.         document.close();
  78.     }
  79. }
ParseHtml10.java
  1. /**
  2.  * Example written by Bruno Lowagie in answer to the following question:
  3.  * http://stackoverflow.com/questions/30847907/cant-set-rtl-direction-for-hebrew-letters-while-converting-from-xhtml-to-pd
  4.  */
  5. package sandbox.xmlworker;
  6.  
  7. import com.itextpdf.text.Document;
  8. import com.itextpdf.text.DocumentException;
  9. import com.itextpdf.text.pdf.PdfWriter;
  10. import com.itextpdf.tool.xml.XMLWorker;
  11. import com.itextpdf.tool.xml.XMLWorkerFontProvider;
  12. import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
  13. import com.itextpdf.tool.xml.html.CssAppliers;
  14. import com.itextpdf.tool.xml.html.CssAppliersImpl;
  15. import com.itextpdf.tool.xml.html.Tags;
  16. import com.itextpdf.tool.xml.parser.XMLParser;
  17. import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
  18. import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
  19. import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
  20. import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
  21. import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
  22.  
  23. import java.io.File;
  24. import java.io.FileInputStream;
  25. import java.io.FileOutputStream;
  26. import java.io.IOException;
  27. import java.nio.charset.Charset;
  28. import sandbox.WrapToTest;
  29.  
  30. /**
  31.  *
  32.  * @author iText
  33.  */
  34. @WrapToTest
  35. public class ParseHtml10 {
  36.     public static final String DEST = "results/xmlworker/hebrew.pdf";
  37.     public static final String HTML = "resources/xml/hebrew.html";
  38.    
  39.     public static void main(String[] args) throws IOException, DocumentException {
  40.         File file = new File(DEST);
  41.         file.getParentFile().mkdirs();
  42.         new ParseHtml10().createPdf(DEST);
  43.     }
  44.        
  45.     /**
  46.      * Creates a PDF with the words "Hello World"
  47.      * @param file
  48.      * @throws IOException
  49.      * @throws DocumentException
  50.      */
  51.     public void createPdf(String file) throws IOException, DocumentException {
  52.         // step 1
  53.         Document document = new Document();
  54.         // step 2
  55.         PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
  56.         // step 3
  57.         document.open();
  58.         // step 4
  59.         // Styles
  60.         CSSResolver cssResolver = new StyleAttrCSSResolver();
  61.         XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
  62.         fontProvider.register("resources/fonts/NotoSansHebrew-Regular.ttf");
  63.         CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
  64.         HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
  65.         htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
  66.        
  67.         // Pipelines
  68.         PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
  69.         HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
  70.         CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
  71.        
  72.         // XML Worker
  73.         XMLWorker worker = new XMLWorker(css, true);
  74.         XMLParser p = new XMLParser(worker);
  75.         p.parse(new FileInputStream(HTML), Charset.forName("UTF-8"));;
  76.         // step 5
  77.         document.close();
  78.     }
  79. }
Contact

Still have questions? 

We're happy to answer your questions. Reach out to us and we'll get back to you shortly.

Contact us
Stay updated

Join 11,000+ subscribers and become an iText PDF expert by staying up to date with our new products, updates, tips, technical solutions and happenings.

Subscribe Now