Extracting objects from a PDF

This is an iText PDF code example that writes every stream object found in a PDF to its own file. Discover more.

31st May 2016
iText PDF

Switch code for this example

ExtractStreams.java
  1. /*
  2.  
  3.     This file is part of the iText (R) project.
  4.     Copyright (c) 1998-2016 iText Group NV
  5.  
  6. */
  7.  
  8. /**
  9.  * Example written by Bruno Lowagie in answer to the following question:
  10.  * http://stackoverflow.com/questions/30286601/extracting-an-embedded-object-from-a-pdf
  11.  */
  12. package com.itextpdf.samples.sandbox.parse;
  13.  
  14. import com.itextpdf.kernel.PdfException;
  15. import com.itextpdf.kernel.pdf.PdfDocument;
  16. import com.itextpdf.kernel.pdf.PdfObject;
  17. import com.itextpdf.kernel.pdf.PdfReader;
  18. import com.itextpdf.kernel.pdf.PdfStream;
  19. import com.itextpdf.test.annotations.type.SampleTest;
  20.  
  21. import org.junit.Assert;
  22. import org.junit.BeforeClass;
  23. import org.junit.Test;
  24. import org.junit.experimental.categories.Category;
  25.  
  26. import java.io.File;
  27. import java.io.FileOutputStream;
  28. import java.io.IOException;
  29. import java.util.ArrayList;
  30. import java.util.List;
  31.  
  32. @Category(SampleTest.class)
  33. public class ExtractStreams {
  34.     public static final String DEST = "./target/test/resources/sandbox/parse/extract_streams%s";
  35.     public static final String SRC = "./src/test/resources/pdfs/image.pdf";
  36.  
  37.     @BeforeClass
  38.     public static void before() {
  39.         new File(DEST).getParentFile().mkdirs();
  40.     }
  41.  
  42.     public static void main(String[] args) throws IOException {
  43.         before();
  44.         new ExtractStreams().manipulatePdf();
  45.     }
  46.  
  47.     @Test
  48.     public void manipulatePdf() throws IOException {
  49.         PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
  50.         PdfObject obj;
  51.         List<Integer> streamLengths = new ArrayList<>();
  52.         for (int i = 1; i <= pdfDoc.getNumberOfPdfObjects(); i++) {
  53.             obj = pdfDoc.getPdfObject(i);
  54.             if (obj != null && obj.isStream()) {
  55.                 byte[] b;
  56.                 try {
  57.                     b = ((PdfStream) obj).getBytes();
  58.                 } catch (PdfException exc) {
  59.                     b = ((PdfStream) obj).getBytes(false);
  60.                 }
  61.                 System.out.println(b.length);
  62.                 FileOutputStream fos = new FileOutputStream(String.format(DEST, i));
  63.                 fos.write(b);
  64.  
  65.                 streamLengths.add(b.length);
  66.                 fos.close();
  67.             }
  68.         }
  69.         Assert.assertArrayEquals(new Integer[]{30965, 74}, streamLengths.toArray(new Integer[streamLengths.size()]));
  70.         pdfDoc.close();
  71.     }
  72. }

Resources

Contact

Still have questions? 

We're happy to answer your questions. Reach out to us and we'll get back to you shortly.

Contact us
Stay updated

Join 11,000+ subscribers and become an iText PDF expert by staying up to date with our new products, updates, tips, technical solutions and happenings.

Subscribe Now