Text and images are the main elements of presentation slides. In this article, we will show you how to extract text and images from a PowerPoint document using Free Spire.Presentation for Java.
Installation
If you use Maven, specify the dependency on the Free Spire.Presentation for Java library in your project’s pom.xml file.
<!-- e-iceblue's own repository hosts the artifact. Use HTTPS: Maven 3.8.1 and
     later block plain-HTTP repositories by default. -->
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <name>e-iceblue</name>
        <url>https://repo.e-iceblue.com/nexus/content/groups/public/</url>
    </repository>
</repositories>
<!-- Free Spire.Presentation for Java -->
<dependencies>
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.presentation.free</artifactId>
        <version>3.9.0</version>
    </dependency>
</dependencies>
For non-Maven projects, download Free Spire.Presentation for Java, unzip the package, and add Spire.Presentation.jar from the lib folder to your project as a dependency.
import com.spire.presentation.*;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileWriter;
/**
 * Extracts the text and the images of a PowerPoint document.
 *
 * <p>Reads {@code Sample.pptx} from the working directory, writes the text of every
 * paragraph found in the auto shapes of each slide to {@code ExtractText.txt} (one
 * paragraph per line), and saves each image of the presentation as
 * {@code extractImage-<index>.png}.
 */
public class Test {

    /**
     * Runs the extraction.
     *
     * @param args unused
     * @throws Exception if the presentation cannot be loaded or an output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        //Create a presentation instance.
        Presentation ppt = new Presentation();
        try {
            //Load the document from disk.
            ppt.loadFromFile("Sample.pptx");

            //Traverse the presentation slides to extract the text.
            StringBuilder buffer = new StringBuilder();
            for (Object slide : ppt.getSlides()) {
                for (Object shape : ((ISlide) slide).getShapes()) {
                    if (shape instanceof IAutoShape) {
                        for (Object tp : ((IAutoShape) shape).getTextFrame().getParagraphs()) {
                            buffer.append(((ParagraphEx) tp).getText());
                            //Keep paragraphs on separate lines; without a separator
                            //the text of adjacent paragraphs runs together.
                            buffer.append(System.lineSeparator());
                        }
                    }
                }
            }

            //Save the text to a .txt file. try-with-resources closes (and thereby
            //flushes) the writer even if the write fails.
            try (FileWriter writer = new FileWriter("ExtractText.txt")) {
                writer.write(buffer.toString());
            }

            //Extract all the images from the presentation slides.
            for (int i = 0; i < ppt.getImages().getCount(); i++) {
                BufferedImage image = ppt.getImages().get(i).getImage();
                ImageIO.write(image, "PNG", new File(String.format("extractImage-%1$s.png", i)));
            }
        } finally {
            //Release the resources held by the presentation, even on failure.
            ppt.dispose();
        }
    }
}
Top comments (0)