Using Poppler, of course!
Poppler is a very useful tool for handling PDFs, as I’ve recently discovered. After I tried both MuPDF and ImageMagick’s Magick++ and failed with each, Poppler stepped up to the challenge and paid off.
So here’s a small example of how to work with the API (with OpenCV, naturally):
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <opencv2/opencv.hpp>
#include <poppler-document.h>
#include <poppler-page.h>
#include <poppler-page-renderer.h>
#include <poppler-image.h>
using namespace cv;
using namespace std;
using namespace poppler;
/// Renders the first page of a PDF file into an OpenCV matrix.
///
/// @param filename Path of the PDF file to load.
/// @param DPI      Resolution used for both the horizontal and the vertical
///                 axis when rasterizing the page.
/// @return A deep copy of the rendered page: CV_8UC3 when Poppler produced an
///         rgb24 image, CV_8UC4 when it produced an argb32 image. An empty
///         Mat is returned (and a message written to cerr) if the document
///         cannot be loaded, the first page cannot be created, rendering
///         fails, or the pixel format is neither of the two above.
Mat readPDFtoCV(const string& filename,int DPI) {
    // load_from_file() and create_page() return raw pointers that the caller
    // must delete; unique_ptr releases them on every return path.
    const std::unique_ptr<document> mypdf(document::load_from_file(filename));
    if(!mypdf) {
        cerr << "couldn't read pdf\n";
        return Mat();
    }
    cout << "pdf has " << mypdf->pages() << " pages\n";

    // Declared after the document so the page is destroyed first.
    const std::unique_ptr<page> mypage(mypdf->create_page(0));
    if(!mypage) {
        cerr << "couldn't read first page of pdf\n";
        return Mat();
    }

    page_renderer renderer;
    renderer.set_render_hint(page_renderer::text_antialiasing);
    image myimage = renderer.render_page(mypage.get(),DPI,DPI);
    if(!myimage.is_valid()) {
        cerr << "couldn't render pdf page\n";
        return Mat();
    }
    cout << "created image of " << myimage.width() << "x"<< myimage.height() << "\n";

    int cvtype = 0;
    if(myimage.format() == image::format_rgb24) {
        // NOTE(review): Poppler's rgb24 is presumably R,G,B in memory while
        // OpenCV routines conventionally expect B,G,R - confirm whether a
        // channel swap is wanted by callers.
        cvtype = CV_8UC3;
    } else if(myimage.format() == image::format_argb32) {
        cvtype = CV_8UC4;
    } else {
        cerr << "PDF format no good\n";
        return Mat();
    }

    // Wrap Poppler's buffer without copying, passing its real row stride so
    // that padded rows are handled, then deep-copy because the buffer dies
    // together with myimage at the end of this function.
    const Mat wrapped(myimage.height(),myimage.width(),cvtype,
                      myimage.data(),
                      static_cast<size_t>(myimage.bytes_per_row()));
    Mat cvimg;
    wrapped.copyTo(cvimg);
    return cvimg;
}
All you have to do is give it the DPI (say you want to render in 100 DPI) and a filename.
Keep in mind that it only renders the first page, but getting the other pages is just as easy: pass a different page index to create_page().
That’s it, enjoy!
Roy.