#! python2
# coding: utf-8
"""Extract the text of a PDF file into the module-level name ``data``.

The PDF at ``path`` is opened in binary mode, every page is run through a
pdfminer ``PDFPageInterpreter`` that renders into a ``TextConverter``, and
the converter's accumulated output is read back from an in-memory buffer.

This is Python 2 code (it relies on ``cStringIO``).
"""

import sys
from cStringIO import StringIO

from pdfminer import pdfinterp
from pdfminer import pdfpage
from pdfminer import converter
from pdfminer import layout

# NOTE(review): ``path`` is not defined anywhere in the visible part of this
# file; it has to be supplied before this point (``sys`` is imported but
# unused, so ``sys.argv`` may have been the intent -- confirm).
with open(path, 'rb') as fp:
    rsrcmgr = pdfinterp.PDFResourceManager()
    # In-memory sink that the converter writes the extracted text into.
    retstr = StringIO()
    codec = 'utf-8'
    laparams = layout.LAParams()
    device = converter.TextConverter(
        rsrcmgr, retstr, codec=codec, laparams=laparams)
    try:
        # Create a PDF interpreter object.
        interpreter = pdfinterp.PDFPageInterpreter(rsrcmgr, device)
        # Process each page contained in the document.
        pages = pdfpage.PDFPage.get_pages(fp)
        for page in pages:
            interpreter.process_page(page)
        # Read the result while the device is still open.
        data = retstr.getvalue()
    finally:
        # Release the converter even when a page fails to parse.
        # ``retstr`` is left open on purpose so later code can still read it.
        device.close()