Python 读取 PDF


安装包

pip install pdfminer.six

读取pdf

import pdfminer.high_level

# Extract all text from a single PDF file.
# The path is a raw string so that Windows backslashes (e.g. "\p", "\M") are
# never interpreted as escape sequences; the resulting path is unchanged.
text = pdfminer.high_level.extract_text(r'D:\pypdfTest\MFT_SSCM_DT_HQ_00001_采购计划数据传输服务(MFT)-V0.12.pdf')
# Split the extracted text into sections at every occurrence of the marker.
texts = text.split('服务提供方')
print(text)

批量读取

import os
import pdfminer.high_level

def get_file_names(directory):
    """Return the names of the regular files located directly in *directory*.

    Sub-directories are skipped and the search is not recursive. The returned
    names are bare entry names (not joined with *directory*), in the order
    produced by ``os.listdir``.
    """
    names = []
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isfile(full_path):
            names.append(entry)
    return names


# Folder whose PDF files will be read. A raw string keeps the Windows
# backslash from being interpreted as an escape sequence.
directory = r'D:\pypdfTest'  # replace with your own folder path
file_names = get_file_names(directory)
for file_name in file_names:
    # The folder may contain other file types; only hand PDFs to the parser
    # so that one stray file does not abort the whole batch.
    if not file_name.lower().endswith('.pdf'):
        continue
    # Read and parse the PDF, then print the extracted text.
    text = pdfminer.high_level.extract_text(os.path.join(directory, file_name))
    print(text)

#print(file_names)

文章作者: 张一雄
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 张一雄 !
  目录