基于Python編寫一個PDF轉換工具箱
更新時間:2024年12月30日 11:25:43 作者:hvinsion
這篇文章主要為大家詳細介紹了如何使用Python編寫一個PDF轉換工具箱,可以實現PDF轉圖片,word,拆分,刪除,提取等功能,感興趣的可以了解下
1.簡介
使用Python自寫的pdf工具箱,包括pdf合并,頁面拆分,頁面刪除,頁面提取,
以及轉換word,圖片功能,支持文件拖入。
2.功能介紹
合并:添加順序就是合并順序,可多次添加。
拆分:將輸入頁碼的范圍拆分成每個獨立的pdf,單次可輸入多個范圍。
刪除:將輸入頁碼的范圍刪除,單次可輸入多個范圍,保存刪除后的文件。
提取:將輸入頁碼的范圍提取成獨立的pdf,單次可輸入多個范圍。
3.運行效果

4.相關源碼
import os
import re
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QVBoxLayout, QWidget, QFileDialog, QListWidget, \
QMessageBox, QLineEdit, QHBoxLayout
from PyQt5.QtCore import QThread, pyqtSignal
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from pdf2docx import Converter
import fitz # 用于PDF轉JPG的處理
class CustomListWidget(QListWidget):
    """List widget that accepts PDF files dragged onto it.

    Every dropped URL whose text ends in ``.pdf`` (case-insensitive) is
    converted to a local path and handed to the parent window's
    ``addPDFFile`` method.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        # Kept so dropEvent can forward dropped paths to the owning window.
        self.parentWindow = parent

    @staticmethod
    def _looks_like_pdf(url):
        """Return True when the URL text ends with '.pdf', ignoring case."""
        return url.toString().lower().endswith('.pdf')

    def _carries_pdf(self, event):
        """Return True when at least one URL in the drag payload is a PDF."""
        for url in event.mimeData().urls():
            if self._looks_like_pdf(url):
                return True
        return False

    def dragEnterEvent(self, event):
        """Accept the drag as soon as it contains at least one PDF."""
        if self._carries_pdf(event):
            event.acceptProposedAction()

    def dragMoveEvent(self, event):
        """Keep accepting the drag while it moves over the widget."""
        if self._carries_pdf(event):
            event.acceptProposedAction()

    def dropEvent(self, event):
        """Forward every dropped PDF path to the parent window."""
        dropped = []
        for url in event.mimeData().urls():
            if self._looks_like_pdf(url):
                dropped.append(url.toLocalFile())
        for path in dropped:
            self.parentWindow.addPDFFile(path)
class Worker(QThread):
    """Background thread that performs one PDF operation.

    The operation is selected by name (``'merge'``, ``'split'``, ``'delete'``,
    ``'extract'``, ``'jpg'`` or ``'word'``). On success ``finished`` is
    emitted with a user-facing message; any exception is reported through
    ``error`` as text instead of propagating out of the thread.
    """

    # NOTE(review): QThread already has a built-in ``finished`` signal; this
    # str-carrying attribute replaces it on the Python side. The name is kept
    # because callers connect to it.
    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        """Store the job description.

        Args:
            pdf_files: Paths of the input PDFs. Split/delete/extract only use
                the first entry.
            range_str: Page ranges such as ``'1,3-4,8-15'`` (1-based,
                inclusive); required by split/delete/extract.
            save_path: Output file (merge, delete) or output directory
                (split, extract, jpg, word).
            operation: Name of the operation to run.
        """
        super().__init__()
        # Snapshot the list so later edits by the caller cannot change the
        # job while it is running.
        self.pdf_files = list(pdf_files) if pdf_files is not None else []
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Dispatch to the handler for ``self.operation`` and report errors."""
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        try:
            handler = handlers.get(self.operation)
            if handler is None:
                # Previously an unknown operation ended silently with no signal.
                raise ValueError(f'未知的操作: {self.operation!r}')
            handler()
        except Exception as e:
            # Top-level boundary of the thread: surface every failure to the GUI.
            self.error.emit(str(e) or repr(e))

    def merge_pdfs(self):
        """Append all input PDFs in list order and write them to ``save_path``."""
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            # Release the merger even when appending or writing fails.
            merger.close()
        self.finished.emit('PDF文件已成功合并。')

    def split_pdfs(self):
        """Write every page of the requested ranges as its own PDF file.

        Files are named ``split_page_<n>.pdf`` inside the ``save_path``
        directory, numbered consecutively across all ranges.
        """
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        self._check_ranges(ranges, len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)  # make sure the target folder exists
        file_index = 1  # running counter that keeps output names unique
        for start_page, end_page in ranges:
            for page_num in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(reader.pages[page_num])
                split_save_path = os.path.join(self.save_path, f'split_page_{file_index}.pdf')
                with open(split_save_path, 'wb') as f:
                    writer.write(f)
                file_index += 1
        self.finished.emit('PDF文件已成功拆分并保存。')

    def delete_pages(self):
        """Write a copy of the first PDF without the requested pages."""
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        self._check_ranges(ranges, len(reader.pages))
        writer = PdfWriter()
        pages_to_delete = {page for start, end in ranges for page in range(start, end + 1)}
        for i, page in enumerate(reader.pages):
            if i not in pages_to_delete:
                writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定頁面已從PDF中刪除。')

    def extract_pages(self):
        """Write each requested range as one PDF inside the ``save_path`` directory.

        Files are named ``extract_<n>.pdf`` where ``n`` is the 1-based
        position of the range in the input string.
        """
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        self._check_ranges(ranges, len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)  # create the folder once, before the loop
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定頁面已從PDF中提取。')

    def pdf_to_jpg(self):
        """Render every page of every input PDF to ``Page_<n>.jpg``.

        Each PDF gets its own sub-folder of ``save_path`` named after the
        PDF's base name.
        """
        trans = fitz.Matrix(2, 2)  # 2x zoom on both axes; identical for every page
        for file in self.pdf_files:
            pdf = fitz.open(file)
            try:
                img_folder = os.path.join(self.save_path, os.path.splitext(os.path.basename(file))[0])
                os.makedirs(img_folder, exist_ok=True)
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'Page_{pg + 1}.jpg'))
            finally:
                # Close the document even if rendering a page fails.
                pdf.close()
        self.finished.emit('PDF文件已成功轉換為圖片。')

    def pdf_to_word(self):
        """Convert every input PDF to a ``.docx`` file.

        The document is written into ``save_path`` when one was given;
        otherwise it is written next to the source PDF (the previous
        behaviour, which ignored ``save_path`` entirely).
        """
        for file in self.pdf_files:
            base_name = os.path.splitext(os.path.basename(file))[0]
            if self.save_path:
                os.makedirs(self.save_path, exist_ok=True)
                target_dir = self.save_path
            else:
                target_dir = os.path.dirname(file)
            docx_name = os.path.join(target_dir, base_name + '.docx')
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                # Release the converter even if the conversion fails.
                cv.close()
        self.finished.emit('PDF文件已成功轉換為Word文檔。')

    def parse_ranges(self, ranges_str):
        """Parse a string such as ``'1,3-4,8-15'`` into zero-based page ranges.

        Parts may be separated by an ASCII or a full-width comma; surrounding
        whitespace and empty parts (e.g. a trailing comma) are ignored.

        Args:
            ranges_str: 1-based, inclusive page numbers or ``start-end`` ranges.

        Returns:
            A list of ``(start, end)`` tuples with zero-based, inclusive indices.

        Raises:
            ValueError: If a part is not a number or range, a page number is
                below 1, a range is reversed, or no range was given at all.
        """
        ranges = []
        for part in re.split(r'[,,]', ranges_str or ''):
            part = part.strip()
            if not part:
                continue
            try:
                if '-' in part:
                    start_page, end_page = map(int, part.split('-'))
                else:
                    start_page = end_page = int(part)
            except ValueError:
                raise ValueError(f'無效的頁碼范圍: {part}') from None
            # Page 0 would become index -1 and silently select the last page;
            # a reversed range would silently select nothing.
            if start_page < 1 or end_page < start_page:
                raise ValueError(f'無效的頁碼范圍: {part}')
            ranges.append((start_page - 1, end_page - 1))
        if not ranges:
            raise ValueError('未輸入任何頁碼范圍。')
        return ranges

    @staticmethod
    def _check_ranges(ranges, page_count):
        """Raise ValueError when a zero-based range reaches past the last page."""
        for _start_page, end_page in ranges:
            if end_page >= page_count:
                raise ValueError(f'頁碼 {end_page + 1} 超出范圍,該PDF共有 {page_count} 頁。')
class PDFMergerApp(QMainWindow):
    """Main window of the PDF toolbox.

    Holds the list of selected PDF files and starts a ``Worker`` thread for
    each operation (merge, split, delete pages, extract pages, convert to
    images, convert to Word).
    """

    def __init__(self):
        super().__init__()
        # State is created before the UI so no slot can ever see it missing.
        self.pdf_files = []
        # Reference to the current Worker. It is not stored as ``self.thread``
        # because that name would shadow QObject.thread().
        self._worker = None
        self.initUI()

    def initUI(self):
        """Build all widgets and layouts of the main window."""
        self.setWindowTitle('PDF 工具箱')
        self.setGeometry(100, 100, 800, 600)
        mainLayout = QVBoxLayout()

        self.addButton = QPushButton('添加 PDF', self)
        self.addButton.clicked.connect(self.addPDF)
        mainLayout.addWidget(self.addButton)

        self.listWidget = CustomListWidget(self)
        mainLayout.addWidget(self.listWidget)

        deleteLayout = QHBoxLayout()
        self.removeButton = QPushButton('刪除選定', self)
        self.removeButton.clicked.connect(self.removeSelected)
        deleteLayout.addWidget(self.removeButton)
        self.removeAllButton = QPushButton('刪除全部', self)
        self.removeAllButton.clicked.connect(self.removeAll)
        deleteLayout.addWidget(self.removeAllButton)
        mainLayout.addLayout(deleteLayout)

        convertLayout = QHBoxLayout()
        self.convertJPGButton = QPushButton('轉換為圖片', self)
        self.convertJPGButton.clicked.connect(self.convertToJPG)
        convertLayout.addWidget(self.convertJPGButton)
        self.convertWordButton = QPushButton('轉換為Word', self)
        self.convertWordButton.clicked.connect(self.convertToWord)
        convertLayout.addWidget(self.convertWordButton)
        mainLayout.addLayout(convertLayout)

        self.mergeButton = QPushButton('合并 PDFs', self)
        self.mergeButton.clicked.connect(self.mergePDFs)
        mainLayout.addWidget(self.mergeButton)

        self.splitInput, self.splitButton = self._addRangeRow(
            mainLayout, '輸入拆分頁碼范圍可輸入多個范圍,如1,3-4,8-15', '拆分頁面', self.splitPDF)
        self.deleteInput, self.deleteButton = self._addRangeRow(
            mainLayout, '輸入刪除頁碼范圍可輸入多個范圍,如1,3-4,8-15', '刪除頁面', self.deletePages)
        self.extractInput, self.extractButton = self._addRangeRow(
            mainLayout, '輸入提取頁碼范圍可輸入多個范圍,如1,3-4,8-15', '提取頁面', self.extractPages)

        container = QWidget()
        container.setLayout(mainLayout)
        self.setCentralWidget(container)

    def _addRangeRow(self, layout, placeholder, button_text, slot):
        """Add a "range input + action button" row and return both widgets."""
        row = QHBoxLayout()
        line_edit = QLineEdit(self)
        line_edit.setPlaceholderText(placeholder)
        row.addWidget(line_edit)
        button = QPushButton(button_text, self)
        button.clicked.connect(slot)
        row.addWidget(button)
        layout.addLayout(row)
        return line_edit, button

    def addPDF(self):
        """Let the user pick one or more PDFs and add them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打開文件', '', 'PDF files (*.pdf)')
        for file_path in files:
            self.addPDFFile(file_path)

    def addPDFFile(self, file_path):
        """Add one path to the list, ignoring empty paths and duplicates."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listWidget.addItem(file_path)

    def removeSelected(self):
        """Remove the selected entries from both the list widget and the model."""
        for item in self.listWidget.selectedItems():
            self.pdf_files.remove(item.text())
            self.listWidget.takeItem(self.listWidget.row(item))

    def removeAll(self):
        """Remove every entry."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def _startWorker(self, operation, save_path, range_str=None):
        """Create, wire up and start a Worker for the given operation.

        Refuses to start while a previous worker is still running, because
        replacing the only reference to a running QThread can destroy it.
        """
        if self._worker is not None and self._worker.isRunning():
            QMessageBox.warning(self, "錯誤", "上一個操作仍在進行中,請稍候。")
            return
        # Pass a copy: onFinished clears self.pdf_files.
        self._worker = Worker(list(self.pdf_files), range_str=range_str,
                              save_path=save_path, operation=operation)
        self._worker.finished.connect(self.onFinished)
        self._worker.error.connect(self.onError)
        self._worker.start()

    def _requireFiles(self):
        """Warn and return False when no PDF has been added yet."""
        if self.pdf_files:
            return True
        QMessageBox.warning(self, "錯誤", "請先添加PDF文件。")
        return False

    def _singleFileRange(self, line_edit, file_warning):
        """Return the entered page range, or None after warning the user.

        A warning is shown when the list does not hold exactly one PDF or
        when the range field is empty.
        """
        if len(self.pdf_files) != 1:
            QMessageBox.warning(self, "錯誤", file_warning)
            return None
        range_str = line_edit.text().strip()
        if not range_str:
            QMessageBox.warning(self, "錯誤", "請輸入頁碼范圍。")
            return None
        return range_str

    def mergePDFs(self):
        """Merge all listed PDFs, in list order, into one file chosen by the user."""
        if not self._requireFiles():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')
        if save_path:
            self._startWorker('merge', save_path)

    def splitPDF(self):
        """Split the requested pages of the single listed PDF into one file per page."""
        range_str = self._singleFileRange(self.splitInput, "請只選擇一個PDF文件進行拆分。")
        if range_str is None:
            return
        folder_path = self.getFolderName()
        if folder_path:
            self._startWorker('split', folder_path, range_str)

    def deletePages(self):
        """Save a copy of the single listed PDF without the requested pages."""
        range_str = self._singleFileRange(self.deleteInput, "請只選擇一個PDF文件進行刪除操作。")
        if range_str is None:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')[0]
        if save_path:
            self._startWorker('delete', save_path, range_str)

    def extractPages(self):
        """Extract each requested range of the single listed PDF into its own file."""
        range_str = self._singleFileRange(self.extractInput, "請只選擇一個PDF文件進行提取操作。")
        if range_str is None:
            return
        # The worker writes extract_<n>.pdf files *inside* save_path, so a
        # folder is requested here. The previous save-file dialog made the
        # worker create a directory named like the chosen file.
        folder_path = self.getFolderName("選擇保存提取文件的位置")
        if folder_path:
            self._startWorker('extract', folder_path, range_str)

    def convertToJPG(self):
        """Render all listed PDFs to images in a folder chosen by the user."""
        if not self._requireFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "選擇保存圖片的位置")
        if save_path:
            self._startWorker('jpg', save_path)

    def convertToWord(self):
        """Convert all listed PDFs to Word documents."""
        if not self._requireFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "選擇保存Word的位置")
        if save_path:
            self._startWorker('word', save_path)

    def getFolderName(self, title="選擇保存拆分文件的位置"):
        """Ask the user for an existing folder; returns '' when cancelled."""
        folder_path = QFileDialog.getExistingDirectory(self, title)
        return folder_path

    def onFinished(self, message):
        """Show the success message, then reset the file list and inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onError(self, error_message):
        """Show the failure message; the file list and inputs are kept."""
        QMessageBox.warning(self, "操作失敗", error_message)

    def clear_pdf_list(self):
        """Empty both the model list and the list widget."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def clear_text_inputs(self):
        """Clear the contents of all page-range QLineEdit fields."""
        self.splitInput.clear()
        self.deleteInput.clear()
        self.extractInput.clear()
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QApplication(sys.argv)
    window = PDFMergerApp()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
到此這篇關于基于Python編寫一個PDF轉換工具箱的文章就介紹到這了,更多相關Python PDF轉換內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!
相關文章
Python restful框架接口開發實現
這篇文章主要介紹了Python restful框架接口開發實現,文中通過示例代碼介紹的非常詳細,對大家的學習或者工作具有一定的參考學習價值,需要的朋友可以參考下 2020-04-04

