Python如何動態修改Word文檔內容并保留格式樣式

更新時間：2025年05月23日 09:06:42 作者：雨田Larry

這篇文章主要為大家詳細介紹了如何使用Python的docx庫動態修改Word文檔內容,保留格式樣式,并通過win32com轉換為PDF,感興趣的小伙伴可以了解下

前言

假如你有一個Word模版文檔，要在里面填寫人員信息，但人員有成百上千個，手動填起來太浪費時間，還容易弄錯，剛好你又會寫Python，請看下文

一、需要安裝的庫

操作word的庫 docx（PyPI上的包名是python-docx，導入時仍寫 from docx import Document）

pip install python-docx

轉pdf的庫 win32com，在python中是安裝pywin32

pip install pywin32

二、核心邏輯-替換

（1）獲取需要填入的數據，大部分情況是Excel(用Pandas讀取方便)或JSON

（2）在Word中需要填寫的位置填上唯一標識的字符串（盡量短，比如我之前用NAME，結果被拆分成了N和AME），用代碼打開Word，找到這個唯一標識的字符串，和原數據進行替換操作，重新保存即可

（3）轉為PDF就很簡單了

替換Word內容代碼如下：

from docx import Document
import pandas as pd
import json

def _replace_in_paragraphs(paragraphs, t, replacement):
    """Replace ``t`` with ``replacement`` inside each run of each paragraph."""
    for paragraph in paragraphs:
        if t not in paragraph.text:
            continue
        # Editing run.text (rather than paragraph.text) keeps each run's
        # formatting. A placeholder that was split across several runs is not
        # matched here, so keep placeholders short; when a replacement
        # silently fails, inspect paragraph.text / paragraph.runs.
        for run in paragraph.runs:
            if t in run.text:
                run.text = run.text.replace(t, replacement)


def _replace_in_tables(tables, t, replacement, seen):
    """Replace ``t`` in every cell of ``tables``, editing each cell only once."""
    for table in tables:
        for row in table.rows:
            for cell in row.cells:
                # A merged cell is reported once per grid position it spans.
                # Key on the underlying element (python-docx exposes it as
                # ``_tc``) so a value that itself contains ``t`` is not
                # substituted a second time.
                marker = getattr(cell, "_tc", cell)
                if id(marker) in seen:
                    continue
                seen[id(marker)] = marker  # hold a reference so the id stays unique
                _replace_in_paragraphs(cell.paragraphs, t, replacement)
                # Tables nested inside a cell are not part of cell.paragraphs.
                _replace_in_tables(getattr(cell, "tables", ()), t, replacement, seen)


def replaceText(wb, t, value):
    """Replace every occurrence of the placeholder ``t`` in a document.

    Body paragraphs and all table cells (including nested tables) are
    searched. The text is changed run by run so existing formatting is kept.

    :param wb: document object exposing ``paragraphs`` and ``tables``
        (a python-docx ``Document``).
    :param t: placeholder text to look for; an empty placeholder is ignored.
    :param value: replacement value, converted with ``str()``.
    :return: None. ``wb`` is modified in place.
    """
    if not t:
        # "" is contained in every string and str.replace("", x) would insert
        # x between all characters, so there is nothing sensible to do.
        return
    replacement = str(value)
    _replace_in_paragraphs(wb.paragraphs, t, replacement)
    _replace_in_tables(wb.tables, t, replacement, {})

# Approximate centring in a Word table cell by prepending spaces to the text.
# The default width of 11 is, per the original note, the longest text the
# target cell can hold without wrapping.
def getCenterText(text, width=11):
    """Strip ASCII spaces from ``text`` and left-pad it with spaces to ``width``.

    :param text: cell text; every ``' '`` character in it is removed first.
    :param width: total length to pad to (default 11). Text that is already
        at least this long is returned without padding.
    :return: the space-stripped text, right-aligned in ``width`` characters.
    """
    return text.replace(' ', '').rjust(width)

# Script entry point
if __name__ == '__main__':
    # To load the records from Excel instead of the inline sample below:
    # loan_data = pd.read_excel(r"C:\Users\Administrator\Desktop\排名\匯總.xlsx",
    #                           sheet_name="Sheet1", header=0, names=None, index_col=0)
    # jsonstr = loan_data.to_json(orient='records', force_ascii=False)

    # NOTE(review): some literals below carry parenthesised pinyin (e.g.
    # "(yōu)", "(jié)") that looks like a text-extraction artifact; they are
    # left untouched here - confirm the intended values.
    template_path = r"C:\Users\Administrator\Desktop\排名\模版.docx"
    result_pattern = r"C:\Users\Administrator\Desktop\排名\結(jié)果(%s).docx"

    loan_data = [
        {"AME": "張三", "XX": "優(yōu)秀"},
        {"AME": "李四", "XX": "良好"}
    ]

    for record in loan_data:
        person = record.get('AME')
        grade = record.get('XX')
        # Open a fresh copy of the template for every record.
        document = Document(template_path)
        replaceText(document, 'AME', person)  # placeholder AME -> person's name
        replaceText(document, 'XX', getCenterText(grade))  # table value, space-padded
        document.save(result_pattern % person)
        print(person)
    print("完成")

轉為PDF代碼如下：

from win32com.client import Dispatch
from os import walk

wdFormatPDF = 17  # value passed to Word as FileFormat to request PDF output


def doc2pdf(input_file):
    """Convert one Word document to a PDF saved next to it.

    The PDF gets the same base name as ``input_file`` with a ``.pdf``
    extension, whatever the source extension is (``.doc`` or ``.docx``).

    :param input_file: path of the Word document to convert.
    :return: None.
    """
    import os  # function-scope import keeps this snippet self-contained

    # Word runs as a separate application, so hand it absolute paths instead
    # of paths relative to this script's working directory.
    source = os.path.abspath(input_file)
    # Swap only the real extension. A plain replace(".docx", ".pdf") left
    # ".doc" paths unchanged (the PDF was then written over the source file)
    # and could also rewrite ".docx" occurring earlier in the path.
    target = os.path.splitext(source)[0] + ".pdf"

    word = Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(source)
        try:
            doc.SaveAs(target, FileFormat=wdFormatPDF)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, even when opening or saving fails, so no
        # Word process is left running.
        word.Quit()

# Script entry point
if __name__ == '__main__':
    # Convert every Word document found under this folder to PDF.
    directory = "C:\\Users\\Administrator\\Desktop\\排名"
    word_suffixes = (".doc", ".docx")
    for root, _dirs, filenames in walk(directory):
        for filename in filenames:
            print(filename)
            if not filename.endswith(word_suffixes):
                continue
            doc2pdf(root + "\\" + filename)
    print("全部完成")

三、知識延展

使用python不改變格式的情況下批量替換word里面的內容

需要使用如${name}、${id}這樣的模板

實現代碼

import os
import io
from python_docx_replace import docx_replace,docx_get_keys
from docx import Document
from random import randrange
# Roster: one "index,name,student id" record per line.
student_list='''1,張三,2202330301
2,李四,2202330302
3,王五,2202330303
'''

# Pool of review comments and the list of scores.
# NOTE(review): some comment strings carry parenthesised pinyin (e.g. "(dá)")
# that looks like a text-extraction artifact; they are left untouched here -
# confirm the intended wording.
review = [
    "思路清晰、語言表達(dá)準(zhǔn)確，整體表現(xiàn)良好",
    "，準(zhǔn)備工作一般，整體表現(xiàn)良好",
    "思路清晰、語言表達(dá)一般、回答問題有理論依據(jù)，",
    "有個(gè)別格式不對(duì)的需要修改。",
]
score = ['70', '88', '81']

# The roster ends with a newline, so split("\n") produced a trailing empty
# string whose split(",")[1] raised IndexError. splitlines() plus the blank
# filter yields only real records.
students = [line for line in student_list.splitlines() if line.strip()]

# One {"name": ..., "sid": ...} dict per roster line, in roster order.
students_dict_array = []

for student in students:
    fields = student.split(",")
    student_dict = {"name": fields[1], "sid": fields[2]}
    students_dict_array.append(student_dict)

print(students_dict_array)

# Base directory from which alter() builds its save location.
path = "C:\\BaiduSyncdisk\\大學(xué)生信息安全競賽評(píng)分表\\"


def alter(file,name,id,num):
    """
    Fill the ``${...}`` placeholders of one Word document and save it.

    :param file: path of the document to open; also the last component of the save path
    :param name: value substituted for the ``name`` key
    :param id: value substituted for the ``id`` key
    :param num: index into the module-level ``score`` list
    :return: None
    """
    template = Document(file)
    # Print the placeholder keys found in the document.
    found_keys = docx_get_keys(template)
    print(found_keys)
    # The review comment is picked at random; the score is looked up by index.
    comment = review[randrange(len(review))]
    mark = score[num]
    docx_replace(template, name=name, id=id, content=comment, score=mark)
    # NOTE: os.path.join discards every component that precedes an absolute
    # one, so when `file` is an absolute path the result is `file` itself and
    # the document is saved over its source instead of under <path>/new.
    destination = os.path.join(path, "new", file)
    template.save(destination)

 
# Walk the folder: fill each file with the next student's data, then rename
# the file after that student.
# NOTE(review): files and students are paired purely by position, and every
# file gets a ".doc" extension - confirm both are intended.
num = 0
for file in os.listdir(path):
    student = students_dict_array[num]
    source = os.path.join(path, file)
    alter(source, student["name"], student["sid"], num)
    renamed = os.path.join(path, "選手-" + student["sid"][-2:] + student["name"] + "-記錄表") + ".doc"
    os.rename(source, renamed)
    num += 1

到此這篇關于Python如何動態修改Word文檔內容并保留格式樣式的文章就介紹到這了,更多相關Python修改Word內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章希望大家以后多多支持腳本之家！

您可能感興趣的文章: