python批量替换多个word文档内容

发表于 2019-11-27 更新于 2022-06-17 分类于 rd ， python Changyan：本文字数： 1.8k 阅读时长 ≈ 7 分钟

之前博客里写过一个简单的切割图片的桌面小应用：python开发的桌面程序代码详解，今天同事有需求想批量修改一批word文档中包含的指定关键词，索性以练手的心态把这个东西给做了。

环境准备

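PyQt5、python-docx 两个必须的扩展包（安装命令：`pip install PyQt5 python-docx`，代码中通过 `import docx` 使用）；另外 `win.py` 中保留的废弃切图代码还导入了 PIL，因此还需要安装 Pillow。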

代码展示

代码几乎完全参考了上述文章中的代码，功能修改为了替换，且代码中含有注释，所以这里直接上代码(注：代码中有很多废弃代码，大家自由删除，我直接复制过来的，懒了)：

`win.py`

import sys
import os
from PyQt5.QtWidgets import QWidget, QApplication, QGroupBox, QPushButton, QLabel, QHBoxLayout, QVBoxLayout, QGridLayout, QFormLayout, QLineEdit, QTextEdit, QInputDialog, QFileDialog, QMessageBox, QDesktopWidget
from PyQt5.QtGui import QIntValidator, QDoubleValidator, QRegExpValidator, QTextCursor, QIcon, QPixmap
from PyQt5.QtCore import QRegExp
from PIL import Image
import docx


class ImgCut(QWidget):
    """Desktop window that batch-replaces keywords in the .docx files of a folder.

    The user picks a directory, types comma-separated keywords and their
    replacements, and every ``.docx`` file directly inside that directory is
    rewritten in place.  The class name and the image-related members are
    leftovers of the image-cutting tool this window was copied from; they are
    kept so that existing references keep working.
    """

    # Icon shown on the main window and on message boxes.
    ICON_PATH = r'E:\site\python\cutimg\favicon.ico'
    # Placeholder picture of the deprecated preview area.
    DEFAULT_IMG_PATH = r"E:\site\python\cutimg\default.jpg"

    def __init__(self):
        """Create all widgets, then lay them out through ``initUi``."""
        super().__init__()

        # Document folder: read-only field that is filled by the button
        self.forder_label = QLabel('文档位置：')
        self.forder_text = QLineEdit()
        self.forder_text.setDisabled(True)
        self.forder_btn = QPushButton('选择文档位置…')

        # Keywords to search for
        self.foot_label = QLabel('关键词：')
        self.foot_text = QLineEdit()
        self.foot_text.setPlaceholderText('多个关键词之间使用英文半角,分割')

        # Replacement words
        self.nums_label = QLabel('替换后：')
        self.nums_text = QLineEdit()
        self.nums_text.setPlaceholderText('与关键词一致，一一对应')

        # Submit button
        self.submit_btn = QPushButton('开始替换')
        self.submit_btn.setStyleSheet("QPushButton{padding:20px 4px}")

        # Image label (only used by the deprecated preview area)
        self.img_label = QLabel()

        # Copy button (deprecated: created and wired, but never added to a layout)
        self.copy_btn = QPushButton('复制代码')
        self.copy_btn.setStyleSheet("QPushButton{padding:20px 4px}")
        # Text area that lists the result for every processed file
        self.res_teatarea = QTextEdit()

        self.initUi()

    def initUi(self):
        """Build both group boxes and stack them vertically in the window."""
        self.createGridGroupBox()
        self.creatFormGroupBox()

        hboxLayout = QHBoxLayout()
        hboxLayout.addWidget(self.gridGroupBox)

        mainLayout = QVBoxLayout()
        mainLayout.addLayout(hboxLayout)
        mainLayout.addWidget(self.formGroupBox)
        self.setLayout(mainLayout)

    # Parameter area
    def createGridGroupBox(self):
        """Create the "基本参数" group box, wire its buttons and set title/icon."""
        self.gridGroupBox = QGroupBox("基本参数")
        layout = QGridLayout()

        # "Choose folder" button
        self.forder_btn.clicked.connect(self.savePath)
        # "Start replacing" button
        self.submit_btn.clicked.connect(self.submit)
        layout.setSpacing(10)

        # Grid: one row per input, the submit button spans all three columns
        layout.addWidget(self.forder_label, 1, 0)
        layout.addWidget(self.forder_text, 1, 1)
        layout.addWidget(self.forder_btn, 1, 2)

        layout.addWidget(self.foot_label, 2, 0)
        layout.addWidget(self.foot_text, 2, 1, 1, 2)
        layout.addWidget(self.nums_label, 3, 0)
        layout.addWidget(self.nums_text, 3, 1, 1, 2)

        layout.addWidget(self.submit_btn, 4, 0, 1, 3)
        layout.setColumnStretch(1, 10)
        self.gridGroupBox.setLayout(layout)
        self.setWindowTitle('word替换_Anthor By Tony Yu')
        self.setWindowIcon(QIcon(self.ICON_PATH))

    # Image preview area (deprecated, not called by initUi)
    def creatVboxGroupBox(self):
        """Create the "图片预览" group box showing the default picture."""
        self.vboxGroupBox = QGroupBox("图片预览")
        layout = QVBoxLayout()
        pixmap = QPixmap(self.DEFAULT_IMG_PATH).scaled(300, 300)
        self.img_label.setPixmap(pixmap)  # show the picture on the label
        layout.addWidget(self.img_label)
        self.vboxGroupBox.setLayout(layout)

    # Result area
    def creatFormGroupBox(self):
        """Create the "替换结果" group box that holds the result text area."""
        self.formGroupBox = QGroupBox("替换结果")
        layout = QGridLayout()

        layout.addWidget(self.res_teatarea, 1, 0)
        # The copy button is wired up but intentionally not placed in the layout
        self.copy_btn.clicked.connect(self.copyText)

        self.formGroupBox.setLayout(layout)

    # Message box
    def showMsg(self, tit, content, icon=3):
        """Show a modal message box with a single "确定" button.

        tit: window title of the box.
        content: message text.
        icon: a ``QMessageBox.Icon`` member or its integer value; the default
            3 equals ``QMessageBox.Critical`` and 1 ``QMessageBox.Information``.
        """
        box = QMessageBox(QMessageBox.Question, tit, content)
        # Icon in the title bar of the message box
        box.setWindowIcon(QIcon(self.ICON_PATH))
        # Custom button so that the caption can be Chinese
        box.addButton('确定', QMessageBox.YesRole)
        # Callers pass plain ints; convert them so that setIcon always gets
        # the enum type it is declared with.
        box.setIcon(QMessageBox.Icon(icon))
        box.exec()

    # Choose the document folder
    def savePath(self):
        """Ask for a directory and store it in the folder text field."""
        path = QFileDialog.getExistingDirectory(self, "请选择您要保存的位置")
        if not path:
            # An empty string means the dialog was cancelled: not an error.
            return
        if os.path.exists(path):
            self.forder_text.setText(path)
        else:
            self.showMsg('错误', '您选择的保存路径不存在，请重新选择！')

    # Run the replacement
    def submit(self):
        """Replace the keywords in every .docx file of the chosen folder.

        When both comma-separated lists have the same length, keywords are
        replaced position by position; otherwise every keyword is replaced by
        the first replacement word.  One line per file is appended to the
        result area, and a file that cannot be processed is reported there
        instead of aborting the batch.

        Returns False when the input is incomplete, otherwise None.
        """
        forder = self.forder_text.text()
        if not forder:
            self.showMsg('错误', '您还没有选择文档所在目录')
            return False

        keyword_text = self.foot_text.text()
        if not keyword_text:
            self.showMsg('错误', '请填写要替换的关键词')
            return False

        before_words = keyword_text.split(',')
        after_words = self.nums_text.text().split(',')

        # Pair every keyword with its replacement before filtering, so that
        # positions stay aligned.
        if len(before_words) == len(after_words):
            pairs = zip(before_words, after_words)
        else:
            pairs = ((word, after_words[0]) for word in before_words)
        # An empty keyword (e.g. from a trailing comma) would make
        # str.replace insert the new word between all characters.
        pairs = [(old, new) for old, new in pairs if old]
        if not pairs:
            self.showMsg('错误', '请填写要替换的关键词')
            return False

        for file in self.get_files(forder):
            try:
                doc = docx.Document(file)
                for old, new in pairs:
                    self.replace_word(doc, old, new)
                # Overwrite the source document in place
                doc.save(file)
            except Exception as err:
                # Batch boundary: report the broken/locked file and go on.
                shtml = "{}替换失败：{}\n".format(file, err)
            else:
                shtml = "{}替换完成\n".format(file)
            # Append the line at the end of the result area
            self.res_teatarea.moveCursor(QTextCursor.End)
            self.res_teatarea.insertPlainText(shtml)

    # List the docx files
    def get_files(self, forder):
        """Return the paths of the .docx files directly inside *forder*.

        Paths are built as ``forder + '/' + name`` and returned in sorted
        order.  Names starting with ``~$`` are skipped (presumably Word's
        lock files, which are not real documents), as are directories.
        """
        files = []
        for name in sorted(os.listdir(forder)):
            if name.startswith('~$'):
                continue
            if not name.lower().endswith('.docx'):
                continue
            path = forder + '/' + name
            if os.path.isfile(path):
                files.append(path)
        return files

    # Replace a keyword in a document
    def replace_word(self, doc, before_words, after_words):
        """Replace *before_words* by *after_words* in the body and tables of *doc*.

        doc: an opened python-docx document.
        before_words: the text to search for; an empty string is ignored.
        after_words: the text to insert instead.

        Only paragraphs that contain the keyword are modified.  Headers and
        footers are not visited.
        """
        if not before_words:
            return
        for para in doc.paragraphs:
            ImgCut._replace_in_paragraph(para, before_words, after_words)
        ImgCut._replace_in_tables(doc.tables, before_words, after_words, set())

    @staticmethod
    def _replace_in_tables(tables, old, new, seen):
        """Replace *old* by *new* in every cell of *tables*, nested tables included."""
        for table in tables:
            for row in table.rows:
                for cell in row.cells:
                    # A merged cell is listed once per grid position it
                    # covers; remember its XML element to handle it once.
                    # NOTE(review): `_tc` is a private python-docx attribute.
                    tc = getattr(cell, '_tc', None)
                    if tc is not None:
                        if tc in seen:
                            continue
                        seen.add(tc)
                    for para in cell.paragraphs:
                        ImgCut._replace_in_paragraph(para, old, new)
                    ImgCut._replace_in_tables(cell.tables, old, new, seen)

    @staticmethod
    def _replace_in_paragraph(para, old, new):
        """Replace *old* by *new* in one paragraph, touching as few runs as possible."""
        runs = para.runs
        texts = [run.text for run in runs]
        joined = ''.join(texts)
        if old not in joined:
            return
        if joined.count(old) == sum(text.count(old) for text in texts):
            # Every occurrence lies inside a single run: rewrite only those
            # runs, because assigning run.text replaces the run's content.
            for run, text in zip(runs, texts):
                if old in text:
                    run.text = text.replace(old, new)
        else:
            # The keyword is split across runs: merge the text into the first
            # run (keeping its formatting) and empty the others.
            runs[0].text = joined.replace(old, new)
            for run in runs[1:]:
                run.text = ''

    # Copy the result text (deprecated)
    def copyText(self):
        """Copy the content of the result area to the clipboard."""
        data = self.res_teatarea.toPlainText()
        if data:
            clipboard = QApplication.clipboard()
            clipboard.setText(data)
            self.showMsg('信息', '内容以成功复制到剪贴板', 1)
        else:
            self.showMsg('错误', '代码为空，没有代码可以复制')

    # Deprecated code, free to delete
    def cutImg(self, file, head, foot, pre, forder, nums):
        """Cut an image into *nums* horizontal slices and save them as JPEG.

        file: path of the image to cut.
        head / foot: heights at the top and bottom that are discarded.
        pre: file-name prefix of the slices.
        forder: directory the slices are written to.
        nums: number of slices.
        """
        img = Image.open(file)
        width, height = img.size
        # Height of one slice, without the discarded head and foot
        item_height = (height - head - foot) / nums
        for i in range(nums):
            # crop box: left, upper, right, lower
            croped = img.crop((0, (head + i * item_height), width, (i + 1) * item_height + head))
            # Slice names are 1-based and zero-padded to two digits
            imgName = '{}{:02d}.jpg'.format(pre, i + 1)
            # Append the matching <img> tag at the end of the result area
            self.res_teatarea.moveCursor(QTextCursor.End)
            shtml = '<img src="{}" />\n'.format(imgName)
            self.res_teatarea.insertPlainText(shtml)
            croped.save(os.path.join(forder, imgName))
        self.showMsg('成功', '图片切割完成，请到目标文件夹查看吧', 1)


if __name__ == '__main__':
    # Standalone start: create the Qt application, show the window and hand
    # the exit status of the event loop back to the interpreter.
    application = QApplication(sys.argv)
    window = ImgCut()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)

我这里要生成exe的，所以我这里准备了一个入口文件

`main.py`

import os
import sys

# In a frozen build (the exe produced by PyInstaller) the bundled files are
# expected under sys._MEIPASS; put that directory first on PATH before PyQt5
# is imported.  os.pathsep replaces the hard-coded ';', and a missing PATH
# variable or _MEIPASS attribute no longer raises.
_meipass = getattr(sys, '_MEIPASS', None)
if hasattr(sys, 'frozen') and _meipass:
    os.environ['PATH'] = _meipass + os.pathsep + os.environ.get('PATH', '')

import win
from PyQt5.QtWidgets import QApplication, QMainWindow

if __name__ == '__main__':
    app = QApplication(sys.argv)
    # The window class is a plain QWidget, so it is shown directly; the
    # QMainWindow that used to be created here was never used.
    ui = win.ImgCut()
    ui.show()
    sys.exit(app.exec_())

打包`exe`

文件夹原有文件为：

file_version_info.txt #exe打包描述文件，程序属性
main.py # 打包入口文件
tony.ico # exe软件图标
win.py # 主功能文件

file_version_info.txt

# UTF-8
#
# Version resource description handed to PyInstaller through --version-file;
# it fills the properties of the generated exe.
#
# For more details about fixed file info 'ffi' see:
# http://msdn.microsoft.com/en-us/library/ms646997.aspx
VSVersionInfo(
  ffi=FixedFileInfo(
    # filevers and prodvers should be always a tuple with four items: (1, 2, 3, 4)
    # Set not needed items to zero 0.
    # NOTE(review): these read 10.5.3819.400 while the FileVersion and
    # ProductVersion strings below say 1.0 - confirm which one is intended.
    filevers=(10, 5, 3819, 400),
    prodvers=(10, 5, 3819, 400),
    # Contains a bitmask that specifies the valid bits 'flags'
    mask=0x17,
    # Contains a bitmask that specifies the Boolean attributes of the file.
    flags=0x0,
    # The operating system for which this file was designed.
    # 0x4 - NT and there is no need to change it.
    OS=0x4,
    # The general type of file.
    # 0x1 - the file is an application.
    fileType=0x1,
    # The function of the file.
    # 0x0 - the function is not defined for this fileType
    subtype=0x0,
    # Creation date and time stamp.
    date=(0, 0)
    ),
  kids=[
    StringFileInfo(
      [
      StringTable(
        # Hex 0409 = 1033 and hex 04b0 = 1200, i.e. the same pair of numbers
        # as the 'Translation' entry at the bottom.
        u'040904b0',
        [StringStruct(u'CompanyName', u'Tony'),
        StringStruct(u'FileDescription', u'作者托小尼646547989@qq.com'),
        StringStruct(u'FileVersion', u'1.0'),
        StringStruct(u'InternalName', u'Replace Word'),
        StringStruct(u'LegalCopyright', u'Copyright 2019 Tony. All rights reserved.'),
        StringStruct(u'OriginalFilename', u'replace_word.exe'),
        StringStruct(u'ProductName', u'Word批量替换'),
        StringStruct(u'ProductVersion', u'1.0'),
        StringStruct(u'CompanyShortName', u'Tony'),
        StringStruct(u'ProductShortName', u'Word批量替换'),
        StringStruct(u'LastChange', u'b4ce371876a3dcbfef2affeee4ea9c9163d4628c-refs/branch-heads/3538@{#516}'),
        StringStruct(u'Official Build', u'1'),
        StringStruct(u'SpecialBuild', u'1023'),
        StringStruct(u'PrivateBuild', u'10016'),
        StringStruct(u'KernelBuild', u'129'),
        StringStruct(u'ExpressBuild', u'1'),
        StringStruct(u'KernelVersion', u'70.0.3538.25')])
      ]), 
    # [1033, 1200] - presumably language id and code page; see the
    # VERSIONINFO documentation linked above.
    VarFileInfo([VarStruct(u'Translation', [1033, 1200])])
  ]
)

执行打包

pyinstaller -F -w -i tony.ico --version-file=file_version_info.txt E:\site\python\docx_exc\main.py

测试

生成后的文件夹里多了一个dist目录，其中有一个main.exe可执行文件，我们执行一下，测试看看

经测试，正常可以批量替换，关于打包详细使用，可参考另一个文章：python打包文件之pyinstaller

环境准备

代码展示

win.py

main.py

打包exe

执行打包

测试

`win.py`

`main.py`

打包`exe`