快捷導(dǎo)航

基于Python開發(fā)批量提取Excel圖片的小工具

更新時間：2025年03月19日 15:00:27 作者：很菜的小jiang

這篇文章主要為大家詳細介紹了如何使用Python中的openpyxl庫開發(fā)一個小工具,可以實現(xiàn)批量提取Excel圖片,有需要的小伙伴可以參考一下

目前有一個需求，就是批量讀取當前目錄下所有文件夾里的Excel文件，去獲取出Excel文件中的圖片，并根據(jù)圖片對應(yīng)的行去獲取某列的值作為命名方式進行命名，并統(tǒng)一保存在一個新的文件夾里面。

自己花了幾個小時寫了一個小工具出來，利用的是openpyxl這個庫，其他庫用了提取效果不太好，這個提取效果挺不錯的。以下代碼要根據(jù)實際需求，將“貨品編碼”改成你對應(yīng)需要的值。如果你不需要命名規(guī)則，則直接去掉都行。

第一個版本，針對于不規(guī)則分布圖片的Excel，進行每個單元格進行遍歷，比較費時：

import os
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
from openpyxl_image_loader import SheetImageLoader
from openpyxl.utils.cell import get_column_letter
from PIL import Image
 
# 創(chuàng)建文件夾
def create_folder():
    if not os.path.exists('images'):
        os.makedirs('images')
    print("成功創(chuàng)建/更新文件夾!")
 
# 獲取當前目錄下的文件夾
def get_folders(directory):
    folders = []
    for entry in os.scandir(directory):
        if entry.is_dir():
            folders.append(entry.name)
    print("成功獲取當前目錄的文件夾！")
    return folders
 
# 提取圖片
def extract_images():
    # 創(chuàng)建存放文件夾
    create_folder()
    # 獲取當前目錄下的文件夾
    folders = get_folders('.')
    i = 1
    num = 1
    # 遍歷當前目錄下的文件夾
    for folder in folders:
        print(f"正在遍歷第{i}個文件夾{folder}......")
        # 進行提取圖片
        num = extract_images_from_excel(folder, num)
        i += 1
 
# 進行提取圖片
def extract_images_from_excel(folder, num):
    # 遍歷當前文件夾內(nèi)的所有文件
    for entry in os.scandir('.\\'+folder):
        # 如果當前對象是文件且后綴是xlsx
        if entry.is_file() and entry.name.endswith('.xlsx'):
            print(f'{folder}下的Excel文件路徑為：{entry.path}')
            # 打開當前文件
            wb = load_workbook(entry.path)
            # 獲取當前xlsx的所有Sheet表
            worksheets = wb.worksheets
            # 遍歷xlsx中每一個Sheet
            for ws in worksheets:
                # 獲取當前列名為貨品編碼的列序號
                code_index = ''
                for column in ws.iter_cols():
                    if column[0].value == "貨品編碼":
                        code_index = column[0].column
                # 創(chuàng)建圖片加載對象
                image_loader = SheetImageLoader(ws)
                # 每一行進行遍歷，獲取行序號和該行數(shù)據(jù)
                for row_index, row in enumerate(ws.rows, start=1):
                    # 每一列進行遍歷
                    for column_index in range(1, len(row) + 1):
                        # 獲取列序號
                        column_letter = get_column_letter(column_index)
                        # 如果當前單元格是圖片
                        if image_loader.image_in(f'{column_letter}{row_index}'):
                            # 獲取圖片
                            image = image_loader.get(f'{column_letter}{row_index}')
                            # 獲取圖片格式
                            image_type = image.format
                            # 獲取當前行的貨品編碼列的值
                            code = ws.cell(row=(row_index), column=code_index).internal_value
                            # 保存圖片(保存命名為 序號_貨品編碼)
                            print(f'正在提取單元格{column_letter}{row_index + 1}的圖片......')
                            image.save(f"./images/[code]_{num}.{image_type}")
                            # 序號遞增
                            num += 1
            # 關(guān)閉文件對象
            wb.close()
    return num
 
 
if __name__ == '__main__':
    print("此版本是針對于圖片分布不規(guī)則的情況，提取圖片速度尚且較慢")
    print("開始提取......")
    # 提取圖片
    extract_images()
    print("提取完成！")

第二個版本，針對于某一列統(tǒng)一分布圖片的Excel，只會進行有圖片那一列的遍歷，比較快速：

import os
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
from openpyxl_image_loader import SheetImageLoader
from openpyxl.utils.cell import get_column_letter
from PIL import Image
 
# 創(chuàng)建文件夾
def create_folder():
    if not os.path.exists('images'):
        os.makedirs('images')
    print("成功創(chuàng)建/更新文件夾!")
 
# 獲取當前目錄下的文件夾
def get_folders(directory):
    folders = []
    for entry in os.scandir(directory):
        if entry.is_dir():
            folders.append(entry.name)
    print("成功獲取當前目錄的文件夾！")
    return folders
 
# 提取圖片
def extract_images():
    # 創(chuàng)建存放文件夾
    create_folder()
    # 獲取當前目錄下的文件夾
    folders = get_folders('.')
    i = 1
    num = 1
    # 遍歷當前目錄下的文件夾
    for folder in folders:
        print(f"正在遍歷第{i}個文件夾{folder}......")
        # 進行提取圖片
        num = extract_images_from_excel(folder, num)
        i += 1
 
# 進行提取圖片
def extract_images_from_excel(folder, num):
    # 遍歷當前文件夾內(nèi)的所有文件
    for entry in os.scandir('.\\'+folder):
        # 如果當前對象是文件且后綴是xlsx或者xls
        if entry.is_file() and (entry.name.endswith('.xlsx') or entry.name.endswith('.xls')):
            print(f'{folder}下的Excel文件路徑為：{entry.path}')
            # 打開當前文件
            wb = load_workbook(entry.path)
            # 獲取當前xlsx的所有Sheet表
            worksheets = wb.worksheets
            # 遍歷xlsx中每一個Sheet
            for ws in worksheets:
                # 獲取當前列名為貨品編碼的列序號
                code_index = ''
                for column in ws.iter_cols():
                    if column[0].value == "貨品編碼":
                        code_index = column[0].column
                # 創(chuàng)建圖片加載對象
                image_loader = SheetImageLoader(ws)
                # 記錄第一次遍歷的標志
                img_sign_index = ''
                # 每一行進行遍歷，獲取行序號和該行數(shù)據(jù)
                for row_index, row in enumerate(ws.rows, start=1):
                    # 只有第一次才會進行每列遍歷，去找到圖片所在的列
                    if img_sign_index == '':
                        # 每一列進行遍歷
                        for column_index in range(1, len(row) + 1):
                            # 獲取列序號
                            column_letter = get_column_letter(column_index)
                            if image_loader.image_in(f'{column_letter}{row_index}'):
                                # 獲取對應(yīng)圖片的列序號
                                img_sign_index = column_letter
                                break
                    # 如果不為空，則證明有圖片，反之直接跳過
                    if img_sign_index != '':
                        # 后面遍歷直接去找圖片所在的列
                        image = image_loader.get(f'{img_sign_index}{row_index}')
                        # 獲取圖片格式
                        image_type = image.format
                        # 獲取當前行的貨品編碼列的值
                        code = ws.cell(row=(row_index), column=code_index).internal_value
                        # 保存圖片(保存命名為 序號_貨品編碼)
                        print(f'正在提取單元格{img_sign_index}{row_index + 1}的圖片......')
                        image.save(f"./images/{num}_[code].{image_type}")
                        # 序號遞增
                        num += 1
 
            # 關(guān)閉文件對象
            wb.close()
    return num
 
 
if __name__ == '__main__':
    print("此版本是針對于圖片集中分布在一列的情況，能更快提取圖片出來")
    print("開始提取......")
    # 提取圖片
    extract_images()
    print("提取完成！")

第三個版本更新

此版本不是遍歷單元格，是直接找圖片，再鎖定圖片的中心行位置去找相應(yīng)的貨品編碼，效率更高，而且不會因為圖片位于單元格邊緣存在識別不到的問題。

import os
 
from openpyxl import load_workbook
import os
from openpyxl_image_loader import SheetImageLoader
from openpyxl.utils.cell import get_column_letter
 
# 創(chuàng)建文件夾
def create_folder():
    if not os.path.exists('images'):
        os.makedirs('images')
    print("成功創(chuàng)建/更新文件夾!")
 
# 獲取當前目錄下的文件夾
def get_folders(directory):
    folders = []
    for entry in os.scandir(directory):
        if entry.is_dir():
            folders.append(entry.name)
    print("成功獲取當前目錄的文件夾！")
    return folders
 
# 提取圖片
def extract_images():
    # 創(chuàng)建存放文件夾
    create_folder()
    # 獲取當前目錄下的文件夾
    folders = get_folders('.')
    i = 1
    num = 1
    # 遍歷當前目錄下的文件夾
    for folder in folders:
        print(f"正在遍歷第{i}個文件夾{folder}......")
        # 進行提取圖片
        num = extract_images_from_excel(folder, num)
        i += 1
 
# 進行提取圖片
def extract_images_from_excel(folder, num):
    # 遍歷當前文件夾內(nèi)的所有文件
    for entry in os.scandir('.\\'+folder):
        # 如果當前對象是文件且后綴是xlsx或者xls
        if entry.is_file() and entry.name.endswith('.xlsx'):
            print(f'{folder}下的Excel文件路徑為：{entry.path}')
            # 打開當前文件
            wb = load_workbook(entry.path)
            # 遍歷每一個Sheet
            for sheet_name in wb.sheetnames:
                sheet = wb[sheet_name]
                image_loader = SheetImageLoader(sheet)
                # 獲取當前列名為貨品編碼的列序號
                code_index = ''
                for column in sheet.iter_cols():
                    if column[0].value == "貨品編碼":
                        code_index = column[0].column
                # 遍歷Sheet中的所有圖片
                for image in sheet._images:
                    # 獲取圖片中心行數(shù)，判斷貨品編碼是哪一個
                    row_index = (int(((image.anchor._from.row + 1) + (image.anchor.to.row + 1)) / 2))
                    # 獲取當前行的貨品編碼列的值(取中間值)
                    code = sheet.cell(row=row_index, column=code_index).value
                    # 獲取圖片格式
                    img_format = image.format
                    # 重新將圖片獲取出來(因為獲取下標這個image沒有存儲方法)，直接通過定位左上角坐標將圖片取出來
                    img = image_loader.get(f'{get_column_letter(image.anchor._from.col + 1)}{image.anchor._from.row + 1}')
                    # 保存圖片
                    print(f'正在提取貨品編碼為[code]的圖片{image}......')
                    img.save(f'./images/{num}_[code].{img_format}')
                    # 序號遞增
                    num += 1
 
            # 關(guān)閉文件對象
            wb.close()
    return num
 
# v1.0：此版本是針對于圖片分布不規(guī)則的情況，提取圖片速度尚且較慢
# v1.1：此版本是針對于圖片集中分布在一列的情況，能更快提取圖片出來。
# v1.2：此版本解決圖片位于Excel邊界時存在的問題，只要圖片中心行在這一行，就可以匹配相應(yīng)的國家編碼，同時不用去遍歷，直接獲取圖片。
if __name__ == '__main__':
    print("開始提取......")
    # 提取圖片
    extract_images()
    print("提取完成！")

第四個版本：增加了圖片的壓縮，不需要壓縮的可以直接不調(diào)用壓縮犯法即可，增加了交互，聽取了評論區(qū)大佬的意見，現(xiàn)在可以提取同一單元格多張圖片。

import math
 
from openpyxl import load_workbook
import os
from PIL import Image
 
# 命名規(guī)則
good_code = ""
# 命名字典
name_dict = {}
# 圖片數(shù)量
img_num = 0
# 記錄哪些文件夾已經(jīng)被提取過了
folder_name_dict = {}
# 是否輸出提取文本
is_text = True
 
 
# 創(chuàng)建文件夾
def create_folder():
    if not os.path.exists('images'):
        os.makedirs('images')
    print("成功創(chuàng)建/更新images文件夾!")
 
 
# 提取圖片
def extract_images(stop):
    if stop:
        return
    global img_num
    global good_code
    folder = input("請輸入需要提取的文件夾名稱（不輸入則遍歷當前目錄下未提取過的所有文件夾）：")
    good_code = input("請輸入命名規(guī)則對應(yīng)表格中的名字（不輸入則默認為貨品編碼）：")
    if good_code == "":
        good_code = "貨品編碼"
    if folder != '':
        # 查找指定文件夾
        extract_images_from_excel(folder)
    else:
        folders = []
        for entry in os.scandir('.'):
            if entry.is_dir():
                folders.append(entry.name)
        i = 1
        # 記錄可提取的文件夾的數(shù)量
        number = 0
        # 遍歷當前目錄下的文件夾
        for folder in folders:
            if folder in folder_name_dict:
                continue
            print(f"正在遍歷第{i}個文件夾{folder}......")
            # 進行提取圖片
            extract_images_from_excel(folder)
            number += 1
            i += 1
        if number == 0:
            print("沒有可供提取的文件夾了！")
            return
    is_success()
    img_num = 0
    status = input("\n是否繼續(xù)提?。ㄝ斎隮表示是，輸入其他則退出）：")
    if status == "Y" or status == "y":
        extract_images(False)
    else:
        extract_images(True)
 
 
def is_success():
    if img_num == 0:
        if is_text:
            print(f'沒有提取到圖片！')
    else:
        print(f'成功提取{img_num}張圖片！')
        print("圖片提取完成，請到images文件夾中查看！")
 
 
# 進行提取圖片
def extract_images_from_excel(folder):
    global img_num
    global is_text
    is_have_excel = False
    path = os.path.join('.', folder)
    if not os.path.exists(path):
        print(f'{folder}文件夾未找到！')
        return
    # 判斷文件夾是否已經(jīng)被提取過了
    if folder not in folder_name_dict:
        is_text = True
    else:
        print(f'{folder}文件夾已經(jīng)被提取過了！')
        is_text = False
        return
    try:
        # 遍歷當前文件夾內(nèi)的所有文件
        for entry in os.scandir(path):
            # 如果當前對象是文件且后綴是xlsx或者xls
            if entry.is_file() and entry.name.endswith('.xlsx'):
                is_have_excel = True
                print(f'{folder}下的Excel文件路徑為：{entry.path}')
                # 打開當前文件
                wb = load_workbook(entry.path)
                # 遍歷每一個Sheet
                for sheet_name in wb.sheetnames:
                    sheet = wb[sheet_name]
                    # 獲取當前列名為貨品編碼的列序號
                    code_index = ""
                    for column in sheet.iter_cols():
                        if column[0].value == good_code:
                            code_index = column[0].column
                            break
                    if code_index == "":
                        print(f'列名{good_code}在{entry.path}的文件中不存在！')
                        break
                    else:
                        folder_name_dict[folder] = True
                    # 遍歷Sheet中的所有圖片
                    for image in sheet._images:
                        # 獲取圖片中心行數(shù)，判斷貨品編碼是哪一個
                        row_index = (int(((image.anchor._from.row + 1) + (image.anchor.to.row + 1)) / 2))
                        # 獲取當前行的貨品編碼列的值(取中間值)
                        code = ""
                        if code_index != "":
                            code = str(sheet.cell(row=row_index, column=code_index).value)
                        # 獲取圖片格式
                        img_format = image.format
                        # 這個if else只是命名規(guī)則，不重要
                        if code not in name_dict:
                            name_dict[code] = 1
                        else:
                            name_dict[code] = name_dict[code] + 1
                        save_path = f"./images/[code]-{name_dict[code]}.{img_format}"
                        # 保存
                        file = open(save_path, "wb")
                        file.write(image.ref.getvalue())
                        file.close()
                        # 壓縮圖片
                        compress_and_save_image(save_path)
                        img_num += 1
                    break
                # 關(guān)閉文件對象
                wb.close()
    except FileNotFoundError:
        # 處理文件未找到異常
        print(f'{folder}文件夾未找到！')
        extract_images(good_code)
    except Exception as e:
        # 處理其他異常
        print("提取圖片異常:", e)
    if not is_have_excel:
        print(f'{folder}文件夾內(nèi)未找到Excel文件！')
        folder_name_dict[folder] = True
 
 
# 壓縮圖片
def compress_and_save_image(image_path):
    # 打開原始圖片
    original_image = Image.open(image_path)
    # 檢查文件大小，并根據(jù)需要進行進一步壓縮，壓縮到1M
    if os.path.getsize(image_path) > 1024 * 1024:
        size = os.path.getsize(image_path)
        # 壓縮到1mb需要壓縮的比例(百分比)
        quality = math.floor(((1024 * 1024) / size) * 100)
        original_image.save(image_path, optimize=True, quality=quality)
    original_image.close()
 
 
# v1.0：此版本是針對于圖片分布不規(guī)則的情況，提取圖片速度尚且較慢
# v1.1：此版本是針對于圖片集中分布在一列的情況，能更快提取圖片出來。
# v1.2：此版本解決圖片位于Excel邊界時存在的問題，只要圖片中心行在這一行，就可以匹配相應(yīng)的國家編碼，同時不用去遍歷，直接獲取圖片。
# v1.3：此版本是讓用戶自己輸入指定的文件夾，增加異常交互。
# v1.4：此版本增加了對1MB以上圖片的壓縮，解決了多圖片在同一單元格的問題。
if __name__ == '__main__':
    print("開始提取......")
    # 創(chuàng)建存放文件夾
    create_folder()
    # 提取圖片
    extract_images(False)
    # 最后加入輸入語句，以阻塞程序的執(zhí)行
    input("按下任意鍵以關(guān)閉程序")

以上就是基于Python開發(fā)批量提取Excel圖片的小工具的詳細內(nèi)容，更多關(guān)于Python提取Excel圖片的資料請關(guān)注腳本之家其它相關(guān)文章！

您可能感興趣的文章: