快捷導(dǎo)航

Python实现获取带合并单元格的表格数据

更新時(shí)間：2025年05月18日 13:34:40 作者：tainyu

由于在日常运维中经常出现一些带合并单元格的表格，如果要获取数据比较麻烦，所以本文我们就来聊聊如何使用Python实现获取带合并单元格的表格数据吧。

由于在日常运维中经常出现一些带合并单元格的表格，如果要获取数据比较麻烦，现将其封装成类：调用 list_excel_data() 获取列表形式的数据，调用 dict_excel_data() 获取字典格式的数据。

当以字典形式获取数据时要注意，默认以第一行作为字典的key。

代碼如下：

from openpyxl import load_workbook
 
 
class Get_table_data():
    """
    Read a worksheet that contains merged cells, treating every cell of a
    merged range as if it held the value of the range's top-left cell.

    The wrapped sheet must provide ``max_row``, ``max_column``, an iterable
    ``merged_cells`` whose items expose ``min_row``/``max_row``/``min_col``/
    ``max_col``, and ``cell(row=..., column=...)`` returning an object with a
    ``value`` attribute. All coordinates are 1-based ``(row, column)`` tuples.
    """
    def __init__(self,sh):
        """
        Store the worksheet to read from.
        :param sh: worksheet object (see the class docstring for what is used)
        """
        self.sh = sh

    def get_row_col(self):
        """
        Return the sheet dimensions.
        :return: tuple ``(max_row, max_column)`` as reported by the sheet
        """
        return self.sh.max_row, self.sh.max_column

    def get_merge_data(self):
        """
        Expand every merged range of the sheet into the coordinates it covers.
        :return: one list per merged range, each holding the ``(row, column)``
                 tuples of that range in row-major order (top-left first), e.g.
                 ``[[(2, 1), (3, 1)], [(10, 1), (10, 2), (11, 1), (11, 2)]]``
        """
        merge_all_list = []
        for merged_range in self.sh.merged_cells:
            row_min, row_max = merged_range.min_row, merged_range.max_row
            col_min, col_max = merged_range.min_col, merged_range.max_col
            if row_min == row_max and col_min == col_max:
                # A one-cell "range" has nothing to propagate; skip it.
                continue
            merge_all_list.append([
                (x, y)
                for x in range(row_min, row_max + 1)
                for y in range(col_min, col_max + 1)
            ])
        return merge_all_list

    def _anchor_map(self, merge_cell):
        """
        Build a lookup from every merged coordinate to its range's top-left
        coordinate, so each cell can be resolved with one dict access.
        :param merge_cell: list of coordinate lists as returned by get_merge_data()
        :return: dict ``{(row, column): (anchor_row, anchor_column)}``
        """
        anchors = {}
        for group in merge_cell:
            if not group:
                continue
            top_left = group[0]
            for coord in group:
                # setdefault keeps the first range if a coordinate repeats.
                anchors.setdefault(coord, top_left)
        return anchors

    def _resolved_value(self, anchors, row, col):
        """
        Read one cell, redirecting to the top-left cell when it is merged.
        :param anchors: lookup produced by _anchor_map()
        :param row: 1-based row number
        :param col: 1-based column number
        :return: the value stored for that position
        """
        anchor_row, anchor_col = anchors.get((row, col), (row, col))
        return self.sh.cell(row=anchor_row, column=anchor_col).value

    def merge_values(self, merge_cell, cell=None):
        """
        Return the value shared by a merged range.
        :param merge_cell: list of coordinate lists as returned by get_merge_data()
        :param cell: optional ``(row, column)`` tuple. When given, the value of
                     the merged range containing that cell is returned (or the
                     cell's own value if it is not merged). When omitted, the
                     value of the first range in ``merge_cell`` is returned,
                     which is what the one-argument call has always done.
        :return: the cell value, or None when ``merge_cell`` is empty and no
                 ``cell`` was given
        """
        if cell is None:
            if len(merge_cell) == 0:
                return None
            first_row, first_col = merge_cell[0][0]
            return self.sh.cell(row=first_row, column=first_col).value
        row, col = cell
        return self._resolved_value(self._anchor_map(merge_cell), row, col)

    def list_excel_data(self):
        """
        Return all rows of the sheet with merged cells filled in.
        :return: list of rows, each row a list of cell values
        """
        anchors = self._anchor_map(self.get_merge_data())
        max_row, max_col = self.get_row_col()
        table_value = []
        for row in range(1, max_row + 1):
            table_value.append([
                self._resolved_value(anchors, row, col)
                for col in range(1, max_col + 1)
            ])
        return table_value

    def dict_excel_data(self):
        """
        Return the data rows as dicts keyed by the header row.

        Row 1 supplies the keys (its raw cell values); rows 2 onward supply the
        values, with merged cells filled in. Columns sharing the same header
        value overwrite each other within a row.
        :return: list with one dict per data row
        """
        anchors = self._anchor_map(self.get_merge_data())
        max_row, max_col = self.get_row_col()
        # The header is the same for every row, so read it only once.
        titles = [self.sh.cell(row=1, column=col).value for col in range(1, max_col + 1)]
        list_val = []
        for row in range(2, max_row + 1):
            dict_val = {}
            for col, title in enumerate(titles, start=1):
                dict_val[title] = self._resolved_value(anchors, row, col)
            list_val.append(dict_val)
        return list_val
 
# Load the Excel workbook
wb = load_workbook('shebei.xlsx')
# Select the worksheet to read
sheet_sb = wb['sheet']
 
c = Get_table_data(sheet_sb)        # wrap the worksheet in the table-data reader
 
print(c.dict_excel_data())     # dict format
# print(c.list_excel_data())     # list format

知識(shí)延展

Python使用xlrd实现读取合并单元格

操作方法：

1.使用xlrd自帶屬性：merged_cells

# Positions of all merged cells in the sheet, returned as a list of (start row, end row, start column, end column) tuples
merged = sheet.merged_cells # result: [(1,5,0,1),(5,9,0,1)]

2.使用循环判断是合并单元格还是普通单元格，并将合并单元格中的首行值赋值给合并单元格

def get_cell_type(row_index, col_index):
    """Return the value of a cell, whether it is merged or ordinary.

    Relies on two module-level names: ``merged``, a list of
    ``(rlow, rhigh, clow, chigh)`` tuples, and ``sheet``, which provides
    ``cell_value(row, col)``. The ``rhigh``/``chigh`` bounds are treated as
    exclusive.

    :param row_index: row index of the cell
    :param col_index: column index of the cell
    :return: the value of the first cell of the merged range containing the
             position, or the cell's own value if no range contains it
    """
    for rlow, rhigh, clow, chigh in merged:  # walk every merged range
        if rlow <= row_index < rhigh and clow <= col_index < chigh:
            # Inside this range: the value lives in its first cell.
            print('合并單元格')
            return sheet.cell_value(rlow, clow)
    # Decide "ordinary" only after ALL ranges were checked, so the message is
    # printed once and cells outside every merged row still return a value.
    print('普通單元格')
    return sheet.cell_value(row_index, col_index)
# Pass a cell's coordinates directly to get its content
# print(get_cell_type(5, 0))
# Print the content of rows 1-8 of one column using a loop
for i in range(1, 9):
    print(get_cell_type(i, 2))

PS：最简单的读取Excel文件中合并单元格操作

問(wèn)題：

1.当输出内容时，使用坐标来获取并print，若最外层有else会返回2个值（还在确认若无最外层else是否会有其他问题存在）

2.第一次使用时可以正常，再次使用时sheet.merged_cells返回列表为空？？

解决方法：在打开文件时加入formatting_info=True（例如 xlrd.open_workbook(path, formatting_info=True)），就能正常显示

python 读取excel 并处理被合并单元格的数据

以下代码仅是示例，视情况优化调整

from openpyxl import load_workbook
from openpyxl.cell import MergedCell
import time
 
import pandas as pd
 
 
def excel_to_md(file_path, output_file_path):
    """Dump every sheet of an Excel workbook to a text file, one line per row.

    For each sheet a ``# <sheet name>`` heading is written, followed by one
    line per data row in the form ``header:value;header:value;...``. Cells
    that belong to a merged range all receive the value of the range's
    top-left cell. Columns and rows that are completely empty are dropped.

    :param file_path: path of the workbook to read (a string; it is
                      concatenated into the final timing message)
    :param output_file_path: path of the text file to write (UTF-8)
    :return: None
    """
    def replace_value(value):
        # Remove CR/LF from text so one sheet row stays on one output line.
        # None becomes ''; non-string values (numbers, dates) pass through.
        if value is None:
            return ''
        if isinstance(value, str):
            return value.replace('\n', '').replace('\r', '')
        return value

    def column_label(col):
        # pandas names empty headers "Unnamed: 0"; write them as "Unnamed-0".
        text = str(col)
        return text.replace(": ", "-") if "Unnamed:" in text else col

    start = time.time()
    # openpyxl is used only for the merged-range metadata; pandas reads the data.
    workbook = load_workbook(file_path)
    try:
        with pd.ExcelFile(file_path, engine="openpyxl") as xls:
            with open(output_file_path, 'w', encoding='utf-8') as f:
                for sheet_name in xls.sheet_names:
                    df = pd.read_excel(xls, sheet_name=sheet_name,engine="openpyxl")
                    # object dtype lets text be written into columns that
                    # pandas inferred as numeric.
                    df = df.astype(object)
                    # Fill merged ranges BEFORE dropping empty rows/columns:
                    # the fill is positional, and dropping first would shift
                    # positions away from the sheet coordinates.
                    for item in workbook[sheet_name].merged_cells:
                        min_col, min_row, max_col, max_row = item.bounds
                        anchor_value = item.start_cell.value
                        if anchor_value is None:
                            # Nothing to propagate; leave the cells empty so
                            # they can still be dropped below.
                            continue
                        base_value = replace_value(anchor_value)
                        # Sheet row 1 became the DataFrame header, so sheet
                        # row r (1-based) is DataFrame position r - 2.
                        row_start = max(min_row - 2, 0)
                        row_stop = max(max_row - 1, 0)
                        df.iloc[row_start:row_stop, min_col - 1:max_col] = base_value
                    df = df.dropna(axis=1, how='all')
                    df = df.dropna(axis=0, how='all')
                    # Remaining empty cells are written as empty strings.
                    df = df.fillna('')
                    f.write(f'# {sheet_name}\n')
                    for _, row in df.iterrows():
                        row_str = ';'.join(f'{column_label(col)}:{row[col]}' for col in df.columns)
                        f.write(f'{replace_value(row_str)}\n')
    finally:
        # Release the workbook even if reading or writing failed.
        workbook.close()
    print(file_path+' 執(zhí)行時(shí)間  : %.5f sec' %(time.time()-start))
 
# Example run: read test.xlsx and write the result to test.xlsx.md
excel_to_md('test.xlsx','test.xlsx.md')

到此这篇关于Python实现获取带合并单元格的表格数据的文章就介绍到这了，更多相关Python获取表格数据内容请搜索脚本之家以前的文章或继续浏览下面的相关文章，希望大家以后多多支持脚本之家！

您可能感興趣的文章: