快捷導航

Python查找大文件的實用腳本分享

更新時間：2024年11月04日 11:09:10 作者：zhongcx01

這篇文章主要為大家分享一個利用Python查找大文件的實用腳本，文中的示例代碼講解詳細，感興趣的小伙伴可以跟隨小編一起學習一下

C盤滿了，寫了一個python腳本，2分多鐘能找到比較大的文件，然后手動刪除或者遷移D盤，最后發現是微信小程序開發工具緩存文件太多了，騰出來10個G，念頭通達了，這里備份一下腳本。

運行工具：PyCharm 2024.1.3 (Community Edition)

完整代碼

import os
import threading
import time
import sys
from threading import Event
 
def is_large(file_path, threshold_mb):
    """Return True when the file at *file_path* is strictly larger than *threshold_mb* megabytes.

    The size comes from ``os.path.getsize`` and is converted to MB using
    1 MB = 1024 * 1024 bytes before the comparison.
    """
    bytes_per_mb = 1024 * 1024
    size_in_mb = os.path.getsize(file_path) / bytes_per_mb
    return size_in_mb > threshold_mb
 
def show_loading_animation(stop_event, interval=0.5):
    """Draw a one-line text loading animation on stdout until *stop_event* is set.

    Args:
        stop_event: Event-like object exposing ``is_set()`` and ``wait(timeout)``.
            The animation ends shortly after it is set.
        interval: Seconds each animation frame stays on screen.

    Returns:
        None. A trailing newline is written if at least one frame was drawn,
        so later output starts on a fresh line.
    """
    frames = ['.', '..', '...', '....']
    frame_width = len(frames[-1])
    drew_frame = False

    while not stop_event.is_set():
        for frame in frames:
            # Pad to a fixed width so a short frame erases the dots left
            # behind by the longer frame drawn before it.
            sys.stdout.write('\r正在查找大文件... ' + frame.ljust(frame_width))
            sys.stdout.flush()
            drew_frame = True
            # wait() is an interruptible sleep: it returns True as soon as
            # the event is set, so the caller's join() is not held up.
            if stop_event.wait(interval):
                break

    if drew_frame:
        sys.stdout.write('\n')
        sys.stdout.flush()
 
def filter_files(files, skip_file_keywords, include_file_keywords, extension=None):
    """Filter file names by skip/include keywords and an optional extension.

    Args:
        files: Iterable of file names.
        skip_file_keywords: Substrings; a name containing any of them is dropped.
            May be empty or None.
        include_file_keywords: Substrings; when at least one non-empty keyword is
            given, a name must contain one of them to be kept. May be empty or None.
        extension: Extension without the leading dot (e.g. ``"txt"``). When not
            None, only names ending with ``"." + extension`` are kept.

    Returns:
        A new list of the names that pass every filter, in their original order.
    """
    # Empty keywords are ignored: "" is a substring of every name, so a blank
    # entry (what "".split(',') produces) would otherwise reject every file.
    skip_keywords = [kw for kw in (skip_file_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_file_keywords or ()) if kw]
    suffix = None if extension is None else '.' + extension

    kept = []
    for name in files:
        if any(kw in name for kw in skip_keywords):
            continue
        if include_keywords and not any(kw in name for kw in include_keywords):
            continue
        if suffix is not None and not name.endswith(suffix):
            continue
        kept.append(name)
    return kept
 
def filter_dirs(dirs, skip_dir_keywords, include_dir_keywords):
    """Filter directory names by skip/include keywords.

    Args:
        dirs: Iterable of directory names.
        skip_dir_keywords: Substrings; a name containing any of them is dropped.
            May be empty or None.
        include_dir_keywords: Substrings; when at least one non-empty keyword is
            given, a name must contain one of them to be kept. May be empty or None.

    Returns:
        A new list of the names that pass both filters, in their original order.
    """
    # Empty keywords are ignored: "" is a substring of every name, so a blank
    # entry (what "".split(',') produces) would otherwise reject every directory.
    skip_keywords = [kw for kw in (skip_dir_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_dir_keywords or ()) if kw]

    kept = []
    for name in dirs:
        if any(kw in name for kw in skip_keywords):
            continue
        if include_keywords and not any(kw in name for kw in include_keywords):
            continue
        kept.append(name)
    return kept
 
def get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension=None, interval=0.5):
    """Walk *dir_path*, report every file larger than *threshold_mb* MB, and time the search.

    A background thread runs ``show_loading_animation`` while the tree is walked.
    Directories are pruned with ``filter_dirs`` and file names are selected with
    ``filter_files``. Matching files are printed largest first, followed by the
    elapsed time.

    Args:
        dir_path: Root directory to walk.
        threshold_mb: Size threshold in MB (1 MB = 1024 * 1024 bytes); only files
            strictly larger are reported.
        skip_dir_keywords: Passed to ``filter_dirs`` as the skip list.
        skip_file_keywords: Passed to ``filter_files`` as the skip list.
        include_dir_keywords: Passed to ``filter_dirs`` as the include list.
        include_file_keywords: Passed to ``filter_files`` as the include list.
        extension: Optional extension passed to ``filter_files``.
        interval: Frame interval passed to ``show_loading_animation``.

    Returns:
        The list of ``{'path': str, 'size': float}`` dicts (size in MB), sorted by
        size in descending order.
    """
    start_time = time.time()
    stop_event = Event()
    large_files = []
    loading_thread = threading.Thread(target=show_loading_animation, args=(stop_event, interval))
    loading_thread.daemon = True
    loading_thread.start()

    try:
        for root, dirs, files in os.walk(dir_path):
            # In-place slice assignment so os.walk skips the pruned directories.
            dirs[:] = filter_dirs(dirs, skip_dir_keywords, include_dir_keywords)
            for file_name in filter_files(files, skip_file_keywords, include_file_keywords, extension):
                full_path = os.path.join(root, file_name)
                try:
                    # Stat once and reuse the value for both the comparison and
                    # the report, so the two can never disagree.
                    size_mb = os.path.getsize(full_path) / (1024 * 1024)
                except OSError as e:
                    # Unreadable or vanished file: warn and keep scanning.
                    print(f"警告訪問(wèn)文件出錯(cuò) {full_path} 出錯(cuò)信息: {e}")
                    continue
                if size_mb > threshold_mb:
                    large_files.append({'path': full_path, 'size': size_mb})
    finally:
        # Always signal the animation thread and wait for it, even on error.
        stop_event.set()
        loading_thread.join()

    large_files.sort(key=lambda x: x['size'], reverse=True)
    for file_info in large_files:
        print(f"文件路徑: {file_info['path']} | 文件大小: {file_info['size']:.2f} MB")

    end_time = time.time()
    print(f"\n查找共耗時(shí): {end_time - start_time:.2f} 秒")
    return large_files
 
def _split_keywords(raw):
    """Split a comma-separated answer into a list of stripped, non-empty keywords."""
    # "".split(',') yields [''], and an empty keyword is a substring of every
    # name, so blank entries must be dropped before they reach the filters.
    return [part.strip() for part in raw.split(',') if part.strip()]


def main():
    """Prompt for the search parameters on stdin and run the large-file search.

    Keyword answers are comma-separated; a blank answer yields an empty list.
    A blank extension answer yields ``None``. A non-numeric threshold and any
    ``OSError`` raised during the search are reported instead of propagating.
    """
    dir_path = input("請(qǐng)輸入要檢查的目錄路徑: ")
    try:
        threshold_mb = float(input("請(qǐng)輸入文件大小閾值(單位: MB): "))
        skip_dir_keywords = _split_keywords(input("請(qǐng)輸入要跳過(guò)的文件夾名關(guān)鍵詞，用逗號(hào)分隔(直接回車跳過(guò)，推薦modules,~~,.gradle): "))
        skip_file_keywords = _split_keywords(input("請(qǐng)輸入要跳過(guò)的文件名關(guān)鍵詞，用逗號(hào)分隔(直接回車跳過(guò)，推薦$): "))
        include_dir_keywords = _split_keywords(input("請(qǐng)輸入要包含的文件夾名關(guān)鍵詞，用逗號(hào)分隔(直接回車跳過(guò)): "))
        include_file_keywords = _split_keywords(input("請(qǐng)輸入要包含的文件名關(guān)鍵詞，用逗號(hào)分隔(直接回車跳過(guò)): "))
        # Accept "txt", ".txt" or " txt "; an empty answer means no extension filter.
        extension = input("請(qǐng)輸入要篩選的文件擴(kuò)展名(例如：txt，可選，直接回車跳過(guò)): ").strip().strip('.') or None
        get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension)
        print("搜索結(jié)束.")
    except ValueError:
        print("錯(cuò)誤：請(qǐng)輸入有效的數(shù)字作為文件大小閾值.")
    except OSError as e:
        print(e)


if __name__ == '__main__':
    main()

方法補充

除了上文的方法，小編還為大家整理了其他Python查找大文件的方法，希望對大家有所幫助

完整代碼如下

#! python3
# chapter09-test02.py - find the large files inside a folder and print their absolute paths.
# To keep the run time down, the program only collects the first 100 files that exceed
# `size`; they are not necessarily the 100 largest files.
 
import os,pprint,sys
import timeit,time
 
 
# Decorator - measure and print how long the wrapped function takes to run
def colocked_decorator(func):
    """Wrap *func* so every call prints its elapsed time, arguments and result.

    The printed line has the form ``[<seconds>s] <name>(<args>) -> <result>``.

    Args:
        func: The callable to time; it must expose ``__name__``.

    Returns:
        A wrapper that forwards positional and keyword arguments to *func* and
        returns its result unchanged.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def colock(*args,**kwargs):
        startTime=timeit.default_timer()
        result=func(*args,**kwargs)  # run the wrapped function
        spendTime=timeit.default_timer()-startTime
        name=func.__name__
        # Render the call's arguments the way they would appear in source code.
        arg_parts=[repr(arg) for arg in args]
        arg_parts.extend('%s=%r'%(key,value) for key,value in kwargs.items())
        arg_str=','.join(arg_parts)
        # One %-conversion per value; passing them as a tuple also keeps a
        # tuple-valued result from being unpacked by the % operator.
        print('[%0.7fs] %s(%s) -> %r'%(spendTime,name,arg_str,result))
        return result
    return colock
 
# Find the large files inside a given folder
@colocked_decorator
def findBigFile(folder,size,limit=100):
    """Collect absolute paths of files under *folder* that are larger than *size*.

    Args:
        folder: Root folder to walk recursively.
        size: Threshold compared against ``os.path.getsize``; only files
            strictly larger are collected.
        limit: Maximum number of paths to collect. The walk stops as soon as
            this many have been found, so the result is the first *limit*
            matches in walk order, not the largest ones.

    Returns:
        A list of absolute paths, at most *limit* long.
    """
    bigFileAbs=[]
    if limit<=0:
        return bigFileAbs
    for foldername,subfolders,filenames in os.walk(folder):
        for filename in filenames:
            # getsize needs the full path, not just the bare file name
            fileAbs=os.path.join(foldername,filename)
            try:
                fileSize=os.path.getsize(fileAbs)
            except OSError:
                # Unreadable or vanished entry (e.g. a broken link): skip it
                # rather than abort the whole scan.
                continue
            if fileSize>size:
                bigFileAbs.append(os.path.abspath(fileAbs))
                # Stop walking once the cap is hit instead of stat-ing the
                # rest of the tree for results that would be discarded.
                if len(bigFileAbs)>=limit:
                    return bigFileAbs
    return bigFileAbs
 
# Helper that renders a size with a KB/MB-style unit; it is not used elsewhere in this program.
# Unit suffixes keyed by the conversion base (1000 or 1024).
SUFFIXES = {1000:['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
            1024:['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']}
def humanReadable_size(size,is_1024_byte=True):
    """Format *size* with one decimal place and a unit suffix from ``SUFFIXES``.

    Args:
        size: Non-negative file size, e.g. the value returned by ``os.path.getsize``.
        is_1024_byte: When True (the default) convert in steps of 1024 and use
            the KiB/MiB/... suffixes; when False convert in steps of 1000 and
            use the KB/MB/... suffixes.

    Returns:
        A string such as ``'1.0KiB'`` or ``'1.5MB'``. The smallest unit is
        KiB/KB, so sizes below one unit come out as a fraction (``'0.5KiB'``).

    Raises:
        ValueError: If *size* is negative or too large for the biggest suffix.
    """
    if size<0:
        raise ValueError('number must be non-negative')
    # The flag selects base 1024; the original had the two bases swapped.
    mutiple=1024 if is_1024_byte else 1000
    # Divide step by step, moving up one unit per iteration.
    for suffix in SUFFIXES[mutiple]:
        size/=mutiple
        # Stop at the first unit where the value no longer reaches the next unit up.
        if size<mutiple:
            return '{0:.1f}{1}'.format(size,suffix)
    raise ValueError('number too large')
 
         
 
# Raw string so the backslash in the Windows path is taken literally
# ('\D' is an invalid escape sequence in a normal string literal).
path=r'F:\DCIM'
size=1000000    # threshold passed to findBigFile
# Only scan when the path exists; otherwise report it and exit.
if os.path.exists(path):
    resultList=findBigFile(path,size)
    pprint.pprint(resultList)
else:
    print('You enter path does not exist')
    sys.exit()

到此這篇關于Python查找大文件的實用腳本分享的文章就介紹到這了，更多相關Python查找大文件內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以后多多支持腳本之家！

您可能感興趣的文章: