用python 下载&替换markdown文章中的图床url为本机图片路径，从此再也不怕图床链接失效！

站长

2024年08月01日 11:21 · 阅读数 91

1、问题&困扰

下边这个问题困扰我有段时间了：

2、我的目标

将这类三方图床文件下载到本地
将markdown中的图床链接替换为指向本机的图片路径，如下：

我不可能一个个手动去下载替换，那样真是“是可忍，孰不可忍”。所以我决定使用python批量替换，基本逻辑是：

找到替换前的markdown文章集合
依次读取文章内容，并使用正则找到图床文件，并下载到本地（会在当前目录创建个专门放图片的文件夹）
替换markdown中的图床链接为本地的图片路径。
另外我还生成了word文件，以作备份。

3、使用python实现

读取指定文件夹中的文章集合，并解析，正则匹配与下载替换：

下载图片 download_util 工具类：



import os
import time

import requests


from util.logger_util import default_logger

# Module-level logger obtained from the project's logger utility.
logger = default_logger()

# Browser-like request headers; download_pic falls back to these when the
# caller passes headers=None.
# NOTE(review): 'br' in Accept-Encoding presumably requires a Brotli decoder
# to be installed next to requests; without one a Brotli-compressed response
# body may be written to disk undecoded — confirm, or drop 'br'.
default_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate, br',
    'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"macOS"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-User': '?0',
    'Upgrade-Insecure-Requests': '1'
}


def download_pic(pic_path, url, headers=None, timeout=30):
    """Download the image at ``url`` and store it at ``pic_path``.

    The download is skipped when ``url`` is not an http(s) URL or when
    ``pic_path`` already exists. Every failure is logged instead of raised,
    so a single broken link does not abort a batch run.

    Args:
        pic_path (str): Local file path the image bytes are written to.
        url (str): Remote image URL; must start with ``http://`` or
            ``https://``.
        headers (dict): Request headers; ``default_headers`` is used when
            this is None.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        None
    """
    try:
        if headers is None:
            headers = default_headers
        if not url.startswith(("http://", "https://")):
            logger.warning("图片链接格式不正确：%s - %s", pic_path, url)
            return
        if os.path.exists(pic_path):
            logger.info("图片已存在，跳过下载：%s", pic_path)
            return
        # NOTE(review): verify=False disables TLS certificate validation.
        # It is kept so hosts with broken certificates keep working, but the
        # downloaded bytes are not authenticated.
        res1 = requests.get(url, headers=headers, verify=False, timeout=timeout)
        # Fail on 4xx/5xx before touching the disk; otherwise an error page
        # would be saved as the "image" and skipped as "already exists" on
        # every later run.
        res1.raise_for_status()
        with open(pic_path, "wb") as f:
            f.write(res1.content)
        # Throttle only after a real request so skipped files cost no time.
        time.sleep(1)
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # continue with the next image.
        logger.warning("下载异常：%s\n%s", url, e)

logger 工具类：


import logging


# Default logging utility
def default_logger():
    """Return the shared "CpPythonBox" logger, configuring it on first use.

    When neither the logger nor any of its ancestors has a handler yet, a
    stream handler with the project's line format is attached and the level
    is set to INFO. Otherwise the logger is returned untouched.

    Returns:
        logging.Logger: The "CpPythonBox" logger.
    """
    box_logger = logging.getLogger("CpPythonBox")
    if box_logger.hasHandlers():
        # Already configured (here or on an ancestor); do not add a
        # second handler.
        return box_logger

    line_format = logging.Formatter(
        fmt='%(asctime)s %(process)d:%(processName)s- %(levelname)s === %(message)s',
        datefmt="%Y-%m-%d %H:%M:%S %p")
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(line_format)
    box_logger.addHandler(stream_handler)
    box_logger.setLevel(logging.INFO)
    return box_logger

file_util 工具类：


def search_all_file(file_dir=None, target_suffix_tuple=()):
    """Recursively collect files with the given suffixes under a directory.

    Entries are visited depth-first in ``os.listdir`` order, so files of a
    sub-directory appear at the position of that sub-directory.

    Args:
        file_dir (str): Directory to search. Defaults to the working
            directory at call time.
        target_suffix_tuple (Tuple(Str)): File name suffixes to match, e.g.
            ``(".md",)``. A single string or any iterable of strings is
            accepted as well. ``None`` or an empty tuple matches nothing.

    Returns:
        list : Absolute paths of the matching files.
    """
    if file_dir is None:
        # Resolved per call; a default evaluated in the signature would be
        # frozen at import time.
        file_dir = os.getcwd()

    # NOTE: the process working directory is left at ``file_dir`` after the
    # call. This side effect is kept for backward compatibility with callers
    # that may rely on it; the traversal itself no longer depends on it.
    os.chdir(file_dir)
    root_dir = os.getcwd()

    if target_suffix_tuple is None:
        suffixes = ()
    elif isinstance(target_suffix_tuple, str):
        suffixes = target_suffix_tuple
    else:
        # str.endswith only accepts str or tuple, so normalise lists/sets.
        suffixes = tuple(target_suffix_tuple)

    return _collect_files(root_dir, suffixes)


def _collect_files(dir_path, suffixes):
    """Return matching file paths below ``dir_path`` without changing cwd."""
    matches = []
    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)
        if os.path.isdir(entry_path):
            # Recurse by explicit path; chdir-ing in and back out via ".."
            # ends up in the wrong place for symlinked directories.
            matches.extend(_collect_files(entry_path, suffixes))
        elif entry_name.endswith(suffixes):
            matches.append(entry_path)
    return matches



def write_text_to_file(content, file_path, mode="w+"):
    """Write ``content`` followed by a newline to a UTF-8 text file.

    The write happens while holding the module-level ``lock``. An
    ``OSError`` raised while opening or writing is printed, not propagated.

    Args:
        content (str): Text to write.
        file_path (str): Path of the file to write to.
        mode (str): Mode passed to ``open``: w to write, a to append,
            + for read/write.

    Returns:
        None
    """
    with lock:
        try:
            with open(file_path, mode, encoding='utf-8') as out_file:
                out_file.write(content + "\n")
        except OSError as err:
            # Best-effort write: report the problem and carry on.
            print(err)


def read_file_text_content(file_path):
    """Read a file as UTF-8 text.

    Args:
        file_path (str): Path of the file to read.

    Returns:
        str: The file content, or None when the file does not exist.
    """
    if not os.path.exists(file_path):
        return None
    try:
        # Plain read mode: 'r+' also demands write permission and would
        # fail on read-only files even though nothing is written.
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        # The file vanished between the existence check and the open.
        return None


def is_dir_existed(file_path, mkdir=True, is_recreate=False):
    """Check whether a path exists, optionally creating it as a directory.

    Args:
        file_path (str): Path to check.
        mkdir (bool): Create the directory when the path does not exist.
        is_recreate (bool): When the path already exists, delete it via
            ``delete_file`` and create it again.

    Returns:
        None when ``mkdir`` is true; otherwise whether the path exists.
    """
    if not mkdir:
        # Pure query mode: report existence and change nothing.
        return os.path.exists(file_path)

    already_present = os.path.exists(file_path)
    if already_present and not is_recreate:
        return None
    if already_present:
        delete_file(file_path)
        if os.path.exists(file_path):
            # Deletion left the path in place; nothing to create.
            return None
    os.makedirs(file_path)