Typecho博文迁移到Hugo平台简易py脚本

2025-05-30

Sandeepin计划将Typecho的博客文章迁移到Hugo平台上，在网上找了很多教程，要么太复杂，要么不适合自己的需求。想到自己的博客就是个人写，没多少篇，也不需要什么信息都完美迁移，只要把关键的内容带过来就行啦。想来想去，还是自己写个脚本处理下最快，借助AI，一下午折腾好，python脚本也贴到博客收藏，方便后续再用。

Python脚本名：typecho2hugo.py

内容如下。使用前请修改数据库账号密码和表名，并根据自己的需要微调内容处理逻辑；如果填的是服务器上的真实账号密码，分享脚本前注意脱敏：

import os
import datetime
import shutil
import mysql.connector

# Module-level cache mapping an article id (cid) to its category name.
# It is filled by save_content_form_database() before articles are exported.
id_category_dict = {}


def timestamp_to_time(timestamp):
    """Format a Unix timestamp as a ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        timestamp: Seconds since the epoch; anything ``int()`` accepts
            (an integer or a numeric string, for example).

    Returns:
        The formatted date-time string, as produced by
        ``datetime.datetime.fromtimestamp`` for the given number of seconds.
    """
    moment = datetime.datetime.fromtimestamp(int(timestamp))
    return f"{moment:%Y-%m-%d %H:%M:%S}"


def content_handle(content):
    """Strip Typecho-specific marker comments from an article body.

    Every ``<!--markdown-->`` marker and every ``<!--more-->`` marker is
    removed; all other text is returned unchanged.

    Args:
        content: Article body as a string, or ``None`` for an empty body.

    Returns:
        The body with both markers removed (``""`` when *content* is empty
        or ``None``).
    """
    if not content:
        return ""
    # Blank lines are deliberately left untouched: collapsing repeated line
    # breaks was tried and judged too aggressive because of its side effects.
    for marker in ("<!--markdown-->", "<!--more-->"):
        content = content.replace(marker, "")
    return content


def _yaml_double_quoted(value):
    """Return *value* as a YAML double-quoted scalar with ``\\`` and ``"`` escaped."""
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _safe_filename_part(text):
    """Return *text* with characters that are unsafe in file names replaced by ``_``."""
    unsafe = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})
    return str(text).translate(unsafe)


def save_markdown_article(title, content, slug, created, category):
    """Write one article as a Markdown file with front matter under ``post/``.

    The file is named ``<YYYY-MM-DD>_<title>.md``, where characters in the
    title that are not safe in a file name are replaced by underscores. The
    front matter contains the title, the creation date, a single category,
    a fixed ``Code`` tag and a ``/<year>/<slug>.html`` URL, followed by the
    body after it has been passed through ``content_handle``.

    Args:
        title: Article title; used in the front matter and in the file name.
        content: Raw article body.
        slug: URL slug used to build the ``url`` front-matter field.
        created: Creation time as a Unix timestamp.
        category: Category name written to the ``categories`` list.
    """
    created_at = timestamp_to_time(created)
    date = created_at[:10]
    year = date[:4]
    os.makedirs('post', exist_ok=True)
    file_path = os.path.join('post', f"{date}_{_safe_filename_part(title)}.md")
    url = f"/{year}/{slug}.html"
    # Quote and escape every free-text value so that titles containing
    # ':', '#' or '"' cannot corrupt the front matter.
    front_matter = [
        "---\n",
        f"title: {_yaml_double_quoted(title)}\n",
        f"date: {created_at}\n",
        f"categories: [{_yaml_double_quoted(category)}]\n",
        "tags: [\"Code\"]\n",
        f"url: {_yaml_double_quoted(url)}\n",
        "---\n\n",
    ]
    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(front_matter)
        file.write(content_handle(content))


def save_content_form_database(config):
    """Export every article in the Typecho database to a Markdown file.

    First fills the module-level ``id_category_dict`` with an article-id to
    category-name mapping, then reads the articles and hands each one to
    ``save_markdown_article``, printing a progress line per article.
    Articles without a category are saved under '未分类'. If an article has
    several categories, the last one returned by the query wins.

    Args:
        config: Keyword arguments for ``mysql.connector.connect``.

    The table names in the queries use the ``blog_`` prefix; change them to
    match your own installation.
    """
    cnx = mysql.connector.connect(**config)
    try:
        cursor = cnx.cursor()
        try:
            # Typecho keeps tags in the same metas table as categories, so
            # restrict the join to category rows; otherwise a tag name could
            # overwrite the category of an article.
            cursor.execute(
                "SELECT r.cid, m.name FROM blog_metas m "
                "JOIN blog_relationships r ON m.mid = r.mid "
                "WHERE m.type = 'category'"
            )
            for cid, name in cursor.fetchall():
                id_category_dict[cid] = name

            # NOTE(review): this selects every row of blog_contents; if the
            # table also holds pages, drafts or attachments they are exported
            # too -- confirm whether a type/status filter is wanted.
            cursor.execute(
                "SELECT cid, title, slug, created, text FROM blog_contents"
            )
            # Fetch everything up front so no result set is left unread on
            # the cursor while files are being written.
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if a query fails.
        cnx.close()

    for i, (cid, title, slug, created, text) in enumerate(rows):
        category = id_category_dict.get(cid, '未分类')
        save_markdown_article(title, text, slug, created, category)
        print(f"{i} item: {title}")


if __name__ == "__main__":
    # Start from a clean output directory: drop any previous export first.
    output_dir = 'post'
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    # Database connection settings -- replace with your own credentials.
    config = dict(
        user='root',
        password='root',
        host='localhost',
        database='blog',
        raise_on_warnings=True,
    )
    # Read the articles from the database and write them out as Markdown.
    save_content_form_database(config)