一个多月前
工作需要
用GPT手搓了
采集插件
仅支持“标题+链接+单页采集”
适用场景单一
今天换了国产模型豆包
试了几轮
终于能
一键采集全部历史发文
采集数据范围包括
标题、链接、发文时间、阅读数、点赞数、分享数、推荐数
安装油猴(Tampermonkey)扩展并添加以下脚本代码即可实现
油猴安装图文教程(见教程)

    // ==UserScript==

// @name 微信公众号跨页采集工具
// @namespace http://tampermonkey.net/
// @version 1.5.0
// @description 通过localStorage和URL参数实现跨页面采集
// @author doubao
// @match https://mp.weixin.qq.com/cgi-bin/appmsgpublish?sub=list*
// @grant GM_addStyle
// @grant GM_download
// @require https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js
// @require https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js
// ==/UserScript==

(function() {

'use strict';

// Configuration
// Number of list pages the collection walks through before exporting.
// NOTE(review): hard-coded; presumably has to match the account's real page count — confirm before running.
const TOTAL_PAGES = 11;
// Page size used to compute the `begin` offset of each list page.
const ARTICLES_PER_PAGE = 10;
// localStorage key under which collected rows accumulate across page loads.
const STORAGE_KEY = 'wechat_articles_data';
// URL query parameter that carries the page number currently being collected.
const PARAM_KEY = 'collect_page';

// Inject the styles for the floating collect button and the progress box.
// The progress box is hidden by default (display: none) and is shown by script code while a collection runs.
GM_addStyle(`
    #wechat-data-collector {
        position: fixed;
        top: 20px;
        right: 20px;
        padding: 10px 20px;
        background-color: #4CAF50;
        color: white;
        border: none;
        border-radius: 4px;
        cursor: pointer;
        z-index: 9999;
    }
    #wechat-data-progress {
        position: fixed;
        top: 70px;
        right: 20px;
        background-color: #fff;
        border: 1px solid #ddd;
        padding: 8px 12px;
        border-radius: 4px;
        z-index: 9998;
        display: none;
    }
`);

/**
 * Reads the page number of the running collection from the current URL.
 *
 * @returns {number|null} The page number stored in the `PARAM_KEY` query
 *   parameter, or null when the parameter is missing or does not hold a
 *   positive integer.
 */
function getCurrentPageFromUrl() {
    const params = new URLSearchParams(window.location.search);
    const rawPage = params.get(PARAM_KEY);
    if (rawPage === null) {
        return null;
    }

    // Explicit radix, and reject NaN / zero / negative values: a hand-edited
    // or corrupted parameter must not be mistaken for a page in progress.
    const page = Number.parseInt(rawPage, 10);
    return Number.isInteger(page) && page >= 1 ? page : null;
}

/**
 * Loads the rows collected so far from localStorage.
 *
 * @returns {Array<Object>} The stored rows, or an empty array when nothing is
 *   stored or the stored value is not a JSON array.
 */
function loadSavedData() {
    const savedData = localStorage.getItem(STORAGE_KEY);
    if (!savedData) {
        return [];
    }

    try {
        const parsed = JSON.parse(savedData);
        // Callers append to the result with `.concat`, so anything other than
        // an array (e.g. a stored `null` or object) is treated as unusable.
        if (Array.isArray(parsed)) {
            return parsed;
        }
        console.warn('已保存的采集数据格式异常,已忽略');
    } catch (error) {
        console.warn('已保存的采集数据无法解析,已忽略:', error);
    }
    return [];
}

/**
 * Persists the collected rows in localStorage.
 *
 * @param {Array<Object>} data - Rows to store; serialized as JSON under `STORAGE_KEY`.
 */
function saveData(data) {
    const serialized = JSON.stringify(data);
    localStorage.setItem(STORAGE_KEY, serialized);
}

// Remove the rows stored under STORAGE_KEY from localStorage.
// Called when a new collection is started and again after the final export.
function clearSavedData() {
    localStorage.removeItem(STORAGE_KEY);
}

/**
 * Scrapes every article card found on the currently displayed list page.
 *
 * Each card becomes one row keyed by the Chinese column headers that end up
 * in the exported sheet. A card that throws while being parsed is logged and
 * skipped; the remaining cards are still collected.
 *
 * @returns {Array<Object>} One row per successfully parsed article card.
 */
function collectCurrentPage() {
    const COUNTER_INNER = '.weui-desktop-mass-media__data__inner';

    // Trimmed text of the counter inside the given wrapper, or '0' when the
    // counter element is missing or empty.
    const readCounter = (card, wrapperSelector) => {
        const node = card.querySelector(`${wrapperSelector} ${COUNTER_INNER}`);
        const text = node == null ? undefined : node.textContent.trim();
        return text || '0';
    };

    const cards = document.querySelectorAll('.weui-desktop-mass-appmsg__bd');
    console.log(`开始采集当前页,找到 ${cards.length} 篇文章`);

    const rows = [];
    for (const card of cards) {
        try {
            const titleNode = card.querySelector('.weui-desktop-mass-appmsg__title');
            const link = titleNode?.href || '';
            const title = titleNode?.textContent.trim() || '无标题';

            // The link's send_time value is interpreted as a Unix timestamp in seconds.
            let publishTime = '未知时间';
            const sendTime = link.match(/send_time=(\d+)/);
            if (sendTime) {
                publishTime = new Date(Number.parseInt(sendTime[1], 10) * 1000).toLocaleString();
            }

            const readCount = readCounter(card, '.appmsg-view');
            const likeCount = readCounter(card, '.appmsg-like');
            const shareCount = readCounter(card, '.appmsg-share');
            const recommendCount = readCounter(card, '.appmsg-haokan');

            rows.push({
                标题: title,
                链接: link,
                发文时间: publishTime,
                阅读人数: readCount,
                点赞人数: likeCount,
                分享人数: shareCount,
                推荐人数: recommendCount,
            });
        } catch (error) {
            console.error('解析文章数据时出错:', error);
        }
    }

    return rows;
}

/**
 * Redirects the browser to the list page that follows `currentPage`.
 *
 * The target URL keeps the current query string and overrides the
 * `PARAM_KEY` and `begin` parameters for the next page.
 *
 * @param {number} currentPage - 1-based number of the page just collected.
 * @returns {boolean} true when a navigation was started, false when the
 *   following page would exceed `TOTAL_PAGES`.
 */
function navigateToNextPage(currentPage) {
    const targetPage = currentPage + 1;
    if (targetPage > TOTAL_PAGES) {
        return false; // already on the last page
    }

    const { origin, pathname, search } = window.location;
    const query = new URLSearchParams(search);
    query.set(PARAM_KEY, targetPage);
    query.set('begin', (targetPage - 1) * ARTICLES_PER_PAGE);

    // Assigning href triggers the navigation; the collect parameter travels with it.
    window.location.href = `${origin}${pathname}?${query.toString()}`;
    return true;
}

/**
 * Builds a single-sheet workbook from the rows and hands it to `saveAs` as an
 * .xlsx download.
 *
 * @param {Array<Object>} data - Rows to export; passed to `XLSX.utils.json_to_sheet`.
 */
function exportToExcel(data) {
    const XLSX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';

    const sheet = XLSX.utils.json_to_sheet(data);
    const workbook = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(workbook, sheet, '公众号数据');

    const bytes = XLSX.write(workbook, { bookType: 'xlsx', type: 'array' });
    const file = new Blob([bytes], { type: XLSX_MIME_TYPE });

    // ISO timestamp with ':' and 'T' replaced by '-' for use in the file name.
    const stamp = new Date().toISOString().replace(/[:T]/g, '-');
    saveAs(file, `公众号数据_${stamp}.xlsx`);
}

/**
 * Adds the floating "start collecting" button to the page.
 *
 * Clicking the button clears previously stored rows and reloads the list at
 * page 1 (`begin=0`) with the `PARAM_KEY` parameter set, which starts the
 * cross-page collection.
 */
function createCollectorButton() {
    const button = document.createElement('button');
    button.id = 'wechat-data-collector';
    button.textContent = '开始采集全部数据';
    document.body.appendChild(button);

    // The page-load initializer already inserts #wechat-data-progress.
    // Only create the element when it is missing, so the id stays unique
    // and no orphaned duplicate node is left in the document.
    if (!document.getElementById('wechat-data-progress')) {
        const progress = document.createElement('div');
        progress.id = 'wechat-data-progress';
        document.body.appendChild(progress);
    }

    button.addEventListener('click', () => {
        // Drop rows left over from an earlier run.
        clearSavedData();

        // Restart from the first page, keeping the other query parameters.
        const params = new URLSearchParams(window.location.search);
        params.set(PARAM_KEY, 1);
        params.set('begin', 0);

        window.location.href = `${window.location.origin}${window.location.pathname}?${params.toString()}`;
    });
}

/**
 * Runs one step of the cross-page collection for the page that just loaded.
 *
 * Without a page number in the URL it only shows the start button. With one,
 * it collects the current page, appends the rows to the data kept in
 * localStorage and then either navigates to the next page (after 1.5 s) or,
 * on the last page, exports everything (after 1 s) and clears the stored data.
 */
function initCollector() {
    if (!window.location.href.includes('appmsgpublish?sub=list')) {
        return;
    }

    const currentPage = getCurrentPageFromUrl();

    // Progress box; hidden by the injected CSS until a collection is running.
    const progress = document.createElement('div');
    progress.id = 'wechat-data-progress';
    document.body.appendChild(progress);

    if (!currentPage) {
        // Idle state: no collection in progress, just offer the start button.
        createCollectorButton();
        return;
    }

    // A collection is in progress.
    progress.style.display = 'block';
    progress.textContent = `正在采集第 ${currentPage}/${TOTAL_PAGES} 页...`;

    // Stop when the page title reports an expired login; the user has to log in and restart.
    if (document.title.includes('登录超时')) {
        progress.textContent = '登录超时,请重新扫码登录后再次点击采集按钮';
        return;
    }

    try {
        // Collect the current page.
        const articles = collectCurrentPage();

        // Append the new rows to what earlier pages stored.
        const allData = loadSavedData();
        const updatedData = allData.concat(articles);
        saveData(updatedData);

        console.log(`第 ${currentPage} 页采集完成,共 ${articles.length} 篇文章,累计 ${updatedData.length} 篇`);
        progress.textContent = `第 ${currentPage}/${TOTAL_PAGES} 页采集完成,共 ${articles.length} 篇`;

        if (currentPage < TOTAL_PAGES) {
            // Short pause so the progress text is visible before leaving the page.
            setTimeout(() => {
                navigateToNextPage(currentPage);
            }, 1500);
        } else {
            // Last page: export everything, then drop the stored rows.
            setTimeout(() => {
                exportToExcel(updatedData);
                progress.textContent = `全部 ${TOTAL_PAGES} 页采集完成,共 ${updatedData.length} 篇文章`;
                clearSavedData();
            }, 1000);
        }
    } catch (error) {
        progress.textContent = `采集失败: ${error.message}`;
        console.error('采集过程中出错:', error);
    }
}

// A userscript can be injected after the window "load" event has already
// fired, in which case a load listener would never run. Initialize right
// away when the document is already complete, otherwise wait for load.
if (document.readyState === 'complete') {
    initCollector();
} else {
    window.addEventListener('load', initCollector);
}

})();