效果(主要使用charles和python实现)

批量获取kuwo的所有歌手数据，共169页
歌手列表

保存的数据

操作步骤

1、使用抓包工具Charles 的mirror功能

工具：Charles 的mirror

点击菜单栏Tools，弹出菜单选择Mirror，点开mirror窗口后，设置保存路径和url

2、然后打开浏览器访问kuwo歌手的列表页，就可以在目录中看到下载文件了

3、rename.py对下载的文件进行批量重命名

注意目录结构：rename.py 应与存放歌手数据文件的目录（这里是 artist_11）放在同一级目录下，即 rename.py 和 artist_11 目录并列。

rename.py 代码


import os
import re
import urllib.parse
import sys


# 下载后的歌手文件，批量重命名
#例如：
#原名字：artistInfo%3fcategory%3d0%26prefix%3d%26pn%3d1%26rn%3d60%26httpsStatus%3d1%26reqId%3d3b128760-1ce3-11f1-b0ff-171b6be4e54a%26plat%3dweb_www%26from%3d
#新名字：pn1.json
def rename_files(directory='.'):
    """Rename mirrored artist-list files in ``directory`` to ``pn<page>.json``.

    Each regular file whose name contains a page parameter is renamed after
    that page number. The parameter is recognised either percent-encoded
    (``pn%3d<digits>``, any case) or, after URL-decoding the name, in plain
    form (``pn=<digits>``). Files without a page parameter and non-file
    entries are left untouched.

    If the target name is already taken, a numeric suffix is appended to the
    page stem: ``pn1.json``, ``pn1_1.json``, ``pn1_2.json``, ...

    Example:
        ``artistInfo%3fcategory%3d0%26prefix%3d%26pn%3d1%26rn%3d60...``
        becomes ``pn1.json``.

    Args:
        directory: Path of the directory whose files are renamed in place.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.rename``.
    """
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            continue

        # Try the percent-encoded form first, then fall back to the decoded
        # form of the file name.
        match = re.search(r'pn%3d(\d+)', filename, re.IGNORECASE)
        if not match:
            decoded = urllib.parse.unquote(filename)
            match = re.search(r'pn=(\d+)', decoded, re.IGNORECASE)
        if not match:
            continue

        stem = f'pn{match.group(1)}'
        new_name = f'{stem}.json'
        new_path = os.path.join(directory, new_name)

        # Avoid overwriting an existing file. Every candidate is derived from
        # the fixed stem so suffixes do not accumulate (pn1_1_2.json).
        counter = 1
        while os.path.exists(new_path):
            new_name = f'{stem}_{counter}.json'
            new_path = os.path.join(directory, new_name)
            counter += 1

        os.rename(filepath, new_path)
        print(f'Renamed: {filename} -> {new_name}')

if __name__ == '__main__':
    # The directory to process is the first command-line argument; without
    # one, fall back to the ./artist_11 directory.
    cli_args = sys.argv[1:]
    if cli_args:
        target_dir = cli_args[0]
    else:
        target_dir = './artist_11'
    rename_files(target_dir)

使用 Python 自动调用浏览器，模拟点击“下一页”按钮逐页翻页，这里只翻到第 169 页

from selenium import webdriver  
from selenium.webdriver.common.by import By  
from selenium.webdriver.support.ui import WebDriverWait  
from selenium.webdriver.support import expected_conditions as EC  
from selenium.webdriver.common.action_chains import ActionChains
import time
import sys
import json
import re

from selenium.webdriver.common.keys import Keys

import undetected_chromedriver as uc
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import pandas as pd

import select
import os
import hashlib

def openMiguHtml(miguUrl):
    """Start Chrome via undetected_chromedriver and navigate to ``miguUrl``.

    The profile directory ``./res/profile/Default`` is created if missing and
    passed to Chrome as ``--user-data-dir``. The chromedriver and browser
    binaries are taken from fixed paths under ``./res`` (relative to the
    current working directory).

    Args:
        miguUrl: URL to load once the browser is running.

    Returns:
        The ``uc.Chrome`` driver instance, after ``get(miguUrl)`` was called.
    """
    user_data_dir = './res/profile/Default'
    os.makedirs(user_data_dir, exist_ok=True)

    opts = Options()
    opts.add_argument(f"--user-data-dir={user_data_dir}")

    browser = uc.Chrome(
        options=opts,
        driver_executable_path='./res/chrom_driver/chromedriver-mac-x64/chromedriver',
        browser_executable_path="./res/chrom/chrome-mac-x64/chrometest.app/Contents/MacOS/GoogleChromeTesting",
    )
    browser.get(miguUrl)
    return browser

def scrollToElement(driver, element) :
    """Scroll the page so that ``element`` is brought into view.

    Runs ``scrollIntoView()`` on the element through the driver's
    ``execute_script``; nothing is returned.

    Args:
        driver: Object exposing ``execute_script(script, *args)``.
        element: The element handed to the script as ``arguments[0]``.
    """
    scroll_script = "arguments[0].scrollIntoView();"
    driver.execute_script(scroll_script, element)


# 模拟点击元素
def clickElement(driver, element) :
    """Scroll ``element`` into view and click it with a simulated mouse action.

    The element is scrolled into view, the function sleeps for one second,
    and then an ``ActionChains`` sequence moves the pointer onto the element
    and clicks it.

    Args:
        driver: WebDriver instance controlling the page.
        element: The element to click.
    """
    scrollToElement(driver, element)
    # Pause after scrolling before moving the pointer -- presumably to let the
    # page settle; the original gives no stated reason.
    time.sleep(1)
    # Move onto the element, click, and execute the queued actions.
    ActionChains(driver).move_to_element(element).click().perform()

#等待用户确认函数
def wait_for_enter(prompt="按回车键继续..."):
    """Block until the user presses Enter; return silently if that is impossible.

    Args:
        prompt: Text shown to the user while waiting.

    Returns:
        None. The typed line is discarded.
    """
    # Without any stdin (sys.stdin is None) input() raises RuntimeError, so
    # there is nothing to wait for.
    if sys.stdin is None:
        return
    try:
        input(prompt)
    except EOFError:
        # stdin is at end-of-file (e.g. no interactive console): do not block.
        pass
    # No stdin.flush() here: flushing does nothing on a read-only stream, so
    # it never discarded pending input.

def wait_element_appeared(driver, xpath_text, timeout=60):
    """Poll once per second until an element matching ``xpath_text`` is found.

    Each attempt first sleeps one second and then calls
    ``driver.find_element(By.XPATH, xpath_text)``. Any exception raised by
    the lookup is treated as "not there yet" and reported on stdout.

    Args:
        driver: WebDriver instance used for the lookup.
        xpath_text: XPath expression of the element to wait for.
        timeout: Maximum number of one-second attempts (default 60).

    Returns:
        The element returned by ``find_element``, or ``None`` when every
        attempt failed.
    """
    print(f"\n{xpath_text}\n")
    for attempt in range(1, timeout + 1):
        # Sleep before looking, so the first lookup also happens after 1 s.
        time.sleep(1)
        try:
            return driver.find_element(By.XPATH, xpath_text)
        except Exception:
            print(f"等待标签出现{attempt}秒")
    print(f"{timeout}秒结束等待")
    return None


import time
import random

def click_next_page_in_loop(driver, total_pages=169):
    """Click the "next page" button repeatedly until ``total_pages`` is reached.

    For every page from 2 to ``total_pages`` the function waits for the
    next-page button to exist, sleeps a random 5-10 seconds, and then clicks
    it. The loop stops early if the button never appears or if any step
    raises. Page numbering assumes the browser starts on page 1 -- confirm
    against the caller.

    Args:
        driver: selenium WebDriver instance showing the paginated list.
        total_pages: Number of the last page to navigate to (default 169).
    """
    # XPath of the next-page button.
    next_page_xpath = "//i[@class='li-page iconfont icon-icon_pagedown']"

    for target_page in range(2, total_pages + 1):
        try:
            print(f"开始点击第 {target_page} 页")
            next_ele = wait_element_appeared(driver, next_page_xpath)
            if next_ele is None:
                # The wait helper gave up: stop here instead of sleeping and
                # then failing inside the click.
                print("循环终止：未找到下一页按钮")
                return

            # Random 5-10 second pause before each click.
            wait_time = random.randint(5, 10)
            print(f"等待 {wait_time} 秒后继续...")
            time.sleep(wait_time)

            clickElement(driver, next_ele)
        except Exception as e:
            # Any failure (lookup, click, driver error) ends the paging run.
            print(f"循环终止：{str(e)}")
            return

    print("循环结束")

# Script entry: open the kuwo singer list in the automated browser, then page
# through it by clicking "next page". These statements run at import time
# (there is no __main__ guard).
driver = openMiguHtml('https://www.kuwo.cn/singers')
click_next_page_in_loop(driver)

kidsmiless

kuwo批量获取所有歌手信息过程记录

效果(主要使用charles和python实现)

操作步骤

1、使用抓包工具Charles 的mirror功能

2、然后打开浏览器访问kuwo歌手的列表页，就可以在目录中看到下载文件了

3、rename.py对下载的文件进行批量重命名

使用python，自动调用浏览器模拟自动点击页码，这里只点击到169页