spiderdemo Easy Challenges

T1: Request Header Control

I started by capturing the requests, but the response kept showing "爬虫模式" (crawler mode) and I couldn't tell what was wrong.
Since the challenge title says it's a request-header anti-crawling check, I commented out headers one at a time and found that the data comes back as soon as the cache-control field is dropped.
Pretty simple. Code below:

import httpx
from loguru import logger
import numpy as np
headers = {
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "zh-CN,zh;q=0.9",
    # "cache-control": "no-cache",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.spiderdemo.cn/sec1/header_check/",
    "sec-ch-ua": "\"Chromium\";v=\"140\", \"Not=A?Brand\";v=\"24\", \"Google Chrome\";v=\"140\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
    "x-requested-with": "XMLHttpRequest"
}
cookies = {
    "sessionid": ""
}

params = {
    "challenge_type": "header_check"
}
sum = 0
for page in range(1, 101):
    url = f"https://www.spiderdemo.cn/sec1/api/challenge/page/{page}/"
    response = httpx.get(url, headers=headers, cookies=cookies, params=params)
    page_data = response.json()['page_data']
    logger.info(f"page: {page}, {page_data}")
    page_data = np.array(page_data)
    sum += page_data.sum()

    logger.info(response)

logger.success(f"sum: {sum}")
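
As a side note: instead of guessing, you can locate the offending header by dropping one at a time and checking when the block page goes away. A minimal sketch, assuming the blocked response contains the "爬虫" marker text; it reuses the headers/params dicts from the script above, with cache-control added back in:

def find_blocking_header(url, full_headers, params, marker="爬虫"):
    """Drop one header at a time; report removals that make the block disappear."""
    culprits = []
    for name in full_headers:
        trimmed = {k: v for k, v in full_headers.items() if k != name}
        resp = httpx.get(url, headers=trimmed, params=params)
        if marker not in resp.text:
            logger.info(f"dropping '{name}' returns real data")
            culprits.append(name)
    return culprits


find_blocking_header(
    "https://www.spiderdemo.cn/sec1/api/challenge/page/1/",
    {**headers, "cache-control": "no-cache"},  # full header set with the culprit restored
    params,
)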

T2: Digest Algorithms

After some debugging, there are four encrypted parameters: x-request-token and x-verify-code in the request headers, plus sign and code in the query parameters. Trace the call stack.

This one is fairly obvious: all four values are read out of o, and o is the return value of the r method, so I just lifted that code out.
The algorithms inside are standard, so the crypto libraries can be called directly.
The JS code is below; wrap it in a small API and call that:

const express = require('express');
const {sha3_256} = require('js-sha3');
const CryptoJS = require("crypto-js")
// Create the Express application instance
const app = express();

const o = "spiderdemo_sha_salt_2025";
// Lifted from the site's obfuscated JS: derives the four signature values from `${e}_${t}_${n}`
function r(e, t, n) {
    const s = `${e}_${t}_${n}`
        , a = (r = s,
    c = "spiderdemo_hmac_secret_2025",
    CryptoJS.HmacSHA256(r, c).toString());
    var r, c;
    const i = function(e) {
        return CryptoJS.MD5(e).toString()
    }(s + "spiderdemo_md5_salt_2025")
        , l = function(e) {
        return CryptoJS.SHA256(e).toString()
    }(s + o)
        , u = function(e) {
        return sha3_256(e)
    }(s + o);
    return {
        request_token: a,
        verify_code: i,
        sign: l,
        code: u
    }
}


app.use(express.json());
app.use(express.urlencoded({ extended: true }));
app.get('/api/get-sign', (req, res) => {
    try {
        const page = req.query.page ? parseInt(req.query.page) : 1;
        const salt = "hash_challenge";
        const timestamp = req.query.timestamp ? req.query.timestamp : Date.now();
        
        const signData = r(page, salt, timestamp);
        
        res.status(200).json({
            code: 200,
            message: "签名生成成功",
            data: signData
        });
    } catch (error) {
        res.status(500).json({
            code: 500,
            message: "签名生成失败",
            error: error.message
        });
    }
});
const PORT = 3000;
app.listen(PORT, () => {
    console.log(`Server started at http://localhost:${PORT}`);
    console.log(`Example GET request: http://localhost:${PORT}/api/get-sign?page=1&timestamp=1735689600000`);
});
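
Since r() only chains standard digests (HMAC-SHA256, MD5, SHA-256, SHA3-256), the Node service isn't strictly required; the same values can be computed in pure Python. A sketch, with the salts copied from the JS above; how the four values are wired into the actual request, beyond the header and parameter names noted earlier, is an assumption rather than something verified here:

import time
import hmac
import hashlib

HMAC_SECRET = "spiderdemo_hmac_secret_2025"
MD5_SALT = "spiderdemo_md5_salt_2025"
SHA_SALT = "spiderdemo_sha_salt_2025"  # the constant named o in the JS


def make_sign(page, salt="hash_challenge", timestamp=None):
    """Python equivalent of the lifted r(e, t, n) function."""
    if timestamp is None:
        timestamp = int(time.time() * 1000)  # Date.now() in JS is in milliseconds
    s = f"{page}_{salt}_{timestamp}"
    return {
        "request_token": hmac.new(HMAC_SECRET.encode(), s.encode(), hashlib.sha256).hexdigest(),
        "verify_code": hashlib.md5((s + MD5_SALT).encode()).hexdigest(),
        "sign": hashlib.sha256((s + SHA_SALT).encode()).hexdigest(),
        "code": hashlib.sha3_256((s + SHA_SALT).encode()).hexdigest(),
        "timestamp": timestamp,
    }


if __name__ == "__main__":
    sig = make_sign(1, timestamp=1735689600000)
    # x-request-token / x-verify-code go in the headers, sign / code in the query
    # string; whether additional parameters are required is not checked here.
    print({"x-request-token": sig["request_token"], "x-verify-code": sig["verify_code"]})
    print({"sign": sig["sign"], "code": sig["code"]})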