
Quizlet API 已经停用一段时间了,但我想做一个使用其学习集的项目。我花了不少时间寻找可用的 API,但相关说法很有误导性:有人说旧的 API 仍然可用,只是需要一个访问令牌(而令牌已无法生成),也有人说它根本不能用。最后我意识到,在没有 API 的情况下必须多做一些工作:搭建一个后端,从 Quizlet 获取 HTML 并从中提取卡片。我确实找过
quizlet-fetch 和 quizlet-fetcher 之类的软件包,但它们都不管用。这比预期的要困难得多,因为 Quizlet 似乎会拦截请求:每次发出请求时,日志里都会出现 Request failed with status code 403。以下是我目前的代码,如有任何建议,我将不胜感激。
const express = require('express');
const app = express();
const axios = require('axios');
const cheerio = require('cheerio');
const https = require('https');
const httpProxy = require('http-proxy');
// Proxy server
const proxy = httpProxy.createProxyServer();
// On any proxy error, log it and answer the client with a plain-text 500
// instead of leaving the response open.
proxy.on('error', function (err, req, res) {
  console.log(err);
  res.writeHead(500, {
    'Content-Type': 'text/plain'
  });
  res.end('Error');
});
// CORS: attach permissive cross-origin headers to every response, then
// hand the request on to the next handler.
app.use((req, res, next) => {
  const corsHeaders = [
    ['Access-Control-Allow-Origin', '*'],
    ['Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE'],
    ['Access-Control-Allow-Headers', 'Content-Type'],
  ];
  for (const [headerName, headerValue] of corsHeaders) {
    res.header(headerName, headerValue);
  }
  next();
});
/**
 * Fetch the HTML for a Quizlet set.
 *
 * @param {string} url - Absolute URL of the set page to download.
 * @returns {Promise<string>} The response body (`response.data`) from axios.
 * @throws Rejects with the axios error when the request fails (for example
 *   "Request failed with status code 403").
 */
const fetchQuizletSet = async (url) => {
  const config = {
    headers: {
      'Content-Type': 'text/html',
    },
    // NOTE(review): rejectUnauthorized: false turns off TLS certificate
    // verification for this request. Kept as in the original; confirm it is
    // really needed before using this outside local experiments.
    httpsAgent: new https.Agent({
      rejectUnauthorized: false
    })
  };
  // The GET must actually be issued and awaited; without it there is no
  // response object to read `data` from.
  const response = await axios.get(url, config);
  return response.data;
};
/**
 * Parse the HTML for a Quizlet set and extract the card data.
 *
 * @param {string} html - Markup of a set page, loaded with cheerio.
 * @returns {{title: string, cards: Array<{front: string, back: string, image: (string|undefined), audio: (string|undefined)}>}}
 *   The trimmed page title plus one entry per `.SetPageTerms-term` element;
 *   `image` and `audio` are the `src` attributes of the term's media, if any.
 */
const parseQuizletSet = (html) => {
  const $ = cheerio.load(html);
  // Trimmed text content of a selection.
  const textOf = (selection) => selection.text().trim();

  const cards = $(".SetPageTerms-term")
    .toArray()
    .map((termNode) => {
      const term = $(termNode);
      return {
        front: textOf(term.find(".SetPageTerm-wordText")),
        back: textOf(term.find(".SetPageTerm-definitionText")),
        image: term.find(".SetPageTerm-media img").attr("src"),
        audio: term.find(".SetPageTerm-media audio source").attr("src"),
      };
    });

  const title = textOf($("h1.PageTitle-heading"));
  return { title, cards };
};
// Define a route to handle Quizlet set requests.
// GET /quizlet-set/:setId responds with the parsed set as JSON, or with
// HTTP 500 and the error message when fetching or parsing fails.
app.get('/quizlet-set/:setId', async (req, res) => {
  const setId = req.params.setId;
  // Encode the id so a crafted parameter cannot change the upstream path or query.
  const url = `https://quizlet.com/${encodeURIComponent(setId)}`;
  try {
    const html = await fetchQuizletSet(url);
    const data = parseQuizletSet(html);
    res.json(data);
  } catch (error) {
    console.log(error);
    res.status(500).send(error.message);
  }
});
// Start the server on the port named by the PORT environment variable,
// falling back to 3000 when it is unset or empty.
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server listening on port ${PORT}`);
});
回答如下:
您发布的代码中的 Axios 请求无法通过 Cloudflare 的防护,因此只会返回错误
Request failed with status code 403。可以像下面的代码那样改用 puppeteer-extra 来解决这个问题。
package.json : 添加以下内容
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
代码:
const express = require('express');
const app = express();
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
// CORS: every response gets permissive cross-origin headers before the
// request continues down the middleware chain.
app.use((req, res, next) => {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE',
    'Access-Control-Allow-Headers': 'Content-Type',
  };
  Object.entries(corsHeaders).forEach(([headerName, headerValue]) => {
    res.header(headerName, headerValue);
  });
  next();
});
/**
 * Open a Quizlet set page in headless Chrome (puppeteer-extra) and extract
 * its cards from the JSON embedded in the `__NEXT_DATA__` script tag.
 *
 * @param {string} url - Absolute URL of the set page.
 * @returns {Promise<{url: string, socialImg: string, title: string, cards: Array<{front: string, frontAudio: string, back: string, backAudio: string, image: string}>}|null>}
 *   The extracted set, or `null` (after logging the error) when the embedded
 *   JSON cannot be parsed into the expected shape.
 * @throws Propagates browser errors (page creation, navigation timeout,
 *   missing selector). The browser is closed in every case.
 */
const getQuizletSet = async (url) => {
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled',
      '--window-size=1920,2700',
      '--lang=en-US,en;q=0.9'
    ]
  });
  // try/finally so the Chrome process is released even when navigation throws.
  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36');
    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    await page.waitForSelector('script[id=__NEXT_DATA__]');
    const raw = await page.$eval('script[id=__NEXT_DATA__]', el => el.textContent);
    try {
      // Inspect pageProps or dehydratedReduxStateKey if you need anything more from the page.
      const { setTitle, canonicalUrl, socialImageUrl, dehydratedReduxStateKey } = JSON.parse(raw).props.pageProps;
      // NOTE(review): the key path `setPage.termIdToTermsMap` is assumed to be
      // where the terms live in the dehydrated state; verify against a live page.
      const terms = Object.values(JSON.parse(dehydratedReduxStateKey).setPage.termIdToTermsMap);
      const cards = terms.map(({
        word: front,
        _wordAudioUrl: frontAudio,
        definition: back,
        _definitionAudioUrl: backAudio,
        _imageUrl: image
      }) => ({
        front,
        frontAudio,
        back: back.replace(/[\r\n]/g, ''), // remove line breaks
        backAudio,
        image
      }));
      return { url: canonicalUrl, socialImg: socialImageUrl, title: setTitle, cards };
    } catch (error) {
      // Unexpected page structure: report it and signal "no data" to the caller.
      console.log(error);
      return null;
    }
  } finally {
    await browser.close();
  }
};
// Define a route to handle Quizlet set requests.
// GET /quizlet-set/:setId responds with the scraped set as JSON, or with
// HTTP 500 and the error message when scraping throws.
app.get('/quizlet-set/:setId', async (req, res) => {
  const setId = req.params.setId;
  // Encode the id so a crafted parameter cannot change the upstream path or query.
  const url = `https://quizlet.com/${encodeURIComponent(setId)}`;
  try {
    const data = await getQuizletSet(url);
    res.json(data);
  } catch (error) {
    console.log(error);
    res.status(500).send(error.message);
  }
});
// Start the server on the port named by the PORT environment variable,
// falling back to 3000 when it is unset or empty.
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server listening on port ${PORT}`);
});
本文发布于:2024-05-30 16:14:16,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/1717391191275543.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
| 留言与评论(共有 0 条评论) |