1
0
mirror of https://github.com/vbalien/voca.git synced 2025-12-06 19:36:20 +09:00

refactoring

This commit is contained in:
2022-02-26 03:16:54 +09:00
parent 0bc8aee9ff
commit 0467dd90e1
17 changed files with 47320 additions and 165 deletions

52
src/crawl.ts Normal file
View File

@@ -0,0 +1,52 @@
import { path } from "./deps.ts";
import { JSONType, Voca } from "./types.ts";
// Directory containing this module, derived from its `import.meta.url`.
// Used below to resolve the output file location relative to this source file.
const __dirname = path.dirname(path.fromFileUrl(import.meta.url));
/**
 * Downloads one vocabulary test paper from hackers.co.kr and scrapes the
 * word/answer pairs out of the returned HTML.
 *
 * @param levels Level numbers; sent comma-joined in the `level` query parameter.
 * @param day Day number; sent as both the `day3` and `day4` query parameters.
 * @returns One entry per `<p class="word">` / `<span class="af_answer">` pair,
 *   in the order they appear in the page.
 * @throws Error if the server responds with a non-2xx status, so that an error
 *   page is never mistaken for a paper with zero words.
 */
async function getVocaList(levels: number[], day: number): Promise<Voca[]> {
  // Flags: `g` for matchAll, `i` for case-insensitive tags, `s` so that `.`
  // can cross the newlines between a word and its answer. The leading
  // "<number>. " prefix of each word is matched but not captured.
  const re =
    /<p class="word">(?:\d+?)\. (?<word>.+?)<\/p>.*?<span class="af_answer">(?<answer>.+?)<\/span>/gis;
  const levelParam = levels.join(",");
  const res = await fetch(
    `https://www.hackers.co.kr/?c=s_toeic/new_voca_toeic_testpaper/toeic_study/new_paper&mode=new_view&level=${levelParam}&level_type=&lang_text=2&question=1000&day3=${day}&day4=${day}&day_auto=N&index=1`,
  );
  if (!res.ok) {
    // Discard the unread body before bailing out so the response stream is
    // not left dangling.
    await res.body?.cancel();
    throw new Error(
      `GET level=${levelParam} day=${day} failed: ${res.status} ${res.statusText}`,
    );
  }
  const body = await res.text();
  const vocaList: Voca[] = [];
  for (const m of body.matchAll(re)) {
    const groups = m.groups;
    if (groups === undefined) {
      continue;
    }
    const { word, answer } = groups;
    // Both groups are non-optional in the pattern; this guard only exists to
    // narrow the types without resorting to a type assertion.
    if (word === undefined || answer === undefined) {
      continue;
    }
    vocaList.push({ word, answer });
  }
  return vocaList;
}
// Crawl script: fetch every (level, day) paper one request at a time, sort
// each paper's words case-insensitively, and write the collection as JSON.

/** Orders entries by their upper-cased `word`, using plain string comparison. */
const byWordIgnoringCase = (
  a: { word: string },
  b: { word: string },
): number => {
  const left = a.word.toUpperCase();
  const right = b.word.toUpperCase();
  return left < right ? -1 : left > right ? 1 : 0;
};

const targetLevels = [6, 7, 8, 9];
// Days 1 through 30 inclusive.
const targetDays = Array.from({ length: 30 }, (_, index) => index + 1);
const output: JSONType = { data: [] };

for (const level of targetLevels) {
  for (const day of targetDays) {
    console.log(`GET: ${level}-${day}`);
    // `voca_list` is also the property name in the emitted JSON, so it keeps
    // its snake_case spelling.
    const voca_list = await getVocaList([level], day);
    voca_list.sort(byWordIgnoringCase);
    output.data.push({ level, day, voca_list });
  }
}

const outputPath = path.join(__dirname, "../doc/", "voca.json");
Deno.writeTextFileSync(outputPath, JSON.stringify(output));