-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
107 lines (90 loc) · 2.99 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
const puppeteer = require("puppeteer");
const download = require("image-downloader");
const sss = require("shortid");
/**
 * Scrapes IMDb name-search result pages and downloads actor poster images.
 *
 * Each result page is visited in order. The first three actors listed on a
 * page are collected; for the first one of those the actor's own page is
 * opened, the poster URL is read, an id is derived from the link, and the
 * image is saved under `./b/`.
 *
 * Any navigation or evaluation failure rejects the returned promise, and the
 * browser is closed whether scraping succeeds or fails.
 *
 * @param {Object} [options]
 * @param {string[]} [options.pages] - Result-page URLs to visit, in order.
 *   Defaults to the first two pages of the male-actor search.
 * @param {number} [options.settleDelayMs=1000] - Pause, in milliseconds,
 *   after each result-page navigation before the page is read.
 * @returns {Promise<Array<Object>>} Collected records. Every record has
 *   `name` and `link`; records whose actor page was visited also carry `id`
 *   and `linkImage` (`null` when the page had no poster element).
 */
const scrape = async (options = {}) => {
  const {
    // Further result pages follow the same pattern with start=101, 151, ...
    pages = [
      "https://www.imdb.com/search/name?gender=male",
      "https://www.imdb.com/search/name/?gender=male&start=51"
    ],
    settleDelayMs = 1000
  } = options;

  // How many actors of each result page end up in the returned array.
  const ACTORS_PER_PAGE = 3;
  // How many of those actors get their page opened and poster downloaded.
  const DOWNLOADS_PER_PAGE = 1;

  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    return await loopPages(page, pages);
  } finally {
    // Close the browser even when scraping fails part-way through.
    await browser.close();
  }

  /** Resolves after `ms` milliseconds; independent of the Puppeteer version. */
  function delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Visits every result page sequentially and gathers the kept actors. */
  async function loopPages(page, pageUrls) {
    const all = [];
    for (const url of pageUrls) {
      await page.goto(url);
      await delay(settleDelayMs);
      const listed = await getFromPage(page);
      const kept = listed.slice(0, ACTORS_PER_PAGE);
      // slice() copies references, so enriching this sub-list also enriches
      // the matching entries of `kept`.
      await loopActorsInList(kept.slice(0, DOWNLOADS_PER_PAGE));
      all.push(...kept);
    }
    return all;
  }

  /** Reads `{ name, link }` for every listed actor on the loaded page. */
  async function getFromPage(page) {
    // The callback is executed by page.evaluate in the page, so it must not
    // reference any variable from this file.
    return page.evaluate(() => {
      const items = document.querySelectorAll(".lister-item.mode-detail");
      return Array.from(items).map(item => {
        const anchor = item.querySelector(".lister-item-header a");
        return {
          name: anchor.innerText,
          link: anchor.href
        };
      });
    });
  }

  /** Opens each actor's page, records poster URL and id, then downloads. */
  async function loopActorsInList(actors) {
    for (const actor of actors) {
      const actorPage = await browser.newPage();
      try {
        await actorPage.goto(actor.link);
        actor.linkImage = await actorPage.evaluate(() => {
          const poster = document.querySelector("#name-poster");
          return poster ? poster.src : null;
        });
      } finally {
        // Never leave the tab open, even if navigation failed.
        await actorPage.close();
      }
      actor.id = actor.link.split("/").pop();
      if (actor.linkImage) {
        await downloadIMG(actor);
      } else {
        console.warn(`No poster image found for ${actor.name}; skipping download.`);
      }
    }
  }

  /**
   * Saves the actor's poster as `./b/<id>-<first word of name>.jpg`.
   * Best effort: a failed download is logged and does not abort the scrape.
   */
  async function downloadIMG(item) {
    try {
      const { id, linkImage, name } = item;
      const path = `./b/${id}-${name.split(" ")[0]}.jpg`;
      const { filename } = await download.image({
        url: linkImage,
        dest: path
      });
      console.log(filename); // => /path/to/dest/image.jpg
    } catch (e) {
      console.error(e);
    }
  }
};
// Run the scraper and print the collected records once it has finished.
scrape()
  .then(value => {
    console.log("start");
    console.log(value); // Success!
  })
  .catch(error => {
    // Report the failure instead of leaving an unhandled rejection, and
    // signal it to the calling shell through a non-zero exit code.
    console.error(error);
    process.exitCode = 1;
  });