Here’s the HTML code.
<!--
  Sample of the listing markup. Each .img-item card holds:
    - an <a> wrapping the cover <img> (its href is empty in this sample), and
    - a .in-lable <div> whose nested <a> carries the link and whose
      span.title carries the title text.
-->
<div class="list-row">
<div class="list-item">
<div class="imgframe">
<div class="img-wrap">
<div class="img-item">
<a href="">
<img src="img1">
</a>
<div class="in-lable">
<a href="link1">
<span class="title">title1</span>
</a>
</div>
</div>
<div class="img-item">
<a href="">
<img src="img2">
</a>
<div class="in-lable">
<a href="link2">
<span class="title">title2</span>
</a>
</div>
</div>
</div>
</div>
</div>
</div>
Here’s my Puppeteer code.
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteer.use(StealthPlugin());
// Scrape every `.img-item` card on the listing page and print the result as an
// array of `{ title, img, link }` objects (one object per card).
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    targetFilter: (target) => target.type() !== "other",
  });
  try {
    // Opened inside the try so the browser is still closed if this step fails.
    const page = await browser.newPage();
    const url = 'https://booktoki351.com/novel?book=%EC%9D%BC%EB%B0%98%EC%86%8C%EC%84%A4';
    await page.goto(url, { timeout: 50000, waitUntil: 'load' });
    await page.waitForSelector('#webtoon-list');
    // The callback runs in the page context, so it may only use DOM APIs and
    // must return plain serializable data.
    const titlesAndImage = await page.evaluate(() => {
      const listItems = Array.from(document.querySelectorAll('.img-item'));
      return listItems.map((item) => {
        // `.in-lable` is a <div>; the href lives on the <a> nested inside it,
        // so querying the div itself always yielded a null link.
        const anchor = item.querySelector('.in-lable a');
        const titleNode = item.querySelector('.in-lable span.title');
        const image = item.querySelector('img');
        return {
          // trim() drops the whitespace/newlines surrounding the span markup.
          title: titleNode?.textContent.trim() ?? null,
          // `.src` is the URL as resolved by the browser.
          img: image?.src ?? null,
          // getAttribute keeps the href exactly as written in the markup.
          link: anchor?.getAttribute('href') ?? null,
        };
      });
    });
    console.log(titlesAndImage);
  } catch (error) {
    console.log(error);
  } finally {
    console.log('done');
    await browser.close();
  }
})();
And here are the snippets I tested in the browser console.
// Collects the `src` of the first <img> found inside each `.img-item` element.
Array.from(document.querySelectorAll('.img-item')).map((itemlist) => itemlist.querySelector('img').src);
// Collects the text content of every `span.title` nested under an `.img-item`.
Array.from(document.querySelectorAll('.img-item span.title')).map((itemlist) => itemlist.textContent);
Both work, but I want to get all the values inside a single map over the img-item elements (or whatever selector fits the example HTML).
I’m expecting an output like this.
[{
title: 'title1',
img: 'img1',
link: 'link1'
},
{
title: 'title2',
img: 'img2',
link: 'link2'
},
]
2
Answers
Currently, this is what I did: I get the title and the image link separately.
Maybe someone can help me merge them into a single process.
The following solution uses Puppeteer’s "multi-selector" `$$` to retrieve all elements with class `img-item` with one asynchronous operation. It then starts parallel asynchronous operations per `elem`, which compute the DOM representation `e` and access its desired properties with synchronous DOM operations (`querySelector`).