const articles = await page.$$eval('.SearchArticleResult_article__wIpzh', (elements, dateFilter) => {
console.log("Articles before filtering:", elements.length); // Print number of articles before filtering
const allArticles = elements.map(element => {
const urlElement = element; // The element itself is the anchor containing the URL
const titleElement = element.querySelector('h3.type__h5'); // Selector for the title
const sourceElement = element.querySelectorAll('.SearchArticleResult_articleMeta__QpcRj')[0]; // First span for source
const dateElement = element.querySelectorAll('.SearchArticleResult_articleMeta__QpcRj')[1]; // Second span for date
const dateText = dateElement ? dateElement.textContent.trim() : null;
const date = new Date(dateText); // Convert date text to Date object
return {
url: urlElement ? urlElement.href : null,
title: titleElement ? titleElement.textContent.trim() : null,
source: sourceElement ? sourceElement.textContent.trim() : null,
date: date,
dateText: dateText // Keep the original date text for reference
};
});
const filteredArticles = allArticles.filter(article => article.date > new Date(dateFilter));
console.log("Articles after filtering:", filteredArticles.length); // Print number of articles after filtering
return filteredArticles;
}, config.dateFilter); // Pass dateFilter as an argument
log.info(`Found ${articles.length} articles.`);
await Dataset.pushData(articles);
log.info(`Saved articles from current page.`);
// log.info(`Processing ${JSON.stringify(articles)}`);
// Insert articles after scraping
await insertArticles(articles);
This section of code isn’t running. It works when I just have the articles variable without the allarticles variable inside of it.