Skip to content

Commit

Permalink
Revert "Gets sitelist from database"
Browse files Browse the repository at this point in the history
This reverts commit c6b1029.
  • Loading branch information
olrafa committed Oct 30, 2023
1 parent c6b1029 commit f11dc1f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 19 deletions.
41 changes: 41 additions & 0 deletions constants.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Terms to look for; index.mjs calls scrapeWebsiteForTerm once per term for
// every entry in WEBSITES.
export const SEARCH_TERMS = ["Neymar"];

/**
 * Media outlets visited by the scraper.
 *
 * index.mjs iterates this list and hands each entry's `mediaOutlet` and `url`
 * to scrapeWebsiteForTerm together with every search term.
 *
 * Every URL uses HTTPS; the UOL entry previously used plain HTTP, unlike the
 * rest of the list.
 *
 * @type {Array<{ mediaOutlet: string, url: string }>}
 */
export const WEBSITES = [
  { mediaOutlet: "UOL", url: "https://www.uol.com.br" },
  { mediaOutlet: "Folha de S. Paulo", url: "https://www.folha.uol.com.br/" },
  { mediaOutlet: "Estado de S. Paulo", url: "https://www.estadao.com.br/" },
  { mediaOutlet: "Globo", url: "https://www.globo.com/" },
  { mediaOutlet: "G1", url: "https://g1.globo.com/" },
  { mediaOutlet: "O Globo", url: "https://oglobo.globo.com/" },
  { mediaOutlet: "R7", url: "https://www.r7.com/" },
  { mediaOutlet: "Jornal do Brasil", url: "https://www.jb.com.br/" },
  { mediaOutlet: "Globo Esporte", url: "https://globoesporte.globo.com/" },
  { mediaOutlet: "ESPN", url: "https://www.espn.com.br/" },
  { mediaOutlet: "SporTV", url: "https://sportv.globo.com/" },
  { mediaOutlet: "TNT Sports", url: "https://tntsports.com.br/" },
  { mediaOutlet: "CNN Brasil", url: "https://www.cnnbrasil.com.br/" },
  { mediaOutlet: "Veja", url: "https://veja.abril.com.br/" },
  { mediaOutlet: "Terra", url: "https://www.terra.com.br/" },
  { mediaOutlet: "IG", url: "https://www.ig.com.br/" },
  { mediaOutlet: "Nexo", url: "https://www.nexojornal.com.br/" },
  { mediaOutlet: "Metrópoles", url: "https://www.metropoles.com/" },
  { mediaOutlet: "O Dia", url: "https://odia.ig.com.br/" },
  {
    mediaOutlet: "Diário do Nordeste",
    url: "https://diariodonordeste.verdesmares.com.br/",
  },
  {
    mediaOutlet: "Correio Braziliense",
    url: "https://www.correiobraziliense.com.br/",
  },
  { mediaOutlet: "Estado de Minas", url: "https://www.em.com.br/" },
  { mediaOutlet: "O Povo", url: "https://www.opovo.com.br/" },
  { mediaOutlet: "Zero Hora", url: "https://gauchazh.clicrbs.com.br/" },
  {
    mediaOutlet: "Diário de Pernambuco",
    url: "https://www.diariodepernambuco.com.br/",
  },
  { mediaOutlet: "Quem", url: "https://revistaquem.globo.com/" },
  { mediaOutlet: "Caras", url: "https://caras.uol.com.br/" },
  { mediaOutlet: "Contigo", url: "https://contigo.uol.com.br/" },
];
24 changes: 5 additions & 19 deletions index.mjs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import puppeteer from "puppeteer";
import { client } from "./config.mjs";
import { SEARCH_TERMS, WEBSITES } from "./constants.mjs";

const { launch } = puppeteer;

export const SEARCH_TERMS = ["Neymar"];

const scrapeWebsiteForTerm = async (mediaOutlet, url, searchTerm) => {
const browser = await launch({ headless: "new" });
const page = await browser.newPage();
Expand Down Expand Up @@ -45,27 +44,14 @@ const scrapeWebsiteForTerm = async (mediaOutlet, url, searchTerm) => {
}
};

const runScrapingSequentially = async (siteList) => {
const runScrapingSequentially = async () => {
for (const searchTerm of SEARCH_TERMS) {
for (const { site, url } of siteList) {
await scrapeWebsiteForTerm(site, url, searchTerm);
for (const { mediaOutlet, url } of WEBSITES) {
await scrapeWebsiteForTerm(mediaOutlet, url, searchTerm);
}
}
console.log("Search finished at", new Date());
process.exit();
};

/**
 * Fetches the list of sites from the remote endpoint and, when the payload is
 * an array, schedules runScrapingSequentially with it five seconds later.
 *
 * Any failure (network error, non-OK response, invalid JSON, non-array
 * payload, or an error thrown by the scheduled run) is logged with
 * console.error and not rethrown, so the returned promise always fulfils.
 *
 * @returns {Promise<void>}
 */
const getSites = async () => {
  try {
    const response = await fetch("https://neymarmeter.vercel.app/sites");
    if (!response.ok) {
      throw new Error("Something went wrong with the request.");
    }

    const siteList = await response.json();
    // A truthy non-array payload (e.g. an error object) would only fail later,
    // inside the timer callback, so reject it here where it is caught.
    if (!Array.isArray(siteList)) {
      throw new Error("Expected the site list to be an array.");
    }

    setTimeout(async () => {
      // The timer callback runs outside the surrounding try/catch, so the run
      // needs its own handler to avoid an unhandled rejection.
      try {
        await runScrapingSequentially(siteList);
      } catch (error) {
        console.error("Error:", error);
      }
    }, 5000);
  } catch (error) {
    console.error("Error:", error);
  }
};

getSites();
setTimeout(() => runScrapingSequentially(), 5000);

0 comments on commit f11dc1f

Please sign in to comment.