From 514263c01cd5404831bce61fdebd3f315a285539 Mon Sep 17 00:00:00 2001
From: Tom Dooner
Date: Tue, 15 Sep 2020 10:54:58 -0700
Subject: [PATCH] Add `last_pushed_within` field to project index

For multiple types of our users (CfA Staff, Brigade Leader, Project
Leaders), being able to tell which projects are still active is a
crucial aspect of the index.

In #26 we discuss using a bucketed approach so as to not create
unnecessary noise by committing the timestamp for every update.

This commit implements a coarse timestamp: for projects updated within
the last week, month, year, or over a year ago.
---
 crawler/run.js | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/crawler/run.js b/crawler/run.js
index 891d3ec8ff2..83e72af325a 100755
--- a/crawler/run.js
+++ b/crawler/run.js
@@ -24,6 +24,33 @@ const githubAxios = axios.create({
         : null
 });
 
+// Bucket widths for lastPushedWithin, in milliseconds (Date arithmetic
+// yields epoch milliseconds, so the thresholds must use the same unit)
+const ONE_WEEK = 1000 * 60 * 60 * 24 * 7;
+const ONE_MONTH = 1000 * 60 * 60 * 24 * 30;
+const ONE_YEAR = 1000 * 60 * 60 * 24 * 365;
+
+/**
+ * Calculate the bucket for when the project was most recently pushed.
+ *
+ * @param {*} repoPushedAt - last-push timestamp, in any form accepted by
+ *     the Date constructor
+ * @returns {string} 'week', 'month', 'year', or 'over_a_year'; a value
+ *     that does not parse to a valid date yields 'over_a_year'
+ */
+function lastPushedWithin(repoPushedAt) {
+    const elapsed = Date.now() - new Date(repoPushedAt).getTime();
+    if (elapsed < ONE_WEEK) {
+        return 'week';
+    } else if (elapsed < ONE_MONTH) {
+        return 'month';
+    } else if (elapsed < ONE_YEAR) {
+        return 'year';
+    } else {
+        return 'over_a_year';
+    }
+}
+
 require('yargs')
     .command({
         command: '$0',
@@ -381,7 +408,8 @@ async function loadGithubOrgProjects(repo, username) {
                 git_url: repo.git_url,
                 git_branch: repo.default_branch,
                 link_url: repo.homepage || null,
-                topics: repo.topics.length ? repo.topics : null
+                topics: repo.topics.length ? repo.topics : null,
+                last_pushed_within: lastPushedWithin(repo.pushed_at),
             };
             const toml = GitSheets.stringifyRecord(projectData);
             const blob = await tree.writeChild(`${repo.name}.toml`, toml);
@@ -632,6 +660,8 @@ async function loadFeedProjects(repo, projectsListUrl) {
                 if (projectData.topics) {
                     projectData.topics = projectData.topics.sort();
                 }
+
+                projectData.last_pushed_within = lastPushedWithin(response.data.pushed_at);
             } catch (err) {
                 if (err.response && err.response.status == 404) {
                     projectData.flags = [ 'github_404' ]