Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce audit for broken backlinks SITES-18417 #77

Merged
merged 25 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
af5e795
feat: introduce backlink audit
dzehnder Jan 12, 2024
e4b310a
test: adding unit tests
dzehnder Jan 12, 2024
adef814
chore: ahrefs-client
iuliag Jan 16, 2024
2e9b731
chore: ahrefs-client
iuliag Jan 16, 2024
26034bc
chore: adjust reporting fields to reqs
iuliag Jan 17, 2024
af4fc56
chore: rename
iuliag Jan 17, 2024
2f27f52
chore: coverage
iuliag Jan 17, 2024
0ab70ad
chore: check site and disabled, store audit result
iuliag Jan 17, 2024
245e13b
chore: merge branch 'main' of github.com:adobe-rnd/spacecat-audit-wor…
iuliag Jan 18, 2024
87c2b31
chore: error
iuliag Jan 18, 2024
8b4182c
chore: review
iuliag Jan 18, 2024
0235043
chore: merge branch 'main' of github.com:adobe-rnd/spacecat-audit-wor…
iuliag Jan 18, 2024
7122242
chore: update dep
iuliag Jan 18, 2024
5bdb401
chore: merge branch 'main' of github.com:adobe-rnd/spacecat-audit-wor…
iuliag Jan 18, 2024
a035fa0
chore: update dep
iuliag Jan 18, 2024
02789f0
chore: update dep
iuliag Jan 19, 2024
aa6d982
chore: log number of broken backlinks
iuliag Jan 19, 2024
3305363
chore: log number of broken backlinks
iuliag Jan 19, 2024
3998d20
chore: for the sake of testing
iuliag Jan 19, 2024
2f57602
fix: www and non-www
iuliag Jan 19, 2024
0c1e71a
chore: review
iuliag Jan 22, 2024
cffb2b4
chore: review
iuliag Jan 22, 2024
c9aa8e0
chore: audit result with multiple urls (www, non-www) should be object
iuliag Jan 23, 2024
0c1c0c5
chore: merge branch 'main' of github.com:adobe/spacecat-audit-worker …
iuliag Jan 23, 2024
a517fd1
chore: merge branch 'main' of github.com:adobe/spacecat-audit-worker …
iuliag Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/backlinks/handler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright 2024 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import {
internalServerError, noContent, notFound, ok,
} from '@adobe/spacecat-shared-http-utils';
import AhrefsAPIClient from '../support/ahrefs-client.js';
import { retrieveSiteBySiteId } from '../utils/data-access.js';

/**
 * Runs the broken-backlinks audit for a single site.
 *
 * Looks up the site, honours the site-wide and per-type "disabled" switches,
 * fetches broken backlinks through the Ahrefs client, stores the audit via
 * `dataAccess.addAudit` and publishes the result to the audit results queue.
 *
 * @param {object} message - Audit request. `type` is the audit type, `url`
 *   carries the site ID, and `auditContext` is forwarded untouched to the queue.
 * @param {object} context - Invocation context providing `dataAccess`, `log`,
 *   `sqs` and `env.AUDIT_RESULTS_QUEUE_URL`.
 * @returns {Promise<object>} `notFound` when the site does not exist, `ok` when
 *   audits are disabled (globally or for this type), `noContent` on success and
 *   `internalServerError` when any step throws.
 */
export default async function auditBrokenBacklinks(message, context) {
  const { type, url: siteId, auditContext } = message;
  const { dataAccess, log, sqs } = context;
  const {
    AUDIT_RESULTS_QUEUE_URL: queueUrl,
  } = context.env;

  try {
    log.info(`Received ${type} audit request for siteId: ${siteId}`);

    const site = await retrieveSiteBySiteId(dataAccess, siteId, log);
    if (!site) {
      return notFound('Site not found');
    }

    const auditConfig = site.getAuditConfig();
    if (auditConfig.auditsDisabled()) {
      log.info(`Audits disabled for site ${siteId}`);
      return ok();
    }

    // A missing per-type config means the audit type is not disabled.
    if (auditConfig.getAuditTypeConfig(type)?.disabled()) {
      log.info(`Audit type ${type} disabled for site ${siteId}`);
      return ok();
    }

    const ahrefsAPIClient = AhrefsAPIClient.createFrom(context);
    // NOTE(review): the site ID is passed as the Ahrefs target here; confirm
    // whether the site's base URL should be used instead.
    const data = await ahrefsAPIClient.getBrokenBacklinks(siteId);

    const auditResult = {
      brokenBacklinks: data.backlinks,
    };
    const auditData = {
      siteId: site.getId(),
      auditedAt: new Date().toISOString(),
      auditResult,
    };

    await dataAccess.addAudit(auditData);

    await sqs.sendMessage(queueUrl, {
      type,
      url: siteId,
      auditContext,
      auditResult,
    });

    log.info(`Successfully audited ${siteId} for ${type} type audit`);
    return noContent();
  } catch (e) {
    // Record the failure before translating it into a 500 so the cause (and
    // stack) is not lost to operators.
    log.error(`${type} type audit for ${siteId} failed with error: ${e.message}`, e);
    return internalServerError(`Internal server error: ${e.message}`);
  }
}
2 changes: 2 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ import apex from './apex/handler.js';
import cwv from './cwv/handler.js';
import lhs from './lhs/handler.js';
import notfound from './notfound/handler.js';
import backlinks from './backlinks/handler.js';

// Audit handlers keyed by a string identifier. Both 'lhs-mobile' and
// 'lhs-desktop' share the single lhs handler.
// NOTE(review): presumably indexed by the incoming message's `type`; the
// lookup code is outside this excerpt, so confirm against the dispatcher.
const HANDLERS = {
apex,
cwv,
'lhs-mobile': lhs,
'lhs-desktop': lhs,
404: notfound,
'broken-backlinks': backlinks,
};

/**
Expand Down
86 changes: 86 additions & 0 deletions src/support/ahrefs-client.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
iuliag marked this conversation as resolved.
Show resolved Hide resolved
* Copyright 2024 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { isValidUrl } from '@adobe/spacecat-shared-utils';
import { fetch } from './utils.js';

/**
 * Thin client for the Ahrefs HTTP API. Issues authenticated GET requests and
 * returns the parsed JSON body.
 */
export default class AhrefsAPIClient {
  /**
   * Creates a client from the invocation context.
   *
   * @param {object} context - Context whose `env` holds `AHREFS_API_BASE_URL`
   *   and `AHREFS_API_KEY`.
   * @returns {AhrefsAPIClient} The configured client.
   * @throws {Error} If the configured base URL is not a valid URL.
   */
  static createFrom(context) {
    const { AHREFS_API_BASE_URL: apiBaseUrl, AHREFS_API_KEY: apiKey } = context.env;
    return new AhrefsAPIClient({ apiBaseUrl, apiKey });
  }

  /**
   * @param {object} config - Client configuration.
   * @param {string} config.apiBaseUrl - Base URL every endpoint is appended to.
   * @param {string} config.apiKey - Key sent as a Bearer token.
   * @throws {Error} If `apiBaseUrl` is not a valid URL.
   */
  constructor(config) {
    const { apiKey, apiBaseUrl } = config;

    if (!isValidUrl(apiBaseUrl)) {
      throw new Error(`Invalid Ahrefs API Base URL: ${apiBaseUrl}`);
    }

    this.apiBaseUrl = apiBaseUrl;
    this.apiKey = apiKey;
  }

  /**
   * Sends a GET request to the given endpoint and returns the parsed JSON body.
   *
   * @param {string} endpoint - Path appended verbatim to the base URL.
   * @param {object} [queryParams={}] - Query parameters; keys and values are
   *   URI-component encoded.
   * @returns {Promise<object>} The parsed response body.
   * @throws {Error} If the response status is not OK or the body is not valid
   *   JSON (the parse error is attached as `cause`).
   */
  async sendRequest(endpoint, queryParams = {}) {
    const encodedPairs = Object.entries(queryParams)
      .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`);
    const queryString = encodedPairs.length > 0 ? `?${encodedPairs.join('&')}` : '';

    const response = await fetch(`${this.apiBaseUrl}${endpoint}${queryString}`, {
      method: 'GET',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.apiKey}`,
      },
    });

    if (!response.ok) {
      throw new Error(`Ahrefs API request failed with status: ${response.status}`);
    }

    try {
      return await response.json();
    } catch (e) {
      // Keep the original parse error reachable for debugging.
      throw new Error(`Error parsing Ahrefs API response: ${e.message}`, { cause: e });
    }
  }

  /**
   * Fetches broken backlinks for a target, ordered by `traffic_domain`
   * descending and restricted by the filter defined below.
   *
   * @param {string} url - Target sent to Ahrefs (queried in `prefix` mode).
   * @param {number} [limit=50] - Maximum number of results to request.
   * @returns {Promise<object>} The parsed Ahrefs response.
   * @throws {Error} If the request fails or the response cannot be parsed.
   */
  async getBrokenBacklinks(url, limit = 50) {
    const filter = {
      and: [
        { field: 'is_dofollow', is: ['eq', 1] },
        { field: 'is_content', is: ['eq', 1] },
        { field: 'domain_rating_source', is: ['gte', 29.5] },
        { field: 'traffic_domain', is: ['gte', 500] },
        { field: 'links_external', is: ['lte', 300] },
      ],
    };

    const queryParams = {
      select: [
        'title',
        'url_from',
        'url_to',
      ].join(','),
      limit,
      mode: 'prefix',
      order_by: 'traffic_domain:desc',
      target: url,
      output: 'json',
      where: JSON.stringify(filter),
    };

    return this.sendRequest('/site-explorer/broken-backlinks', queryParams);
  }
}
154 changes: 154 additions & 0 deletions test/audits/backlinks.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Copyright 2024 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

/* eslint-env mocha */

import { createSite } from '@adobe/spacecat-shared-data-access/src/models/site.js';

import chai from 'chai';
import chaiAsPromised from 'chai-as-promised';
import sinon from 'sinon';
import sinonChai from 'sinon-chai';
import nock from 'nock';
import auditBrokenBacklinks from '../../src/backlinks/handler.js';

chai.use(sinonChai);
chai.use(chaiAsPromised);
const { expect } = chai;

// Unit tests for the broken-backlinks audit handler. The Ahrefs API is
// intercepted with nock; data access, logging and SQS are sinon fakes created
// on a single sandbox so that one restore call cleans up all of them.
describe('Backlinks Tests', () => {
  let message;
  let context;
  let mockLog;
  let mockDataAccess;

  const sandbox = sinon.createSandbox();

  const siteData = {
    id: 'site1',
    baseURL: 'https://adobe.com',
  };

  const site = createSite(siteData);

  // Payload served by the mocked Ahrefs endpoint.
  const auditResult = {
    backlinks: [
      {
        title: 'backlink title',
        url_from: 'url-from',
        url_to: 'url-to',
      },
      {
        title: 'backlink title 2',
        url_from: 'url-from-2',
        url_to: 'url-to-2',
      },
    ],
  };

  beforeEach(() => {
    mockDataAccess = {
      getSiteByID: sandbox.stub().resolves(site),
      addAudit: sandbox.stub(),
    };

    message = {
      type: 'broken-backlinks',
      url: 'site1',
    };

    mockLog = {
      info: sandbox.spy(),
      warn: sandbox.spy(),
      error: sandbox.spy(),
    };

    context = {
      log: mockLog,
      env: {
        AHREFS_API_BASE_URL: 'https://ahrefs.com',
        AHREFS_API_KEY: 'ahrefs-token',
        AUDIT_RESULTS_QUEUE_URL: 'queueUrl',
      },
      dataAccess: mockDataAccess,
      sqs: {
        sendMessage: sandbox.stub().resolves(),
      },
    };
  });

  afterEach(() => {
    nock.cleanAll();
    // Restore through the sandbox that created the fakes.
    sandbox.restore();
  });

  it('should successfully perform an audit to detect broken backlinks', async () => {
    nock('https://ahrefs.com')
      .get(/.*/)
      .reply(200, auditResult);

    const response = await auditBrokenBacklinks(message, context);

    expect(response.status).to.equal(204);
    expect(mockDataAccess.addAudit).to.have.been.calledOnce;
    expect(context.sqs.sendMessage).to.have.been.calledOnce;
    expect(context.log.info).to.have.been.calledWith('Successfully audited site1 for broken-backlinks type audit');
  });

  it('returns a 404 when site does not exist', async () => {
    mockDataAccess.getSiteByID.resolves(null);

    const response = await auditBrokenBacklinks(message, context);

    expect(response.status).to.equal(404);
  });

  it('returns a 200 when site audits are disabled', async () => {
    const siteWithDisabledAudits = createSite({
      ...siteData,
      auditConfig: { auditsDisabled: true },
    });

    mockDataAccess.getSiteByID.resolves(siteWithDisabledAudits);

    const response = await auditBrokenBacklinks(message, context);

    expect(response.status).to.equal(200);
    expect(mockLog.info).to.have.been.calledTwice;
    expect(mockLog.info).to.have.been.calledWith('Audits disabled for site site1');
  });

  it('returns a 200 when audits for type are disabled', async () => {
    const siteWithDisabledAudits = createSite({
      ...siteData,
      auditConfig: { auditsDisabled: false, auditTypeConfigs: { 'broken-backlinks': { disabled: true } } },
    });

    mockDataAccess.getSiteByID.resolves(siteWithDisabledAudits);

    const response = await auditBrokenBacklinks(message, context);

    expect(response.status).to.equal(200);
    expect(mockLog.info).to.have.been.calledWith('Audit type broken-backlinks disabled for site site1');
  });

  it('should handle audit api errors gracefully', async () => {
    nock('https://ahrefs.com')
      .get(/.*/)
      .reply(500);

    const response = await auditBrokenBacklinks(message, context);

    expect(response.status).to.equal(500);
    // Nothing may be persisted or published when the upstream API fails.
    expect(mockDataAccess.addAudit).to.not.have.been.called;
    expect(context.sqs.sendMessage).to.not.have.been.called;
  });
});
Loading
Loading