Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop deprecated Page fields: site, agency #777

Merged
merged 5 commits into from
Nov 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions app/assets/data/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,6 @@ paths:
- $ref: '#/parameters/chunk_size'
- $ref: '#/parameters/include_total'
- $ref: '#/parameters/sort'
- name: site
in: query
description: Filter results by site.
required: false
type: string
- name: agency
in: query
description: Filter results by agency.
required: false
type: string
- name: url
in: query
description: Match Page url (may include wildcard *).
Expand Down Expand Up @@ -1425,10 +1415,6 @@ definitions:
http://crawler.archive.org/articles/user_manual/glossary.html#surt
title:
type: string
site:
type: string
agency:
type: string
created_at:
type: string
format: datetime
Expand Down
3 changes: 1 addition & 2 deletions app/controllers/api/v0/pages_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def page_collection
# ) match_tags ON match_tags.inner_t_uuid = pages.uuid
# SQL).all

# TODO: remove agency and site here
collection = Page.where(params.permit(:agency, :site, :title))
collection = Page.where(params.permit(:title))

if params.key?(:capture_time)
collection = where_in_range_param(
Expand Down
2 changes: 0 additions & 2 deletions app/jobs/import_versions_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,7 @@ def page_for_record(record, create: true, row:)
return nil unless page

(record['page_maintainers'] || []).each {|name| page.add_maintainer(name)}
page.add_maintainer(record['site_agency']) if record.key?('site_agency')
(record['page_tags'] || []).each {|name| page.add_tag(name)}
page.add_tag("site:#{record['site_name']}") if record.key?('site_name')

page
end
Expand Down
6 changes: 6 additions & 0 deletions db/migrate/20201111182822_remove_site_and_agency_from_page.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class RemoveSiteAndAgencyFromPage < ActiveRecord::Migration[6.0]
def change
remove_column(:pages, :site, :string)
remove_column(:pages, :agency, :string)
end
end
5 changes: 1 addition & 4 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2020_02_16_215324) do
ActiveRecord::Schema.define(version: 2020_11_11_182822) do

# These are extensions that must be enabled in order to support this database
enable_extension "citext"
Expand Down Expand Up @@ -87,14 +87,11 @@
create_table "pages", primary_key: "uuid", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
t.string "url", null: false
t.string "title"
t.string "agency"
t.string "site"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "url_key"
t.boolean "active", default: true
t.integer "status"
t.index ["site"], name: "index_pages_on_site"
t.index ["url"], name: "index_pages_on_url"
t.index ["url_key"], name: "index_pages_on_url_key"
end
Expand Down
32 changes: 0 additions & 32 deletions lib/tasks/rename_site.rake

This file was deleted.

79 changes: 10 additions & 69 deletions test/controllers/api/v0/imports_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,74 +149,15 @@ def teardown
assert_equal(import_data[1][:page_url], versions[0].capture_url)
end

test 'can import data with deprecated `site_agency`, `site_name` fields' do
import_data = [
{
page_url: 'http://testsite.com/',
title: 'Example Page',
site_agency: 'The Federal Example Agency',
site_name: 'Example Site',
capture_time: '2017-05-01T12:33:01Z',
uri: 'https://test-bucket.s3.amazonaws.com/example-v1',
version_hash: 'f366e89639758cd7f75d21e5026c04fb1022853844ff471865004b3274059686',
source_type: 'some_source',
source_metadata: { test_meta: 'data' }
},
{
page_url: 'http://testsite.com/',
title: 'Example Page',
site_agency: 'The Federal Example Agency',
site_name: 'Example Site',
capture_time: '2017-05-02T12:33:01Z',
uri: 'https://test-bucket.s3.amazonaws.com/example-v2',
version_hash: 'f366e89639758cd7f75d21e5026c04fb1022853844ff471865004b3274059687',
source_type: 'some_source',
source_metadata: { test_meta: 'data' }
}
]

sign_in users(:alice)

perform_enqueued_jobs do
post(
api_v0_imports_path,
headers: { 'Content-Type': 'application/x-json-stream' },
params: import_data.map(&:to_json).join("\n")
)
end

assert_response :success
body_json = JSON.parse(@response.body)
job_id = body_json['data']['id']
assert_equal 'pending', body_json['data']['status']

get api_v0_import_path(id: job_id)
body_json = JSON.parse(@response.body)
assert_equal 'complete', body_json['data']['status']
assert_equal 0, body_json['data']['processing_errors'].length

pages = Page.where(url: 'http://testsite.com/')
assert_equal 1, pages.length
assert_equal import_data[0][:title], pages[0].title

maintainer_names = pages[0].maintainers.pluck(:name)
tag_names = pages[0].tags.pluck(:name).collect(&:downcase)
assert_includes(maintainer_names, 'The Federal Example Agency')
assert_includes(tag_names, 'site:example site')

versions = pages[0].versions
assert_equal 2, versions.length
end

test 'does not add or modify a version if it already exists' do
page_versions_count = pages(:home_page).versions.count
original_data = versions(:page1_v1).as_json
import_data = [
{
page_url: pages(:home_page).url,
page_title: pages(:home_page).title,
site_agency: 'The Federal Example Agency',
site_name: pages(:home_page).site,
page_maintainers: ['The Federal Example Agency'],
page_tags: pages(:home_page).tag_names,
capture_time: versions(:page1_v1).capture_time,
uri: 'https://test-bucket.s3.amazonaws.com/example-v1',
version_hash: 'INVALID_HASH',
Expand Down Expand Up @@ -248,8 +189,8 @@ def teardown
{
page_url: pages(:home_page).url,
page_title: pages(:home_page).title,
site_agency: 'The Federal Example Agency',
site_name: pages(:home_page).site,
page_maintainers: ['The Federal Example Agency'],
page_tags: pages(:home_page).tag_names,
capture_time: versions(:page1_v1).capture_time,
uri: 'https://test-bucket.s3.amazonaws.com/example-v1',
version_hash: 'INVALID_HASH',
Expand Down Expand Up @@ -281,8 +222,8 @@ def teardown
{
page_url: pages(:home_page).url,
page_title: pages(:home_page).title,
site_agency: 'The Federal Example Agency',
site_name: pages(:home_page).site,
page_maintainers: ['The Federal Example Agency'],
page_tags: pages(:home_page).tag_names,
capture_time: versions(:page1_v1).capture_time,
uri: 'https://test-bucket.s3.amazonaws.com/example-v1',
version_hash: 'INVALID_HASH',
Expand Down Expand Up @@ -313,8 +254,8 @@ def teardown
{
page_url: 'testsite',
title: 'Example Page',
site_agency: 'The Federal Example Agency',
site_name: 'Example Site',
page_maintainers: ['The Federal Example Agency'],
page_tags: ['site:Example Site'],
capture_time: '2017-05-01T12:33:01Z',
uri: 'https://test-bucket.s3.amazonaws.com/example-v1',
version_hash: 'f366e89639758cd7f75d21e5026c04fb1022853844ff471865004b3274059686',
Expand Down Expand Up @@ -356,8 +297,8 @@ def teardown
{
page_url: 'http://testsite.com/',
title: 'Example Page',
site_agency: 'The Federal Example Agency',
site_name: 'Example Site',
page_maintainers: ['The Federal Example Agency'],
page_tags: ['site:Example Site'],
capture_time: '2017-05-01T12:33:01Z',
uri: 'http://example.storage/example-v1',
version_hash: 'f366e89639758cd7f75d21e5026c04fb1022853844ff471865004b3274059686',
Expand Down
30 changes: 2 additions & 28 deletions test/controllers/api/v0/pages_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,32 +67,6 @@ def chunk_size_in_url(url)
assert_equal PagingConcern::MAX_PAGE_SIZE, last_size
end

test 'can filter pages by site' do
sign_in users(:alice)
site = 'http://example1.com/'
get "/api/v0/pages/?site=#{URI.encode_www_form_component site}"
body_json = JSON.parse @response.body
ids = body_json['data'].pluck 'uuid'

assert_includes ids, pages(:home_page).uuid,
'Results did not include pages for the filtered site'
assert_not_includes ids, pages(:home_page_site2).uuid,
'Results included pages not matching filtered site'
end

test 'can filter pages by agency' do
sign_in users(:alice)
agency = 'Department of Testing'
get "/api/v0/pages/?agency=#{URI.encode_www_form_component agency}"
body_json = JSON.parse @response.body
ids = body_json['data'].pluck 'uuid'

assert_includes ids, pages(:dot_home_page).uuid,
'Results did not include pages for the filtered agency'
assert_not_includes ids, pages(:home_page_site2).uuid,
'Results included pages not matching filtered agency'
end

test 'can filter pages by title' do
sign_in users(:alice)
title = 'Page One'
Expand All @@ -101,9 +75,9 @@ def chunk_size_in_url(url)
ids = body_json['data'].pluck 'uuid'

assert_includes ids, pages(:home_page).uuid,
'Results did not include pages for the filtered site'
'Results did not include pages for the filtered title'
assert_not_includes ids, pages(:home_page_site2).uuid,
'Results included pages not matching filtered site'
'Results included pages not matching filtered title'
end

test 'can filter pages by URL' do
Expand Down
12 changes: 12 additions & 0 deletions test/fixtures/maintainers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,15 @@ doi:

someone:
name: Someone

dept_of_testing:
name: Department of Testing

dept_of_examples:
name: Department of Examples

dept_of_whatever:
name: Department of Whatever

bureau_of_monitoring:
name: Bureau of Monitoring
4 changes: 4 additions & 0 deletions test/fixtures/maintainerships.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# EXAMPLE:
# home_page__dept_of_examples:
# page: home_page
# maintainer: dept_of_examples
18 changes: 6 additions & 12 deletions test/fixtures/pages.yml
Original file line number Diff line number Diff line change
@@ -1,42 +1,36 @@
# NOTE: do not specify maintainers and tags here; set up the appropriate
# models in `maintainerships.yml` and `taggings.yml`. (We get DB errors about)
# the `created_at` field in the many-to-many models if we try and set
# `maintainers` or `tags` here, which *shouldn't* be the case. Not sure what's
# broken in Rails.

home_page:
url: 'http://example1.com/'
title: 'Page One'
agency: 'Department of Examples'
site: 'http://example1.com/'
status: 200

sub_page:
url: 'http://example1.com/page2.html'
title: 'Page Two'
agency: 'Department of Examples'
site: 'http://example1.com/'

home_page_site2:
url: 'http://example2.com/index.html'
title: 'Site 2, yo!'
agency: 'Department of Examples'
site: 'http://example2.com/'
status: 404

dot_home_page:
url: 'http://depart-of-testing.com/index.html'
title: 'Welcome to the Department of Testing'
agency: 'Department of Testing'
site: 'DOT - Home Site'
status: 403

other_page_one:
url: 'http://example3.com/'
title: 'Page One'
agency: 'Department of Whatever'
site: 'http://example3.com/'
status: 500

home_page_site3:
url: 'http://example3.com/'
title: 'Site 3 Home Page'
agency: 'Bureau of Monitoring'
site: 'http://example3.com/'
status: 200

inactive_page:
Expand Down
19 changes: 19 additions & 0 deletions test/fixtures/taggings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
home_page__site_1:
taggable: home_page
tag: site_1

sub_page__site_1:
taggable: sub_page
tag: site_1

home_page_site2__site_2:
taggable: home_page_site2
tag: site_2

other_page_one__site_3:
taggable: other_page_one
tag: site_3

home_page_site3__site_3:
taggable: home_page_site3
tag: site_3
9 changes: 9 additions & 0 deletions test/fixtures/tags.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,12 @@ frequently_updated:

home_page:
name: 'Home Page'

site_1:
name: 'site:Site1 (http://example1.com/)'

site_2:
name: 'site:Site2 (http://example2.com/)'

site_3:
name: 'site:Site3 (http://example3.com/)'
Loading