-
Notifications
You must be signed in to change notification settings - Fork 1
/
example.rb
25 lines (23 loc) · 907 Bytes
/
example.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# frozen_string_literal: true
#
# An example of parsing hackaday.com
# (C) 2013 Jurriaan Pruis
#
$LOAD_PATH.unshift __dir__
require File.expand_path('lib/docparser.rb', __dir__)
include DocParser
# Destination for the extracted rows; the header names the columns in the
# same order the values are passed to add_row below.
output = MultiOutput.new(filename: 'hackaday')
output.header = 'Title', 'Author', 'Publication date', 'URL', 'Summary'

# Input: the saved HTML pages under test/support/hackaday.
files = Dir[File.join(__dir__, 'test/support/hackaday/*.html')]
parser = Parser.new(files: files, parallel: false, output: output)

# Text content of the first node below +node+ that matches +selector+.
# Returns '' when nothing matches (nil.to_s is ''), so one post with a missing
# field yields an empty cell instead of aborting the run with a NoMethodError.
first_text = ->(node, selector) { node.search(selector).first&.content.to_s }

parser.parse! do
  elements('#content .post').each do |post|
    # The title link supplies both the title text and the post URL.
    title_link = post.search('.entry-title a').first
    link_attributes = title_link ? title_link.attributes : {}

    title = title_link&.content.to_s
    author = first_text.call(post, '.post-info .author .fn a')
    published_time = first_text.call(post, '.post-info .date.published')
    # Guard against both a missing link and a link without an href attribute.
    url = link_attributes['href']&.value.to_s
    summary = first_text.call(post, '.entry-content').strip

    add_row title, author, published_time, url, summary
  end
end