From 8f0bfa0017026b7ca5710f5799a7d11885b863e4 Mon Sep 17 00:00:00 2001 From: takatea Date: Sat, 13 Jan 2024 19:17:39 +0900 Subject: [PATCH 1/4] update example of latest_user_agents Mechanize::AGENT_ALIASES can easily be updated by using examples/latest_user_agents.rb example: ``` require './examples/latest_user_agents' agent = LatestUAFetcher.new updated_user_agents = agent.user_agents.dup.delete_if do |key, value| Mechanize::AGENT_ALIASES[key] == value end pp updated_user_agents ``` --- examples/latest_user_agents.rb | 70 +++++++++++++++------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/examples/latest_user_agents.rb b/examples/latest_user_agents.rb index 5773d003..0cfb1194 100644 --- a/examples/latest_user_agents.rb +++ b/examples/latest_user_agents.rb @@ -1,5 +1,7 @@ require 'mechanize' +# LatestUAFetcher fetches latest user agents from `WhatIsMyBrowser.com`. +# It can use to update `Mechanize::AGENT_ALIASES`. class LatestUAFetcher attr_reader :user_agents @@ -11,36 +13,31 @@ def initialize end def run + return unless user_agents.empty? + sleep_time = 1 - puts 'get chrome UA...' - chrome - puts "sleeping... (#{sleep_time}s)" - sleep 1 + fetch_user_agents('chrome') + fetch_user_agents('firefox') + fetch_user_agents('safari') + fetch_user_agents('edge') + end - puts 'get firefox UA...' - firefox - puts "sleeping... (#{sleep_time}s)" - sleep 1 + private - puts 'get safari UA...' - safari + def fetch_user_agents(browser_name, sleep_time = 1) + puts "fetch #{browser_name} UA..." + send(browser_name) puts "sleeping... (#{sleep_time}s)" - sleep 1 - - puts 'get edge UA...' 
- edge + sleep sleep_time end - private - def edge page = @agent.get("#{BASE_URL}/edge") windows_dom = page.css("h2:contains('Latest Edge on Windows User Agents')") - @user_agents[:edge] = { - windows: windows_dom.css('+ .listing-of-useragents .code').first.text - } + + @user_agents['Windows Edge'] = windows_dom.css('+ .listing-of-useragents .code').first.text end def firefox @@ -49,11 +46,9 @@ def firefox desktop_dom = page.css("h2:contains('Latest Firefox on Desktop User Agents')") table_dom = desktop_dom.css('+ .listing-of-useragents') - @user_agents[:firefox] = { - windows: table_dom.css('td:contains("Windows")').css('+ td .code').text, - macOS: table_dom.css('td:contains("Macos")').css('+ td .code').text, - linux: table_dom.css('td:contains("Linux")').css("+ td .code:contains('Ubuntu; Linux x86_64')").text - } + @user_agents['Linux Firefox'] = table_dom.css('td:contains("Linux")').css("+ td .code:contains('Ubuntu; Linux x86_64')").text + @user_agents['Windows Firefox'] = table_dom.css('td:contains("Windows")').css('+ td .code').text + @user_agents['Mac Firefox'] = table_dom.css('td:contains("Macos")').css('+ td .code').text end def safari @@ -62,30 +57,25 @@ def safari macos_dom = page.css("h2:contains('Latest Safari on macOS User Agents')") ios_dom = page.css("h2:contains('Latest Safari on iOS User Agents')") - @user_agents[:safari] = { - mac_os: macos_dom.css('+ .listing-of-useragents .code').first.text, - iphone: ios_dom.css('+ .listing-of-useragents').css("tr:contains('Iphone') .code").text, - ipad: ios_dom.css('+ .listing-of-useragents').css("tr:contains('Ipad') .code").text - } + @user_agents['Mac Safari'] = macos_dom.css('+ .listing-of-useragents .code').first.text + @user_agents['iPhone'] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Iphone') .code").text + @user_agents['iPad'] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Ipad') .code").text end def chrome page = @agent.get("#{BASE_URL}/chrome") windows_dom = 
page.css("h2:contains('Latest Chrome on Windows 10 User Agents')") - linux_dom = page.css("h2:contains('Latest Chrome on Linux User Agents')") - macos_dom = page.css("h2:contains('Latest Chrome on macOS User Agents')") android_dom = page.css("h2:contains('Latest Chrome on Android User Agents')") - @user_agents[:chrome] = { - windows: windows_dom.css('+ .listing-of-useragents .code').first.text, - linux: linux_dom.css('+ .listing-of-useragents .code').first.text, - mac_os: macos_dom.css('+ .listing-of-useragents .code').first.text, - android: android_dom.css('+ .listing-of-useragents .code').first.text - } + @user_agents['Windows Chrome'] = windows_dom.css('+ .listing-of-useragents .code').first.text + @user_agents['Android'] = android_dom.css('+ .listing-of-useragents .code').first.text end end -agent = LatestUAFetcher.new -agent.run -p agent.user_agents +if $0 == __FILE__ + agent = LatestUAFetcher.new + agent.run + + pp agent.user_agents +end From baae66d1951b304b1774036451dab36eaef44fe8 Mon Sep 17 00:00:00 2001 From: takatea Date: Sat, 13 Jan 2024 22:09:58 +0900 Subject: [PATCH 2/4] feat: output ordered user agents --- examples/latest_user_agents.rb | 39 +++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/examples/latest_user_agents.rb b/examples/latest_user_agents.rb index 0cfb1194..1a55932a 100644 --- a/examples/latest_user_agents.rb +++ b/examples/latest_user_agents.rb @@ -1,10 +1,23 @@ require 'mechanize' +require 'ostruct' # LatestUAFetcher fetches latest user agents from `WhatIsMyBrowser.com`. # It can use to update `Mechanize::AGENT_ALIASES`. 
class LatestUAFetcher attr_reader :user_agents + USER_AGENT_TYPES = OpenStruct.new( + linux_firefox: "Linux Firefox", + mac_firefox: "Mac Firefox", + mac_safari: "Mac Safari", + windows_chrome: "Windows Chrome", + windows_edge: "Windows Edge", + windows_firefox: "Windows Firefox", + android: "Android", + iphone: "iPhone", + ipad: "iPad", + ) + BASE_URL = 'https://www.whatismybrowser.com/guides/the-latest-user-agent' def initialize @@ -23,6 +36,12 @@ def run fetch_user_agents('edge') end + def ordered_user_agents + USER_AGENT_TYPES.to_h.values.each_with_object({}) do |type, ordered_user_agents| + ordered_user_agents[type] = user_agents[type] + end + end + private def fetch_user_agents(browser_name, sleep_time = 1) @@ -37,7 +56,7 @@ def edge windows_dom = page.css("h2:contains('Latest Edge on Windows User Agents')") - @user_agents['Windows Edge'] = windows_dom.css('+ .listing-of-useragents .code').first.text + @user_agents[USER_AGENT_TYPES.windows_edge] = windows_dom.css('+ .listing-of-useragents .code').first.text end def firefox @@ -46,9 +65,9 @@ def firefox desktop_dom = page.css("h2:contains('Latest Firefox on Desktop User Agents')") table_dom = desktop_dom.css('+ .listing-of-useragents') - @user_agents['Linux Firefox'] = table_dom.css('td:contains("Linux")').css("+ td .code:contains('Ubuntu; Linux x86_64')").text - @user_agents['Windows Firefox'] = table_dom.css('td:contains("Windows")').css('+ td .code').text - @user_agents['Mac Firefox'] = table_dom.css('td:contains("Macos")').css('+ td .code').text + @user_agents[USER_AGENT_TYPES.linux_firefox] = table_dom.css('td:contains("Linux")').css("+ td .code:contains('Ubuntu; Linux x86_64')").text + @user_agents[USER_AGENT_TYPES.windows_firefox] = table_dom.css('td:contains("Windows")').css('+ td .code').text + @user_agents[USER_AGENT_TYPES.mac_firefox] = table_dom.css('td:contains("Macos")').css('+ td .code').text end def safari @@ -57,9 +76,9 @@ def safari macos_dom = page.css("h2:contains('Latest Safari on macOS 
User Agents')") ios_dom = page.css("h2:contains('Latest Safari on iOS User Agents')") - @user_agents['Mac Safari'] = macos_dom.css('+ .listing-of-useragents .code').first.text - @user_agents['iPhone'] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Iphone') .code").text - @user_agents['iPad'] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Ipad') .code").text + @user_agents[USER_AGENT_TYPES.mac_safari] = macos_dom.css('+ .listing-of-useragents .code').first.text + @user_agents[USER_AGENT_TYPES.iphone] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Iphone') .code").text + @user_agents[USER_AGENT_TYPES.ipad] = ios_dom.css('+ .listing-of-useragents').css("tr:contains('Ipad') .code").text end def chrome @@ -68,8 +87,8 @@ def chrome windows_dom = page.css("h2:contains('Latest Chrome on Windows 10 User Agents')") android_dom = page.css("h2:contains('Latest Chrome on Android User Agents')") - @user_agents['Windows Chrome'] = windows_dom.css('+ .listing-of-useragents .code').first.text - @user_agents['Android'] = android_dom.css('+ .listing-of-useragents .code').first.text + @user_agents[USER_AGENT_TYPES.windows_chrome] = windows_dom.css('+ .listing-of-useragents .code').first.text + @user_agents[USER_AGENT_TYPES.android] = android_dom.css('+ .listing-of-useragents .code').first.text end end @@ -77,5 +96,5 @@ def chrome agent = LatestUAFetcher.new agent.run - pp agent.user_agents + pp agent.ordered_user_agents end From 32c74eec9dc8cf30132b87c58ad2c6f37ffcde08 Mon Sep 17 00:00:00 2001 From: takatea Date: Sat, 13 Jan 2024 19:31:00 +0900 Subject: [PATCH 3/4] update user agent strings for agent aliases --- lib/mechanize.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/mechanize.rb b/lib/mechanize.rb index 60e52860..afd54806 100644 --- a/lib/mechanize.rb +++ b/lib/mechanize.rb @@ -130,18 +130,18 @@ class Error < RuntimeError # TODO: use output from examples/latest_user_agents.rb as the underling 
data structure 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/sparklemotion/mechanize/)", - 'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:112.0) Gecko/20100101 Firefox/112.0', + 'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/121.0', 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)', 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624', - 'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13.3; rv:112.0) Gecko/20100101 Firefox/112.0', + 'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:109.0) Gecko/20100101 Firefox/121.0', 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401', 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10', - 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15', + 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', - 'Windows Chrome' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', - 'Windows Edge' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.46', - 'Windows Firefox' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) Gecko/20100101 Firefox/112.0', + 'Windows Chrome' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Windows Edge' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.133', + 'Windows Firefox' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 
Firefox/121.0', 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', @@ -150,9 +150,9 @@ class Error < RuntimeError 'Windows IE 11' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko', 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6', - 'Android' => 'Mozilla/5.0 (Linux; Android 10) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.48 Mobile Safari/537.36', - 'iPad' => 'Mozilla/5.0 (iPad; CPU OS 16_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Mobile/15E148 Safari/604.1', - 'iPhone' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Mobile/15E148 Safari/604.1', + 'Android' => 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.210 Mobile Safari/537.36', + 'iPad' => 'Mozilla/5.0 (iPad; CPU OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', + 'iPhone' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', } AGENT_ALIASES.default_proc = proc { |hash, key| From 34ccbd614dffe1525403f8a606d13006f42ec140 Mon Sep 17 00:00:00 2001 From: takatea Date: Sat, 13 Jan 2024 19:45:06 +0900 Subject: [PATCH 4/4] doc: update docstring for AGENT_ALIASES and remove todo comment --- lib/mechanize.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mechanize.rb b/lib/mechanize.rb index afd54806..acc9585b 100644 --- a/lib/mechanize.rb +++ b/lib/mechanize.rb @@ -105,6 +105,7 @@ class Error < RuntimeError # # Windows User-Agent aliases: # + # * "Windows 
Chrome" # * "Windows Edge" # * "Windows Firefox" # * "Windows IE 6" @@ -127,7 +128,6 @@ class Error < RuntimeError # agent.user_agent_alias = 'Mac Safari' # AGENT_ALIASES = { - # TODO: use output from examples/latest_user_agents.rb as the underling data structure 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/sparklemotion/mechanize/)", 'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/121.0',