summaryrefslogtreecommitdiff
path: root/.config/qutebrowser/scripts/dev/ua_fetch.py
diff options
context:
space:
mode:
Diffstat (limited to '.config/qutebrowser/scripts/dev/ua_fetch.py')
-rwxr-xr-x.config/qutebrowser/scripts/dev/ua_fetch.py122
1 files changed, 122 insertions, 0 deletions
diff --git a/.config/qutebrowser/scripts/dev/ua_fetch.py b/.config/qutebrowser/scripts/dev/ua_fetch.py
new file mode 100755
index 0000000..75ce4c2
--- /dev/null
+++ b/.config/qutebrowser/scripts/dev/ua_fetch.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
+
+# Copyright 2015-2018 lamarpavel
+# Copyright 2015-2018 Alexey Nabrodov (Averrin)
+# Copyright 2015-2018 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
+#
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""Fetch list of popular user-agents.
+
+The script is based on a gist posted by github.com/averrin, the output of this
+script is formatted to be pasted into configdata.yml
+"""
+
+import requests
+from lxml import html # pylint: disable=import-error
+
+
+def fetch():
+ """Fetch list of popular user-agents.
+
+ Return:
+ List of relevant strings.
+ """
+ url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
+ page = requests.get(url)
+ page = html.fromstring(page.text)
+ path = '//*[@id="post-2229"]/div[2]/table/tbody'
+ return page.xpath(path)[0]
+
+
+def filter_list(complete_list, browsers):
+ """Filter the received list based on a look up table.
+
+ The LUT should be a dictionary of the format {browser: versions}, where
+ 'browser' is the name of the browser (eg. "Firefox") as string and
+ 'versions' is a set of different versions of this browser that should be
+ included when found (eg. {"Linux", "MacOSX"}). This function returns a
+ dictionary with the same keys as the LUT, but storing lists of tuples
+ (user_agent, browser_description) as values.
+ """
+ # pylint: disable=too-many-nested-blocks
+ table = {}
+ for entry in complete_list:
+ # Tuple of (user_agent, browser_description)
+ candidate = (entry[1].text_content(), entry[2].text_content())
+ for name in browsers:
+ found = False
+ if name.lower() in candidate[1].lower():
+ for version in browsers[name]:
+ if version.lower() in candidate[1].lower():
+ if table.get(name) is None:
+ table[name] = []
+ table[name].append(candidate)
+ browsers[name].remove(version)
+ found = True
+ break
+ if found:
+ break
+ return table
+
+
+def add_diversity(table):
+ """Insert a few additional entries for diversity into the dict.
+
+ (as returned by filter_list())
+ """
+ table["Obscure"] = [
+ ('Mozilla/5.0 (compatible; Googlebot/2.1; '
+ '+http://www.google.com/bot.html',
+ "Google Bot"),
+ ('Wget/1.16.1 (linux-gnu)',
+ "wget 1.16.1"),
+ ('curl/7.40.0',
+ "curl 7.40.0"),
+ ('Mozilla/5.0 (Linux; U; Android 7.1.2) AppleWebKit/534.30 '
+ '(KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
+ "Mobile Generic Android"),
+ ('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like '
+ 'Gecko',
+ "IE 11.0 for Desktop Win7 64-bit"),
+ ]
+ return table
+
+
+def main():
+ """Generate user agent code."""
+ fetched = fetch()
+ lut = {
+ "Firefox": {"Win", "MacOSX", "Linux", "Android"},
+ "Chrome": {"Win", "MacOSX", "Linux"},
+ "Safari": {"MacOSX", "iOS"}
+ }
+ filtered = filter_list(fetched, lut)
+ filtered = add_diversity(filtered)
+
+ tab = " "
+ for browser in ["Firefox", "Safari", "Chrome", "Obscure"]:
+ for it in filtered[browser]:
+ print('{}- - "{}"'.format(3 * tab, it[0]))
+ desc = it[1].replace('\xa0', ' ').replace(' ', ' ')
+ print("{}- {}".format(4 * tab, desc))
+ print("")
+
+
+if __name__ == '__main__':
+ main()