diff options
author | rubenwardy <rw@rubenwardy.com> | 2020-07-10 22:44:58 +0100 |
---|---|---|
committer | rubenwardy <rw@rubenwardy.com> | 2020-07-10 22:44:58 +0100 |
commit | b9e1be57e435fd8d69fa58f546b2dc1206e1f80c (patch) | |
tree | e1fbdcdc6f41200270ea7f6c601b9814a09eb868 /app/tasks/phpbbparser.py | |
parent | c3d96c745924f458846f08d90f48be0c1a4cc0ed (diff) | |
download | cheatdb-b9e1be57e435fd8d69fa58f546b2dc1206e1f80c.tar.xz |
Fix generation of forum profile URLs
Fixes #196
Diffstat (limited to 'app/tasks/phpbbparser.py')
-rw-r--r-- | app/tasks/phpbbparser.py | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/app/tasks/phpbbparser.py b/app/tasks/phpbbparser.py
index caaa730..c0d15f5 100644
--- a/app/tasks/phpbbparser.py
+++ b/app/tasks/phpbbparser.py
@@ -9,6 +9,8 @@ from datetime import datetime
 import urllib.request
 import os.path
 import time, re
+import urllib.parse as urlparse
+from urllib.parse import urlencode
 
 def urlEncodeNonAscii(b):
 	return re.sub('[\x80-\xFF]', lambda c: '%%%02x' % ord(c.group(0)), b)
@@ -71,8 +73,24 @@ def __extract_signature(soup):
 	else:
 		return res[0]
 
+
+def getProfileURL(url, username):
+	url = urlparse.urlparse(url)
+
+	# Update path
+	url = url._replace(path="/memberlist.php")
+
+	# Set query args
+	query = dict(urlparse.parse_qsl(url.query))
+	query.update({ "un": username, "mode": "viewprofile" })
+	query_encoded = urlencode(query)
+	url = url._replace(query=query_encoded)
+
+	return urlparse.urlunparse(url)
+
+
 def getProfile(url, username):
-	url = url + "/memberlist.php?mode=viewprofile&un=" + urlEncodeNonAscii(username)
+	url = getProfileURL(url, username)
 
 	req = urllib.request.urlopen(url, timeout=5)
 	if req.getcode() == 404: