diff options
-rw-r--r-- | app/models.py | 16 | ||||
-rw-r--r-- | app/tasks/forumtasks.py | 127 | ||||
-rw-r--r-- | app/tasks/phpbbparser.py | 70 | ||||
-rw-r--r-- | app/templates/admin/list.html | 3 | ||||
-rw-r--r-- | app/templates/macros/topictable.html | 4 | ||||
-rw-r--r-- | app/templates/packages/view.html | 2 | ||||
-rw-r--r-- | app/views/admin.py | 9 | ||||
-rw-r--r-- | app/views/packages/__init__.py | 8 | ||||
-rw-r--r-- | app/views/packages/todo.py | 12 | ||||
-rw-r--r-- | app/views/users.py | 6 | ||||
-rw-r--r-- | migrations/versions/9fc23495713b_.py | 55 |
11 files changed, 226 insertions, 86 deletions
diff --git a/app/models.py b/app/models.py index 5332dbd..b5b4836 100644 --- a/app/models.py +++ b/app/models.py @@ -743,23 +743,25 @@ REPO_BLACKLIST = [".zip", "mediafire.com", "dropbox.com", "weebly.com", \ "digitalaudioconcepts.com", "hg.intevation.org", "www.wtfpl.net", \ "imageshack.com", "imgur.com"] -class KrockForumTopic(db.Model): +class ForumTopic(db.Model): topic_id = db.Column(db.Integer, primary_key=True, autoincrement=False) author_id = db.Column(db.Integer, db.ForeignKey("user.id"), nullable=False) author = db.relationship("User") - ttype = db.Column(db.Integer, nullable=False) + type = db.Column(db.Enum(PackageType), nullable=False) title = db.Column(db.String(200), nullable=False) name = db.Column(db.String(30), nullable=True) link = db.Column(db.String(200), nullable=True) - def getType(self): - if self.ttype == 1 or self.ttype == 2: - return PackageType.MOD - elif self.ttype == 6: - return PackageType.GAME + posts = db.Column(db.Integer, nullable=False) + views = db.Column(db.Integer, nullable=False) + + created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow) def getRepoURL(self): + if self.link is None: + return None + for item in REPO_BLACKLIST: if item in self.link: return None diff --git a/app/tasks/forumtasks.py b/app/tasks/forumtasks.py index b2e0ca8..5513fb2 100644 --- a/app/tasks/forumtasks.py +++ b/app/tasks/forumtasks.py @@ -15,12 +15,12 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. 
-import flask, json +import flask, json, re from flask.ext.sqlalchemy import SQLAlchemy from app import app from app.models import * from app.tasks import celery -from .phpbbparser import getProfile +from .phpbbparser import getProfile, getTopicsFromForum import urllib.request from urllib.parse import urlparse, quote_plus @@ -51,71 +51,88 @@ def checkForumAccount(username, token=None): if needsSaving: db.session.commit() -@celery.task() -def importUsersFromModList(): - contents = urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8") - list = json.loads(contents) - found = {} - imported = [] - - for user in User.query.all(): - found[user.username] = True - if user.forums_username is not None: - found[user.forums_username] = True - - for x in list: - author = x.get("author") - if author is not None and not author in found: - user = User(author) - user.forums_username = author - imported.append(author) - found[author] = True - db.session.add(user) - db.session.commit() - for author in found: - checkForumAccount.delay(author, None) +regex_tag = re.compile(r"\[([a-z0-9_]+)\]") +BANNED_NAMES = ["mod", "game", "old", "outdated", "wip", "api", "beta", "alpha", "git"] +def getNameFromTaglist(taglist): + for tag in reversed(regex_tag.findall(taglist)): + if len(tag) < 30 and not tag in BANNED_NAMES and \ + not re.match(r"^[a-z]?[0-9]+$", tag): + return tag + return None -BANNED_NAMES = ["mod", "game", "old", "outdated", "wip", "api"] -ALLOWED_TYPES = [1, 2, 6] +regex_title = re.compile(r"^((?:\[[^\]]+\] *)*)([^\[]+) *((?:\[[^\]]+\] *)*)[^\[]*$") +def parseTitle(title): + m = regex_title.match(title) + if m is None: + print("Invalid title format: " + title) + return title, getNameFromTaglist(title) + else: + return m.group(2).strip(), getNameFromTaglist(m.group(3)) + +def getLinksFromModSearch(): + links = {} -@celery.task() -def importKrocksModList(): contents = 
urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8") - list = json.loads(contents) - username_to_user = {} + for x in json.loads(contents): + link = x.get("link") + if link is not None: + links[int(x["topicId"])] = link - KrockForumTopic.query.delete() + return links + +@celery.task() +def importTopicList(): + links_by_id = getLinksFromModSearch() + + info_by_id = {} + getTopicsFromForum(11, out=info_by_id, extra={ 'type': PackageType.MOD }) + getTopicsFromForum(15, out=info_by_id, extra={ 'type': PackageType.GAME }) + + # Caches + username_to_user = {} + topics_by_id = {} + for topic in ForumTopic.query.all(): + topics_by_id[topic.topic_id] = topic - for x in list: - type = int(x["type"]) - if not type in ALLOWED_TYPES: - continue + # Create or update + for info in info_by_id.values(): + id = int(info["id"]) - username = x["author"] + # Get author + username = info["author"] user = username_to_user.get(username) if user is None: user = User.query.filter_by(forums_username=username).first() - assert(user is not None) + if user is None: + print(username + " not found!") + user = User(username) + user.forums_username = username + db.session.add(user) username_to_user[username] = user - import re - tags = re.findall("\[([a-z0-9_]+)\]", x["title"]) - name = None - for tag in reversed(tags): - if len(tag) < 30 and not tag in BANNED_NAMES and \ - not re.match("^([a-z][0-9]+)$", tag): - name = tag - break - - topic = KrockForumTopic() - topic.topic_id = x["topicId"] - topic.author_id = user.id - topic.ttype = type - topic.title = x["title"] - topic.name = name - topic.link = x.get("link") - db.session.add(topic) + # Get / add row + topic = topics_by_id.get(id) + if topic is None: + topic = ForumTopic() + db.session.add(topic) + + # Parse title + title, name = parseTitle(info["title"]) + + # Get link + link = links_by_id.get(id) + + # Fill row + topic.topic_id = id + topic.author = user + topic.type = info["type"] + topic.title = 
title + topic.name = name + topic.link = link + topic.posts = info["posts"] + topic.views = info["views"] + topic.created_at = info["date"] db.session.commit() diff --git a/app/tasks/phpbbparser.py b/app/tasks/phpbbparser.py index d27ccec..9984ad0 100644 --- a/app/tasks/phpbbparser.py +++ b/app/tasks/phpbbparser.py @@ -5,6 +5,7 @@ import urllib, socket from bs4 import * from urllib.parse import urljoin +from datetime import datetime import urllib.request import os.path import time, re @@ -77,3 +78,72 @@ def getProfile(url, username): __extract_properties(profile, soup) return profile + + +regex_id = re.compile(r"^.*t=([0-9]+).*$") + +def parseForumListPage(id, page, out, extra=None): + num_per_page = 30 + start = page*num_per_page+1 + print(" - Fetching page {} (topics {}-{})".format(page, start, start+num_per_page)) + + url = "https://forum.minetest.net/viewforum.php?f=" + str(id) + "&start=" + str(start) + r = urllib.request.urlopen(url).read().decode("utf-8") + soup = BeautifulSoup(r, "html.parser") + + for row in soup.find_all("li", class_="row"): + classes = row.get("class") + if "sticky" in classes or "announce" in classes or "global-announce" in classes: + continue + + topic = row.find("dl") + + # Link info + link = topic.find(class_="topictitle") + id = regex_id.match(link.get("href")).group(1) + title = link.find(text=True) + + # Date + left = topic.find("dt") + date = left.get_text().split("»")[1].strip() + date = datetime.strptime(date, "%a %b %d, %Y %H:%M") + author = left.find_all("a")[-1].get_text().strip() + + # Get counts + posts = topic.find(class_="posts").find(text=True) + views = topic.find(class_="views").find(text=True) + + if id in out: + print(" - got {} again, title: {}".format(id, title)) + assert(title == out[id]['title']) + return False + + row = { + "id" : id, + "title" : title, + "author": author, + "posts" : posts, + "views" : views, + "date" : date + } + + if extra is not None: + for key, value in extra.items(): + row[key] = value
+ out[id] = row + + return True + +def getTopicsFromForum(id, out={}, extra=None): + print("Fetching all topics from forum {}".format(id)) + page = 0 + while parseForumListPage(id, page, out, extra): + page = page + 1 + + return out + +def dumpTitlesToFile(topics, path): + with open(path, "w") as out_file: + for topic in topics.values(): + out_file.write(topic["title"] + "\n") diff --git a/app/templates/admin/list.html b/app/templates/admin/list.html index e5049f9..c565fe0 100644 --- a/app/templates/admin/list.html +++ b/app/templates/admin/list.html @@ -17,8 +17,7 @@ <form method="post" action="" class="box-body"> <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" /> <select name="action"> - <option value="importusers">Create users from mod list</option> - <option value="importmodlist">Import Krock's mod list</option> + <option value="importmodlist">Import forum topics</option> <option value="importscreenshots" selected>Import screenshots from VCS</option> <option value="importdepends">Import dependencies from downloads</option> <option value="modprovides">Set provides to mod name</option> diff --git a/app/templates/macros/topictable.html b/app/templates/macros/topictable.html index a0c5b1e..7ae8a35 100644 --- a/app/templates/macros/topictable.html +++ b/app/templates/macros/topictable.html @@ -11,12 +11,12 @@ {% for topic in topics %} <tr> <td>{{ topic.topic_id }}</td> - <td>[{{ topic.getType().value }}] <a href="https://forum.minetest.net/viewtopic.php?t={{ topic.topic_id}}">{{ topic.title }}</a></td> + <td>[{{ topic.type.value }}] <a href="https://forum.minetest.net/viewtopic.php?t={{ topic.topic_id}}">{{ topic.title }}</a></td> {% if show_author %} <td><a href="{{ url_for('user_profile_page', username=topic.author.username) }}">{{ topic.author.display_name}}</a></td> {% endif %} <td>{{ topic.name or ""}}</td> - <td><a href="{{ topic.link }}">{{ topic.link | domain }}</a></td> + <td>{% if topic.link %}<a href="{{ topic.link }}">{{ topic.link | 
domain }}</a>{% endif %}</td> <td> <a href="{{ url_for('create_edit_package_page', author=topic.author.username, repo=topic.getRepoURL(), forums=topic.topic_id, title=topic.title, bname=topic.name) }}">Create</a> </td> diff --git a/app/templates/packages/view.html b/app/templates/packages/view.html index ab48c6e..f69b5cf 100644 --- a/app/templates/packages/view.html +++ b/app/templates/packages/view.html @@ -292,7 +292,7 @@ <ul> {% for t in similar_topics %} <li> - [{{ t.getType().value }}] + [{{ t.type.value }}] <a href="https://forum.minetest.net/viewtopic.php?t={{ t.topic_id }}"> {{ t.title }} by {{ t.author.display_name }} </a> diff --git a/app/views/admin.py b/app/views/admin.py index 65d5264..92ee437 100644 --- a/app/views/admin.py +++ b/app/views/admin.py @@ -21,7 +21,7 @@ from flask.ext import menu from app import app from app.models import * from app.tasks.importtasks import importRepoScreenshot, importAllDependencies -from app.tasks.forumtasks import importUsersFromModList, importKrocksModList +from app.tasks.forumtasks import importTopicList from flask_wtf import FlaskForm from wtforms import * from app.utils import loginUser, rank_required @@ -31,11 +31,8 @@ from app.utils import loginUser, rank_required def admin_page(): if request.method == "POST": action = request.form["action"] - if action == "importusers": - task = importUsersFromModList.delay() - return redirect(url_for("check_task", id=task.id, r=url_for("user_list_page"))) - elif action == "importmodlist": - task = importKrocksModList.delay() + if action == "importmodlist": + task = importTopicList.delay() return redirect(url_for("check_task", id=task.id, r=url_for("todo_topics_page"))) elif action == "importscreenshots": packages = Package.query \ diff --git a/app/views/packages/__init__.py b/app/views/packages/__init__.py index 6ef76ec..4d357a6 100644 --- a/app/views/packages/__init__.py +++ b/app/views/packages/__init__.py @@ -100,11 +100,11 @@ def package_page(package): 
package.checkPerm(current_user, Permission.APPROVE_NEW) similar_topics = None if not show_similar_topics else \ - KrockForumTopic.query \ + ForumTopic.query \ .filter_by(name=package.name) \ - .filter(KrockForumTopic.topic_id != package.forums) \ - .filter(~ db.exists().where(Package.forums==KrockForumTopic.topic_id)) \ - .order_by(db.asc(KrockForumTopic.name), db.asc(KrockForumTopic.title)) \ + .filter(ForumTopic.topic_id != package.forums) \ + .filter(~ db.exists().where(Package.forums==ForumTopic.topic_id)) \ + .order_by(db.asc(ForumTopic.name), db.asc(ForumTopic.title)) \ .all() releases = getReleases(package) diff --git a/app/views/packages/todo.py b/app/views/packages/todo.py index 81735eb..84cfef4 100644 --- a/app/views/packages/todo.py +++ b/app/views/packages/todo.py @@ -41,8 +41,8 @@ def todo_page(): screenshots = PackageScreenshot.query.filter_by(approved=False).all() - topics_to_add = KrockForumTopic.query \ - .filter(~ db.exists().where(Package.forums==KrockForumTopic.topic_id)) \ + topics_to_add = ForumTopic.query \ + .filter(~ db.exists().where(Package.forums==ForumTopic.topic_id)) \ .count() return render_template("todo/list.html", title="Reports and Work Queue", @@ -54,11 +54,11 @@ def todo_page(): @app.route("/todo/topics/") @login_required def todo_topics_page(): - total = KrockForumTopic.query.count() + total = ForumTopic.query.count() - topics = KrockForumTopic.query \ - .filter(~ db.exists().where(Package.forums==KrockForumTopic.topic_id)) \ - .order_by(db.asc(KrockForumTopic.name), db.asc(KrockForumTopic.title)) \ + topics = ForumTopic.query \ + .filter(~ db.exists().where(Package.forums==ForumTopic.topic_id)) \ + .order_by(db.asc(ForumTopic.name), db.asc(ForumTopic.title)) \ .all() return render_template("todo/topics.html", topics=topics, total=total) diff --git a/app/views/users.py b/app/views/users.py index 256f7d1..a96fce2 100644 --- a/app/views/users.py +++ b/app/views/users.py @@ -98,10 +98,10 @@ def user_profile_page(username): 
topics_to_add = None if current_user == user or user.checkPerm(current_user, Permission.CHANGE_AUTHOR): - topics_to_add = KrockForumTopic.query \ + topics_to_add = ForumTopic.query \ .filter_by(author_id=user.id) \ - .filter(~ db.exists().where(Package.forums==KrockForumTopic.topic_id)) \ - .order_by(db.asc(KrockForumTopic.name), db.asc(KrockForumTopic.title)) \ + .filter(~ db.exists().where(Package.forums==ForumTopic.topic_id)) \ + .order_by(db.asc(ForumTopic.name), db.asc(ForumTopic.title)) \ .all() # Process GET or invalid POST diff --git a/migrations/versions/9fc23495713b_.py b/migrations/versions/9fc23495713b_.py new file mode 100644 index 0000000..f457ae5 --- /dev/null +++ b/migrations/versions/9fc23495713b_.py @@ -0,0 +1,55 @@ +"""empty message + +Revision ID: 9fc23495713b +Revises: de004661c5e1 +Create Date: 2018-07-04 00:03:20.123285 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '9fc23495713b' +down_revision = 'de004661c5e1' +branch_labels = None +depends_on = None +from sqlalchemy.dialects.postgresql import ENUM + +type_enum = ENUM('MOD', 'GAME', 'TXP', name='packagetype', create_type=False) + +def upgrade(): + type_enum.create(op.get_bind(), checkfirst=True) + + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('krock_forum_topic') + op.create_table('forum_topic', + sa.Column('topic_id', sa.Integer(), autoincrement=False, nullable=False), + sa.Column('author_id', sa.Integer(), nullable=False), + sa.Column('type', type_enum, nullable=True), + sa.Column('title', sa.String(length=200), nullable=False), + sa.Column('name', sa.String(length=30), nullable=True), + sa.Column('link', sa.String(length=200), nullable=True), + sa.Column('posts', sa.Integer(), nullable=False), + sa.Column('views', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['author_id'], ['user.id'], ), + sa.PrimaryKeyConstraint('topic_id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('forum_topic') + op.create_table('krock_forum_topic', + sa.Column('topic_id', sa.Integer(), autoincrement=False, nullable=False), + sa.Column('author_id', sa.Integer(), nullable=False), + sa.Column('ttype', sa.Integer(), nullable=False), + sa.Column('title', sa.String(length=200), nullable=False), + sa.Column('name', sa.String(length=30), nullable=True), + sa.Column('link', sa.String(length=50), nullable=True), + sa.ForeignKeyConstraint(['author_id'], ['user.id'], ), + sa.PrimaryKeyConstraint('topic_id') + ) + # ### end Alembic commands ### |