Initial Commit

master
rubenwardy 2016-05-07 18:13:24 +01:00
commit 4613017d28
8 changed files with 336 additions and 0 deletions

106
.gitignore vendored Normal file
View File

@ -0,0 +1,106 @@
# Created by https://www.gitignore.io/api/python,linux
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*

19
LICENSE.txt Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2016 rubenwardy
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
include LICENSE.txt
recursive-include phpbb_parser *.py *.html *.js

26
README.md Normal file
View File

@ -0,0 +1,26 @@
# Python-PhpBB-Parser
## Profiles
```Python
import phpbb_parser as parser
profile = parser.get_profile("https://forum.minetest.net", "rubenwardy")
```
`profile.signature` is a BeautifulSoup element, or `None` if the page did not contain exactly one signature block.
```Python
print(profile.signature.text)
```
Properties are plain text:
```Python
print(profile.get("github"))
```
Keys are lowercase.
## Topics
TODO: no support yet

74
phpbb_parser/__init__.py Normal file
View File

@ -0,0 +1,74 @@
import os.path
import socket
import time
import urllib

try:  # Python 3 module layout
    from urllib.error import HTTPError
    from urllib.parse import quote, urljoin
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from urllib import quote, urlopen
    from urllib2 import HTTPError
    from urlparse import urljoin

from bs4 import *
class Profile(object):
    """Data collected for a single forum member.

    Attributes:
        username: The name the profile was created with.
        signature: Starts as an empty string; callers may replace it with
            whatever object represents the member's signature.
        properties: Dict mapping property names to their values.
    """

    # Private marker for "no default supplied", so that None stays usable
    # as an explicit default value in get().
    _MISSING = object()

    def __init__(self, username):
        self.username = username
        self.signature = ""
        self.properties = {}

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self.properties[key] = value

    def get(self, key, default=_MISSING):
        """Return the value stored under ``key``.

        Args:
            key: Property name to look up.
            default: Optional value to return when ``key`` is absent.

        Raises:
            KeyError: If ``key`` is absent and no ``default`` was given.
        """
        if default is self._MISSING:
            return self.properties[key]
        return self.properties.get(key, default)

    def __str__(self):
        return self.username + "\n" + str(self.signature) + "\n" + str(self.properties)
def __extract_properties(profile, soup):
    """Copy the label/value pairs of a profile page into ``profile``.

    Looks up the element with id "viewprofile" and, inside it, the single
    ``<dl class="left-box details">`` list. Each ``<dt>`` supplies a key and
    the following ``<dd>`` supplies its value, stored via ``profile.set``.
    The "groups" entry is skipped.

    Args:
        profile: Object exposing ``set(key, value)``.
        soup: Parsed page exposing ``find`` / ``find_all``.

    Returns:
        None in every case. The function exits early, leaving ``profile``
        untouched, when the container is missing or when there is not
        exactly one details list.
    """
    container = soup.find(id="viewprofile")
    if container is None:
        return None

    detail_lists = container.find_all("dl", class_="left-box details")
    if len(detail_lists) != 1:
        return None

    pending_key = None
    for element in detail_lists[0].children:
        if element.name == "dt":
            if pending_key is None:
                # Trim whitespace BEFORE dropping the trailing colon, so that
                # "Github: " yields "github" and a label without a colon
                # keeps its final letter.
                pending_key = element.text.strip().rstrip(":").strip().lower()
            else:
                print("Unexpected dt!")
        elif element.name == "dd":
            if pending_key is None:
                print("Unexpected dd!")
            else:
                if pending_key != "groups":
                    profile.set(pending_key, element.text)
                pending_key = None
        elif element and element.name is not None:
            # Children whose name is None are ignored; any other tag is
            # reported.
            print("Unexpected other")
def __extract_signature(soup):
    """Return the page's signature element, or None.

    Searches ``soup`` for ``<div class="signature">`` elements and returns
    the match only when there is exactly one; zero or several matches give
    None.
    """
    matches = soup.find_all("div", class_="signature")
    return matches[0] if len(matches) == 1 else None
def get_profile(url, username):
    """Download and parse the profile page of ``username``.

    Args:
        url: Base URL of the forum, e.g. "https://forum.minetest.net".
        username: Member name; it is URL-encoded before being placed in the
            query string.

    Returns:
        A ``Profile`` whose ``signature`` is the result of
        ``__extract_signature`` and whose properties were filled in by
        ``__extract_properties``, or None when the server answers 404.

    Raises:
        HTTPError: For HTTP errors other than 404, when the underlying
            ``urlopen`` raises them.
    """
    # quote() is given bytes so non-ASCII names are encoded as UTF-8.
    encoded_name = username
    if not isinstance(encoded_name, bytes):
        encoded_name = encoded_name.encode("utf-8")

    # safe="" also escapes "/" so the name cannot alter the URL structure.
    profile_url = (url.rstrip("/") + "/memberlist.php?mode=viewprofile&un="
                   + quote(encoded_name, safe=""))

    try:
        response = urlopen(profile_url)
    except HTTPError as err:
        # urllib.request.urlopen reports a 404 by raising instead of
        # returning a response object.
        if err.code == 404:
            return None
        raise

    try:
        # The legacy Python 2 urllib.urlopen returns the 404 response instead
        # of raising, so the status code is still checked here.
        if response.getcode() == 404:
            return None
        html = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(html, "lxml")
    profile = Profile(username)
    profile.signature = __extract_signature(soup)
    __extract_properties(profile, soup)
    return profile

9
phpbb_parser/__main__.py Normal file
View File

@ -0,0 +1,9 @@
import __init__ as parser
# Demo: fetch one profile and print its signature text and "github" property.
profile = parser.get_profile("https://forum.minetest.net", "rubenwardy")
if profile:
    # get_profile stores None as the signature when the page does not contain
    # exactly one signature block, so guard before reading .text.
    if profile.signature is not None:
        print(profile.signature.text)
    else:
        print("Profile has no signature!")

    # Profile.get raises KeyError for a property the page did not list.
    try:
        print(profile.get("github"))
    except KeyError:
        print("Profile has no github property!")
else:
    print("Could not get profile!")

2
setup.cfg Normal file
View File

@ -0,0 +1,2 @@
[bdist_wheel]
universal=1

98
setup.py Normal file
View File

@ -0,0 +1,98 @@
""" A parser to read data from third party phpBB forums
"""
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path
# Package metadata and build configuration for phpbb_parser.
setup(
    name = "phpbb_parser",

    # Versions should comply with PEP440. For a discussion on single-sourcing
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
    version = "1.0.0",

    description = "A parser to read data from third party phpBB forums",
    long_description = "A parser to read data from third party phpBB forums",

    # The project's main homepage.
    url = "https://github.com/rubenwardy/python-phpbb-parser",

    # Author details
    author = "rubenwardy",
    author_email = "rubenwardy@gmail.com",

    # Choose your license
    license = "MIT",

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers = [
        # How mature is this project? Common values are
        # 3 - Alpha
        # 4 - Beta
        # 5 - Production/Stable
        "Development Status :: 3 - Alpha",

        # Indicate who your project is intended for
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Libraries :: Python Modules",

        # Pick your license as you wish (should match "license" above)
        "License :: OSI Approved :: MIT License",

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 2.6",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.3",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
    ],

    # What does your project relate to?
    keywords = "phpbb parser http api reader development",

    # You can just specify the packages manually here if your project is
    # simple. Or you can use find_packages().
    packages = find_packages(exclude = ["contrib", "docs", "tests"]),

    # Alternatively, if you want to distribute just a my_module.py, uncomment
    # this:
    # py_modules = ["my_module"],

    # List run-time dependencies here. These will be installed by pip when
    # your project is installed. For an analysis of "install_requires" vs pip's
    # requirements files see:
    # https://packaging.python.org/en/latest/requirements.html
    #
    # beautifulsoup4 provides the bs4 module; lxml is needed as well because
    # the package builds its soup with BeautifulSoup(..., "lxml").
    install_requires = ["beautifulsoup4", "lxml"],

    # List additional groups of dependencies here (e.g. development
    # dependencies). You can install these using the following syntax,
    # for example:
    # $ pip install -e .[dev,test]
    extras_require = {
        "dev": [],
        "test": [],
    },

    # If there are data files included in your packages that need to be
    # installed, specify them here. If using Python 2.6 or less, then these
    # have to be included in MANIFEST.in as well.
    package_data = {},

    # Although "package_data" is the preferred approach, in some case you may
    # need to place data files outside of your packages. See:
    # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
    # In this case, "data_file" will be installed into "<sys.prefix>/my_data"
    data_files = [],

    # To provide executable scripts, use entry points in preference to the
    # "scripts" keyword. Entry points provide cross-platform support and allow
    # pip to create the appropriate form of executable for the target platform.
    entry_points = {},
)