Supybot-plugins/WunderWeather/shortforms.py

339 lines
9.4 KiB
Python

# coding=utf-8
###
# Copyright (c) 2009 Michael Tughan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import sys
# Encoding used both for this source file (see the coding line at the top)
# and for the queries returned by the lookup helpers below.
encoding = 'utf-8'

if sys.version_info[0] >= 3:
    def u(s):
        """Return *s* unchanged; Python 3 string literals are already text."""
        return s
else:
    def u(s):
        """Decode the byte-string literal *s* into a ``unicode`` object.

        Literals in this file are stored as UTF-8 bytes on Python 2, so they
        are decoded with the declared source encoding.  The "unicode_escape"
        codec must not be used here: it reads non-escaped bytes as Latin-1
        and would turn a literal such as 'å' into mojibake.
        """
        return s.decode(encoding)
# Provinces. (Province being a metric state measurement mind you. :D)
# Maps a lower-case abbreviation to the full (lower-case) region name that
# replaces it at the end of a query.
#
# NOTE(review): 'nf', 'nu', 'pe' and 'sk' are both Canadian abbreviations
# (Newfoundland, Nunavut, Prince Edward Island, Saskatchewan) and country
# codes (Norfolk Island, Niue, Peru, Slovakia).  This literal used to list
# both; the later country entry silently won because duplicate keys in a dict
# display keep the last value.  Only the effective (country) entries are kept
# here so the table says what it actually does -- confirm which meaning
# should take precedence.
_shortforms = {
    # Canadian provinces
    'ab': 'alberta',
    'bc': 'british columbia',
    'mb': 'manitoba',
    'nb': 'new brunswick',
    'ns': 'nova scotia',
    'nt': 'northwest territories',
    'nwt': 'northwest territories',
    'on': 'ontario',
    'pei': 'prince edward island',
    'qc': 'quebec',
    'yk': 'yukon',
    # Countries (non-ASCII names go through u() so they are text on Python 2)
    'ad': 'andorra',
    'ae': 'united arab emirates',
    'af': 'afghanistan',
    'ag': 'antigua and barbuda',
    'ai': 'anguilla',
    'am': 'armenia',
    'an': 'netherlands antilles',
    'ao': 'angola',
    'aq': 'antarctica',
    'as': 'american samoa',
    'at': 'austria',
    'au': 'australia',
    'aw': 'aruba',
    'ax': u('åland islands'),
    'ba': 'bosnia and herzegovina',
    'bb': 'barbados',
    'bd': 'bangladesh',
    'be': 'belgium',
    'bf': 'burkina faso',
    'bg': 'bulgaria',
    'bh': 'bahrain',
    'bi': 'burundi',
    'bj': 'benin',
    'bl': u('saint barthélemy'),
    'bm': 'bermuda',
    'bn': 'brunei darussalam',
    'bo': 'bolivia',
    'br': 'brazil',
    'bs': 'bahamas',
    'bt': 'bhutan',
    'bv': 'bouvet island',
    'bw': 'botswana',
    'by': 'belarus',
    'bz': 'belize',
    'cc': 'cocos (keeling) islands',
    'cd': 'congo, the democratic republic of the',
    'cf': 'central african republic',
    'cg': 'congo',
    'ch': 'switzerland',
    'ci': u('côte d\'ivoire'),
    'ck': 'cook islands',
    'cl': 'chile',
    'cm': 'cameroon',
    'cn': 'china',
    'cr': 'costa rica',
    'cu': 'cuba',
    'cv': 'cape verde',
    'cx': 'christmas island',
    'cy': 'cyprus',
    'cz': 'czech republic',
    'dj': 'djibouti',
    'dk': 'denmark',
    'dm': 'dominica',
    'do': 'dominican republic',
    'dz': 'algeria',
    'ec': 'ecuador',
    'ee': 'estonia',
    'eg': 'egypt',
    'eh': 'western sahara',
    'er': 'eritrea',
    'es': 'spain',
    'et': 'ethiopia',
    'fi': 'finland',
    'fj': 'fiji',
    'fk': 'falkland islands',
    'fm': 'micronesia',
    'fo': 'faroe islands',
    'fr': 'france',
    'gb': 'united kingdom',
    'gd': 'grenada',
    'ge': 'georgia',
    'gf': 'french guiana',
    'gg': 'guernsey',
    'gh': 'ghana',
    'gi': 'gibraltar',
    'gl': 'greenland',
    'gm': 'gambia',
    'gn': 'guinea',
    'gp': 'guadeloupe',
    'gq': 'equatorial guinea',
    'gr': 'greece',
    'gs': 'south georgia and the south sandwich islands',
    'gt': 'guatemala',
    'gu': 'guam',
    'gw': 'guinea-bissau',
    'gy': 'guyana',
    'hk': 'hong kong',
    'hm': 'heard island and mcdonald islands',
    'hn': 'honduras',
    'hr': 'croatia',
    'ht': 'haiti',
    'hu': 'hungary',
    'ie': 'ireland',
    'im': 'isle of man',
    'io': 'british indian ocean territory',
    'iq': 'iraq',
    'ir': 'iran, islamic republic of',
    'is': 'iceland',
    'it': 'italy',
    'je': 'jersey',
    'jm': 'jamaica',
    'jo': 'jordan',
    'jp': 'japan',
    'ke': 'kenya',
    'kg': 'kyrgyzstan',
    'kh': 'cambodia',
    'ki': 'kiribati',
    'km': 'comoros',
    'kn': 'saint kitts and nevis',
    'kp': 'north korea',
    'kr': 'south korea',
    'kw': 'kuwait',
    'kz': 'kazakhstan',
    'lb': 'lebanon',
    'lc': 'saint lucia',
    'li': 'liechtenstein',
    'lk': 'sri lanka',
    'lr': 'liberia',
    'ls': 'lesotho',
    'lt': 'lithuania',
    'lu': 'luxembourg',
    'lv': 'latvia',
    'ly': 'libyan arab jamahiriya',
    'mc': 'monaco',
    'mf': 'saint martin',
    'mg': 'madagascar',
    'mh': 'marshall islands',
    'mk': 'macedonia, the former yugoslav republic of',
    'ml': 'mali',
    'mm': 'myanmar',
    'mp': 'northern mariana islands',
    'mq': 'martinique',
    'mr': 'mauritania',
    'mu': 'mauritius',
    'mv': 'maldives',
    'mw': 'malawi',
    'mx': 'mexico',
    'my': 'malaysia',
    'mz': 'mozambique',
    'na': 'namibia',
    'nf': 'norfolk island',
    'ng': 'nigeria',
    'ni': 'nicaragua',
    'nl': 'netherlands',
    'no': 'norway',
    'np': 'nepal',
    'nr': 'nauru',
    'nu': 'niue',
    'nz': 'new zealand',
    'om': 'oman',
    'pe': 'peru',
    'pf': 'french polynesia',
    'pg': 'papua new guinea',
    'ph': 'philippines',
    'pk': 'pakistan',
    'pl': 'poland',
    'pm': 'saint pierre and miquelon',
    'pn': 'pitcairn',
    'pr': 'puerto rico',
    'ps': 'palestinian territory',
    'pt': 'portugal',
    'pw': 'palau',
    'py': 'paraguay',
    'qa': 'qatar',
    're': u('réunion'),
    'ro': 'romania',
    'rs': 'serbia',
    'ru': 'russian federation',
    'rw': 'rwanda',
    'sa': 'saudi arabia',
    'sb': 'solomon islands',
    'se': 'sweden',
    'sg': 'singapore',
    'sh': 'saint helena',
    'si': 'slovenia',
    'sj': 'svalbard and jan mayen',
    'sk': 'slovakia',
    'sl': 'sierra leone',
    'sm': 'san marino',
    'sn': 'senegal',
    'so': 'somalia',
    'sr': 'suriname',
    'st': 'sao tome and principe',
    'sv': 'el salvador',
    'sy': 'syrian arab republic',
    'sz': 'swaziland',
    'tc': 'turks and caicos islands',
    'td': 'chad',
    'tf': 'french southern territories',
    'tg': 'togo',
    'th': 'thailand',
    'tj': 'tajikistan',
    'tk': 'tokelau',
    'tl': 'timor-leste',
    'tm': 'turkmenistan',
    'to': 'tonga',
    'tr': 'turkey',
    'tt': 'trinidad and tobago',
    'tv': 'tuvalu',
    'tw': 'taiwan',
    'tz': 'tanzania',
    'ua': 'ukraine',
    'ug': 'uganda',
    'um': 'united states minor outlying islands',
    'uy': 'uruguay',
    'uz': 'uzbekistan',
    'vc': 'saint vincent and the grenadines',
    've': 'venezuela, bolivarian republic of',
    'vg': 'virgin islands, british',
    'vi': 'virgin islands, u.s.',
    'vn': 'viet nam',
    'vu': 'vanuatu',
    'wf': 'wallis and futuna',
    'ws': 'samoa',
    'ye': 'yemen',
    'yt': 'mayotte',
    'za': 'south africa',
    'zm': 'zambia',
    'zw': 'zimbabwe',
}
# Country abbreviations kept apart from the main shortform table.  Every key
# here is also a US state postal abbreviation (AL, AR, AZ, CA, ...), which is
# presumably why they are looked up separately -- confirm against the caller.
_conflictingShortforms = dict([
    ('al', 'albania'),
    ('ar', 'argentina'),
    ('az', 'azerbaijan'),
    ('ca', 'canada'),
    ('co', 'colombia'),
    ('de', 'germany'),
    ('ga', 'gabon'),
    ('id', 'indonesia'),
    ('il', 'israel'),
    ('in', 'india'),
    ('ky', 'cayman islands'),
    ('la', 'laos'),
    ('ma', 'morocco'),
    ('md', 'moldova'),
    ('me', 'montenegro'),
    ('mn', 'mongolia'),
    ('mo', 'macao'),
    ('ms', 'montserrat'),
    ('mt', 'malta'),
    ('nc', 'new caledonia'),
    ('ne', 'niger'),
    ('pa', 'panama'),
    ('sc', 'seychelles'),
    ('sd', 'sudan'),
    ('tn', 'tunisia'),
    ('va', 'vatican city'),
])
def checkShortforms(query):
    """Expand a trailing province/country abbreviation in *query*.

    Being Canadian, the author often types something like "Toronto, ON",
    but wunderground needs "Toronto, Ontario".

    The last whitespace-separated word is tried first, then whatever follows
    the last comma; both are matched case-insensitively against
    ``_shortforms``.

    Returns *query* itself when nothing matches.  On a match, returns the
    query with the abbreviation replaced by its full name, passed through
    ``.encode(encoding)``.
    """
    if ' ' not in query and ',' not in query:
        # No spaces or commas: it's one word, no need to check for provinces.
        return query

    # Trailing whitespace would shift the slice below and leave part of the
    # abbreviation in the output, so measure against the trimmed query.
    trimmed = query.rstrip()
    words = trimmed.split()
    if not words:
        # Whitespace-only query: nothing to expand.
        return query

    # Space-separated word first, then comma-separated remainder.
    for candidate in (words[-1], trimmed.split(',')[-1]):
        key = candidate.lower()
        if key in _shortforms:
            prefix = trimmed[:len(trimmed) - len(candidate)]
            return (prefix + _shortforms[key]).encode(encoding)

    # Probably not a province name; hand back the original query.
    return query
def checkConflictingShortforms(query):
    """Expand a trailing abbreviation found in ``_conflictingShortforms``.

    The last whitespace-separated word is tried first, then whatever follows
    the last comma; both are matched case-insensitively.

    Returns ``None`` when *query* is a single word or nothing matches.  On a
    match, returns the query with the abbreviation replaced by its full
    name, passed through ``.encode(encoding)``.
    """
    if ' ' not in query and ',' not in query:
        # A single word has no trailing abbreviation to expand.
        return None

    # Trailing whitespace would shift the slice below and leave part of the
    # abbreviation in the output, so measure against the trimmed query.
    trimmed = query.rstrip()
    words = trimmed.split()
    if not words:
        # Whitespace-only query: nothing to expand.
        return None

    # Space-separated word first, then comma-separated remainder.
    for candidate in (words[-1], trimmed.split(',')[-1]):
        key = candidate.lower()
        if key in _conflictingShortforms:
            prefix = trimmed[:len(trimmed) - len(candidate)]
            return (prefix + _conflictingShortforms[key]).encode(encoding)

    return None