# coding=utf-8
###
# Copyright (c) 2009 Michael Tughan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
|
|
|
|
import sys
|
|
|
|
# Encoding used both to decode this file's byte-string literals on Python 2
# and to encode the expanded queries returned by the lookup helpers.
encoding = 'utf-8'

if sys.version_info[0] >= 3:
    def u(s):
        """Return *s* unchanged; Python 3 string literals are already text."""
        return s
else:
    def u(s):
        """Decode the byte-string literal *s* into a ``unicode`` object.

        This file is declared as UTF-8, so non-ASCII literals reach this
        function as UTF-8 bytes.  Decoding them with "unicode_escape" maps
        every byte to its own code point and yields mojibake for names such
        as "åland islands"; decoding with the source encoding keeps the
        text intact.
        """
        return s.decode(encoding)
|
|
|
|
# Provinces. (Province being a metric state measurement mind you. :D)
#
# Maps a lower-case shortform to the full lower-case name that replaces it.
#
# NOTE(review): the province codes 'nf' (newfoundland), 'nu' (nunavut),
# 'pe' (prince edward island) and 'sk' (saskatchewan) collide with the country
# codes for norfolk island, niue, peru and slovakia.  The original literal
# listed both; a dict literal keeps the last value for a repeated key, so the
# country names always won and the province entries were dead.  The duplicates
# are removed here and the effective (country) mapping is kept unchanged.
# Decide whether these four codes should resolve to provinces instead.
_shortforms = {
    # Canadian provinces
    'ab': 'alberta',
    'bc': 'british columbia',
    'mb': 'manitoba',
    'nb': 'new brunswick',
    'ns': 'nova scotia',
    'nt': 'northwest territories',
    'nwt': 'northwest territories',
    'on': 'ontario',
    'pei': 'prince edward island',
    'qc': 'quebec',
    'yk': 'yukon',

    # Countries
    # Non-ASCII names go through u() so that they are text on Python 2 as well.
    'ad': 'andorra',
    'ae': 'united arab emirates',
    'af': 'afghanistan',
    'ag': 'antigua and barbuda',
    'ai': 'anguilla',
    'am': 'armenia',
    'an': 'netherlands antilles',
    'ao': 'angola',
    'aq': 'antarctica',
    'as': 'american samoa',
    'at': 'austria',
    'au': 'australia',
    'aw': 'aruba',
    'ax': u('åland islands'),
    'ba': 'bosnia and herzegovina',
    'bb': 'barbados',
    'bd': 'bangladesh',
    'be': 'belgium',
    'bf': 'burkina faso',
    'bg': 'bulgaria',
    'bh': 'bahrain',
    'bi': 'burundi',
    'bj': 'benin',
    'bl': u('saint barthélemy'),
    'bm': 'bermuda',
    'bn': 'brunei darussalam',
    'bo': 'bolivia',
    'br': 'brazil',
    'bs': 'bahamas',
    'bt': 'bhutan',
    'bv': 'bouvet island',
    'bw': 'botswana',
    'by': 'belarus',
    'bz': 'belize',
    'cc': 'cocos (keeling) islands',
    'cd': 'congo, the democratic republic of the',
    'cf': 'central african republic',
    'cg': 'congo',
    'ch': 'switzerland',
    'ci': u('côte d\'ivoire'),
    'ck': 'cook islands',
    'cl': 'chile',
    'cm': 'cameroon',
    'cn': 'china',
    'cr': 'costa rica',
    'cu': 'cuba',
    'cv': 'cape verde',
    'cx': 'christmas island',
    'cy': 'cyprus',
    'cz': 'czech republic',
    'dj': 'djibouti',
    'dk': 'denmark',
    'dm': 'dominica',
    'do': 'dominican republic',
    'dz': 'algeria',
    'ec': 'ecuador',
    'ee': 'estonia',
    'eg': 'egypt',
    'eh': 'western sahara',
    'er': 'eritrea',
    'es': 'spain',
    'et': 'ethiopia',
    'fi': 'finland',
    'fj': 'fiji',
    'fk': 'falkland islands',
    'fm': 'micronesia',
    'fo': 'faroe islands',
    'fr': 'france',
    'gb': 'united kingdom',
    'gd': 'grenada',
    'ge': 'georgia',
    'gf': 'french guiana',
    'gg': 'guernsey',
    'gh': 'ghana',
    'gi': 'gibraltar',
    'gl': 'greenland',
    'gm': 'gambia',
    'gn': 'guinea',
    'gp': 'guadeloupe',
    'gq': 'equatorial guinea',
    'gr': 'greece',
    'gs': 'south georgia and the south sandwich islands',
    'gt': 'guatemala',
    'gu': 'guam',
    'gw': 'guinea-bissau',
    'gy': 'guyana',
    'hk': 'hong kong',
    'hm': 'heard island and mcdonald islands',
    'hn': 'honduras',
    'hr': 'croatia',
    'ht': 'haiti',
    'hu': 'hungary',
    'ie': 'ireland',
    'im': 'isle of man',
    'io': 'british indian ocean territory',
    'iq': 'iraq',
    'ir': 'iran, islamic republic of',
    'is': 'iceland',
    'it': 'italy',
    'je': 'jersey',
    'jm': 'jamaica',
    'jo': 'jordan',
    'jp': 'japan',
    'ke': 'kenya',
    'kg': 'kyrgyzstan',
    'kh': 'cambodia',
    'ki': 'kiribati',
    'km': 'comoros',
    'kn': 'saint kitts and nevis',
    'kp': 'north korea',
    'kr': 'south korea',
    'kw': 'kuwait',
    'kz': 'kazakhstan',
    'lb': 'lebanon',
    'lc': 'saint lucia',
    'li': 'liechtenstein',
    'lk': 'sri lanka',
    'lr': 'liberia',
    'ls': 'lesotho',
    'lt': 'lithuania',
    'lu': 'luxembourg',
    'lv': 'latvia',
    'ly': 'libyan arab jamahiriya',
    'mc': 'monaco',
    'mf': 'saint martin',
    'mg': 'madagascar',
    'mh': 'marshall islands',
    'mk': 'macedonia, the former yugoslav republic of',
    'ml': 'mali',
    'mm': 'myanmar',
    'mp': 'northern mariana islands',
    'mq': 'martinique',
    'mr': 'mauritania',
    'mu': 'mauritius',
    'mv': 'maldives',
    'mw': 'malawi',
    'mx': 'mexico',
    'my': 'malaysia',
    'mz': 'mozambique',
    'na': 'namibia',
    'nf': 'norfolk island',
    'ng': 'nigeria',
    'ni': 'nicaragua',
    'nl': 'netherlands',
    'no': 'norway',
    'np': 'nepal',
    'nr': 'nauru',
    'nu': 'niue',
    'nz': 'new zealand',
    'om': 'oman',
    'pe': 'peru',
    'pf': 'french polynesia',
    'pg': 'papua new guinea',
    'ph': 'philippines',
    'pk': 'pakistan',
    'pl': 'poland',
    'pm': 'saint pierre and miquelon',
    'pn': 'pitcairn',
    'pr': 'puerto rico',
    'ps': 'palestinian territory',
    'pt': 'portugal',
    'pw': 'palau',
    'py': 'paraguay',
    'qa': 'qatar',
    're': u('réunion'),
    'ro': 'romania',
    'rs': 'serbia',
    'ru': 'russian federation',
    'rw': 'rwanda',
    'sa': 'saudi arabia',
    'sb': 'solomon islands',
    'se': 'sweden',
    'sg': 'singapore',
    'sh': 'saint helena',
    'si': 'slovenia',
    'sj': 'svalbard and jan mayen',
    'sk': 'slovakia',
    'sl': 'sierra leone',
    'sm': 'san marino',
    'sn': 'senegal',
    'so': 'somalia',
    'sr': 'suriname',
    'st': 'sao tome and principe',
    'sv': 'el salvador',
    'sy': 'syrian arab republic',
    'sz': 'swaziland',
    'tc': 'turks and caicos islands',
    'td': 'chad',
    'tf': 'french southern territories',
    'tg': 'togo',
    'th': 'thailand',
    'tj': 'tajikistan',
    'tk': 'tokelau',
    'tl': 'timor-leste',
    'tm': 'turkmenistan',
    'to': 'tonga',
    'tr': 'turkey',
    'tt': 'trinidad and tobago',
    'tv': 'tuvalu',
    'tw': 'taiwan',
    'tz': 'tanzania',
    'ua': 'ukraine',
    'ug': 'uganda',
    'um': 'united states minor outlying islands',
    'uy': 'uruguay',
    'uz': 'uzbekistan',
    'vc': 'saint vincent and the grenadines',
    've': 'venezuela, bolivarian republic of',
    'vg': 'virgin islands, british',
    'vi': 'virgin islands, u.s.',
    'vn': 'viet nam',
    'vu': 'vanuatu',
    'wf': 'wallis and futuna',
    'ws': 'samoa',
    'ye': 'yemen',
    'yt': 'mayotte',
    'za': 'south africa',
    'zm': 'zambia',
    'zw': 'zimbabwe',
}
|
|
|
|
_conflictingShortforms = {
|
|
'al': 'albania',
|
|
'ar': 'argentina',
|
|
'az': 'azerbaijan',
|
|
'ca': 'canada',
|
|
'co': 'colombia',
|
|
'de': 'germany',
|
|
'ga': 'gabon',
|
|
'id': 'indonesia',
|
|
'il': 'israel',
|
|
'in': 'india',
|
|
'ky': 'cayman islands',
|
|
'la': 'laos',
|
|
'ma': 'morocco',
|
|
'md': 'moldova',
|
|
'me': 'montenegro',
|
|
'mn': 'mongolia',
|
|
'mo': 'macao',
|
|
'ms': 'montserrat',
|
|
'mt': 'malta',
|
|
'nc': 'new caledonia',
|
|
'ne': 'niger',
|
|
'pa': 'panama',
|
|
'sc': 'seychelles',
|
|
'sd': 'sudan',
|
|
'tn': 'tunisia',
|
|
'va': 'vatican city'
|
|
}
|
|
|
|
def checkShortforms(query):
    """Expand a trailing province/country shortform in a location query.

    Being Canadian, the original author often uses something like
    "Toronto, ON", but wunderground needs "Toronto, Ontario".

    The last word of *query* is looked up case-insensitively in
    ``_shortforms``, first split on whitespace and then on commas.

    Returns:
        *query* itself, unchanged, when it is a single word, consists only
        of whitespace, or does not end in a known shortform.  Otherwise the
        query with the shortform replaced by its full name (trailing
        whitespace dropped), passed through ``.encode(encoding)``.

    NOTE(review): on Python 3 the match path therefore returns ``bytes``
    while the other paths return the ``str`` that was passed in.  This is
    kept for compatibility with existing callers -- confirm it is intended.
    """
    if ' ' not in query and ',' not in query:
        # No spaces or commas: it is one word, nothing to expand.
        return query

    # Slice against the right-stripped text.  The last word found by split()
    # never includes trailing whitespace, so slicing the raw query would cut
    # into the wrong characters ("Toronto, ON " -> "Toronto, Oontario").
    trimmed = query.rstrip()
    if not trimmed:
        # Whitespace-only query: split() would be empty, nothing to look up.
        return query

    # The shortform may be set off by whitespace ("Toronto ON", "Toronto, ON")
    # or only by a comma ("Toronto,ON"); try whitespace first.
    for lastWord in (trimmed.split()[-1], trimmed.split(',')[-1]):
        fullName = _shortforms.get(lastWord.lower())
        if fullName is not None:
            prefix = trimmed[:len(trimmed) - len(lastWord)]
            return (prefix + fullName).encode(encoding)

    return query  # probably not a province name, return the original query
|
|
|
|
def checkConflictingShortforms(query):
    """Expand a trailing shortform found in ``_conflictingShortforms``.

    The last word of *query* is looked up case-insensitively in
    ``_conflictingShortforms``, first split on whitespace and then on commas.

    Returns:
        ``None`` when *query* is a single word, consists only of whitespace,
        or does not end in one of the conflicting shortforms.  Otherwise the
        query with the shortform replaced by the full country name (trailing
        whitespace dropped), passed through ``.encode(encoding)``.
    """
    if ' ' not in query and ',' not in query:
        # A single word cannot carry a trailing shortform.
        return None

    # Slice against the right-stripped text.  The last word found by split()
    # never includes trailing whitespace, so slicing the raw query would cut
    # into the wrong characters.
    trimmed = query.rstrip()
    if not trimmed:
        # Whitespace-only query: split() would be empty, nothing to look up.
        return None

    # The shortform may be set off by whitespace or only by a comma; try
    # whitespace first.
    for lastWord in (trimmed.split()[-1], trimmed.split(',')[-1]):
        fullName = _conflictingShortforms.get(lastWord.lower())
        if fullName is not None:
            prefix = trimmed[:len(trimmed) - len(lastWord)]
            return (prefix + fullName).encode(encoding)

    return None
|