From 4ea80eec80ec49ebb4f039ce41ad7f082753f099 Mon Sep 17 00:00:00 2001
From: Ivan Kozik
Date: Wed, 16 Dec 2015 13:11:16 +0000
Subject: [PATCH] global igset: Ignore a loop on archive.org

---
 libgrabsite/__init__.py        | 2 +-
 libgrabsite/ignore_sets/global | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libgrabsite/__init__.py b/libgrabsite/__init__.py
index d0564e5..17ea304 100644
--- a/libgrabsite/__init__.py
+++ b/libgrabsite/__init__.py
@@ -1 +1 @@
-__version__ = '0.9.7'
+__version__ = '0.9.8'
diff --git a/libgrabsite/ignore_sets/global b/libgrabsite/ignore_sets/global
index 55faa97..da80bb5 100644
--- a/libgrabsite/ignore_sets/global
+++ b/libgrabsite/ignore_sets/global
@@ -220,6 +220,7 @@
 ^https?://static\.licdn\.com/sc/p/.+/f//
 ^https?://tm\.uol\.com\.br/h/.+/h/
 ^https?://((s-)?static\.ak\.fbcdn\.net|(connect\.|www\.)?facebook\.com)/connect\.php/js/.*rsrc\.php
+^https?://web\.archive\.org/web/[^/]+/https?\:/[^/]+\.addthis\.com/.+/static/.+/static/
 
 # This specifically catches only *invalid* flickr.com links extracted by wpull
 ^https?://www\.flickr\.com/(explore/|photos/[^/]+/(sets/\d+/(page\d+/)?)?)\d+_[a-f0-9]+(_[a-z])?\.jpg$