Merge pull request #623 from mwmiller/mo_dates_mo_problems
DatesRole: support standard output of date(1) & RFC 2822
master
commit
59de8f3a87
|
@ -13,6 +13,21 @@ use Try::Tiny;
|
|||
# This appears to parse most/all of the big ones, however it doesn't present a regex
|
||||
use DateTime::Format::HTTP;
|
||||
|
||||
# Lookup from a lower-case three-letter month abbreviation to its calendar
# number (jan => 1 through dec => 12). Callers must lower-case the key.
my %short_month_to_number = do {
    my @abbreviations = qw(jan feb mar apr may jun jul aug sep oct nov dec);
    map { $abbreviations[$_] => $_ + 1 } 0 .. $#abbreviations;
};
|
||||
|
||||
# Reused lists and components for below
|
||||
# Three-letter English weekday abbreviations (Mon..Sun), matched
# case-insensitively.
my $short_day_of_week = qr#Mon|Tue|Wed|Thu|Fri|Sat|Sun#i;
|
||||
# Full English weekday names (Monday..Sunday), matched case-insensitively.
my $full_day_of_week = qr#Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday#i;
|
||||
|
@ -35,7 +50,191 @@ my $ambiguous_dates_matches = qr#^(?<m>$date_number)$date_delim(?<d>$date_number
|
|||
# English ordinal suffixes as in 1st, 2nd, 3rd, 4th; matched
# case-insensitively and non-capturing so it can be embedded in larger patterns.
my $number_suffixes = qr#(?:st|nd|rd|th)#i;
|
||||
|
||||
# Timezones: https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
|
||||
# Time zone designator: a numeric UTC offset such as -0800 or one of the
# listed zone abbreviations, matched case-insensitively.
# NOTE(review): $tz_suffixes is declared a second time further down, built from
# the keys of %tz_offsets; this literal list looks like the superseded
# definition -- confirm only one declaration remains in the real file.
my $tz_suffixes = qr#(?:[+-][0-9]{4})|ACDT|ACST|ACT|ADT|AEDT|AEST|AFT|AKDT|AKST|AMST|AMT|ART|AST|AWDT|AWST|AZOST|AZT|BDT|BIOT|BIT|BOT|BRT|BST|BTT|CAT|CCT|CDT|CEDT|CEST|CET|CHADT|CHAST|CHOT|CHUT|CIST|CIT|CKT|CLST|CLT|COST|COT|CST|CT|CVT|CWST|CXT|ChST|DAVT|DDUT|DFT|EASST|EAST|EAT|ECT|EDT|EEDT|EEST|EET|EGST|EGT|EIT|EST|FET|FJT|FKST|FKT|FNT|GALT|GAMT|GET|GFT|GILT|GIT|GMT|GST|GYT|HADT|HAEC|HAST|HKT|HMT|HOVT|HST|ICT|IDT|IOT|IRDT|IRKT|IRST|IST|JST|KGT|KOST|KRAT|KST|LHST|LINT|MAGT|MART|MAWT|MDT|MEST|MET|MHT|MIST|MIT|MMT|MSK|MST|MUT|MVT|MYT|NCT|NDT|NFT|NPT|NST|NT|NUT|NZDT|NZST|OMST|ORAT|PDT|PET|PETT|PGT|PHOT|PKT|PMDT|PMST|PONT|PST|PYST|PYT|RET|ROTT|SAKT|SAMT|SAST|SBT|SCT|SGT|SLST|SRT|SST|SYOT|TAHT|TFT|THA|TJT|TKT|TLT|TMT|TOT|TVT|UCT|ULAT|UTC|UYST|UYT|UZT|VET|VLAT|VOLT|VOST|VUT|WAKT|WAST|WAT|WEDT|WEST|WET|WIT|WST|YAKT|YEKT|Z#i;
|
||||
# Time zone abbreviation => UTC offset in +HHMM / -HHMM form.
# Written as flat "NAME OFFSET" pairs, one row group per initial letter.
# Keys keep their exact spelling (note the mixed-case ChST).
my %tz_offsets = qw(
    ACDT  +1030   ACST  +0930   ACT   +0800   ADT   -0300   AEDT  +1100
    AEST  +1000   AFT   +0430   AKDT  -0800   AKST  -0900   AMST  -0300
    AMT   -0400   ART   -0300   AST   +0300   AWDT  +0900   AWST  +0800
    AZOST -0100   AZT   +0400

    BDT   +0800   BIOT  +0600   BIT   -1200   BOT   -0400   BRT   -0300
    BST   +0100   BTT   +0600

    CAT   +0200   CCT   +0630   CDT   -0500   CEDT  +0200   CEST  +0200
    CET   +0100   CHADT +1345   CHAST +1245   CHOT  +0800   CHUT  +1000
    CIST  -0800   CIT   +0800   CKT   -1000   CLST  -0300   CLT   -0400
    COST  -0400   COT   -0500   CST   -0600   CT    +0800   CVT   -0100
    CWST  +0845   CXT   +0700   ChST  +1000

    DAVT  +0700   DDUT  +1000   DFT   +0100

    EASST -0500   EAST  -0600   EAT   +0300   ECT   -0400   EDT   -0400
    EEDT  +0300   EEST  +0300   EET   +0200   EGST  +0000   EGT   -0100
    EIT   +0900   EST   -0500

    FET   +0300   FJT   +1200   FKST  -0300   FKT   -0400   FNT   -0200

    GALT  -0600   GAMT  -0900   GET   +0400   GFT   -0300   GILT  +1200
    GIT   -0900   GMT   +0000   GST   -0200   GYT   -0400

    HADT  -0900   HAEC  +0200   HAST  -1000   HKT   +0800   HMT   +0500
    HOVT  +0700   HST   -1000

    ICT   +0700   IDT   +0300   IOT   +0300   IRDT  +0430   IRKT  +0900
    IRST  +0330   IST   +0530

    JST   +0900

    KGT   +0600   KOST  +1100   KRAT  +0700   KST   +0900

    LHST  +1030   LINT  +1400

    MAGT  +1200   MART  -0930   MAWT  +0500   MDT   -0600   MEST  +0200
    MET   +0100   MHT   +1200   MIST  +1100   MIT   -0930   MMT   +0630
    MSK   +0400   MST   -0700   MUT   +0400   MVT   +0500   MYT   +0800

    NCT   +1100   NDT   -0230   NFT   +1130   NPT   +0545   NST   -0330
    NT    -0330   NUT   -1100   NZDT  +1300   NZST  +1200

    OMST  +0700   ORAT  -0500

    PDT   -0700   PET   -0500   PETT  +1200   PGT   +1000   PHOT  +1300
    PKT   +0500   PMDT  -0200   PMST  -0300   PONT  +1100   PST   -0800
    PYST  -0300   PYT   -0400

    RET   +0400   ROTT  -0300

    SAKT  +1100   SAMT  +0400   SAST  +0200   SBT   +1100   SCT   +0400
    SGT   +0800   SLST  +0530   SRT   -0300   SST   -1100   SYOT  +0300

    TAHT  -1000   TFT   +0500   THA   +0700   TJT   +0500   TKT   +1300
    TLT   +0900   TMT   +0500   TOT   +1300   TVT   +0500

    UCT   +0000   ULAT  +0800   UTC   +0000   UYST  -0200   UYT   -0300
    UZT   +0500

    VET   -0430   VLAT  +1000   VOLT  +0400   VOST  +0600   VUT   +1100

    WAKT  +1200   WAST  +0200   WAT   +0100   WEDT  +0100   WEST  +0100
    WET   +0000   WIT   +0700   WST   +0800

    YAKT  +1000   YEKT  +0600

    Z     +0000
);
|
||||
# Alternation of every known zone abbreviation, for embedding in a pattern.
# Perl tries alternatives left to right and keys() returns them in a
# per-process random order, so the names are sorted longest first (ties broken
# alphabetically): PETT is always tried before PET, CST before CT, and the
# resulting pattern is identical on every run.
my $tz_strings = join('|', sort { length($b) <=> length($a) or $a cmp $b } keys %tz_offsets);
|
||||
# Time zone designator: either a numeric UTC offset such as -0800 or one of
# the abbreviations in $tz_strings, matched case-insensitively.
my $tz_suffixes = qr#(?:[+-][0-9]{4})|$tz_strings#i;
|
||||
|
||||
# Default date(1) output, e.g. "Sun Sep  7 15:57:56 EDT 2014": weekday
# abbreviation, month, day, 24h time, zone, four-digit year. One or two
# whitespace characters are allowed between month and day.
my $date_standard = qr#$short_day_of_week $short_month\s{1,2}$date_number $time_24h $tz_suffixes [0-9]{4}#i;
|
||||
# Same layout as $date_standard, but with named captures for the parts needed
# to rebuild the date: m (month), d (day), t (time), tz (zone), y (year).
my $date_standard_matches = qr#$short_day_of_week (?<m>$short_month)\s{1,2}(?<d>$date_number) (?<t>$time_24h) (?<tz>$tz_suffixes) (?<y>[0-9]{4})#i;
|
||||
|
||||
# formats parsed by vague datestring, without colouring
|
||||
# the context of the code using it
|
||||
|
@ -104,6 +303,12 @@ sub build_datestring_regex {
|
|||
# RFC850 08-Feb-94 14:15:29 GMT
|
||||
push @regexes, qr#[0-9]{2}-$short_month-(?:[0-9]{2}|[0-9]{4}) $time_24h?(?: ?$tz_suffixes)#i;
|
||||
|
||||
# RFC2822 Sat, 13 Mar 2010 11:29:05 -0800
|
||||
push @regexes, qr#$short_day_of_week, $date_number $short_month [0-9]{4} $time_24h $tz_suffixes#i;
|
||||
|
||||
# date(1) default format Sun Sep 7 15:57:56 EDT 2014
|
||||
push @regexes, $date_standard;
|
||||
|
||||
# month-first date formats
|
||||
push @regexes, qr#$date_number$date_delim$short_month$date_delim[0-9]{4}#i;
|
||||
push @regexes, qr#$date_number$date_delim$full_month$date_delim[0-9]{4}#i;
|
||||
|
@ -145,6 +350,9 @@ sub parse_formatted_datestring_to_date {
|
|||
}
|
||||
|
||||
$d = sprintf("%04d-%02d-%02d", $year, $month, $day);
|
||||
} elsif ($d =~ $date_standard_matches) {
|
||||
# To ISO8601 for parsing
|
||||
# Resolve the captured zone to a numeric offset before formatting.
# The pattern is case-insensitive, so a named zone is looked up without regard
# to case (keys such as ChST are mixed-case). A zone that is already a numeric
# offset like -0800 has no table entry and is passed through unchanged rather
# than becoming undef.
my $tz = $+{'tz'};
my ($tz_name) = grep { lc $_ eq lc $tz } keys %tz_offsets;
my $offset = defined $tz_name ? $tz_offsets{$tz_name} : $tz;
$d = sprintf('%04d-%02d-%02dT%s%s', $+{'y'}, $short_month_to_number{lc $+{'m'}}, $+{'d'}, $+{'t'}, $offset);
|
||||
}
|
||||
|
||||
$d =~ s/(\d+)\s?$number_suffixes/$1/i; # Strip ordinal text.
|
||||
|
|
|
@ -86,6 +86,12 @@ subtest 'Dates' => sub {
|
|||
'Sat, 09 Aug 2014 18:20:00' => 1407608400,
|
||||
# RFC850
|
||||
'08-Feb-94 14:15:29 GMT' => 760716929,
|
||||
# date(1) default
|
||||
'Sun Sep 7 15:57:56 EDT 2014' => 1410119876,
|
||||
'Sun Sep 14 15:57:56 UTC 2014' => 1410710276,
|
||||
'Sun Sep 7 20:11:44 BST 2014' => 1410117104,
|
||||
# RFC 2822
|
||||
'Sat, 13 Mar 2010 11:29:05 -0800' => 1268508545,
|
||||
#Undefined/Natural formats:
|
||||
'13/12/2011' => 1323734400, #DMY
|
||||
'01/01/2001' => 978307200, #Ambiguous, but valid
|
||||
|
|
Loading…
Reference in New Issue