[freeside-commits] branch master updated. 7d967f5ac6929fddc08cc077bcd44ea48a3937f2
Mark Wells
mark at 420.am
Fri Oct 4 15:25:38 PDT 2013
The branch, master has been updated
via 7d967f5ac6929fddc08cc077bcd44ea48a3937f2 (commit)
from 5d1f486c543c2e61cea6c050bed86c0c9815085e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 7d967f5ac6929fddc08cc077bcd44ea48a3937f2
Author: Mark Wells <mark at freeside.biz>
Date: Fri Oct 4 15:25:20 2013 -0700
improvements to TomTom address standardization, #13763
diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm
index f0f2b46..baca21d 100644
--- a/FS/FS/Conf.pm
+++ b/FS/FS/Conf.pm
@@ -4212,7 +4212,7 @@ and customer address. Include units.',
{
'key' => 'tomtom-userid',
'section' => 'UI',
- 'description' => 'TomTom geocoding service API key. See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key.',
+ 'description' => 'TomTom geocoding service API key. See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key. This is recommended for addresses in the United States only.',
'type' => 'text',
},
diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm
index 4dd6dc6..b5cc325 100644
--- a/FS/FS/Misc/Geo.pm
+++ b/FS/FS/Misc/Geo.pm
@@ -424,9 +424,15 @@ sub standardize_tomtom {
or die "no tomtom-userid configured\n";
my $country = code2country($location->{country});
+ my ($address1, $address2) = ($location->{address1}, $location->{address2});
+ # try to fix some cases of the address fields being switched
+ if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
+ $address2 = $address1;
+ $address1 = $location->{address2};
+ }
my $result = $class->query(
key => $key,
- T => $location->{address1},
+ T => $address1,
L => $location->{city},
AA => $location->{state},
PC => $location->{zip},
@@ -439,24 +445,121 @@ sub standardize_tomtom {
if (!$match) {
die "Location not found.\n";
}
- warn "tomtom returned match:\n".Dumper($match) if $DEBUG > 1;
- my $tract = join('.', $match->{censusTract} =~ /(....)(..)/);
+ my $type = $match->{type};
+ warn "tomtom returned $type match\n" if $DEBUG;
+ warn Dumper($match) if $DEBUG > 1;
+ my $tract = '';
+ if ( defined $match->{censusTract} ) {
+ $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+ join('.', $match->{censusTract} =~ /(....)(..)/);
+ }
+ # match levels below "intersection" should not be considered clean
+ my $clean = ($type eq 'addresspoint' ||
+ $type eq 'poi' ||
+ $type eq 'house' ||
+ $type eq 'intersection'
+ ) ? 'Y' : '';
+
+ $address2 = normalize_address2($address2, $location->{country});
+
+ $address1 = '';
+ $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
+ $address1 .= $match->{street} if $match->{street};
+
return +{
- address1 => join(' ', $match->{houseNumber}, $match->{street}),
- address2 => $location->{address2}, # XXX still need a solution to this
+ address1 => $address1,
+ address2 => $address2,
city => $match->{city},
- state => $match->{state},
- country => country2code($match->{country}, LOCALE_CODE_ALPHA_2),
+ state => $location->{state}, # this will never change
+ country => $location->{country}, # ditto
zip => ($match->{standardPostalCode} || $match->{postcode}),
latitude => $match->{latitude},
longitude => $match->{longitude},
- censustract => $match->{censusStateCode}.
- $match->{censusFipsCountyCode}.
- $tract,
- addr_clean => 'Y',
+ censustract => $tract,
+ addr_clean => $clean,
};
}
+=iten normalize_address2 STRING, COUNTRY
+
+Given an 'address2' STRING, normalize it for COUNTRY postal standards.
+Currently only works for US and CA.
+
+=cut
+
+# XXX really ought to be a separate module
+my %address2_forms = (
+ # Postal Addressing Standards, Appendix C
+ # (plus correction of "hanger" to "hangar")
+ US => {qw(
+ APARTMENT APT
+ BASEMENT BSMT
+ BUILDING BLDG
+ DEPARTMENT DEPT
+ FLOOR FL
+ FRONT FRNT
+ HANGAR HNGR
+ HANGER HNGR
+ KEY KEY
+ LOBBY LBBY
+ LOT LOT
+ LOWER LOWR
+ OFFICE OFC
+ PENTHOUSE PH
+ PIER PIER
+ REAR REAR
+ ROOM RM
+ SIDE SIDE
+ SLIP SLIP
+ SPACE SPC
+ STOP STOP
+ SUITE STE
+ TRAILER TRLR
+ UNIT UNIT
+ UPPER UPPR
+ )},
+ # Canada Post Addressing Guidelines 4.3
+ CA => {qw(
+ APARTMENT APT
+ APPARTEMENT APP
+ BUREAU BUREAU
+ SUITE SUITE
+ UNIT UNIT
+ UNITÉ UNITÉ
+ )},
+);
+
+sub normalize_address2 {
+ # Some things seen in the address2 field:
+ # Whitespace
+ # The complete address (with address1 containing part of the company name,
+ # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+ # number, etc.)
+ my ($addr2, $country) = @_;
+ $addr2 = uc($addr2);
+ if ( exists($address2_forms{$country}) ) {
+ my $dict = $address2_forms{$country};
+ # protect this
+ $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+ my @words;
+ # remove all punctuation and spaces
+ foreach my $w (split(/\W+/, $addr2)) {
+ if ( exists($dict->{$w}) ) {
+ push @words, $dict->{$w};
+ } else {
+ push @words, $w;
+ }
+ }
+ my $result = join(' ', @words);
+ # correct spacing of pound sign + number
+ $result =~ s/NUMBER(\d)/# $1/;
+ warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+ $addr2 = $result;
+ }
+ $addr2;
+}
+
+
=back
=cut
diff --git a/FS/FS/cust_location.pm b/FS/FS/cust_location.pm
index b98ade1..11e97ec 100644
--- a/FS/FS/cust_location.pm
+++ b/FS/FS/cust_location.pm
@@ -10,6 +10,8 @@ use FS::Conf;
use FS::prospect_main;
use FS::cust_main;
use FS::cust_main_county;
+use FS::GeocodeCache;
+use Date::Format qw( time2str );
$import = 0;
@@ -677,6 +679,13 @@ sub process_censustract_update {
return;
}
+=item process_set_coord
+
+Queueable function to find and fill in coordinates for all locations that
+lack them. Because this uses the Google Maps API, it's internally rate
+limited and must run in a single process.
+
+=cut
sub process_set_coord {
my $job = shift;
@@ -716,6 +725,67 @@ sub process_set_coord {
return;
}
+=item process_standardize [ LOCATIONNUMS ]
+
+Performs address standardization on locations with unclean addresses,
+using whatever method you have configured. If the standardize_* method
+returns a I<clean> address match, the location will be updated. This is
+always an in-place update (because the physical location is the same,
+and is just being referred to by a more accurate name).
+
+Disabled locations will be skipped, as nobody cares.
+
+If any LOCATIONNUMS are provided, only those locations will be updated.
+
+=cut
+
+sub process_standardize {
+ my $job = shift;
+ my @others = qsearch('queue', {
+ 'status' => 'locked',
+ 'job' => $job->job,
+ 'jobnum' => {op=>'!=', value=>$job->jobnum},
+ });
+ return if @others;
+ my @locationnums = grep /^\d+$/, @_;
+ my $where = "AND locationnum IN(".join(',', at locationnums).")"
+ if scalar(@locationnums);
+ my @locations = qsearch({
+ table => 'cust_location',
+ hashref => { addr_clean => '', disabled => '' },
+ extra_sql => $where,
+ });
+ my $n_todo = scalar(@locations);
+ my $n_done = 0;
+
+ # special: log this
+ my $log;
+ eval "use Text::CSV";
+ open $log, '>', "$FS::UID::cache_dir/process_standardize-" .
+ time2str('%Y%m%d',time) .
+ ".csv";
+ my $csv = Text::CSV->new({binary => 1, eol => "\n"});
+
+ foreach my $cust_location (@locations) {
+ $job->update_statustext( int(100 * $n_done/$n_todo) . ",$n_done / $n_todo locations" ) if $job;
+ my $result = FS::GeocodeCache->standardize($cust_location);
+ if ( $result->{addr_clean} and !$result->{error} ) {
+ my @cols = ($cust_location->locationnum);
+ foreach (keys %$result) {
+ push @cols, $cust_location->get($_), $result->{$_};
+ $cust_location->set($_, $result->{$_});
+ }
+ # bypass immutable field restrictions
+ my $error = $cust_location->FS::Record::replace;
+ warn "location ".$cust_location->locationnum.": $error\n" if $error;
+ $csv->print($log, \@cols);
+ }
+ $n_done++;
+ dbh->commit; # so that we can resume if interrupted
+ }
+ close $log;
+}
+
=head1 BUGS
=head1 SEE ALSO
diff --git a/bin/standardize-locations b/bin/standardize-locations
new file mode 100755
index 0000000..6e5fd3c
--- /dev/null
+++ b/bin/standardize-locations
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FS::UID 'adminsuidsetup';
+use FS::Conf;
+use FS::queue;
+
+my $user = shift or die "usage:\n standardize-locations user";
+adminsuidsetup($user);
+my $conf = FS::Conf->new;
+my $method = $conf->config('address_standardize_method')
+ or die "No address standardization method configured.\n";
+if ($method eq 'usps') {
+ # we're not supposed to do this
+ # (allow it anyway with a warning?)
+ die "USPS standardization does not allow batch processing.\n";
+}
+my $job = FS::queue->new({
+ job => 'FS::cust_location::process_standardize'
+});
+my $error = $job->insert('_JOB');
+die $error if $error;
+print "Address standardization job scheduled.\n";
+
+1;
-----------------------------------------------------------------------
Summary of changes:
FS/FS/Conf.pm | 2 +-
FS/FS/Misc/Geo.pm | 125 +++++++++++++++++++++++++++++++++++++++++----
FS/FS/cust_location.pm | 70 +++++++++++++++++++++++++
bin/standardize-locations | 25 +++++++++
4 files changed, 210 insertions(+), 12 deletions(-)
create mode 100755 bin/standardize-locations
More information about the freeside-commits
mailing list