[freeside-commits] branch master updated. 7d967f5ac6929fddc08cc077bcd44ea48a3937f2

Mark Wells mark at 420.am
Fri Oct 4 15:25:38 PDT 2013


The branch, master has been updated
       via  7d967f5ac6929fddc08cc077bcd44ea48a3937f2 (commit)
      from  5d1f486c543c2e61cea6c050bed86c0c9815085e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 7d967f5ac6929fddc08cc077bcd44ea48a3937f2
Author: Mark Wells <mark at freeside.biz>
Date:   Fri Oct 4 15:25:20 2013 -0700

    improvements to TomTom address standardization, #13763

diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm
index f0f2b46..baca21d 100644
--- a/FS/FS/Conf.pm
+++ b/FS/FS/Conf.pm
@@ -4212,7 +4212,7 @@ and customer address. Include units.',
   {
     'key'         => 'tomtom-userid',
     'section'     => 'UI',
-    'description' => 'TomTom geocoding service API key.  See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key.',
+    'description' => 'TomTom geocoding service API key.  See <a href="http://www.tomtom.com/">the TomTom website</a> to obtain a key.  This is recommended for addresses in the United States only.',
     'type'        => 'text',
   },
 
diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm
index 4dd6dc6..b5cc325 100644
--- a/FS/FS/Misc/Geo.pm
+++ b/FS/FS/Misc/Geo.pm
@@ -424,9 +424,15 @@ sub standardize_tomtom {
     or die "no tomtom-userid configured\n";
 
   my $country = code2country($location->{country});
+  my ($address1, $address2) = ($location->{address1}, $location->{address2});
+  # try to fix some cases of the address fields being switched
+  if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
+    $address2 = $address1;
+    $address1 = $location->{address2};
+  }
   my $result = $class->query(
     key => $key,
-    T   => $location->{address1},
+    T   => $address1,
     L   => $location->{city},
     AA  => $location->{state},
     PC  => $location->{zip},
@@ -439,24 +445,121 @@ sub standardize_tomtom {
   if (!$match) {
     die "Location not found.\n";
   }
-  warn "tomtom returned match:\n".Dumper($match) if $DEBUG > 1;
-  my $tract = join('.', $match->{censusTract} =~ /(....)(..)/);
+  my $type = $match->{type};
+  warn "tomtom returned $type match\n" if $DEBUG;
+  warn Dumper($match) if $DEBUG > 1;
+  my $tract = '';
+  if ( defined $match->{censusTract} ) {
+    $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
+             join('.', $match->{censusTract} =~ /(....)(..)/);
+  }
+  # match levels below "intersection" should not be considered clean
+  my $clean = ($type eq 'addresspoint'  ||
+               $type eq 'poi'           ||
+               $type eq 'house'         ||
+               $type eq 'intersection'
+              ) ? 'Y' : '';
+
+  $address2 = normalize_address2($address2, $location->{country});
+
+  $address1 = '';
+  $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
+  $address1 .= $match->{street} if $match->{street};
+
   return +{
-    address1    => join(' ', $match->{houseNumber}, $match->{street}),
-    address2    => $location->{address2}, # XXX still need a solution to this
+    address1    => $address1,
+    address2    => $address2,
     city        => $match->{city},
-    state       => $match->{state},
-    country     => country2code($match->{country}, LOCALE_CODE_ALPHA_2),
+    state       => $location->{state},    # this will never change
+    country     => $location->{country},  # ditto
     zip         => ($match->{standardPostalCode} || $match->{postcode}),
     latitude    => $match->{latitude},
     longitude   => $match->{longitude},
-    censustract => $match->{censusStateCode}.
-                   $match->{censusFipsCountyCode}.
-                   $tract,
-    addr_clean  => 'Y',
+    censustract => $tract,
+    addr_clean  => $clean,
   };
 }
 
+=iten normalize_address2 STRING, COUNTRY
+
+Given an 'address2' STRING, normalize it for COUNTRY postal standards.
+Currently only works for US and CA.
+
+=cut
+
+# XXX really ought to be a separate module
+my %address2_forms = (
+  # Postal Addressing Standards, Appendix C
+  # (plus correction of "hanger" to "hangar")
+  US => {qw(
+    APARTMENT     APT
+    BASEMENT      BSMT
+    BUILDING      BLDG
+    DEPARTMENT    DEPT
+    FLOOR         FL
+    FRONT         FRNT
+    HANGAR        HNGR
+    HANGER        HNGR
+    KEY           KEY
+    LOBBY         LBBY
+    LOT           LOT
+    LOWER         LOWR
+    OFFICE        OFC
+    PENTHOUSE     PH
+    PIER          PIER
+    REAR          REAR
+    ROOM          RM
+    SIDE          SIDE
+    SLIP          SLIP
+    SPACE         SPC
+    STOP          STOP
+    SUITE         STE
+    TRAILER       TRLR
+    UNIT          UNIT
+    UPPER         UPPR
+  )},
+  # Canada Post Addressing Guidelines 4.3
+  CA => {qw(
+    APARTMENT     APT
+    APPARTEMENT   APP
+    BUREAU        BUREAU
+    SUITE         SUITE
+    UNIT          UNIT
+    UNITÉ         UNITÉ
+  )},
+);
+ 
+sub normalize_address2 {
+  # Some things seen in the address2 field:
+  # Whitespace
+  # The complete address (with address1 containing part of the company name, 
+  # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
+  # number, etc.)
+  my ($addr2, $country) = @_;
+  $addr2 = uc($addr2);
+  if ( exists($address2_forms{$country}) ) {
+    my $dict = $address2_forms{$country};
+    # protect this
+    $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
+    my @words;
+    # remove all punctuation and spaces
+    foreach my $w (split(/\W+/, $addr2)) {
+      if ( exists($dict->{$w}) ) {
+        push @words, $dict->{$w};
+      } else {
+        push @words, $w;
+      }
+    }
+    my $result = join(' ', @words);
+    # correct spacing of pound sign + number
+    $result =~ s/NUMBER(\d)/# $1/;
+    warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
+    $addr2 = $result;
+  }
+  $addr2;
+}
+
+
 =back
 
 =cut
diff --git a/FS/FS/cust_location.pm b/FS/FS/cust_location.pm
index b98ade1..11e97ec 100644
--- a/FS/FS/cust_location.pm
+++ b/FS/FS/cust_location.pm
@@ -10,6 +10,8 @@ use FS::Conf;
 use FS::prospect_main;
 use FS::cust_main;
 use FS::cust_main_county;
+use FS::GeocodeCache;
+use Date::Format qw( time2str );
 
 $import = 0;
 
@@ -677,6 +679,13 @@ sub process_censustract_update {
   return;
 }
 
+=item process_set_coord
+
+Queueable function to find and fill in coordinates for all locations that 
+lack them.  Because this uses the Google Maps API, it's internally rate
+limited and must run in a single process.
+
+=cut
 
 sub process_set_coord {
   my $job = shift;
@@ -716,6 +725,67 @@ sub process_set_coord {
   return;
 }
 
+=item process_standardize [ LOCATIONNUMS ]
+
+Performs address standardization on locations with unclean addresses,
+using whatever method you have configured.  If the standardize_* method 
+returns a I<clean> address match, the location will be updated.  This is 
+always an in-place update (because the physical location is the same, 
+and is just being referred to by a more accurate name).
+
+Disabled locations will be skipped, as nobody cares.
+
+If any LOCATIONNUMS are provided, only those locations will be updated.
+
+=cut
+
+sub process_standardize {
+  my $job = shift;
+  my @others = qsearch('queue', {
+      'status'  => 'locked',
+      'job'     => $job->job,
+      'jobnum'  => {op=>'!=', value=>$job->jobnum},
+  });
+  return if @others;
+  my @locationnums = grep /^\d+$/, @_;
+  my $where = "AND locationnum IN(".join(',', at locationnums).")"
+    if scalar(@locationnums);
+  my @locations = qsearch({
+      table     => 'cust_location',
+      hashref   => { addr_clean => '', disabled => '' },
+      extra_sql => $where,
+  });
+  my $n_todo = scalar(@locations);
+  my $n_done = 0;
+
+  # special: log this
+  my $log;
+  eval "use Text::CSV";
+  open $log, '>', "$FS::UID::cache_dir/process_standardize-" . 
+                  time2str('%Y%m%d',time) .
+                  ".csv";
+  my $csv = Text::CSV->new({binary => 1, eol => "\n"});
+
+  foreach my $cust_location (@locations) {
+    $job->update_statustext( int(100 * $n_done/$n_todo) . ",$n_done / $n_todo locations" ) if $job;
+    my $result = FS::GeocodeCache->standardize($cust_location);
+    if ( $result->{addr_clean} and !$result->{error} ) {
+      my @cols = ($cust_location->locationnum);
+      foreach (keys %$result) {
+        push @cols, $cust_location->get($_), $result->{$_};
+        $cust_location->set($_, $result->{$_});
+      }
+      # bypass immutable field restrictions
+      my $error = $cust_location->FS::Record::replace;
+      warn "location ".$cust_location->locationnum.": $error\n" if $error;
+      $csv->print($log, \@cols);
+    }
+    $n_done++;
+    dbh->commit; # so that we can resume if interrupted
+  }
+  close $log;
+}
+
 =head1 BUGS
 
 =head1 SEE ALSO
diff --git a/bin/standardize-locations b/bin/standardize-locations
new file mode 100755
index 0000000..6e5fd3c
--- /dev/null
+++ b/bin/standardize-locations
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FS::UID 'adminsuidsetup';
+use FS::Conf;
+use FS::queue;
+
+my $user = shift or die "usage:\n  standardize-locations user";
+adminsuidsetup($user);
+my $conf = FS::Conf->new;
+my $method = $conf->config('address_standardize_method')
+  or die "No address standardization method configured.\n";
+if ($method eq 'usps') {
+  # we're not supposed to do this
+  # (allow it anyway with a warning?)
+  die "USPS standardization does not allow batch processing.\n";
+}
+my $job = FS::queue->new({
+  job => 'FS::cust_location::process_standardize'
+});
+my $error = $job->insert('_JOB');
+die $error if $error;
+print "Address standardization job scheduled.\n";
+
+1;

-----------------------------------------------------------------------

Summary of changes:
 FS/FS/Conf.pm             |    2 +-
 FS/FS/Misc/Geo.pm         |  125 +++++++++++++++++++++++++++++++++++++++++----
 FS/FS/cust_location.pm    |   70 +++++++++++++++++++++++++
 bin/standardize-locations |   25 +++++++++
 4 files changed, 210 insertions(+), 12 deletions(-)
 create mode 100755 bin/standardize-locations




More information about the freeside-commits mailing list