#!/usr/bin/perl # oxquery Copyright Michael Howe 2005 # Released under the GNU GPL (a copy of which is available from # http://www.gnu.org/copyleft/gpl.html ) # # Purpose: # Searches www.ox.ac.uk/contacts for a given person. # Based on the mutt_ldap_query script from # http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html # Thanks to Will for suggesting WWW::Mechanize. use strict; use warnings; #use LWP::Simple; #use LWP::UserAgent; #use URI::URL; #use HTML::LinkExtor; use WWW::Mechanize; die "Usage: $0 , [[], ...]\n" if ! @ARGV; $/ = ''; # Paragraph mode for input my %results; my $match_type = "approximate"; # exact or approximate #my $match_type = "exact"; # exact or approximate my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname="; sub getpeople { my $page = $_[0]; my $surname = $_[1]; my $m = WWW::Mechanize->new(); $m->get( $page ); my $c = $m->content; # The first table-worth of rows: my @outer = $c =~ m{(.*?)}gs; foreach my $row (@outer) { my ($name, $mail_addr) = $row =~ m{(.*?)}gs; $mail_addr = "" unless $mail_addr; if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) { # We have an email! $mail_addr =~ s//$1/; # Hack the name about: $name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1}; $results{$mail_addr} = $name; } } # The 'next' link: my ($endlink) = $c =~ m{.*(.*?).*?}gs; my ($nextlink) = $endlink =~ m{.*Next page.*}; ( defined( $nextlink ) ) && &getpeople( $nextlink, $surname ); } foreach my $surname( @ARGV ) { $surname =~ s/,$//; # Remove optional trailing comma. $surname = lc( $surname ); my $qpage = $base_page . $surname; &getpeople( $qpage, $surname ); } print "OxContact query: found ", scalar(keys(%results)), " email addresses\n"; foreach my $person ( sort(keys(%results))) { print $person, "\t", $results{$person}, "\n"; } # vim: tw=80