1 #!/usr/bin/perl
2 # oxquery Copyright Michael Howe 2005
3 # Released under the GNU GPL (a copy of which is available from
4 # http://www.gnu.org/copyleft/gpl.html )
5 #
6 # Purpose:
7 # Searches www.ox.ac.uk/contacts for a given person.
8 # Based on the mutt_ldap_query script from
9 # http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html
10 # Thanks to Will for suggesting WWW::Mechanize.
11
12 use strict;
13 use warnings;
14 #use LWP::Simple;
15 #use LWP::UserAgent;
16 #use URI::URL;
17 #use HTML::LinkExtor;
18 use WWW::Mechanize;
19
20
21 die "Usage: $0 <name_to_query>, [[<other_name_to_query>], ...]\n"
22 if ! @ARGV;
23
24 $/ = ''; # Paragraph mode for input
25
26 my %results;
27
28 my $match_type = "approximate"; # exact or approximate
29 #my $match_type = "exact"; # exact or approximate
30
31 my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname=";
32
33 sub getpeople {
34 my $page = $_[0];
35 my $surname = $_[1];
36
37 my $m = WWW::Mechanize->new();
38 $m->get( $page );
39 my $c = $m->content;
40 # The first table-worth of rows:
41 my @outer = $c =~ m{<tr.*?>(.*?)</tr>}gs;
42 foreach my $row (@outer) {
43 my ($name, $mail_addr) = $row =~ m{<td.*?>(.*?)</td>}gs;
44 $mail_addr = "" unless $mail_addr;
45 if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) {
46 # We have an email!
47 $mail_addr =~ s/<a href="mailto:(.*)".*<\/a>/$1/;
48 # Hack the name about:
49 $name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1};
50 $results{$mail_addr} = $name;
51 }
52 }
53 # The 'next' link:
54 my ($endlink) = $c =~ m{.*<tr*?>(.*?)</tr>.*?</html>}gs;
55 my ($nextlink) = $endlink =~ m{.*<a href="(.*)">Next page</a>.*};
56 ( defined( $nextlink ) ) && &getpeople( $nextlink, $surname );
57 }
58
59 foreach my $surname( @ARGV ) {
60 $surname =~ s/,$//; # Remove optional trailing comma.
61 $surname = lc( $surname );
62
63 my $qpage = $base_page . $surname;
64 &getpeople( $qpage, $surname );
65 }
66
67 print "OxContact query: found ", scalar(keys(%results)), " email addresses\n";
68 foreach my $person ( sort(keys(%results))) {
69 print $person, "\t", $results{$person}, "\n";
70 }
71
72 # vim: tw=80
73
syntax highlighted by Code2HTML, v. 0.9.1