1 #!/usr/bin/perl
 2 # oxquery Copyright Michael Howe 2005
 3 # Released under the GNU GPL (a copy of which is available from
 4 # http://www.gnu.org/copyleft/gpl.html )
 5 #
 6 # Purpose:
 7 #   Searches www.ox.ac.uk/contacts for a given person.
 8 #   Based on the mutt_ldap_query script from 
 9 #    http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html
10 #   Thanks to Will for suggesting WWW::Mechanize.
11 
12 use strict;
13 use warnings;
14 #use LWP::Simple;
15 #use LWP::UserAgent;
16 #use URI::URL;
17 #use HTML::LinkExtor;
18 use WWW::Mechanize;
19 
20 
21 die "Usage: $0 <name_to_query>, [[<other_name_to_query>], ...]\n"
22 	if ! @ARGV;
23 
24 $/ = '';	# Paragraph mode for input
25 
26 my %results;
27 
28 my $match_type = "approximate";	# exact or approximate
29 #my $match_type = "exact";	# exact or approximate
30 
31 my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname=";
32 
33 sub getpeople {
34 	my $page = $_[0];
35 	my $surname = $_[1];
36 
37 	my $m = WWW::Mechanize->new();
38 	$m->get( $page );
39 	my $c = $m->content;
40 	# The first table-worth of rows:
41 	my @outer = $c =~ m{<tr.*?>(.*?)</tr>}gs;
42 	foreach my $row (@outer) {
43 		my ($name, $mail_addr) = $row =~ m{<td.*?>(.*?)</td>}gs;
44 		$mail_addr = "" unless $mail_addr;
45 		if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) {
46 			# We have an email!
47 			$mail_addr =~ s/<a href="mailto:(.*)".*<\/a>/$1/;
48 			# Hack the name about:
49 			$name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1};
50 			$results{$mail_addr} = $name;
51 		}
52 	}
53 	# The 'next' link:
54 	my ($endlink) = $c =~ m{.*<tr*?>(.*?)</tr>.*?</html>}gs;
55 	my ($nextlink) = $endlink =~ m{.*<a href="(.*)">Next page</a>.*};
56 	( defined( $nextlink ) ) && &getpeople( $nextlink, $surname );
57 }
58 	
59 foreach my $surname( @ARGV ) {
60 	$surname =~ s/,$//;  # Remove optional trailing comma.
61 	$surname = lc( $surname );
62 
63 	my $qpage = $base_page . $surname;
64 	&getpeople( $qpage, $surname );
65 }
66 
67 print "OxContact query: found ", scalar(keys(%results)), " email addresses\n";
68 foreach my $person ( sort(keys(%results))) {
69 	print $person, "\t", $results{$person}, "\n";
70 }
71 
72 # vim: tw=80
73 


syntax highlighted by Code2HTML, v. 0.9.1