1 #!/usr/bin/perl
  2 
  3 =head1 NAME
  4 
  5 OxQuery
  6 
  7 =head1 SYNOPSIS
  8 
  9   use OxQuery;
 10 
 11   my $name = "howe";
 12 
 13   my %people = OxQuery->search( $name );
 14 
 15   foreach my $person ( sort( keys( %people ) ) ) {
 16     print "Person $person has email " . $people{$person} . "\n";
 17   }
 18 
 19 =head1 DESCRIPTION
 20 
 21 Module to query the Oxford University Contacts Search webpage.  Will return a
 22 hash of the form people => email for a given surname.  Does an approximate
 23 search, then only returns names containing the string for which you searched.
 24 
 25 =head1 METHODS
 26 
 27 =over 4
 28 
 29 =cut
 30 
 31 # $Id: OxQuery.pm 198 2007-02-06 13:57:03Z michael $
 32 package OxQuery;
 33 
 34 use strict;
 35 use warnings;
 36 use WWW::Mechanize;
 37 use Exporter;
 38 our @EXPORT_OK = qw( search );
 39 our @ISA = qw( Exporter );
 40 
 41 
 42 #die "Usage: $0 <name_to_query>, [[<other_name_to_query>], ...]\n"
 43 #	if ! @ARGV;
 44 
 45 $/ = '';	# Paragraph mode for input
 46 
 47 my %results;
 48 
 49 my $match_type = "approximate";	# exact or approximate
 50 #my $match_type = "exact";	# exact or approximate
 51 
 52 my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname=";
 53 
 54 sub getpeople {
 55 	my $page = $_[0];
 56 	my $surname = $_[1];
 57 
 58 	my $m = WWW::Mechanize->new();
 59 	$m->get( $page );
 60 	my $c = $m->content;
 61 	# The first table-worth of rows:
 62 	my @outer = $c =~ m{<tr.*?>(.*?)</tr>}gs;
 63 	foreach my $row (@outer) {
 64 		my ($name, $mail_addr) = $row =~ m{<td.*?>(.*?)</td>}gs;
 65 		$mail_addr = "" unless $mail_addr;
 66 		if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) {
 67 			# We have an email!
 68 			$mail_addr =~ s/<a href="mailto:(.*)".*<\/a>/$1/;
 69 			# Hack the name about:
 70 			$name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1};
 71 			$results{$mail_addr} = $name;
 72 		}
 73 	}
 74 	# The 'next' link:
 75 	my ($endlink) = $c =~ m{.*<tr*?>(.*?)</tr>.*?</html>}gs;
 76 	my ($nextlink) = $endlink =~ m{.*<a href="(.*)">Next page</a>.*};
 77 	( defined( $nextlink ) ) && &getpeople( $nextlink, $surname );
 78 }
 79 
 80 =item search() 
 81 
 82 Takes an array of surnames, and returns a hash containing name => email pairs
 83 returned by the contact search.
 84 
 85 =cut
 86 
 87 sub search {
 88     my $self = shift;
 89     my @targets = @_;
 90     foreach my $surname( @targets ) {
 91 	    $surname =~ s/,$//;  # Remove optional trailing comma.
 92 	    $surname = lc( $surname );
 93 
 94 	    my $qpage = $base_page . $surname;
 95 	    &getpeople( $qpage, $surname );
 96     }
 97     return %results;
 98 
 99 }
100 
101 =back
102 
103 =head1 AUTHOR
104 
105 Michael Howe <michael@michaelhowe.org>
106 
107 This program is free software; you can redistribute it
108 and/or modify it under the same terms as Perl itself.
109 
110 =head1 CREDITS
111 
112 Originally based on the mutt_ldap_query script from
113 http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html
114 
115 Thanks to Will for suggesting WWW:Mechanize, and also suggesting that I turn the
116 script into a module - 6 months later I got round to it.
117 
118 =head1 BUGS
119 
120 Could do with a way to set approximate or exact search, if anyone wants to use
121 it.
122 Probably many others.  WFM, YMMV, etc.  Fixes gratefully accepted.
123 
124 =cut
125 
126 1;
127 # vim: tw=80
128 


syntax highlighted by Code2HTML, v. 0.9.1