1 #!/usr/bin/perl
2
3 =head1 NAME
4
5 OxQuery
6
7 =head1 SYNOPSIS
8
9 use OxQuery;
10
11 my $name = "howe";
12
13 my %people = OxQuery->search( $name );
14
15 foreach my $person ( sort( keys( %people ) ) ) {
16 print "Person $person has email " . $people{$person} . "\n";
17 }
18
19 =head1 DESCRIPTION
20
21 Module to query the Oxford University Contacts Search webpage. Will return a
22 hash of the form people => email for a given surname. Does an approximate
23 search, then only returns names containing the string for which you searched.
24
25 =head1 METHODS
26
27 =over 4
28
29 =cut
30
31 # $Id: OxQuery.pm 198 2007-02-06 13:57:03Z michael $
32 package OxQuery;
33
34 use strict;
35 use warnings;
36 use WWW::Mechanize;
37 use Exporter;
38 our @EXPORT_OK = qw( search );
39 our @ISA = qw( Exporter );
40
41
42 #die "Usage: $0 <name_to_query>, [[<other_name_to_query>], ...]\n"
43 # if ! @ARGV;
44
45 $/ = ''; # Paragraph mode for input
46
47 my %results;
48
49 my $match_type = "approximate"; # exact or approximate
50 #my $match_type = "exact"; # exact or approximate
51
52 my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname=";
53
54 sub getpeople {
55 my $page = $_[0];
56 my $surname = $_[1];
57
58 my $m = WWW::Mechanize->new();
59 $m->get( $page );
60 my $c = $m->content;
61 # The first table-worth of rows:
62 my @outer = $c =~ m{<tr.*?>(.*?)</tr>}gs;
63 foreach my $row (@outer) {
64 my ($name, $mail_addr) = $row =~ m{<td.*?>(.*?)</td>}gs;
65 $mail_addr = "" unless $mail_addr;
66 if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) {
67 # We have an email!
68 $mail_addr =~ s/<a href="mailto:(.*)".*<\/a>/$1/;
69 # Hack the name about:
70 $name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1};
71 $results{$mail_addr} = $name;
72 }
73 }
74 # The 'next' link:
75 my ($endlink) = $c =~ m{.*<tr*?>(.*?)</tr>.*?</html>}gs;
76 my ($nextlink) = $endlink =~ m{.*<a href="(.*)">Next page</a>.*};
77 ( defined( $nextlink ) ) && &getpeople( $nextlink, $surname );
78 }
79
80 =item search()
81
82 Takes an array of surnames, and returns a hash containing name => email pairs
83 returned by the contact search.
84
85 =cut
86
87 sub search {
88 my $self = shift;
89 my @targets = @_;
90 foreach my $surname( @targets ) {
91 $surname =~ s/,$//; # Remove optional trailing comma.
92 $surname = lc( $surname );
93
94 my $qpage = $base_page . $surname;
95 &getpeople( $qpage, $surname );
96 }
97 return %results;
98
99 }
100
101 =back
102
103 =head1 AUTHOR
104
105 Michael Howe <michael@michaelhowe.org>
106
107 This program is free software; you can redistribute it
108 and/or modify it under the same terms as Perl itself.
109
110 =head1 CREDITS
111
112 Originally based on the mutt_ldap_query script from
113 http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html
114
115 Thanks to Will for suggesting WWW:Mechanize, and also suggesting that I turn the
116 script into a module - 6 months later I got round to it.
117
118 =head1 BUGS
119
120 Could do with a way to set approximate or exact search, if anyone wants to use
121 it.
122 Probably many others. WFM, YMMV, etc. Fixes gratefully accepted.
123
124 =cut
125
126 1;
127 # vim: tw=80
128
syntax highlighted by Code2HTML, v. 0.9.1