#!/usr/bin/perl
# oxquery Copyright Michael Howe 2005
# Released under the GNU GPL (a copy of which is available from
# http://www.gnu.org/copyleft/gpl.html )
#
# Purpose:
#   Searches www.ox.ac.uk/contacts for a given person.
#   Based on the mutt_ldap_query script from 
#    http://wwwhome.cs.utwente.nl/~meentr/mutt-ldap.html
#   Thanks to Will for suggesting WWW::Mechanize.

use strict;
use warnings;
#use LWP::Simple;
#use LWP::UserAgent;
#use URI::URL;
#use HTML::LinkExtor;
use WWW::Mechanize;


die "Usage: $0 <name_to_query>, [[<other_name_to_query>], ...]\n"
	if ! @ARGV;

$/ = '';	# Paragraph mode for input

my %results;

my $match_type = "approximate";	# exact or approximate
#my $match_type = "exact";	# exact or approximate

my $base_page = "http://www.ox.ac.uk/cgi-bin/contacts?match=$match_type+match&submit=Find+email+address&.cgifields=match&surname=";

sub getpeople {
	my $page = $_[0];
	my $surname = $_[1];

	my $m = WWW::Mechanize->new();
	$m->get( $page );
	my $c = $m->content;
	# The first table-worth of rows:
	my @outer = $c =~ m{<tr.*?>(.*?)</tr>}gs;
	foreach my $row (@outer) {
		my ($name, $mail_addr) = $row =~ m{<td.*?>(.*?)</td>}gs;
		$mail_addr = "" unless $mail_addr;
		if ( $mail_addr =~ m{mailto:} && $mail_addr =~ m/$surname/ ) {
			# We have an email!
			$mail_addr =~ s/<a href="mailto:(.*)".*<\/a>/$1/;
			# Hack the name about:
			$name =~ s{(.*?), (.*?)(\s.*|$)}{$2 $1};
			$results{$mail_addr} = $name;
		}
	}
	# The 'next' link:
	my ($endlink) = $c =~ m{.*<tr*?>(.*?)</tr>.*?</html>}gs;
	my ($nextlink) = $endlink =~ m{.*<a href="(.*)">Next page</a>.*};
	( defined( $nextlink ) ) && &getpeople( $nextlink, $surname );
}
	
foreach my $surname( @ARGV ) {
	$surname =~ s/,$//;  # Remove optional trailing comma.
	$surname = lc( $surname );

	my $qpage = $base_page . $surname;
	&getpeople( $qpage, $surname );
}

print "OxContact query: found ", scalar(keys(%results)), " email addresses\n";
foreach my $person ( sort(keys(%results))) {
	print $person, "\t", $results{$person}, "\n";
}

# vim: tw=80
