#!/usr/bin/perl -w
# Pulls out some information about the links at the page given
# as the argument. By default only the text used in each link is shown;
# -l also shows the link target.
#
# Version:  0.1
# Author:   Dean Wilson (2004/11/27)
# License:  GPL
# Homepage: http://www.unixdaemon.net/miniprojects.html#getlinks

use strict;
use warnings;

use File::Basename;
use Getopt::Std;
use HTML::LinkExtractor;
use LWP::Simple;

# Command-line switches:
#   -u or -h  show help
#   -l        show the link target next to the link text
my %opts;
getopts('uhl', \%opts);

# Get and check the URL. A missing/empty URL is an error condition; -u/-h is
# an explicit request for help. Both end up in usage(), which exits.
my $url = shift;
usage() unless defined $url && length $url;
usage() if $opts{'u'} || $opts{'h'};

# Default to plain http when no scheme was given. https:// (in any letter
# case) is accepted as-is so that it does not get a second scheme prepended.
$url = 'http://' . $url unless $url =~ m{\A https?:// }xi;

# Get the page ...
my $html = get($url);
die "Failed to GET '$url'" unless defined $html;

# ... and pull out the links. strip(1) asks the extractor to strip markup
# from the link text it stores under the _TEXT key.
my $extractor = HTML::LinkExtractor->new();
$extractor->strip(1);
$extractor->parse(\$html);

# Show the link text (and, with -l, the target). Entries without text are
# skipped; with -l entries without an href are skipped as well.
for my $link ( @{ $extractor->links } ) {
    next unless defined $link->{_TEXT};

    if ( $opts{'l'} ) {
        next unless defined $link->{href};
        print "'$link->{_TEXT}'\t==\t'$link->{href}'\n";
    }
    else {
        print "'$link->{_TEXT}'\n";
    }
}

exit(0);

###########################################
# subs funcs and utils
###########################################

# usage()
#
# Prints a short usage message naming the script by its basename and then
# exits with status 1. Never returns to the caller.
#
# NOTE(review): the first line of the message was lost in the copy this was
# restored from; the wording of the "Usage:" line is a reconstruction.
sub usage {
    my $app = basename($0);

    print <<"EOU";
Usage: ${app} [-h|-u] [-l] <url>
 -h|-u\thelp and usage (this text)
 -l\t show the actual links
EOU

    exit(1);
}