#!/usr/bin/perl -w
use strict;
use warnings;
use CGI;
use DB_File;
use Data::Dumper;
use File::Basename;
use File::Spec;
use XML::Feed;
use XML::RSS;

my $cgi = CGI->new;
my $tag = $cgi->param('tag');

# The tag is untrusted request input that is later used to build both a file
# name under $store and the feed URL. Reject a missing tag, and any tag
# containing path separators or control characters, before it is used, so a
# request cannot point the persistent store outside of $store.
unless (defined $tag && $tag =~ m{\A[^/\\\x00-\x1f\x7f]{1,128}\z}) {
  print $cgi->header(-status => '400 Bad Request', -type => 'text/plain');
  print "Missing or invalid 'tag' parameter\n";
  exit 0;
}

my $debug  = 0;
my $thresh = 2592000; # 30 days / seconds
my $store  = '/tmp';

my $tagged_items  = get_tagged_items($tag);
my $persist_store = load_persist($store, $tag);

# Collect only the entries whose link has not been recorded yet.
my @tbshown;
for my $entry ($tagged_items->entries()) {
  my $link = $entry->link();
  $link =~ s!/$!!; # normalise urls

  # already recorded, so it has been shown before: skip it.
  next if $persist_store->{$link};

  # add it if it's new so we don't see it again.
  $persist_store->{$link} = time;
  push(@tbshown, $entry);
}

# Release the tied store before producing any output.
untie %{ $persist_store } or die "Failed to untie!: $!\n";

generate_feed($tag, @tbshown);

####################################
# subs, funcs and utils
####################################

# generate_feed($tag, @entries)
#
# Builds an RSS 1.0 document from the given feed entries and prints it to
# STDOUT as a CGI response (HTTP header first, then the XML).
#
#   $tag     - tag name, used only in the channel title and description.
#   @entries - entry objects; each must provide title(), link() and
#              content->body, which become the item's title, link and
#              description.
#
# Uses the file-level $cgi object to emit the response header.
sub generate_feed {
  my $tag     = shift;
  my @tbshown = @_;

  # Direct method call instead of the ambiguous indirect-object "new XML::RSS".
  my $rss = XML::RSS->new(version => '1.0');
  $rss->channel(title => "del.icio.us de.dup.er for the $tag tag",
               link   => "http://www.unixdaemon.net/tools/online/delicious-deduper.html",
               description => "Remove duplicates entries for the $tag tag",
               );

  foreach my $entry (@tbshown) {
    $rss->add_item(title => $entry->title(),
                   link  => $entry->link(),
                   description => $entry->content->body,
                  );
  }

  # The body is written through a :utf8 layer, so declare UTF-8 explicitly
  # rather than leaving the header's charset to CGI.pm's default.
  print $cgi->header(-type => 'application/rss+xml', -charset => 'UTF-8');
  binmode STDOUT, ":utf8";
  print $rss->as_string, "\n";
}

#-----------------------#

# get_tagged_items($tag)
#
# Fetches and parses the del.icio.us RSS feed for $tag.
#
#   $tag - tag name, normally a single word. Must be defined and non-empty.
#
# Returns the object produced by XML::Feed->parse.
# Dies if no tag is supplied, or with XML::Feed's error string when the feed
# cannot be fetched or parsed.
sub get_tagged_items {
  my $tag = shift; # normally a single word

  die "No tag supplied\n" unless defined $tag && length $tag;

  # URI->new is called below; load the module explicitly instead of relying
  # on another module having pulled it in already.
  require URI;

  # Percent-encode the tag so characters such as '#', '?' or '/' cannot alter
  # which resource is requested. '+' is left untouched on purpose so that a
  # tag containing it produces the same URL as before.
  my $escaped = $tag;
  utf8::encode($escaped) if utf8::is_utf8($escaped); # escape bytes, not code points
  $escaped =~ s/([^A-Za-z0-9\-._~+])/sprintf('%%%02X', ord($1))/ge;

  my $del_uri = "http://del.icio.us/rss/tag/" . $escaped;

  my $feed = XML::Feed->parse(URI->new($del_uri))
    or die XML::Feed->errstr;

  return $feed;
}

#-----------------------#

# load_persist($store, $tag)
#
# Ties a hash to the DB_File database "$store/$tag.stored" and removes every
# record whose value is older than the file-level $thresh (in seconds).
#
#   $store - directory holding the database files.
#   $tag   - tag name; selects which database file is used.
#
# Values are compared numerically against the current time, so they are
# expected to be epoch timestamps.
#
# Returns a reference to the tied hash; the caller is responsible for
# untie-ing it. Dies if the database cannot be tied.
sub load_persist {
  my $store     = shift;
  my $tag       = shift;
  my $tag_store = File::Spec->catfile($store, "$tag.stored");

  tie(my %persist_data, "DB_File", $tag_store)
    or die "Can't tie to '$tag_store': $!\n";

  # expire old records so we can see them again.
  my $cutoff = time - $thresh;
  foreach my $key (keys %persist_data) {
    # $key is the stored string itself, not an entry object, so delete by key.
    delete $persist_data{$key} if $persist_data{$key} < $cutoff;
  }

  return \%persist_data;
}
