#!/usr/bin/perl -w
# keywords.pl - by dual
#
# Generates a reverse keyword index
###################################
#
# Usage: keywords.pl FILE
#
# Reads FILE token by token (tokens are separated by whitespace), drops
# every token that contains a stop word, and appends a single line
#
#     FILE=>token token token ...
#
# to the keyword index. Each surviving token is listed once, in order of
# first appearance. Tokens are compared exactly as written, so "Perl" and
# "perl" (or "index" and "index,") count as different keywords.

use strict;
use warnings;

# Declare
my $file = shift;
defined $file or die "Usage: $0 FILE\n";

my $index_path = '/some/dir/keyword.index';
my @unique;    # keywords in order of first appearance
my %seen;      # token => number of times it has been encountered

# Stop words. A token is rejected when any of these occurs anywhere inside
# it as a whole word (case-insensitively), so "the," and "on-line" are
# rejected as well as "the" and "on".
my @stop_words = qw(
    a all also an and any are as be been but by can can't do does doesn't
    don't else even every for get had has have here i if in is isn't it
    like many may me more much my not of off on only or put so some than
    that the their them then there they this those to very was we what
    when which who will with won't you your you're
);
my $alternation  = join '|', map { quotemeta } @stop_words;
my $stop_word_re = qr/\b(?:$alternation)\b/i;

# Open working files. Three-argument open keeps a file name such as
# ">foo" or "cmd |" from being interpreted as a mode. The index is opened
# before any input is read so that an unwritable index fails immediately.
open my $in,    '<',  $file       or die "Can't open file: $!";
open my $index, '>>', $index_path or die "Can't open index: $!";

# Do work
while (my $line = <$in>) {

    # split ' ' skips leading whitespace and treats any run of whitespace
    # (including the trailing newline) as one separator, so it never
    # produces an empty token.
    for my $word (split ' ', $line) {
        next if $word =~ $stop_word_re;
        next if $seen{$word}++;
        push @unique, $word;
    }
}

# Output
print {$index} "$file=>@unique\n"
    or die "Can't write to index: $!";

# Clean up
close $in;

# Buffered write errors only surface when the handle is closed.
close $index or die "Can't close index: $!";