Citat:
Ursprungligen postat av olol123
sortera ord, och få dem att ta bort dubletter?
Det kan man göra med ett litet Perl-skript:
Kod:
#!C:\Perl\bin\perl
# Use: perl wordlist.pl <INFIL> <UTFIL> [ENCODING]
#
# Extracts the words from the text document INFIL, strips every character
# that is not a letter (a-z, A-Z and the Swedish letters a-ring, a-umlaut,
# o-umlaut in either case), lowercases the words, removes duplicates and
# writes them, sorted, one word per line to UTFIL.
#
# ENCODING is optional and names the character encoding used for both
# files. It defaults to UTF-8; pass e.g. iso-8859-1 for Latin-1 files.
#
use strict;
use warnings;

my ($InFilename, $OutFilename, $Encoding) = @ARGV;
die "Usage: perl wordlist.pl <INFIL> <UTFIL> [ENCODING]\n"
    unless defined $InFilename and defined $OutFilename;
$Encoding = 'UTF-8' unless defined $Encoding;

# Three-argument open keeps the mode separate from the user-supplied file
# name. The encoding layer decodes the input into characters, so that tr///
# and lc below operate on whole letters rather than on individual bytes.
open(my $in_fh, "<:encoding($Encoding)", $InFilename)
    or die "Cannot open file $InFilename: $!\n";

# Keys are the distinct lowercased words; using a hash removes duplicates
# as the words are collected.
my %seen;
while (my $line = <$in_fh>) {
    # split ' ' splits on runs of whitespace and ignores leading
    # whitespace, so it also discards the trailing newline.
    for my $word (split ' ', $line) {
        # Delete everything except a-z, A-Z and the six Swedish letters.
        # The letters are written as code point escapes so that the script
        # does not depend on the encoding it is saved in.
        $word =~ tr/a-zA-Z\x{E5}\x{E4}\x{F6}\x{C5}\x{C4}\x{D6}//cd;
        next if $word eq '';    # the token contained no letters at all
        $seen{ lc $word } = 1;
    }
}
close($in_fh) or die "Cannot close file $InFilename: $!\n";

open(my $out_fh, ">:encoding($Encoding)", $OutFilename)
    or die "Cannot open file $OutFilename: $!\n";

# Plain sort orders by code point: the Swedish letters come after z, but in
# the order a-umlaut, a-ring, o-umlaut rather than Swedish alphabetical order.
for my $word (sort keys %seen) {
    print {$out_fh} $word . "\n";
}

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot write file $OutFilename: $!\n";