#!/usr/bin/perl
use strict;
use Regexp::Assemble;
use Getopt::Long;
use Text::CSV_XS;
use IO::File;
use YAML;
# ---------------------------------------------------------------------------
# Main program.
#
# Reads a CSV dictionary whose first column is the word to look for, then
# copies the input (files named on the command line, or STDIN) to STDOUT with
# every dictionary word rewritten through the --format template.
#
# Settings come from the command line; when --config names a YAML file, its
# "csv", "format" and "replace_first" entries fill in whatever the command
# line left unset.
# ---------------------------------------------------------------------------
my ( $csv, $format, $replace_first, $config );

# GetOptions has already reported the offending option on STDERR when it
# returns false, so just show the usage text and stop.
GetOptions(
    "csv=s"         => \$csv,             # path of the dictionary CSV
    "format=s"      => \$format,          # replacement template ($0, $1, ...)
    "replace-first" => \$replace_first,   # only rewrite first occurrence
    "config=s"      => \$config,          # YAML file with default settings
) or usage();

if ($config) {
    my $yaml = YAML::LoadFile($config);

    # Anything but a mapping would blow up below with an obscure
    # "not a HASH reference" error; fail with a readable message instead.
    ref($yaml) eq 'HASH'
        or die "Config file $config must contain a YAML mapping\n";

    $csv ||= $yaml->{csv};
    $format = $yaml->{format} unless defined $format;
    $replace_first ||= $yaml->{replace_first};
}
my $replace_all = !$replace_first;
usage() unless $csv;

# Fall back to "$0" (the matched word itself) only when no template was
# given at all, so that a literal template of "0" is respected.
$format = q{$0} unless defined $format && length $format;

my $trie   = Regexp::Assemble->new;
my $parser = Text::CSV_XS->new( { binary => 1, } );
my $fh     = IO::File->new( $csv, 'r' )
    or die "Can't open csv file $csv $!\n";

# $dict maps each word (first CSV column) to the full row, so the template
# can refer to any column of that row.
my $dict = {};
until ( $fh->eof ) {
    my $cols = $parser->getline($fh);
    if ( !$cols ) {

        # Malformed row: report it and carry on with the remaining rows.
        warn "Skipping unparsable row in $csv: "
            . $parser->error_diag() . "\n";
        next;
    }

    my $key = $cols->[0];

    # A blank first column would add an empty alternative to the pattern,
    # which matches at every position of every line.
    next unless defined $key && length $key;

    $dict->{$key} = $cols;

    # Regexp::Assemble treats its input as a pattern; quote the word so it is
    # matched literally and the matched text is always a key of $dict.
    $trie->add( quotemeta $key );
}
$fh->close;

my $re = $trie->re;

# Number of times each word has been seen so far, across all input lines.
# Only consulted (and updated) in --replace-first mode.
my $count = {};
while ( my $line = <> ) {

    # The matched word is captured explicitly rather than read from $&.
    # In --replace-first mode, later occurrences are left as the plain word.
    $line =~ s/($re)/
        my $key = $1;
        ( $replace_all || !$count->{$key}++ )
            ? replace( $dict->{$key}, $format )
            : $key
    /eg;
    print $line;
}
exit;
# usage()
#
# Prints the command-line help and terminates the program.
#
# The script is a filter whose STDOUT carries the transformed text, and this
# routine is reached when the invocation is unusable (e.g. the required
# --csv setting is missing). The help therefore goes to STDERR and the
# process ends with a non-zero status, so pipelines and calling scripts can
# tell that nothing was processed.
#
# Takes no arguments and never returns.
sub usage {
    print STDERR <<'END';
Usage:
wordlink.pl --csv=data.csv --format='$0' input.txt
or
wordlink.pl --config=config.yml
Options:
csv: link csv file.
format: replace format. replace from $0,$1,$2 ... to csv[0],csv[1],csv[2] ...
replace-first: replace word which is found first.
END
    exit 1;
}
# replace( $cols, $format )
#
# Expands a replacement template for one dictionary row.
#
#   $cols   - array reference holding the fields of the row
#   $format - template text; every "$N" (N = one or more digits) stands for
#             field number N of the row, counting from 0
#
# Returns the template with each placeholder swapped for its field. A
# placeholder whose field is missing expands to nothing. The caller's
# template string is not modified.
sub replace {
    my ( $cols, $format ) = @_;

    # Look up a single field by its placeholder number.
    my $field_for = sub {
        my ($index) = @_;
        return $cols->[$index];
    };

    ( my $expanded = $format ) =~ s{\$(\d+)}{ $field_for->($1) }eg;
    return $expanded;
}