# Script to process library-output files for consistently
# semicolon-delimited countries: USA, Italy, Germany.
#
# Usage: perl <this script> INPUT OUTPUT
#
# Each usable input line has the form
#     <istc_number> TAB <locations>
# where <istc_number> is "i", any single character, then eight digits, and
# <locations> is a semicolon-separated list of libraries.  A library may
# carry a copy count in parentheses, e.g. "(3)" or "(3, 1 torn)"; a library
# without such a count is taken to hold one copy.
#
# The output is a tab-separated file with a header row and one row per
# usable input line: istc_number, the original locations text, and the
# total copy count for that line.

use strict;
use warnings;

# Return the number of copies recorded for a single library entry.
#
#   $library - one element of the semicolon-separated locations list
#
# Returns the one- or two-digit count found in parentheses, or 1 when the
# entry carries no such count.
sub count_library_copies {
    my ($library) = @_;

    # An innermost parenthetical that is not a copy count: it starts with a
    # non-digit, with a digit run followed by something other than a comma,
    # or with three or more digits.  The digit run is atomic so that a
    # two-digit count such as "(10)" or "(10, 2 imperfect)" cannot be split
    # into "1" + "0" by backtracking and then be deleted by mistake.
    my $non_count_paren = qr/\((?:\D|(?>\d+)[^,]|\d{3,})[^\(]*?\)/;

    # Repeat until stable: removing an inner group can expose an outer one.
    1 while $library =~ s/$non_count_paren//g;

    # Replace "(3, 1 torn)" with "(3)".
    $library =~ s/\((\d{1,2})[^\(]*\)/($1)/g;

    return $library =~ /\((\d{1,2})\)/ ? $1 : 1;
}

# Return the total number of copies across all libraries in a locations
# string.
#
#   $locations - semicolon-separated list of library entries
#
# An empty string yields 0, because it contains no entries.
sub count_copies {
    my ($locations) = @_;

    my $copy_count = 0;
    for my $library ( split /;/, $locations ) {
        $copy_count += count_library_copies($library);
    }
    return $copy_count;
}

# Read INPUT, write the counted table to OUTPUT.
#
#   $in  - input filename  (first command-line argument)
#   $out - output filename (second command-line argument)
#
# Dies if either file cannot be opened or the output cannot be flushed.
sub main {
    my ( $in, $out ) = @_;

    die "Usage: $0 INPUT OUTPUT\n"
        unless defined $in && defined $out;

    # Three-argument open: the filename can never be read as a mode.
    open my $in_fh, '<', $in
        or die "Cannot open $in for read:$!";
    open my $out_fh, '>', $out
        or die "Cannot open $out for write:$!";

    print {$out_fh} "istc_number\tlocations\tcount\n";

    while ( my $line = <$in_fh> ) {
        my ( $istc_number, $locations ) = $line =~ /^(i.\d{8})\t(.*)$/;

        # Without this check a malformed line would silently reuse the
        # captures of the previous line and emit a bogus row.
        unless ( defined $istc_number ) {
            warn "Skipping malformed line $. of $in\n";
            next;
        }

        my $copy_count = count_copies($locations);
        print {$out_fh} "$istc_number\t$locations\t$copy_count\n";
    }

    close $in_fh;

    # Buffered write errors only surface at close time.
    close $out_fh
        or die "Cannot close $out:$!";

    return;
}

# Run only when executed as a script, so the helpers can be loaded and
# exercised on their own.
main(@ARGV) unless caller;
# Dan check.