Skip to content
Snippets Groups Projects
Commit dbce6e7f authored by Daniel Povey
Browse files

Minor, mostly-cosmetic fixes to filter_scps.pl and split_data.sh (should not affect any recipes)

parent b068696d
No related branches found
No related tags found
No related merge requests found
......@@ -25,9 +25,27 @@
# the -f <n> switch
$field = 1;
$shifted = 0;
$print_warnings = 1;
do {
$shifted=0;
if ($ARGV[0] eq "-f") {
$field = $ARGV[1];
shift @ARGV; shift @ARGV;
$shifted = 1;
}
if (@ARGV[0] eq "--no-warn") {
$print_warnings = 0;
shift @ARGV;
$shifted = 1;
}
} while ($shifted);
if(@ARGV != 4) {
die "Usage: utils/filter_scps.pl <job-range-specifier> <filter-pattern> <input-scp> <output-scp-pattern>\n" .
"e.g.: utils/filter_scps.pl [-f <field-to-filter-on>] JOB=1:10 data/train/split10/JOB/spk2utt data/train/feats.scp data/train/split10/JOB/feats.scp\n" .
die "Usage: utils/filter_scps.pl [-f <field-to-filter-on>] <job-range-specifier> <filter-pattern> <input-scp> <output-scp-pattern>\n" .
"e.g.: utils/filter_scps.pl JOB=1:10 data/train/split10/JOB/spk2utt data/train/feats.scp data/train/split10/JOB/feats.scp\n" .
"similar to utils/filter_scp.pl, but it uses multiple filters and output multiple filtered files.\n".
"The -f option specifies the field in <input-scp> that we filter on (default: 1)." .
"See also: utils/filter_scp.pl\n";
......@@ -45,17 +63,6 @@ if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10
die "filter_scps.pl: bad job-range specifier $ARGV[0]: expected e.g. JOB=1:10";
}
$field = 1;
$shifted = 0;
do {
$shifted=0;
if ($ARGV[0] eq "-f") {
$field = $ARGV[1];
shift @ARGV; shift @ARGV;
$shifted=1
}
} while ($shifted);
$idlist = shift @ARGV;
if ($idlist !~ m/$jobname/ &&
......@@ -154,9 +161,10 @@ for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
close(FW);
}
if ($warn_uncovered) {
if ($warn_uncovered && $print_warnings) {
print STDERR "filter_scps.pl: warning: some input lines did not get output\n";
}
if ($warn_multiply_covered) {
if ($warn_multiply_covered && $print_warnings) {
print STDERR "filter_scps.pl: warning: some input lines were output to multiple files\n";
}
......@@ -32,7 +32,7 @@ fi
data=$1
numsplit=$2
if [ $numsplit -le 0 ]; then
if ! [ "$numsplit" -gt 0 ]; then
echo "Invalid num-split argument $numsplit";
exit 1;
fi
......@@ -117,7 +117,10 @@ done
# split some things that are indexed by speaker
for f in spk2gender spk2warp cmvn.scp; do
if [ -f $data/$f ]; then
utils/filter_scps.pl JOB=1:$numsplit \
! $split_per_spk && warning_opt="--no-warn"
# suppress warnings from filter_scps.pl about 'some input lines were output
# to multiple files', which is expected in this case.
utils/filter_scps.pl $warning_opt JOB=1:$numsplit \
$data/split$numsplit/JOB/spk2utt $data/$f $data/split$numsplit/JOB/$f || exit 1;
fi
done
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment