Skip to content
Snippets Groups Projects
Commit e1f68e8d authored by Daniel Povey
Browse files

Merge pull request #764 from pranavj/issue729

Update validate_dict_dir.pl to check for Carriage Return (^M) characters (Issue 729)
parents 6c074c87 7f457c5d
No related branches found
No related tags found
No related merge requests found
......@@ -25,6 +25,7 @@ if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is
if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;}
$idx = 1;
%silence = ();
$crlf = 1;
print "--> reading $dict/silence_phones.txt\n";
while(<S>) {
......@@ -32,6 +33,11 @@ while(<S>) {
print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n";
set_to_fail();
}
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
my @col = split(" ", $_);
if (@col == 0) {
set_to_fail();
......@@ -59,6 +65,7 @@ if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.tx
if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;}
$idx = 1;
$success = 1;
$crlf = 1;
print "--> reading $dict/optional_silence.txt\n";
while(<OS>) {
chomp;
......@@ -68,6 +75,11 @@ while(<OS>) {
} elsif (!$silence{$col[0]}) {
set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n";
}
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
$idx ++;
}
close(OS);
......@@ -81,8 +93,14 @@ if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $d
$idx = 1;
%nonsilence = ();
$success = 1;
$crlf = 1;
print "--> reading $dict/nonsilence_phones.txt\n";
while(<NS>) {
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
if (! s/\n$//) {
print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n";
set_to_fail();
......@@ -134,9 +152,14 @@ sub check_lexicon {
print "Checking $lex\n";
!open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail();
my %seen_line = {};
$idx = 1; $success = 1;
$idx = 1; $success = 1; $crlf = 1;
print "--> reading $lex\n";
while (<L>) {
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $lex contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
if (defined $seen_line{$_}) {
print "--> ERROR: line '$_' of $lex is repeated\n";
set_to_fail();
......@@ -191,7 +214,13 @@ if (-f "$dict/lexiconp_silprob.txt") {
if (-f "$dict/silprob.txt") {
!open(SP, "<$dict/silprob.txt") &&
print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail();
$crlf = 1;
while (<SP>) {
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
chomp; my @col = split;
@col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail();
if ($col[0] eq "<s>" || $col[0] eq "overall") {
......@@ -290,8 +319,14 @@ if (-s "$dict/extra_questions.txt") {
}
$idx = 1;
$success = 1;
$crlf = 1;
print "--> reading $dict/extra_questions.txt\n";
while(<EX>) {
if ($crlf == 1 && m/\r/) {
print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n";
set_to_fail();
$crlf = 0;
}
if (! s/\n$//) {
print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n";
set_to_fail();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment