Important
You are viewing the NeMo 2.0 documentation. This release introduces significant changes to the API and a new library, NeMo Run. We are currently porting all features from NeMo 1.0 to 2.0. For documentation on previous versions or features not yet available in 2.0, please refer to the NeMo 24.07 documentation.
Scores#
EN#
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_citrinet_256 |
en |
4.2 % WER |
10.7 % WER |
4.4 % WER |
10.7 % WER |
||||||||||||||
stt_en_citrinet_512 |
en |
3.7 % WER |
8.9 % WER |
3.7 % WER |
8.9 % WER |
||||||||||||||
stt_en_citrinet_1024 |
en |
3.7 % WER |
8.3 % WER |
3.6 % WER |
7.9 % WER |
||||||||||||||
stt_en_citrinet_256_gamma_0_25 |
en |
4.7 % |
10.6 % |
4.8 % |
10.7 % |
8.3 % |
5.8 % |
3.6 % |
|||||||||||
stt_en_citrinet_512_gamma_0_25 |
en |
4.0 % |
9.0 % |
3.9 % |
9.0 % |
6.9 % |
4.4 % |
3.6 % |
|||||||||||
stt_en_citrinet_1024_gamma_0_25 |
en |
3.4 % |
7.7 % |
3.4 % |
7.6 % |
6.2 % |
4.0 % |
2.5 % |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_conformer_ctc_small |
en |
3.6 |
8.1 |
3.7 |
8.1 |
||||||||||||||
stt_en_conformer_ctc_medium |
en |
2.5 |
5.8 |
2.6 |
5.9 |
||||||||||||||
stt_en_conformer_ctc_large |
en |
1.9 |
4.4 |
2.1 |
4.5 |
||||||||||||||
stt_en_conformer_ctc_xlarge |
en |
1.77 % |
3.79 % |
2.00 % |
3.74 % |
7.88 % |
5.99 % |
6.44 % |
22.90 % |
5.50 % |
2.36 % |
||||||||
stt_en_conformer_ctc_small_ls |
en |
3.3 |
8.8 |
3.4 |
8.8 |
||||||||||||||
stt_en_conformer_ctc_medium_ls |
en |
2.7 |
7.4 |
3.0 |
7.3 |
||||||||||||||
stt_en_conformer_ctc_large_ls |
en |
2.4 |
6.2 |
2.7 |
6.0 |
||||||||||||||
stt_en_conformer_transducer_small |
en |
2.8 |
6.6 |
2.5 |
6.6 |
||||||||||||||
stt_en_conformer_transducer_medium |
en |
2.0 |
4.6 |
2.1 |
4.7 |
||||||||||||||
stt_en_conformer_transducer_large |
en |
1.6 |
3.5 |
1.7 |
3.7 |
||||||||||||||
stt_en_conformer_transducer_large_ls |
en |
2.1 |
5.0 |
2.3 |
5.1 |
||||||||||||||
stt_en_conformer_transducer_xlarge |
en |
1.48 % |
2.95 % |
1.62 % |
3.01 % |
6.46 % |
4.59 % |
5.32 % |
5.70 % |
6.47 % |
21.32 % |
2.05 % |
1.17 % |
||||||
stt_en_conformer_transducer_xxlarge |
en |
1.52 % |
3.09 % |
1.72 % |
3.14 % |
5.29 % |
5.85 % |
6.64 % |
2.42 % |
1.49 % |
|||||||||
stt_en_fastconformer_hybrid_large_streaming_80ms (CTC) |
en |
3.5 % |
8.1 % |
10.2 % |
7.2 % |
3.5 % |
2.3 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_480ms (CTC) |
en |
3.6 % |
7.5 % |
9.8 % |
7.0 % |
3.5 % |
2.1 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_1040ms (CTC) |
en |
2.7 % |
6.4 % |
9.0 % |
7.0 % |
3.2 % |
1.9 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_80ms (RNNT) |
en |
2.7 % |
6.5 % |
9.1 % |
6.9 % |
3.2 % |
1.9 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_480ms (RNNT) |
en |
2.7 % |
6.1 % |
8.5 % |
6.7 % |
3.1 % |
1.8 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_1040ms (RNNT) |
en |
2.3 % |
5.5 % |
8.0 % |
6.6 % |
2.9 % |
1.6 % |
||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 0ms) |
en |
7.0 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 80ms) |
en |
6.4 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 480ms) |
en |
5.7 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 1040ms) |
en |
5.4 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 0ms) |
en |
8.4 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 80ms) |
en |
7.8 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 480ms) |
en |
6.7 % |
|||||||||||||||||
stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 1040ms) |
en |
6.2 % |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_contextnet_256 |
en |
3.3 % |
7.9 % |
3.3 % |
8.0 % |
9.7 % |
11.0 % |
7.1 % |
4.6 % |
3.2 % |
|||||||||
stt_en_contextnet_512 |
en |
2.0 % |
4.8 % |
2.2 % |
5.0 % |
6.6 % |
7.3 % |
5.9 % |
2.8 % |
1.4 % |
|||||||||
stt_en_contextnet_1024 |
en |
1.7 % |
3.8 % |
1.9 % |
4.0 % |
7.9 % |
5.9 % |
5.2 % |
6.5 % |
21.7 % |
4.7 % |
2.3 % |
1.3 % |
||||||
stt_en_contextnet_256_mls |
en |
9.0 % |
9.2 % |
9.4 % |
10.9 % |
||||||||||||||
stt_en_contextnet_512_mls |
en |
5.2 % |
5.2 % |
5.6 % |
6.6 % |
||||||||||||||
stt_en_contextnet_1024_mls |
en |
4.1 % |
4.2 % |
4.6 % |
5.6 % |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_fastconformer_ctc_large |
en |
1.9 |
4.2 |
2.1 |
4.2 |
||||||||||||||
stt_en_fastconformer_transducer_large |
en |
2.0 |
3.8 |
1.8 |
3.8 |
||||||||||||||
stt_en_fastconformer_hybrid_large_pc |
en |
8.0 % |
10.3 % |
2.0 % |
4.1 % |
8.2 % |
4.5 % |
4.6 % |
2.3 % |
4.5 % |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_jasper10x5dr |
en |
3.74 |
10.21 |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_quartznet15x5 |
en |
4.38 |
11.3 |
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MCV Test-Set v8.0 (en) |
MLS Dev (en) |
MLS Test (en) |
NSC Part1 |
NSC Part6 |
Peoples Speech Test v1 |
SLR 83 Test |
SPGI Test |
VoxPopuli Test (en) |
WSJ Dev 93 |
WSJ Eval 92 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_en_squeezeformer_ctc_xsmall_ls |
en |
3.6 % |
9.7 % |
3.8 % |
9.4 % |
||||||||||||||
stt_en_squeezeformer_ctc_small_ls |
en |
2.9 % |
7.4 % |
3.1 % |
7.4 % |
||||||||||||||
stt_en_squeezeformer_ctc_small_medium_ls |
en |
2.7 % |
7.0 % |
2.8 % |
7.1 % |
||||||||||||||
stt_en_squeezeformer_ctc_medium_ls |
en |
2.4 % |
6.2 % |
2.6 % |
6.3 % |
||||||||||||||
stt_en_squeezeformer_ctc_medium_large_ls |
en |
2.3 % |
6.0 % |
2.5 % |
5.9 % |
||||||||||||||
stt_en_squeezeformer_ctc_large_ls |
en |
2.3 % |
5.7 % |
2.4 % |
5.7 % |
BE#
Model Name |
Language |
MCV Test-Set v10 (be) |
---|---|---|
stt_be_conformer_ctc_large |
be |
4.7 % |
stt_be_conformer_transducer_large |
be |
3.8 % |
BY#
Model Name |
Language |
MCV Dev-Set v12.0 (be) |
MCV Test-Set v12.0 (be) |
---|---|---|---|
stt_by_fastconformer_hybrid_large_pc |
by |
2.7 % |
2.7 % |
CA#
Model Name |
Language |
MCV Dev-Set (v??) (ca) |
MCV Dev-Set v9.0 (ca) |
MCV Test-Set v9.0 (ca) |
---|---|---|---|---|
stt_ca_conformer_ctc_large |
ca |
4.70 |
4.27 |
|
stt_ca_conformer_transducer_large |
ca |
4.43 |
3.85 |
Model Name |
Language |
MCV Dev-Set (v??) (ca) |
MCV Dev-Set v9.0 (ca) |
MCV Test-Set v9.0 (ca) |
---|---|---|---|---|
stt_ca_quartznet15x5 |
ca |
6.0 |
DE#
Model Name |
Language |
MCV Dev-Set (v??) (de) |
MCV Dev-Set v12.0 (de) |
MCV Dev-Set v7.0 (de) |
MCV Test-Set v12.0 (de) |
MCV Test-Set v7.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|---|---|---|
stt_de_citrinet_1024 |
de |
6.63 |
7.59 |
4.06 |
5.07 |
12.33 |
10.02 |
Model Name |
Language |
MCV Dev-Set (v??) (de) |
MCV Dev-Set v12.0 (de) |
MCV Dev-Set v7.0 (de) |
MCV Test-Set v12.0 (de) |
MCV Test-Set v7.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|---|---|---|
stt_de_conformer_ctc_large |
de |
5.84 |
6.68 |
3.85 |
4.63 |
12.56 |
10.51 |
|||
stt_de_conformer_transducer_large |
de |
4.75 |
5.36 |
3.46 |
4.19 |
11.21 |
9.14 |
Model Name |
Language |
MCV Dev-Set (v??) (de) |
MCV Dev-Set v12.0 (de) |
MCV Dev-Set v7.0 (de) |
MCV Test-Set v12.0 (de) |
MCV Test-Set v7.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|---|---|---|
stt_de_contextnet_1024 |
de |
4.76 |
5.5 |
3.53 |
4.2 |
11.32 |
9.4 |
Model Name |
Language |
MCV Dev-Set (v??) (de) |
MCV Dev-Set v12.0 (de) |
MCV Dev-Set v7.0 (de) |
MCV Test-Set v12.0 (de) |
MCV Test-Set v7.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|---|---|---|
stt_de_fastconformer_hybrid_large_pc |
de |
4.2 % |
4.9 % |
3.3 % |
3.8 % |
10.8 % |
8.7 % |
Model Name |
Language |
MCV Dev-Set (v??) (de) |
MCV Dev-Set v12.0 (de) |
MCV Dev-Set v7.0 (de) |
MCV Test-Set v12.0 (de) |
MCV Test-Set v7.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|---|---|---|
stt_de_quartznet15x5 |
de |
11.78 |
ENES#
Model Name |
Language |
Fisher-Dev-En |
Fisher-Dev-Es |
Fisher-Test-En |
Fisher-Test-Es |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Dev-Set v7.0 (en) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set v7.0 (en) |
MCV Test-Set v7.0 (es) |
MLS Dev (en) |
MLS Dev (es) |
MLS Test (en) |
MLS Test (es) |
VoxPopuli Dev (en) |
VoxPopuli Dev (es) |
VoxPopuli Test (en) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_enes_conformer_ctc_large |
enes |
16.7 % |
2.2 % |
5.5 % |
2.6 % |
5.5 % |
5.8 % |
3.5 % |
5.7 % |
||||||||||||
stt_enes_conformer_ctc_large_codesw |
enes |
16.51 % |
16.31 % |
2.22 % |
5.36 % |
2.55 % |
5.38 % |
5.00 % |
5.51 % |
3.46 % |
3.73 % |
5.58 % |
6.63 % |
||||||||
stt_enes_conformer_transducer_large |
enes |
16.2 % |
2.0 % |
4.6 % |
2.2 % |
4.6 % |
5.0 % |
3.3 % |
5.3 % |
||||||||||||
stt_enes_conformer_transducer_large_codesw |
enes |
15.70 % |
15.66 % |
1.97 % |
4.54 % |
2.17 % |
4.53 % |
4.51 % |
5.06 % |
3.27 % |
3.67 % |
5.28 % |
6.54 % |
Model Name |
Language |
Fisher-Dev-En |
Fisher-Dev-Es |
Fisher-Test-En |
Fisher-Test-Es |
Librispeech Dev-Clean |
Librispeech Dev-Other |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Dev-Set v7.0 (en) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set v7.0 (en) |
MCV Test-Set v7.0 (es) |
MLS Dev (en) |
MLS Dev (es) |
MLS Test (en) |
MLS Test (es) |
VoxPopuli Dev (en) |
VoxPopuli Dev (es) |
VoxPopuli Test (en) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_enes_contextnet_large |
enes |
14.8 % |
2.2 % |
5.6 % |
2.3 % |
5.5 % |
4.7 % |
3.0 % |
5.0 % |
EO#
Model Name |
Language |
MCV Dev-Set v11.0 (eo) |
MCV Test-Set v11.0 (eo) |
---|---|---|---|
stt_eo_conformer_ctc_large |
eo |
2.9 % |
4.8 % |
stt_eo_conformer_transducer_large |
eo |
2.4 % |
4.0 % |
ES#
Model Name |
Language |
Call Home Dev Test (es) |
Call Home Eval Test (es) |
Call Home Train (es) |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set (v??) (es) |
MCV Dev-Set v12.0 (es) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set (v??) (es) |
MCV Test-Set v12.0 (es) |
MCV Test-Set v7.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_es_citrinet_512 |
es |
9.1 % WER |
10.3 % WER |
4.9 % WER |
5.2 % WER |
|||||||||||
stt_es_citrinet_1024_gamma_0_25 |
es |
19.9 % |
21.3 % |
19.1 % |
15.8 % |
15.9 % |
6.1 % |
6.8 % |
3.5 % |
4.1 % |
5.6 % |
7.0 % |
Model Name |
Language |
Call Home Dev Test (es) |
Call Home Eval Test (es) |
Call Home Train (es) |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set (v??) (es) |
MCV Dev-Set v12.0 (es) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set (v??) (es) |
MCV Test-Set v12.0 (es) |
MCV Test-Set v7.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_es_conformer_ctc_large |
es |
23.7 % |
25.3 % |
22.4 % |
18.3 % |
18.5 % |
6.3 % |
6.9 % |
4.3 % |
4.2 % |
6.1 % |
7.5 % |
||||
stt_es_conformer_transducer_large |
es |
18.0 % |
19.4 % |
17.2 % |
14.7 % |
14.8 % |
4.6 % |
5.2 % |
2.7 % |
3.2 % |
4.7 % |
6.0 % |
Model Name |
Language |
Call Home Dev Test (es) |
Call Home Eval Test (es) |
Call Home Train (es) |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set (v??) (es) |
MCV Dev-Set v12.0 (es) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set (v??) (es) |
MCV Test-Set v12.0 (es) |
MCV Test-Set v7.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_es_contextnet_1024 |
es |
19.1 % |
20.7 % |
18.2 % |
15.3 % |
15.1 % |
4.8 % |
5.2 % |
3.1 % |
3.5 % |
5.1 % |
6.2 % |
Model Name |
Language |
Call Home Dev Test (es) |
Call Home Eval Test (es) |
Call Home Train (es) |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set (v??) (es) |
MCV Dev-Set v12.0 (es) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set (v??) (es) |
MCV Test-Set v12.0 (es) |
MCV Test-Set v7.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_es_fastconformer_hybrid_large_pc |
es |
29.4 % |
28.9 % |
7.1 % |
7.5 % |
10.6 % |
11.8 % |
8.6 % |
9.8 % |
Model Name |
Language |
Call Home Dev Test (es) |
Call Home Eval Test (es) |
Call Home Train (es) |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set (v??) (es) |
MCV Dev-Set v12.0 (es) |
MCV Dev-Set v7.0 (es) |
MCV Test-Set (v??) (es) |
MCV Test-Set v12.0 (es) |
MCV Test-Set v7.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stt_es_quartznet15x5 |
es |
12.97 |
FR#
Model Name |
Language |
MCV Dev-Set (v??) (fr) |
MCV Dev-Set v7.0 (fr) |
MCV Dev-Set v7.0 (fr) (No Hyphen) |
MCV Test-Set v7.0 (fr) |
MCV Test-Set v7.0 (fr) (No Hyphen) |
MLS Dev (fr) |
MLS Dev (fr) (No Hyphen) |
MLS Test (fr) |
MLS Test (fr) (No Hyphen) |
---|---|---|---|---|---|---|---|---|---|---|
stt_fr_citrinet_1024_gamma_0_25 |
fr |
10.76 |
9.90 |
12.20 |
11.11 |
6.66 |
6.19 |
5.53 |
5.12 |
Model Name |
Language |
MCV Dev-Set (v??) (fr) |
MCV Dev-Set v7.0 (fr) |
MCV Dev-Set v7.0 (fr) (No Hyphen) |
MCV Test-Set v7.0 (fr) |
MCV Test-Set v7.0 (fr) (No Hyphen) |
MLS Dev (fr) |
MLS Dev (fr) (No Hyphen) |
MLS Test (fr) |
MLS Test (fr) (No Hyphen) |
---|---|---|---|---|---|---|---|---|---|---|
stt_fr_conformer_ctc_large |
fr |
8.35 |
7.88 |
9.63 |
9.01 |
5.88 |
5.90 |
4.91 |
4.63 |
|
stt_fr_conformer_transducer_large |
fr |
6.85 |
7.95 |
5.05 |
4.10 |
Model Name |
Language |
MCV Dev-Set (v??) (fr) |
MCV Dev-Set v7.0 (fr) |
MCV Dev-Set v7.0 (fr) (No Hyphen) |
MCV Test-Set v7.0 (fr) |
MCV Test-Set v7.0 (fr) (No Hyphen) |
MLS Dev (fr) |
MLS Dev (fr) (No Hyphen) |
MLS Test (fr) |
MLS Test (fr) (No Hyphen) |
---|---|---|---|---|---|---|---|---|---|---|
stt_fr_contextnet_1024 |
fr |
8.32 |
9.42 |
6.02 |
5.01 |
Model Name |
Language |
MCV Dev-Set (v??) (fr) |
MCV Dev-Set v7.0 (fr) |
MCV Dev-Set v7.0 (fr) (No Hyphen) |
MCV Test-Set v7.0 (fr) |
MCV Test-Set v7.0 (fr) (No Hyphen) |
MLS Dev (fr) |
MLS Dev (fr) (No Hyphen) |
MLS Test (fr) |
MLS Test (fr) (No Hyphen) |
---|---|---|---|---|---|---|---|---|---|---|
stt_fr_quartznet15x5 |
fr |
14.01 |
HR#
Model Name |
Language |
ParlaSpeech Dev-Set v1.0 (hr) |
ParlaSpeech Test-Set v1.0 (hr) |
ParlaSpeech Dev-Set (v??) (hr) |
ParlaSpeech Test-Set (v??) (hr) |
---|---|---|---|---|---|
stt_hr_conformer_ctc_large |
hr |
4.43 |
4.70 |
||
stt_hr_conformer_transducer_large |
hr |
4.56 |
4.69 |
Model Name |
Language |
ParlaSpeech Dev-Set v1.0 (hr) |
ParlaSpeech Test-Set v1.0 (hr) |
ParlaSpeech Dev-Set (v??) (hr) |
ParlaSpeech Test-Set (v??) (hr) |
---|---|---|---|---|---|
stt_hr_fastconformer_hybrid_large_pc |
hr |
4.5 % |
4.2 % |
IT#
Model Name |
Language |
MCV Dev-Set (v??) (it) |
MCV Dev-Set v11.0 (it) |
MCV Dev-Set v12.0 (it) |
MCV Test-Set v11.0 (it) |
MCV Test-Set v12.0 (it) |
MLS Dev (it) |
MLS Test (it) |
VoxPopuli Dev (it) |
VoxPopuli Test (it) |
---|---|---|---|---|---|---|---|---|---|---|
stt_it_conformer_ctc_large |
it |
5.38 |
5.92 |
13.16 |
10.62 |
13.43 |
16.75 |
|||
stt_it_conformer_transducer_large |
it |
4.80 |
5.24 |
14.62 |
12.18 |
12.00 |
15.15 |
Model Name |
Language |
MCV Dev-Set (v??) (it) |
MCV Dev-Set v11.0 (it) |
MCV Dev-Set v12.0 (it) |
MCV Test-Set v11.0 (it) |
MCV Test-Set v12.0 (it) |
MLS Dev (it) |
MLS Test (it) |
VoxPopuli Dev (it) |
VoxPopuli Test (it) |
---|---|---|---|---|---|---|---|---|---|---|
stt_it_fastconformer_hybrid_large_pc |
it |
5.2 % |
5.8 % |
13.6 % |
11.5 % |
12.7 % |
15.6 % |
Model Name |
Language |
MCV Dev-Set (v??) (it) |
MCV Dev-Set v11.0 (it) |
MCV Dev-Set v12.0 (it) |
MCV Test-Set v11.0 (it) |
MCV Test-Set v12.0 (it) |
MLS Dev (it) |
MLS Test (it) |
VoxPopuli Dev (it) |
VoxPopuli Test (it) |
---|---|---|---|---|---|---|---|---|---|---|
stt_it_quartznet15x5 |
it |
15.22 |
KAB#
Model Name |
Language |
MCV Test-Set v10.0 (kab) |
---|---|---|
stt_kab_conformer_transducer_large |
kab |
18.86 |
NL#
Model Name |
Language |
MCV Test-Set v12.0 (nl) |
MLS Test (nl) |
---|---|---|---|
stt_nl_fastconformer_hybrid_large_pc |
nl |
9.2 % |
12.1 % |
PL#
Model Name |
Language |
MCV Dev-Set (v??) (pl) |
MCV Dev-Set v12.0 (pl) |
MCV Test-Set v12.0 (pl) |
MLS Dev (pl) |
MLS Test (pl) |
VoxPopuli Dev (pl) |
VoxPopuli Test (pl) |
---|---|---|---|---|---|---|---|---|
stt_pl_fastconformer_hybrid_large_pc |
pl |
6.0 % |
8.7 % |
7.1 % |
5.8 % |
11.3 % |
8.5 % |
Model Name |
Language |
MCV Dev-Set (v??) (pl) |
MCV Dev-Set v12.0 (pl) |
MCV Test-Set v12.0 (pl) |
MLS Dev (pl) |
MLS Test (pl) |
VoxPopuli Dev (pl) |
VoxPopuli Test (pl) |
---|---|---|---|---|---|---|---|---|
stt_pl_quartznet15x5 |
pl |
14 |
RU#
Model Name |
Language |
GOLOS Crowd Test-Set (v??) (ru) |
GOLOS Farfield Test-Set (v??) (ru) |
Librispeech Test |
MCV Dev-Set (v??) (ru) |
MCV Dev-Set v10.0 (ru) |
MCV Test-Set v10.0 (ru) |
---|---|---|---|---|---|---|---|
stt_ru_conformer_ctc_large |
ru |
2.8 % |
7.1 % |
13.5 % |
3.9 % |
4.3 % |
|
stt_ru_conformer_transducer_large |
ru |
2.7 % |
7.6 % |
12.0 % |
3.5 % |
4.0 % |
Model Name |
Language |
GOLOS Crowd Test-Set (v??) (ru) |
GOLOS Farfield Test-Set (v??) (ru) |
Librispeech Test |
MCV Dev-Set (v??) (ru) |
MCV Dev-Set v10.0 (ru) |
MCV Test-Set v10.0 (ru) |
---|---|---|---|---|---|---|---|
stt_ru_quartznet15x5 |
ru |
16.23 |
RW#
Model Name |
Language |
MCV Test-Set v9.0 (rw) |
---|---|---|
stt_rw_conformer_ctc_large |
rw |
18.2 % |
stt_rw_conformer_transducer_large |
rw |
16.2 % |
UA#
Model Name |
Language |
MCV Test-Set v12.0 (ua) |
---|---|---|
stt_ua_fastconformer_hybrid_large_pc |
ua |
5.2 % |
ZH#
Model Name |
Language |
AIShell Dev-Android v2 |
AIShell Dev-iOS v1 |
AIShell Dev-iOS v2 |
AIShell Dev-Mic v2 |
AIShell Test-Android v2 |
AIShell Test-iOS v1 |
AIShell Test-iOS v2 |
AIShell Test-Mic v2 |
---|---|---|---|---|---|---|---|---|---|
stt_zh_citrinet_512 |
zh |
6.25 % |
6.44 % |
||||||
stt_zh_citrinet_1024_gamma_0_25 |
zh |
5.2 % |
4.8 % |
5.2 % |
5.5 % |
5.1 % |
5.5 % |
Model Name |
Language |
AIShell Dev-Android v2 |
AIShell Dev-iOS v1 |
AIShell Dev-iOS v2 |
AIShell Dev-Mic v2 |
AIShell Test-Android v2 |
AIShell Test-iOS v1 |
AIShell Test-iOS v2 |
AIShell Test-Mic v2 |
---|---|---|---|---|---|---|---|---|---|
stt_zh_conformer_transducer_large |
zh |
3.4 |
3.2 |
3.4 |
3.4 |
3.2 |
3.4 |
Scores with Punctuation and Capitalization#
EN with P&C#
Model Name |
Language |
EuroParl Test Set (en) |
Fisher Test Set (en) |
Librispeech Test-Clean |
Librispeech Test-Other |
MCV Test-Set v11.0 (en) |
MLS Test (en) |
NSC Part1 |
SPGI Test |
VoxPopuli Test (en) |
---|---|---|---|---|---|---|---|---|---|---|
stt_en_fastconformer_hybrid_large_pc |
en |
12.5 % |
19.0 % |
7.3 % |
9.2 % |
10.1 % |
12.7 % |
7.2 % |
5.1 % |
6.7 % |
BY with P&C#
Model Name |
Language |
MCV Dev-Set v12.0 (be) |
MCV Test-Set v12.0 (be) |
---|---|---|---|
stt_by_fastconformer_hybrid_large_pc |
by |
3.8 % |
3.9 % |
DE with P&C#
Model Name |
Language |
MCV Dev-Set v12.0 (de) |
MCV Test-Set v12.0 (de) |
MLS Dev (de) |
MLS Test (de) |
VoxPopuli Dev (de) |
VoxPopuli Test (de) |
---|---|---|---|---|---|---|---|
stt_de_fastconformer_hybrid_large_pc |
de |
4.7 % |
5.4 % |
10.1 % |
11.1 % |
12.6 % |
10.4 % |
ES with P&C#
Model Name |
Language |
Fisher Dev Set (es) |
Fisher Test Set (es) |
MCV Dev-Set v12.0 (es) |
MCV Test-Set v12.0 (es) |
MLS Dev (es) |
MLS Test (es) |
VoxPopuli Dev (es) |
VoxPopuli Test (es) |
---|---|---|---|---|---|---|---|---|---|
stt_es_fastconformer_hybrid_large_pc |
es |
14.7 % |
14.6 % |
4.5 % |
5.0 % |
3.1 % |
3.9 % |
4.4 % |
5.6 % |
HR with P&C#
Model Name |
Language |
ParlaSpeech Dev-Set (v??) (hr) |
ParlaSpeech Test-Set (v??) (hr) |
---|---|---|---|
stt_hr_fastconformer_hybrid_large_pc |
hr |
10.4 % |
8.7 % |
IT with P&C#
Model Name |
Language |
MCV Dev-Set v12.0 (it) |
MCV Test-Set v12.0 (it) |
MLS Dev (it) |
MLS Test (it) |
VoxPopuli Dev (it) |
VoxPopuli Test (it) |
---|---|---|---|---|---|---|---|
stt_it_fastconformer_hybrid_large_pc |
it |
7.8 % |
8.2 % |
26.4 % |
22.5 % |
16.8 % |
19.6 % |
NL with P&C#
Model Name |
Language |
MCV Test-Set v12.0 (nl) |
MLS Test (nl) |
---|---|---|---|
stt_nl_fastconformer_hybrid_large_pc |
nl |
32.1 % |
25.1 % |
PL with P&C#
Model Name |
Language |
MCV Dev-Set v12.0 (pl) |
MCV Test-Set v12.0 (pl) |
MLS Dev (pl) |
MLS Test (pl) |
VoxPopuli Dev (pl) |
VoxPopuli Test (pl) |
---|---|---|---|---|---|---|---|
stt_pl_fastconformer_hybrid_large_pc |
pl |
8.9 % |
11.0 % |
16.0 % |
11.0 % |
14.0 % |
11.4 % |
UA with P&C#
Model Name |
Language |
MCV Test-Set v12.0 (ua) |
---|---|---|
stt_ua_fastconformer_hybrid_large_pc |
ua |
7.3 % |