Dmitry Chaplinsky committed on
Commit ·
eb80582
1
Parent(s): 2abbc46
First release
Browse files- README.md +33 -0
- best-lm.pt +3 -0
- flair_dictionary.pkl +3 -0
- loss.txt +336 -0
- pipeline.py +22 -0
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -1,3 +1,36 @@
|
|
| 1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
language:
|
| 3 |
+
- uk
|
| 4 |
+
tags:
|
| 5 |
+
- text2text-generation
|
| 6 |
+
- flair
|
| 7 |
+
library_name: generic
|
| 8 |
license: mit
|
| 9 |
+
metrics:
|
| 10 |
+
- perplexity
|
| 11 |
+
datasets:
|
| 12 |
+
- ubertext2.0
|
| 13 |
+
widget:
|
| 14 |
+
- text: "підсумував він."
|
| 15 |
+
- text: "Україна переможе!"
|
| 16 |
---
|
| 17 |
+
|
| 18 |
+
# Ukrainian flair embeddings (backward)
|
| 19 |
+
|
| 20 |
+
Trained for 12+ epochs on the texts from ubertext2.0 (WIP).
|
| 21 |
+
The character dictionary used for training is in the `flair_dictionary.pkl` file.
|
| 22 |
+
|
| 23 |
+
For more information on flair embeddings see [the article](https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md) or the paper below:
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
```bibtex
|
| 27 |
+
@inproceedings{akbik2018coling,
|
| 28 |
+
title={Contextual String Embeddings for Sequence Labeling},
|
| 29 |
+
author={Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
|
| 30 |
+
booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
|
| 31 |
+
pages = {1638--1649},
|
| 32 |
+
year = {2018}
|
| 33 |
+
}
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
Copyright: Dmytro Chaplynskyi, [lang-uk](https://lang.org.ua) project, 2022
|
best-lm.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a810aa30566d93280cdb89385000d44cd6320d559710784072ade264200620a
|
| 3 |
+
size 22791455
|
flair_dictionary.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2125c32d2db5fb79676a8a6f087b19e9c3b788cb19b87073423e31e176d1fe24
|
| 3 |
+
size 11900
|
loss.txt
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
| end of split 1 / 28 | epoch 1 | time: 3789.14s | valid loss 1.9590 | valid ppl 7.0919 | learning rate 20.0000
|
| 2 |
+
| end of split 2 / 28 | epoch 1 | time: 3789.55s | valid loss 1.5745 | valid ppl 4.8282 | learning rate 20.0000
|
| 3 |
+
| end of split 3 / 28 | epoch 1 | time: 3801.06s | valid loss 1.4277 | valid ppl 4.1690 | learning rate 20.0000
|
| 4 |
+
| end of split 4 / 28 | epoch 1 | time: 3796.22s | valid loss 1.3590 | valid ppl 3.8922 | learning rate 20.0000
|
| 5 |
+
| end of split 5 / 28 | epoch 1 | time: 3796.46s | valid loss 1.3225 | valid ppl 3.7527 | learning rate 20.0000
|
| 6 |
+
| end of split 6 / 28 | epoch 1 | time: 3800.42s | valid loss 1.2908 | valid ppl 3.6357 | learning rate 20.0000
|
| 7 |
+
| end of split 7 / 28 | epoch 1 | time: 3795.50s | valid loss 1.2755 | valid ppl 3.5803 | learning rate 20.0000
|
| 8 |
+
| end of split 8 / 28 | epoch 1 | time: 3796.83s | valid loss 1.2515 | valid ppl 3.4956 | learning rate 20.0000
|
| 9 |
+
| end of split 9 / 28 | epoch 1 | time: 3795.35s | valid loss 1.2422 | valid ppl 3.4631 | learning rate 20.0000
|
| 10 |
+
| end of split 10 / 28 | epoch 1 | time: 3797.17s | valid loss 1.2255 | valid ppl 3.4059 | learning rate 20.0000
|
| 11 |
+
| end of split 11 / 28 | epoch 1 | time: 3792.19s | valid loss 1.2145 | valid ppl 3.3686 | learning rate 20.0000
|
| 12 |
+
| end of split 12 / 28 | epoch 1 | time: 3789.43s | valid loss 1.2078 | valid ppl 3.3463 | learning rate 20.0000
|
| 13 |
+
| end of split 13 / 28 | epoch 1 | time: 36736.65s | valid loss 1.1987 | valid ppl 3.3159 | learning rate 20.0000
|
| 14 |
+
| end of split 14 / 28 | epoch 1 | time: 3787.94s | valid loss 1.1954 | valid ppl 3.3047 | learning rate 20.0000
|
| 15 |
+
| end of split 15 / 28 | epoch 1 | time: 3809.75s | valid loss 1.1862 | valid ppl 3.2745 | learning rate 20.0000
|
| 16 |
+
| end of split 16 / 28 | epoch 1 | time: 3844.97s | valid loss 1.1829 | valid ppl 3.2637 | learning rate 20.0000
|
| 17 |
+
| end of split 17 / 28 | epoch 1 | time: 3843.82s | valid loss 1.1774 | valid ppl 3.2460 | learning rate 20.0000
|
| 18 |
+
| end of split 18 / 28 | epoch 1 | time: 3846.40s | valid loss 1.1728 | valid ppl 3.2310 | learning rate 20.0000
|
| 19 |
+
| end of split 19 / 28 | epoch 1 | time: 3844.98s | valid loss 1.1681 | valid ppl 3.2159 | learning rate 20.0000
|
| 20 |
+
| end of split 20 / 28 | epoch 1 | time: 3815.00s | valid loss 1.1632 | valid ppl 3.2000 | learning rate 20.0000
|
| 21 |
+
| end of split 21 / 28 | epoch 1 | time: 3794.38s | valid loss 1.1613 | valid ppl 3.1939 | learning rate 20.0000
|
| 22 |
+
| end of split 22 / 28 | epoch 1 | time: 3796.78s | valid loss 1.1564 | valid ppl 3.1786 | learning rate 20.0000
|
| 23 |
+
| end of split 23 / 28 | epoch 1 | time: 3797.39s | valid loss 1.1545 | valid ppl 3.1725 | learning rate 20.0000
|
| 24 |
+
| end of split 24 / 28 | epoch 1 | time: 3797.94s | valid loss 1.1518 | valid ppl 3.1640 | learning rate 20.0000
|
| 25 |
+
| end of split 25 / 28 | epoch 1 | time: 3796.01s | valid loss 1.1469 | valid ppl 3.1485 | learning rate 20.0000
|
| 26 |
+
| end of split 26 / 28 | epoch 1 | time: 3796.73s | valid loss 1.1459 | valid ppl 3.1451 | learning rate 20.0000
|
| 27 |
+
| end of split 27 / 28 | epoch 1 | time: 3796.46s | valid loss 1.1429 | valid ppl 3.1358 | learning rate 20.0000
|
| 28 |
+
| end of split 28 / 28 | epoch 1 | time: 1096.56s | valid loss 1.1447 | valid ppl 3.1414 | learning rate 20.0000
|
| 29 |
+
| end of split 1 / 28 | epoch 2 | time: 3793.96s | valid loss 1.1414 | valid ppl 3.1312 | learning rate 20.0000
|
| 30 |
+
| end of split 2 / 28 | epoch 2 | time: 1096.67s | valid loss 1.1419 | valid ppl 3.1329 | learning rate 20.0000
|
| 31 |
+
| end of split 3 / 28 | epoch 2 | time: 3796.47s | valid loss 1.1401 | valid ppl 3.1269 | learning rate 20.0000
|
| 32 |
+
| end of split 4 / 28 | epoch 2 | time: 3798.81s | valid loss 1.1371 | valid ppl 3.1176 | learning rate 20.0000
|
| 33 |
+
| end of split 5 / 28 | epoch 2 | time: 3797.67s | valid loss 1.1361 | valid ppl 3.1146 | learning rate 20.0000
|
| 34 |
+
| end of split 6 / 28 | epoch 2 | time: 3798.63s | valid loss 1.1336 | valid ppl 3.1067 | learning rate 20.0000
|
| 35 |
+
| end of split 7 / 28 | epoch 2 | time: 3791.11s | valid loss 1.1323 | valid ppl 3.1028 | learning rate 20.0000
|
| 36 |
+
| end of split 8 / 28 | epoch 2 | time: 3788.66s | valid loss 1.1296 | valid ppl 3.0944 | learning rate 20.0000
|
| 37 |
+
| end of split 9 / 28 | epoch 2 | time: 3797.21s | valid loss 1.1272 | valid ppl 3.0869 | learning rate 20.0000
|
| 38 |
+
| end of split 10 / 28 | epoch 2 | time: 3794.19s | valid loss 1.1253 | valid ppl 3.0810 | learning rate 20.0000
|
| 39 |
+
| end of split 11 / 28 | epoch 2 | time: 3797.66s | valid loss 1.1238 | valid ppl 3.0765 | learning rate 20.0000
|
| 40 |
+
| end of split 12 / 28 | epoch 2 | time: 3795.30s | valid loss 1.1242 | valid ppl 3.0777 | learning rate 20.0000
|
| 41 |
+
| end of split 13 / 28 | epoch 2 | time: 3799.97s | valid loss 1.1220 | valid ppl 3.0710 | learning rate 20.0000
|
| 42 |
+
| end of split 14 / 28 | epoch 2 | time: 3798.40s | valid loss 1.1198 | valid ppl 3.0644 | learning rate 20.0000
|
| 43 |
+
| end of split 15 / 28 | epoch 2 | time: 3800.94s | valid loss 1.1200 | valid ppl 3.0650 | learning rate 20.0000
|
| 44 |
+
| end of split 16 / 28 | epoch 2 | time: 3795.23s | valid loss 1.1184 | valid ppl 3.0600 | learning rate 20.0000
|
| 45 |
+
| end of split 17 / 28 | epoch 2 | time: 3797.60s | valid loss 1.1181 | valid ppl 3.0591 | learning rate 20.0000
|
| 46 |
+
| end of split 18 / 28 | epoch 2 | time: 3794.23s | valid loss 1.1155 | valid ppl 3.0512 | learning rate 20.0000
|
| 47 |
+
| end of split 19 / 28 | epoch 2 | time: 3794.97s | valid loss 1.1144 | valid ppl 3.0477 | learning rate 20.0000
|
| 48 |
+
| end of split 20 / 28 | epoch 2 | time: 3801.57s | valid loss 1.1144 | valid ppl 3.0476 | learning rate 20.0000
|
| 49 |
+
| end of split 21 / 28 | epoch 2 | time: 3797.96s | valid loss 1.1128 | valid ppl 3.0428 | learning rate 20.0000
|
| 50 |
+
| end of split 22 / 28 | epoch 2 | time: 3797.43s | valid loss 1.1112 | valid ppl 3.0381 | learning rate 20.0000
|
| 51 |
+
| end of split 23 / 28 | epoch 2 | time: 3794.87s | valid loss 1.1099 | valid ppl 3.0342 | learning rate 20.0000
|
| 52 |
+
| end of split 24 / 28 | epoch 2 | time: 3799.90s | valid loss 1.1100 | valid ppl 3.0344 | learning rate 20.0000
|
| 53 |
+
| end of split 25 / 28 | epoch 2 | time: 3802.10s | valid loss 1.1083 | valid ppl 3.0291 | learning rate 20.0000
|
| 54 |
+
| end of split 26 / 28 | epoch 2 | time: 3800.69s | valid loss 1.1076 | valid ppl 3.0270 | learning rate 20.0000
|
| 55 |
+
| end of split 27 / 28 | epoch 2 | time: 3796.47s | valid loss 1.1065 | valid ppl 3.0238 | learning rate 20.0000
|
| 56 |
+
| end of split 28 / 28 | epoch 2 | time: 3801.18s | valid loss 1.1051 | valid ppl 3.0196 | learning rate 20.0000
|
| 57 |
+
| end of split 1 / 28 | epoch 3 | time: 3796.57s | valid loss 1.1045 | valid ppl 3.0176 | learning rate 20.0000
|
| 58 |
+
| end of split 2 / 28 | epoch 3 | time: 3801.61s | valid loss 1.1035 | valid ppl 3.0146 | learning rate 20.0000
|
| 59 |
+
| end of split 3 / 28 | epoch 3 | time: 3800.25s | valid loss 1.1027 | valid ppl 3.0122 | learning rate 20.0000
|
| 60 |
+
| end of split 4 / 28 | epoch 3 | time: 3800.72s | valid loss 1.1013 | valid ppl 3.0080 | learning rate 20.0000
|
| 61 |
+
| end of split 5 / 28 | epoch 3 | time: 3802.82s | valid loss 1.1010 | valid ppl 3.0072 | learning rate 20.0000
|
| 62 |
+
| end of split 6 / 28 | epoch 3 | time: 3802.42s | valid loss 1.1003 | valid ppl 3.0052 | learning rate 20.0000
|
| 63 |
+
| end of split 7 / 28 | epoch 3 | time: 3798.84s | valid loss 1.1001 | valid ppl 3.0044 | learning rate 20.0000
|
| 64 |
+
| end of split 8 / 28 | epoch 3 | time: 3793.80s | valid loss 1.1002 | valid ppl 3.0046 | learning rate 20.0000
|
| 65 |
+
| end of split 9 / 28 | epoch 3 | time: 3797.24s | valid loss 1.0987 | valid ppl 3.0002 | learning rate 20.0000
|
| 66 |
+
| end of split 10 / 28 | epoch 3 | time: 3795.35s | valid loss 1.0976 | valid ppl 2.9969 | learning rate 20.0000
|
| 67 |
+
| end of split 11 / 28 | epoch 3 | time: 3796.91s | valid loss 1.0978 | valid ppl 2.9976 | learning rate 20.0000
|
| 68 |
+
| end of split 12 / 28 | epoch 3 | time: 3797.71s | valid loss 1.0973 | valid ppl 2.9962 | learning rate 20.0000
|
| 69 |
+
| end of split 13 / 28 | epoch 3 | time: 3795.99s | valid loss 1.0967 | valid ppl 2.9943 | learning rate 20.0000
|
| 70 |
+
| end of split 14 / 28 | epoch 3 | time: 3795.07s | valid loss 1.0957 | valid ppl 2.9913 | learning rate 20.0000
|
| 71 |
+
| end of split 15 / 28 | epoch 3 | time: 3793.25s | valid loss 1.0942 | valid ppl 2.9869 | learning rate 20.0000
|
| 72 |
+
| end of split 16 / 28 | epoch 3 | time: 3797.79s | valid loss 1.0940 | valid ppl 2.9863 | learning rate 20.0000
|
| 73 |
+
| end of split 17 / 28 | epoch 3 | time: 3796.74s | valid loss 1.0934 | valid ppl 2.9844 | learning rate 20.0000
|
| 74 |
+
| end of split 18 / 28 | epoch 3 | time: 3794.47s | valid loss 1.0924 | valid ppl 2.9815 | learning rate 20.0000
|
| 75 |
+
| end of split 19 / 28 | epoch 3 | time: 3794.62s | valid loss 1.0924 | valid ppl 2.9814 | learning rate 20.0000
|
| 76 |
+
| end of split 20 / 28 | epoch 3 | time: 3797.27s | valid loss 1.0907 | valid ppl 2.9764 | learning rate 20.0000
|
| 77 |
+
| end of split 21 / 28 | epoch 3 | time: 3796.49s | valid loss 1.0909 | valid ppl 2.9770 | learning rate 20.0000
|
| 78 |
+
| end of split 22 / 28 | epoch 3 | time: 3798.45s | valid loss 1.0913 | valid ppl 2.9783 | learning rate 20.0000
|
| 79 |
+
| end of split 23 / 28 | epoch 3 | time: 1098.05s | valid loss 1.0917 | valid ppl 2.9792 | learning rate 20.0000
|
| 80 |
+
| end of split 24 / 28 | epoch 3 | time: 3789.62s | valid loss 1.0908 | valid ppl 2.9768 | learning rate 20.0000
|
| 81 |
+
| end of split 25 / 28 | epoch 3 | time: 3790.60s | valid loss 1.0899 | valid ppl 2.9739 | learning rate 20.0000
|
| 82 |
+
| end of split 26 / 28 | epoch 3 | time: 3794.69s | valid loss 1.0878 | valid ppl 2.9677 | learning rate 20.0000
|
| 83 |
+
| end of split 27 / 28 | epoch 3 | time: 3789.68s | valid loss 1.0886 | valid ppl 2.9702 | learning rate 20.0000
|
| 84 |
+
| end of split 28 / 28 | epoch 3 | time: 3798.26s | valid loss 1.0890 | valid ppl 2.9712 | learning rate 20.0000
|
| 85 |
+
| end of split 1 / 28 | epoch 4 | time: 3791.05s | valid loss 1.0875 | valid ppl 2.9668 | learning rate 20.0000
|
| 86 |
+
| end of split 2 / 28 | epoch 4 | time: 3801.11s | valid loss 1.0872 | valid ppl 2.9658 | learning rate 20.0000
|
| 87 |
+
| end of split 3 / 28 | epoch 4 | time: 3799.85s | valid loss 1.0874 | valid ppl 2.9665 | learning rate 20.0000
|
| 88 |
+
| end of split 4 / 28 | epoch 4 | time: 3798.81s | valid loss 1.0856 | valid ppl 2.9611 | learning rate 20.0000
|
| 89 |
+
| end of split 5 / 28 | epoch 4 | time: 3799.37s | valid loss 1.0849 | valid ppl 2.9591 | learning rate 20.0000
|
| 90 |
+
| end of split 6 / 28 | epoch 4 | time: 3794.42s | valid loss 1.0845 | valid ppl 2.9578 | learning rate 20.0000
|
| 91 |
+
| end of split 7 / 28 | epoch 4 | time: 3795.86s | valid loss 1.0865 | valid ppl 2.9639 | learning rate 20.0000
|
| 92 |
+
| end of split 8 / 28 | epoch 4 | time: 3796.29s | valid loss 1.0845 | valid ppl 2.9580 | learning rate 20.0000
|
| 93 |
+
| end of split 9 / 28 | epoch 4 | time: 3799.07s | valid loss 1.0838 | valid ppl 2.9560 | learning rate 20.0000
|
| 94 |
+
| end of split 10 / 28 | epoch 4 | time: 3798.77s | valid loss 1.0856 | valid ppl 2.9612 | learning rate 20.0000
|
| 95 |
+
| end of split 11 / 28 | epoch 4 | time: 3795.42s | valid loss 1.0826 | valid ppl 2.9524 | learning rate 20.0000
|
| 96 |
+
| end of split 12 / 28 | epoch 4 | time: 3798.31s | valid loss 1.0829 | valid ppl 2.9533 | learning rate 20.0000
|
| 97 |
+
| end of split 13 / 28 | epoch 4 | time: 1097.39s | valid loss 1.0828 | valid ppl 2.9528 | learning rate 20.0000
|
| 98 |
+
| end of split 14 / 28 | epoch 4 | time: 3796.62s | valid loss 1.0831 | valid ppl 2.9538 | learning rate 20.0000
|
| 99 |
+
| end of split 15 / 28 | epoch 4 | time: 3794.73s | valid loss 1.0821 | valid ppl 2.9508 | learning rate 20.0000
|
| 100 |
+
| end of split 16 / 28 | epoch 4 | time: 3797.00s | valid loss 1.0810 | valid ppl 2.9476 | learning rate 20.0000
|
| 101 |
+
| end of split 17 / 28 | epoch 4 | time: 3806.15s | valid loss 1.0812 | valid ppl 2.9481 | learning rate 20.0000
|
| 102 |
+
| end of split 18 / 28 | epoch 4 | time: 3806.71s | valid loss 1.0809 | valid ppl 2.9473 | learning rate 20.0000
|
| 103 |
+
| end of split 19 / 28 | epoch 4 | time: 3795.87s | valid loss 1.0813 | valid ppl 2.9484 | learning rate 20.0000
|
| 104 |
+
| end of split 20 / 28 | epoch 4 | time: 3799.98s | valid loss 1.0817 | valid ppl 2.9497 | learning rate 20.0000
|
| 105 |
+
| end of split 21 / 28 | epoch 4 | time: 3795.32s | valid loss 1.0803 | valid ppl 2.9455 | learning rate 20.0000
|
| 106 |
+
| end of split 22 / 28 | epoch 4 | time: 3794.34s | valid loss 1.0797 | valid ppl 2.9438 | learning rate 20.0000
|
| 107 |
+
| end of split 23 / 28 | epoch 4 | time: 3804.34s | valid loss 1.0790 | valid ppl 2.9417 | learning rate 20.0000
|
| 108 |
+
| end of split 24 / 28 | epoch 4 | time: 3798.90s | valid loss 1.0796 | valid ppl 2.9434 | learning rate 20.0000
|
| 109 |
+
| end of split 25 / 28 | epoch 4 | time: 3804.95s | valid loss 1.0802 | valid ppl 2.9454 | learning rate 20.0000
|
| 110 |
+
| end of split 26 / 28 | epoch 4 | time: 3799.98s | valid loss 1.0779 | valid ppl 2.9385 | learning rate 20.0000
|
| 111 |
+
| end of split 27 / 28 | epoch 4 | time: 3804.99s | valid loss 1.0798 | valid ppl 2.9441 | learning rate 20.0000
|
| 112 |
+
| end of split 28 / 28 | epoch 4 | time: 3804.92s | valid loss 1.0784 | valid ppl 2.9399 | learning rate 20.0000
|
| 113 |
+
| end of split 1 / 28 | epoch 5 | time: 3793.19s | valid loss 1.0781 | valid ppl 2.9390 | learning rate 20.0000
|
| 114 |
+
| end of split 2 / 28 | epoch 5 | time: 3794.63s | valid loss 1.0771 | valid ppl 2.9363 | learning rate 20.0000
|
| 115 |
+
| end of split 3 / 28 | epoch 5 | time: 3797.63s | valid loss 1.0761 | valid ppl 2.9333 | learning rate 20.0000
|
| 116 |
+
| end of split 4 / 28 | epoch 5 | time: 3797.24s | valid loss 1.0752 | valid ppl 2.9305 | learning rate 20.0000
|
| 117 |
+
| end of split 5 / 28 | epoch 5 | time: 3835.87s | valid loss 1.0764 | valid ppl 2.9340 | learning rate 20.0000
|
| 118 |
+
| end of split 6 / 28 | epoch 5 | time: 3836.48s | valid loss 1.0759 | valid ppl 2.9327 | learning rate 20.0000
|
| 119 |
+
| end of split 7 / 28 | epoch 5 | time: 3804.72s | valid loss 1.0756 | valid ppl 2.9319 | learning rate 20.0000
|
| 120 |
+
| end of split 8 / 28 | epoch 5 | time: 3797.48s | valid loss 1.0757 | valid ppl 2.9321 | learning rate 20.0000
|
| 121 |
+
| end of split 9 / 28 | epoch 5 | time: 3800.06s | valid loss 1.0751 | valid ppl 2.9303 | learning rate 20.0000
|
| 122 |
+
| end of split 10 / 28 | epoch 5 | time: 3796.96s | valid loss 1.0766 | valid ppl 2.9346 | learning rate 20.0000
|
| 123 |
+
| end of split 11 / 28 | epoch 5 | time: 3796.87s | valid loss 1.0751 | valid ppl 2.9303 | learning rate 20.0000
|
| 124 |
+
| end of split 12 / 28 | epoch 5 | time: 3794.98s | valid loss 1.0740 | valid ppl 2.9270 | learning rate 20.0000
|
| 125 |
+
| end of split 13 / 28 | epoch 5 | time: 3794.18s | valid loss 1.0737 | valid ppl 2.9261 | learning rate 20.0000
|
| 126 |
+
| end of split 14 / 28 | epoch 5 | time: 3794.87s | valid loss 1.0749 | valid ppl 2.9296 | learning rate 20.0000
|
| 127 |
+
| end of split 15 / 28 | epoch 5 | time: 3794.59s | valid loss 1.0737 | valid ppl 2.9263 | learning rate 20.0000
|
| 128 |
+
| end of split 16 / 28 | epoch 5 | time: 3798.73s | valid loss 1.0746 | valid ppl 2.9288 | learning rate 20.0000
|
| 129 |
+
| end of split 17 / 28 | epoch 5 | time: 3799.97s | valid loss 1.0912 | valid ppl 2.9777 | learning rate 20.0000
|
| 130 |
+
| end of split 18 / 28 | epoch 5 | time: 1097.48s | valid loss 1.0744 | valid ppl 2.9284 | learning rate 20.0000
|
| 131 |
+
| end of split 19 / 28 | epoch 5 | time: 3800.18s | valid loss 1.0725 | valid ppl 2.9227 | learning rate 20.0000
|
| 132 |
+
| end of split 20 / 28 | epoch 5 | time: 3801.07s | valid loss 1.0746 | valid ppl 2.9288 | learning rate 20.0000
|
| 133 |
+
| end of split 21 / 28 | epoch 5 | time: 3803.87s | valid loss 1.0742 | valid ppl 2.9277 | learning rate 20.0000
|
| 134 |
+
| end of split 22 / 28 | epoch 5 | time: 3807.38s | valid loss 1.0745 | valid ppl 2.9286 | learning rate 20.0000
|
| 135 |
+
| end of split 23 / 28 | epoch 5 | time: 3802.41s | valid loss 1.0735 | valid ppl 2.9255 | learning rate 20.0000
|
| 136 |
+
| end of split 24 / 28 | epoch 5 | time: 3803.85s | valid loss 1.0714 | valid ppl 2.9193 | learning rate 20.0000
|
| 137 |
+
| end of split 25 / 28 | epoch 5 | time: 3802.20s | valid loss 1.0703 | valid ppl 2.9163 | learning rate 20.0000
|
| 138 |
+
| end of split 26 / 28 | epoch 5 | time: 3804.97s | valid loss 1.0696 | valid ppl 2.9142 | learning rate 20.0000
|
| 139 |
+
| end of split 27 / 28 | epoch 5 | time: 3805.82s | valid loss 1.0704 | valid ppl 2.9167 | learning rate 20.0000
|
| 140 |
+
| end of split 28 / 28 | epoch 5 | time: 3804.59s | valid loss 1.0692 | valid ppl 2.9130 | learning rate 20.0000
|
| 141 |
+
| end of split 1 / 28 | epoch 6 | time: 3798.75s | valid loss 1.0703 | valid ppl 2.9162 | learning rate 20.0000
|
| 142 |
+
| end of split 2 / 28 | epoch 6 | time: 3801.06s | valid loss 1.0702 | valid ppl 2.9159 | learning rate 20.0000
|
| 143 |
+
| end of split 3 / 28 | epoch 6 | time: 3796.51s | valid loss 1.0690 | valid ppl 2.9123 | learning rate 20.0000
|
| 144 |
+
| end of split 4 / 28 | epoch 6 | time: 3797.49s | valid loss 1.0686 | valid ppl 2.9114 | learning rate 20.0000
|
| 145 |
+
| end of split 5 / 28 | epoch 6 | time: 3802.58s | valid loss 1.0688 | valid ppl 2.9120 | learning rate 20.0000
|
| 146 |
+
| end of split 6 / 28 | epoch 6 | time: 3800.26s | valid loss 1.0689 | valid ppl 2.9121 | learning rate 20.0000
|
| 147 |
+
| end of split 7 / 28 | epoch 6 | time: 3801.18s | valid loss 1.0683 | valid ppl 2.9103 | learning rate 20.0000
|
| 148 |
+
| end of split 8 / 28 | epoch 6 | time: 3805.98s | valid loss 1.0674 | valid ppl 2.9079 | learning rate 20.0000
|
| 149 |
+
| end of split 9 / 28 | epoch 6 | time: 3804.26s | valid loss 1.0674 | valid ppl 2.9078 | learning rate 20.0000
|
| 150 |
+
| end of split 10 / 28 | epoch 6 | time: 3797.98s | valid loss 1.0696 | valid ppl 2.9143 | learning rate 20.0000
|
| 151 |
+
| end of split 11 / 28 | epoch 6 | time: 3801.56s | valid loss 1.0679 | valid ppl 2.9093 | learning rate 20.0000
|
| 152 |
+
| end of split 12 / 28 | epoch 6 | time: 3802.48s | valid loss 1.0672 | valid ppl 2.9074 | learning rate 20.0000
|
| 153 |
+
| end of split 13 / 28 | epoch 6 | time: 3812.54s | valid loss 1.0673 | valid ppl 2.9076 | learning rate 20.0000
|
| 154 |
+
| end of split 14 / 28 | epoch 6 | time: 3816.47s | valid loss 1.0680 | valid ppl 2.9094 | learning rate 20.0000
|
| 155 |
+
| end of split 15 / 28 | epoch 6 | time: 3808.34s | valid loss 1.0670 | valid ppl 2.9067 | learning rate 20.0000
|
| 156 |
+
| end of split 16 / 28 | epoch 6 | time: 3810.71s | valid loss 1.0668 | valid ppl 2.9062 | learning rate 20.0000
|
| 157 |
+
| end of split 17 / 28 | epoch 6 | time: 3811.31s | valid loss 1.0657 | valid ppl 2.9028 | learning rate 20.0000
|
| 158 |
+
| end of split 18 / 28 | epoch 6 | time: 3808.51s | valid loss 1.0663 | valid ppl 2.9046 | learning rate 20.0000
|
| 159 |
+
| end of split 19 / 28 | epoch 6 | time: 3806.94s | valid loss 1.0660 | valid ppl 2.9039 | learning rate 20.0000
|
| 160 |
+
| end of split 20 / 28 | epoch 6 | time: 3804.47s | valid loss 1.0658 | valid ppl 2.9031 | learning rate 20.0000
|
| 161 |
+
| end of split 21 / 28 | epoch 6 | time: 3803.28s | valid loss 1.0657 | valid ppl 2.9029 | learning rate 20.0000
|
| 162 |
+
| end of split 22 / 28 | epoch 6 | time: 1098.89s | valid loss 1.0650 | valid ppl 2.9009 | learning rate 20.0000
|
| 163 |
+
| end of split 23 / 28 | epoch 6 | time: 3801.72s | valid loss 1.0658 | valid ppl 2.9030 | learning rate 20.0000
|
| 164 |
+
| end of split 24 / 28 | epoch 6 | time: 3808.12s | valid loss 1.0656 | valid ppl 2.9025 | learning rate 20.0000
|
| 165 |
+
| end of split 25 / 28 | epoch 6 | time: 3806.53s | valid loss 1.0679 | valid ppl 2.9094 | learning rate 20.0000
|
| 166 |
+
| end of split 26 / 28 | epoch 6 | time: 3800.71s | valid loss 1.0656 | valid ppl 2.9026 | learning rate 20.0000
|
| 167 |
+
| end of split 27 / 28 | epoch 6 | time: 3802.33s | valid loss 1.0645 | valid ppl 2.8994 | learning rate 20.0000
|
| 168 |
+
| end of split 28 / 28 | epoch 6 | time: 3797.75s | valid loss 1.0645 | valid ppl 2.8994 | learning rate 20.0000
|
| 169 |
+
| end of split 1 / 28 | epoch 7 | time: 3800.93s | valid loss 1.0649 | valid ppl 2.9004 | learning rate 20.0000
|
| 170 |
+
| end of split 2 / 28 | epoch 7 | time: 3803.64s | valid loss 1.0637 | valid ppl 2.8969 | learning rate 20.0000
|
| 171 |
+
| end of split 3 / 28 | epoch 7 | time: 3803.79s | valid loss 1.0636 | valid ppl 2.8968 | learning rate 20.0000
|
| 172 |
+
| end of split 4 / 28 | epoch 7 | time: 3805.63s | valid loss 1.0641 | valid ppl 2.8983 | learning rate 20.0000
|
| 173 |
+
| end of split 5 / 28 | epoch 7 | time: 3795.80s | valid loss 1.0629 | valid ppl 2.8947 | learning rate 20.0000
|
| 174 |
+
| end of split 6 / 28 | epoch 7 | time: 3807.54s | valid loss 1.0630 | valid ppl 2.8950 | learning rate 20.0000
|
| 175 |
+
| end of split 7 / 28 | epoch 7 | time: 3804.15s | valid loss 1.0640 | valid ppl 2.8980 | learning rate 20.0000
|
| 176 |
+
| end of split 8 / 28 | epoch 7 | time: 3803.94s | valid loss 1.0637 | valid ppl 2.8972 | learning rate 20.0000
|
| 177 |
+
| end of split 9 / 28 | epoch 7 | time: 3803.38s | valid loss 1.0634 | valid ppl 2.8962 | learning rate 20.0000
|
| 178 |
+
| end of split 10 / 28 | epoch 7 | time: 3806.34s | valid loss 1.0650 | valid ppl 2.9008 | learning rate 20.0000
|
| 179 |
+
| end of split 11 / 28 | epoch 7 | time: 1098.92s | valid loss 1.0622 | valid ppl 2.8926 | learning rate 20.0000
|
| 180 |
+
| end of split 12 / 28 | epoch 7 | time: 3803.81s | valid loss 1.0622 | valid ppl 2.8926 | learning rate 20.0000
|
| 181 |
+
| end of split 13 / 28 | epoch 7 | time: 3806.59s | valid loss 1.0630 | valid ppl 2.8949 | learning rate 20.0000
|
| 182 |
+
| end of split 14 / 28 | epoch 7 | time: 3803.04s | valid loss 1.0620 | valid ppl 2.8920 | learning rate 20.0000
|
| 183 |
+
| end of split 15 / 28 | epoch 7 | time: 3803.29s | valid loss 1.0619 | valid ppl 2.8920 | learning rate 20.0000
|
| 184 |
+
| end of split 16 / 28 | epoch 7 | time: 3802.60s | valid loss 1.0630 | valid ppl 2.8950 | learning rate 20.0000
|
| 185 |
+
| end of split 17 / 28 | epoch 7 | time: 3805.28s | valid loss 1.0621 | valid ppl 2.8925 | learning rate 20.0000
|
| 186 |
+
| end of split 18 / 28 | epoch 7 | time: 3800.72s | valid loss 1.0616 | valid ppl 2.8910 | learning rate 20.0000
|
| 187 |
+
| end of split 19 / 28 | epoch 7 | time: 3801.59s | valid loss 1.0615 | valid ppl 2.8907 | learning rate 20.0000
|
| 188 |
+
| end of split 20 / 28 | epoch 7 | time: 3803.04s | valid loss 1.0610 | valid ppl 2.8892 | learning rate 20.0000
|
| 189 |
+
| end of split 21 / 28 | epoch 7 | time: 3809.57s | valid loss 1.0597 | valid ppl 2.8855 | learning rate 20.0000
|
| 190 |
+
| end of split 22 / 28 | epoch 7 | time: 3802.88s | valid loss 1.0621 | valid ppl 2.8923 | learning rate 20.0000
|
| 191 |
+
| end of split 23 / 28 | epoch 7 | time: 3799.92s | valid loss 1.0612 | valid ppl 2.8900 | learning rate 20.0000
|
| 192 |
+
| end of split 24 / 28 | epoch 7 | time: 3804.46s | valid loss 1.0615 | valid ppl 2.8907 | learning rate 20.0000
|
| 193 |
+
| end of split 25 / 28 | epoch 7 | time: 3798.64s | valid loss 1.0599 | valid ppl 2.8862 | learning rate 20.0000
|
| 194 |
+
| end of split 26 / 28 | epoch 7 | time: 3799.12s | valid loss 1.0603 | valid ppl 2.8873 | learning rate 20.0000
|
| 195 |
+
| end of split 27 / 28 | epoch 7 | time: 3798.12s | valid loss 1.0606 | valid ppl 2.8880 | learning rate 20.0000
|
| 196 |
+
| end of split 28 / 28 | epoch 7 | time: 3805.05s | valid loss 1.0604 | valid ppl 2.8875 | learning rate 20.0000
|
| 197 |
+
| end of split 1 / 28 | epoch 8 | time: 3797.40s | valid loss 1.0600 | valid ppl 2.8863 | learning rate 20.0000
|
| 198 |
+
| end of split 2 / 28 | epoch 8 | time: 3796.23s | valid loss 1.0608 | valid ppl 2.8886 | learning rate 20.0000
|
| 199 |
+
| end of split 3 / 28 | epoch 8 | time: 3797.50s | valid loss 1.0626 | valid ppl 2.8940 | learning rate 20.0000
|
| 200 |
+
| end of split 4 / 28 | epoch 8 | time: 3798.81s | valid loss 1.0599 | valid ppl 2.8861 | learning rate 20.0000
|
| 201 |
+
| end of split 5 / 28 | epoch 8 | time: 3800.00s | valid loss 1.0562 | valid ppl 2.8756 | learning rate 5.0000
|
| 202 |
+
| end of split 6 / 28 | epoch 8 | time: 3806.43s | valid loss 1.0559 | valid ppl 2.8747 | learning rate 5.0000
|
| 203 |
+
| end of split 7 / 28 | epoch 8 | time: 3804.50s | valid loss 1.0557 | valid ppl 2.8739 | learning rate 5.0000
|
| 204 |
+
| end of split 8 / 28 | epoch 8 | time: 3803.18s | valid loss 1.0555 | valid ppl 2.8735 | learning rate 5.0000
|
| 205 |
+
| end of split 9 / 28 | epoch 8 | time: 1098.26s | valid loss 1.0555 | valid ppl 2.8734 | learning rate 5.0000
|
| 206 |
+
| end of split 10 / 28 | epoch 8 | time: 3803.32s | valid loss 1.0553 | valid ppl 2.8730 | learning rate 5.0000
|
| 207 |
+
| end of split 11 / 28 | epoch 8 | time: 3805.59s | valid loss 1.0553 | valid ppl 2.8728 | learning rate 5.0000
|
| 208 |
+
| end of split 12 / 28 | epoch 8 | time: 3798.28s | valid loss 1.0551 | valid ppl 2.8724 | learning rate 5.0000
|
| 209 |
+
| end of split 13 / 28 | epoch 8 | time: 3798.22s | valid loss 1.0551 | valid ppl 2.8722 | learning rate 5.0000
|
| 210 |
+
| end of split 14 / 28 | epoch 8 | time: 3798.98s | valid loss 1.0550 | valid ppl 2.8720 | learning rate 5.0000
|
| 211 |
+
| end of split 15 / 28 | epoch 8 | time: 3796.37s | valid loss 1.0550 | valid ppl 2.8719 | learning rate 5.0000
|
| 212 |
+
| end of split 16 / 28 | epoch 8 | time: 3792.33s | valid loss 1.0549 | valid ppl 2.8717 | learning rate 5.0000
|
| 213 |
+
| end of split 17 / 28 | epoch 8 | time: 3801.12s | valid loss 1.0548 | valid ppl 2.8715 | learning rate 5.0000
|
| 214 |
+
| end of split 18 / 28 | epoch 8 | time: 3803.54s | valid loss 1.0548 | valid ppl 2.8713 | learning rate 5.0000
|
| 215 |
+
| end of split 19 / 28 | epoch 8 | time: 3794.99s | valid loss 1.0547 | valid ppl 2.8712 | learning rate 5.0000
|
| 216 |
+
| end of split 20 / 28 | epoch 8 | time: 3800.67s | valid loss 1.0546 | valid ppl 2.8709 | learning rate 5.0000
|
| 217 |
+
| end of split 21 / 28 | epoch 8 | time: 3802.07s | valid loss 1.0547 | valid ppl 2.8710 | learning rate 5.0000
|
| 218 |
+
| end of split 22 / 28 | epoch 8 | time: 3795.63s | valid loss 1.0546 | valid ppl 2.8707 | learning rate 5.0000
|
| 219 |
+
| end of split 23 / 28 | epoch 8 | time: 3797.48s | valid loss 1.0545 | valid ppl 2.8705 | learning rate 5.0000
|
| 220 |
+
| end of split 24 / 28 | epoch 8 | time: 3826.24s | valid loss 1.0545 | valid ppl 2.8705 | learning rate 5.0000
|
| 221 |
+
| end of split 25 / 28 | epoch 8 | time: 3796.29s | valid loss 1.0543 | valid ppl 2.8701 | learning rate 5.0000
|
| 222 |
+
| end of split 26 / 28 | epoch 8 | time: 3803.96s | valid loss 1.0545 | valid ppl 2.8705 | learning rate 5.0000
|
| 223 |
+
| end of split 27 / 28 | epoch 8 | time: 3802.34s | valid loss 1.0543 | valid ppl 2.8700 | learning rate 5.0000
|
| 224 |
+
| end of split 28 / 28 | epoch 8 | time: 3803.96s | valid loss 1.0543 | valid ppl 2.8699 | learning rate 5.0000
|
| 225 |
+
| end of split 1 / 28 | epoch 9 | time: 3798.65s | valid loss 1.0542 | valid ppl 2.8697 | learning rate 5.0000
|
| 226 |
+
| end of split 2 / 28 | epoch 9 | time: 3801.55s | valid loss 1.0542 | valid ppl 2.8696 | learning rate 5.0000
|
| 227 |
+
| end of split 3 / 28 | epoch 9 | time: 3806.56s | valid loss 1.0541 | valid ppl 2.8693 | learning rate 5.0000
|
| 228 |
+
| end of split 4 / 28 | epoch 9 | time: 3801.41s | valid loss 1.0541 | valid ppl 2.8695 | learning rate 5.0000
|
| 229 |
+
| end of split 5 / 28 | epoch 9 | time: 3799.18s | valid loss 1.0540 | valid ppl 2.8692 | learning rate 5.0000
|
| 230 |
+
| end of split 6 / 28 | epoch 9 | time: 3801.41s | valid loss 1.0540 | valid ppl 2.8690 | learning rate 5.0000
|
| 231 |
+
| end of split 7 / 28 | epoch 9 | time: 3792.65s | valid loss 1.0539 | valid ppl 2.8687 | learning rate 5.0000
|
| 232 |
+
| end of split 8 / 28 | epoch 9 | time: 3801.50s | valid loss 1.0539 | valid ppl 2.8688 | learning rate 5.0000
|
| 233 |
+
| end of split 9 / 28 | epoch 9 | time: 3799.22s | valid loss 1.0539 | valid ppl 2.8689 | learning rate 5.0000
|
| 234 |
+
| end of split 10 / 28 | epoch 9 | time: 3798.30s | valid loss 1.0537 | valid ppl 2.8683 | learning rate 5.0000
|
| 235 |
+
| end of split 11 / 28 | epoch 9 | time: 3794.81s | valid loss 1.0537 | valid ppl 2.8682 | learning rate 5.0000
|
| 236 |
+
| end of split 12 / 28 | epoch 9 | time: 3794.04s | valid loss 1.0537 | valid ppl 2.8682 | learning rate 5.0000
|
| 237 |
+
| end of split 13 / 28 | epoch 9 | time: 3798.63s | valid loss 1.0537 | valid ppl 2.8683 | learning rate 5.0000
|
| 238 |
+
| end of split 14 / 28 | epoch 9 | time: 3797.90s | valid loss 1.0535 | valid ppl 2.8678 | learning rate 5.0000
|
| 239 |
+
| end of split 15 / 28 | epoch 9 | time: 3796.44s | valid loss 1.0536 | valid ppl 2.8680 | learning rate 5.0000
|
| 240 |
+
| end of split 16 / 28 | epoch 9 | time: 3798.41s | valid loss 1.0536 | valid ppl 2.8678 | learning rate 5.0000
|
| 241 |
+
| end of split 17 / 28 | epoch 9 | time: 3799.93s | valid loss 1.0535 | valid ppl 2.8676 | learning rate 5.0000
|
| 242 |
+
| end of split 18 / 28 | epoch 9 | time: 3803.40s | valid loss 1.0534 | valid ppl 2.8673 | learning rate 5.0000
|
| 243 |
+
| end of split 19 / 28 | epoch 9 | time: 3807.52s | valid loss 1.0537 | valid ppl 2.8683 | learning rate 5.0000
|
| 244 |
+
| end of split 20 / 28 | epoch 9 | time: 3807.58s | valid loss 1.0534 | valid ppl 2.8673 | learning rate 5.0000
|
| 245 |
+
| end of split 21 / 28 | epoch 9 | time: 3799.18s | valid loss 1.0533 | valid ppl 2.8672 | learning rate 5.0000
|
| 246 |
+
| end of split 22 / 28 | epoch 9 | time: 3800.62s | valid loss 1.0532 | valid ppl 2.8668 | learning rate 5.0000
|
| 247 |
+
| end of split 23 / 28 | epoch 9 | time: 3796.79s | valid loss 1.0532 | valid ppl 2.8667 | learning rate 5.0000
|
| 248 |
+
| end of split 24 / 28 | epoch 9 | time: 1097.06s | valid loss 1.0532 | valid ppl 2.8669 | learning rate 5.0000
|
| 249 |
+
| end of split 25 / 28 | epoch 9 | time: 3795.86s | valid loss 1.0532 | valid ppl 2.8669 | learning rate 5.0000
|
| 250 |
+
| end of split 26 / 28 | epoch 9 | time: 3803.14s | valid loss 1.0531 | valid ppl 2.8665 | learning rate 5.0000
|
| 251 |
+
| end of split 27 / 28 | epoch 9 | time: 3798.92s | valid loss 1.0530 | valid ppl 2.8663 | learning rate 5.0000
|
| 252 |
+
| end of split 28 / 28 | epoch 9 | time: 3799.90s | valid loss 1.0530 | valid ppl 2.8663 | learning rate 5.0000
|
| 253 |
+
| end of split 1 / 28 | epoch 10 | time: 3798.57s | valid loss 1.0530 | valid ppl 2.8662 | learning rate 5.0000
|
| 254 |
+
| end of split 2 / 28 | epoch 10 | time: 3798.13s | valid loss 1.0529 | valid ppl 2.8661 | learning rate 5.0000
|
| 255 |
+
| end of split 3 / 28 | epoch 10 | time: 3799.82s | valid loss 1.0530 | valid ppl 2.8662 | learning rate 5.0000
|
| 256 |
+
| end of split 4 / 28 | epoch 10 | time: 3802.23s | valid loss 1.0529 | valid ppl 2.8659 | learning rate 5.0000
|
| 257 |
+
| end of split 5 / 28 | epoch 10 | time: 3801.56s | valid loss 1.0529 | valid ppl 2.8660 | learning rate 5.0000
|
| 258 |
+
| end of split 6 / 28 | epoch 10 | time: 3798.08s | valid loss 1.0528 | valid ppl 2.8656 | learning rate 5.0000
|
| 259 |
+
| end of split 7 / 28 | epoch 10 | time: 3800.12s | valid loss 1.0528 | valid ppl 2.8656 | learning rate 5.0000
|
| 260 |
+
| end of split 8 / 28 | epoch 10 | time: 3800.94s | valid loss 1.0526 | valid ppl 2.8652 | learning rate 5.0000
|
| 261 |
+
| end of split 9 / 28 | epoch 10 | time: 3801.43s | valid loss 1.0529 | valid ppl 2.8659 | learning rate 5.0000
|
| 262 |
+
| end of split 10 / 28 | epoch 10 | time: 3798.47s | valid loss 1.0526 | valid ppl 2.8652 | learning rate 5.0000
|
| 263 |
+
| end of split 11 / 28 | epoch 10 | time: 3803.15s | valid loss 1.0526 | valid ppl 2.8650 | learning rate 5.0000
|
| 264 |
+
| end of split 12 / 28 | epoch 10 | time: 3800.32s | valid loss 1.0526 | valid ppl 2.8650 | learning rate 5.0000
|
| 265 |
+
| end of split 13 / 28 | epoch 10 | time: 3802.61s | valid loss 1.0525 | valid ppl 2.8647 | learning rate 5.0000
|
| 266 |
+
| end of split 14 / 28 | epoch 10 | time: 3799.08s | valid loss 1.0525 | valid ppl 2.8648 | learning rate 5.0000
|
| 267 |
+
| end of split 15 / 28 | epoch 10 | time: 3801.19s | valid loss 1.0525 | valid ppl 2.8647 | learning rate 5.0000
|
| 268 |
+
| end of split 16 / 28 | epoch 10 | time: 3801.20s | valid loss 1.0524 | valid ppl 2.8646 | learning rate 5.0000
|
| 269 |
+
| end of split 17 / 28 | epoch 10 | time: 3802.37s | valid loss 1.0524 | valid ppl 2.8645 | learning rate 5.0000
|
| 270 |
+
| end of split 18 / 28 | epoch 10 | time: 3805.85s | valid loss 1.0523 | valid ppl 2.8643 | learning rate 5.0000
|
| 271 |
+
| end of split 19 / 28 | epoch 10 | time: 3804.15s | valid loss 1.0524 | valid ppl 2.8644 | learning rate 5.0000
|
| 272 |
+
| end of split 20 / 28 | epoch 10 | time: 3806.41s | valid loss 1.0523 | valid ppl 2.8642 | learning rate 5.0000
|
| 273 |
+
| end of split 21 / 28 | epoch 10 | time: 3809.13s | valid loss 1.0522 | valid ppl 2.8639 | learning rate 5.0000
|
| 274 |
+
| end of split 22 / 28 | epoch 10 | time: 3798.99s | valid loss 1.0523 | valid ppl 2.8641 | learning rate 5.0000
|
| 275 |
+
| end of split 23 / 28 | epoch 10 | time: 3802.76s | valid loss 1.0522 | valid ppl 2.8639 | learning rate 5.0000
|
| 276 |
+
| end of split 24 / 28 | epoch 10 | time: 3805.95s | valid loss 1.0522 | valid ppl 2.8639 | learning rate 5.0000
|
| 277 |
+
| end of split 25 / 28 | epoch 10 | time: 3803.67s | valid loss 1.0522 | valid ppl 2.8639 | learning rate 5.0000
|
| 278 |
+
| end of split 26 / 28 | epoch 10 | time: 3802.75s | valid loss 1.0521 | valid ppl 2.8635 | learning rate 5.0000
|
| 279 |
+
| end of split 27 / 28 | epoch 10 | time: 3804.63s | valid loss 1.0520 | valid ppl 2.8633 | learning rate 5.0000
|
| 280 |
+
| end of split 28 / 28 | epoch 10 | time: 1097.97s | valid loss 1.0520 | valid ppl 2.8634 | learning rate 5.0000
|
| 281 |
+
| end of split 1 / 28 | epoch 11 | time: 3793.51s | valid loss 1.0520 | valid ppl 2.8634 | learning rate 5.0000
|
| 282 |
+
| end of split 2 / 28 | epoch 11 | time: 3802.15s | valid loss 1.0520 | valid ppl 2.8633 | learning rate 5.0000
|
| 283 |
+
| end of split 3 / 28 | epoch 11 | time: 3801.09s | valid loss 1.0518 | valid ppl 2.8629 | learning rate 5.0000
|
| 284 |
+
| end of split 4 / 28 | epoch 11 | time: 3803.88s | valid loss 1.0518 | valid ppl 2.8629 | learning rate 5.0000
|
| 285 |
+
| end of split 5 / 28 | epoch 11 | time: 3803.72s | valid loss 1.0518 | valid ppl 2.8628 | learning rate 5.0000
|
| 286 |
+
| end of split 6 / 28 | epoch 11 | time: 3803.50s | valid loss 1.0518 | valid ppl 2.8629 | learning rate 5.0000
|
| 287 |
+
| end of split 7 / 28 | epoch 11 | time: 3798.93s | valid loss 1.0518 | valid ppl 2.8627 | learning rate 5.0000
|
| 288 |
+
| end of split 8 / 28 | epoch 11 | time: 3798.59s | valid loss 1.0516 | valid ppl 2.8623 | learning rate 5.0000
|
| 289 |
+
| end of split 9 / 28 | epoch 11 | time: 3797.52s | valid loss 1.0517 | valid ppl 2.8624 | learning rate 5.0000
|
| 290 |
+
| end of split 10 / 28 | epoch 11 | time: 3806.92s | valid loss 1.0518 | valid ppl 2.8627 | learning rate 5.0000
|
| 291 |
+
| end of split 11 / 28 | epoch 11 | time: 3806.04s | valid loss 1.0516 | valid ppl 2.8622 | learning rate 5.0000
|
| 292 |
+
| end of split 12 / 28 | epoch 11 | time: 3801.39s | valid loss 1.0519 | valid ppl 2.8632 | learning rate 5.0000
|
| 293 |
+
| end of split 13 / 28 | epoch 11 | time: 3801.24s | valid loss 1.0516 | valid ppl 2.8622 | learning rate 5.0000
|
| 294 |
+
| end of split 14 / 28 | epoch 11 | time: 3804.44s | valid loss 1.0515 | valid ppl 2.8620 | learning rate 5.0000
|
| 295 |
+
| end of split 15 / 28 | epoch 11 | time: 3801.34s | valid loss 1.0515 | valid ppl 2.8620 | learning rate 5.0000
|
| 296 |
+
| end of split 16 / 28 | epoch 11 | time: 3803.14s | valid loss 1.0514 | valid ppl 2.8618 | learning rate 5.0000
|
| 297 |
+
| end of split 17 / 28 | epoch 11 | time: 3801.11s | valid loss 1.0514 | valid ppl 2.8617 | learning rate 5.0000
|
| 298 |
+
| end of split 18 / 28 | epoch 11 | time: 3804.58s | valid loss 1.0513 | valid ppl 2.8613 | learning rate 5.0000
|
| 299 |
+
| end of split 19 / 28 | epoch 11 | time: 3796.04s | valid loss 1.0513 | valid ppl 2.8615 | learning rate 5.0000
|
| 300 |
+
| end of split 20 / 28 | epoch 11 | time: 3797.12s | valid loss 1.0512 | valid ppl 2.8611 | learning rate 5.0000
|
| 301 |
+
| end of split 21 / 28 | epoch 11 | time: 1097.96s | valid loss 1.0512 | valid ppl 2.8612 | learning rate 5.0000
|
| 302 |
+
| end of split 22 / 28 | epoch 11 | time: 3800.79s | valid loss 1.0513 | valid ppl 2.8613 | learning rate 5.0000
|
| 303 |
+
| end of split 23 / 28 | epoch 11 | time: 3801.51s | valid loss 1.0518 | valid ppl 2.8629 | learning rate 5.0000
|
| 304 |
+
| end of split 24 / 28 | epoch 11 | time: 3798.63s | valid loss 1.0513 | valid ppl 2.8614 | learning rate 5.0000
|
| 305 |
+
| end of split 25 / 28 | epoch 11 | time: 3796.99s | valid loss 1.0512 | valid ppl 2.8612 | learning rate 5.0000
|
| 306 |
+
| end of split 26 / 28 | epoch 11 | time: 3797.77s | valid loss 1.0512 | valid ppl 2.8610 | learning rate 5.0000
|
| 307 |
+
| end of split 27 / 28 | epoch 11 | time: 3797.73s | valid loss 1.0512 | valid ppl 2.8610 | learning rate 5.0000
|
| 308 |
+
| end of split 28 / 28 | epoch 11 | time: 3800.03s | valid loss 1.0511 | valid ppl 2.8607 | learning rate 5.0000
|
| 309 |
+
| end of split 1 / 28 | epoch 12 | time: 3796.72s | valid loss 1.0511 | valid ppl 2.8609 | learning rate 5.0000
|
| 310 |
+
| end of split 2 / 28 | epoch 12 | time: 1097.45s | valid loss 1.0510 | valid ppl 2.8604 | learning rate 5.0000
|
| 311 |
+
| end of split 3 / 28 | epoch 12 | time: 3803.10s | valid loss 1.0510 | valid ppl 2.8606 | learning rate 5.0000
|
| 312 |
+
| end of split 4 / 28 | epoch 12 | time: 3803.38s | valid loss 1.0510 | valid ppl 2.8604 | learning rate 5.0000
|
| 313 |
+
| end of split 5 / 28 | epoch 12 | time: 3796.86s | valid loss 1.0509 | valid ppl 2.8602 | learning rate 5.0000
|
| 314 |
+
| end of split 6 / 28 | epoch 12 | time: 3804.85s | valid loss 1.0509 | valid ppl 2.8601 | learning rate 5.0000
|
| 315 |
+
| end of split 7 / 28 | epoch 12 | time: 3804.65s | valid loss 1.0509 | valid ppl 2.8601 | learning rate 5.0000
|
| 316 |
+
| end of split 8 / 28 | epoch 12 | time: 3806.75s | valid loss 1.0508 | valid ppl 2.8599 | learning rate 5.0000
|
| 317 |
+
| end of split 9 / 28 | epoch 12 | time: 3800.05s | valid loss 1.0507 | valid ppl 2.8597 | learning rate 5.0000
|
| 318 |
+
| end of split 10 / 28 | epoch 12 | time: 3802.67s | valid loss 1.0507 | valid ppl 2.8596 | learning rate 5.0000
|
| 319 |
+
| end of split 11 / 28 | epoch 12 | time: 3806.56s | valid loss 1.0508 | valid ppl 2.8598 | learning rate 5.0000
|
| 320 |
+
| end of split 12 / 28 | epoch 12 | time: 3804.49s | valid loss 1.0507 | valid ppl 2.8598 | learning rate 5.0000
|
| 321 |
+
| end of split 13 / 28 | epoch 12 | time: 3804.60s | valid loss 1.0507 | valid ppl 2.8595 | learning rate 5.0000
|
| 322 |
+
| end of split 14 / 28 | epoch 12 | time: 3799.49s | valid loss 1.0506 | valid ppl 2.8594 | learning rate 5.0000
|
| 323 |
+
| end of split 15 / 28 | epoch 12 | time: 3807.23s | valid loss 1.0506 | valid ppl 2.8595 | learning rate 5.0000
|
| 324 |
+
| end of split 16 / 28 | epoch 12 | time: 3798.38s | valid loss 1.0506 | valid ppl 2.8592 | learning rate 5.0000
|
| 325 |
+
| end of split 17 / 28 | epoch 12 | time: 3806.09s | valid loss 1.0506 | valid ppl 2.8595 | learning rate 5.0000
|
| 326 |
+
| end of split 18 / 28 | epoch 12 | time: 3797.37s | valid loss 1.0506 | valid ppl 2.8594 | learning rate 5.0000
|
| 327 |
+
| end of split 19 / 28 | epoch 12 | time: 3800.94s | valid loss 1.0505 | valid ppl 2.8589 | learning rate 5.0000
|
| 328 |
+
| end of split 20 / 28 | epoch 12 | time: 3796.71s | valid loss 1.0505 | valid ppl 2.8590 | learning rate 5.0000
|
| 329 |
+
| end of split 21 / 28 | epoch 12 | time: 3795.95s | valid loss 1.0504 | valid ppl 2.8588 | learning rate 5.0000
|
| 330 |
+
| end of split 22 / 28 | epoch 12 | time: 3793.39s | valid loss 1.0504 | valid ppl 2.8588 | learning rate 5.0000
|
| 331 |
+
| end of split 23 / 28 | epoch 12 | time: 3797.13s | valid loss 1.0503 | valid ppl 2.8586 | learning rate 5.0000
|
| 332 |
+
| end of split 24 / 28 | epoch 12 | time: 3802.93s | valid loss 1.0503 | valid ppl 2.8586 | learning rate 5.0000
|
| 333 |
+
| end of split 25 / 28 | epoch 12 | time: 3798.55s | valid loss 1.0502 | valid ppl 2.8582 | learning rate 5.0000
|
| 334 |
+
| end of split 26 / 28 | epoch 12 | time: 3797.73s | valid loss 1.0502 | valid ppl 2.8582 | learning rate 5.0000
|
| 335 |
+
| end of split 27 / 28 | epoch 12 | time: 3798.53s | valid loss 1.0502 | valid ppl 2.8582 | learning rate 5.0000
|
| 336 |
+
| end of split 28 / 28 | epoch 12 | time: 3797.17s | valid loss 1.0502 | valid ppl 2.8582 | learning rate 5.0000
|
pipeline.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
from flair.models.language_model import LanguageModel
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class PreTrainedPipeline:
    """Text-generation pipeline around the backward Ukrainian flair language model.

    The checkpoint is fetched from the Hugging Face Hub when the pipeline is
    constructed. The prompt is reversed character by character before it is
    handed to the model.
    """

    # Hub coordinates of the checkpoint and the sampling temperature. They are
    # class attributes so a subclass can point at a different checkpoint
    # without re-implementing the methods.
    REPO_ID = "dchaplinsky/flair-uk-backward"
    MODEL_FILENAME = "best-lm.pt"
    TEMPERATURE = 0.5

    def __init__(self, path=""):
        """Download the checkpoint from the Hub and load it.

        Args:
            path (:obj:`str`):
                accepted for interface compatibility but not used; the
                checkpoint is always resolved through ``hf_hub_download``.
        """
        from huggingface_hub import hf_hub_download

        self.model = LanguageModel.load_language_model(
            hf_hub_download(repo_id=self.REPO_ID, filename=self.MODEL_FILENAME)
        )

    def __call__(self, inputs: str) -> List[Dict]:
        """Generate text for the given prompt.

        Args:
            inputs (:obj:`str`):
                a string containing some text; surrounding whitespace is
                stripped before generation
        Return:
            A :obj:`list` holding a single :obj:`dict` whose
            ``"generated_text"`` key maps to the first element of the value
            returned by the model's ``generate_text``.
        """
        prompt = inputs.strip()
        # The prompt is fed reversed (the original code does the same).
        # NOTE(review): presumably generate_text returns a (text, score) pair
        # and restores reading order for backward models - confirm in flair.
        generated = self.model.generate_text(
            prompt[::-1], temperature=self.TEMPERATURE
        )[0]
        return [{"generated_text": generated}]
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
flair
|