DilLeiX-it / trainer_state.json
ameykaran's picture
Uploaded model
f53a038
{
"best_global_step": 7000,
"best_metric": 0.6139324903488159,
"best_model_checkpoint": "/scratch/ameyk/lma/model-finetuned/checkpoints/checkpoint-7000",
"epoch": 4.087049808429119,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04086845466155811,
"grad_norm": 60.702266693115234,
"learning_rate": 2.9700000000000004e-06,
"loss": 17.819,
"step": 100
},
{
"epoch": 0.08173690932311622,
"grad_norm": 47.32134246826172,
"learning_rate": 5.9700000000000004e-06,
"loss": 7.7058,
"step": 200
},
{
"epoch": 0.12260536398467432,
"grad_norm": 31.054290771484375,
"learning_rate": 8.97e-06,
"loss": 5.103,
"step": 300
},
{
"epoch": 0.16347381864623245,
"grad_norm": 25.974103927612305,
"learning_rate": 1.197e-05,
"loss": 4.0185,
"step": 400
},
{
"epoch": 0.20434227330779056,
"grad_norm": 19.508167266845703,
"learning_rate": 1.497e-05,
"loss": 3.3012,
"step": 500
},
{
"epoch": 0.20434227330779056,
"eval_accuracy": 0.0,
"eval_confusion_matrix": [
[
0,
0
],
[
2000,
0
]
],
"eval_f1_macro": 0.0,
"eval_humor_english_accuracy": 0.0,
"eval_humor_english_avg_loss": 0.7475204364605462,
"eval_humor_english_f1": 0.0,
"eval_humor_english_precision": 0.0,
"eval_humor_english_recall": 0.0,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.0,
"eval_humor_hindi_avg_loss": 0.591034443622271,
"eval_humor_hindi_f1": 0.0,
"eval_humor_hindi_precision": 0.0,
"eval_humor_hindi_recall": 0.0,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0,
"eval_humor_overall_avg_loss": 0.6701178589073499,
"eval_humor_overall_f1": 0.0,
"eval_humor_overall_precision": 0.0,
"eval_humor_overall_recall": 0.0,
"eval_humor_overall_samples": 2000,
"eval_loss": 1.5324292182922363,
"eval_perplexity": 4.629409284567538,
"eval_precision_macro": 0.0,
"eval_recall_macro": 0.0,
"eval_runtime": 1346.3326,
"eval_samples_per_second": 1.486,
"eval_steps_per_second": 0.743,
"eval_temporal_english_avg_loss": 0.9373093255760758,
"eval_temporal_hindi_avg_loss": 1.4527543705181314,
"eval_temporal_overall_avg_loss": 1.1912949920150167,
"step": 500
},
{
"epoch": 0.24521072796934865,
"grad_norm": 14.092637062072754,
"learning_rate": 1.797e-05,
"loss": 2.8053,
"step": 600
},
{
"epoch": 0.28607918263090676,
"grad_norm": 9.543878555297852,
"learning_rate": 2.097e-05,
"loss": 2.393,
"step": 700
},
{
"epoch": 0.3269476372924649,
"grad_norm": 7.029173851013184,
"learning_rate": 2.3970000000000003e-05,
"loss": 2.1078,
"step": 800
},
{
"epoch": 0.367816091954023,
"grad_norm": 6.117612361907959,
"learning_rate": 2.697e-05,
"loss": 1.924,
"step": 900
},
{
"epoch": 0.4086845466155811,
"grad_norm": 6.306148052215576,
"learning_rate": 2.997e-05,
"loss": 1.7717,
"step": 1000
},
{
"epoch": 0.4086845466155811,
"eval_accuracy": 0.0,
"eval_confusion_matrix": [
[
0,
0
],
[
2000,
0
]
],
"eval_f1_macro": 0.0,
"eval_humor_english_accuracy": 0.0,
"eval_humor_english_avg_loss": 0.5384818354636839,
"eval_humor_english_f1": 0.0,
"eval_humor_english_precision": 0.0,
"eval_humor_english_recall": 0.0,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.0,
"eval_humor_hindi_avg_loss": 0.3663946280421942,
"eval_humor_hindi_f1": 0.0,
"eval_humor_hindi_precision": 0.0,
"eval_humor_hindi_recall": 0.0,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0,
"eval_humor_overall_avg_loss": 0.4519576721899176,
"eval_humor_overall_f1": 0.0,
"eval_humor_overall_precision": 0.0,
"eval_humor_overall_recall": 0.0,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.9522949457168579,
"eval_perplexity": 2.5916505672889882,
"eval_precision_macro": 0.0,
"eval_recall_macro": 0.0,
"eval_runtime": 1361.0739,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.735,
"eval_temporal_english_avg_loss": 0.5830070118860753,
"eval_temporal_hindi_avg_loss": 0.9119034483581334,
"eval_temporal_overall_avg_loss": 0.7474362385274357,
"step": 1000
},
{
"epoch": 0.4495530012771392,
"grad_norm": 5.973597526550293,
"learning_rate": 2.9991044225324593e-05,
"loss": 1.6392,
"step": 1100
},
{
"epoch": 0.4904214559386973,
"grad_norm": 5.6661834716796875,
"learning_rate": 2.996382508630622e-05,
"loss": 1.5923,
"step": 1200
},
{
"epoch": 0.5312899106002554,
"grad_norm": 5.6116790771484375,
"learning_rate": 2.9918374831420734e-05,
"loss": 1.5159,
"step": 1300
},
{
"epoch": 0.5721583652618135,
"grad_norm": 6.842437744140625,
"learning_rate": 2.9854748834802644e-05,
"loss": 1.5072,
"step": 1400
},
{
"epoch": 0.6130268199233716,
"grad_norm": 4.068236827850342,
"learning_rate": 2.977302461492799e-05,
"loss": 1.362,
"step": 1500
},
{
"epoch": 0.6130268199233716,
"eval_accuracy": 0.0,
"eval_confusion_matrix": [
[
0,
0
],
[
2000,
0
]
],
"eval_f1_macro": 0.0,
"eval_humor_english_accuracy": 0.0,
"eval_humor_english_avg_loss": 0.43862412279962604,
"eval_humor_english_f1": 0.0,
"eval_humor_english_precision": 0.0,
"eval_humor_english_recall": 0.0,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.0,
"eval_humor_hindi_avg_loss": 0.2803115557623819,
"eval_humor_hindi_f1": 0.0,
"eval_humor_hindi_precision": 0.0,
"eval_humor_hindi_recall": 0.0,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0,
"eval_humor_overall_avg_loss": 0.3596194935054261,
"eval_humor_overall_f1": 0.0,
"eval_humor_overall_precision": 0.0,
"eval_humor_overall_recall": 0.0,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.8224886059761047,
"eval_perplexity": 2.2761572637241803,
"eval_precision_macro": 0.0,
"eval_recall_macro": 0.0,
"eval_runtime": 1364.4034,
"eval_samples_per_second": 1.466,
"eval_steps_per_second": 0.733,
"eval_temporal_english_avg_loss": 0.431482280489928,
"eval_temporal_hindi_avg_loss": 0.6994093691276505,
"eval_temporal_overall_avg_loss": 0.5653328100846917,
"step": 1500
},
{
"epoch": 0.6538952745849298,
"grad_norm": 6.672731876373291,
"learning_rate": 2.967330174017004e-05,
"loss": 1.3573,
"step": 1600
},
{
"epoch": 0.6947637292464879,
"grad_norm": 4.476126670837402,
"learning_rate": 2.9555701707490556e-05,
"loss": 1.3249,
"step": 1700
},
{
"epoch": 0.735632183908046,
"grad_norm": 4.317950248718262,
"learning_rate": 2.942036779441446e-05,
"loss": 1.2761,
"step": 1800
},
{
"epoch": 0.776500638569604,
"grad_norm": 4.1325578689575195,
"learning_rate": 2.9267464884468245e-05,
"loss": 1.2085,
"step": 1900
},
{
"epoch": 0.8173690932311622,
"grad_norm": 5.426914215087891,
"learning_rate": 2.9097179266294794e-05,
"loss": 1.2643,
"step": 2000
},
{
"epoch": 0.8173690932311622,
"eval_accuracy": 0.0015,
"eval_confusion_matrix": [
[
0,
0
],
[
1997,
3
]
],
"eval_f1_macro": 0.0014977533699450823,
"eval_humor_english_accuracy": 0.0030211480362537764,
"eval_humor_english_avg_loss": 0.38365533949005975,
"eval_humor_english_f1": 0.0030120481927710845,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.0015105740181268882,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.0,
"eval_humor_hindi_avg_loss": 0.23210459279744125,
"eval_humor_hindi_f1": 0.0,
"eval_humor_hindi_precision": 0.0,
"eval_humor_hindi_recall": 0.0,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0015,
"eval_humor_overall_avg_loss": 0.3081119789219082,
"eval_humor_overall_f1": 0.0014977533699450823,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.00075,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.7675837278366089,
"eval_perplexity": 2.1545540160759593,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.00075,
"eval_runtime": 1360.1194,
"eval_samples_per_second": 1.47,
"eval_steps_per_second": 0.735,
"eval_temporal_english_avg_loss": 0.3506820346533618,
"eval_temporal_hindi_avg_loss": 0.5820792646224661,
"eval_temporal_overall_avg_loss": 0.4664542024764292,
"step": 2000
},
{
"epoch": 0.8582375478927203,
"grad_norm": 5.104623794555664,
"learning_rate": 2.8909718406689366e-05,
"loss": 1.2051,
"step": 2100
},
{
"epoch": 0.8991060025542784,
"grad_norm": 4.696566104888916,
"learning_rate": 2.870531069783325e-05,
"loss": 1.1831,
"step": 2200
},
{
"epoch": 0.9399744572158365,
"grad_norm": 4.637947082519531,
"learning_rate": 2.8484205179033096e-05,
"loss": 1.1282,
"step": 2300
},
{
"epoch": 0.9808429118773946,
"grad_norm": 5.258996486663818,
"learning_rate": 2.824667123330487e-05,
"loss": 1.1429,
"step": 2400
},
{
"epoch": 1.021660280970626,
"grad_norm": 4.4945597648620605,
"learning_rate": 2.7992998259172142e-05,
"loss": 1.0593,
"step": 2500
},
{
"epoch": 1.021660280970626,
"eval_accuracy": 0.0745,
"eval_confusion_matrix": [
[
0,
0
],
[
1851,
149
]
],
"eval_f1_macro": 0.0693345742205677,
"eval_humor_english_accuracy": 0.07653575025176233,
"eval_humor_english_avg_loss": 0.34921365948140487,
"eval_humor_english_f1": 0.07109448082319925,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.038267875125881166,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.07249255213505462,
"eval_humor_hindi_avg_loss": 0.20103979772989347,
"eval_humor_hindi_f1": 0.06759259259259259,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.03624627606752731,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0745,
"eval_humor_overall_avg_loss": 0.27532809556280224,
"eval_humor_overall_f1": 0.0693345742205677,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.03725,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.7367945313453674,
"eval_perplexity": 2.0892278709676293,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.03725,
"eval_runtime": 1359.6405,
"eval_samples_per_second": 1.471,
"eval_steps_per_second": 0.735,
"eval_temporal_english_avg_loss": 0.2974580233862613,
"eval_temporal_hindi_avg_loss": 0.5058095154879095,
"eval_temporal_overall_avg_loss": 0.40173021953064236,
"step": 2500
},
{
"epoch": 1.062528735632184,
"grad_norm": 4.575131416320801,
"learning_rate": 2.7723495318078564e-05,
"loss": 0.9702,
"step": 2600
},
{
"epoch": 1.103397190293742,
"grad_norm": 4.862764835357666,
"learning_rate": 2.743849075784412e-05,
"loss": 0.9822,
"step": 2700
},
{
"epoch": 1.1442656449553001,
"grad_norm": 4.071940898895264,
"learning_rate": 2.713833181262386e-05,
"loss": 0.9633,
"step": 2800
},
{
"epoch": 1.1851340996168582,
"grad_norm": 4.650261878967285,
"learning_rate": 2.6823384179856602e-05,
"loss": 1.0072,
"step": 2900
},
{
"epoch": 1.2260025542784163,
"grad_norm": 4.2909321784973145,
"learning_rate": 2.64940315747189e-05,
"loss": 0.9524,
"step": 3000
},
{
"epoch": 1.2260025542784163,
"eval_accuracy": 0.111,
"eval_confusion_matrix": [
[
0,
0
],
[
1778,
222
]
],
"eval_f1_macro": 0.0999099909990999,
"eval_humor_english_accuracy": 0.06847935548841894,
"eval_humor_english_avg_loss": 0.3228903253947321,
"eval_humor_english_f1": 0.0640904806786051,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.03423967774420947,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.1529294935451837,
"eval_humor_hindi_avg_loss": 0.1783881702906315,
"eval_humor_hindi_f1": 0.1326442721791559,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.07646474677259185,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.111,
"eval_humor_overall_avg_loss": 0.2508712931905895,
"eval_humor_overall_f1": 0.0999099909990999,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.0555,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.71168053150177,
"eval_perplexity": 2.037412286720291,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.0555,
"eval_runtime": 1359.5963,
"eval_samples_per_second": 1.471,
"eval_steps_per_second": 0.736,
"eval_temporal_english_avg_loss": 0.25916867434431917,
"eval_temporal_hindi_avg_loss": 0.449419908061452,
"eval_temporal_overall_avg_loss": 0.35426122447886077,
"step": 3000
},
{
"epoch": 1.2668710089399744,
"grad_norm": 4.282441139221191,
"learning_rate": 2.6150675262627243e-05,
"loss": 0.9654,
"step": 3100
},
{
"epoch": 1.3077394636015325,
"grad_norm": 4.853513240814209,
"learning_rate": 2.5793733570357923e-05,
"loss": 0.9484,
"step": 3200
},
{
"epoch": 1.3486079182630908,
"grad_norm": 3.8319053649902344,
"learning_rate": 2.5423641376380324e-05,
"loss": 0.9162,
"step": 3300
},
{
"epoch": 1.3894763729246487,
"grad_norm": 4.360899448394775,
"learning_rate": 2.5040849581024466e-05,
"loss": 0.9263,
"step": 3400
},
{
"epoch": 1.430344827586207,
"grad_norm": 3.914219379425049,
"learning_rate": 2.464582455712844e-05,
"loss": 0.8967,
"step": 3500
},
{
"epoch": 1.430344827586207,
"eval_accuracy": 0.0075,
"eval_confusion_matrix": [
[
0,
0
],
[
1985,
15
]
],
"eval_f1_macro": 0.007444168734491315,
"eval_humor_english_accuracy": 0.004028197381671702,
"eval_humor_english_avg_loss": 0.3037518717352607,
"eval_humor_english_f1": 0.004012036108324975,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.002014098690835851,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.010923535253227408,
"eval_humor_hindi_avg_loss": 0.16131232707652512,
"eval_humor_hindi_f1": 0.010805500982318271,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.005461767626613704,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0075,
"eval_humor_overall_avg_loss": 0.2326810370445648,
"eval_humor_overall_f1": 0.007444168734491315,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.00375,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6824359893798828,
"eval_perplexity": 1.9786919166070414,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.00375,
"eval_runtime": 1319.0244,
"eval_samples_per_second": 1.516,
"eval_steps_per_second": 0.758,
"eval_temporal_english_avg_loss": 0.23054835682895794,
"eval_temporal_hindi_avg_loss": 0.4091248675787164,
"eval_temporal_overall_avg_loss": 0.31947562442693717,
"step": 3500
},
{
"epoch": 1.471213282247765,
"grad_norm": 4.441721439361572,
"learning_rate": 2.423904758183493e-05,
"loss": 0.9344,
"step": 3600
},
{
"epoch": 1.5120817369093231,
"grad_norm": 4.262256145477295,
"learning_rate": 2.3821014250229128e-05,
"loss": 0.8854,
"step": 3700
},
{
"epoch": 1.5529501915708812,
"grad_norm": 4.789700984954834,
"learning_rate": 2.3392233871532504e-05,
"loss": 0.8741,
"step": 3800
},
{
"epoch": 1.5938186462324393,
"grad_norm": 4.243940830230713,
"learning_rate": 2.2953228848587946e-05,
"loss": 0.8728,
"step": 3900
},
{
"epoch": 1.6346871008939976,
"grad_norm": 3.6989076137542725,
"learning_rate": 2.2504534041392377e-05,
"loss": 0.8662,
"step": 4000
},
{
"epoch": 1.6346871008939976,
"eval_accuracy": 0.0345,
"eval_confusion_matrix": [
[
0,
0
],
[
1931,
69
]
],
"eval_f1_macro": 0.0333494441759304,
"eval_humor_english_accuracy": 0.025176233635448138,
"eval_humor_english_avg_loss": 0.2894040331679491,
"eval_humor_english_f1": 0.02455795677799607,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.012588116817724069,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.04369414101290963,
"eval_humor_hindi_avg_loss": 0.1483633538700107,
"eval_humor_hindi_f1": 0.04186489058039962,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.021847070506454815,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0345,
"eval_humor_overall_avg_loss": 0.21891726321483457,
"eval_humor_overall_f1": 0.0333494441759304,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.01725,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6712037324905396,
"eval_perplexity": 1.9565911477290814,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.01725,
"eval_runtime": 1317.9709,
"eval_samples_per_second": 1.517,
"eval_steps_per_second": 0.759,
"eval_temporal_english_avg_loss": 0.20886689280834833,
"eval_temporal_hindi_avg_loss": 0.37523903380129076,
"eval_temporal_overall_avg_loss": 0.29192309029086067,
"step": 4000
},
{
"epoch": 1.6755555555555555,
"grad_norm": 4.29471492767334,
"learning_rate": 2.204669611545222e-05,
"loss": 0.8442,
"step": 4100
},
{
"epoch": 1.7164240102171138,
"grad_norm": 3.6822047233581543,
"learning_rate": 2.1580272875755707e-05,
"loss": 0.8287,
"step": 4200
},
{
"epoch": 1.7572924648786716,
"grad_norm": 3.862697124481201,
"learning_rate": 2.1105832587173418e-05,
"loss": 0.9005,
"step": 4300
},
{
"epoch": 1.79816091954023,
"grad_norm": 4.998415946960449,
"learning_rate": 2.0623953282115073e-05,
"loss": 0.8541,
"step": 4400
},
{
"epoch": 1.839029374201788,
"grad_norm": 4.094598770141602,
"learning_rate": 2.013522205628606e-05,
"loss": 0.8411,
"step": 4500
},
{
"epoch": 1.839029374201788,
"eval_accuracy": 0.0705,
"eval_confusion_matrix": [
[
0,
0
],
[
1859,
141
]
],
"eval_f1_macro": 0.0658570761326483,
"eval_humor_english_accuracy": 0.07452165156092648,
"eval_humor_english_avg_loss": 0.27731017850001527,
"eval_humor_english_f1": 0.06935332708528585,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.03726082578046324,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.06653426017874876,
"eval_humor_hindi_avg_loss": 0.1377803079530714,
"eval_humor_hindi_f1": 0.06238361266294227,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.03326713008937438,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0705,
"eval_humor_overall_avg_loss": 0.20768647205886417,
"eval_humor_overall_f1": 0.0658570761326483,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.03525,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6517492532730103,
"eval_perplexity": 1.9188945335622285,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.03525,
"eval_runtime": 1361.0394,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.735,
"eval_temporal_english_avg_loss": 0.19179030200470823,
"eval_temporal_hindi_avg_loss": 0.348848874167915,
"eval_temporal_overall_avg_loss": 0.2703094946072132,
"step": 4500
},
{
"epoch": 1.879897828863346,
"grad_norm": 4.929368495941162,
"learning_rate": 1.964023435340178e-05,
"loss": 0.8146,
"step": 4600
},
{
"epoch": 1.9207662835249042,
"grad_norm": 3.4482617378234863,
"learning_rate": 1.913959323973119e-05,
"loss": 0.8337,
"step": 4700
},
{
"epoch": 1.9616347381864623,
"grad_norm": 4.268481254577637,
"learning_rate": 1.863390866935344e-05,
"loss": 0.8111,
"step": 4800
},
{
"epoch": 2.0024521072796935,
"grad_norm": 3.2703258991241455,
"learning_rate": 1.8123796741022803e-05,
"loss": 0.8167,
"step": 4900
},
{
"epoch": 2.043320561941252,
"grad_norm": 3.365675926208496,
"learning_rate": 1.7609878947547232e-05,
"loss": 0.7072,
"step": 5000
},
{
"epoch": 2.043320561941252,
"eval_accuracy": 0.031,
"eval_confusion_matrix": [
[
0,
0
],
[
1938,
62
]
],
"eval_f1_macro": 0.030067895247332686,
"eval_humor_english_accuracy": 0.02920443101711984,
"eval_humor_english_avg_loss": 0.26709887150925354,
"eval_humor_english_f1": 0.02837573385518591,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.01460221550855992,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.03277060575968222,
"eval_humor_hindi_avg_loss": 0.1286927389570682,
"eval_humor_hindi_f1": 0.03173076923076923,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.01638530287984111,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.031,
"eval_humor_overall_avg_loss": 0.19798794310908208,
"eval_humor_overall_f1": 0.030067895247332686,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.0155,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6472519040107727,
"eval_perplexity": 1.9102839848633415,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.0155,
"eval_runtime": 1320.0922,
"eval_samples_per_second": 1.515,
"eval_steps_per_second": 0.758,
"eval_temporal_english_avg_loss": 0.17779056256882214,
"eval_temporal_hindi_avg_loss": 0.3266544083817946,
"eval_temporal_overall_avg_loss": 0.2523379425825881,
"step": 5000
},
{
"epoch": 2.0841890166028096,
"grad_norm": 4.201941013336182,
"learning_rate": 1.709278141859511e-05,
"loss": 0.7132,
"step": 5100
},
{
"epoch": 2.125057471264368,
"grad_norm": 4.3990888595581055,
"learning_rate": 1.6573134157852686e-05,
"loss": 0.6732,
"step": 5200
},
{
"epoch": 2.165925925925926,
"grad_norm": 4.231266021728516,
"learning_rate": 1.6051570275461666e-05,
"loss": 0.6784,
"step": 5300
},
{
"epoch": 2.206794380587484,
"grad_norm": 3.714615821838379,
"learning_rate": 1.552872521667198e-05,
"loss": 0.6785,
"step": 5400
},
{
"epoch": 2.247662835249042,
"grad_norm": 3.3602356910705566,
"learning_rate": 1.5005235987649652e-05,
"loss": 0.6877,
"step": 5500
},
{
"epoch": 2.247662835249042,
"eval_accuracy": 0.005,
"eval_confusion_matrix": [
[
0,
0
],
[
1990,
10
]
],
"eval_f1_macro": 0.004975124378109453,
"eval_humor_english_accuracy": 0.004028197381671702,
"eval_humor_english_avg_loss": 0.2567401882446778,
"eval_humor_english_f1": 0.004012036108324975,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.002014098690835851,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.005958291956305859,
"eval_humor_hindi_avg_loss": 0.12068944840764959,
"eval_humor_hindi_f1": 0.005923000987166831,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.0029791459781529296,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.005,
"eval_humor_overall_avg_loss": 0.18884940875979583,
"eval_humor_overall_f1": 0.004975124378109453,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.0025,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6365848183631897,
"eval_perplexity": 1.8900151414329684,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.0025,
"eval_runtime": 1323.2498,
"eval_samples_per_second": 1.511,
"eval_steps_per_second": 0.756,
"eval_temporal_english_avg_loss": 0.16544690548644253,
"eval_temporal_hindi_avg_loss": 0.3068489971076771,
"eval_temporal_overall_avg_loss": 0.23624011447018664,
"step": 5500
},
{
"epoch": 2.2885312899106003,
"grad_norm": 4.01371431350708,
"learning_rate": 1.4481740379382916e-05,
"loss": 0.688,
"step": 5600
},
{
"epoch": 2.329399744572158,
"grad_norm": 4.780395984649658,
"learning_rate": 1.3958876190632131e-05,
"loss": 0.6912,
"step": 5700
},
{
"epoch": 2.3702681992337165,
"grad_norm": 4.804982662200928,
"learning_rate": 1.3437280450870225e-05,
"loss": 0.6524,
"step": 5800
},
{
"epoch": 2.4111366538952748,
"grad_norm": 4.125489711761475,
"learning_rate": 1.2917588644160447e-05,
"loss": 0.659,
"step": 5900
},
{
"epoch": 2.4520051085568326,
"grad_norm": 3.899745225906372,
"learning_rate": 1.2400433934916879e-05,
"loss": 0.6537,
"step": 6000
},
{
"epoch": 2.4520051085568326,
"eval_accuracy": 0.044,
"eval_confusion_matrix": [
[
0,
0
],
[
1912,
88
]
],
"eval_f1_macro": 0.0421455938697318,
"eval_humor_english_accuracy": 0.05236656596173213,
"eval_humor_english_avg_loss": 0.24773680959614575,
"eval_humor_english_f1": 0.049760765550239235,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.026183282980866064,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.03574975173783516,
"eval_humor_hindi_avg_loss": 0.1137336753887836,
"eval_humor_hindi_f1": 0.03451581975071908,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.01787487586891758,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.044,
"eval_humor_overall_avg_loss": 0.18087796053705443,
"eval_humor_overall_f1": 0.0421455938697318,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.022,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6272982358932495,
"eval_perplexity": 1.8725445511265306,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.022,
"eval_runtime": 1361.0844,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.735,
"eval_temporal_english_avg_loss": 0.15524752458450397,
"eval_temporal_hindi_avg_loss": 0.29028965216244074,
"eval_temporal_overall_avg_loss": 0.22285711327104782,
"step": 6000
},
{
"epoch": 2.492873563218391,
"grad_norm": 3.0799901485443115,
"learning_rate": 1.1886446396491155e-05,
"loss": 0.6469,
"step": 6100
},
{
"epoch": 2.533742017879949,
"grad_norm": 4.32665491104126,
"learning_rate": 1.1376252243525146e-05,
"loss": 0.6748,
"step": 6200
},
{
"epoch": 2.574610472541507,
"grad_norm": 4.455280780792236,
"learning_rate": 1.0870473069004852e-05,
"loss": 0.654,
"step": 6300
},
{
"epoch": 2.615478927203065,
"grad_norm": 3.6130049228668213,
"learning_rate": 1.0369725086945106e-05,
"loss": 0.6495,
"step": 6400
},
{
"epoch": 2.6563473818646233,
"grad_norm": 4.499478816986084,
"learning_rate": 9.874618381627751e-06,
"loss": 0.6443,
"step": 6500
},
{
"epoch": 2.6563473818646233,
"eval_accuracy": 0.117,
"eval_confusion_matrix": [
[
0,
0
],
[
1766,
234
]
],
"eval_f1_macro": 0.10474485228290063,
"eval_humor_english_accuracy": 0.0986908358509567,
"eval_humor_english_avg_loss": 0.24009977730751,
"eval_humor_english_f1": 0.08982584784601283,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.04934541792547835,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.13505461767626614,
"eval_humor_hindi_avg_loss": 0.10769947945037281,
"eval_humor_hindi_f1": 0.1189851268591426,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.06752730883813307,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.117,
"eval_humor_overall_avg_loss": 0.17395776182075373,
"eval_humor_overall_f1": 0.10474485228290063,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.0585,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6204598546028137,
"eval_perplexity": 1.8597831080769023,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.0585,
"eval_runtime": 1361.8863,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.734,
"eval_temporal_english_avg_loss": 0.1464234281813041,
"eval_temporal_hindi_avg_loss": 0.27630730096865136,
"eval_temporal_overall_avg_loss": 0.21141392534496303,
"step": 6500
},
{
"epoch": 2.6972158365261816,
"grad_norm": 3.4488909244537354,
"learning_rate": 9.385756164307868e-06,
"loss": 0.6831,
"step": 6600
},
{
"epoch": 2.7380842911877394,
"grad_norm": 4.000279903411865,
"learning_rate": 8.903734038293804e-06,
"loss": 0.6437,
"step": 6700
},
{
"epoch": 2.7789527458492973,
"grad_norm": 4.4706220626831055,
"learning_rate": 8.429139273296304e-06,
"loss": 0.6284,
"step": 6800
},
{
"epoch": 2.8198212005108556,
"grad_norm": 3.7017948627471924,
"learning_rate": 7.96255008993078e-06,
"loss": 0.6357,
"step": 6900
},
{
"epoch": 2.860689655172414,
"grad_norm": 4.603327751159668,
"learning_rate": 7.504534955244629e-06,
"loss": 0.6207,
"step": 7000
},
{
"epoch": 2.860689655172414,
"eval_accuracy": 0.0635,
"eval_confusion_matrix": [
[
0,
0
],
[
1873,
127
]
],
"eval_f1_macro": 0.059708509637987774,
"eval_humor_english_accuracy": 0.05639476334340383,
"eval_humor_english_avg_loss": 0.2334255897185822,
"eval_humor_english_f1": 0.05338417540514776,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.028197381671701913,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.070506454816286,
"eval_humor_hindi_avg_loss": 0.10243915956694087,
"eval_humor_hindi_f1": 0.06586270871985157,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.035253227408143,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0635,
"eval_humor_overall_avg_loss": 0.1679845057619467,
"eval_humor_overall_f1": 0.059708509637987774,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.03175,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6139324903488159,
"eval_perplexity": 1.847683260757382,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.03175,
"eval_runtime": 1315.241,
"eval_samples_per_second": 1.521,
"eval_steps_per_second": 0.76,
"eval_temporal_english_avg_loss": 0.138804246816629,
"eval_temporal_hindi_avg_loss": 0.2640184406992659,
"eval_temporal_overall_avg_loss": 0.20143722001935127,
"step": 7000
},
{
"epoch": 2.9015581098339718,
"grad_norm": 3.4093639850616455,
"learning_rate": 7.055651890127625e-06,
"loss": 0.631,
"step": 7100
},
{
"epoch": 2.94242656449553,
"grad_norm": 3.2841861248016357,
"learning_rate": 6.616447789449488e-06,
"loss": 0.6271,
"step": 7200
},
{
"epoch": 2.9832950191570884,
"grad_norm": 2.856577157974243,
"learning_rate": 6.187457755752684e-06,
"loss": 0.6457,
"step": 7300
},
{
"epoch": 3.024112388250319,
"grad_norm": 4.275922775268555,
"learning_rate": 5.7692044473124276e-06,
"loss": 0.5875,
"step": 7400
},
{
"epoch": 3.0649808429118774,
"grad_norm": 4.011717319488525,
"learning_rate": 5.362197441358068e-06,
"loss": 0.5295,
"step": 7500
},
{
"epoch": 3.0649808429118774,
"eval_accuracy": 0.1215,
"eval_confusion_matrix": [
[
0,
0
],
[
1757,
243
]
],
"eval_f1_macro": 0.10833704859563086,
"eval_humor_english_accuracy": 0.12185297079556898,
"eval_humor_english_avg_loss": 0.22710613908185748,
"eval_humor_english_f1": 0.10861759425493717,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.06092648539778449,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.1211519364448858,
"eval_humor_hindi_avg_loss": 0.09781679747076281,
"eval_humor_hindi_f1": 0.10806023029229407,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.0605759682224429,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.1215,
"eval_humor_overall_avg_loss": 0.16250835471025796,
"eval_humor_overall_f1": 0.10833704859563086,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.06075,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6209594011306763,
"eval_perplexity": 1.8607123988418854,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.06075,
"eval_runtime": 1319.9889,
"eval_samples_per_second": 1.515,
"eval_steps_per_second": 0.758,
"eval_temporal_english_avg_loss": 0.13217057422883527,
"eval_temporal_hindi_avg_loss": 0.25253982628289234,
"eval_temporal_overall_avg_loss": 0.19244430216349742,
"step": 7500
},
{
"epoch": 3.1058492975734353,
"grad_norm": 3.4078967571258545,
"learning_rate": 4.966932613231739e-06,
"loss": 0.5529,
"step": 7600
},
{
"epoch": 3.1467177522349936,
"grad_norm": 3.757827043533325,
"learning_rate": 4.583891532240636e-06,
"loss": 0.5268,
"step": 7700
},
{
"epoch": 3.187586206896552,
"grad_norm": 3.6647462844848633,
"learning_rate": 4.213540874938976e-06,
"loss": 0.5249,
"step": 7800
},
{
"epoch": 3.22845466155811,
"grad_norm": 3.4517507553100586,
"learning_rate": 3.856331856554558e-06,
"loss": 0.5423,
"step": 7900
},
{
"epoch": 3.269323116219668,
"grad_norm": 3.9157795906066895,
"learning_rate": 3.5126996812524377e-06,
"loss": 0.5311,
"step": 8000
},
{
"epoch": 3.269323116219668,
"eval_accuracy": 0.1045,
"eval_confusion_matrix": [
[
0,
0
],
[
1791,
209
]
],
"eval_f1_macro": 0.0946129470348574,
"eval_humor_english_accuracy": 0.10372608257804633,
"eval_humor_english_avg_loss": 0.2202950545789041,
"eval_humor_english_f1": 0.09397810218978102,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.051863041289023165,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.10526315789473684,
"eval_humor_hindi_avg_loss": 0.09350429856929683,
"eval_humor_hindi_f1": 0.09523809523809523,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.05263157894736842,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.1045,
"eval_humor_overall_avg_loss": 0.15700095058811175,
"eval_humor_overall_f1": 0.0946129470348574,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.05225,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.621131181716919,
"eval_perplexity": 1.861031944826067,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.05225,
"eval_runtime": 1318.5114,
"eval_samples_per_second": 1.517,
"eval_steps_per_second": 0.758,
"eval_temporal_english_avg_loss": 0.1261224628154746,
"eval_temporal_hindi_avg_loss": 0.2422587038177751,
"eval_temporal_overall_avg_loss": 0.18421073652827494,
"step": 8000
},
{
"epoch": 3.310191570881226,
"grad_norm": 3.881890058517456,
"learning_rate": 3.183063011905744e-06,
"loss": 0.5225,
"step": 8100
},
{
"epoch": 3.3510600255427843,
"grad_norm": 4.492738723754883,
"learning_rate": 2.8678234600194414e-06,
"loss": 0.5358,
"step": 8200
},
{
"epoch": 3.391928480204342,
"grad_norm": 3.7062699794769287,
"learning_rate": 2.5673650964285718e-06,
"loss": 0.531,
"step": 8300
},
{
"epoch": 3.4327969348659004,
"grad_norm": 4.436982154846191,
"learning_rate": 2.2820539833671085e-06,
"loss": 0.5394,
"step": 8400
},
{
"epoch": 3.4736653895274587,
"grad_norm": 3.07208251953125,
"learning_rate": 2.0122377284775136e-06,
"loss": 0.5136,
"step": 8500
},
{
"epoch": 3.4736653895274587,
"eval_accuracy": 0.097,
"eval_confusion_matrix": [
[
0,
0
],
[
1806,
194
]
],
"eval_f1_macro": 0.08842297174111212,
"eval_humor_english_accuracy": 0.09667673716012085,
"eval_humor_english_avg_loss": 0.21453529215166584,
"eval_humor_english_f1": 0.0881542699724518,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.04833836858006042,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.09731876861966236,
"eval_humor_hindi_avg_loss": 0.08955117555728288,
"eval_humor_hindi_f1": 0.08868778280542987,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.04865938430983118,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.097,
"eval_humor_overall_avg_loss": 0.15204923117292898,
"eval_humor_overall_f1": 0.08842297174111212,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.0485,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.618234395980835,
"eval_perplexity": 1.8556488288019473,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.0485,
"eval_runtime": 1365.0675,
"eval_samples_per_second": 1.465,
"eval_steps_per_second": 0.733,
"eval_temporal_english_avg_loss": 0.12083532439477838,
"eval_temporal_hindi_avg_loss": 0.23295580905530663,
"eval_temporal_overall_avg_loss": 0.17692150853084088,
"step": 8500
},
{
"epoch": 3.5145338441890166,
"grad_norm": 4.052462577819824,
"learning_rate": 2.1103441209615935e-05,
"loss": 0.5403,
"step": 8600
},
{
"epoch": 3.5554022988505745,
"grad_norm": 4.443219184875488,
"learning_rate": 2.0863376714130244e-05,
"loss": 0.5796,
"step": 8700
},
{
"epoch": 3.5962707535121328,
"grad_norm": 4.265786170959473,
"learning_rate": 2.0621526177297613e-05,
"loss": 0.5538,
"step": 8800
},
{
"epoch": 3.637139208173691,
"grad_norm": 3.9799318313598633,
"learning_rate": 2.037796326913444e-05,
"loss": 0.5683,
"step": 8900
},
{
"epoch": 3.678007662835249,
"grad_norm": 5.084273815155029,
"learning_rate": 2.0132762181261985e-05,
"loss": 0.5716,
"step": 9000
},
{
"epoch": 3.678007662835249,
"eval_accuracy": 0.0995,
"eval_f1_macro": 0.0904956798544793,
"eval_humor_english_accuracy": 0.05740181268882175,
"eval_humor_english_avg_loss": 0.117691616244732,
"eval_humor_english_f1": 0.054285714285714284,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.028700906344410877,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.141012909632572,
"eval_humor_hindi_avg_loss": 0.031129881199932288,
"eval_humor_hindi_f1": 0.12358572671888599,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.070506454816286,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0995,
"eval_humor_overall_avg_loss": 0.07511869748413463,
"eval_humor_overall_f1": 0.0904956798544793,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.04975,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6332020163536072,
"eval_perplexity": 1.8836323104356882,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.04975,
"eval_runtime": 1319.1571,
"eval_samples_per_second": 1.516,
"eval_steps_per_second": 0.758,
"eval_temporal_english_avg_loss": 0.03940154156217748,
"eval_temporal_hindi_avg_loss": 0.09286631036147072,
"eval_temporal_overall_avg_loss": 0.0664199314402633,
"step": 9000
},
{
"epoch": 3.7188761174968072,
"grad_norm": 4.418786525726318,
"learning_rate": 1.9885997604306933e-05,
"loss": 0.5554,
"step": 9100
},
{
"epoch": 3.759744572158365,
"grad_norm": 3.633528470993042,
"learning_rate": 1.9637744705149887e-05,
"loss": 0.5581,
"step": 9200
},
{
"epoch": 3.8006130268199234,
"grad_norm": 4.484210014343262,
"learning_rate": 1.938807910402881e-05,
"loss": 0.5744,
"step": 9300
},
{
"epoch": 3.8414814814814813,
"grad_norm": 3.976949453353882,
"learning_rate": 1.913707685150437e-05,
"loss": 0.5831,
"step": 9400
},
{
"epoch": 3.8823499361430396,
"grad_norm": 4.015331745147705,
"learning_rate": 1.88848144052942e-05,
"loss": 0.5658,
"step": 9500
},
{
"epoch": 3.8823499361430396,
"eval_accuracy": 0.0505,
"eval_f1_macro": 0.04807234650166587,
"eval_humor_english_accuracy": 0.04934541792547835,
"eval_humor_english_avg_loss": 0.11883179473594759,
"eval_humor_english_f1": 0.04702495201535509,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.024672708962739175,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.05163853028798411,
"eval_humor_hindi_avg_loss": 0.030766832375286615,
"eval_humor_hindi_f1": 0.049102927289896126,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.025819265143992055,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.0505,
"eval_humor_overall_avg_loss": 0.07535547554798036,
"eval_humor_overall_f1": 0.04807234650166587,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.02525,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6302640438079834,
"eval_perplexity": 1.8781064395523288,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.02525,
"eval_runtime": 1320.57,
"eval_samples_per_second": 1.514,
"eval_steps_per_second": 0.757,
"eval_temporal_english_avg_loss": 0.03935090346366541,
"eval_temporal_hindi_avg_loss": 0.0936906353848563,
"eval_temporal_overall_avg_loss": 0.06659941324144447,
"step": 9500
},
{
"epoch": 3.923218390804598,
"grad_norm": 3.6775312423706055,
"learning_rate": 1.8631368606983134e-05,
"loss": 0.5588,
"step": 9600
},
{
"epoch": 3.9640868454661558,
"grad_norm": 3.3025190830230713,
"learning_rate": 1.8376816658616527e-05,
"loss": 0.5638,
"step": 9700
},
{
"epoch": 4.005312899106003,
"grad_norm": 5.129138469696045,
"learning_rate": 1.8121236099183743e-05,
"loss": 0.5504,
"step": 9800
},
{
"epoch": 4.046181353767561,
"grad_norm": 4.347407817840576,
"learning_rate": 1.7864704780999054e-05,
"loss": 0.5361,
"step": 9900
},
{
"epoch": 4.087049808429119,
"grad_norm": 5.224924564361572,
"learning_rate": 1.7607300845987055e-05,
"loss": 0.5143,
"step": 10000
},
{
"epoch": 4.087049808429119,
"eval_accuracy": 0.132,
"eval_f1_macro": 0.1166077738515901,
"eval_humor_english_accuracy": 0.1379657603222558,
"eval_humor_english_avg_loss": 0.117569191649303,
"eval_humor_english_f1": 0.12123893805309735,
"eval_humor_english_precision": 0.5,
"eval_humor_english_recall": 0.0689828801611279,
"eval_humor_english_samples": 993,
"eval_humor_hindi_accuracy": 0.12611717974180736,
"eval_humor_hindi_avg_loss": 0.030592078067631787,
"eval_humor_hindi_f1": 0.11199294532627865,
"eval_humor_hindi_precision": 0.5,
"eval_humor_hindi_recall": 0.06305858987090368,
"eval_humor_hindi_samples": 1007,
"eval_humor_overall_accuracy": 0.132,
"eval_humor_overall_avg_loss": 0.07420280479598583,
"eval_humor_overall_f1": 0.1166077738515901,
"eval_humor_overall_precision": 0.5,
"eval_humor_overall_recall": 0.066,
"eval_humor_overall_samples": 2000,
"eval_loss": 0.6409682035446167,
"eval_perplexity": 1.8983179897511162,
"eval_precision_macro": 0.5,
"eval_recall_macro": 0.066,
"eval_runtime": 1308.5558,
"eval_samples_per_second": 1.528,
"eval_steps_per_second": 0.764,
"eval_temporal_english_avg_loss": 0.039093032986785355,
"eval_temporal_hindi_avg_loss": 0.09288990841599172,
"eval_temporal_overall_avg_loss": 0.0659479490747604,
"step": 10000
}
],
"logging_steps": 100,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.2097341741727744e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}