Text Generation
Transformers
Safetensors
Hindi
English
deepseek_v3
mizo
english
hindi
multilingual
indic
humor detection
temporal reasoning
text-generation-inference
Instructions to use ameykaran/DilLeiX-it with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ameykaran/DilLeiX-it with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ameykaran/DilLeiX-it")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ameykaran/DilLeiX-it")
model = AutoModelForCausalLM.from_pretrained("ameykaran/DilLeiX-it")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use ameykaran/DilLeiX-it with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ameykaran/DilLeiX-it"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ameykaran/DilLeiX-it",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/ameykaran/DilLeiX-it
- SGLang
How to use ameykaran/DilLeiX-it with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ameykaran/DilLeiX-it" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ameykaran/DilLeiX-it",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ameykaran/DilLeiX-it" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ameykaran/DilLeiX-it",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use ameykaran/DilLeiX-it with Docker Model Runner:
docker model run hf.co/ameykaran/DilLeiX-it
| { | |
| "best_global_step": 7000, | |
| "best_metric": 0.6139324903488159, | |
| "best_model_checkpoint": "/scratch/ameyk/lma/model-finetuned/checkpoints/checkpoint-7000", | |
| "epoch": 4.087049808429119, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04086845466155811, | |
| "grad_norm": 60.702266693115234, | |
| "learning_rate": 2.9700000000000004e-06, | |
| "loss": 17.819, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08173690932311622, | |
| "grad_norm": 47.32134246826172, | |
| "learning_rate": 5.9700000000000004e-06, | |
| "loss": 7.7058, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12260536398467432, | |
| "grad_norm": 31.054290771484375, | |
| "learning_rate": 8.97e-06, | |
| "loss": 5.103, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16347381864623245, | |
| "grad_norm": 25.974103927612305, | |
| "learning_rate": 1.197e-05, | |
| "loss": 4.0185, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.20434227330779056, | |
| "grad_norm": 19.508167266845703, | |
| "learning_rate": 1.497e-05, | |
| "loss": 3.3012, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20434227330779056, | |
| "eval_accuracy": 0.0, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2000, | |
| 0 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0, | |
| "eval_humor_english_accuracy": 0.0, | |
| "eval_humor_english_avg_loss": 0.7475204364605462, | |
| "eval_humor_english_f1": 0.0, | |
| "eval_humor_english_precision": 0.0, | |
| "eval_humor_english_recall": 0.0, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.0, | |
| "eval_humor_hindi_avg_loss": 0.591034443622271, | |
| "eval_humor_hindi_f1": 0.0, | |
| "eval_humor_hindi_precision": 0.0, | |
| "eval_humor_hindi_recall": 0.0, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0, | |
| "eval_humor_overall_avg_loss": 0.6701178589073499, | |
| "eval_humor_overall_f1": 0.0, | |
| "eval_humor_overall_precision": 0.0, | |
| "eval_humor_overall_recall": 0.0, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 1.5324292182922363, | |
| "eval_perplexity": 4.629409284567538, | |
| "eval_precision_macro": 0.0, | |
| "eval_recall_macro": 0.0, | |
| "eval_runtime": 1346.3326, | |
| "eval_samples_per_second": 1.486, | |
| "eval_steps_per_second": 0.743, | |
| "eval_temporal_english_avg_loss": 0.9373093255760758, | |
| "eval_temporal_hindi_avg_loss": 1.4527543705181314, | |
| "eval_temporal_overall_avg_loss": 1.1912949920150167, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.24521072796934865, | |
| "grad_norm": 14.092637062072754, | |
| "learning_rate": 1.797e-05, | |
| "loss": 2.8053, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.28607918263090676, | |
| "grad_norm": 9.543878555297852, | |
| "learning_rate": 2.097e-05, | |
| "loss": 2.393, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3269476372924649, | |
| "grad_norm": 7.029173851013184, | |
| "learning_rate": 2.3970000000000003e-05, | |
| "loss": 2.1078, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.367816091954023, | |
| "grad_norm": 6.117612361907959, | |
| "learning_rate": 2.697e-05, | |
| "loss": 1.924, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4086845466155811, | |
| "grad_norm": 6.306148052215576, | |
| "learning_rate": 2.997e-05, | |
| "loss": 1.7717, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4086845466155811, | |
| "eval_accuracy": 0.0, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2000, | |
| 0 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0, | |
| "eval_humor_english_accuracy": 0.0, | |
| "eval_humor_english_avg_loss": 0.5384818354636839, | |
| "eval_humor_english_f1": 0.0, | |
| "eval_humor_english_precision": 0.0, | |
| "eval_humor_english_recall": 0.0, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.0, | |
| "eval_humor_hindi_avg_loss": 0.3663946280421942, | |
| "eval_humor_hindi_f1": 0.0, | |
| "eval_humor_hindi_precision": 0.0, | |
| "eval_humor_hindi_recall": 0.0, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0, | |
| "eval_humor_overall_avg_loss": 0.4519576721899176, | |
| "eval_humor_overall_f1": 0.0, | |
| "eval_humor_overall_precision": 0.0, | |
| "eval_humor_overall_recall": 0.0, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.9522949457168579, | |
| "eval_perplexity": 2.5916505672889882, | |
| "eval_precision_macro": 0.0, | |
| "eval_recall_macro": 0.0, | |
| "eval_runtime": 1361.0739, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.735, | |
| "eval_temporal_english_avg_loss": 0.5830070118860753, | |
| "eval_temporal_hindi_avg_loss": 0.9119034483581334, | |
| "eval_temporal_overall_avg_loss": 0.7474362385274357, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4495530012771392, | |
| "grad_norm": 5.973597526550293, | |
| "learning_rate": 2.9991044225324593e-05, | |
| "loss": 1.6392, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4904214559386973, | |
| "grad_norm": 5.6661834716796875, | |
| "learning_rate": 2.996382508630622e-05, | |
| "loss": 1.5923, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5312899106002554, | |
| "grad_norm": 5.6116790771484375, | |
| "learning_rate": 2.9918374831420734e-05, | |
| "loss": 1.5159, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5721583652618135, | |
| "grad_norm": 6.842437744140625, | |
| "learning_rate": 2.9854748834802644e-05, | |
| "loss": 1.5072, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6130268199233716, | |
| "grad_norm": 4.068236827850342, | |
| "learning_rate": 2.977302461492799e-05, | |
| "loss": 1.362, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6130268199233716, | |
| "eval_accuracy": 0.0, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2000, | |
| 0 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0, | |
| "eval_humor_english_accuracy": 0.0, | |
| "eval_humor_english_avg_loss": 0.43862412279962604, | |
| "eval_humor_english_f1": 0.0, | |
| "eval_humor_english_precision": 0.0, | |
| "eval_humor_english_recall": 0.0, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.0, | |
| "eval_humor_hindi_avg_loss": 0.2803115557623819, | |
| "eval_humor_hindi_f1": 0.0, | |
| "eval_humor_hindi_precision": 0.0, | |
| "eval_humor_hindi_recall": 0.0, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0, | |
| "eval_humor_overall_avg_loss": 0.3596194935054261, | |
| "eval_humor_overall_f1": 0.0, | |
| "eval_humor_overall_precision": 0.0, | |
| "eval_humor_overall_recall": 0.0, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.8224886059761047, | |
| "eval_perplexity": 2.2761572637241803, | |
| "eval_precision_macro": 0.0, | |
| "eval_recall_macro": 0.0, | |
| "eval_runtime": 1364.4034, | |
| "eval_samples_per_second": 1.466, | |
| "eval_steps_per_second": 0.733, | |
| "eval_temporal_english_avg_loss": 0.431482280489928, | |
| "eval_temporal_hindi_avg_loss": 0.6994093691276505, | |
| "eval_temporal_overall_avg_loss": 0.5653328100846917, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6538952745849298, | |
| "grad_norm": 6.672731876373291, | |
| "learning_rate": 2.967330174017004e-05, | |
| "loss": 1.3573, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6947637292464879, | |
| "grad_norm": 4.476126670837402, | |
| "learning_rate": 2.9555701707490556e-05, | |
| "loss": 1.3249, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.735632183908046, | |
| "grad_norm": 4.317950248718262, | |
| "learning_rate": 2.942036779441446e-05, | |
| "loss": 1.2761, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.776500638569604, | |
| "grad_norm": 4.1325578689575195, | |
| "learning_rate": 2.9267464884468245e-05, | |
| "loss": 1.2085, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8173690932311622, | |
| "grad_norm": 5.426914215087891, | |
| "learning_rate": 2.9097179266294794e-05, | |
| "loss": 1.2643, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8173690932311622, | |
| "eval_accuracy": 0.0015, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1997, | |
| 3 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0014977533699450823, | |
| "eval_humor_english_accuracy": 0.0030211480362537764, | |
| "eval_humor_english_avg_loss": 0.38365533949005975, | |
| "eval_humor_english_f1": 0.0030120481927710845, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.0015105740181268882, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.0, | |
| "eval_humor_hindi_avg_loss": 0.23210459279744125, | |
| "eval_humor_hindi_f1": 0.0, | |
| "eval_humor_hindi_precision": 0.0, | |
| "eval_humor_hindi_recall": 0.0, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0015, | |
| "eval_humor_overall_avg_loss": 0.3081119789219082, | |
| "eval_humor_overall_f1": 0.0014977533699450823, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.00075, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.7675837278366089, | |
| "eval_perplexity": 2.1545540160759593, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.00075, | |
| "eval_runtime": 1360.1194, | |
| "eval_samples_per_second": 1.47, | |
| "eval_steps_per_second": 0.735, | |
| "eval_temporal_english_avg_loss": 0.3506820346533618, | |
| "eval_temporal_hindi_avg_loss": 0.5820792646224661, | |
| "eval_temporal_overall_avg_loss": 0.4664542024764292, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8582375478927203, | |
| "grad_norm": 5.104623794555664, | |
| "learning_rate": 2.8909718406689366e-05, | |
| "loss": 1.2051, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8991060025542784, | |
| "grad_norm": 4.696566104888916, | |
| "learning_rate": 2.870531069783325e-05, | |
| "loss": 1.1831, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9399744572158365, | |
| "grad_norm": 4.637947082519531, | |
| "learning_rate": 2.8484205179033096e-05, | |
| "loss": 1.1282, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9808429118773946, | |
| "grad_norm": 5.258996486663818, | |
| "learning_rate": 2.824667123330487e-05, | |
| "loss": 1.1429, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.021660280970626, | |
| "grad_norm": 4.4945597648620605, | |
| "learning_rate": 2.7992998259172142e-05, | |
| "loss": 1.0593, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.021660280970626, | |
| "eval_accuracy": 0.0745, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1851, | |
| 149 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0693345742205677, | |
| "eval_humor_english_accuracy": 0.07653575025176233, | |
| "eval_humor_english_avg_loss": 0.34921365948140487, | |
| "eval_humor_english_f1": 0.07109448082319925, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.038267875125881166, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.07249255213505462, | |
| "eval_humor_hindi_avg_loss": 0.20103979772989347, | |
| "eval_humor_hindi_f1": 0.06759259259259259, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.03624627606752731, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0745, | |
| "eval_humor_overall_avg_loss": 0.27532809556280224, | |
| "eval_humor_overall_f1": 0.0693345742205677, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.03725, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.7367945313453674, | |
| "eval_perplexity": 2.0892278709676293, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.03725, | |
| "eval_runtime": 1359.6405, | |
| "eval_samples_per_second": 1.471, | |
| "eval_steps_per_second": 0.735, | |
| "eval_temporal_english_avg_loss": 0.2974580233862613, | |
| "eval_temporal_hindi_avg_loss": 0.5058095154879095, | |
| "eval_temporal_overall_avg_loss": 0.40173021953064236, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.062528735632184, | |
| "grad_norm": 4.575131416320801, | |
| "learning_rate": 2.7723495318078564e-05, | |
| "loss": 0.9702, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.103397190293742, | |
| "grad_norm": 4.862764835357666, | |
| "learning_rate": 2.743849075784412e-05, | |
| "loss": 0.9822, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.1442656449553001, | |
| "grad_norm": 4.071940898895264, | |
| "learning_rate": 2.713833181262386e-05, | |
| "loss": 0.9633, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.1851340996168582, | |
| "grad_norm": 4.650261878967285, | |
| "learning_rate": 2.6823384179856602e-05, | |
| "loss": 1.0072, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.2260025542784163, | |
| "grad_norm": 4.2909321784973145, | |
| "learning_rate": 2.64940315747189e-05, | |
| "loss": 0.9524, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2260025542784163, | |
| "eval_accuracy": 0.111, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1778, | |
| 222 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0999099909990999, | |
| "eval_humor_english_accuracy": 0.06847935548841894, | |
| "eval_humor_english_avg_loss": 0.3228903253947321, | |
| "eval_humor_english_f1": 0.0640904806786051, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.03423967774420947, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.1529294935451837, | |
| "eval_humor_hindi_avg_loss": 0.1783881702906315, | |
| "eval_humor_hindi_f1": 0.1326442721791559, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.07646474677259185, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.111, | |
| "eval_humor_overall_avg_loss": 0.2508712931905895, | |
| "eval_humor_overall_f1": 0.0999099909990999, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.0555, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.71168053150177, | |
| "eval_perplexity": 2.037412286720291, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.0555, | |
| "eval_runtime": 1359.5963, | |
| "eval_samples_per_second": 1.471, | |
| "eval_steps_per_second": 0.736, | |
| "eval_temporal_english_avg_loss": 0.25916867434431917, | |
| "eval_temporal_hindi_avg_loss": 0.449419908061452, | |
| "eval_temporal_overall_avg_loss": 0.35426122447886077, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2668710089399744, | |
| "grad_norm": 4.282441139221191, | |
| "learning_rate": 2.6150675262627243e-05, | |
| "loss": 0.9654, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.3077394636015325, | |
| "grad_norm": 4.853513240814209, | |
| "learning_rate": 2.5793733570357923e-05, | |
| "loss": 0.9484, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.3486079182630908, | |
| "grad_norm": 3.8319053649902344, | |
| "learning_rate": 2.5423641376380324e-05, | |
| "loss": 0.9162, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.3894763729246487, | |
| "grad_norm": 4.360899448394775, | |
| "learning_rate": 2.5040849581024466e-05, | |
| "loss": 0.9263, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.430344827586207, | |
| "grad_norm": 3.914219379425049, | |
| "learning_rate": 2.464582455712844e-05, | |
| "loss": 0.8967, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.430344827586207, | |
| "eval_accuracy": 0.0075, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1985, | |
| 15 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.007444168734491315, | |
| "eval_humor_english_accuracy": 0.004028197381671702, | |
| "eval_humor_english_avg_loss": 0.3037518717352607, | |
| "eval_humor_english_f1": 0.004012036108324975, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.002014098690835851, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.010923535253227408, | |
| "eval_humor_hindi_avg_loss": 0.16131232707652512, | |
| "eval_humor_hindi_f1": 0.010805500982318271, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.005461767626613704, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0075, | |
| "eval_humor_overall_avg_loss": 0.2326810370445648, | |
| "eval_humor_overall_f1": 0.007444168734491315, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.00375, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6824359893798828, | |
| "eval_perplexity": 1.9786919166070414, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.00375, | |
| "eval_runtime": 1319.0244, | |
| "eval_samples_per_second": 1.516, | |
| "eval_steps_per_second": 0.758, | |
| "eval_temporal_english_avg_loss": 0.23054835682895794, | |
| "eval_temporal_hindi_avg_loss": 0.4091248675787164, | |
| "eval_temporal_overall_avg_loss": 0.31947562442693717, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.471213282247765, | |
| "grad_norm": 4.441721439361572, | |
| "learning_rate": 2.423904758183493e-05, | |
| "loss": 0.9344, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.5120817369093231, | |
| "grad_norm": 4.262256145477295, | |
| "learning_rate": 2.3821014250229128e-05, | |
| "loss": 0.8854, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.5529501915708812, | |
| "grad_norm": 4.789700984954834, | |
| "learning_rate": 2.3392233871532504e-05, | |
| "loss": 0.8741, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5938186462324393, | |
| "grad_norm": 4.243940830230713, | |
| "learning_rate": 2.2953228848587946e-05, | |
| "loss": 0.8728, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.6346871008939976, | |
| "grad_norm": 3.6989076137542725, | |
| "learning_rate": 2.2504534041392377e-05, | |
| "loss": 0.8662, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6346871008939976, | |
| "eval_accuracy": 0.0345, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1931, | |
| 69 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0333494441759304, | |
| "eval_humor_english_accuracy": 0.025176233635448138, | |
| "eval_humor_english_avg_loss": 0.2894040331679491, | |
| "eval_humor_english_f1": 0.02455795677799607, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.012588116817724069, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.04369414101290963, | |
| "eval_humor_hindi_avg_loss": 0.1483633538700107, | |
| "eval_humor_hindi_f1": 0.04186489058039962, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.021847070506454815, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0345, | |
| "eval_humor_overall_avg_loss": 0.21891726321483457, | |
| "eval_humor_overall_f1": 0.0333494441759304, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.01725, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6712037324905396, | |
| "eval_perplexity": 1.9565911477290814, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.01725, | |
| "eval_runtime": 1317.9709, | |
| "eval_samples_per_second": 1.517, | |
| "eval_steps_per_second": 0.759, | |
| "eval_temporal_english_avg_loss": 0.20886689280834833, | |
| "eval_temporal_hindi_avg_loss": 0.37523903380129076, | |
| "eval_temporal_overall_avg_loss": 0.29192309029086067, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6755555555555555, | |
| "grad_norm": 4.29471492767334, | |
| "learning_rate": 2.204669611545222e-05, | |
| "loss": 0.8442, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.7164240102171138, | |
| "grad_norm": 3.6822047233581543, | |
| "learning_rate": 2.1580272875755707e-05, | |
| "loss": 0.8287, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.7572924648786716, | |
| "grad_norm": 3.862697124481201, | |
| "learning_rate": 2.1105832587173418e-05, | |
| "loss": 0.9005, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.79816091954023, | |
| "grad_norm": 4.998415946960449, | |
| "learning_rate": 2.0623953282115073e-05, | |
| "loss": 0.8541, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.839029374201788, | |
| "grad_norm": 4.094598770141602, | |
| "learning_rate": 2.013522205628606e-05, | |
| "loss": 0.8411, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.839029374201788, | |
| "eval_accuracy": 0.0705, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1859, | |
| 141 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0658570761326483, | |
| "eval_humor_english_accuracy": 0.07452165156092648, | |
| "eval_humor_english_avg_loss": 0.27731017850001527, | |
| "eval_humor_english_f1": 0.06935332708528585, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.03726082578046324, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.06653426017874876, | |
| "eval_humor_hindi_avg_loss": 0.1377803079530714, | |
| "eval_humor_hindi_f1": 0.06238361266294227, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.03326713008937438, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0705, | |
| "eval_humor_overall_avg_loss": 0.20768647205886417, | |
| "eval_humor_overall_f1": 0.0658570761326483, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.03525, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6517492532730103, | |
| "eval_perplexity": 1.9188945335622285, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.03525, | |
| "eval_runtime": 1361.0394, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.735, | |
| "eval_temporal_english_avg_loss": 0.19179030200470823, | |
| "eval_temporal_hindi_avg_loss": 0.348848874167915, | |
| "eval_temporal_overall_avg_loss": 0.2703094946072132, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.879897828863346, | |
| "grad_norm": 4.929368495941162, | |
| "learning_rate": 1.964023435340178e-05, | |
| "loss": 0.8146, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.9207662835249042, | |
| "grad_norm": 3.4482617378234863, | |
| "learning_rate": 1.913959323973119e-05, | |
| "loss": 0.8337, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.9616347381864623, | |
| "grad_norm": 4.268481254577637, | |
| "learning_rate": 1.863390866935344e-05, | |
| "loss": 0.8111, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.0024521072796935, | |
| "grad_norm": 3.2703258991241455, | |
| "learning_rate": 1.8123796741022803e-05, | |
| "loss": 0.8167, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.043320561941252, | |
| "grad_norm": 3.365675926208496, | |
| "learning_rate": 1.7609878947547232e-05, | |
| "loss": 0.7072, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.043320561941252, | |
| "eval_accuracy": 0.031, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1938, | |
| 62 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.030067895247332686, | |
| "eval_humor_english_accuracy": 0.02920443101711984, | |
| "eval_humor_english_avg_loss": 0.26709887150925354, | |
| "eval_humor_english_f1": 0.02837573385518591, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.01460221550855992, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.03277060575968222, | |
| "eval_humor_hindi_avg_loss": 0.1286927389570682, | |
| "eval_humor_hindi_f1": 0.03173076923076923, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.01638530287984111, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.031, | |
| "eval_humor_overall_avg_loss": 0.19798794310908208, | |
| "eval_humor_overall_f1": 0.030067895247332686, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.0155, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6472519040107727, | |
| "eval_perplexity": 1.9102839848633415, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.0155, | |
| "eval_runtime": 1320.0922, | |
| "eval_samples_per_second": 1.515, | |
| "eval_steps_per_second": 0.758, | |
| "eval_temporal_english_avg_loss": 0.17779056256882214, | |
| "eval_temporal_hindi_avg_loss": 0.3266544083817946, | |
| "eval_temporal_overall_avg_loss": 0.2523379425825881, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.0841890166028096, | |
| "grad_norm": 4.201941013336182, | |
| "learning_rate": 1.709278141859511e-05, | |
| "loss": 0.7132, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.125057471264368, | |
| "grad_norm": 4.3990888595581055, | |
| "learning_rate": 1.6573134157852686e-05, | |
| "loss": 0.6732, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.165925925925926, | |
| "grad_norm": 4.231266021728516, | |
| "learning_rate": 1.6051570275461666e-05, | |
| "loss": 0.6784, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.206794380587484, | |
| "grad_norm": 3.714615821838379, | |
| "learning_rate": 1.552872521667198e-05, | |
| "loss": 0.6785, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.247662835249042, | |
| "grad_norm": 3.3602356910705566, | |
| "learning_rate": 1.5005235987649652e-05, | |
| "loss": 0.6877, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.247662835249042, | |
| "eval_accuracy": 0.005, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1990, | |
| 10 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.004975124378109453, | |
| "eval_humor_english_accuracy": 0.004028197381671702, | |
| "eval_humor_english_avg_loss": 0.2567401882446778, | |
| "eval_humor_english_f1": 0.004012036108324975, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.002014098690835851, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.005958291956305859, | |
| "eval_humor_hindi_avg_loss": 0.12068944840764959, | |
| "eval_humor_hindi_f1": 0.005923000987166831, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.0029791459781529296, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.005, | |
| "eval_humor_overall_avg_loss": 0.18884940875979583, | |
| "eval_humor_overall_f1": 0.004975124378109453, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.0025, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6365848183631897, | |
| "eval_perplexity": 1.8900151414329684, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.0025, | |
| "eval_runtime": 1323.2498, | |
| "eval_samples_per_second": 1.511, | |
| "eval_steps_per_second": 0.756, | |
| "eval_temporal_english_avg_loss": 0.16544690548644253, | |
| "eval_temporal_hindi_avg_loss": 0.3068489971076771, | |
| "eval_temporal_overall_avg_loss": 0.23624011447018664, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.2885312899106003, | |
| "grad_norm": 4.01371431350708, | |
| "learning_rate": 1.4481740379382916e-05, | |
| "loss": 0.688, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.329399744572158, | |
| "grad_norm": 4.780395984649658, | |
| "learning_rate": 1.3958876190632131e-05, | |
| "loss": 0.6912, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.3702681992337165, | |
| "grad_norm": 4.804982662200928, | |
| "learning_rate": 1.3437280450870225e-05, | |
| "loss": 0.6524, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.4111366538952748, | |
| "grad_norm": 4.125489711761475, | |
| "learning_rate": 1.2917588644160447e-05, | |
| "loss": 0.659, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.4520051085568326, | |
| "grad_norm": 3.899745225906372, | |
| "learning_rate": 1.2400433934916879e-05, | |
| "loss": 0.6537, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.4520051085568326, | |
| "eval_accuracy": 0.044, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1912, | |
| 88 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0421455938697318, | |
| "eval_humor_english_accuracy": 0.05236656596173213, | |
| "eval_humor_english_avg_loss": 0.24773680959614575, | |
| "eval_humor_english_f1": 0.049760765550239235, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.026183282980866064, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.03574975173783516, | |
| "eval_humor_hindi_avg_loss": 0.1137336753887836, | |
| "eval_humor_hindi_f1": 0.03451581975071908, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.01787487586891758, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.044, | |
| "eval_humor_overall_avg_loss": 0.18087796053705443, | |
| "eval_humor_overall_f1": 0.0421455938697318, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.022, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6272982358932495, | |
| "eval_perplexity": 1.8725445511265306, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.022, | |
| "eval_runtime": 1361.0844, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.735, | |
| "eval_temporal_english_avg_loss": 0.15524752458450397, | |
| "eval_temporal_hindi_avg_loss": 0.29028965216244074, | |
| "eval_temporal_overall_avg_loss": 0.22285711327104782, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.492873563218391, | |
| "grad_norm": 3.0799901485443115, | |
| "learning_rate": 1.1886446396491155e-05, | |
| "loss": 0.6469, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.533742017879949, | |
| "grad_norm": 4.32665491104126, | |
| "learning_rate": 1.1376252243525146e-05, | |
| "loss": 0.6748, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.574610472541507, | |
| "grad_norm": 4.455280780792236, | |
| "learning_rate": 1.0870473069004852e-05, | |
| "loss": 0.654, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.615478927203065, | |
| "grad_norm": 3.6130049228668213, | |
| "learning_rate": 1.0369725086945106e-05, | |
| "loss": 0.6495, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.6563473818646233, | |
| "grad_norm": 4.499478816986084, | |
| "learning_rate": 9.874618381627751e-06, | |
| "loss": 0.6443, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.6563473818646233, | |
| "eval_accuracy": 0.117, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1766, | |
| 234 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.10474485228290063, | |
| "eval_humor_english_accuracy": 0.0986908358509567, | |
| "eval_humor_english_avg_loss": 0.24009977730751, | |
| "eval_humor_english_f1": 0.08982584784601283, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.04934541792547835, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.13505461767626614, | |
| "eval_humor_hindi_avg_loss": 0.10769947945037281, | |
| "eval_humor_hindi_f1": 0.1189851268591426, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.06752730883813307, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.117, | |
| "eval_humor_overall_avg_loss": 0.17395776182075373, | |
| "eval_humor_overall_f1": 0.10474485228290063, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.0585, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6204598546028137, | |
| "eval_perplexity": 1.8597831080769023, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.0585, | |
| "eval_runtime": 1361.8863, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.734, | |
| "eval_temporal_english_avg_loss": 0.1464234281813041, | |
| "eval_temporal_hindi_avg_loss": 0.27630730096865136, | |
| "eval_temporal_overall_avg_loss": 0.21141392534496303, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.6972158365261816, | |
| "grad_norm": 3.4488909244537354, | |
| "learning_rate": 9.385756164307868e-06, | |
| "loss": 0.6831, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.7380842911877394, | |
| "grad_norm": 4.000279903411865, | |
| "learning_rate": 8.903734038293804e-06, | |
| "loss": 0.6437, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.7789527458492973, | |
| "grad_norm": 4.4706220626831055, | |
| "learning_rate": 8.429139273296304e-06, | |
| "loss": 0.6284, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.8198212005108556, | |
| "grad_norm": 3.7017948627471924, | |
| "learning_rate": 7.96255008993078e-06, | |
| "loss": 0.6357, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.860689655172414, | |
| "grad_norm": 4.603327751159668, | |
| "learning_rate": 7.504534955244629e-06, | |
| "loss": 0.6207, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.860689655172414, | |
| "eval_accuracy": 0.0635, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1873, | |
| 127 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.059708509637987774, | |
| "eval_humor_english_accuracy": 0.05639476334340383, | |
| "eval_humor_english_avg_loss": 0.2334255897185822, | |
| "eval_humor_english_f1": 0.05338417540514776, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.028197381671701913, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.070506454816286, | |
| "eval_humor_hindi_avg_loss": 0.10243915956694087, | |
| "eval_humor_hindi_f1": 0.06586270871985157, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.035253227408143, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0635, | |
| "eval_humor_overall_avg_loss": 0.1679845057619467, | |
| "eval_humor_overall_f1": 0.059708509637987774, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.03175, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6139324903488159, | |
| "eval_perplexity": 1.847683260757382, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.03175, | |
| "eval_runtime": 1315.241, | |
| "eval_samples_per_second": 1.521, | |
| "eval_steps_per_second": 0.76, | |
| "eval_temporal_english_avg_loss": 0.138804246816629, | |
| "eval_temporal_hindi_avg_loss": 0.2640184406992659, | |
| "eval_temporal_overall_avg_loss": 0.20143722001935127, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.9015581098339718, | |
| "grad_norm": 3.4093639850616455, | |
| "learning_rate": 7.055651890127625e-06, | |
| "loss": 0.631, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.94242656449553, | |
| "grad_norm": 3.2841861248016357, | |
| "learning_rate": 6.616447789449488e-06, | |
| "loss": 0.6271, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.9832950191570884, | |
| "grad_norm": 2.856577157974243, | |
| "learning_rate": 6.187457755752684e-06, | |
| "loss": 0.6457, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.024112388250319, | |
| "grad_norm": 4.275922775268555, | |
| "learning_rate": 5.7692044473124276e-06, | |
| "loss": 0.5875, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.0649808429118774, | |
| "grad_norm": 4.011717319488525, | |
| "learning_rate": 5.362197441358068e-06, | |
| "loss": 0.5295, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.0649808429118774, | |
| "eval_accuracy": 0.1215, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1757, | |
| 243 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.10833704859563086, | |
| "eval_humor_english_accuracy": 0.12185297079556898, | |
| "eval_humor_english_avg_loss": 0.22710613908185748, | |
| "eval_humor_english_f1": 0.10861759425493717, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.06092648539778449, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.1211519364448858, | |
| "eval_humor_hindi_avg_loss": 0.09781679747076281, | |
| "eval_humor_hindi_f1": 0.10806023029229407, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.0605759682224429, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.1215, | |
| "eval_humor_overall_avg_loss": 0.16250835471025796, | |
| "eval_humor_overall_f1": 0.10833704859563086, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.06075, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6209594011306763, | |
| "eval_perplexity": 1.8607123988418854, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.06075, | |
| "eval_runtime": 1319.9889, | |
| "eval_samples_per_second": 1.515, | |
| "eval_steps_per_second": 0.758, | |
| "eval_temporal_english_avg_loss": 0.13217057422883527, | |
| "eval_temporal_hindi_avg_loss": 0.25253982628289234, | |
| "eval_temporal_overall_avg_loss": 0.19244430216349742, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.1058492975734353, | |
| "grad_norm": 3.4078967571258545, | |
| "learning_rate": 4.966932613231739e-06, | |
| "loss": 0.5529, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.1467177522349936, | |
| "grad_norm": 3.757827043533325, | |
| "learning_rate": 4.583891532240636e-06, | |
| "loss": 0.5268, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.187586206896552, | |
| "grad_norm": 3.6647462844848633, | |
| "learning_rate": 4.213540874938976e-06, | |
| "loss": 0.5249, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.22845466155811, | |
| "grad_norm": 3.4517507553100586, | |
| "learning_rate": 3.856331856554558e-06, | |
| "loss": 0.5423, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.269323116219668, | |
| "grad_norm": 3.9157795906066895, | |
| "learning_rate": 3.5126996812524377e-06, | |
| "loss": 0.5311, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.269323116219668, | |
| "eval_accuracy": 0.1045, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1791, | |
| 209 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.0946129470348574, | |
| "eval_humor_english_accuracy": 0.10372608257804633, | |
| "eval_humor_english_avg_loss": 0.2202950545789041, | |
| "eval_humor_english_f1": 0.09397810218978102, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.051863041289023165, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.10526315789473684, | |
| "eval_humor_hindi_avg_loss": 0.09350429856929683, | |
| "eval_humor_hindi_f1": 0.09523809523809523, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.05263157894736842, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.1045, | |
| "eval_humor_overall_avg_loss": 0.15700095058811175, | |
| "eval_humor_overall_f1": 0.0946129470348574, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.05225, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.621131181716919, | |
| "eval_perplexity": 1.861031944826067, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.05225, | |
| "eval_runtime": 1318.5114, | |
| "eval_samples_per_second": 1.517, | |
| "eval_steps_per_second": 0.758, | |
| "eval_temporal_english_avg_loss": 0.1261224628154746, | |
| "eval_temporal_hindi_avg_loss": 0.2422587038177751, | |
| "eval_temporal_overall_avg_loss": 0.18421073652827494, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.310191570881226, | |
| "grad_norm": 3.881890058517456, | |
| "learning_rate": 3.183063011905744e-06, | |
| "loss": 0.5225, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 3.3510600255427843, | |
| "grad_norm": 4.492738723754883, | |
| "learning_rate": 2.8678234600194414e-06, | |
| "loss": 0.5358, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 3.391928480204342, | |
| "grad_norm": 3.7062699794769287, | |
| "learning_rate": 2.5673650964285718e-06, | |
| "loss": 0.531, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 3.4327969348659004, | |
| "grad_norm": 4.436982154846191, | |
| "learning_rate": 2.2820539833671085e-06, | |
| "loss": 0.5394, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 3.4736653895274587, | |
| "grad_norm": 3.07208251953125, | |
| "learning_rate": 2.0122377284775136e-06, | |
| "loss": 0.5136, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.4736653895274587, | |
| "eval_accuracy": 0.097, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1806, | |
| 194 | |
| ] | |
| ], | |
| "eval_f1_macro": 0.08842297174111212, | |
| "eval_humor_english_accuracy": 0.09667673716012085, | |
| "eval_humor_english_avg_loss": 0.21453529215166584, | |
| "eval_humor_english_f1": 0.0881542699724518, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.04833836858006042, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.09731876861966236, | |
| "eval_humor_hindi_avg_loss": 0.08955117555728288, | |
| "eval_humor_hindi_f1": 0.08868778280542987, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.04865938430983118, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.097, | |
| "eval_humor_overall_avg_loss": 0.15204923117292898, | |
| "eval_humor_overall_f1": 0.08842297174111212, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.0485, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.618234395980835, | |
| "eval_perplexity": 1.8556488288019473, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.0485, | |
| "eval_runtime": 1365.0675, | |
| "eval_samples_per_second": 1.465, | |
| "eval_steps_per_second": 0.733, | |
| "eval_temporal_english_avg_loss": 0.12083532439477838, | |
| "eval_temporal_hindi_avg_loss": 0.23295580905530663, | |
| "eval_temporal_overall_avg_loss": 0.17692150853084088, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.5145338441890166, | |
| "grad_norm": 4.052462577819824, | |
| "learning_rate": 2.1103441209615935e-05, | |
| "loss": 0.5403, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 3.5554022988505745, | |
| "grad_norm": 4.443219184875488, | |
| "learning_rate": 2.0863376714130244e-05, | |
| "loss": 0.5796, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 3.5962707535121328, | |
| "grad_norm": 4.265786170959473, | |
| "learning_rate": 2.0621526177297613e-05, | |
| "loss": 0.5538, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 3.637139208173691, | |
| "grad_norm": 3.9799318313598633, | |
| "learning_rate": 2.037796326913444e-05, | |
| "loss": 0.5683, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 3.678007662835249, | |
| "grad_norm": 5.084273815155029, | |
| "learning_rate": 2.0132762181261985e-05, | |
| "loss": 0.5716, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.678007662835249, | |
| "eval_accuracy": 0.0995, | |
| "eval_f1_macro": 0.0904956798544793, | |
| "eval_humor_english_accuracy": 0.05740181268882175, | |
| "eval_humor_english_avg_loss": 0.117691616244732, | |
| "eval_humor_english_f1": 0.054285714285714284, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.028700906344410877, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.141012909632572, | |
| "eval_humor_hindi_avg_loss": 0.031129881199932288, | |
| "eval_humor_hindi_f1": 0.12358572671888599, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.070506454816286, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0995, | |
| "eval_humor_overall_avg_loss": 0.07511869748413463, | |
| "eval_humor_overall_f1": 0.0904956798544793, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.04975, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6332020163536072, | |
| "eval_perplexity": 1.8836323104356882, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.04975, | |
| "eval_runtime": 1319.1571, | |
| "eval_samples_per_second": 1.516, | |
| "eval_steps_per_second": 0.758, | |
| "eval_temporal_english_avg_loss": 0.03940154156217748, | |
| "eval_temporal_hindi_avg_loss": 0.09286631036147072, | |
| "eval_temporal_overall_avg_loss": 0.0664199314402633, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.7188761174968072, | |
| "grad_norm": 4.418786525726318, | |
| "learning_rate": 1.9885997604306933e-05, | |
| "loss": 0.5554, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 3.759744572158365, | |
| "grad_norm": 3.633528470993042, | |
| "learning_rate": 1.9637744705149887e-05, | |
| "loss": 0.5581, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 3.8006130268199234, | |
| "grad_norm": 4.484210014343262, | |
| "learning_rate": 1.938807910402881e-05, | |
| "loss": 0.5744, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 3.8414814814814813, | |
| "grad_norm": 3.976949453353882, | |
| "learning_rate": 1.913707685150437e-05, | |
| "loss": 0.5831, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 3.8823499361430396, | |
| "grad_norm": 4.015331745147705, | |
| "learning_rate": 1.88848144052942e-05, | |
| "loss": 0.5658, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.8823499361430396, | |
| "eval_accuracy": 0.0505, | |
| "eval_f1_macro": 0.04807234650166587, | |
| "eval_humor_english_accuracy": 0.04934541792547835, | |
| "eval_humor_english_avg_loss": 0.11883179473594759, | |
| "eval_humor_english_f1": 0.04702495201535509, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.024672708962739175, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.05163853028798411, | |
| "eval_humor_hindi_avg_loss": 0.030766832375286615, | |
| "eval_humor_hindi_f1": 0.049102927289896126, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.025819265143992055, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.0505, | |
| "eval_humor_overall_avg_loss": 0.07535547554798036, | |
| "eval_humor_overall_f1": 0.04807234650166587, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.02525, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6302640438079834, | |
| "eval_perplexity": 1.8781064395523288, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.02525, | |
| "eval_runtime": 1320.57, | |
| "eval_samples_per_second": 1.514, | |
| "eval_steps_per_second": 0.757, | |
| "eval_temporal_english_avg_loss": 0.03935090346366541, | |
| "eval_temporal_hindi_avg_loss": 0.0936906353848563, | |
| "eval_temporal_overall_avg_loss": 0.06659941324144447, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.923218390804598, | |
| "grad_norm": 3.6775312423706055, | |
| "learning_rate": 1.8631368606983134e-05, | |
| "loss": 0.5588, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 3.9640868454661558, | |
| "grad_norm": 3.3025190830230713, | |
| "learning_rate": 1.8376816658616527e-05, | |
| "loss": 0.5638, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.005312899106003, | |
| "grad_norm": 5.129138469696045, | |
| "learning_rate": 1.8121236099183743e-05, | |
| "loss": 0.5504, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 4.046181353767561, | |
| "grad_norm": 4.347407817840576, | |
| "learning_rate": 1.7864704780999054e-05, | |
| "loss": 0.5361, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 4.087049808429119, | |
| "grad_norm": 5.224924564361572, | |
| "learning_rate": 1.7607300845987055e-05, | |
| "loss": 0.5143, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.087049808429119, | |
| "eval_accuracy": 0.132, | |
| "eval_f1_macro": 0.1166077738515901, | |
| "eval_humor_english_accuracy": 0.1379657603222558, | |
| "eval_humor_english_avg_loss": 0.117569191649303, | |
| "eval_humor_english_f1": 0.12123893805309735, | |
| "eval_humor_english_precision": 0.5, | |
| "eval_humor_english_recall": 0.0689828801611279, | |
| "eval_humor_english_samples": 993, | |
| "eval_humor_hindi_accuracy": 0.12611717974180736, | |
| "eval_humor_hindi_avg_loss": 0.030592078067631787, | |
| "eval_humor_hindi_f1": 0.11199294532627865, | |
| "eval_humor_hindi_precision": 0.5, | |
| "eval_humor_hindi_recall": 0.06305858987090368, | |
| "eval_humor_hindi_samples": 1007, | |
| "eval_humor_overall_accuracy": 0.132, | |
| "eval_humor_overall_avg_loss": 0.07420280479598583, | |
| "eval_humor_overall_f1": 0.1166077738515901, | |
| "eval_humor_overall_precision": 0.5, | |
| "eval_humor_overall_recall": 0.066, | |
| "eval_humor_overall_samples": 2000, | |
| "eval_loss": 0.6409682035446167, | |
| "eval_perplexity": 1.8983179897511162, | |
| "eval_precision_macro": 0.5, | |
| "eval_recall_macro": 0.066, | |
| "eval_runtime": 1308.5558, | |
| "eval_samples_per_second": 1.528, | |
| "eval_steps_per_second": 0.764, | |
| "eval_temporal_english_avg_loss": 0.039093032986785355, | |
| "eval_temporal_hindi_avg_loss": 0.09288990841599172, | |
| "eval_temporal_overall_avg_loss": 0.0659479490747604, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 20000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 3 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.2097341741727744e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |