# Fine-tune a 4-bit quantised Mixtral model with LoRA to predict ICD-10 codes.
#
# The script was developed on a single GPU. To use several GPUs the quantised
# base model is loaded with device_map="auto", which spreads its layers over
# every visible GPU, and the Trainer is told the model is already
# parallelised. Run it as ONE process (`python <script>.py`); do not start one
# process per GPU, because each process would try to claim every GPU.

import warnings

warnings.filterwarnings("ignore")

from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import (
    FullOptimStateDictConfig,
    FullStateDictConfig,
)
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# FSDP configuration intended to distribute the weights in a multi-GPU env.
# NOTE(review): `accelerator` is not referenced again in this script, so this
# set-up presumably only matters if the Trainer picks up the shared
# accelerate state under a distributed launcher -- confirm it is still wanted.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)


## Loading the dataset
def Profiler_load_dataset(data_files, field='train', split=None):
    """Load records from a JSON file through ``datasets.load_dataset``.

    Args:
        data_files: Path (or paths) of the JSON file(s) to read.
        field: Top-level JSON key whose value holds the records.
        split: Optional split to select. With the default ``None`` the call
            returns a ``DatasetDict`` (the original behaviour); with a split
            name it returns a single ``Dataset``, which is what ``Trainer``
            needs.

    Returns:
        Whatever ``load_dataset`` returns for the given arguments.
    """
    return load_dataset('json', data_files=data_files, field=field, split=split)


## high ram used here
# `split="train"` is the split name `datasets` assigns to files passed as a
# bare `data_files` path; it is unrelated to the JSON `field` being read.
# Without it a DatasetDict would be handed to the Trainer, which cannot index
# it by row number.
# NOTE(review): assumes the file is one JSON document with top-level "train"
# and "test" keys -- confirm against the data.
train_dataset = Profiler_load_dataset(
    data_files='/content/prov_data2.jsonl', field='train', split='train'
)
eval_dataset = Profiler_load_dataset(
    data_files='/content/prov_data2.jsonl', field='test', split='train'
)


def format_fun(example):
    """Render one record as the training sentence used for fine-tuning.

    Args:
        example: Mapping with an ``'Input'`` and an ``'Output'`` entry.

    Returns:
        The sentence combining both entries.
    """
    text = f" The ICD10 code for {example['Input']} is , {example['Output']} "
    return text


# base_model_id = "mistralai/Mixtral-8x7B-v0.1"
# Try out different models from the Hugging Face hub.
# NOTE(review): the id below names a GGUF repository; GGUF checkpoints are
# presumably not loadable through AutoModelForCausalLM + bitsandbytes --
# confirm, or switch to a regular transformers checkpoint.
base_model_id = 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF'  # this is passed in as arg -> args.model_id

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" lets accelerate place the layers across all visible GPUs
# (device_map="cuda" kept the whole model on a single GPU).
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Length, in tokens, of every tokenised example: longer prompts are truncated
# and shorter ones are padded up to this size.
max_length = 50


def generate_and_tokenize_prompt(prompt):
    """Format one record and tokenise it to exactly ``max_length`` tokens.

    Args:
        prompt: A dataset record accepted by ``format_fun``.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    result = tokenizer(
        format_fun(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    # The labels start out as a copy of the input ids (causal-LM objective:
    # the model is trained to reproduce the input sequence itself).
    result["labels"] = result["input_ids"].copy()
    return result


tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)

# Fine tuning the model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "w1",
        "w2",
        "w3",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)

if torch.cuda.device_count() > 1:  # If more than 1 GPU
    # Mark the model as already spread over the GPUs (see device_map above).
    model.is_parallelizable = True
    model.model_parallel = True

project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,  # Want a small lr for finetuning
        # NOTE(review): fp16 here vs. bfloat16 as the bitsandbytes compute
        # dtype above -- confirm the mismatch is intended.
        fp16=True,
        optim="paged_adamw_8bit",
        logging_steps=25,  # Report the loss every 25 steps
        logging_dir="./logs",  # Directory for storing logs
        save_strategy="steps",  # Save checkpoints on a step schedule
        save_steps=25,  # Save a checkpoint every 25 steps
        evaluation_strategy="steps",  # Evaluate on a step schedule
        eval_steps=25,  # Evaluate every 25 steps
        do_eval=True,  # Enable evaluation
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

# Implement RAG on the fine tuned model
# final model prepared
'''
1) Make sure the model runs on multi gpu script !
2) The dataset is loaded
3) The langchain implementation to oversee the prompt generation guide
4) Also try the bert models rather than directly using the mixtral model ()
5) Once the model is trained copy the checkpoint folder and paste in a local env
'''