DebasishDhal99 committed on
Commit
76b6ccd
·
verified ·
1 Parent(s): 71549ea

Fix inference code in readme.

Browse files
Files changed (1) hide show
  1. README.md +26 -37
README.md CHANGED
@@ -34,55 +34,46 @@ This repo contains a low-rank adapter for LLaMA-7b fit on the Stanford Alpaca da
34
 
35
  Model can be easily loaded with AutoModelForCausalLM.
36
  ``` python
37
-
38
  import torch
39
  from peft import PeftModel
40
  import transformers
41
-
42
- assert (
43
- "LlamaTokenizer" in transformers._import_structure["models.llama"]
44
- ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
45
  from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
46
 
47
- tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
 
48
 
49
- BASE_MODEL = "decapoda-research/llama-7b-hf"
50
- LORA_WEIGHTS = "OdiaGenAI/odiagenAI-model-v0"
51
 
 
 
 
 
 
 
52
 
53
- model = LlamaForCausalLM.from_pretrained(
54
- BASE_MODEL,
55
- load_in_8bit=False,
56
- torch_dtype=torch.float16,
57
- device_map="auto",
58
- )
59
- model = PeftModel.from_pretrained(
60
- model, LORA_WEIGHTS, torch_dtype=torch.float16, force_download=True
61
- )
62
 
63
- def generate_prompt(instruction, input=None):
64
- if input:
65
- return f"""ନିମ୍ନରେ ଏକ ନିର୍ଦ୍ଦେଶନାମା ଯାହାକି ଏକ କାର୍ଯ୍ୟକୁ ବର୍ଣ୍ଣନା କରେ, ଏକ ଇନପୁଟ୍ ସହିତ ଯୋଡି ଯାହା ପରବର୍ତ୍ତୀ ପ୍ରସଙ୍ଗ ପ୍ରଦାନ କରେ | ଏକ ପ୍ରତିକ୍ରିୟା ଲେଖନ୍ତୁ ଯାହା ଅନୁରୋଧକୁ ସଠିକ୍ ଭାବରେ ସମାପ୍ତ କରେ |
66
- ### ନିର୍ଦ୍ଦେଶ:
67
- {instruction}
68
- ### ଇନପୁଟ୍:
69
- {input}
70
- ### ପ୍ରତିକ୍ରିୟା:"""
71
- else:
72
- return f"""ନିମ୍ନରେ ଏକ ନିର୍ଦ୍ଦେଶ ଯାହାକି ଏକ କାର୍ଯ୍ୟକୁ ବର୍ଣ୍ଣନା କରେ | ଏକ ପ୍ରତିକ୍ରିୟା ଲେଖନ୍ତୁ ଯାହା ଅନୁରୋଧକୁ ସଠିକ୍ ଭାବରେ ସମାପ୍ତ କରେ |
73
- ### ନିର୍ଦ୍ଦେଶ:
74
- {instruction}
75
- ### ପ୍ରତିକ୍ରିୟା:"""
76
-
77
- prompt = generate_prompt(instruction, input)
78
- inputs = tokenizer(prompt, return_tensors="pt")
79
  input_ids = inputs["input_ids"].to(device)
80
  generation_config = GenerationConfig(
81
  temperature=0.1,
82
  top_p=0.75,
83
  top_k=40,
84
  num_beams=4,
85
- **kwargs,
86
  )
87
  with torch.no_grad():
88
  generation_output = model.generate(
@@ -94,9 +85,7 @@ with torch.no_grad():
94
  )
95
  s = generation_output.sequences[0]
96
  output = tokenizer.decode(s)
97
- print(output.split("### Response:")[1].strip())
98
-
99
-
100
  ```
101
 
102
 
 
34
 
35
  Model can be easily loaded with AutoModelForCausalLM.
36
  ``` python
 
37
  import torch
38
  from peft import PeftModel
39
  import transformers
40
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
41
+ from peft import PeftModel, PeftConfig
 
 
42
  from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
43
 
44
+ base_model_path = "meta-llama/Llama-2-7b-hf"
45
+ adapter_path = "OdiaGenAI/odiagenAI-model-v0"
46
 
47
+ tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
48
+ tokenizer.pad_token = tokenizer.eos_token
49
 
50
+ bnb_config = BitsAndBytesConfig(
51
+ load_in_4bit=True,
52
+ bnb_4bit_quant_type="nf4",
53
+ bnb_4bit_use_double_quant=True,
54
+ bnb_4bit_compute_dtype=torch.float16,
55
+ )
56
 
57
+ base_model = AutoModelForCausalLM.from_pretrained(
58
+ base_model_path,
59
+ quantization_config=bnb_config,
60
+ device_map="auto",
61
+ trust_remote_code=True
62
+ )
 
 
 
63
 
64
+ model = PeftModel.from_pretrained(base_model, adapter_path)
65
+
66
+ instruction = "ଭାରତ ବିଷୟରେ କିଛି କୁହନ୍ତୁ"
67
+
68
+ device = "cuda" if torch.cuda.is_available() else "cpu"
69
+
70
+ inputs = tokenizer(instruction, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
71
  input_ids = inputs["input_ids"].to(device)
72
  generation_config = GenerationConfig(
73
  temperature=0.1,
74
  top_p=0.75,
75
  top_k=40,
76
  num_beams=4,
 
77
  )
78
  with torch.no_grad():
79
  generation_output = model.generate(
 
85
  )
86
  s = generation_output.sequences[0]
87
  output = tokenizer.decode(s)
88
+ print(output)
 
 
89
  ```
90
 
91