This is a step-by-step tutorial for installing and running the Falcon-180B model locally on Linux or Windows, with all the commands used.
Commands Used:
pip3 install "transformers>=4.33.0" "optimum>=1.12.0"
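Optionally, verify that both packages meet the minimum versions before continuing (a quick sanity check, not part of the original steps):
python3 -c "import importlib.metadata as m; print(m.version('transformers'), m.version('optimum'))"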
git clone https://github.com/PanQiWei/AutoGPTQ
cd AutoGPTQ
git checkout a7167b1
pip3 install .
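Once the install finishes, you can confirm that AutoGPTQ imports cleanly before moving on (a simple check, assuming the build above succeeded):
python3 -c "import auto_gptq; print('AutoGPTQ import OK')"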
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
model_name_or_path = "TheBloke/Falcon-180B-Chat-GPTQ"
# To use a different branch, change revision
# For example: revision="gptq-3bit--1g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             revision="main")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
prompt = "What is the capital of Australia?"
prompt_template=f'''User: {prompt}
Assistant: '''
print("\n\n*** Generate:")
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, do_sample=True, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))
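The decode above prints the full sequence, including the echoed prompt and any special tokens. If you only want the model's reply, one way (using transformers' standard skip_special_tokens option) is:
# Decode only the newly generated tokens, dropping the echoed prompt
reply = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
print(reply)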
# Inference can also be done using transformers' pipeline
print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
    top_p=0.95,
    repetition_penalty=1.15
)
print(pipe(prompt_template)[0]['generated_text'])
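To keep chatting without re-running the script, a minimal interactive loop around the same pipe object might look like this (a sketch that reuses the User/Assistant template above; an empty input exits):
while True:
    user_input = input("User: ")
    if not user_input:
        break
    # Build the chat prompt in the same format as the template above
    chat_prompt = f"User: {user_input}\nAssistant: "
    print(pipe(chat_prompt)[0]['generated_text'])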