Tuesday, July 30, 2024

Get Llama 3.1 70B-Level AI Quality from 8B with Ollama Locally for Free

This video is a step-by-step, easy tutorial on getting Llama 3.1 70B-level quality from Llama 3.1 8B with Ollama, locally and for free. It's inspired by Matt Shumer's GPT Prompt Engineer: the large model generates high-quality few-shot examples and a system prompt, which are then used to prompt the small model.
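
Before running the script below, make sure both models are available locally. Here is a minimal sketch of pulling them through the ollama Python client (assuming the ollama package is installed and the Ollama server is already running on the default port):

from ollama import Client

client = Client(host='http://localhost:11434')

# Download both models if they are not already present.
# Note: the bare "llama3.1" tag resolves to the 8B variant.
for name in ("llama3.1", "llama3.1:70b"):
    client.pull(name)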


Code:

import json

from ollama import Client
client = Client(host='http://localhost:11434')

# Define model names
small_model = "llama3.1"
big_model = "llama3.1:70b"

def generate_candidate_prompts(task, prompt_example, response_example):
    system_prompt = """Given an example training sample, create seven additional samples for the same task that are even better.
    Each example should contain:
    1. Ensure the new examples are diverse and unique from one another.
    2. They should all be perfect. If you make a mistake, this system won't work.

    Respond in this format:
    PUT_PROMPT_HERE
    PUT_RESPONSE_HERE

    PUT_PROMPT_HERE
    PUT_RESPONSE_HERE
    ...
    """
    user_content = f"""{task}
    {prompt_example}
    {response_example}
    """

    response = client.chat(
        model=big_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content}
        ],
        options={
            "num_predict": 4000,  # Ollama uses "num_predict" (not "max_tokens") to cap generated tokens
            "temperature": 0.5
        }
    )
    response_text = response['message']['content']

    # Parse out the prompts and responses
    prompts_and_responses = []
    # Split examples by the delimiter
    examples = response_text.split('PUT_PROMPT_HERE')[1:]

    for example in examples:
        parts = example.split('PUT_RESPONSE_HERE')
        if len(parts) == 2:
            prompt, response = parts
            prompts_and_responses.append({'prompt': prompt.strip(), 'response': response.strip()})

    return prompts_and_responses
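
# Illustrative shape of the list returned by generate_candidate_prompts
# (hypothetical content; the actual pairs depend on what the 70B model generates):
# [
#     {"prompt": "def square(x):\n    result = x * x\n    return result",
#      "response": "def square(x):\n    return x * x"},
#     ...
# ]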

def generate_system_prompt(task, prompt_examples):
    system_prompt = """Given a user-description of their task and a set of prompt / response pairs (it'll be in JSON for easy reading)
                    for the types of outputs we want to generate given inputs, write a fantastic system prompt that describes
                    the task to be done perfectly.
                    1. Do this perfectly.
                    2. Respond only with the system prompt, and nothing else. No other text will be allowed.
                    Respond in this format:
                    WRITE_SYSTEM_PROMPT_HERE
                    """
    user_content = f"""{task}
    {json.dumps(prompt_examples, indent=2)}
    """

    response = client.chat(
        model=big_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content}
        ],
        options={
            "num_predict": 4000,
            "temperature": 0.5
        }
    )

    response_text = response['message']['content']

    # Directly use the response text since the prompt specifies it should be the only content
    generated_system_prompt = response_text.strip()

    return generated_system_prompt

def test_small_model(generated_examples, prompt_example, system_prompt):
    messages = [{"role": "system", "content": system_prompt}]

    for example in generated_examples:
        messages.append({"role": "user", "content": example['prompt']})
        messages.append({"role": "assistant", "content": example['response']})

    messages.append({"role": "user", "content": prompt_example.strip()})

    response = client.chat(
        model=small_model,
        messages=messages,
        options={
            "num_predict": 2000,
            "temperature": 0.5
        }
    )

    response_text = response['message']['content']

    return response_text

def run_conversion_process(task, prompt_example, response_example):
    print('Generating the prompts / responses...')
    # Generate candidate prompts
    generated_examples = generate_candidate_prompts(task, prompt_example, response_example)

    print('Prompts / responses generated. Now generating system prompt...')

    # Generate the system prompt
    system_prompt = generate_system_prompt(task, generated_examples)

    print('System prompt generated:', system_prompt)

    print(f'\n\nTesting the new prompt on {small_model}, using your input example...')
    # Test the generated examples and system prompt with the small model
    small_model_response = test_small_model(generated_examples, prompt_example, system_prompt)

    print(f'{small_model} responded with:')
    print(small_model_response)

    print('\n\nSaving the system prompt and examples to small_model_prompt.py...')

    # Create a dictionary with all the relevant information
    result = {
        "task": task,
        "initial_prompt_example": prompt_example,
        "initial_response_example": response_example,
        "generated_examples": generated_examples,
        "system_prompt": system_prompt,
        "small_model_response": small_model_response
    }

    # Save the small model prompt to a Python file
    with open("small_model_prompt.py", "w") as file:
        file.write('system_prompt = """' + system_prompt + '"""\n\n')

        file.write('messages = [\n')
        for example in generated_examples:
            file.write('    {"role": "user", "content": """' + example['prompt'] + '"""},\n')
            file.write('    {"role": "assistant", "content": """' + example['response'] + '"""},\n')

        file.write('    {"role": "user", "content": """' + prompt_example.strip() + '"""}\n')
        file.write(']\n')

    return result

task = "refactoring code"

prompt_example = """def hello():
                    total = 0
                    total = total + 1
                    return total"""

response_example = """def hello():
                   total = 1
                   return total
                 """

result = run_conversion_process(task, prompt_example, response_example)
print(result)
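
Once the script finishes, small_model_prompt.py contains the generated system prompt and the few-shot messages. Here is a minimal sketch of reusing them with the 8B model (assuming the file was produced by a previous run of the script above):

from ollama import Client
from small_model_prompt import system_prompt, messages  # file written by the script above

client = Client(host='http://localhost:11434')

# Prepend the system prompt and send the few-shot messages to the small model.
response = client.chat(
    model="llama3.1",
    messages=[{"role": "system", "content": system_prompt}] + messages,
    options={"num_predict": 2000, "temperature": 0.5}
)
print(response['message']['content'])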
