Friday, August 23, 2024

Easy Tutorial to Build a Full, Free RAG Pipeline from Scratch with Your Own Data

This video shows how to install Haystack with Ollama locally and build a free, end-to-end RAG pipeline over your own documents.



Code:


conda create -n hay python=3.11 -y && conda activate hay

pip install torch
pip install haystack-ai==2.2.4
pip install haystack-experimental==0.1.0
pip install sentence-transformers==3.0.1
pip install transformers==4.42.3
pip install ollama-haystack

conda install jupyter -y

# reinstalling charset_normalizer works around a common Jupyter import error
pip uninstall charset_normalizer -y
pip install charset_normalizer

jupyter notebook



import transformers  # not used directly below, but confirms the install works
import torch         # same; generation happens inside the Ollama server

from haystack_integrations.components.generators.ollama import OllamaGenerator



# Point the generator at the local Ollama server (default port 11434);
# make sure `ollama pull llama3.1` and `ollama serve` have been run first.
generator = OllamaGenerator(
    model="llama3.1",
    url="http://localhost:11434/api/generate",
    generation_kwargs={
        "num_predict": 100,   # cap the length of the response
        "temperature": 0.9,
    },
)

print(generator.run("Who is the best American actor?"))
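
Note that run() returns a dictionary rather than a plain string; with Haystack 2.x generators the completions sit under the "replies" key, so extracting just the text looks like this:

response = generator.run("Who is the best American actor?")
print(response["replies"][0])   # first completion as plain text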



========



from haystack_integrations.components.generators.ollama import OllamaGenerator

from haystack import Pipeline, Document
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.document_stores.in_memory import InMemoryDocumentStore



template = """

Given the following information, answer the question.



Context:

{% for document in documents %}

    {{ document.content }}

{% endfor %}



Question: {{ query }}?

"""



docstore = InMemoryDocumentStore()
docstore.write_documents([
    Document(content="I really like summer"),
    Document(content="My favorite sport is soccer"),
    Document(content="I don't like reading sci-fi books"),
    Document(content="I don't like crowded places"),
])



generator = OllamaGenerator(
    model="llama3.1",
    url="http://localhost:11434/api/generate",
    generation_kwargs={
        "num_predict": 100,
        "temperature": 0.9,
    },
)



pipe = Pipeline()
pipe.add_component("retriever", InMemoryBM25Retriever(document_store=docstore))
pipe.add_component("prompt_builder", PromptBuilder(template=template))
pipe.add_component("llm", generator)
# Wire retrieved documents into the prompt, and the rendered prompt into the LLM
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder", "llm")



# Define the query once; it feeds both the retriever and the prompt template
query = "What do I like"

result = pipe.run({"prompt_builder": {"query": query}, "retriever": {"query": query}})



print(result)
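
The raw result is a nested dict keyed by component name, so the generated answer lives under the llm component's replies:

print(result["llm"]["replies"][0])   # just the answer text

One last note: the setup installs sentence-transformers, but this pipeline uses BM25, a keyword matcher that needs no embeddings. To make retrieval semantic instead, a minimal sketch (assuming the common all-MiniLM-L6-v2 model; only the retrieval side changes, the prompt_builder and llm wiring stay the same):

from haystack.components.embedders import (
    SentenceTransformersDocumentEmbedder,
    SentenceTransformersTextEmbedder,
)
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

# Documents must be embedded before they go into the store
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()
docstore = InMemoryDocumentStore()
docstore.write_documents(
    doc_embedder.run(documents=[Document(content="I really like summer")])["documents"])

# The query is embedded inside the pipeline by a text embedder component
pipe = Pipeline()
pipe.add_component("text_embedder", SentenceTransformersTextEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2"))
pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=docstore))
pipe.connect("text_embedder.embedding", "retriever.query_embedding")
# ...then add prompt_builder and llm exactly as above, and pass
# {"text_embedder": {"text": query}} instead of {"retriever": {"query": query}}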

