This video shows how to install ScrapeGraphAI, a Python web-scraping library that uses LLMs and direct graph logic to create scraping pipelines for websites, documents, and XML files.
Code Used:
# Create and activate an isolated Python 3.11 environment for the demo.
conda create -n scrapeai python=3.11
conda activate scrapeai
# Install the pinned ScrapeGraphAI beta release.
pip install scrapegraphai==0.9.0b7 --upgrade
# System Chromium driver (Debian/Ubuntu package); run as root or prefix with sudo.
apt install chromium-chromedriver
pip install nest_asyncio
pip install playwright
# Install the OS-level libraries and the browser binaries Playwright needs.
playwright install-deps
playwright install
# Download the models only. `ollama run` would open an interactive chat prompt,
# and nomic-embed-text is an embedding model that cannot be chatted with.
# The Python snippet that follows talks to Ollama at http://localhost:11434,
# so the Ollama server must be running there (start it with `ollama serve` if needed).
ollama pull mistral
ollama pull nomic-embed-text
# Start the Python interpreter; the remaining lines are entered at its prompt.
python3
import nest_asyncio

# Patch asyncio to permit nested event loops before the scraper is loaded.
# NOTE(review): presumably needed because the graph runs async code inside an
# already-running loop (e.g. a notebook) — confirm for plain scripts.
nest_asyncio.apply()

from scrapegraphai.graphs import SmartScraperGraph

# Both the chat model and the embedding model use the same Ollama URL.
OLLAMA_URL = "http://localhost:11434"

llm_settings = {
    "model": "ollama/mistral",
    "temperature": 0,
    "format": "json",  # Ollama needs the format to be specified explicitly
    "base_url": OLLAMA_URL,
}
embedding_settings = {
    "model": "ollama/nomic-embed-text",
    "base_url": OLLAMA_URL,
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
}

# Build the scraping pipeline: a natural-language prompt, the page to scrape,
# and the model configuration assembled above.
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the articles",
    source="https://fahdmirza.com",
    config=graph_config,
)

# Execute the pipeline and show whatever it returns.
result = smart_scraper_graph.run()
print(result)
No comments:
Post a Comment