Tuesday, December 3, 2024

Install HunyuanVideo Model Locally for Text to Video Generation

This video shows how to install the HunyuanVideo AI model locally for long text-to-video generation.


Code:

git clone https://github.com/tencent/HunyuanVideo && cd HunyuanVideo

conda env create -f environment.yml

conda activate HunyuanVideo

conda install gcc_linux-64 gxx_linux-64 -y
conda install cuda -c nvidia -y

python -m pip install -r requirements.txt

pip install packaging
pip uninstall -y ninja && pip install ninja

python -m pip install git+https://github.com/Dao-AILab/flash-attention.git@v2.5.9.post1
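
Before downloading the weights, it can save time to confirm that PyTorch sees the GPU and that flash-attention imports cleanly. A minimal sanity check, run inside the activated HunyuanVideo environment:

# Optional sanity check for the environment built above.
import torch
import flash_attn  # installed above from the v2.5.9.post1 tag

print("torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
print("flash-attn:", flash_attn.__version__)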

huggingface-cli login  # get a Read access token from huggingface.co

huggingface-cli download tencent/HunyuanVideo --local-dir ./ckpts
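
The checkpoints are large (tens of GB), so it is worth confirming the download completed before moving on. A minimal sketch that just walks ckpts and reports the total size on disk:

# Optional: report what landed in ckpts and its total size.
import os

total = 0
for root, _, files in os.walk("ckpts"):
    for f in files:
        total += os.path.getsize(os.path.join(root, f))
print(f"ckpts holds {total / 1e9:.1f} GB")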

cd ckpts

huggingface-cli download xtuner/llava-llama-3-8b-v1_1-transformers --local-dir ./llava-llama-3-8b-v1_1-transformers

cd ..

python hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py --input_dir ckpts/llava-llama-3-8b-v1_1-transformers --output_dir ckpts/text_encoder

cd ckpts
huggingface-cli download openai/clip-vit-large-patch14 --local-dir ./text_encoder_2

cd ..
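
Back at the repo root, you can optionally confirm that both text-encoder folders are staged before sampling. The exact files inside each folder depend on the upstream checkpoints, so this sketch only checks that each directory contains a config.json:

# Optional: confirm both text encoders are staged under ckpts/.
import os

for d in ("ckpts/text_encoder", "ckpts/text_encoder_2"):
    has_config = os.path.isfile(os.path.join(d, "config.json"))
    print(d, "->", "ok" if has_config else "missing config.json")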

python3 sample_video.py \
    --video-size 720 1280 \
    --video-length 129 \
    --infer-steps 30 \
    --prompt "a cat is running, realistic." \
    --flow-reverse \
    --seed 0 \
    --use-cpu-offload \
    --save-path ./results
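
Generation can take a while even at 30 inference steps with CPU offload enabled. Once it finishes, the clip is written under ./results. A minimal sketch for inspecting the output, assuming opencv-python is available (pip install opencv-python if it is not):

# Optional: inspect the generated clip(s) saved under ./results.
import glob
import cv2  # assumption: opencv-python is installed

for path in glob.glob("results/**/*.mp4", recursive=True):
    cap = cv2.VideoCapture(path)
    frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"{path}: {frames} frames at {fps:.0f} fps")
    cap.release()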


