Monday, December 2, 2024

Embedding Model in Oracle Database 23ai - Step-by-step Hands-on Tutorial


This is a step-by-step hands-on tutorial to use Oracle AI Vector Search on unstructured data combined with relational search on business data.



conda create -n ai python=3.11 -y && conda activate ai

sudo chmod 666 /var/run/docker.sock

docker pull container-registry.oracle.com/database/free:latest

docker run -d --name oracle-db \
  -p 1521:1521 \
  --dns 8.8.8.8 \
  -e ORACLE_PWD="YourStrongPassword" \
  container-registry.oracle.com/database/free:latest

docker ps -a

mkdir mymodel && cd mymodel

wget https://adwc4pm.objectstorage.us-ashburn-1.oci.customer-oci.com/p/VBRD9P8ZFWkKvnfhrWxkpPe8K03-JIoM5h_8EJyJcpE80c108fuUjg7R5L5O7mMZ/n/adwc4pm/b/OML-Resources/o/all_MiniLM_L12_v2_augmented.zip

wget 'https://huggingface.co/datasets/muhrafli/heart-diseases/resolve/main/heart%20(3).csv'
mv 'heart (3).csv' heart.csv

unzip -oq all_MiniLM_L12_v2_augmented.zip

docker cp all_MiniLM_L12_v2.onnx oracle-db:/home/oracle/
docker cp heart.csv oracle-db:/home/oracle/

docker exec -it oracle-db bash

mkdir mymodel
mv all_MiniLM_L12_v2.onnx mymodel
mv heart.csv mymodel
cd mymodel

sqlplus sys/YourStrongPassword@localhost:1521/FREEPDB1 as sysdba

create user if not exists myuser identified by myuser quota unlimited on users;
grant create session, db_developer_role, create mining model to myuser;

create or replace directory model_dir as '/home/oracle/mymodel';
grant read, write on directory model_dir to myuser;

begin
  dbms_vector.drop_onnx_model (
    model_name => 'ALL_MINILM_L12_V2',
    force => true);

  dbms_vector.load_onnx_model (
    directory  => 'model_dir',
    file_name  => 'all_MiniLM_L12_v2.onnx',
    model_name => 'ALL_MINILM_L12_V2');
end;
/

column model_name format a30
column algorithm format a10
column mining_function format a15

select model_name, algorithm, mining_function
from   user_mining_models
where  model_name = 'ALL_MINILM_L12_V2';

set long 1000000
select vector_embedding(all_minilm_l12_v2 using 'hello' as data) AS my_vector;


-- Create table
drop table if exists heart_disease_data purge;

create table heart_disease_data as
select age, sex, chest_pain_type, resting_bp, cholesterol, fasting_bs, resting_ecg, max_hr, exercise_angina, oldpeak, st_slope, heart_disease
from   external (
         (
           age            number(3),
           sex            varchar2(10),
           chest_pain_type varchar2(10),
           resting_bp     number(5),
           cholesterol    number(10),
           fasting_bs     number(1),
           resting_ecg    varchar2(10),
           max_hr         number(5),
           exercise_angina varchar2(10),
           oldpeak        number(10,1),
           st_slope       varchar2(10),
           heart_disease  number(1)
         )
         type oracle_loader
         default directory model_dir
         access parameters (
           records delimited by newline
           skip 1
           badfile model_dir
           logfile model_dir:'heart_disease_data_ext_tab_%a_%p.log'
           discardfile model_dir
           fields csv with embedded terminated by ',' optionally enclosed by '"'
           missing field values are null
           (
             age,
             sex,
             chest_pain_type,
             resting_bp,
             cholesterol,
             fasting_bs,
             resting_ecg,
             max_hr,
             exercise_angina,
             oldpeak,
             st_slope,
             heart_disease
           )
        )
        location ('heart.csv')
        reject limit unlimited
      );

-- Describe table
desc heart_disease_data;

-- Add vector column
alter table heart_disease_data add (
  patient_vector vector
);

-- Describe table
desc heart_disease_data;

-- Populate vector column
update heart_disease_data
set    patient_vector = vector_embedding(all_minilm_l12_v2 using concat(age, sex, chest_pain_type, resting_bp, cholesterol, fasting_bs, resting_ecg, max_hr, exercise_angina, oldpeak, st_slope) as data);

commit;


-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with high cholesterol and high blood pressure"

variable search_text varchar2(100);
exec :search_text := 'patient with high cholesterol and high blood pressure';

set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9

SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
       age,
       sex,
       chest_pain_type,
       resting_bp,
       cholesterol,
       fasting_bs,
       resting_ecg,
       max_hr,
       exercise_angina,
       oldpeak,
       st_slope,
       heart_disease
FROM   heart_disease_data
order by 1
fetch approximate first 5 rows only;



-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with chest pain and high heart rate"

variable search_text varchar2(100);
exec :search_text := 'patient with chest pain and high heart rate';

set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9

SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
       age,
       sex,
       chest_pain_type,
       resting_bp,
       cholesterol,
       fasting_bs,
       resting_ecg,
       max_hr,
       exercise_angina,
       oldpeak,
       st_slope,
       heart_disease
FROM   heart_disease_data
order by 1
fetch approximate first 5 rows only;


========================
Cleanup:

docker stop oracle-db && docker rm oracle-db

docker images

docker rmi <image_id>

No comments: