This is a step-by-step hands-on tutorial to use Oracle AI Vector Search on unstructured data combined with relational search on business data.
conda create -n ai python=3.11 -y && conda activate ai
sudo chmod 666 /var/run/docker.sock
docker pull container-registry.oracle.com/database/free:latest
docker run -d --name oracle-db \
-p 1521:1521 \
--dns 8.8.8.8 \
-e ORACLE_PWD="YourStrongPassword" \
container-registry.oracle.com/database/free:latest
docker ps -a
mkdir mymodel && cd mymodel
wget https://adwc4pm.objectstorage.us-ashburn-1.oci.customer-oci.com/p/VBRD9P8ZFWkKvnfhrWxkpPe8K03-JIoM5h_8EJyJcpE80c108fuUjg7R5L5O7mMZ/n/adwc4pm/b/OML-Resources/o/all_MiniLM_L12_v2_augmented.zip
wget 'https://huggingface.co/datasets/muhrafli/heart-diseases/resolve/main/heart%20(3).csv'
mv 'heart (3).csv' heart.csv
unzip -oq all_MiniLM_L12_v2_augmented.zip
docker cp all_MiniLM_L12_v2.onnx oracle-db:/home/oracle/
docker cp heart.csv oracle-db:/home/oracle/
docker exec -it oracle-db bash
mkdir mymodel
mv all_MiniLM_L12_v2.onnx mymodel
mv heart.csv mymodel
cd mymodel
sqlplus sys/YourStrongPassword@localhost:1521/FREEPDB1 as sysdba
create user if not exists myuser identified by myuser quota unlimited on users;
grant create session, db_developer_role, create mining model to myuser;
create or replace directory model_dir as '/home/oracle/mymodel';
grant read, write on directory model_dir to myuser;
begin
dbms_vector.drop_onnx_model (
model_name => 'ALL_MINILM_L12_V2',
force => true);
dbms_vector.load_onnx_model (
directory => 'model_dir',
file_name => 'all_MiniLM_L12_v2.onnx',
model_name => 'ALL_MINILM_L12_V2');
end;
/
column model_name format a30
column algorithm format a10
column mining_function format a15
select model_name, algorithm, mining_function
from user_mining_models
where model_name = 'ALL_MINILM_L12_V2';
set long 1000000
select vector_embedding(all_minilm_l12_v2 using 'hello' as data) AS my_vector;
-- Create table
drop table if exists heart_disease_data purge;
create table heart_disease_data as
select age, sex, chest_pain_type, resting_bp, cholesterol, fasting_bs, resting_ecg, max_hr, exercise_angina, oldpeak, st_slope, heart_disease
from external (
(
age number(3),
sex varchar2(10),
chest_pain_type varchar2(10),
resting_bp number(5),
cholesterol number(10),
fasting_bs number(1),
resting_ecg varchar2(10),
max_hr number(5),
exercise_angina varchar2(10),
oldpeak number(10,1),
st_slope varchar2(10),
heart_disease number(1)
)
type oracle_loader
default directory model_dir
access parameters (
records delimited by newline
skip 1
badfile model_dir
logfile model_dir:'heart_disease_data_ext_tab_%a_%p.log'
discardfile model_dir
fields csv with embedded terminated by ',' optionally enclosed by '"'
missing field values are null
(
age,
sex,
chest_pain_type,
resting_bp,
cholesterol,
fasting_bs,
resting_ecg,
max_hr,
exercise_angina,
oldpeak,
st_slope,
heart_disease
)
)
location ('heart.csv')
reject limit unlimited
);
-- Describe table
desc heart_disease_data;
-- Add vector column
alter table heart_disease_data add (
patient_vector vector
);
-- Describe table
desc heart_disease_data;
-- Populate vector column
update heart_disease_data
set patient_vector = vector_embedding(all_minilm_l12_v2 using concat(age, sex, chest_pain_type, resting_bp, cholesterol, fasting_bs, resting_ecg, max_hr, exercise_angina, oldpeak, st_slope) as data);
commit;
-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with high cholesterol and high blood pressure"
variable search_text varchar2(100);
exec :search_text := 'patient with high cholesterol and high blood pressure';
set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9
SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
age,
sex,
chest_pain_type,
resting_bp,
cholesterol,
fasting_bs,
resting_ecg,
max_hr,
exercise_angina,
oldpeak,
st_slope,
heart_disease
FROM heart_disease_data
order by 1
fetch approximate first 5 rows only;
-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with chest pain and high heart rate"
variable search_text varchar2(100);
exec :search_text := 'patient with chest pain and high heart rate';
set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9
SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
age,
sex,
chest_pain_type,
resting_bp,
cholesterol,
fasting_bs,
resting_ecg,
max_hr,
exercise_angina,
oldpeak,
st_slope,
heart_disease
FROM heart_disease_data
order by 1
fetch approximate first 5 rows only;
========================
Cleanup:
docker stop oracle-db && docker rm oracle-db
docker images
docker rmi <image_id>
No comments:
Post a Comment