JSON is a hierarchical (tree-structured) data model where entities are embedded within each other using nested objects and arrays. In contrast, relational tables are flat — each column holds a single value, and relationships are expressed using common keys across tables.
Traditional function-based indexes in relational databases are designed to index a single scalar value per row. This model works well for flat data, but JSON introduces a new challenge: arrays can contain multiple values for the same attribute. Because of this, conventional function-based indexes cannot directly index JSON arrays.
In this blog post, we’ll explore:
- Why indexing JSON arrays is challenging
- How queries behave without proper indexing
- How materialized views can help flatten JSON for indexing
- How performance improves with the right strategy
demo@ADB26ai> create table if not exists my_demo(c1 number,c2 json);
Table created.
demo@ADB26ai> declare
2 l_json long;
3 begin
4 l_json := q'# {
5 "id": 1,
6 "products": [
7 {
8 "id": $$b1$$,
9 "title": "Charger SXT RWD",
10 "price": 32999.99,
11 "quantity": 3,
12 "total": 98999.97,
13 "discountPercentage": 13.39,
14 "dimension" : [$$b3$$,$$b4$$,$$b5$$,$$b6$$],
15 "discountedTotal": 85743.87,
16 "thumbnail": "https://cdn.dummyjson.com/products/images/vehicle/Charger%20SXT%20RWD/thumbnail.png"
17 , "str" : "$$b11$$"
18 },
19 {
20 "id": $$b2$$,
21 "title": "Apple MacBook Pro 14 Inch Space Grey",
22 "price": 1999.99,
23 "quantity": 2,
24 "total": 3999.98,
25 "discountPercentage": 18.52,
26 "dimension" : [$$b7$$,$$b8$$,$$b9$$,$$b10$$],
27 "discountedTotal": 3259.18,
28 "thumbnail": "https://cdn.dummyjson.com/products/images/laptops/Apple%20MacBook%20Pro%2014%20Inch%20Space%20Grey/thumbnail.png"
29 , "str" : "$$b11$$"
30 }
31 ],
32 "total": 103774.85,
33 "discountedTotal": 89686.65,
34 "userId": 33,
35 "totalProducts": 4,
36 "totalQuantity": 15
37 } #';
38
39 insert /*+ append */ into my_demo(c1,c2)
40 select rownum, json(
41 replace(replace( replace( replace( replace(
42 replace( replace( replace( replace( replace(
43 replace(l_json,'$$b1$$',object_id)
44 ,'$$b2$$',object_id+1),
45 '$$b3$$', ceil(dbms_random.value(1,1000)) ) ,
46 '$$b4$$', ceil(dbms_random.value(1,1000)) ) ,
47
48 '$$b5$$', ceil(dbms_random.value(1,1000)) ) ,
49 '$$b6$$', ceil(dbms_random.value(1,1000)) ) ,
50 '$$b7$$', ceil(dbms_random.value(1,1000)) ) ,
51 '$$b8$$', ceil(dbms_random.value(1,1000)) ) ,
52 '$$b9$$', ceil(dbms_random.value(1,1000)) ) ,
53 '$$b10$$', ceil(dbms_random.value(1,1000)) ) ,
54 '$$b11$$', dbms_random.string('l',25) )
55 )
56 from all_objects , all_users
57 where rownum <=5000000;
58 commit;
59 end;
60 /
PL/SQL procedure successfully completed.
demo@ADB26ai> select count(*) from my_demo;
COUNT(*)
--------------------
5,000,000
demo@ADB26ai> alter table my_demo
2 add constraint my_demo_pk
3 primary key(c1);
Table altered.
2 l_json long;
3 begin
4 l_json := q'# {
5 "id": 1,
6 "products": [
7 {
8 "id": $$b1$$,
9 "title": "Charger SXT RWD",
10 "price": 32999.99,
11 "quantity": 3,
12 "total": 98999.97,
13 "discountPercentage": 13.39,
14 "dimension" : [$$b3$$,$$b4$$,$$b5$$,$$b6$$],
15 "discountedTotal": 85743.87,
16 "thumbnail": "https://cdn.dummyjson.com/products/images/vehicle/Charger%20SXT%20RWD/thumbnail.png"
17 , "str" : "$$b11$$"
18 },
19 {
20 "id": $$b2$$,
21 "title": "Apple MacBook Pro 14 Inch Space Grey",
22 "price": 1999.99,
23 "quantity": 2,
24 "total": 3999.98,
25 "discountPercentage": 18.52,
26 "dimension" : [$$b7$$,$$b8$$,$$b9$$,$$b10$$],
27 "discountedTotal": 3259.18,
28 "thumbnail": "https://cdn.dummyjson.com/products/images/laptops/Apple%20MacBook%20Pro%2014%20Inch%20Space%20Grey/thumbnail.png"
29 , "str" : "$$b11$$"
30 }
31 ],
32 "total": 103774.85,
33 "discountedTotal": 89686.65,
34 "userId": 33,
35 "totalProducts": 4,
36 "totalQuantity": 15
37 } #';
38
39 insert /*+ append */ into my_demo(c1,c2)
40 select rownum, json(
41 replace(replace( replace( replace( replace(
42 replace( replace( replace( replace( replace(
43 replace(l_json,'$$b1$$',object_id)
44 ,'$$b2$$',object_id+1),
45 '$$b3$$', ceil(dbms_random.value(1,1000)) ) ,
46 '$$b4$$', ceil(dbms_random.value(1,1000)) ) ,
47
48 '$$b5$$', ceil(dbms_random.value(1,1000)) ) ,
49 '$$b6$$', ceil(dbms_random.value(1,1000)) ) ,
50 '$$b7$$', ceil(dbms_random.value(1,1000)) ) ,
51 '$$b8$$', ceil(dbms_random.value(1,1000)) ) ,
52 '$$b9$$', ceil(dbms_random.value(1,1000)) ) ,
53 '$$b10$$', ceil(dbms_random.value(1,1000)) ) ,
54 '$$b11$$', dbms_random.string('l',25) )
55 )
56 from all_objects , all_users
57 where rownum <=5000000;
58 commit;
59 end;
60 /
--------------------
5,000,000
demo@ADB26ai> alter table my_demo
2 add constraint my_demo_pk
3 primary key(c1);
{
"id": 1,
"products": [
{
"id": 55,
"title": "Charger SXT RWD",
"price": 32999.99,
"quantity": 3,
"dimension": [935, 412, 120, 88]
},
{
"id": 56,
"title": "Apple MacBook Pro 14 Inch Space Grey",
"price": 1999.99,
"quantity": 2,
"dimension": [210, 330, 40, 25]
}
],
"total": 103774.85
}
"id": 1,
"products": [
{
"id": 55,
"title": "Charger SXT RWD",
"price": 32999.99,
"quantity": 3,
"dimension": [935, 412, 120, 88]
},
{
"id": 56,
"title": "Apple MacBook Pro 14 Inch Space Grey",
"price": 1999.99,
"quantity": 2,
"dimension": [210, 330, 40, 25]
}
],
"total": 103774.85
}
Suppose we frequently search for orders that contain a specific product ID. For example
select *
from my_demo
where json_exists(c2, '$.products[*]?(@.id.number() == 55)');
from my_demo
where json_exists(c2, '$.products[*]?(@.id.number() == 55)');
demo@ADB26ai> create index my_demo_idx on my_demo( json_value(c2,'$.products[*].id.number()' error on error) );
create index my_demo_idx on my_demo( json_value(c2,'$.products[*].id.number()' error on error) )
*
ERROR at line 1:
ORA-40470: JSON query '$.products[*].id.number()' evaluated to multiple values.
Help: https://docs.oracle.com/error-help/db/ora-40470/
create index my_demo_idx on my_demo( json_value(c2,'$.products[*].id.number()' error on error) )
*
ERROR at line 1:
ORA-40470: JSON query '$.products[*].id.number()' evaluated to multiple values.
Help: https://docs.oracle.com/error-help/db/ora-40470/
However this fails, Because products[*].id is an array path. Each JSON document can contain multiple product IDs, and a function-based index expects a single scalar value per row, Without an index, Oracle has no choice but to perform a full table scan
2 from my_demo
3 where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' );
----------------------------------------------------------
Plan hash value: 3804406768
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 50000 | 35M| 738 (40)| 00:00:01 |
| 1 | RESULT CACHE | bhn66uu5b6guf8qzf1w80zdqqr | 50000 | 35M| 738 (40)| 00:00:01 |
|* 2 | TABLE ACCESS FULL| MY_DEMO | 50000 | 35M| 738 (40)| 00:00:01 |
---------------------------------------------------
'$.products[*]?(@.id.number() == 55 )' /* json_path_str $.products[*]?(@.id.number() ==
55) */ FALSE ON ERROR TYPE(LAX) )=1)
------------------------------------------------------
from my_demo
where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' )"
-----
- automatic DOP: Computed Degree of Parallelism is 1 because of no expensive parallel operation
----------------------------------------------------------
0 recursive calls
0 db block gets
70151 consistent gets
0 redo size
92501 bytes sent via SQL*Net to client
405 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
122 rows processed
Even with JSON_EXISTS filtering, the database still has to inspect all 5 million rows, resulting in:
While this may work for small datasets, it becomes expensive at scale.
One effective workaround is to flatten the JSON array into relational rows using a materialized view.
- High consistent gets
- Full scan of JSON documents
- Increased CPU and I/O
demo@ADB26ai> create materialized view mv_my_demo
2 build immediate
3 refresh fast on statement
4 enable query rewrite
5 as
6 select c1, jt.*
7 from my_demo ,
8 json_table( c2, '$.products[*]'
9 columns(
10 product_id number path '$.id' error on error null on empty
11 , product_title varchar2(40) path '$.title' error on error null on empty) ) jt;
Materialized view created.
2 build immediate
3 refresh fast on statement
4 enable query rewrite
5 as
6 select c1, jt.*
7 from my_demo ,
8 json_table( c2, '$.products[*]'
9 columns(
10 product_id number path '$.id' error on error null on empty
11 , product_title varchar2(40) path '$.title' error on error null on empty) ) jt;
This converts each product inside the JSON array into a separate relational row:
C1 | PRODUCT_ID | PRODUCT_TITLE |
1 | 55 | Charger SXT RWD |
1 | 56 | MacBook Pro |
Now each product becomes indexable, With query rewrite enabled, Oracle can automatically redirect the original JSON query to use the materialized view.
2 from my_demo
3 where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' );
----------------------------------------------------------
Plan hash value: 1777526163
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 117 | 87282 | 229 (35)| 00:00:01 |
| 1 | RESULT CACHE | gq00k9mdyn70344q7at0r2wr7h | 117 | 87282 | 229 (35)| 00:00:01 |
| 2 | NESTED LOOPS | | 117 | 87282 | 229 (35)| 00:00:01 |
| 3 | NESTED LOOPS | | 117 | 87282 | 229 (35)| 00:00:01 |
| 4 | SORT UNIQUE | | 117 | 1287 | 110 (72)| 00:00:01 |
|* 5 | MAT_VIEW ACCESS FULL | MV_MY_DEMO | 117 | 1287 | 110 (72)| 00:00:01 |
| 7 | TABLE ACCESS BY INDEX ROWID| MY_DEMO | 1 | 735 | 2 (0)| 00:00:01 |
------------------------------------------------------------------------------------------------------------
---------------------------------------------------
6 - access("SYS_JMV_1"."C1"="MY_DEMO"."C1")
------------------------------------------------------
from my_demo
where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' )"
-----
- automatic DOP: Computed Degree of Parallelism is 1
- this is an adaptive plan
----------------------------------------------------------
0 recursive calls
0 db block gets
2495 consistent gets
0 redo size
92501 bytes sent via SQL*Net to client
405 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
2 sorts (memory)
0 sorts (disk)
122 rows processed
demo@ADB26ai> create index mv_my_demo_idx on mv_my_demo( product_id );
Index created.
Now the execution plan becomes:
2 from my_demo
3 where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' );
----------------------------------------------------------
Plan hash value: 3980998711
----------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)|
----------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 117 | 87282 | 123 (1)|
| 1 | RESULT CACHE | gq00k9mdyn70344q7| 117 | 87282 | 123 (1)|
| 2 | NESTED LOOPS | | 117 | 87282 | 123 (1)|
| 3 | NESTED LOOPS | | 117 | 87282 | 123 (1)|
| 4 | SORT UNIQUE | | 117 | 1287 | 4 (0)|
| 5 | MAT_VIEW ACCESS BY INDEX ROWID BATCHED| MV_MY_DEMO | 117 | 1287 | 4 (0)|
| 6 | SORT CLUSTER BY ROWID | | 117 | | 3 (0)|
|* 7 | INDEX RANGE SCAN | MV_MY_DEMO_IDX | 117 | | 3 (0)|
|* 8 | INDEX UNIQUE SCAN | MY_DEMO_PK | 1 | | 1 (0)|
| 9 | TABLE ACCESS BY INDEX ROWID | MY_DEMO | 1 | 735 | 2 (0)|
----------------------------------------------------------------------------------------------------
---------------------------------------------------
8 - access("SYS_JMV_1"."C1"="MY_DEMO"."C1")
------------------------------------------------------
from my_demo
where json_exists( c2, '$.products[*]?(@.id.number() == 55 )' )"
-----
- automatic DOP: Computed Degree of Parallelism is 1
- this is an adaptive plan
----------------------------------------------------------
0 recursive calls
0 db block gets
46 consistent gets
0 redo size
92501 bytes sent via SQL*Net to client
405 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
3 sorts (memory)
0 sorts (disk)
122 rows processed
And the statistics show:
This is now a fully optimized execution path. This strategy delivers excellent performance, but it comes with a cost:
In other words, we are trading storage and maintenance for query performance.
JSON’s hierarchical and multi-valued structure does not fit naturally into traditional indexing models. A JSON array path can return multiple values per row, which makes it incompatible with classic function-based indexes.
However, by:
we can achieve highly efficient and scalable search performance on JSON data.
In the next blog post, we’ll explore how Oracle Database has natively evolved its JSON storage and indexing architecture to support efficient JSON querying — without requiring relational flattening or materialized views. Stay tuned!
- Only 46 consistent gets
- No full table scan
- Fast index lookup
- Highly efficient nested loop access
- Additional storage for the materialized view
- Extra maintenance overhead on DML
- Flattening JSON into relational format
- Flattening JSON arrays using JSON_TABLE
- Leveraging materialized views
- Enabling query rewrite
- Adding indexes on extracted attributes
No comments:
Post a Comment