Tuesday, February 9, 2010

REGEX - HUGE cpu cost

scott@10G> create table T
  2  (
  3       x number,
  4       y varchar2(4000)
  5  );

Table created.

Elapsed: 00:00:00.31

LOAD DATA
INFILE *
TRUNCATE
INTO TABLE T
(
    x RECNUM,
    y POSITION(1:4000)
)

BEGINDATA
|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|
|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|
|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|
|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|
.........
|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|


scott@10G> DECLARE
  2    l_tokens dbms_sql.varchar2a;
  3  begin
  4    for r in (select y from t)
  5    loop
  6        select substr (txt,
  7                     instr (txt, '|', 1, level  ) + 1,
  8                     instr (txt, '|', 1, level+1) - instr (txt, '|', 1, level) -1 )
  9               as token
 10         bulk collect into l_tokens
 11        from (select r.y as txt FROM dual)
 12        connect by level <= length(txt) - length(replace(txt,'|'))-1;
 13        l_tokens.DELETE;
 14    end loop;
 15
 16    for r in (select y from t)
 17    loop
 18        select trim(regexp_substr((txt),'[^|]+',1,level)) as Tokens
 19        bulk collect into l_tokens
 20        from (select r.y as txt FROM dual)
 21        connect by level <= length(regexp_replace(txt,'[^|]'))-1;
 22         l_tokens.DELETE;
 23    end loop;
 24  end;
 25  /

PL/SQL procedure successfully completed.

Elapsed: 00:00:30.23

********************************************************************************
DECLARE
  l_tokens dbms_sql.varchar2a;
begin
  for r in (select y from t)
  loop
      select substr (txt,
                   instr (txt, '|', 1, level  ) + 1,
                   instr (txt, '|', 1, level+1) - instr (txt, '|', 1, level) -1 )
             as token
       bulk collect into l_tokens
      from (select r.y as txt FROM dual)
      connect by level <= length(txt) - length(replace(txt,'|'))-1;
      l_tokens.DELETE;
  end loop;
  for r in (select y from t)
  loop
      select trim(regexp_substr((txt),'[^|]+',1,level)) as Tokens
      bulk collect into l_tokens
      from (select r.y as txt FROM dual)
      connect by level <= length(regexp_replace(txt,'[^|]'))-1;
       l_tokens.DELETE;
  end loop;
end;

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.01       0.01          0          0          0           0
Execute      1      2.00       2.38          0          0          0           1
Fetch        0      0.00       0.00          0          0          0           0
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        2      2.01       2.39          0          0          0           1

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 54

Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                       1        0.00          0.00
  SQL*Net message from client                     1        0.00          0.00
********************************************************************************

SELECT SUBSTR (TXT, INSTR (TXT, '|', 1, LEVEL ) + 1, INSTR (TXT, '|', 1,
  LEVEL+1) - INSTR (TXT, '|', 1, LEVEL) -1 ) AS TOKEN
FROM
 (SELECT :B1 AS TXT FROM DUAL) CONNECT BY LEVEL <= LENGTH(TXT) -
  LENGTH(REPLACE(TXT,'|'))-1


call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.01       0.00          0          0          0           0
Execute  15400      1.43       1.44          0          0          0           0
Fetch    15400      2.75       2.72          0          0          0      400400
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total    30801      4.20       4.17          0          0          0      400400

Misses in library cache during parse: 1
Misses in library cache during execute: 1
Optimizer mode: ALL_ROWS
Parsing user id: 54     (recursive depth: 1)

Rows     Row Source Operation
-------  ---------------------------------------------------
 400400  CONNECT BY WITHOUT FILTERING (cr=0 pr=0 pw=0 time=2696889 us)
  15400   FAST DUAL  (cr=0 pr=0 pw=0 time=46042 us)

********************************************************************************

SELECT TRIM(REGEXP_SUBSTR((TXT),'[^|]+',1,LEVEL)) AS TOKENS
FROM
 (SELECT :B1 AS TXT FROM DUAL) CONNECT BY LEVEL <= LENGTH(REGEXP_REPLACE(TXT,
  '[^|]'))-1


call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute  15400      1.54       1.48          0          0          0           0
Fetch    15400     21.96      21.98          0          0          0      400400
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total    30801     23.51      23.46          0          0          0      400400

Misses in library cache during parse: 1
Misses in library cache during execute: 1
Optimizer mode: ALL_ROWS
Parsing user id: 54     (recursive depth: 1)

Rows     Row Source Operation
-------  ---------------------------------------------------
 400400  CONNECT BY WITHOUT FILTERING (cr=0 pr=0 pw=0 time=14184509 us)
  15400   FAST DUAL  (cr=0 pr=0 pw=0 time=46371 us)
********************************************************************************

I'll stick with length and substr - regexp, neat but a HUGE cpu cost.

No comments:

Post a Comment