John August John August - 6 months ago 12
SQL Question

PL\SQL How can I improve the execution speed of my function?

I have written a function that takes an input string and wraps the input text to a specified width that is passed in as a parameter. Right now, it takes approximately 290ms to run this function 1,000 times with a 4000 character string.

Can anyone suggest ways to improve this function? I will be trying a number of ideas that I found online here, but I wanted to see if there are any recommendations here.

This is my function:

CREATE OR REPLACE FUNCTION SPLIT_STRING (
p_str VARCHAR2, --String input to split
p_del VARCHAR2 DEFAULT NULL, --Delimiter (If NULL, will only split on length [p_len NOT NULL])
p_len INTEGER DEFAULT NULL, --Length of each chunk (If NULL, will only split on delimiter)
p_force INTEGER DEFAULT NULL)
RETURN array_type IS
--
v_str_array array_type;
v_line_array array_type;
v_len PLS_INTEGER; -- Total string length
v_str VARCHAR2(32767); -- String being built
v_chunk VARCHAR2(32767); -- Current chunk
v_char VARCHAR2(1 CHAR); -- Current character
v_cur_len PLS_INTEGER; -- Current chuk length
v_idx PLS_INTEGER DEFAULT 1; -- For loop counter
v_start PLS_INTEGER DEFAULT 1;
v_stop PLS_INTEGER;
v_line PLS_INTEGER DEFAULT 0;
v_word binary_integer;
BEGIN
--Determine total string length
v_len:= LENGTH(p_str);

-- Split into individual word chunks by delimiter
IF p_del IS NOT NULL
THEN
FOR pos IN 1..v_len
LOOP
v_char:= SUBSTR(p_str, pos, 1);
IF (v_char = p_del) OR (pos = v_len)
THEN
v_stop:= pos+1;
v_chunk:=SUBSTR(p_str, v_start, v_stop-v_start);
v_str_array(v_idx):= v_chunk;
v_start:=v_stop;
v_line:=v_idx;
v_idx:=v_idx+1;
END IF;
END LOOP;
END IF;

--Split the string into chunks and add to an array
IF p_del IS NOT NULL AND p_len IS NOT NULL
THEN
-- Wrap words into each line
v_idx:=1;
v_cur_len:=0;
v_line:=0;
v_word:=v_str_array.FIRST;
WHILE v_str_array(v_word) IS NOT NULL
LOOP
IF(v_cur_len+LENGTH(v_str_array(v_word)) <= p_len)
THEN
IF(v_word <= 1)
THEN
v_chunk:=v_str_array(v_word);
ELSE
v_chunk:=v_chunk||v_str_array(v_word);
END IF;
IF v_line=0
THEN
v_line:=1;
END IF;
ELSIF (p_force = 1)
THEN
-- Force line width to match specification
v_chunk:=SUBSTR(v_str_array(v_word),1,p_len);
IF v_str_array.NEXT(v_word) IS NOT NULL
THEN
v_str_array(v_word+1) := SUBSTR(v_str_array(v_word),p_len+1,LENGTH(v_str_array(v_word)))|| v_str_array(v_word+1);
ELSE
v_str_array(v_word+1) := SUBSTR(v_str_array(v_word),p_len+1,LENGTH(v_str_array(v_word)));
END IF;
v_line:=v_line+1;
ELSE
-- Prevent words from being split if they are larger than width specification
v_chunk:=v_str_array(v_word);
v_line:=v_line+1;
END IF;
v_line_array(v_line):= v_chunk;
v_cur_len:= LENGTH(v_chunk);
v_word:=v_str_array.NEXT(v_word);
IF v_word IS NULL
THEN
EXIT;
END IF;
END LOOP;
ELSIF p_del IS NULL AND p_len IS NOT NULL
THEN
-- Chop into exact length lines
v_line:= CEIL(v_len/p_len);
v_start:=1;
v_stop:= p_len;
FOR line IN 1..v_line
LOOP
v_line_array(line):= SUBSTR(p_str, v_start, p_len);
v_start:=v_stop+1;
v_stop:=v_stop+p_len;
END LOOP;
ELSIF p_len IS NULL AND p_del IS NOT NULL
THEN
-- Split into word chunks by deliminator if length is not specified
v_line_array:= v_str_array;
ELSE
-- Do nothing to the string if no delimiter OR length is specified
v_line_array(1):= p_str;
RETURN v_line_array;
END IF;

RETURN v_line_array;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Exception in function "SPLIT_STRING": '||SQLERRM);
v_line_array(1):= p_str;
RETURN v_line_array;
END SPLIT_STRING;
/


This is how I am "testing" the function in a simulated table load environment where the function is called on every entry in a table.

DECLARE
type array_type is table of varchar2(32767) index by binary_integer;
v_str_array array_type;
P_STR VARCHAR2(32767);
P_DEL VARCHAR2(32767);
P_LEN NUMBER;
P_FORCE NUMBER;
V_ITTERATIONS NUMBER;
V_EXECUTIONTIME NUMBER;
V_TIMESTART NUMBER;
BEGIN
P_STR := 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia. Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi. Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis. Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id purus. Ut varius tincidunt libero. Phasellus dolor. Maecenas vestibulum mollis diam. Pellentesque ut neque. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac felis quis tortor malesuada pretium. Pellentesque auctor neque nec urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis pretium feugiat. Vivamus quis mi. Phasellus a est. Phas';
P_DEL := ' ';
P_LEN := 60;
P_FORCE := 1;
V_ITTERATIONS := 10;

--Get Start Time
V_TIMESTART:= DBMS_UTILITY.Get_time();

--Run Testing
FOR stress_test_loop IN 1..V_ITTERATIONS
LOOP
v_str_array:= SPLIT_STRING ( P_STR, P_DEL, P_LEN, P_FORCE );
END LOOP;

--Calculate run time
SELECT (dbms_utility.get_time() - V_TIMESTART) INTO V_EXECUTIONTIME FROM DUAL;

--Display output
DBMS_OUTPUT.Put_Line('Completed sucessfully.'||CHR(10)||'Itterations: '||V_ITTERATIONS||CHR(10)||'Execution Time: '||V_EXECUTIONTIME||'ms');
DBMS_OUTPUT.Put_Line('Function output: '||CHR(10));

FOR line IN 1..v_str_array.LAST
LOOP
DBMS_OUTPUT.Put_Line(v_str_array(line));
END LOOP;

COMMIT;
END;


The function simply wraps text like this (with force =0):

1---------n
This is a
test
testing
123...


Then like this with force = 1:

1---------n
This is a
test testi
g 123...


So far the function performs pretty badly with a high number of sequntial calls:
enter image description here

I would like to get it to under 1 sec for atleast 500 thuosand calls idealy (although this may be wishful thinking).

SOLUTION:

CREATE OR REPLACE FUNCTION SPLIT_STRINGV2 (
p_str VARCHAR2, --String input to split
p_len INTEGER, --Length of each chunk (If NULL, will only split on delimiter)
p_del VARCHAR2 DEFAULT ' ', --Delimiter (If NULL, will only split on length [p_len NOT NULL])
p_force INTEGER DEFAULT 0)
RETURN T_VAR_32K IS
--

v_line_array T_VAR_32K;
v_len PLS_INTEGER; -- Total string length
v_chunk VARCHAR2(32767); -- Current chunk
v_char VARCHAR2(1 CHAR); -- Current character
v_line PLS_INTEGER DEFAULT 1;
v_start PLS_INTEGER DEFAULT 1;
v_pos PLS_INTEGER DEFAULT 1;
v_prev PLS_INTEGER;
v_next PLS_INTEGER;
v_numb PLS_INTEGER;
BEGIN
--Determine total string length
v_len:= LENGTH(p_str);

WHILE (v_pos<=v_len)
LOOP
v_chunk:= SUBSTR(p_str, v_start, p_len);


IF p_force = 0 AND p_del IS NOT NULL
THEN
v_prev:=INSTR(SUBSTR(p_str, v_start, p_len), p_del, -1);
v_next:=INSTR(SUBSTR(p_str, v_prev+1, v_len), p_del, 1)+v_prev;
v_char:=SUBSTR(v_chunk, LENGTH(v_chunk)-1, 1);

IF v_char=p_del
THEN
v_chunk:=SUBSTR(v_chunk, 1, p_len);
ELSE
v_numb:=LENGTH(SUBSTR(v_chunk, 1, p_len)) - length(replace(SUBSTR(v_chunk, 1, p_len), p_del));
IF v_prev != 0 AND v_numb <= 1
THEN
v_chunk:=SUBSTR(p_str, v_start, v_next);
ELSIF v_prev !=0 AND v_numb > 1
THEN
v_chunk:=SUBSTR(p_str, v_start, v_prev);
ELSE
v_chunk:=SUBSTR(v_chunk, 1, p_len);
END IF;
END IF;
ELSE
v_chunk:=SUBSTR(v_chunk, 1, p_len);
END IF;

IF v_chunk IS NOT NULL
THEN
v_line_array(v_line):=v_chunk;
END IF;

v_pos:= v_pos+LENGTH(v_chunk);
v_line:= v_line+1;
v_start := v_pos+1;
END LOOP;

RETURN v_line_array;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Exception in function "SPLIT_STRING": '||SQLERRM);
v_line_array(1):= p_str;
RETURN v_line_array;
END SPLIT_STRINGV2;
/


In the end, I had a VERY impressive execution speed reduction from about 20min to 10 seconds for 1,000,000 calls to the function. That's a 99% improvement!

Answer

See here: https://community.oracle.com/thread/3700483 The main idea is:

  SELECT rn, txt
    FROM (    SELECT ROWNUM AS rn, TRIM (REGEXP_SUBSTR (:p_txt, '.{1,' || :v_linesize || '}( |$)', 1, ROWNUM)) AS txt
                FROM DUAL
          CONNECT BY ROWNUM <= CEIL (LENGTH (:p_txt) / :v_linesize) + 10)
   WHERE txt IS NOT NULL
ORDER BY rn

The delimiter here is ' '.