Home » Developer & Programmer » JDeveloper, Java & XML » Question regarding the processing of large XML files
icon6.gif  Question regarding the processing of large XML files [message #112662] Mon, 28 March 2005 11:20
bsc7080oqc
Messages: 2
Registered: March 2005
Location: Shelbyville, KY
Junior Member

Has anyone had any luck and/or experience loading batch data via XML ? I am performing a proof of concept to see if I can parse and process a 17mb XML file. My structure is like that of the following :

<TRANSACTION_SET>
<TRANSACTIONS>
<TRANSACTION>
<TROW>
<EMPLOYEE_NUMBER/>
<TIME_ENTRY_DATE/>
<SUBMISSION_DATE/>
<COID/>
<UDN/>
<PAYROLL_ID/>
<LAST_UPDATED_BY/>
<LAST_UPDATE_DATE/>
<CREATED_BY/>
<CREATION_DATE/>
<TIME_ENTRY_WAGES/>
<VAC_TIME/>
<PERSONAL_DAY/>
<SICK_TIME/>
<SYMPATHY_PAY/>
<JURY_DUTY_PAY/>
<UNPAID_WC/>
<UNPAID_OTHER/>
<SHIFT_HRS/>
<SHIFT_3_HOURS/>
<WEEKEND_DIFFERENTIAL/>
<ON_CALL_HOURS/>
<ON_CALL_DAILY/>
<ON_CALL_HOLIDAY/>
<ON_CALL_FRIDAY/>
<ON_CALL_WEEKEND/>
<LUNCH_WORKED/>
<CA_DOUBLETIME/>
<HOLIDAY_PAY/>
<ON_STD/>
<STD_QUALIFYING_DONE/>
<STD_QUAL_NO_VAC/>
<STD_SUPP_NO_STIP/>
<STD_SUPP_NO_VAC/>
<ON_FMLA/>
<FMLA_INT_CONT/>
<FMLA_SAVE_VAC_WEEK/>
<FMLA_NO_STIP/>
<FMLA_NO_VAC/>
<STD_QUALIFYING_HOURS/>
<STD_FMLA_HOURS/>
<FMLA_ONLY_HOURS/>
<STD_100_HOURS/>
</TROW>
</TRANSACTION>
</TRANSACTIONS>
</TRANSACTION_SET>


My processing procedure is the following. I am getting hung up around STEP9 in my process. Not really hung up, I suspect, just very slow in processing. I wanted to parse the clob completly but for testing purposes only, process and insert into my custom table only 10 records.

PROCEDURE process_actotk_table (
p_clob IN CLOB
, p_success OUT VARCHAR2
)
IS
l_message VARCHAR2 (32767);
l_message_array_empty humcust.hum_toolbox.vcmax_table;
l_message_array humcust.hum_toolbox.vcmax_table
:= l_message_array_empty;
l_success VARCHAR2 (2000);
l_bfile BFILE;
l_clob CLOB;
l_parser DBMS_XMLPARSER.parser;
l_doc DBMS_XMLDOM.domdocument;
l_nl DBMS_XMLDOM.domnodelist;
l_n DBMS_XMLDOM.domnode;
l_temp VARCHAR2 (1000);
l_value VARCHAR2 (1000);
l_count PLS_INTEGER := 1;
wellformed BOOLEAN;
errmessage VARCHAR2 (32000);
lbok BOOLEAN;

--
--
-- create table temp_hum_at_feed
-- (
-- employee_number VARCHAR2 (30 BYTE)
-- , time_entry_date VARCHAR2 (20 BYTE)
-- , submission_date VARCHAR2 (20 BYTE)
-- , coid NUMBER (5)
-- , udn NUMBER (3)
-- , payroll_id NUMBER (2)
-- , last_updated_by VARCHAR2 (30 BYTE)
-- , last_update_date VARCHAR2 (20 BYTE)
-- , created_by VARCHAR2 (30 BYTE)
-- , creation_date VARCHAR2 (20 BYTE)
-- , time_entry_wages NUMBER (15, 5)
-- , vac_time NUMBER (15, 5)
-- , personal_day NUMBER (15, 5)
-- , sick_time NUMBER (15, 5)
-- , sympathy_pay NUMBER (15, 5)
-- , jury_duty_pay NUMBER (15, 5)
-- , unpaid_wc NUMBER (15, 5)
-- , unpaid_other NUMBER (15, 5)
-- , shift_hrs NUMBER (15, 5)
-- , shift_3_hours NUMBER (15, 5)
-- , weekend_differential NUMBER (15, 5)
-- , on_call_hours NUMBER (15, 5)
-- , on_call_daily NUMBER (3)
-- , on_call_holiday NUMBER (3)
-- , on_call_friday NUMBER (3)
-- , on_call_weekend NUMBER (3)
-- , lunch_worked NUMBER (15, 5)
-- , ca_doubletime NUMBER (15, 5)
-- , holiday_pay NUMBER (15, 5)
-- , on_std CHAR (1 BYTE)
-- , std_qualifying_done CHAR (1 BYTE)
-- , std_qual_no_vac CHAR (1 BYTE)
-- , std_supp_no_stip CHAR (1 BYTE)
-- , std_supp_no_vac CHAR (1 BYTE)
-- , on_fmla CHAR (1 BYTE)
-- , fmla_int_cont CHAR (1 BYTE)
-- , fmla_save_vac_week CHAR (1 BYTE)
-- , fmla_no_stip CHAR (1 BYTE)
-- , fmla_no_vac CHAR (1 BYTE)
-- , std_qualifying_hours NUMBER (15, 5)
-- , std_fmla_hours NUMBER (15, 5)
-- , fmla_only_hours NUMBER (15, 5)
-- , std_100_hours NUMBER (15, 5)
-- );

--
TYPE r_tab_type IS RECORD (
employee_number VARCHAR2 (30 BYTE)
, time_entry_date VARCHAR2 (20 BYTE)
, submission_date VARCHAR2 (20 BYTE)
, coid NUMBER (5)
, udn NUMBER (3)
, payroll_id NUMBER (2)
, last_updated_by VARCHAR2 (30 BYTE)
, last_update_date VARCHAR2 (20 BYTE)
, created_by VARCHAR2 (30 BYTE)
, creation_date VARCHAR2 (20 BYTE)
, time_entry_wages NUMBER (15, 5)
, vac_time NUMBER (15, 5)
, personal_day NUMBER (15, 5)
, sick_time NUMBER (15, 5)
, sympathy_pay NUMBER (15, 5)
, jury_duty_pay NUMBER (15, 5)
, unpaid_wc NUMBER (15, 5)
, unpaid_other NUMBER (15, 5)
, shift_hrs NUMBER (15, 5)
, shift_3_hours NUMBER (15, 5)
, weekend_differential NUMBER (15, 5)
, on_call_hours NUMBER (15, 5)
, on_call_daily NUMBER (3)
, on_call_holiday NUMBER (3)
, on_call_friday NUMBER (3)
, on_call_weekend NUMBER (3)
, lunch_worked NUMBER (15, 5)
, ca_doubletime NUMBER (15, 5)
, holiday_pay NUMBER (15, 5)
, on_std CHAR (1 BYTE)
, std_qualifying_done CHAR (1 BYTE)
, std_qual_no_vac CHAR (1 BYTE)
, std_supp_no_stip CHAR (1 BYTE)
, std_supp_no_vac CHAR (1 BYTE)
, on_fmla CHAR (1 BYTE)
, fmla_int_cont CHAR (1 BYTE)
, fmla_save_vac_week CHAR (1 BYTE)
, fmla_no_stip CHAR (1 BYTE)
, fmla_no_vac CHAR (1 BYTE)
, std_qualifying_hours NUMBER (15, 5)
, std_fmla_hours NUMBER (15, 5)
, fmla_only_hours NUMBER (15, 5)
, std_100_hours NUMBER (15, 5)
);

--
TYPE tab_type IS TABLE OF r_tab_type;

--
--
t_tab tab_type := tab_type ();
l_step VARCHAR2 (100);
BEGIN
DBMS_APPLICATION_INFO.set_module ('PROCESS_ACTOTK_TABLE', NULL);
l_clob := p_clob;
l_step := 'STEP1 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Check the syntax of the current 'xmldoc' CLOB in the loop
checkxmlinclob (c => l_clob
, wellformed => wellformed
, error => errmessage
);
--
l_step := 'STEP2 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);

IF NOT wellformed
THEN
l_message :=
'ERROR :: XML Doc is ill-formed :: '
|| errmessage;
l_message_array (1) := REPLACE (l_message
, CHR (10)
, ' '
);
l_step := 'STEP3 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
ELSE
l_step := 'STEP4 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- make sure implicit date conversions are performed correctly
DBMS_SESSION.set_nls ('NLS_DATE_FORMAT', '''DD-MON-YYYY''');
l_step := 'STEP5 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Create a parser.
l_parser := DBMS_XMLPARSER.newparser;
l_step := 'STEP6 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Parse the document and create a new DOM document.
DBMS_XMLPARSER.parseclob (l_parser, l_clob);
l_doc := DBMS_XMLPARSER.getdocument (l_parser);
l_step := 'STEP7 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Free resources associated with the Parser now it is no longer needed.
DBMS_XMLPARSER.freeparser (l_parser);
l_step := 'STEP8 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Get a list of all the EMP nodes in the document using the XPATH syntax.
l_nl :=
DBMS_XSLPROCESSOR.selectnodes
(DBMS_XMLDOM.makenode (l_doc)
, '/TRANSACTION_SET/TRANSACTIONS/TRANSACTION/TROW');
l_step := 'STEP9 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);

-- Loop through the list and create a new record in a tble collection
-- for each EMP record.
FOR cur_emp IN 0 .. DBMS_XMLDOM.getlength (l_nl)
- 1
LOOP
l_n := DBMS_XMLDOM.item (l_nl, cur_emp);
t_tab.EXTEND;
-- MAPPING
-- Use XPATH syntax to assign values to he elements of the collection.
DBMS_XSLPROCESSOR.valueof (l_n
, 'EMPLOYEE_NUMBER/text()'
, t_tab (t_tab.LAST).employee_number
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'TIME_ENTRY_DATE/text()'
, t_tab (t_tab.LAST).time_entry_date
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'SUBMISSION_DATE/text()'
, t_tab (t_tab.LAST).submission_date
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'COID/text()'
, t_tab (t_tab.LAST).coid
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'UDN/text()'
, t_tab (t_tab.LAST).udn
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'PAYROLL_ID/text()'
, t_tab (t_tab.LAST).payroll_id
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'LAST_UPDATED_BY/text()'
, t_tab (t_tab.LAST).last_updated_by
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'LAST_UPDATE_DATE/text()'
, t_tab (t_tab.LAST).last_update_date
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'CREATED_BY/text()'
, t_tab (t_tab.LAST).created_by
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'CREATION_DATE/text()'
, t_tab (t_tab.LAST).creation_date
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'TIME_ENTRY_WAGES/text()'
, t_tab (t_tab.LAST).time_entry_wages
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'VAC_TIME/text()'
, t_tab (t_tab.LAST).vac_time
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'PERSONAL_DAY/text()'
, t_tab (t_tab.LAST).personal_day
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'SICK_TIME/text()'
, t_tab (t_tab.LAST).sick_time
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'SYMPATHY_PAY/text()'
, t_tab (t_tab.LAST).sympathy_pay
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'JURY_DUTY_PAY/text()'
, t_tab (t_tab.LAST).jury_duty_pay
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'UNPAID_WC/text()'
, t_tab (t_tab.LAST).unpaid_wc
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'UNPAID_OTHER/text()'
, t_tab (t_tab.LAST).unpaid_other
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'SHIFT_HRS/text()'
, t_tab (t_tab.LAST).shift_hrs
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'SHIFT_3_HOURS/text()'
, t_tab (t_tab.LAST).shift_3_hours
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'WEEKEND_DIFFERENTIAL/text()'
, t_tab (t_tab.LAST).weekend_differential
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_CALL_HOURS/text()'
, t_tab (t_tab.LAST).on_call_hours
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_CALL_DAILY/text()'
, t_tab (t_tab.LAST).on_call_daily
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_CALL_HOLIDAY/text()'
, t_tab (t_tab.LAST).on_call_holiday
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_CALL_FRIDAY/text()'
, t_tab (t_tab.LAST).on_call_friday
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_CALL_WEEKEND/text()'
, t_tab (t_tab.LAST).on_call_weekend
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'LUNCH_WORKED/text()'
, t_tab (t_tab.LAST).lunch_worked
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'CA_DOUBLETIME/text()'
, t_tab (t_tab.LAST).ca_doubletime
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'HOLIDAY_PAY/text()'
, t_tab (t_tab.LAST).holiday_pay
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_STD/text()'
, t_tab (t_tab.LAST).on_std
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_QUALIFYING_DONE/text()'
, t_tab (t_tab.LAST).std_qualifying_done
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_QUAL_NO_VAC/text()'
, t_tab (t_tab.LAST).std_qual_no_vac
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_SUPP_NO_STIP/text()'
, t_tab (t_tab.LAST).std_supp_no_stip
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_SUPP_NO_VAC/text()'
, t_tab (t_tab.LAST).std_supp_no_vac
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'ON_FMLA/text()'
, t_tab (t_tab.LAST).on_fmla
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'FMLA_INT_CONT/text()'
, t_tab (t_tab.LAST).fmla_int_cont
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'FMLA_SAVE_VAC_WEEK/text()'
, t_tab (t_tab.LAST).fmla_save_vac_week
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'FMLA_NO_STIP/text()'
, t_tab (t_tab.LAST).fmla_no_stip
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'FMLA_NO_VAC/text()'
, t_tab (t_tab.LAST).fmla_no_vac
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_QUALIFYING_HOURS/text()'
, t_tab (t_tab.LAST).std_qualifying_hours
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_FMLA_HOURS/text()'
, t_tab (t_tab.LAST).std_fmla_hours
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'FMLA_ONLY_HOURS/text()'
, t_tab (t_tab.LAST).fmla_only_hours
);
DBMS_XSLPROCESSOR.valueof (l_n
, 'STD_100_HOURS/text()'
, t_tab (t_tab.LAST).std_100_hours
);
END LOOP;

l_step := 'STEP10 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
-- Insert data into the real EMP table from the table collection.
-- Form better performance multiple collections should be used to allow
-- bulk binding using the FORALL construct but this would make the code
-- too long-winded for this example.
lbok :=
hum_utilities_pk.exec_ddl
(pnstatement_txt => 'TRUNCATE TEMP_HUM_AT_FEED DROP STORAGE'
, pverrmsg => errmessage);
l_step := 'STEP11 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);

IF errmessage IS NOT NULL
THEN
l_message :=
'ERROR :: TEMP_HUM_AT_FEED TRUNCATE FAILED :: '
|| errmessage;
l_message_array (1) := REPLACE (l_message
, CHR (10)
, ' '
);
ELSE
FOR cur_emp IN t_tab.FIRST .. t_tab.LAST
LOOP
INSERT INTO temp_hum_at_feed
(employee_number
, time_entry_date
, submission_date
, coid
, udn
, payroll_id
, last_updated_by
, last_update_date
, created_by
, creation_date
, time_entry_wages
, vac_time
, personal_day
, sick_time
, sympathy_pay
, jury_duty_pay
, unpaid_wc
, unpaid_other
, shift_hrs
, shift_3_hours
, weekend_differential
, on_call_hours
, on_call_daily
, on_call_holiday
, on_call_friday
, on_call_weekend
, lunch_worked
, ca_doubletime
, holiday_pay
, on_std
, std_qualifying_done
, std_qual_no_vac
, std_supp_no_stip
, std_supp_no_vac
, on_fmla
, fmla_int_cont
, fmla_save_vac_week
, fmla_no_stip
, fmla_no_vac
, std_qualifying_hours
, std_fmla_hours
, fmla_only_hours
, std_100_hours
)
VALUES (t_tab (cur_emp).employee_number
, t_tab (cur_emp).time_entry_date
, t_tab (cur_emp).submission_date
, t_tab (cur_emp).coid
, t_tab (cur_emp).udn
, t_tab (cur_emp).payroll_id
, t_tab (cur_emp).last_updated_by
, t_tab (cur_emp).last_update_date
, t_tab (cur_emp).created_by
, t_tab (cur_emp).creation_date
, t_tab (cur_emp).time_entry_wages
, t_tab (cur_emp).vac_time
, t_tab (cur_emp).personal_day
, t_tab (cur_emp).sick_time
, t_tab (cur_emp).sympathy_pay
, t_tab (cur_emp).jury_duty_pay
, t_tab (cur_emp).unpaid_wc
, t_tab (cur_emp).unpaid_other
, t_tab (cur_emp).shift_hrs
, t_tab (cur_emp).shift_3_hours
, t_tab (cur_emp).weekend_differential
, t_tab (cur_emp).on_call_hours
, t_tab (cur_emp).on_call_daily
, t_tab (cur_emp).on_call_holiday
, t_tab (cur_emp).on_call_friday
, t_tab (cur_emp).on_call_weekend
, t_tab (cur_emp).lunch_worked
, t_tab (cur_emp).ca_doubletime
, t_tab (cur_emp).holiday_pay
, t_tab (cur_emp).on_std
, t_tab (cur_emp).std_qualifying_done
, t_tab (cur_emp).std_qual_no_vac
, t_tab (cur_emp).std_supp_no_stip
, t_tab (cur_emp).std_supp_no_vac
, t_tab (cur_emp).on_fmla
, t_tab (cur_emp).fmla_int_cont
, t_tab (cur_emp).fmla_save_vac_week
, t_tab (cur_emp).fmla_no_stip
, t_tab (cur_emp).fmla_no_vac
, t_tab (cur_emp).std_qualifying_hours
, t_tab (cur_emp).std_fmla_hours
, t_tab (cur_emp).fmla_only_hours
, t_tab (cur_emp).std_100_hours
);

COMMIT;
l_count := l_count
+ 1;
EXIT WHEN l_count = 10;
END LOOP;

l_message :=
'File Generated. Run SELECT * FROM APPS.TEMP_HUM_AT_FEED to test and view data.';
l_message_array (1) := REPLACE (l_message
, CHR (10)
, ' '
);
END IF;

-- Free any resources associated with the document now it
-- is no longer needed.
DBMS_XMLDOM.freedocument (l_doc);
END IF;

l_step := 'STEP12 :: PROCESS_ACTOTK_TABLE';
DBMS_APPLICATION_INFO.set_action (l_step);
hum_notifications.send
(p_notification_grp_id => 0
, p_to_name => 'ztestuser005@humana.com'
, p_from_name => 'axta_test@humana.com'
, p_subject => 'TESTING :: Records Processed Email'
, p_priority => 3
, p_message_array => l_message_array
, p_attachment_name => NULL
, p_directory => NULL
, p_filename => NULL
, p_success => l_success
);
p_success := 'OK';
l_step := 'STEP99 :: PROCESS_ACTOTK_TABLE';
EXCEPTION
WHEN OTHERS
THEN
p_success :=
'ERROR :: '
|| l_step
|| ' :: '
|| SQLERRM;
l_message :=
'ERROR :: '
|| l_step
|| ' :: '
|| SQLERRM;
l_message_array (1) := REPLACE (l_message
, CHR (10)
, ' '
);
hum_notifications.send
(p_notification_grp_id => 0
, p_to_name => 'ztestuser005@humana.com'
, p_from_name => 'axta_test@humana.com'
, p_subject => 'Unexpected error ['
|| SQLCODE
|| '] on AXTA batch testing in HUM_XML.PROCESS_XMLDATA'
, p_priority => 3
, p_message_array => l_message_array
, p_attachment_name => NULL
, p_directory => NULL
, p_filename => NULL
, p_success => l_success
);
DBMS_XMLPARSER.freeparser (l_parser);
DBMS_XMLDOM.freedocument (l_doc);
END process_actotk_table;




** My end goal is to do one of two things ... Either determine that XML batch processing of large data files is feasible time wise and system resource wise OR that it is in fact a pipe dream and should be placed in file 13 and discarded.


=============================================
--
Barry Chase
Consulting Systems Engineer
HR Payroll Production and Support
Louisville, KY
RVS3
--
Email :
echo cbahesh@muna.aocm | sed 's/\(.\)\(.\)/\2\1/g'
--
=============================================
  • Attachment: HUM_XML.zip
    (Size: 12.25KB, Downloaded 1333 times)
Previous Topic: Query for XML node
Next Topic: JSP front end interface
Goto Forum:
  


Current Time: Fri Apr 19 14:28:06 CDT 2024