透過DBMS_LOB將Xml資料匯入,再透過TABLE EXTRACTVALUE的方式去解析,若是巢狀結構,就用XMLTABLE去解析,並解決xmlns xmlnamespaces的問題
本次需求主要是要將氣象局的開放資料匯入
https://opendata.cwb.gov.tw/dataset/climate/C-B0024-001
需登入取得註冊碼後透過爬蟲將資料下載產生Xml檔案
1.建立匯入Xml資料的Function
--Spec
CREATE OR REPLACE PACKAGE apps.lu_pub AS
FUNCTION get_clob_doc(
p_filename IN VARCHAR2,
p_charset IN VARCHAR2 DEFAULT NULL,
p_dir_name IN VARCHAR2 DEFAULT NULL
)
RETURN CLOB
DETERMINISTIC;
END lu_pub;
--Body
CREATE OR REPLACE PACKAGE BODY apps.lu_pub AS
FUNCTION get_clob_doc(
p_filename IN VARCHAR2,
p_charset IN VARCHAR2 DEFAULT NULL,
p_dir_name IN VARCHAR2 DEFAULT NULL
)
RETURN CLOB
DETERMINISTIC IS
file BFILE; -- := BFILENAME('ECX_UTL_XSLT_DIR_OBJ', p_filename);
charcontent CLOB := ' ';
targetfile BFILE;
lang_ctx NUMBER := DBMS_LOB.default_lang_ctx;
charset_id NUMBER := 0;
src_offset NUMBER := 1;
dst_offset NUMBER := 1;
warning NUMBER;
BEGIN
file := BFILENAME(NVL(p_dir_name, 'ECX_UTL_XSLT_DIR_OBJ'), p_filename);
IF p_charset IS NOT NULL THEN
charset_id := NLS_CHARSET_ID(p_charset);
END IF;
targetfile := file;
DBMS_LOB.fileopen(targetfile, DBMS_LOB.file_readonly);
DBMS_LOB.loadclobfromfile(
charcontent,
targetfile,
DBMS_LOB.getlength(targetfile),
src_offset,
dst_offset,
charset_id,
lang_ctx,
warning
);
DBMS_LOB.fileclose(targetfile);
RETURN charcontent;
END get_clob_doc;
END lu_pub;
2.建立匯入資料的Table
CREATE TABLE lu_xmltable
(
xml_id NUMBER,
file_path VARCHAR2(3000),
xml_data XMLTYPE
);
3.上傳Xml檔案到指定位置(/usr/tmp)
4.將XML Script匯入Table
INSERT INTO lu_xmltable
VALUES
(
1,
'/usr/tmp',
xmltype(lu_pub.get_clob_doc('C-B0024-001.xml','UTF8','ECX_UTL_XSLT_DIR_OBJ'))
);
5.解析Xml資料結構
<?xml version="1.0" encoding="UTF-8"?>
<cwbopendata xmlns="urn:cwb:gov:tw:cwbcommon:0.1">
<dataset>
<location>
<locationName>Tamsui,淡水</locationName>
<stationId>466900</stationId>
<time>
<obsTime>2019/07/04 01:00</obsTime>
<weatherElement>
<elementName>測站氣壓</elementName>
<elementValue>
<value>1005.9</value>
</elementValue>
</weatherElement>
</time>
</location>
<dataset>
</cwbopendata>
cwbopendata
dataset
location
locationName
stationId
time
obsTime
weatherElement
elementName
elementValue
Value
SELECT xt.location_name,
xt.station_id,
TO_DATE(REPLACE(xt1.obs_time, '24:', '00:'), 'YYYY/MM/DD HH24:MI') + DECODE(INSTR(xt1.obs_time, '24:'), 0, 0, 1)
obs_time,
xt2.element_name,
xt3.VALUE value
FROM lu_xmltable x,
XMLTABLE(
xmlnamespaces(DEFAULT 'urn:cwb:gov:tw:cwbcommon:0.1'), '/cwbopendata/dataset/location'
PASSING x.xml_data
COLUMNS location_name VARCHAR2(50) PATH 'locationName',
station_id NUMBER PATH 'stationId',
time XMLTYPE PATH 'time') xt,
XMLTABLE(
xmlnamespaces(DEFAULT 'urn:cwb:gov:tw:cwbcommon:0.1'), '/time'
PASSING xt.time
COLUMNS obs_time VARCHAR2(30) PATH 'obsTime', weather_element XMLTYPE PATH 'weatherElement'
) xt1,
XMLTABLE(
xmlnamespaces(DEFAULT 'urn:cwb:gov:tw:cwbcommon:0.1'), '/weatherElement'
PASSING xt1.weather_element
COLUMNS element_name VARCHAR2(30) PATH 'elementName', element_value XMLTYPE PATH 'elementValue'
) xt2,
XMLTABLE(
xmlnamespaces(DEFAULT 'urn:cwb:gov:tw:cwbcommon:0.1'), '/elementValue'
PASSING xt2.element_value
COLUMNS VALUE VARCHAR2(30) PATH 'value'
) xt3
WHERE 1 = 1
AND xt2.element_name in ('溫度','降水量')
6.資料結構是巢狀結構,所以用XMLTABLE,如果非巢狀結構,較單純,直接應用TABLE加上EXTRACTVALUE取值即可
SELECT EXTRACTVALUE(VALUE(n1), '/location/locationName') location_name,
EXTRACTVALUE(VALUE(n1), '/location/stationId') station_id
FROM lu_xmltable x,
TABLE(XMLSEQUENCE(EXTRACT(x.xml_data, '/cwbopendata/dataset/location', 'xmlns="urn:cwb:gov:tw:cwbcommon:0.1"'))) n1
應用XMLTABLE的時候,一層一層往下展
遇到應用xmlns的時候,每個都要加xmlnamespaces(DEFAULT 'urn:cwb:gov:tw:cwbcommon:0.1')
起始從/cwbopendata/dataset/location開始
在這層因為time之下又有新的結構,所以要用xmltype去承接,下一個xmltable就再把原本Table的XMLTYPE欄位給傳進去