doc-exports/docs/dli/sqlreference/dli_08_0411.html
Su, Xiaomeng 04d4597cf3 dli_sqlreference_0511_version
Reviewed-by: Pruthi, Vineet <vineet.pruthi@t-systems.com>
Co-authored-by: Su, Xiaomeng <suxiaomeng1@huawei.com>
Co-committed-by: Su, Xiaomeng <suxiaomeng1@huawei.com>
2023-11-02 14:34:08 +00:00

181 lines
15 KiB
HTML

<a name="dli_08_0411"></a><a name="dli_08_0411"></a>
<h1 class="topictitle1">CSV</h1>
<div id="body8662426"><div class="section" id="dli_08_0411__en-us_topic_0000001262815694_section226021020326"><h4 class="sectiontitle">Function</h4><p id="dli_08_0411__en-us_topic_0000001262815694_p31920232323">The CSV format allows you to read and write CSV data based on a CSV schema. Currently, the CSV schema is derived from the table schema.</p>
</div>
<div class="section" id="dli_08_0411__en-us_topic_0000001262815694_section122491371116"><h4 class="sectiontitle">Supported Connectors</h4><ul id="dli_08_0411__en-us_topic_0000001262815694_ul188074312166"><li id="dli_08_0411__en-us_topic_0000001262815694_li18073310163">Kafka</li><li id="dli_08_0411__en-us_topic_0000001262815694_li16323181520161">Upsert Kafka</li></ul>
</div>
<div class="section" id="dli_08_0411__en-us_topic_0000001262815694_section15970162015401"><h4 class="sectiontitle">Parameters</h4>
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="dli_08_0411__en-us_topic_0000001262815694_table6702534155617" frame="border" border="1" rules="all"><caption><b>Table 1 </b>Parameters</caption><thead align="left"><tr id="dli_08_0411__en-us_topic_0000001262815694_row1270218346560"><th align="left" class="cellrowborder" valign="top" width="15.55%" id="mcps1.3.3.2.2.6.1.1"><p id="dli_08_0411__en-us_topic_0000001262815694_p2070203417568">Parameter</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="11.19%" id="mcps1.3.3.2.2.6.1.2"><p id="dli_08_0411__en-us_topic_0000001262815694_p1270273412568">Mandatory</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="10.58%" id="mcps1.3.3.2.2.6.1.3"><p id="dli_08_0411__en-us_topic_0000001262815694_p1703634175615">Default value</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="13.5%" id="mcps1.3.3.2.2.6.1.4"><p id="dli_08_0411__en-us_topic_0000001262815694_p207031134135615">Type</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="49.18%" id="mcps1.3.3.2.2.6.1.5"><p id="dli_08_0411__en-us_topic_0000001262815694_p19703103414569">Description</p>
</th>
</tr>
</thead>
<tbody><tr id="dli_08_0411__en-us_topic_0000001262815694_row6703203412568"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p103551327175815">format</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p93551727105810">Yes</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p11355172712585">None</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p17355122775815">String</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1035512713585">Format to be used. Set the value to <strong id="dli_08_0411__en-us_topic_0000001262815694_b1170314499458">csv</strong>.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row16703113495617"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1874315505918">csv.field-delimiter</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p674310513592">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p20666171265910">,</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p157431051594">String</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p9743853595">Field delimiter character, which must be a single character. You can use a backslash to specify special characters. For example, <strong id="dli_08_0411__en-us_topic_0000001262815694_b1712184095119">\t</strong> represents the tab character. You can also use Unicode to specify them in plain SQL. For example, <strong id="dli_08_0411__en-us_topic_0000001262815694_b1170115315526">'csv.field-delimiter' = '\u0001'</strong> represents the 0x01 character.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row5943184510584"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p14743756594">csv.disable-quote-character</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p274318516591">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p16743115105912">false</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p67431451591">Boolean</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p474319513597">Whether to disable the quote character for enclosing field values. If you set this parameter to <strong id="dli_08_0411__en-us_topic_0000001262815694_b954151135412">true</strong>, <strong id="dli_08_0411__en-us_topic_0000001262815694_b0879115319544">csv.quote-character</strong> cannot be set.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row4216952155814"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1174395115913">csv.quote-character</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1174310512596">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p8813181815595">"</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p3743145155920">String</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p3743155135919">Quote character for enclosing field values.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row512914914587"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1674385175912">csv.allow-comments</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p2074313515912">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1774315165917">false</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p12743135205919">Boolean</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p674314595919">Whether to ignore comment lines that start with <strong id="dli_08_0411__en-us_topic_0000001262815694_b6879134515710">#</strong>. If you set this parameter to <strong id="dli_08_0411__en-us_topic_0000001262815694_b1141133411574">true</strong>, make sure to also ignore parse errors (csv.ignore-parse-errors) to allow empty rows.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row2614554145814"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p157437512598">csv.ignore-parse-errors</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p3743145175918">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p074455175910">false</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p274410516599">Boolean</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p974445105910">Whether to skip fields and rows with parse errors instead of failing. The default value is <strong id="dli_08_0411__en-us_topic_0000001262815694_b1890513775917">false</strong>, indicating that an error will be thrown. If this parameter is set to true, fields with parse errors are set to null.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row6703163435617"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p15744185105910">csv.array-element-delimiter</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p17448585914">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p894143665919">;</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p6744115155910">String</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p18744752598">Array element delimiter string for separating array and row element values.</p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row1370353475616"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p6744355597">csv.escape-character</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p147442555913">No</p>
<p id="dli_08_0411__en-us_topic_0000001262815694_p974418515591"></p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p774405155911">None</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1674410519597">String</p>
<p id="dli_08_0411__en-us_topic_0000001262815694_p12744135175910"></p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1774418515592">Escape character for escaping values.</p>
<p id="dli_08_0411__en-us_topic_0000001262815694_p774495105918"></p>
</td>
</tr>
<tr id="dli_08_0411__en-us_topic_0000001262815694_row7742944207"><td class="cellrowborder" valign="top" width="15.55%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_0411__en-us_topic_0000001262815694_p166610495216">csv.null-literal</p>
</td>
<td class="cellrowborder" valign="top" width="11.19%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1366616499212">No</p>
</td>
<td class="cellrowborder" valign="top" width="10.58%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_0411__en-us_topic_0000001262815694_p66663498211">None</p>
</td>
<td class="cellrowborder" valign="top" width="13.5%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_0411__en-us_topic_0000001262815694_p136661049728">String</p>
</td>
<td class="cellrowborder" valign="top" width="49.18%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_0411__en-us_topic_0000001262815694_p1866616491129">Null literal string that is interpreted as a null value.</p>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="section" id="dli_08_0411__en-us_topic_0000001262815694_section13511182319310"><h4 class="sectiontitle">Example</h4><p id="dli_08_0411__en-us_topic_0000001262815694_p15881132116016">Read CSV data from a source Kafka topic and write the data to a sink Kafka topic.</p>
<ol id="dli_08_0411__en-us_topic_0000001262815694_ol840395722311"><li id="dli_08_0411__en-us_topic_0000001262815694_li04031578234"><span>Create a datasource connection to the VPC and subnet where Kafka is located, and bind the connection to the queue. Set a security group and inbound rule to allow access from the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose <strong id="dli_08_0411__en-us_topic_0000001262815694_b23711581413">More</strong> &gt; <strong id="dli_08_0411__en-us_topic_0000001262815694_b43711981311">Test Address Connectivity</strong> in the <strong id="dli_08_0411__en-us_topic_0000001262815694_b1371282116">Operation</strong> column. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.</span></li><li id="dli_08_0411__en-us_topic_0000001262815694_li1599913011242"><span>Create a Flink OpenSource SQL job. Copy the following statements and submit the job:</span><p><pre class="screen" id="dli_08_0411__en-us_topic_0000001262815694_screen299960162418">CREATE TABLE kafkaSource (
order_id string,
order_channel string,
order_time string,
pay_amount double,
real_pay double,
pay_time string,
user_id string,
user_name string,
area_id string
) WITH (
'connector' = 'kafka',
'topic' = '&lt;yourSourceTopic&gt;',
'properties.bootstrap.servers' = '&lt;yourKafkaAddress&gt;:&lt;yourKafkaPort&gt;',
'properties.group.id' = '&lt;yourGroupId&gt;',
'scan.startup.mode' = 'latest-offset',
'format' = 'csv'
);
CREATE TABLE kafkaSink (
order_id string,
order_channel string,
order_time string,
pay_amount double,
real_pay double,
pay_time string,
user_id string,
user_name string,
area_id string
) WITH (
'connector' = 'kafka',
'topic' = '&lt;yourSinkTopic&gt;',
'properties.bootstrap.servers' = '&lt;yourKafkaAddress&gt;:&lt;yourKafkaPort&gt;',
'format' = 'csv'
);
insert into kafkaSink select * from kafkaSource;
</pre>
</p></li><li id="dli_08_0411__en-us_topic_0000001262815694_li1511420343241"><span>Insert the following data into the source Kafka topic:</span><p><pre class="screen" id="dli_08_0411__en-us_topic_0000001262815694_screen107391221112410">202103251505050001,qqShop,2021-03-25 15:05:05,500.00,400.00,2021-03-25 15:10:00,0003,Cindy,330108
202103241606060001,appShop,2021-03-24 16:06:06,200.00,180.00,2021-03-24 16:10:06,0001,Alice,330106</pre>
</p></li><li id="dli_08_0411__en-us_topic_0000001262815694_li4353143193117"><span>Read data from the sink Kafka topic. The result is as follows:</span><p><pre class="screen" id="dli_08_0411__en-us_topic_0000001262815694_screen14251955184812">202103251505050001,qqShop,"2021-03-25 15:05:05",500.0,400.0,"2021-03-25 15:10:00",0003,Cindy,330108
202103241606060001,appShop,"2021-03-24 16:06:06",200.0,180.0,"2021-03-24 16:10:06",0001,Alice,330106</pre>
</p></li></ol>
</div>
</div>
<div>
<div class="familylinks">
<div class="parentlink"><strong>Parent topic:</strong> <a href="dli_08_0407.html">Format</a></div>
</div>
</div>