forked from docs/doc-exports
Reviewed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com> Co-authored-by: Yang, Tong <yangtong2@huawei.com> Co-committed-by: Yang, Tong <yangtong2@huawei.com>
133 lines
13 KiB
HTML
133 lines
13 KiB
HTML
<a name="mrs_01_1072"></a>
|
|
|
|
<h1 class="topictitle1">Connecting Flume with Hive in Security Mode</h1>
|
|
<div id="body1590374796582"><div class="section" id="mrs_01_1072__section118041730183910"><h4 class="sectiontitle">Scenario</h4><p id="mrs_01_1072__p9709853203911">This section describes how to use Flume to connect to Hive (version 3.1.0) in the cluster.</p>
|
|
<p id="mrs_01_1072__p12343995610">This section applies to MRS 3.<em id="mrs_01_1072__i203959839142438">x</em> or later.</p>
|
|
</div>
|
|
<div class="section" id="mrs_01_1072__section6843813416"><h4 class="sectiontitle">Prerequisites</h4><p id="mrs_01_1072__p4841789347">Flume and Hive have been correctly installed in the cluster. The services are running properly, and no alarm is reported.</p>
|
|
</div>
|
|
<div class="section" id="mrs_01_1072__section323012579385"><h4 class="sectiontitle">Procedure</h4><ol id="mrs_01_1072__ol233532210497"><li id="mrs_01_1072__li1733512219493"><span>Import the following JAR packages to the lib directory (client/server) of the Flume instance to be tested as user <strong id="mrs_01_1072__b589117921510">omm</strong>:</span><p><ul id="mrs_01_1072__ul0112161612159"><li id="mrs_01_1072__li16112181619156">antlr-2.7.7.jar</li><li id="mrs_01_1072__li1112116111520">antlr-runtime-3.4.jar</li><li id="mrs_01_1072__li13112616141512">calcite-core-1.16.0.jar</li><li id="mrs_01_1072__li14112101617157">hadoop-mapreduce-client-core-3.1.1.jar</li><li id="mrs_01_1072__li1311241616158">hive-beeline-3.1.0.jar</li><li id="mrs_01_1072__li111210161152">hive-cli-3.1.0.jar</li><li id="mrs_01_1072__li10112816101512">hive-common-3.1.0.jar</li><li id="mrs_01_1072__li71121916161516">hive-exec-3.1.0.jar</li><li id="mrs_01_1072__li7113516141518">hive-hcatalog-core-3.1.0.jar</li><li id="mrs_01_1072__li1811361631516">hive-hcatalog-pig-adapter-3.1.0.jar</li><li id="mrs_01_1072__li0113191691510">hive-hcatalog-server-extensions-3.1.0.jar</li><li id="mrs_01_1072__li1611341617153">hive-hcatalog-streaming-3.1.0.jar</li><li id="mrs_01_1072__li18113616161518">hive-metastore-3.1.0.jar</li><li id="mrs_01_1072__li811311612158">hive-service-3.1.0.jar</li><li id="mrs_01_1072__li111131716181510">libfb303-0.9.3.jar</li><li id="mrs_01_1072__li1611331610157">hadoop-plugins-1.0.jar</li></ul>
|
|
<p id="mrs_01_1072__p1220119488420">You can obtain the JAR package from the Hive installation directory and restart the Flume process to ensure that the JAR package is loaded to the running environment.</p>
|
|
</p></li><li id="mrs_01_1072__li17992112916476"><span>Set Hive configuration items.</span><p><p id="mrs_01_1072__p1938315410473">On FusionInsight Manager, choose <strong id="mrs_01_1072__b1656215425282">Cluster</strong> > <em id="mrs_01_1072__i11642176442438">Name of the desired cluster</em> > <strong id="mrs_01_1072__b3914747152819">Services</strong> > <strong id="mrs_01_1072__b84281449192811">Hive</strong> > <strong id="mrs_01_1072__b83461852172811">Configurations</strong> > <strong id="mrs_01_1072__b19154123212297">All Configurations</strong> > <strong id="mrs_01_1072__b1672513452911">HiveServer</strong> > <strong id="mrs_01_1072__b99703372299">Customization</strong> > <strong id="mrs_01_1072__b329054011296">hive.server.customized.configs</strong>.</p>
|
|
<p id="mrs_01_1072__p1753434605110">Example configurations:</p>
|
|
|
|
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="mrs_01_1072__table205761017182" frame="border" border="1" rules="all"><thead align="left"><tr id="mrs_01_1072__row16577506188"><th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.3.2.2.2.3.1.3.1.1"><p id="mrs_01_1072__p657712011814">Name</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.3.2.2.2.3.1.3.1.2"><p id="mrs_01_1072__p95779016184">Value</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="mrs_01_1072__row15771005181"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.1 "><p id="mrs_01_1072__p10999118192">hive.support.concurrency</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.2 "><p id="mrs_01_1072__p22074314914">true</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_1072__row357780101818"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.1 "><p id="mrs_01_1072__p139781553121912">hive.exec.dynamic.partition.mode</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.2 "><p id="mrs_01_1072__p1520164314913">nonstrict</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_1072__row257714017188"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.1 "><p id="mrs_01_1072__p99791353111919">hive.txn.manager</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.2 "><p id="mrs_01_1072__p620043497">org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_1072__row816712071815"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.1 "><p id="mrs_01_1072__p9980195341919">hive.compactor.initiator.on</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.2 "><p id="mrs_01_1072__p32014431495">true</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_1072__row8274543141911"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.1 "><p id="mrs_01_1072__p898035317193">hive.compactor.worker.threads</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.3.2.2.2.3.1.3.1.2 "><p id="mrs_01_1072__p727517433195">1</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</p></li><li id="mrs_01_1072__li4650154320255"><span>Prepare the system user <strong id="mrs_01_1072__b63116953942438">flume_hive</strong> who has the supergroup and Hive permissions, install the client, and create the required Hive table.</span><p><div class="p" id="mrs_01_1072__p18637144417254">Example:<ol type="a" id="mrs_01_1072__ol16976145615014"><li id="mrs_01_1072__li1897695619016">The cluster client has been correctly installed. For example, the installation directory is <strong id="mrs_01_1072__b123480435542438">/opt/client</strong>.</li><li id="mrs_01_1072__li109763561602">Run the following command to authenticate the user:<p id="mrs_01_1072__p16648203110318"><a name="mrs_01_1072__li109763561602"></a><a name="li109763561602"></a><strong id="mrs_01_1072__b1762672413317">cd /opt/client</strong></p>
|
|
<p id="mrs_01_1072__p38501534143110"><strong id="mrs_01_1072__b19640142453310">source bigdata_env</strong></p>
|
|
<p id="mrs_01_1072__p7192113314317"><strong id="mrs_01_1072__b141031418165214">kinit flume_hive</strong></p>
|
|
</li><li id="mrs_01_1072__li1997615614017">Run the <strong id="mrs_01_1072__b3152101425214">beeline</strong> command and run the following table creation statement:<pre class="screen" id="mrs_01_1072__screen134516138486">create table flume_multi_type_part(id string, msg string)
|
|
partitioned by (country string, year_month string, day string)
|
|
clustered by (id) into 5 buckets
|
|
stored as orc TBLPROPERTIES('transactional'='true');</pre>
|
|
</li><li id="mrs_01_1072__li1485019520332">Run the <strong id="mrs_01_1072__b28597699942438">select * from</strong> <em id="mrs_01_1072__i212905276842438">Table name</em><strong id="mrs_01_1072__b1010922702314">;</strong> command to query data in the table.<p id="mrs_01_1072__p12310155303310">In this case, the number of data records in the table is <strong id="mrs_01_1072__b81689822942438">0</strong>.</p>
|
|
</li></ol>
|
|
</div>
|
|
</p></li><li id="mrs_01_1072__li6423111201"><span>Prepare related configuration files. Assume that the client installation package is stored in <strong id="mrs_01_1072__b91846479842438">/opt/FusionInsight_Cluster_1_Services_ClientConfig</strong>.</span><p><ol type="a" id="mrs_01_1072__ol158624331917"><li id="mrs_01_1072__li686218331417">Obtain the following files from the $<em id="mrs_01_1072__i71000670942438">Client decompression directory</em><strong id="mrs_01_1072__b134062243242438">/Hive/config</strong> directory:<ul id="mrs_01_1072__ul1044845518345"><li id="mrs_01_1072__li19448955193412">hivemetastore-site.xml</li><li id="mrs_01_1072__li16448165511343">hive-site.xml</li></ul>
|
|
</li><li id="mrs_01_1072__li188633334119">Obtain the following files from the <strong id="mrs_01_1072__b2065313476391">$</strong><em id="mrs_01_1072__i173854258642438">Client decompression directory</em><strong id="mrs_01_1072__b139996416542438">/HDFS/config</strong> directory:<p id="mrs_01_1072__p1693615316245">core-site.xml</p>
|
|
</li><li id="mrs_01_1072__li031247133718">Create a directory on the host where the Flume instance is started and save the prepared files to the created directory.<p id="mrs_01_1072__p1031515718376"><a name="mrs_01_1072__li031247133718"></a><a name="li031247133718"></a>Example: <span class="filepath" id="mrs_01_1072__filepath153158753713"><b>/opt/hivesink-conf/hive-site.xml</b></span>.</p>
|
|
</li><li id="mrs_01_1072__li483763119384">Copy all property configurations in the <span class="parmname" id="mrs_01_1072__parmname20838103117385"><b>hivemetastore-site.xml</b></span> file to the <span class="filepath" id="mrs_01_1072__filepath1783823111382"><b>hive-site.xml</b></span> file and ensure that the configurations are placed before the original configurations.<p id="mrs_01_1072__p1114710321388">Data is loaded in sequence in Hive.</p>
|
|
<div class="note" id="mrs_01_1072__note13426171654014"><img src="public_sys-resources/note_3.0-en-us.png" alt=""><span class="notetitle"> </span><div class="notebody"><p id="mrs_01_1072__p1942641615402">Ensure that the Flume running user <strong id="mrs_01_1072__b106871424134018">omm</strong> has the read and write permissions on the directory where the configuration file is stored.</p>
|
|
</div></div>
|
|
</li></ol>
|
|
</p></li><li id="mrs_01_1072__li1267133920497"><span>Observe the result.</span><p><p id="mrs_01_1072__p1130611382244">On the Hive client, run the <strong id="mrs_01_1072__b95438620142438">select * from</strong> <em id="mrs_01_1072__i56715251042438">Table name</em><strong id="mrs_01_1072__b377885713517">;</strong> command. Check whether the corresponding data has been written to the Hive table.</p>
|
|
</p></li></ol>
|
|
</div>
|
|
<div class="section" id="mrs_01_1072__section185081644133817"><h4 class="sectiontitle">Examples</h4><div class="p" id="mrs_01_1072__p3508124493820">Flume configuration example (SpoolDir--Mem--Hive):<pre class="screen" id="mrs_01_1072__screen2508164413383">server.sources = spool_source
|
|
server.channels = mem_channel
|
|
server.sinks = Hive_Sink
|
|
|
|
#config the source
|
|
server.sources.spool_source.type = spooldir
|
|
server.sources.spool_source.spoolDir = /tmp/testflume
|
|
server.sources.spool_source.montime =
|
|
server.sources.spool_source.fileSuffix =.COMPLETED
|
|
server.sources.spool_source.deletePolicy = never
|
|
server.sources.spool_source.trackerDir =.flumespool
|
|
server.sources.spool_source.ignorePattern = ^$
|
|
server.sources.spool_source.batchSize = 20
|
|
server.sources.spool_source.inputCharset =UTF-8
|
|
server.sources.spool_source.selector.type = replicating
|
|
server.sources.spool_source.fileHeader = false
|
|
server.sources.spool_source.fileHeaderKey = file
|
|
server.sources.spool_source.basenameHeaderKey= basename
|
|
server.sources.spool_source.deserializer = LINE
|
|
server.sources.spool_source.deserializer.maxBatchLine= 1
|
|
server.sources.spool_source.deserializer.maxLineLength= 2048
|
|
server.sources.spool_source.channels = mem_channel
|
|
|
|
#config the channel
|
|
server.channels.mem_channel.type = memory
|
|
server.channels.mem_channel.capacity =10000
|
|
server.channels.mem_channel.transactionCapacity= 2000
|
|
server.channels.mem_channel.channelfullcount= 10
|
|
server.channels.mem_channel.keep-alive = 3
|
|
server.channels.mem_channel.byteCapacity =
|
|
server.channels.mem_channel.byteCapacityBufferPercentage= 20
|
|
|
|
#config the sink
|
|
server.sinks.Hive_Sink.type = hive
|
|
server.sinks.Hive_Sink.channel = mem_channel
|
|
server.sinks.Hive_Sink.hive.metastore = thrift://${any MetaStore service IP address}:21088
|
|
server.sinks.Hive_Sink.hive.hiveSite = /opt/hivesink-conf/hive-site.xml
|
|
server.sinks.Hive_Sink.hive.coreSite = /opt/hivesink-conf/core-site.xml
|
|
server.sinks.Hive_Sink.hive.metastoreSite = /opt/hivesink-conf/hivemetastore-site.xml
|
|
server.sinks.Hive_Sink.hive.database = default
|
|
server.sinks.Hive_Sink.hive.table = flume_multi_type_part
|
|
server.sinks.Hive_Sink.hive.partition = Tag,%Y-%m,%d
|
|
server.sinks.Hive_Sink.hive.txnsPerBatchAsk= 100
|
|
server.sinks.Hive_Sink.hive.autoCreatePartitions= true
|
|
server.sinks.Hive_Sink.useLocalTimeStamp = true
|
|
server.sinks.Hive_Sink.batchSize = 1000
|
|
server.sinks.Hive_Sink.hive.kerberosPrincipal= super1
|
|
server.sinks.Hive_Sink.hive.kerberosKeytab= /opt/mykeytab/user.keytab
|
|
server.sinks.Hive_Sink.round = true
|
|
server.sinks.Hive_Sink.roundValue = 10
|
|
server.sinks.Hive_Sink.roundUnit = minute
|
|
server.sinks.Hive_Sink.serializer = DELIMITED
|
|
server.sinks.Hive_Sink.serializer.delimiter= ";"
|
|
server.sinks.Hive_Sink.serializer.serdeSeparator= ';'
|
|
server.sinks.Hive_Sink.serializer.fieldnames= id,msg
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div>
|
|
<div class="familylinks">
|
|
<div class="parentlink"><strong>Parent topic:</strong> <a href="mrs_01_0390.html">Using Flume</a></div>
|
|
</div>
|
|
</div>
|
|
|