2023-05-25T14:16:51,699 INFO [pool-8-thread-7]: HiveMetaStore.audit (HiveMetaStore.java:logAuditEvent(327)) - ugi=hive ip=10.20.20.80 cmd=source:10.20.20.80 get_config_value: name=metastore.batch.retrieve.max defaultValue=50 2023-05-25T14:17:04,390 WARN [PartitionDiscoveryTask-4]: utils.RetryUtilities$ExponentiallyDecayingBatchWork (RetryUtilities.java:run(93)) - Exception thrown while processing using a batch size 3000 org.apache.hadoop.hive.metastore.api.MetastoreException: org.apache.thrift.transport.TTransportException: java.net.SocketTimeoutException: Read timed out at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:393) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:358) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.utils.RetryUtilities$ExponentiallyDecayingBatchWork.run(RetryUtilities.java:91) [hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck.createPartitionsInBatches(Msck.java:396) [hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:207) [hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.PartitionManagementTask$MsckThread.run(PartitionManagementTask.java:216) [hive-exec-3.1.2.jar:3.1.2] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_352] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_352] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_352] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_352] at java.lang.Thread.run(Thread.java:750) [?:1.8.0_352] Caused by: org.apache.thrift.transport.TTransportException: java.net.SocketTimeoutException: Read timed out at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:129) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1.2.jar:3.1.2] at 
org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_add_partitions_req(ThriftHiveMetastore.java:2488) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.add_partitions_req(ThriftHiveMetastore.java:2475) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.add_partitions(HiveMetaStoreClient.java:730) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:386) ~[hive-exec-3.1.2.jar:3.1.2] ... 10 more Caused by: java.net.SocketTimeoutException: Read timed out at java.net.SocketInputStream.socketRead0(Native Method) ~[?:1.8.0_352] at java.net.SocketInputStream.socketRead(SocketInputStream.java:116) ~[?:1.8.0_352] at java.net.SocketInputStream.read(SocketInputStream.java:171) ~[?:1.8.0_352] at java.net.SocketInputStream.read(SocketInputStream.java:141) ~[?:1.8.0_352] at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) ~[?:1.8.0_352] at java.io.BufferedInputStream.read1(BufferedInputStream.java:286) ~[?:1.8.0_352] at java.io.BufferedInputStream.read(BufferedInputStream.java:345) ~[?:1.8.0_352] at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) 
~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_add_partitions_req(ThriftHiveMetastore.java:2488) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.add_partitions_req(ThriftHiveMetastore.java:2475) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.add_partitions(HiveMetaStoreClient.java:730) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:386) ~[hive-exec-3.1.2.jar:3.1.2] ... 10 more 2023-05-25T14:17:06,986 INFO [pool-6-thread-7]: txn.AcidOpenTxnsCounterService (AcidOpenTxnsCounterService.java:run(51)) - AcidOpenTxnsCounterService ran for 0 seconds. isAliveCounter = 2134 2023-05-25T14:17:32,987 INFO [pool-6-thread-2]: txn.TxnHandler (TxnHandler.java:performWriteSetGC(1564)) - Deleted 0 obsolete rows from WRTIE_SET
这个日志让我非常困惑,因为在hiveserver.log中也存在几乎一样的日志:
2023-05-25T15:15:11,805 INFO [04ecba5d-d30c-49f3-8976-c3bbaed2b240 HiveServer2-Handler-Pool: Thread-132]: conf.HiveConf (HiveConf.java:getLogIdVar(5049)) - Using the default value passed in for log id: 04ecba5d-d30c-49f3-8976-c3bbaed2b240 2023-05-25T15:15:11,805 INFO [04ecba5d-d30c-49f3-8976-c3bbaed2b240 HiveServer2-Handler-Pool: Thread-132]: session.SessionState (:()) - Resetting thread name to HiveServer2-Handler-Pool: Thread-132 2023-05-25T15:15:20,095 WARN [HiveServer2-Background-Pool: Thread-138]: utils.RetryUtilities$ExponentiallyDecayingBatchWork (:()) - Exception thrown while processing using a batch size 750 org.apache.hadoop.hive.metastore.api.MetastoreException: org.apache.thrift.transport.TTransportException: java.net.SocketTimeoutException: Read timed out at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:393) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:358) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.utils.RetryUtilities$ExponentiallyDecayingBatchWork.run(RetryUtilities.java:91) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck.createPartitionsInBatches(Msck.java:396) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:207) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.exec.DDLTask.msck(DDLTask.java:2128) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:446) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:205) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:97) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2664) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:2335) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:2011) ~[hive-exec-3.1.2.jar:3.1.2] at 
org.apache.hadoop.hive.ql.Driver.run(Driver.java:1709) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1703) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:157) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:224) ~[hive-service-3.1.2.jar:3.1.2] at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87) ~[hive-service-3.1.2.jar:3.1.2] at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:316) ~[hive-service-3.1.2.jar:3.1.2] at java.security.AccessController.doPrivileged(Native Method) ~[?:1.8.0_352] at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_352] at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878) ~[hadoop-common-3.3.4.jar:?] at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:329) ~[hive-service-3.1.2.jar:3.1.2] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_352] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_352] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_352] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_352] at java.lang.Thread.run(Thread.java:750) [?:1.8.0_352] Caused by: org.apache.thrift.transport.TTransportException: java.net.SocketTimeoutException: Read timed out at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:129) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[hive-exec-3.1.2.jar:3.1.2] at 
org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_add_partitions_req(ThriftHiveMetastore.java:2488) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.add_partitions_req(ThriftHiveMetastore.java:2475) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.add_partitions(HiveMetaStoreClient.java:730) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:386) ~[hive-exec-3.1.2.jar:3.1.2] ... 26 more Caused by: java.net.SocketTimeoutException: Read timed out at java.net.SocketInputStream.socketRead0(Native Method) ~[?:1.8.0_352] at java.net.SocketInputStream.socketRead(SocketInputStream.java:116) ~[?:1.8.0_352] at java.net.SocketInputStream.read(SocketInputStream.java:171) ~[?:1.8.0_352] at java.net.SocketInputStream.read(SocketInputStream.java:141) ~[?:1.8.0_352] at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) ~[?:1.8.0_352] at java.io.BufferedInputStream.read1(BufferedInputStream.java:286) ~[?:1.8.0_352] at java.io.BufferedInputStream.read(BufferedInputStream.java:345) ~[?:1.8.0_352] at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) 
~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_add_partitions_req(ThriftHiveMetastore.java:2488) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.add_partitions_req(ThriftHiveMetastore.java:2475) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.add_partitions(HiveMetaStoreClient.java:730) ~[hive-exec-3.1.2.jar:3.1.2] at org.apache.hadoop.hive.metastore.Msck$1.execute(Msck.java:386) ~[hive-exec-3.1.2.jar:3.1.2] ... 26 more 2023-05-25T15:15:46,807 INFO [HiveServer2-Handler-Pool: Thread-132]: conf.HiveConf (HiveConf.java:getLogIdVar(5049)) - Using the default value passed in for log id: 04ecba5d-d30c-49f3-8976-c3bbaed2b240
// External tables: mark the table as EXTERNAL and opt it into automatic
// partition discovery — but only when the user has not already set the
// property explicitly (e.g. explicitly disabled it; in that case we must
// not flip it back on).
if (isExternal()) {
  tbl.setProperty("EXTERNAL", "TRUE");
  tbl.setTableType(TableType.EXTERNAL_TABLE);
  if (tbl.getProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY) == null) {
    // Partition discovery defaults to enabled when the property is undefined.
    tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
  }
}
// Partition management task params PARTITION_MANAGEMENT_TASK_FREQUENCY("metastore.partition.management.task.frequency", "metastore.partition.management.task.frequency", 300, TimeUnit.SECONDS, "Frequency at which timer task runs to do automatic partition management for tables\n" + "with table property 'discover.partitions'='true'. Partition management include 2 pieces. One is partition\n" + "discovery and other is partition retention period. When 'discover.partitions'='true' is set, partition\n" + "management will look for partitions in table location and add partitions objects for it in metastore.\n" + "Similarly if partition object exists in metastore and partition location does not exist, partition object\n" + "will be dropped. The second piece in partition management is retention period. When 'discover.partition'\n" + "is set to true and if 'partition.retention.period' table property is defined, partitions that are older\n" + "than the specified retention period will be automatically dropped from metastore along with the data."),
// MSCK REPAIR on a transactional table acquires an exclusive (X) table lock,
// matching what the DDL task does when MSCK is run from beeline/cli. Because
// msck.repair() is invoked directly here (no SQL statement), the lock must be
// taken explicitly.
if (acquireLock && lockRequired && table.getParameters() != null
    && MetaStoreServerUtils.isTransactionalTable(table.getParameters())) {
  LockRequest request = createLockRequest(msckInfo.getDbName(), msckInfo.getTableName());
  txnId = request.getTxnid();
  try {
    LockResponse response = getMsc().lock(request);
    if (response.getState() != LockState.ACQUIRED) {
      throw new MetastoreException("Unable to acquire lock(X) on " + qualifiedTableName);
    }
    lockId = response.getLockid();
  } catch (TException e) {
    // Preserve the thrift failure as the cause.
    throw new MetastoreException("Unable to acquire lock(X) on " + qualifiedTableName, e);
  }
  LOG.info("Acquired lock(X) on {}. LockId: {}", qualifiedTableName, lockId);
}
具体的 lock 实现在:
// Builds the LockRequest used by MSCK REPAIR on transactional tables: resolves
// the requesting user (UGI login user, falling back to the JVM 'user.name'
// system property when UGI lookup fails), opens a new metastore transaction
// for that user, and prepares a LockRequestBuilder tagged with the current
// thread name as agent info.
private LockRequest createLockRequest(final String dbName, final String tableName) throws TException { UserGroupInformation loggedInUser = null; String username; try { loggedInUser = UserGroupInformation.getLoginUser(); } catch (IOException e) { LOG.warn("Unable to get logged in user via UGI. err: {}", e.getMessage()); } if (loggedInUser == null) { username = System.getProperty("user.name"); } else { username = loggedInUser.getShortUserName(); } long txnId = getMsc().openTxn(username); String agentInfo = Thread.currentThread().getName(); LockRequestBuilder requestBuilder = new LockRequestBuilder(agentInfo); requestBuilder.setUser(username); requestBuilder.setTransactionId(txnId);
// Exclusive (X) lock component on the whole table; setIsTransactional(true)
// because this path is only reached for transactional tables. Operation type
// is NO_TXN — per the inline comment, MSCK REPAIR's write type is
// DDL_EXCLUSIVE (see AcidUtils.makeLockComponents).
LockComponentBuilder lockCompBuilder = new LockComponentBuilder() .setDbName(dbName) .setTableName(tableName) .setIsTransactional(true) .setExclusive() // WriteType is DDL_EXCLUSIVE for MSCK REPAIR so we need NO_TXN. Refer AcidUtils.makeLockComponents .setOperationType(DataOperationType.NO_TXN); requestBuilder.addLockComponent(lockCompBuilder.build());
// NOTE(review): the excerpt ends here — the method's final
// requestBuilder.build()/return statement is not visible in this snippet.