Cannot write file into hadoop3.3.6

  Kiến thức lập trình

I’m trying to use docker compose to build a hadoop cluster on my macbook(m1, apple silicon chip). I have expose the port of 9000 on docker container, here is my java code below:

package cn.dioxide;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

public class HadoopTest {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("dfs.client.use.datanode.hostname", "true");

            FileSystem fs = FileSystem.get(conf);

            Path dirPath = new Path("/user/test");
            if (fs.exists(dirPath)) {
                fs.delete(dirPath, true);
            }
            fs.mkdirs(dirPath);

            Path filePath = new Path("/user/test/test.txt");
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(filePath, true)));
            writer.write("Hello Hadoop!");
            writer.close();

            System.out.println("content: ");
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath)));
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
            reader.close();

            fs.delete(filePath, true);
            System.out.println("file has been deleted");

            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

then the errors below occurred:

org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /user/test/test.txt could only be written to 0 of the 1 minReplication nodes. There are 3 datanode(s) running and 3 node(s) are excluded in this operation.
    at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:2350)
    at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.chooseTargetForNewBlock(FSDirWriteFileOp.java:294)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2989)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:912)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:595)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:621)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:589)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:573)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1227)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1094)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1017)
    at java.base/java.security.AccessController.doPrivileged(Native Method)
    at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:3048)

    at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1573)
    at org.apache.hadoop.ipc.Client.call(Client.java:1519)
    at org.apache.hadoop.ipc.Client.call(Client.java:1416)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:242)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:129)
    at jdk.proxy2/jdk.proxy2.$Proxy15.addBlock(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:530)
    at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
    at java.base/java.lang.reflect.Method.invoke(Method.java:580)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
    at jdk.proxy2/jdk.proxy2.$Proxy16.addBlock(Unknown Source)
    at org.apache.hadoop.hdfs.DFSOutputStream.addBlock(DFSOutputStream.java:1084)
    at org.apache.hadoop.hdfs.DataStreamer.locateFollowingBlock(DataStreamer.java:1898)
    at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1700)
    at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:707)

I build a master image and a slave image with Dockerfile below:

Dockerfile-master and Dockerfile-slave

# Dockerfile-hadoop
FROM ubuntu:14.04

RUN apt-get update && apt-get install -y openssh-server wget iptables

COPY resources/jdk-11.0.23 /opt/jdk-11.0.23

ENV JAVA_HOME=/opt/jdk-11.0.23
ENV PATH=$JAVA_HOME/bin:$PATH

COPY resources/hadoop-3.3.6 /opt/hadoop-3.3.6
RUN mv /opt/hadoop-3.3.6 /opt/hadoop

RUN echo "export JAVA_HOME=$JAVA_HOME" >> /opt/hadoop/etc/hadoop/hadoop-env.sh

ENV HADOOP_HOME=/opt/hadoop
ENV PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH

RUN ssh-keygen -t rsa -P '' -f /root/.ssh/id_rsa 
    && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys 
    && chmod 0600 /root/.ssh/authorized_keys 
    && echo "Host *n    StrictHostKeyChecking no" >> /etc/ssh/ssh_config

COPY config/core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
COPY config/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
COPY config/yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
COPY config/mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml

# if this file is master
COPY entrypoint/entrypoint-master.sh /entrypoint.sh
# if this file is slave
COPY entrypoint/entrypoint-slave.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]

and my entrypoint-master.sh shell script is below:

#!/bin/bash

iptables -F

service ssh start

echo "export PDSH_RCMD_TYPE=ssh" >> /etc/profile
source /etc/profile

export HADOOP_HOME=/opt/hadoop
export PATH=$HADOOP_HOME/bin:$PATH

if [ ! -d /hadoop/dfs/name ]; then
  hdfs namenode -format -force
fi

$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh

$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh start secondarynamenode
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager

$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode

hdfs dfs -chmod 777 /

tail -f /dev/null

entrypoint-slave.sh:

#!/bin/bash

iptables -F

service ssh start

echo "export PDSH_RCMD_TYPE=ssh" >> /etc/profile
source /etc/profile

export HADOOP_HOME=/opt/hadoop
export PATH=$HADOOP_HOME/bin:$PATH

if [ ! -d /hadoop/dfs/name ]; then
  hdfs namenode -format -force
fi

$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode
$HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager

tail -f /dev/null

And my hadoop yml configuration file is below:
core-site.xml

<?xml version="1.0"?>
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-master:9000/</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/hadoop/tmp</value>
    </property>

    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
</configuration>

hdfs-site.xml

<?xml version="1.0"?>
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/root/hdfs/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/root/hdfs/datanode</value>
        <description>DataNode directory</description>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>

    <property>
        <name>dfs.permissions.superusergroup</name>
        <value>supergroup</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>

mapred-site.xml

<?xml version="1.0"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop-master:10020</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.web.address</name>
        <value>hadoop-master:19888</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop-slave2</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop-slave2:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>hadoop-slave2:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop-slave2:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>hadoop-slave2:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>hadoop-slave2:8088</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>2048</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME</value>
    </property>
</configuration>

New contributor

CN Dioxide is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

Theme wordpress giá rẻ Theme wordpress giá rẻ Thiết kế website

LEAVE A COMMENT