2 # Cookbook Name:: hadoop
5 # Copyright 2013, whitestar
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
# Mix cookbook helper modules into the recipe's DSL context so that
# helpers like parse_hdp_version_number/get_users are callable below.
20 ::Chef::Recipe.send(:include, Commons::PackageUtils)
22 ::Chef::Recipe.send(:include, Hadoop::Helper)
# HDP package layout: binaries live under /usr/lib/hadoop and the active
# configuration directory is managed via the alternatives system.
24 install_root = '/usr/lib/hadoop'
25 conf_dir = '/etc/hadoop/conf'
# Hadoop daemon package name suffixes managed by this recipe.
# NOTE(review): this chunk appears to be a partial view — the enclosing
# array literals are not fully visible here.
40 'hdfs-secondarynamenode',
42 'yarn-resourcemanager',
44 'mapreduce-historyserver'
# Configuration files rendered into conf_dir (MRv1-era list).
48 'capacity-scheduler.xml',
53 'hadoop-metrics2.properties',
59 'mapred-queue-acls.xml',
# Configuration files for the ver. 2 line.
73 'hadoop-policy.xml', # missing!?
74 'mapred-queue-acls.xml',
78 # TODO: -> hdp_httpfs recipe. (hadoop-httpfs rpm package)
80 #'httpfs-log4j.properties',
81 #'httpfs-signature.secret',
# Split the HDP version attribute into (full, major, middle) components
# and resolve the equivalent Apache Hadoop version, which drives which
# template/source directories are used further down.
88 version, major_version, middle_version \
89 = parse_hdp_version_number(node['hdp']['version'])
90 equivalent_apache_hadoop_middle_version \
91 = get_equivalent_apache_hadoop_version_of('hdp', node['hdp']['version'])
# Account metadata (hdfs/mapred/yarn users) for this major version.
93 users = get_users(major_version)
94 # do not add any hadoop accounts by Chef.
# HDP >= 1.3 ships the job history server as a separate daemon package.
# NOTE(review): these are lexicographic String comparisons, not semantic
# version comparisons ('1.10' < '1.3' lexically) — acceptable only while
# middle versions stay single-digit; consider Gem::Version. TODO confirm.
99 if middle_version >= '1.3' then
100 daemons.push('historyserver')
102 conf_files = conf_files_v1
105 conf_files = conf_files_v2
# Install each daemon package and declare its service resource; both
# actions are attribute-driven so a wrapper can switch install/upgrade
# and enable/start behavior.
108 daemons.each {|daemon|
109 package "hadoop-#{daemon}" do
110 action node['hadoop']['packages']['action']
113 service "hadoop-#{daemon}" do
114 action node['hadoop']['services']['action']
# Native/pipes libraries are separate packages only in the ver. 1 line.
118 if major_version == '1' then
124 package "hadoop-#{native_lib}" do
125 action node['hadoop']['packages']['action']
129 # do nothing in the ver. 2
130 # native -> hadoop package
131 # pipes -> hadoop package
# Optional LZO compression support packages.
134 if node['hadoop']['with_hadoop_lzo'] then
139 package "hadoop-#{lzo_pkg}" do
140 action node['hadoop']['packages']['action']
# System-native bzip2 codec needs an Apache Hadoop >= 2.1 equivalent
# (same lexicographic-comparison caveat as the version checks above).
145 if node['hadoop']['io.compression.codec.bzip2.library'] == 'system-native' \
146 && equivalent_apache_hadoop_middle_version >= '2.1' then
147 package get_libbz2_pkg_name do
# Refresh the HADOOP_PREFIX symlink; delete first so a stale link is
# always replaced rather than left pointing at an old target.
152 link node['hadoop']['HADOOP_PREFIX'] do
154 action [:delete, :create]
# Create per-volume data/log directories; returns the active volume count
# that is later exported to the configuration templates.
157 active_vol_nums = setup_directories(major_version)
158 log "This node active volumes: #{active_vol_nums}"
# Register the cookbook-managed configuration directory with the
# alternatives system so /etc/hadoop/conf resolves to it. The bash guard
# (creates) makes the copy/install a one-time operation.
160 alt_conf_path = node['hadoop']['hdp']['alternatives']['hadoop-conf']['path']
162 = node['hadoop']['hdp']['alternatives']['hadoop-conf']['priority']
163 bash 'alternatives_hadoop-conf' do
165 cp -R /etc/hadoop/conf.empty #{alt_conf_path}
166 alternatives --install #{conf_dir} hadoop-conf #{alt_conf_path} #{alt_conf_priority}
168 creates alt_conf_path
# Variables shared by every rendered configuration template.
172 :active_vol_nums => active_vol_nums
174 conf_template(conf_dir, equivalent_apache_hadoop_middle_version, conf_files, tpl_vars)
# Kerberos-secured cluster extras: keytab directory, setuid executor
# binaries with restrictive permissions, and (ver. 2) cgroup isolation.
177 if node['hadoop']['with_security'] then
178 directory node['hadoop']['this.keytab.dir'] do
188 # task-controller, jsvc
189 package 'hadoop-sbin' do
190 action node['hadoop']['packages']['action']
# task-controller must be setuid root and accessible only to the mapred
# group — tighter than what the distribution package ships.
193 file "#{install_root}/bin/task-controller" do
195 group users[:mapred][:name]
196 mode '6050' # '4750' by the distribution?!
199 template "#{conf_dir}/taskcontroller.cfg" do
200 source "conf-#{equivalent_apache_hadoop_middle_version}/taskcontroller.cfg"
203 mode '0400' # '0644' by the distribution?!
205 :active_vol_nums => active_vol_nums
209 # jsvc installation is not necessary.
210 # hadoop-hdfs requires bigtop-jsvc package.
211 template "/etc/default/hadoop-hdfs-datanode" do
212 source "etc-#{equivalent_apache_hadoop_middle_version}/default/hadoop-hdfs-datanode"
# YARN's container-executor is the ver. 2 counterpart of task-controller.
218 file "/usr/lib/hadoop-yarn/bin/container-executor" do
220 group users[:yarn][:name]
224 template "#{conf_dir}/container-executor.cfg" do
225 source "etc-#{equivalent_apache_hadoop_middle_version}/hadoop/container-executor.cfg"
230 :active_vol_nums => active_vol_nums
# cgroup resource isolation is set up from HDP 2.0.0.2 onward
# (lexicographic String comparison — same caveat as earlier checks).
234 if version >= '2.0.0.2' then
235 setup_cgroup(equivalent_apache_hadoop_middle_version)
# Post-install guidance: log the one-time HDFS initialization commands
# for the operator (version-specific message bodies below are runtime
# heredoc text and must stay verbatim), then render a helper script that
# automates the same steps for a pseudo-distributed setup.
240 # Note: update the following template (hadoop_pseudo_distributed_init.sh)
241 # if this section will be modified.
246 You must initialize HDFS in the first installation:
247 $ sudo -u hdfs hadoop namenode -format
248 $ sudo service hadoop-namenode start
249 $ sudo service hadoop-datanode start
250 $ sudo -u hdfs hadoop fs -chown hdfs:hdfs /
251 $ sudo -u hdfs hadoop fs -chmod 755 /
252 $ sudo -u hdfs hadoop fs -mkdir /user
253 $ sudo -u hdfs hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
254 $ sudo -u hdfs hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
257 examples_jar = '/usr/lib/hadoop/hadoop-examples.jar'
261 You must initialize HDFS in the first installation:
262 $ sudo -u hdfs hdfs namenode -format
263 $ sudo service hadoop-hdfs-namenode start
264 $ sudo service hadoop-hdfs-datanode start
265 $ sudo -u hdfs hadoop fs -chown hdfs:hdfs /
266 $ sudo -u hdfs hadoop fs -chmod 755 /
267 $ sudo -u hdfs hadoop fs -mkdir /user
268 $ sudo -u hdfs hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/log/yarn/nm
269 $ sudo -u hdfs hadoop fs -chown yarn:hadoop #{node['grid']['vol_root']}/0/var/log/yarn/nm
270 $ sudo -u hdfs hadoop fs -chmod 1777 #{node['grid']['vol_root']}/0/var/log/yarn/nm
271 $ sudo -u hdfs hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/lib/mapred/history
272 $ sudo -u hdfs hadoop fs -chown -R mapred:hadoop #{node['grid']['vol_root']}/0/var/lib/mapred
273 $ sudo -u hdfs hadoop fs -chmod -R 755 #{node['grid']['vol_root']}/0/var/lib/mapred
276 examples_jar = "/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-<version>.jar"
# Render the helper script under the grid application root.
279 template "#{node['grid']['app_root']}/sbin/hadoop_pseudo_distributed_init.sh" do
280 source "grid/usr/sbin/hadoop_pseudo_distributed_init.hdp.sh"
# Template variable for the init script rendered above.
285 :major_version => major_version,
# Log an example MapReduce job invocation; a kinit step is included only
# when Kerberos security is enabled (message bodies are runtime heredoc
# text and must stay verbatim).
289 if node['hadoop']['with_security'] then
292 Example MapReduce job execution:
293 $ sudo -u alice kinit
294 Password for alice@LOCALDOMAIN:
295 $ sudo -u alice hadoop jar #{examples_jar} pi \\
296 > -D mapreduce.job.acl-view-job=* -D mapreduce.job.acl-modify-job=alice 5 10
301 Example MapReduce job execution:
303 $ sudo -u hdfs hadoop fs -mkdir /user/alice
304 $ sudo -u hdfs hadoop fs -chown alice:alice /user/alice
305 $ sudo -u alice hadoop jar #{examples_jar} pi 5 10