OSDN Git Service

add hadoop 2.0.x recipe.
author whitestar <whitestar@gaea.test>
Sat, 20 Jul 2013 00:52:32 +0000 (09:52 +0900)
committer whitestar <whitestar@gaea.test>
Sat, 20 Jul 2013 00:52:32 +0000 (09:52 +0900)
30 files changed:
cookbooks/hadoop/attributes/default.rb
cookbooks/hadoop/recipes/default.rb
cookbooks/hadoop/templates/default/etc-2.0/hadoop/capacity-scheduler.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/configuration.xsl [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/container-executor.cfg [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/core-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics2.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-policy.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hdfs-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.exclude [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.include [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-log4j.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-signature.secret [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/log4j.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-queues.xml.template [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml.template [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/slaves [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-client.xml.example [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-server.xml.example [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml [new file with mode: 0644]
roles/role-spaghetti.dot [moved from role-spaghetti.dot with 100% similarity]
roles/role-spaghetti.png [moved from role-spaghetti.png with 100% similarity]
roles/test-on-localhost.rb

diff --git a/cookbooks/hadoop/attributes/default.rb b/cookbooks/hadoop/attributes/default.rb
index f3080b8..532f4df 100644 (file)
@@ -29,6 +29,11 @@ default['hadoop']['HADOOP_PID_DIR'] = "#{node['grid']['vol_root']}/0/var/run/${U
 default['hadoop']['HADOOP_LOG_DIR'] = "#{node['grid']['vol_root']}/0/var/log/${USER}"
 default['hadoop']['HADOOP_CLASSPATH'] = ''
 default['hadoop']['HADOOP_USER_CLASSPATH_FIRST'] = false
+if node['hadoop']['version'] >= '2.0.0' then
+  default['hadoop']['HADOOP_CONF_DIR'] = "#{node['hadoop']['HADOOP_PREFIX']}/etc/hadoop"
+else
+  default['hadoop']['HADOOP_CONF_DIR'] = "#{node['hadoop']['HADOOP_PREFIX']}/conf"
+end
 ### with_security
 default['hadoop']['HADOOP_SECURE_DN_USER'] = 'hdfs'
 default['hadoop']['HADOOP_SECURE_DN_PID_DIR'] = "#{node['grid']['vol_root']}/0/var/run/${HADOOP_SECURE_DN_USER}"
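
Note: the HADOOP_CONF_DIR switch above relies on plain String comparison of the version attribute, which happens to order the 1.x and 2.x releases handled by this cookbook correctly. A standalone Ruby aside (not part of the recipe) showing where that assumption holds and where it would break:

    # Lexicographic comparison is sufficient for the 1.x / 2.x split used here:
    '2.0.5' >= '2.0.0'    #=> true
    '1.1.2' >= '2.0.0'    #=> false
    # ...but it is not a true version compare (hypothetical two-digit major):
    '10.0.0' >= '2.0.0'   #=> false
    # A stricter alternative, if it were ever needed, is RubyGems' Gem::Version:
    Gem::Version.new('2.0.5') >= Gem::Version.new('2.0.0')   #=> true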
@@ -48,13 +53,22 @@ default['hadoop']['fs.checkpoint.dir'] = "#{node['grid']['vol_root']}/0/var/lib/
 ### with_security
 default['hadoop']['hadoop.security.authentication'] = 'kerberos'
 default['hadoop']['hadoop.security.authorization'] = 'true'
-default['hadoop']['hadoop.security.auth_to_local'] = '
+if node['hadoop']['version'] >= '2.0.0' then
+  default['hadoop']['hadoop.security.auth_to_local'] = '
             RULE:[2:$1@$0](.*@${this.realm})s/@.*//
             RULE:[1:$1@$0](.*@${this.realm})s/@.*//
-            RULE:[2:$1@$0](mapred@.*${this.realm})s/.*/mapred/
+            RULE:[2:$1@$0](hdfs@.*${this.realm})s/.*/hdfs/
             RULE:[2:$1@$0](yarn@.*${this.realm})s/.*/yarn/
+            RULE:[2:$1@$0](mapred@.*${this.realm})s/.*/mapred/
+            DEFAULT'
+else
+  default['hadoop']['hadoop.security.auth_to_local'] = '
+            RULE:[2:$1@$0](.*@${this.realm})s/@.*//
+            RULE:[1:$1@$0](.*@${this.realm})s/@.*//
             RULE:[2:$1@$0](hdfs@.*${this.realm})s/.*/hdfs/
+            RULE:[2:$1@$0](mapred@.*${this.realm})s/.*/mapred/
             DEFAULT'
+end
 default['hadoop']['hadoop.security.group.mapping'] = 'org.apache.hadoop.security.JniBasedUnixGroupsMapping'
 default['hadoop']['hadoop.security.groups.cache.secs'] = '14400'
 default['hadoop']['hadoop.kerberos.kinit.command'] = '/usr/bin/kinit'
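
To make the rule ordering concrete (realm and host names below are illustrative): under either branch, an in-realm service principal such as hdfs/nn01.grid.example.com@GRID.EXAMPLE.COM resolves to the local account hdfs and a user principal such as alice@GRID.EXAMPLE.COM resolves to alice; the 2.0 branch additionally adds a mapping for yarn service principals, which do not exist in 1.x.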
@@ -78,8 +92,8 @@ default['hadoop']['hadoop.proxyuser'] = {
 default['hadoop']['this.secondary.namenode.fqdn'] = 'localhost'
 default['hadoop']['dfs.name.dir'] = "#{node['grid']['vol_root']}/0/var/lib/${user.name}/name"
 default['hadoop']['dfs.replication'] = '1'
-default['hadoop']['dfs.hosts'] = "#{node['grid']['app_root']}/hadoop/conf/hosts.include"
-default['hadoop']['dfs.hosts.exclude'] = "#{node['grid']['app_root']}/hadoop/conf/hosts.exclude"
+default['hadoop']['dfs.hosts'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.include"
+default['hadoop']['dfs.hosts.exclude'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.exclude"
 ### with_security
 default['hadoop']['dfs.cluster.administrators'] = ' hdfs'
 default['hadoop']['dfs.http.port'] = '50070'
@@ -118,8 +132,8 @@ default['hadoop']['mapred.job.tracker.persist.jobstatus.hours'] = '3'
 default['hadoop']['mapred.job.tracker.persist.jobstatus.dir'] = "#{node['grid']['vol_root']}/0/var/lib/mapred/jobstatus"
 default['hadoop']['hadoop.job.history.location'] = "file://#{node['grid']['vol_root']}/0/var/lib/mapred/history"
 default['hadoop']['mapred.job.tracker.history.completed.location'] = "hdfs://#{node['grid']['vol_root']}/0/var/lib/mapred/history/done"
-default['hadoop']['mapred.hosts'] = "#{node['grid']['app_root']}/hadoop/conf/hosts.include"
-default['hadoop']['mapred.hosts.exclude'] = "#{node['grid']['app_root']}/hadoop/conf/hosts.exclude"
+default['hadoop']['mapred.hosts'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.include"
+default['hadoop']['mapred.hosts.exclude'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.exclude"
 ### with_security
 default['hadoop']['mapred.acls.enabled'] = 'true'
 default['hadoop']['mapreduce.cluster.administrators'] = ' mapred'
@@ -180,7 +194,7 @@ default['hadoop']['YARN_HOME'] = '${HADOOP_PREFIX}'
 default['hadoop']['YARN_PID_DIR'] = "#{node['grid']['vol_root']}/0/var/run/${USER}"
 default['hadoop']['YARN_LOG_DIR'] = "#{node['grid']['vol_root']}/0/var/log/${USER}"
 default['hadoop']['HADOOP_MAPRED_HOME'] = '${HADOOP_PREFIX}'
-## mapred-env.sh
+## mapred-env.sh (available in ver. 2.0.2-alpha and later)
 default['hadoop']['HADOOP_MAPRED_PID_DIR'] = "#{node['grid']['vol_root']}/0/var/run/${USER}"
 default['hadoop']['HADOOP_MAPRED_LOG_DIR'] = "#{node['grid']['vol_root']}/0/var/log/${USER}"
 ## core-site.xml
@@ -197,11 +211,12 @@ default['hadoop']['this.resourcemanager.fqdn'] = 'localhost'
 default['hadoop']['yarn.log-aggregation-enable'] = 'true'
 default['hadoop']['yarn.resourcemanager.scheduler.class'] = 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler'
 default['hadoop']['yarn.nodemanager.resource.memory-mb'] = '8192'
+# cpu-cores is available in ver. 2.0.3-alpha and later.
 default['hadoop']['yarn.nodemanager.resource.cpu-cores'] = '8'
 default['hadoop']['yarn.nodemanager.remote-app-log-dir'] = "#{node['grid']['vol_root']}/0/var/log/${user.name}/nm"
 default['hadoop']['yarn.nodemanager.aux-services'] = 'mapreduce.shuffle'
-default['hadoop']['yarn.resourcemanager.nodes.include-path'] = "#{node['grid']['app_root']}/hadoop/etc/hadoop/hosts.include"
-default['hadoop']['yarn.resourcemanager.nodes.exclude-path'] = "#{node['grid']['app_root']}/hadoop/etc/hadoop/hosts.exclude"
+default['hadoop']['yarn.resourcemanager.nodes.include-path'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.include"
+default['hadoop']['yarn.resourcemanager.nodes.exclude-path'] = "#{node['hadoop']['HADOOP_CONF_DIR']}/hosts.exclude"
 default['hadoop']['yarn.acl.enable'] = 'true'
 default['hadoop']['yarn.admin.acl'] = ' yarn,gridops'
 default['hadoop']['yarn.resourcemanager.principal'] = 'yarn/${this.resourcemanager.fqdn}@${this.realm}'
@@ -212,17 +227,27 @@ default['hadoop']['yarn.nodemanager.admin-env'] = 'MALLOC_ARENA_MAX=$MALLOC_AREN
 default['hadoop']['yarn.nodemanager.container-executor.class'] = 'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor'
 default['hadoop']['yarn.nodemanager.linux-container-executor.group'] = 'yarn'
 default['hadoop']['yarn.nodemanager.linux-container-executor.resources-handler.class'] = 'org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler'
+# CgroupsLCEResourcesHandler is available in ver. 2.0.3-alpha and later.
 #default['hadoop']['yarn.nodemanager.linux-container-executor.resources-handler.class'] = 'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler'
 default['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'] = '/hadoop-yarn'
 default['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount'] = 'false'
 default['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount-path'] = ''
 ## mapred-site.xml
 default['hadoop']['mapreduce.framework.name'] = 'yarn'
-default['hadoop']['yarn.app.mapreduce.am.staging-dir'] = '/usr'
+default['hadoop']['yarn.app.mapreduce.am.staging-dir'] = '/user'
 default['hadoop']['mapreduce.jobhistory.intermediate-done-dir'] = "#{node['grid']['vol_root']}/0/var/lib/mapred/history/done_intermediate"
 default['hadoop']['mapreduce.jobhistory.done-dir'] = "#{node['grid']['vol_root']}/0/var/lib/mapred/history/done"
 default['hadoop']['mapreduce.jobhistory.principal'] = 'mapred/${this.jobhistory.fqdn}@${this.realm}'
 default['hadoop']['mapreduce.jobhistory.keytab'] = '${this.keytab.dir}/jh.keytab'
+## capacity-scheduler.xml
+default['hadoop']['yarn.scheduler.capacity.root.default.acl_submit_applications'] = '*'
+default['hadoop']['yarn.scheduler.capacity.root.default.acl_administer_queue'] = ' hadoop,gridops'
+## container-executor.cfg
+### e.g. CentOS: 500
+default['hadoop']['container-executor']['min.user.id'] = '1000'
+## hadoop-metrics2.properties
+default['hadoop']['metrics2']['resourcemanager.sink.ganglia.servers'] = ''
+default['hadoop']['metrics2']['nodemanager.sink.ganglia.servers'] = ''
 
 ## extra settings
 default['hadoop']['extra_configs'] = {
diff --git a/cookbooks/hadoop/recipes/default.rb b/cookbooks/hadoop/recipes/default.rb
index a357fa0..c624dbc 100644 (file)
 
 require 'digest/sha2'
 
-users = {
+users = nil
+limits_files = nil
+conf_files = nil
+
+users_v1 = {
   :hadoop => {:name => 'hadoop', :uid => 10001},
   :hdfs   => {:name => 'hdfs',   :uid => 10002},
   :mapred => {:name => 'mapred', :uid => 10003}
 }
+users_v2 = users_v1.merge(
+  {:yarn => {:name => 'yarn', :uid => 10004}}
+)
 
-limits_files = [
+limits_files_v1 = [
   'hdfs.conf',
-  'yarn.conf',
   'mapreduce.conf'
 ]
+limits_files_v2 = limits_files_v1 + ['yarn.conf']
 
-conf_files = [
+conf_files_v1 = [
   'capacity-scheduler.xml',
   'configuration.xsl',
   'core-site.xml',
@@ -47,13 +54,14 @@ conf_files = [
   'mapred-site.xml',
   'masters',
   'slaves',
-  'ssl-client.xml.example',
-  'ssl-server.xml.example'
+  #'ssl-client.xml',
+  #'ssl-server.xml'
 ]
 
 conf_files_v2 = \
-  conf_files \
+  conf_files_v1 \
   - [
+    'fair-scheduler.xml',
     'mapred-queue-acls.xml',
     'masters'
   ] \
@@ -67,13 +75,16 @@ conf_files_v2 = \
     'yarn-site.xml'
   ] 
 
+# for ver. 1.0
 krb5_conf_files = [
   'krb5-strong.conf',
   'krb5-weak.conf'
 ]
 
 def conf_template(conf_dir, middle_version, conf_files, tpl_vars)
-  source_dir = middle_version == '2.0' ? "etc-#{middle_version}/hadoop" : "conf-#{middle_version}"
+  source_dir = (middle_version == '2.0') \
+    ? "etc-#{middle_version}/hadoop" \
+    : "conf-#{middle_version}"
 
   conf_files.each {|conf_file|
     template "#{conf_dir}/#{conf_file}" do
@@ -90,20 +101,35 @@ version = node['hadoop']['version']
 major_version = nil
 middle_version = nil
 
-if /^(\d+)\.(\d+)\.(\d+)\.?(\d*)$/ =~ version then
+if /^(\d+)\.(\d+)\.(\d+)\.?(\d*)[-\w]*$/ =~ version then
   major_version = $1
   middle_version = "#{$1}.#{$2}"
 else
   Chef::Application.fatal!("Invalid Hadoop version: #{version}")
 end
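
As a quick sanity check of the relaxed pattern (the version strings below are only examples), the suffix-tolerant regex yields the same major/middle split for both release lines:

    # Standalone illustration of the parsing above
    ['1.1.2', '2.0.5-alpha'].each do |v|
      if /^(\d+)\.(\d+)\.(\d+)\.?(\d*)[-\w]*$/ =~ v
        puts "#{v} -> major=#{$1}, middle=#{$1}.#{$2}"
      end
    end
    # prints:
    #   1.1.2 -> major=1, middle=1.1
    #   2.0.5-alpha -> major=2, middle=2.0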
 
-if ! ('1.0.0' <= version \
-  && version < '1.2') then
+unless (('1.0.0' <= version && version < '1.2') \
+  || middle_version == '2.0') then
   Chef::Application.fatal!("Non supported version: #{version}")
 end
 
+hadoop_members = nil
+case major_version
+  when '1'
+    users = users_v1
+    limits_files = limits_files_v1
+    conf_files = conf_files_v1
+  when '2'
+    users = users_v2
+    limits_files = limits_files_v2
+    conf_files = conf_files_v2
+end
+
+hadoop_members = []
 users.each {|key, user|
   if key != :hadoop
+    hadoop_members.push(user[:name])
+
     group user[:name] do
       gid user[:uid]
       members []
@@ -125,7 +151,7 @@ users.each {|key, user|
 
 group users[:hadoop][:name] do
   gid users[:hadoop][:uid]
-  members ['hdfs', 'mapred']
+  members hadoop_members
   append true
   action :create
   not_if "getent group #{users[:hadoop][:name]}"
@@ -164,6 +190,32 @@ node['grid']['max_vol_nums'].to_i.times {|vol_num|
       end
     }
 
+    if major_version == '2' then
+      directory "#{target_vol_dir}/var/log/yarn" do
+        owner 'yarn'
+        group 'hadoop'
+        mode '0755'
+        action :create
+        recursive true
+      end
+
+      directory "#{target_vol_dir}/var/log/yarn/nm" do
+        owner 'yarn'
+        group 'hadoop'
+        mode '0755'
+        action :create
+        recursive true
+      end
+
+      directory "#{target_vol_dir}/var/log/yarn/nm/local" do
+        owner 'yarn'
+        group 'hadoop'
+        mode '0755'
+        action :create
+        recursive true
+      end
+    end
+
     directory "#{target_vol_dir}/tmp" do
       owner 'root'
       group 'root'
@@ -172,6 +224,16 @@ node['grid']['max_vol_nums'].to_i.times {|vol_num|
       recursive true
     end
 
+    if major_version == '2' then
+      directory "#{target_vol_dir}/mapred/tmp" do
+        owner 'root'
+        group 'root'
+        mode '1777'
+        action :create
+        recursive true
+      end
+    end
+
     if vol_num == 0 then
       directory "#{target_vol_dir}/var/run" do
         owner 'root'
@@ -198,7 +260,13 @@ log "This node active volumes: #{active_vol_nums}"
 
 file_cache_path = Chef::Config[:file_cache_path]
 install_root = "#{node['grid']['app_root']}/hadoop-#{version}"
-tarball = "hadoop-#{version}-bin.tar.gz"
+tarball = nil
+case major_version
+  when '1'
+    tarball = "hadoop-#{version}-bin.tar.gz"
+  when '2'
+    tarball = "hadoop-#{version}.tar.gz"
+end
 tarball_mds = "#{tarball}.mds"
 downloaded_tarball = "#{file_cache_path}/#{tarball}"
 downloaded_tarball_mds = "#{file_cache_path}/#{tarball_mds}"
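
For example, with version '1.1.2' this resolves to hadoop-1.1.2-bin.tar.gz, whereas a 2.0.x release such as '2.0.5-alpha' resolves to hadoop-2.0.5-alpha.tar.gz, matching the upstream naming where 2.0.x tarballs drop the -bin suffix.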
@@ -247,6 +315,8 @@ hadoop-1.1.2-bin.tar.gz:   SHA1 = DCCC 01A0 4C42 587D 9DF1  83CA 7DC8 83F7 A6A4
   bash "install_hadoop-#{version}" do
     code <<-EOC
       tar xvzf #{downloaded_tarball} -C #{node['grid']['app_root']}
+      # for 2.0.x
+      chown -R root:root #{install_root}
     EOC
     creates install_root
   end
@@ -258,9 +328,6 @@ link node['hadoop']['HADOOP_PREFIX'] do
 end
 
 limits_files.each {|limits_file|
-  if limits_file == 'yarn.conf' && major_version.to_i < 2 then
-    next
-  end
   template "/etc/security/limits.d/#{limits_file}" do
     source "etc/security/limits.d/#{limits_file}"
     owner 'root'
@@ -269,7 +336,14 @@ limits_files.each {|limits_file|
   end
 }
 
-conf_dir = "#{node['grid']['app_root']}/hadoop-#{version}/conf"
+conf_dir = nil
+case major_version
+  when '1'
+    conf_dir = "#{node['grid']['app_root']}/hadoop-#{version}/conf"
+  when '2'
+    conf_dir = "#{node['grid']['app_root']}/hadoop-#{version}/etc/hadoop"
+end
+
 tpl_vars = {
   :active_vol_nums => active_vol_nums
 }
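
The invocation itself is outside this hunk, but the pieces assembled above are presumably passed to conf_template along the lines of the following sketch (argument order follows the method definition earlier in this recipe):

    # Sketch only; the actual call site is not shown in this hunk.
    conf_template(conf_dir, middle_version, conf_files, tpl_vars)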
@@ -285,38 +359,61 @@ if node['hadoop']['with_security'] then
     recursive true
   end
   
-  file "#{node['grid']['app_root']}/hadoop-#{version}/bin/task-controller" do
-    owner 'root'
-    group 'mapred'
-    mode '6050'
-  end
-  
-  template "#{node['grid']['app_root']}/hadoop-#{version}/conf/taskcontroller.cfg" do
-    source "conf-#{middle_version}/taskcontroller.cfg"
-    owner 'root'
-    group 'root'
-    mode '0400'
-    variables({
-      :active_vol_nums => active_vol_nums
-    })
+  jsvc_pkg = nil
+  case node[:platform_family]
+    when 'debian'
+      jsvc_pkg = 'jsvc'
+    when 'rhel'
+      jsvc_pkg = 'jakarta-commons-daemon-jsvc'
   end
+      
+  case major_version
+    when '1'
+      if node[:kernel][:machine] != 'x86_64' then
+        package jsvc_pkg do
+          action :install
+        end
+      
+        link "#{install_root}/libexec/jsvc.i386" do
+          to '/usr/bin/jsvc'
+        end
+      end
   
-  if node[:kernel][:machine] != 'x86_64' then
-    jsvc_pkg = 'jsvc'
-    case node[:platform_family]
-      when 'debian'
-        jsvc_pkg = 'jsvc'
-      when 'rhel'
-        jsvc_pkg = 'jakarta-commons-daemon-jsvc'
-    end
-  
-    package jsvc_pkg do
-      action :install
-    end
-  
-    link "#{install_root}/libexec/jsvc.i386" do
-      to '/usr/bin/jsvc'
-    end
+      file "#{node['grid']['app_root']}/hadoop-#{version}/bin/task-controller" do
+        owner 'root'
+        group 'mapred'
+        mode '6050'
+      end
+      
+      template "#{node['grid']['app_root']}/hadoop-#{version}/conf/taskcontroller.cfg" do
+        source "conf-#{middle_version}/taskcontroller.cfg"
+        owner 'root'
+        group 'root'
+        mode '0400'
+        variables({
+          :active_vol_nums => active_vol_nums
+        })
+      end
+    when '2'
+      package jsvc_pkg do
+        action :install
+      end
+      
+      file "#{node['grid']['app_root']}/hadoop-#{version}/bin/container-executor" do
+        owner 'root'
+        group 'yarn'
+        mode '6050'
+      end
+      
+      template "#{node['grid']['app_root']}/hadoop-#{version}/etc/hadoop/container-executor.cfg" do
+        source "etc-#{middle_version}/hadoop/container-executor.cfg"
+        owner 'root'
+        group 'root'
+        mode '0400'
+        variables({
+          :active_vol_nums => active_vol_nums
+        })
+      end
   end
   
   if middle_version == '1.0' then
@@ -325,7 +422,9 @@ if node['hadoop']['with_security'] then
   end
 end
 
-log <<-EOM
+case major_version
+  when '1'
+    log <<-EOM
 Note:
 You must initialize HDFS in the first installation:
   $ cd #{node['grid']['app_root']}/hadoop
@@ -337,7 +436,30 @@ You must initialize HDFS in the first installation:
   $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
   $ sudo -u hdfs ./bin/hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
   $ sudo -u hdfs ./bin/hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
-EOM
+    EOM
+
+    examples_jar = "hadoop-examples-#{version}.jar"
+  when '2'
+    log <<-EOM
+Note:
+You must initialize HDFS in the first installation:
+  $ cd #{node['grid']['app_root']}/hadoop
+  $ sudo -u hdfs ./bin/hdfs namenode -format
+  $ sudo -u hdfs ./sbin/hadoop-daemon.sh start namenode
+  $ sudo -u hdfs ./sbin/hadoop-daemon.sh start datanode
+  $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
+  $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
+  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
+  $ sudo -u hdfs ./bin/hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/log/yarn/nm
+  $ sudo -u hdfs ./bin/hadoop fs -chown yarn:hadoop #{node['grid']['vol_root']}/0/var/log/yarn/nm
+  $ sudo -u hdfs ./bin/hadoop fs -chmod 1777        #{node['grid']['vol_root']}/0/var/log/yarn/nm
+  $ sudo -u hdfs ./bin/hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/lib/mapred/history
+  $ sudo -u hdfs ./bin/hadoop fs -chown -R mapred:hadoop #{node['grid']['vol_root']}/0/var/lib/mapred
+  $ sudo -u hdfs ./bin/hadoop fs -chmod -R 755           #{node['grid']['vol_root']}/0/var/lib/mapred
+    EOM
+
+    examples_jar = "share/hadoop/mapreduce/hadoop-mapreduce-examples-#{version}.jar"
+end
 
 if node['hadoop']['with_security'] then
   log <<-EOM
@@ -345,7 +467,7 @@ Note:
 Example MapReduce job execution:
   $ sudo -u alice kinit
   Password for alice@LOCALDOMAIN: 
-  $ sudo -u alice bin/hadoop jar hadoop-examples-#{version}.jar pi \\
+  $ sudo -u alice bin/hadoop jar #{examples_jar} pi \\
   > -D mapreduce.job.acl-view-job=* -D mapreduce.job.acl-modify-job=alice 5 10
   EOM
 else
@@ -355,7 +477,7 @@ Example MapReduce job execution:
   $ sudo adduser alice
   $ sudo -u hdfs ./bin/hadoop fs -mkdir /user/alice
   $ sudo -u hdfs ./bin/hadoop fs -chown alice:alice /user/alice
-  $ sudo -u alice ./bin/hadoop jar hadoop-examples-#{version}.jar pi 5 10
+  $ sudo -u alice ./bin/hadoop jar #{examples_jar} pi 5 10
   EOM
 end
 
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/capacity-scheduler.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/capacity-scheduler.xml
new file mode 100644 (file)
index 0000000..3657fe6
--- /dev/null
@@ -0,0 +1,111 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-applications</name>
+    <value>10000</value>
+    <description>
+      Maximum number of applications that can be pending and running.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+    <value>0.1</value>
+    <description>
+      Maximum percent of resources in the cluster which can be used to run 
+      application masters i.e. controls number of concurrent running
+      applications.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.server.resourcemanager.resource.DefaultResourceCalculator</value>
+    <description>
+      The ResourceCalculator implementation to be used to compare 
+      Resources in the scheduler.
+      The default i.e. DefaultResourceCalculator only uses Memory while
+      DominantResourceCalculator uses dominant-resource to compare 
+      multi-dimensional resources such as Memory, CPU etc.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.queues</name>
+    <value>default</value>
+    <description>
+      The queues at this level (root is the root queue).
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.capacity</name>
+    <value>100</value>
+    <description>Default queue target capacity.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+    <value>1</value>
+    <description>
+      Default queue user limit, a percentage from 0.0 to 1.0.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+    <value>100</value>
+    <description>
+      The maximum capacity of the default queue. 
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.state</name>
+    <value>RUNNING</value>
+    <description>
+      The state of the default queue. State can be one of RUNNING or STOPPED.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.default.acl_submit_applications'] %></value>
+    <description>
+      The ACL of who can submit jobs to the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.default.acl_administer_queue'] %></value>
+    <description>
+      The ACL of who can administer jobs on the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>-1</value>
+    <description>
+      Number of missed scheduling opportunities after which the CapacityScheduler 
+      attempts to schedule rack-local containers. 
+      Typically this should be set to the number of racks in the cluster; this 
+      feature is disabled by default (set to -1).
+    </description>
+  </property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/configuration.xsl b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/configuration.xsl
new file mode 100644 (file)
index 0000000..d50d80b
--- /dev/null
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/container-executor.cfg b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/container-executor.cfg
new file mode 100644 (file)
index 0000000..3a8d874
--- /dev/null
@@ -0,0 +1,8 @@
+#yarn.nodemanager.local-dirs=/grid/vol/0/var/lib/yarn/nm/local
+#yarn.nodemanager.log-dirs=/grid/vol/0/var/log/yarn/nm
+yarn.nodemanager.linux-container-executor.group=yarn
+#comma separated list of users who can not run applications
+banned.users=hdfs,yarn,mapred,bin
+#Prevent other super-users
+#min.user.id=1000    # default
+min.user.id=<%= node['hadoop']['container-executor']['min.user.id'] %>
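
Per the attribute comment in attributes/default.rb, ordinary accounts on CentOS-style systems may start at UID 500, so the 1000 default can be lowered per environment; an illustrative role override (the role name is made up):

    # roles/centos-nodes.rb (hypothetical)
    name 'centos-nodes'
    default_attributes(
      'hadoop' => {
        'container-executor' => { 'min.user.id' => '500' }
      }
    )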
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/core-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/core-site.xml
new file mode 100644 (file)
index 0000000..52043fc
--- /dev/null
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.cluster.name</name>
+        <value><%= node['hadoop']['this.cluster.name'] %></value>
+        <!-- <value>pleiades</value> -->
+    </property>
+    <property>
+        <name>this.domain</name>
+        <value><%= node['hadoop']['this.domain'] %></value>
+        <!-- <value>grid.example.com</value> -->
+    </property>
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>this.realm</name>
+        <value><%= node['hadoop']['this.realm'] %></value>
+        <!-- <value>GRID.EXAMPLE.COM</value> -->
+    </property>
+    <property>
+        <name>this.keytab.dir</name>
+        <value><%= node['hadoop']['this.keytab.dir'] %></value>
+    </property>
+<% end -%>
+    <property>
+        <name>this.namenode.fqdn</name>
+        <value><%= node['hadoop']['this.namenode.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-nn.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>fs.defaultFS</name>
+        <value><%= node['hadoop']['fs.defaultFS'] %></value>
+    </property>
+    <property>
+        <name>hadoop.tmp.dir</name>
+        <value><%= node['hadoop']['hadoop.tmp.dir'] %></value>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>hadoop.security.authentication</name>
+        <value><%= node['hadoop']['hadoop.security.authentication'] %></value>
+        <description>
+            Set the authentication for the cluster. Valid values are: simple or
+            kerberos.
+        </description>
+    </property>
+    <property>
+        <name>hadoop.security.authorization</name>
+        <value><%= node['hadoop']['hadoop.security.authorization'] %></value>
+        <description>
+            Enable authorization for different protocols.
+        </description>
+    </property>
+    <property>
+        <name>hadoop.security.auth_to_local</name>
+        <value><%= node['hadoop']['hadoop.security.auth_to_local'] %></value>
+    </property>
+    <property>
+        <name>hadoop.security.group.mapping</name>
+        <value><%= node['hadoop']['hadoop.security.group.mapping'] %></value>
+    </property>
+    <property>
+        <name>hadoop.security.groups.cache.secs</name>
+        <value><%= node['hadoop']['hadoop.security.groups.cache.secs'] %></value>
+    </property>
+    <property>
+        <name>hadoop.kerberos.kinit.command</name>
+        <value><%= node['hadoop']['hadoop.kerberos.kinit.command'] %></value>
+    </property>
+
+    <property>
+        <name>hadoop.http.filter.initializers</name>
+        <value><%= node['hadoop']['hadoop.http.filter.initializers'] %></value>
+        <!-- <value>org.apache.hadoop.http.lib.StaticUserWebFilter</value> -->
+        <description>The name of a class that initializes an input filter for Jetty.
+            This filter will always return Dr.Who as the web user when the servlets
+            query for the authenticated user </description>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.signature.secret.file</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.signature.secret.file'] %></value>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.cookie.domain</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.cookie.domain'] %></value>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.type</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.type'] %></value>
+        <description>Defines authentication used for the HTTP web-consoles.
+            The supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#.
+            The default value is simple.</description>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.kerberos.principal</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.kerberos.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value>
+            _HOST N/A!: v1.0, HDP1.2; OK: v2.0, CDH3, CDH4 -->
+    </property>
+    <property>
+        <name>hadoop.http.authentication.kerberos.keytab</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.kerberos.keytab'] %></value>
+    </property>
+
+  <%- node['hadoop']['hadoop.proxyuser'].each do |name, values| -%>
+    <property>
+        <name>hadoop.proxyuser.<%= name %>.hosts</name>
+        <value><%= values['hosts'] %></value>
+    </property>
+    <property>
+        <name>hadoop.proxyuser.<%= name %>.groups</name>
+        <value><%= values['groups'] %></value>
+    </property>
+  <%- end -%>
+<% end -%>
+<%
+this_file = 'core-site.xml'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
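
The extra_configs hook at the end of the template allows additional properties to be appended without editing the template itself; a hypothetical attribute setting (the property name is only an example):

    # e.g. in an attributes file or a role (illustrative)
    default['hadoop']['extra_configs'] = {
      'core-site.xml' => {
        'io.file.buffer.size' => '131072'
      }
    }

which the loop above renders as a corresponding <property> element in core-site.xml.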
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-env.sh b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-env.sh
new file mode 100644 (file)
index 0000000..b12c21a
--- /dev/null
@@ -0,0 +1,123 @@
+# Copyright 2011 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set Hadoop-specific environment variables here.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+# The directory where pid files are stored. /tmp by default.
+export HADOOP_PID_DIR=<%= node['hadoop']['HADOOP_PID_DIR'] %>
+# Where log files are stored. $HADOOP_PREFIX/logs by default.
+#export HADOOP_LOG_DIR=<%= node['hadoop']['HADOOP_LOG_DIR'] %>
+if [ x"$USER" = x'root' ]; then
+    export HADOOP_LOG_DIR=<%= File::dirname(node['hadoop']['HADOOP_LOG_DIR']) %>/hdfs
+else
+    export HADOOP_LOG_DIR=<%= node['hadoop']['HADOOP_LOG_DIR'] %>
+fi
+
+<% if node['hadoop']['with_security'] then -%>
+export HADOOP_SECURE_DN_USER=<%= node['hadoop']['HADOOP_SECURE_DN_USER'] %>
+# This property is N/A or overridden by the HADOOP_PID_DIR
+#export HADOOP_SECURE_DN_PID_DIR=/grid/vol/0/var/run/${HADOOP_SECURE_DN_USER}
+# This property is N/A or overridden by the HADOOP_LOG_DIR
+#export HADOOP_SECURE_DN_LOG_DIR=/grid/vol/0/var/log/${HADOOP_SECURE_DN_USER}
+export JSVC_HOME=<%= node['hadoop']['JSVC_HOME'] %>
+#export JSVC_HOME=/grid/usr/hadoop/sbin
+<% end -%>
+# Extra Java CLASSPATH elements.  Optional.
+if [ x"$HADOOP_CLASSPATH" = x ]; then
+    export HADOOP_CLASSPATH=<%= node['hadoop']['HADOOP_CLASSPATH'] %>
+    #export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:/grid/usr/commons-daemon-1.0.13/commons-daemon-1.0.13.jar
+else
+    # for Hive and HCatalog
+    export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:<%= node['hadoop']['HADOOP_CLASSPATH'] %>
+    #export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:/grid/usr/commons-daemon-1.0.13/commons-daemon-1.0.13.jar
+fi
+export HADOOP_USER_CLASSPATH_FIRST=<%= node['hadoop']['HADOOP_USER_CLASSPATH_FIRST'] %>
+
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.
+export JAVA_HOME=${JAVA_HOME}
+
+# The jsvc implementation to use. Jsvc is required to run secure datanodes.
+#export JSVC_HOME=${JSVC_HOME}
+
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
+
+# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
+for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
+  if [ "$HADOOP_CLASSPATH" ]; then
+    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
+  else
+    export HADOOP_CLASSPATH=$f
+  fi
+done
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+#export HADOOP_HEAPSIZE=
+#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
+
+# Extra Java runtime options.  Empty by default.
+export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true $HADOOP_CLIENT_OPTS"
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
+
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
+
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+export HADOOP_CLIENT_OPTS="-Xmx128m $HADOOP_CLIENT_OPTS"
+#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges
+export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
+
+# Where log files are stored in the secure data environment.
+export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+
+# The directory where pid files are stored. /tmp by default.
+# NOTE: this should be set to a directory that can only be written to by 
+#       the user that will run the hadoop daemons.  Otherwise there is the
+#       potential for a symlink attack.
+export HADOOP_PID_DIR=${HADOOP_PID_DIR}
+export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=$USER
+
+<%
+this_file = 'hadoop-env.sh'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics.properties b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics.properties
new file mode 100644 (file)
index 0000000..c1b2eb7
--- /dev/null
@@ -0,0 +1,75 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+#jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# rpc.period=10
+# rpc.servers=localhost:8649
+
+
+# Configuration of the "ugi" context for null
+ugi.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "ugi" context for file
+#ugi.class=org.apache.hadoop.metrics.file.FileContext
+#ugi.period=10
+#ugi.fileName=/tmp/ugimetrics.log
+
+# Configuration of the "ugi" context for ganglia
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# ugi.period=10
+# ugi.servers=localhost:8649
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics2.properties b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-metrics2.properties
new file mode 100644 (file)
index 0000000..09af4d6
--- /dev/null
@@ -0,0 +1,78 @@
+#
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+# default sampling period, in seconds
+*.period=10
+
+# The namenode-metrics.out will contain metrics from all context
+#namenode.sink.file.filename=namenode-metrics.out
+# Specifying a special sampling period for namenode:
+#namenode.sink.*.period=8
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+# the following example split metrics of different
+# context to different sinks (in this case files)
+#jobtracker.sink.file_jvm.context=jvm
+#jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out
+#jobtracker.sink.file_mapred.context=mapred
+#jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out
+
+
+#
+# Below are for sending metrics to Ganglia
+#
+# for Ganglia 3.0 support
+# *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30
+#
+# for Ganglia 3.1 support
+*.sink.ganglia.class=<%= node['hadoop']['metrics2']['*.sink.ganglia.class'] %>
+
+*.sink.ganglia.period=<%= node['hadoop']['metrics2']['*.sink.ganglia.period'] %>
+
+# default for supportsparse is false
+*.sink.ganglia.supportsparse=<%= node['hadoop']['metrics2']['*.sink.ganglia.supportsparse'] %>
+
+*.sink.ganglia.slope=<%= node['hadoop']['metrics2']['*.sink.ganglia.slope'] %>
+*.sink.ganglia.dmax=<%= node['hadoop']['metrics2']['*.sink.ganglia.dmax'] %>
+
+<%
+%w{
+  namenode
+  datanode
+  resourcemanager
+  nodemanager
+}.each do |prefix|
+  servers = node['hadoop']['metrics2']["#{prefix}.sink.ganglia.servers"]
+  if !servers.nil? && !servers.empty? then
+-%>
+<%= prefix %>.sink.ganglia.servers=<%= servers %>
+
+<%
+  end
+end
+-%>
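
For example (the Ganglia host is hypothetical), setting node['hadoop']['metrics2']['resourcemanager.sink.ganglia.servers'] to 'gm01.grid.example.com:8649' makes the loop above emit the single line resourcemanager.sink.ganglia.servers=gm01.grid.example.com:8649, while the sinks left empty produce no output.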
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-policy.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hadoop-policy.xml
new file mode 100644 (file)
index 0000000..c17966d
--- /dev/null
@@ -0,0 +1,219 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+
+ Copyright 2011 The Apache Software Foundation
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code
+    via the DistributedFileSystem.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+ <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AdminOperationsProtocol. Used for admin commands.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.usertogroups.mappings.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshUserMappingsProtocol. Used to refresh
+    users mappings. The ACL is a comma-separated list of user and
+    group names. The user and group list is separated by a blank. For
+    e.g. "alice,bob users,wheel".  A special value of "*" means all
+    users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the
+    dfsadmin and mradmin commands to refresh the security policy in-effect.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.ha.service.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for HAService protocol used by HAAdmin to manage the
+      active and stand-by states of namenode.</description>
+  </property>
+
+  <property>
+    <name>security.zkfc.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for access to the ZK Failover Controller
+    </description>
+  </property>
+
+  <property>
+    <name>security.qjournal.service.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for QJournalProtocol, used by the NN to communicate with
+    JNs when using the QuorumJournalManager for edit logs.</description>
+  </property>
+
+  <property>
+    <name>security.mrhs.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for HSClientProtocol, used by job clients to
+    communicate with the MR History Server to query job status etc. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <!-- YARN Protocols -->
+
+  <property>
+    <name>security.resourcetracker.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ResourceTracker protocol, used by the
+    ResourceManager and NodeManager to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.admin.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RMAdminProtocol, for admin commands. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.resourcemanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientRMProtocol, used by the ResourceManager 
+    and applications submission clients to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.applicationmaster.resourcemanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AMRMProtocol, used by the ResourceManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.containermanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ContainerManager protocol, used by the NodeManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.resourcelocalizer.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ResourceLocalizer protocol, used by the NodeManager 
+    and ResourceLocalizer to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.task.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce
+    tasks to communicate with the parent tasktracker.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for MRClientProtocol, used by job clients to
+    communicate with the MR ApplicationMaster to query job status etc. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hdfs-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hdfs-site.xml
new file mode 100644 (file)
index 0000000..b4e8eab
--- /dev/null
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.secondary.namenode.fqdn</name>
+        <value><%= node['hadoop']['this.secondary.namenode.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-cn.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>dfs.namenode.name.dir</name>
+        <value><%= node['hadoop']['dfs.namenode.name.dir'] %></value>
+        <!-- <value>file://<%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/name,file:///export/home/${user.name}/var/lib/name</value> -->
+    </property>
+<%
+dfs_data_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  dfs_data_dir = dfs_data_dir == '' ? '' : "#{dfs_data_dir},"
+  dfs_data_dir = "#{dfs_data_dir}file://#{node['grid']['vol_root']}/#{vol_num}/var/lib/${user.name}/data"
+}
+-%>
+    <property>
+        <name>dfs.datanode.data.dir</name>
+        <value><%= dfs_data_dir %></value>
+        <!-- <value>file:///grid/vol/0/var/lib/${user.name}/data,file:///grid/vol/1/var/lib/${user.name}/data</value> -->
+    </property>
+    <property>
+        <name>dfs.namenode.checkpoint.dir</name>
+        <value><%= node['hadoop']['dfs.namenode.checkpoint.dir'] %></value>
+        <!-- <value>file://<%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/checkpoint,file:///export/home/${user.name}/var/lib/checkpoint</value> -->
+    </property>
+    <property>
+        <name>dfs.replication</name>
+        <value><%= node['hadoop']['dfs.replication'] %></value>
+        <!-- <value>3</value> -->
+    </property>
+
+    <property>
+        <name>dfs.hosts</name>
+        <value><%= node['hadoop']['dfs.hosts'] %></value>
+        <description>
+          Names a file that contains a list of hosts that are permitted to connect to the namenode.
+          The full pathname of the file must be specified. If the value is empty, all hosts are permitted.
+        </description>
+    </property>
+    <property>
+        <name>dfs.hosts.exclude</name>
+        <value><%= node['hadoop']['dfs.hosts.exclude'] %></value>
+        <description>
+          Names a file that contains a list of hosts that are not permitted to connect to the namenode.
+          The full pathname of the file must be specified. If the value is empty, no hosts are excluded.
+        </description>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>dfs.namenode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.principal'] %></value>
+        <!-- _HOST is replaced with the fs.defaultFS's host name -->
+        <!-- <value>hdfs/${this.namenode.fqdn}@${this.realm}</value> -->
+        <description>Kerberos principal name for the NameNode</description>
+    </property>
+    <property>
+        <name>dfs.namenode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.namenode.keytab.file'] %></value>
+        <description>
+            Combined keytab file containing the namenode service and host
+            principals.
+        </description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.principal'] %></value>
+        <!-- <value>hdfs/_HOST@${this.realm}</value> -->
+        <description>
+            Kerberos principal name for the secondary NameNode.
+        </description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.keytab.file'] %></value>
+        <description>
+            Combined keytab file containing the secondary namenode service and host
+            principals.
+        </description>
+    </property>
+    <!-- for KSSL (NOT RECOMMENDED) -->
+    <property>
+        <name>hadoop.security.use-weak-http-crypto</name>
+        <value><%= node['hadoop']['hadoop.security.use-weak-http-crypto'] %></value>
+    </property>
+  <% if node['hadoop']['hadoop.security.use-weak-http-crypto'] == 'true' then -%>
+    <property>
+        <name>dfs.namenode.https-address</name>
+        <value><%= node['hadoop']['dfs.namenode.https-address'] %></value>
+        <description>The https address where namenode binds</description>
+    </property>
+    <property>
+        <name>dfs.namenode.kerberos.https.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.https.principal'] %></value>
+        <!-- <value>host/_HOST@${this.realm}</value> v1.0.4: NG! -->
+        <description>
+            The Kerberos principal for the host that the NameNode runs on.
+        </description>
+    </property>
+    <property>
+        <name>dfs.namenode.secondary.https-address</name>
+        <value><%= node['hadoop']['dfs.namenode.secondary.https-address'] %></value>
+        <description>The https address where secondary namenode binds</description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.https.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.https.principal'] %></value>
+        <!-- <value>host/_HOST@${this.realm}</value> v1.0.4: NG! -->
+        <description>
+            The Kerberos principal for the host that the secondary NameNode
+            runs on.
+        </description>
+    </property>
+  <% end -%>
+    <property>
+        <name>dfs.block.access.token.enable</name>
+        <value><%= node['hadoop']['dfs.block.access.token.enable'] %></value>
+        <description>
+            If "true", access tokens are used as capabilities for accessing
+            datanodes.
+            If "false", no access tokens are checked on accessing datanodes.
+        </description>
+    </property>
+    <property>
+        <name>dfs.datanode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.datanode.kerberos.principal'] %></value>
+        <!-- <value>hdfs/_HOST@${this.realm}</value> -->
+        <description>
+            The Kerberos principal that the DataNode runs as. "_HOST" is
+            replaced by the real host name.
+        </description>
+    </property>
+    <property>
+        <name>dfs.datanode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.datanode.keytab.file'] %></value>
+        <description>
+            The filename of the keytab file for the DataNode.
+        </description>
+    </property>
+    <property>
+        <name>dfs.namenode.kerberos.internal.spnego.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.internal.spnego.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value> -->
+        <!-- _HOST is replaced with dfs.namenode.http-address's host name. -->
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.internal.spnego.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value> -->
+        <!-- _HOST is replaced with dfs.namenode.secondary.http-address's host name. -->
+    </property>
+
+    <property>
+        <name>dfs.datanode.address</name>
+        <value><%= node['hadoop']['dfs.datanode.address'] %></value>
+    </property>
+    <property>
+        <name>dfs.datanode.http.address</name>
+        <value><%= node['hadoop']['dfs.datanode.http.address'] %></value>
+    </property>
+
+    <property>
+        <name>dfs.namenode.http-address</name>
+        <value><%= node['hadoop']['dfs.namenode.http-address'] %></value>
+    </property>
+    <property>
+        <name>dfs.namenode.secondary.http-address</name>
+        <value><%= node['hadoop']['dfs.namenode.secondary.http-address'] %></value>
+    </property>
+    <property>
+        <name>dfs.web.authentication.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.web.authentication.kerberos.principal'] %></value>
+    </property>
+    <property>
+        <name>dfs.web.authentication.kerberos.keytab</name>
+        <value><%= node['hadoop']['dfs.web.authentication.kerberos.keytab'] %></value>
+    </property>
+<% end -%>
+<%
+this_file = 'hdfs-site.xml'
+if node['hadoop']['extra_configs'] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
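Note: the hdfs-site.xml template above derives dfs.datanode.data.dir from the active volume count and then appends any per-file overrides found in node['hadoop']['extra_configs']. A minimal standalone Ruby sketch of that assembly logic; vol_root, active_vol_nums and the extra_configs contents below are illustrative assumptions, not the cookbook's defaults:

# Standalone sketch of the ERB logic above; not part of the cookbook.
vol_root        = '/grid/vol'                               # assumed node['grid']['vol_root']
active_vol_nums = 2                                         # assumed @active_vol_nums
extra_configs   = { 'dfs.namenode.handler.count' => '64' }  # hypothetical override

# One file:// URI per active volume, comma-separated, as the template's loop produces.
dfs_data_dir = (0...active_vol_nums)
                 .map { |n| "file://#{vol_root}/#{n}/var/lib/${user.name}/data" }
                 .join(',')

# Render the derived value plus any extra_configs overrides as <property> elements.
{ 'dfs.datanode.data.dir' => dfs_data_dir }.merge(extra_configs).each do |name, value|
  puts "    <property>"
  puts "        <name>#{name}</name>"
  puts "        <value>#{value}</value>"
  puts "    </property>"
end

With two volumes this prints the same comma-separated form shown in the commented example, i.e. file:///grid/vol/0/var/lib/${user.name}/data,file:///grid/vol/1/var/lib/${user.name}/data.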
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.exclude b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.exclude
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.include b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/hosts.include
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-env.sh b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-env.sh
new file mode 100644 (file)
index 0000000..84c67b7
--- /dev/null
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+# Set httpfs specific environment variables here.
+
+# Settings for the Embedded Tomcat that runs HttpFS
+# Java System properties for HttpFS should be specified in this variable
+#
+# export CATALINA_OPTS=
+
+# HttpFS logs directory
+#
+# export HTTPFS_LOG=${HTTPFS_HOME}/logs
+
+# HttpFS temporary directory
+#
+# export HTTPFS_TEMP=${HTTPFS_HOME}/temp
+
+# The HTTP port used by HttpFS
+#
+# export HTTPFS_HTTP_PORT=14000
+
+# The Admin port used by HttpFS
+#
+# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1`
+
+# The hostname HttpFS server runs on
+#
+# export HTTPFS_HTTP_HOSTNAME=`hostname -f`
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-log4j.properties b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-log4j.properties
new file mode 100644 (file)
index 0000000..284a819
--- /dev/null
@@ -0,0 +1,35 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+#
+
+# If the Java System property 'httpfs.log.dir' is not defined at HttpFSServer start up time
+# Setup sets its value to '${httpfs.home}/logs'
+
+log4j.appender.httpfs=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.httpfs.DatePattern='.'yyyy-MM-dd
+log4j.appender.httpfs.File=${httpfs.log.dir}/httpfs.log
+log4j.appender.httpfs.Append=true
+log4j.appender.httpfs.layout=org.apache.log4j.PatternLayout
+log4j.appender.httpfs.layout.ConversionPattern=%d{ISO8601} %5p %c{1} [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n
+
+log4j.appender.httpfsaudit=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.httpfsaudit.DatePattern='.'yyyy-MM-dd
+log4j.appender.httpfsaudit.File=${httpfs.log.dir}/httpfs-audit.log
+log4j.appender.httpfsaudit.Append=true
+log4j.appender.httpfsaudit.layout=org.apache.log4j.PatternLayout
+log4j.appender.httpfsaudit.layout.ConversionPattern=%d{ISO8601} %5p [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n
+
+log4j.logger.httpfsaudit=INFO, httpfsaudit
+
+log4j.logger.org.apache.hadoop.fs.http.server=INFO, httpfs
+log4j.logger.org.apache.hadoop.lib=INFO, httpfs
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-signature.secret b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-signature.secret
new file mode 100644 (file)
index 0000000..56466e9
--- /dev/null
@@ -0,0 +1 @@
+hadoop httpfs secret
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/httpfs-site.xml
new file mode 100644 (file)
index 0000000..4a718e1
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/log4j.properties b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/log4j.properties
new file mode 100644 (file)
index 0000000..b92ad27
--- /dev/null
@@ -0,0 +1,219 @@
+# Copyright 2011 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# mapred audit logging
+#
+mapred.audit.logger=INFO,NullAppender
+mapred.audit.log.maxfilesize=256MB
+mapred.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
+log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
+log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
+log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender 
+#
+# Use following logger to send summary to separate file defined by 
+# hadoop.mapreduce.jobsummary.log.file :
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
+hadoop.mapreduce.jobsummary.log.maxbackupindex=20
+log4j.appender.JSA=org.apache.log4j.RollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
+log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# Yarn ResourceManager Application Summary Log 
+#
+# Set the ResourceManager summary log filename
+#yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+#    - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+#log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+#log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+#log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+#log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+#log4j.appender.RMSUMMARY.MaxFileSize=256MB
+#log4j.appender.RMSUMMARY.MaxBackupIndex=20
+#log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-env.sh b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-env.sh
new file mode 100644 (file)
index 0000000..2e0839a
--- /dev/null
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+
+# The directory where pid files are stored. /tmp by default.
+export HADOOP_MAPRED_PID_DIR=<%= node['hadoop']['HADOOP_MAPRED_PID_DIR'] %>
+# Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+export HADOOP_MAPRED_LOG_DIR=<%= node['hadoop']['HADOOP_MAPRED_LOG_DIR'] %>
+
+
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+
+export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+
+export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+
+#export HADOOP_JOB_HISTORYSERVER_OPTS=
+#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
+#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
+#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
+#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
+
+<%
+this_file = 'mapred-env.sh'
+if node['hadoop']['extra_configs'] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-queues.xml.template b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-queues.xml.template
new file mode 100644 (file)
index 0000000..ce6cd20
--- /dev/null
@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!-- This is the template for queue configuration. The format supports nesting of
+     queues within queues - a feature called hierarchical queues. All queues are
+     defined within the 'queues' tag which is the top level element for this
+     XML document. The queue acls configured here for different queues are
+     checked for authorization only if the configuration property
+     mapreduce.cluster.acls.enabled is set to true. -->
+<queues>
+
+  <!-- Configuration for a queue is specified by defining a 'queue' element. -->
+  <queue>
+
+    <!-- Name of a queue. Queue name cannot contain a ':'  -->
+    <name>default</name>
+
+    <!-- properties for a queue, typically used by schedulers,
+    can be defined here -->
+    <properties>
+    </properties>
+
+    <!-- State of the queue. If running, the queue will accept new jobs.
+         If stopped, the queue will not accept new jobs. -->
+    <state>running</state>
+
+    <!-- Specifies the ACLs to check for submitting jobs to this queue.
+         If set to '*', it allows all users to submit jobs to the queue.
+         If set to ' '(i.e. space), no user will be allowed to do this
+         operation. The default value for any queue acl is ' '.
+         For specifying a list of users and groups the format to use is
+         user1,user2 group1,group2
+
+         It is only used if authorization is enabled in Map/Reduce by setting
+         the configuration property mapreduce.cluster.acls.enabled to true.
+
+         Irrespective of this ACL configuration, the user who started the
+         cluster and cluster administrators configured via
+         mapreduce.cluster.administrators can do this operation. -->
+    <acl-submit-job> </acl-submit-job>
+
+    <!-- Specifies the ACLs to check for viewing and modifying jobs in this
+         queue. Modifications include killing jobs, tasks of jobs or changing
+         priorities.
+         If set to '*', it allows all users to view, modify jobs of the queue.
+         If set to ' '(i.e. space), no user will be allowed to do this
+         operation.
+         For specifying a list of users and groups the format to use is
+         user1,user2 group1,group2
+
+         It is only used if authorization is enabled in Map/Reduce by setting
+         the configuration property mapreduce.cluster.acls.enabled to true.
+
+         Irrespective of this ACL configuration, the user who started the
+         cluster  and cluster administrators configured via
+         mapreduce.cluster.administrators can do the above operations on all
+         the jobs in all the queues. The job owner can do all the above
+         operations on his/her job irrespective of this ACL configuration. -->
+    <acl-administer-jobs> </acl-administer-jobs>
+  </queue>
+
+  <!-- Here is a sample of a hierarchical queue configuration
+       where q2 is a child of q1. In this example, q2 is a leaf level
+       queue as it has no queues configured within it. Currently, ACLs
+       and state are only supported for the leaf level queues.
+       Note also the usage of properties for the queue q2.
+  <queue>
+    <name>q1</name>
+    <queue>
+      <name>q2</name>
+      <properties>
+        <property key="capacity" value="20"/>
+        <property key="user-limit" value="30"/>
+      </properties>
+    </queue>
+  </queue>
+ -->
+</queues>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml
new file mode 100644 (file)
index 0000000..166af2e
--- /dev/null
@@ -0,0 +1,58 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.jobhistory.fqdn</name>
+        <value><%= node['hadoop']['this.jobhistory.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-jt.${this.domain}</value> -->
+        <!-- <value>${this.cluster.name}-jh.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value><%= node['hadoop']['mapreduce.framework.name'] %></value>
+        <description>The runtime framework for executing MapReduce jobs.
+        Can be one of local, classic or yarn.
+        (default: local)
+        </description>
+    </property>
+    <property>
+        <name>yarn.app.mapreduce.am.staging-dir</name>
+        <value><%= node['hadoop']['yarn.app.mapreduce.am.staging-dir'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.intermediate-done-dir</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.intermediate-done-dir'] %></value>
+        <!-- NG: <value>/user</value> -->
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.done-dir</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.done-dir'] %></value>
+    </property>
+
+    <property>
+        <name>mapreduce.jobhistory.principal</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.principal'] %></value>
+        <!-- <value>mapred/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.keytab</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.keytab'] %></value>
+    </property>
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml.template b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/mapred-site.xml.template
new file mode 100644 (file)
index 0000000..761c352
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/slaves b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/slaves
new file mode 100644 (file)
index 0000000..2fbb50c
--- /dev/null
@@ -0,0 +1 @@
+localhost
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-client.xml.example b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-client.xml.example
new file mode 100644 (file)
index 0000000..a50dce4
--- /dev/null
@@ -0,0 +1,80 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.reload.interval</name>
+  <value>10000</value>
+  <description>Truststore reload check interval, in milliseconds.
+  Default value is 10000 (10 seconds).
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-server.xml.example b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/ssl-server.xml.example
new file mode 100644 (file)
index 0000000..4b363ff
--- /dev/null
@@ -0,0 +1,77 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.reload.interval</name>
+  <value>10000</value>
+  <description>Truststore reload check interval, in milliseconds.
+  Default value is 10000 (10 seconds).</description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-env.sh b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-env.sh
new file mode 100644 (file)
index 0000000..c8618cd
--- /dev/null
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+
+export HADOOP_PREFIX=<%= node['hadoop']['HADOOP_PREFIX'] %>
+export HADOOP_COMMON_HOME=<%= node['hadoop']['HADOOP_COMMON_HOME'] %>
+export HADOOP_HDFS_HOME=<%= node['hadoop']['HADOOP_HDFS_HOME'] %>
+export YARN_HOME=<%= node['hadoop']['YARN_HOME'] %>
+export YARN_PID_DIR=<%= node['hadoop']['YARN_PID_DIR'] %>
+export YARN_LOG_DIR=<%= node['hadoop']['YARN_LOG_DIR'] %>
+export HADOOP_MAPRED_HOME=<%= node['hadoop']['HADOOP_MAPRED_HOME'] %>
+
+
+# User for YARN daemons
+export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
+
+# resolve links - $0 may be a softlink
+export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
+
+# some Java parameters
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$YARN_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $YARN_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+
+# default log directory & file
+if [ "$YARN_LOG_DIR" = "" ]; then
+  YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
+fi
+if [ "$YARN_LOGFILE" = "" ]; then
+  YARN_LOGFILE='yarn.log'
+fi
+
+# default policy file for service-level authorization
+if [ "$YARN_POLICYFILE" = "" ]; then
+  YARN_POLICYFILE="hadoop-policy.xml"
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+
+YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
+YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
+YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
+YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
+YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
+YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
+YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
+YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
+
+<%
+this_file = 'yarn-env.sh'
+if node['hadoop']['extra_configs'] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
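Note: the mapred-env.sh and yarn-env.sh templates above render each node['hadoop']['extra_configs'] entry as an export line rather than an XML <property>. A short sketch of that rendering; the keys and values are hypothetical examples, not cookbook defaults:

# Env-style rendering of extra_configs entries; the hash contents are hypothetical.
extra_env = {
  'YARN_HEAPSIZE'             => '1024',
  'YARN_RESOURCEMANAGER_OPTS' => '-Dcom.sun.management.jmxremote'
}

extra_env.each do |key, value|
  puts "export #{key}=#{value}"
end
# prints:
#   export YARN_HEAPSIZE=1024
#   export YARN_RESOURCEMANAGER_OPTS=-Dcom.sun.management.jmxremote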
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml
new file mode 100644 (file)
index 0000000..31a9e91
--- /dev/null
@@ -0,0 +1,164 @@
+<?xml version="1.0"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+<!-- Site specific YARN configuration properties -->
+    <property>
+        <name>this.resourcemanager.fqdn</name>
+        <value><%= node['hadoop']['this.resourcemanager.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-rm.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>yarn.log-aggregation-enable</name>
+        <value><%= node['hadoop']['yarn.log-aggregation-enable'] %></value>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.scheduler.class</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.scheduler.class'] %></value>
+        <description>In case you do not want to use the default scheduler</description>
+    </property>
+<%
+yarn_nodemanager_local_dirs = ''
+@active_vol_nums.to_i.times {|vol_num|
+  yarn_nodemanager_local_dirs = yarn_nodemanager_local_dirs == '' ? '' : "#{yarn_nodemanager_local_dirs},"
+  yarn_nodemanager_local_dirs = "#{yarn_nodemanager_local_dirs}#{node['grid']['vol_root']}/#{vol_num}/var/lib/${user.name}/nm/local"
+}
+-%>
+    <property>
+        <name>yarn.nodemanager.local-dirs</name>
+        <value><%= yarn_nodemanager_local_dirs %></value>
+        <!-- <value>/grid/vol/0/var/lib/${user.name}/nm/local,/grid/vol/1/var/lib/${user.name}/nm/local</value> -->
+        <description>the local directories used by the nodemanager
+        (default: /tmp/nm-local-dir)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.resource.memory-mb</name>
+        <value><%= node['hadoop']['yarn.nodemanager.resource.memory-mb'] %></value>
+        <description>Amount of physical memory, in MB, that can be allocated
+        for containers. (default: 8192)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.resource.cpu-cores</name>
+        <value><%= node['hadoop']['yarn.nodemanager.resource.cpu-cores'] %></value>
+        <description>Number of CPU cores that can be allocated
+        for containers. (default: 8)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.remote-app-log-dir</name>
+        <value><%= node['hadoop']['yarn.nodemanager.remote-app-log-dir'] %></value>
+        <description>directory on hdfs where the application logs are moved to
+        (default: /tmp/logs)</description>
+    </property>
+<%
+yarn_nodemanager_log_dirs = ''
+@active_vol_nums.to_i.times {|vol_num|
+  yarn_nodemanager_log_dirs = yarn_nodemanager_log_dirs == '' ? '' : "#{yarn_nodemanager_log_dirs},"
+  yarn_nodemanager_log_dirs = "#{yarn_nodemanager_log_dirs}#{node['grid']['vol_root']}/#{vol_num}/var/log/${user.name}/nm"
+}
+-%>
+    <property>
+        <name>yarn.nodemanager.log-dirs</name>
+        <value><%= yarn_nodemanager_log_dirs %></value>
+        <!-- <value>/grid/vol/0/var/log/${user.name}/nm,/grid/vol/1/var/log/${user.name}/nm</value> -->
+        <description>the directories used by Nodemanagers as log directories
+        (default: /tmp/logs)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value><%= node['hadoop']['yarn.nodemanager.aux-services'] %></value>
+        <description>shuffle service that needs to be set for Map Reduce to run</description>
+    </property>
+
+    <property>
+        <name>yarn.resourcemanager.nodes.include-path</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.nodes.include-path'] %></value>
+        <description>Path to file with nodes to include.</description>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.nodes.exclude-path</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.nodes.exclude-path'] %></value>
+        <description>Path to file with nodes to exclude.</description>
+    </property>
+
+    <property>
+        <name>yarn.acl.enable</name>
+        <value><%= node['hadoop']['yarn.acl.enable'] %></value>
+    </property>
+    <property>
+        <name>yarn.admin.acl</name>
+        <value><%= node['hadoop']['yarn.admin.acl'] %></value>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.principal</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.principal'] %></value>
+        <!-- <value>yarn/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>yarn.resourcemanager.keytab</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.keytab'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.principal</name>
+        <value><%= node['hadoop']['yarn.nodemanager.principal'] %></value>
+        <!-- <value>yarn/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>yarn.nodemanager.keytab</name>
+        <value><%= node['hadoop']['yarn.nodemanager.keytab'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.admin-env</name>
+        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
+    </property>
+
+    <property>
+        <name>yarn.nodemanager.container-executor.class</name>
+        <value><%= node['hadoop']['yarn.nodemanager.container-executor.class'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.group</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.group'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.resources-handler.class'] %></value>
+        <description>The class which should help the LCE handle resources.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'] %></value>
+        <description>The cgroups hierarchy under which to place YARN processes (cannot contain commas).
+        If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have
+        been pre-configured), then this cgroups hierarchy must already exist and be writable by the
+        NodeManager user, otherwise the NodeManager may fail.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount'] %></value>
+        <description>Whether the LCE should attempt to mount cgroups if not found.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount-path'] %></value>
+        <description>Where the LCE should attempt to mount cgroups if not found. Common locations
+        include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux
+        distribution in use. This path must exist before the NodeManager is launched.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and
+        yarn.nodemanager.linux-container-executor.cgroups.mount is true.</description>
+    </property>
+</configuration>
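Note: the cgroups-related properties at the end of yarn-site.xml only take effect when the LinuxContainerExecutor is selected together with the CgroupsLCEResourcesHandler referenced in the roles/test-on-localhost.rb change further down. A hedged sketch of role attributes that would wire these up; the executor class name is Hadoop's LinuxContainerExecutor, while the group, hierarchy and mount-path values are assumptions for a single test node, not cookbook defaults:

# Hypothetical role attributes for the LCE + cgroups settings rendered above.
default_attributes(
  'hadoop' => {
    'yarn.nodemanager.container-executor.class' =>
      'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor',
    'yarn.nodemanager.linux-container-executor.group' => 'yarn',           # assumed group
    'yarn.nodemanager.linux-container-executor.resources-handler.class' =>
      'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler',
    'yarn.nodemanager.linux-container-executor.cgroups.hierarchy' => '/hadoop-yarn',    # assumed
    'yarn.nodemanager.linux-container-executor.cgroups.mount' => 'false',  # cgroups pre-configured
    # mount-path is only consulted when cgroups.mount is true, but the template renders it anyway.
    'yarn.nodemanager.linux-container-executor.cgroups.mount-path' => '/sys/fs/cgroup'  # assumed
  }
)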
similarity index 100%
rename from role-spaghetti.dot
rename to roles/role-spaghetti.dot
similarity index 100%
rename from role-spaghetti.png
rename to roles/role-spaghetti.png
index 415e4d4..9445055 100644 (file)
@@ -139,16 +139,25 @@ default_attributes(
     'version' => '3.4.5'
   },
   'hadoop' => {
+    'version' => '2.0.5-alpha',
+    #'version' => '2.0.4-alpha',
     #'version' => '1.1.2',
-    'version' => '1.0.4',
+    #'version' => '1.0.4',
     'metrics2' => {
       'namenode.sink.ganglia.servers' => 'localhost:8649',
       'datanode.sink.ganglia.servers' => 'localhost:8649',
+      # for 2.0.x only
+      'resourcemanager.sink.ganglia.servers' => 'localhost:8649',
+      'nodemanager.sink.ganglia.servers' => 'localhost:8649',
+      # for 1.0.x only
       'jobtracker.sink.ganglia.servers' => 'localhost:8649',
       'tasktracker.sink.ganglia.servers' => 'localhost:8649',
       'maptask.sink.ganglia.servers' => 'localhost:8649',
       'reducetask.sink.ganglia.servers' => 'localhost:8649'
     },
+    # for 2.0.x only >>
+    'yarn.nodemanager.linux-container-executor.resources-handler.class' \
+      => 'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler',
     # for 1.0.x only >>
     'extra_configs' => {
       'core-site.xml' => {