
add Apache Hadoop 2.2.0 support to the hadoop cookbooks.
author whitestar <whitestar@gaea.test>
Tue, 22 Oct 2013 12:14:23 +0000 (21:14 +0900)
committer whitestar <whitestar@gaea.test>
Tue, 22 Oct 2013 12:14:23 +0000 (21:14 +0900)
48 files changed:
cookbooks/grid/recipes/default.rb
cookbooks/hadoop/attributes/default.rb
cookbooks/hadoop/libraries/helper.rb
cookbooks/hadoop/metadata.rb
cookbooks/hadoop/recipes/apache.rb
cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml
cookbooks/hadoop/templates/default/etc-2.1/hadoop/yarn-site.xml
cookbooks/hadoop/templates/default/etc-2.2/cgconfig.conf [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/default/hadoop-hdfs-datanode [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/capacity-scheduler.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/configuration.xsl [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/container-executor.cfg [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/core-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/fair-scheduler.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.cmd [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics2.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-policy.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hdfs-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.exclude [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.include [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-log4j.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-signature.secret [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/log4j.properties [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.cmd [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queue-acls.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queues.xml.template [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml.template [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/slaves [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-client.xml.example [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-server.xml.example [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/taskcontroller.cfg [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.cmd [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.sh [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-site.xml [new file with mode: 0644]
cookbooks/hadoop/templates/default/etc-2.2/init/cgconfig4yarn.conf [new file with mode: 0644]
cookbooks/hadoop/templates/default/grid/usr/sbin/hadoop_pseudo_distributed_init.sh [new file with mode: 0644]
nodes/localhost-ah.json [new file with mode: 0644]
nodes/localhost-ah2.json [new file with mode: 0644]
roles/hadoop-pseudo-distributed-ah.rb [new file with mode: 0644]
roles/hadoop-pseudo-distributed-ah2.rb [new file with mode: 0644]
roles/test-ah2-on-localhost.rb
roles/test-on-localhost.rb

diff --git a/cookbooks/grid/recipes/default.rb b/cookbooks/grid/recipes/default.rb
index 9c764bd..87830fa 100644 (file)
 # limitations under the License.
 #
 
-directory node['grid']['etc_root'] do
-  owner 'root'
-  group 'root'
-  mode '0755'
-  action :create
-  recursive true
-end
-
-directory node['grid']['app_root'] do
-  owner 'root'
-  group 'root'
-  mode '0755'
-  action :create
-  recursive true
-end
-
-directory node['grid']['vol_root'] do
-  owner 'root'
-  group 'root'
-  mode '0755'
-  action :create
-  recursive true
-end
+[
+  node['grid']['etc_root'],
+  node['grid']['app_root'],
+  "#{node['grid']['app_root']}/bin",
+  "#{node['grid']['app_root']}/sbin",
+  node['grid']['vol_root'],
+].each {|dir|
+  directory dir do
+    owner 'root'
+    group 'root'
+    mode '0755'
+    action :create
+    recursive true
+  end
+}
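
The hunk above collapses three identical directory resources into one loop and adds the new bin and sbin directories under the app root. A minimal equivalent sketch in conventional Chef do...end block style (behavior unchanged):

    [
      node['grid']['etc_root'],
      node['grid']['app_root'],
      "#{node['grid']['app_root']}/bin",
      "#{node['grid']['app_root']}/sbin",
      node['grid']['vol_root'],
    ].each do |dir|
      directory dir do
        owner 'root'
        group 'root'
        mode '0755'
        action :create
        recursive true  # also creates any missing parent directories
      end
    end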
 
diff --git a/cookbooks/hadoop/attributes/default.rb b/cookbooks/hadoop/attributes/default.rb
index d066165..b2f355d 100644 (file)
@@ -306,7 +306,16 @@ default['hadoop']['yarn.nodemanager.resource.memory-mb'] = '8192'
 # cpu-cores is available in the ver. 2.0.3-alpha and later.
 default['hadoop']['yarn.nodemanager.resource.cpu-cores'] = '8'
 default['hadoop']['yarn.nodemanager.remote-app-log-dir'] = "#{node['grid']['vol_root']}/0/var/log/${user.name}/nm"
-default['hadoop']['yarn.nodemanager.aux-services'] = 'mapreduce.shuffle'
+case node['hadoop']['install_flavor']
+when 'apache'
+  default['hadoop']['yarn.nodemanager.aux-services'] = (node['hadoop']['version'] >= '2.2.0') \
+    ? 'mapreduce_shuffle' \
+    : 'mapreduce.shuffle'
+when 'cdh'
+  default['hadoop']['yarn.nodemanager.aux-services'] = 'mapreduce.shuffle'
+when 'hdp'
+  default['hadoop']['yarn.nodemanager.aux-services'] = 'mapreduce.shuffle'
+end
 case node['hadoop']['install_flavor']
 when 'apache'
   default['hadoop']['yarn.application.classpath'] = '
@@ -368,6 +377,7 @@ default['hadoop']['yarn.scheduler.capacity.root.default.acl_administer_queue'] =
 ## container-executor.cfg
 ### e.g. CentOS: 500
 default['hadoop']['container-executor']['min.user.id'] = '1000'
+default['hadoop']['container-executor']['allowed.system.users'] = ''  # ver. 2.2.0
 ## hadoop-metrics2.properties
 default['hadoop']['metrics2']['resourcemanager.sink.ganglia.servers'] = ''
 default['hadoop']['metrics2']['nodemanager.sink.ganglia.servers'] = ''
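
The mapreduce.shuffle auxiliary service was renamed to mapreduce_shuffle in Apache Hadoop 2.2.0, which is what the new case branch encodes. The plain string comparison works for the versions this cookbook supports but is lexicographic; a hedged alternative sketch using Gem::Version for numeric-aware ordering (not what the cookbook itself does):

    # lexicographic pitfall: '2.10.0' >= '2.2.0' is false as strings
    aux = if node['hadoop']['install_flavor'] == 'apache' &&
             Gem::Version.new(node['hadoop']['version']) >= Gem::Version.new('2.2.0')
            'mapreduce_shuffle'  # Hadoop 2.2.0 and later
          else
            'mapreduce.shuffle'  # pre-2.2 Apache, CDH, HDP
          end
    default['hadoop']['yarn.nodemanager.aux-services'] = aux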
diff --git a/cookbooks/hadoop/libraries/helper.rb b/cookbooks/hadoop/libraries/helper.rb
index 2249f31..8efa6a7 100644 (file)
@@ -45,7 +45,7 @@ module Helper
     end
   
     unless (('1.0.0' <= version && version < '1.3') \
-      || ('2.0' <= middle_version && middle_version < '2.2')) then
+      || ('2.0' <= middle_version && middle_version < '2.3')) then
       Chef::Application.fatal!("Non supported version: #{version}")
     end
 
@@ -230,7 +230,7 @@ module Helper
 
 
   def conf_template(conf_dir, middle_version, conf_files, tpl_vars)
-    source_dir = ('2.0' <= middle_version && middle_version < '2.2') \
+    source_dir = ('2.0' <= middle_version && middle_version < '2.3') \
       ? "etc-#{middle_version}/hadoop" \
       : "conf-#{middle_version}"
   
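Both changes widen the supported range from [2.0, 2.2) to [2.0, 2.3), so 2.2.x releases pass the version gate and pick up their configuration templates from the new etc-2.2/hadoop directory. A hedged illustration, assuming middle_version is the major.minor prefix of the full version:

    # version '1.2.1' -> middle_version '1.2' -> source dir 'conf-1.2'
    # version '2.2.0' -> middle_version '2.2' -> source dir 'etc-2.2/hadoop'
    # version '2.3.0' -> middle_version '2.3' -> Chef::Application.fatal! (unsupported)
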
diff --git a/cookbooks/hadoop/metadata.rb b/cookbooks/hadoop/metadata.rb
index f9e094b..c7faf79 100644 (file)
@@ -4,7 +4,7 @@ maintainer_email ''
 license          'Apache 2.0'
 description      'Installs/Configures hadoop'
 long_description IO.read(File.join(File.dirname(__FILE__), 'README.md'))
-version          '0.1.0'
+version          '0.2.0'
 
 %w{ debian ubuntu centos redhat fedora }.each do |os|
   supports os
diff --git a/cookbooks/hadoop/recipes/apache.rb b/cookbooks/hadoop/recipes/apache.rb
index 4883dea..0c08698 100644 (file)
@@ -271,6 +271,7 @@ You must initialize HDFS in the first installation:
   $ sudo -u hdfs ./bin/hadoop namenode -format
   $ sudo -u hdfs ./bin/hadoop-daemon.sh start namenode
   $ sudo #{datanode_sudo_user_opt} ./bin/hadoop-daemon.sh start datanode
+  $ sudo -u hdfs ./bin/hadoop dfsadmin -safemode wait
   $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
   $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
   $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
@@ -287,6 +288,7 @@ You must initialize HDFS in the first installation:
   $ sudo -u hdfs ./bin/hdfs namenode -format
   $ sudo -u hdfs ./sbin/hadoop-daemon.sh start namenode
   $ sudo #{datanode_sudo_user_opt} ./sbin/hadoop-daemon.sh start datanode
+  $ sudo -u hdfs ./bin/hdfs dfsadmin -safemode wait
   $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
   $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
   $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
@@ -301,6 +303,17 @@ You must initialize HDFS in the first installation:
   examples_jar = "share/hadoop/mapreduce/hadoop-mapreduce-examples-#{version}.jar"
 end
 
+template "#{node['grid']['app_root']}/sbin/hadoop_pseudo_distributed_init.sh" do
+  source "grid/usr/sbin/hadoop_pseudo_distributed_init.sh"
+  owner 'root'
+  group 'root'
+  mode '0755'
+  variables({
+    :major_version => major_version,
+    :datanode_sudo_user_opt => datanode_sudo_user_opt
+  })
+end
+
 if node['hadoop']['with_security'] then
   log <<-EOM
 Note:
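
The added dfsadmin -safemode wait steps block until the NameNode leaves safe mode, so the chown/chmod/mkdir steps that follow cannot fail against a freshly formatted HDFS. The new template resource installs the pseudo-distributed init script and passes it the Hadoop major version and the DataNode sudo option. The template body is not shown in this view; a hedged, hypothetical sketch of how those two variables might be consumed:

    <%# hypothetical excerpt of grid/usr/sbin/hadoop_pseudo_distributed_init.sh %>
    <% if @major_version.to_s == '1' -%>
    sudo -u hdfs ./bin/hadoop namenode -format
    <% else -%>
    sudo -u hdfs ./bin/hdfs namenode -format
    <% end -%>
    <% bin = @major_version.to_s == '1' ? './bin' : './sbin' -%>
    sudo <%= @datanode_sudo_user_opt %> <%= bin %>/hadoop-daemon.sh start datanode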
diff --git a/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml b/cookbooks/hadoop/templates/default/etc-2.0/hadoop/yarn-site.xml
index 24c7bcf..ace2ef4 100644 (file)
@@ -99,6 +99,12 @@ yarn_nodemanager_log_dirs = ''
     </property>
 
     <property>
+        <name>yarn.nodemanager.admin-env</name>
+        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
         <name>yarn.acl.enable</name>
         <value><%= node['hadoop']['yarn.acl.enable'] %></value>
     </property>
@@ -124,10 +130,6 @@ yarn_nodemanager_log_dirs = ''
         <name>yarn.nodemanager.keytab</name>
         <value><%= node['hadoop']['yarn.nodemanager.keytab'] %></value>
     </property>
-    <property>
-        <name>yarn.nodemanager.admin-env</name>
-        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
-    </property>
 
     <property>
         <name>yarn.nodemanager.container-executor.class</name>
@@ -166,4 +168,19 @@ yarn_nodemanager_log_dirs = ''
         Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and
         yarn.nodemanager.linux-container-executor.cgroups.mount is true.</description>
     </property>
+<% end -%>
+<%
+this_file = 'yarn-site.xml'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
 </configuration>
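
The appended extra_configs block (mirrored in the etc-2.1 template below) lets a role or node inject arbitrary site properties without editing the template: each key/value pair stored under the file name renders as one <property> element. A hedged role-attribute example (the property name is illustrative):

    default_attributes(
      'hadoop' => {
        'extra_configs' => {
          'yarn-site.xml' => {
            'yarn.log-aggregation-enable' => 'true'
          }
        }
      }
    )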
diff --git a/cookbooks/hadoop/templates/default/etc-2.1/hadoop/yarn-site.xml b/cookbooks/hadoop/templates/default/etc-2.1/hadoop/yarn-site.xml
index 24c7bcf..ace2ef4 100644 (file)
@@ -99,6 +99,12 @@ yarn_nodemanager_log_dirs = ''
     </property>
 
     <property>
+        <name>yarn.nodemanager.admin-env</name>
+        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
         <name>yarn.acl.enable</name>
         <value><%= node['hadoop']['yarn.acl.enable'] %></value>
     </property>
@@ -124,10 +130,6 @@ yarn_nodemanager_log_dirs = ''
         <name>yarn.nodemanager.keytab</name>
         <value><%= node['hadoop']['yarn.nodemanager.keytab'] %></value>
     </property>
-    <property>
-        <name>yarn.nodemanager.admin-env</name>
-        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
-    </property>
 
     <property>
         <name>yarn.nodemanager.container-executor.class</name>
@@ -166,4 +168,19 @@ yarn_nodemanager_log_dirs = ''
         Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and
         yarn.nodemanager.linux-container-executor.cgroups.mount is true.</description>
     </property>
+<% end -%>
+<%
+this_file = 'yarn-site.xml'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
 </configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/cgconfig.conf b/cookbooks/hadoop/templates/default/etc-2.2/cgconfig.conf
new file mode 100644 (file)
index 0000000..949bee4
--- /dev/null
@@ -0,0 +1,88 @@
+#
+#  Copyright IBM Corporation. 2007
+#
+#  Authors:    Balbir Singh <balbir@linux.vnet.ibm.com>
+#  This program is free software; you can redistribute it and/or modify it
+#  under the terms of version 2.1 of the GNU Lesser General Public License
+#  as published by the Free Software Foundation.
+#
+#  This program is distributed in the hope that it would be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+<%
+case node[:platform_family]
+when 'debian'
+-%>
+#group daemons/www {
+#      perm {
+#              task {
+#                      uid = root;
+#                      gid = webmaster;
+#              }
+#              admin {
+#                      uid = root;
+#                      gid = root;
+#              }
+#      }
+#      cpu {
+#              cpu.shares = 1000;
+#      }
+#}
+#
+#group daemons/ftp {
+#      perm {
+#              task {
+#                      uid = root;
+#                      gid = ftpmaster;
+#              }
+#              admin {
+#                      uid = root;
+#                      gid = root;
+#              }
+#      }
+#      cpu {
+#              cpu.shares = 500;
+#      }
+#}
+#
+mount {
+       cpu = /mnt/cgroups/cpu;
+       cpuacct = /mnt/cgroups/cpuacct;
+       devices = /mnt/cgroups/devices;
+}
+<%
+when 'rhel'
+-%>
+# See man cgconfig.conf for further details.
+#
+# By default, mount all controllers to /cgroup/<controller>
+
+mount {
+       cpuset  = /cgroup/cpuset;
+       cpu     = /cgroup/cpu;
+       cpuacct = /cgroup/cpuacct;
+       memory  = /cgroup/memory;
+       devices = /cgroup/devices;
+       freezer = /cgroup/freezer;
+       net_cls = /cgroup/net_cls;
+       blkio   = /cgroup/blkio;
+}
+<% end -%>
+
+
+group <%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'].gsub(/^\//, '') %> {
+    perm {
+        task {
+            uid = yarn;
+            gid = yarn;
+        }
+        admin {
+            uid = yarn;
+            gid = yarn;
+        }
+    }
+    cpu {
+    }
+}
+
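
The gsub on the final group strips the leading slash from the configured cgroups hierarchy so it can serve as a cgconfig group name. A brief illustration, assuming the common '/hadoop-yarn' hierarchy value:

    '/hadoop-yarn'.gsub(/^\//, '')  # => "hadoop-yarn"
    # i.e. the attribute '/hadoop-yarn' renders as "group hadoop-yarn { ... }"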
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/default/hadoop-hdfs-datanode b/cookbooks/hadoop/templates/default/etc-2.2/default/hadoop-hdfs-datanode
new file mode 100644 (file)
index 0000000..c78e32e
--- /dev/null
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+export HADOOP_PID_DIR=/var/run/hadoop-hdfs
+export HADOOP_LOG_DIR=/var/log/hadoop-hdfs
+export HADOOP_NAMENODE_USER=hdfs
+export HADOOP_SECONDARYNAMENODE_USER=hdfs
+export HADOOP_DATANODE_USER=hdfs
+export HADOOP_IDENT_STRING=hdfs
+
+# export HADOOP_SECURE_DN_USER=hdfs
+# export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
+# export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs
+
+<% if node['hadoop']['with_security'] then -%>
+export HADOOP_SECURE_DN_USER=<%= node['hadoop']['HADOOP_SECURE_DN_USER'] %>
+export HADOOP_SECURE_DN_PID_DIR=<%= node['hadoop']['HADOOP_SECURE_DN_PID_DIR'] %>
+export HADOOP_SECURE_DN_LOG_DIR=<%= node['hadoop']['HADOOP_SECURE_DN_LOG_DIR'] %>
+<% end -%>
+
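
With with_security enabled, the conditional block overrides the three commented-out secure-DataNode settings from node attributes. A hedged sample of the rendered tail (values illustrative, mirroring the commented defaults above):

    export HADOOP_SECURE_DN_USER=hdfs
    export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
    export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs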
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/capacity-scheduler.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/capacity-scheduler.xml
new file mode 100644 (file)
index 0000000..d0359d5
--- /dev/null
@@ -0,0 +1,141 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-applications</name>
+    <value>10000</value>
+    <description>
+      Maximum number of applications that can be pending and running.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+    <value>0.1</value>
+    <description>
+      Maximum percent of resources in the cluster which can be used to run 
+      application masters i.e. controls number of concurrent running
+      applications.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+    <description>
+      The ResourceCalculator implementation to be used to compare 
+      Resources in the scheduler.
+      The default i.e. DefaultResourceCalculator only uses Memory while
+      DominantResourceCalculator uses dominant-resource to compare 
+      multi-dimensional resources such as Memory, CPU etc.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.queues</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.queues'] %></value>
+    <description>
+      The queues at this level (root is the root queue).
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.acl_submit_applications</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.acl_submit_applications'] %></value>
+    <description>
+      The ACL of who can submit jobs to the root queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.acl_administer_queue</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.acl_administer_queue'] %></value>
+    <description>
+      The ACL of who can administer jobs on the root queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.capacity</name>
+    <value>100</value>
+    <description>Default queue target capacity.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+    <value>1</value>
+    <description>
+      Default queue user limit a percentage from 0.0 to 1.0.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+    <value>100</value>
+    <description>
+      The maximum capacity of the default queue. 
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.state</name>
+    <value>RUNNING</value>
+    <description>
+      The state of the default queue. State can be one of RUNNING or STOPPED.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.default.acl_submit_applications'] %></value>
+    <description>
+      The ACL of who can submit jobs to the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+    <value><%= node['hadoop']['yarn.scheduler.capacity.root.default.acl_administer_queue'] %></value>
+    <description>
+      The ACL of who can administer jobs on the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>-1</value>
+    <description>
+      Number of missed scheduling opportunities after which the CapacityScheduler 
+      attempts to schedule rack-local containers. 
+      Typically this should be set to number of racks in the cluster, this 
+      feature is disabled by default, set to -1.
+    </description>
+  </property>
+
+<%
+this_file = 'capacity-scheduler.xml'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && ! node['hadoop']['extra_configs'][this_file].nil? then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
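
Queue layout is driven by the yarn.scheduler.capacity.root.queues attribute plus the templated per-queue settings; anything else can be injected through extra_configs, whose pairs render as additional <property> elements at the end of the file. A hedged example (value illustrative; if a templated property is repeated, Hadoop's Configuration takes the last occurrence):

    default['hadoop']['extra_configs']['capacity-scheduler.xml'] = {
      'yarn.scheduler.capacity.node-locality-delay' => '40'
    }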
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/configuration.xsl b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/configuration.xsl
new file mode 100644 (file)
index 0000000..d50d80b
--- /dev/null
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/container-executor.cfg b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/container-executor.cfg
new file mode 100644 (file)
index 0000000..6c0599e
--- /dev/null
@@ -0,0 +1,17 @@
+#yarn.nodemanager.local-dirs=/grid/vol/0/var/lib/yarn/nm/local
+#yarn.nodemanager.log-dirs=/grid/vol/0/var/log/yarn/nm
+yarn.nodemanager.linux-container-executor.group=yarn
+#comma separated list of users who cannot run applications
+banned.users=hdfs,yarn,mapred,bin
+#Prevent other super-users
+#min.user.id=1000    # default
+min.user.id=<%= node['hadoop']['container-executor']['min.user.id'] %>
+#comma separated list of system users who CAN run applications
+#allowed.system.users=
+<%
+allowed_system_users = node['hadoop']['container-executor']['allowed.system.users']
+unless allowed_system_users.empty? then
+-%>
+allowed.system.users=<%= allowed_system_users %>
+<% end -%>
+
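
allowed.system.users is emitted only when the new attribute is non-empty, so existing clusters keep a file identical to what the 2.1 template produced. A hedged example that lets a system account below min.user.id run containers ('hbase' is illustrative):

    default['hadoop']['container-executor']['allowed.system.users'] = 'hbase'
    # renders: allowed.system.users=hbase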
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/core-site.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/core-site.xml
new file mode 100644 (file)
index 0000000..52043fc
--- /dev/null
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.cluster.name</name>
+        <value><%= node['hadoop']['this.cluster.name'] %></value>
+        <!-- <value>pleiades</value> -->
+    </property>
+    <property>
+        <name>this.domain</name>
+        <value><%= node['hadoop']['this.domain'] %></value>
+        <!-- <value>grid.example.com</value> -->
+    </property>
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>this.realm</name>
+        <value><%= node['hadoop']['this.realm'] %></value>
+        <!-- <value>GRID.EXAMPLE.COM</value> -->
+    </property>
+    <property>
+        <name>this.keytab.dir</name>
+        <value><%= node['hadoop']['this.keytab.dir'] %></value>
+    </property>
+<% end -%>
+    <property>
+        <name>this.namenode.fqdn</name>
+        <value><%= node['hadoop']['this.namenode.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-nn.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>fs.defaultFS</name>
+        <value><%= node['hadoop']['fs.defaultFS'] %></value>
+    </property>
+    <property>
+        <name>hadoop.tmp.dir</name>
+        <value><%= node['hadoop']['hadoop.tmp.dir'] %></value>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>hadoop.security.authentication</name>
+        <value><%= node['hadoop']['hadoop.security.authentication'] %></value>
+        <description>
+            Set the authentication for the cluster. Valid values are: simple or
+            kerberos.
+        </description>
+    </property>
+    <property>
+        <name>hadoop.security.authorization</name>
+        <value><%= node['hadoop']['hadoop.security.authorization'] %></value>
+        <description>
+            Enable authorization for different protocols.
+        </description>
+    </property>
+    <property>
+        <name>hadoop.security.auth_to_local</name>
+        <value><%= node['hadoop']['hadoop.security.auth_to_local'] %></value>
+    </property>
+    <property>
+        <name>hadoop.security.group.mapping</name>
+        <value><%= node['hadoop']['hadoop.security.group.mapping'] %></value>
+    </property>
+    <property>
+        <name>hadoop.security.groups.cache.secs</name>
+        <value><%= node['hadoop']['hadoop.security.groups.cache.secs'] %></value>
+    </property>
+    <property>
+        <name>hadoop.kerberos.kinit.command</name>
+        <value><%= node['hadoop']['hadoop.kerberos.kinit.command'] %></value>
+    </property>
+
+    <property>
+        <name>hadoop.http.filter.initializers</name>
+        <value><%= node['hadoop']['hadoop.http.filter.initializers'] %></value>
+        <!-- <value>org.apache.hadoop.http.lib.StaticUserWebFilter</value> -->
+        <description>The name of a class that initializes an input filter for Jetty.
+            This filter will always return Dr.Who as the web user when the servlets
+            query for the authenticated user </description>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.signature.secret.file</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.signature.secret.file'] %></value>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.cookie.domain</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.cookie.domain'] %></value>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.type</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.type'] %></value>
+        <description>Defines authentication used for the HTTP web-consoles.
+            The supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#.
+            The default value is simple.</description>
+    </property>
+    <property>
+        <name>hadoop.http.authentication.kerberos.principal</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.kerberos.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value>
+            _HOST N/A!: v1.0, HDP1.2; OK: v2.0, CDH3, CDH4 -->
+    </property>
+    <property>
+        <name>hadoop.http.authentication.kerberos.keytab</name>
+        <value><%= node['hadoop']['hadoop.http.authentication.kerberos.keytab'] %></value>
+    </property>
+
+  <%- node['hadoop']['hadoop.proxyuser'].each do |name, values| -%>
+    <property>
+        <name>hadoop.proxyuser.<%= name %>.hosts</name>
+        <value><%= values['hosts'] %></value>
+    </property>
+    <property>
+        <name>hadoop.proxyuser.<%= name %>.groups</name>
+        <value><%= values['groups'] %></value>
+    </property>
+  <%- end -%>
+<% end -%>
+<%
+this_file = 'core-site.xml'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
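
The hadoop.proxyuser loop turns each hash entry into the paired .hosts/.groups impersonation properties. A hedged example ('oozie' and its values are illustrative):

    default['hadoop']['hadoop.proxyuser'] = {
      'oozie' => { 'hosts' => 'localhost', 'groups' => '*' }
    }
    # renders hadoop.proxyuser.oozie.hosts and hadoop.proxyuser.oozie.groups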
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/fair-scheduler.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/fair-scheduler.xml
new file mode 100644 (file)
index 0000000..4ae0516
--- /dev/null
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+
+<!--
+  This file contains pool and user allocations for the Fair Scheduler.
+  Its format is explained in the Fair Scheduler documentation at
+  http://hadoop.apache.org/common/docs/r0.20.205.0/fair_scheduler.html.
+  The documentation also includes a sample config file.
+-->
+
+<allocations>
+
+</allocations>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.cmd b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.cmd
new file mode 100644 (file)
index 0000000..05badc2
--- /dev/null
@@ -0,0 +1,81 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@rem Set Hadoop-specific environment variables here.
+
+@rem The only required environment variable is JAVA_HOME.  All others are
+@rem optional.  When running a distributed configuration it is best to
+@rem set JAVA_HOME in this file, so that it is correctly defined on
+@rem remote nodes.
+
+@rem The java implementation to use.  Required.
+set JAVA_HOME=%JAVA_HOME%
+
+@rem The jsvc implementation to use. Jsvc is required to run secure datanodes.
+@rem set JSVC_HOME=%JSVC_HOME%
+
+@rem set HADOOP_CONF_DIR=
+
+@rem Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
+if exist %HADOOP_HOME%\contrib\capacity-scheduler (
+  if not defined HADOOP_CLASSPATH (
+    set HADOOP_CLASSPATH=%HADOOP_HOME%\contrib\capacity-scheduler\*.jar
+  ) else (
+    set HADOOP_CLASSPATH=%HADOOP_CLASSPATH%;%HADOOP_HOME%\contrib\capacity-scheduler\*.jar
+  )
+)
+
+@rem The maximum amount of heap to use, in MB. Default is 1000.
+@rem set HADOOP_HEAPSIZE=
+@rem set HADOOP_NAMENODE_INIT_HEAPSIZE=""
+
+@rem Extra Java runtime options.  Empty by default.
+@rem set HADOOP_OPTS=%HADOOP_OPTS% -Djava.net.preferIPv4Stack=true
+
+@rem Command specific options appended to HADOOP_OPTS when specified
+if not defined HADOOP_SECURITY_LOGGER (
+  set HADOOP_SECURITY_LOGGER=INFO,RFAS
+)
+if not defined HDFS_AUDIT_LOGGER (
+  set HDFS_AUDIT_LOGGER=INFO,NullAppender
+)
+
+set HADOOP_NAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_NAMENODE_OPTS%
+set HADOOP_DATANODE_OPTS=-Dhadoop.security.logger=ERROR,RFAS %HADOOP_DATANODE_OPTS%
+set HADOOP_SECONDARYNAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_SECONDARYNAMENODE_OPTS%
+
+@rem The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+set HADOOP_CLIENT_OPTS=-Xmx128m %HADOOP_CLIENT_OPTS%
+@rem set HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData %HADOOP_JAVA_PLATFORM_OPTS%"
+
+@rem On secure datanodes, user to run the datanode as after dropping privileges
+set HADOOP_SECURE_DN_USER=%HADOOP_SECURE_DN_USER%
+
+@rem Where log files are stored.  %HADOOP_HOME%/logs by default.
+@rem set HADOOP_LOG_DIR=%HADOOP_LOG_DIR%\%USERNAME%
+
+@rem Where log files are stored in the secure data environment.
+set HADOOP_SECURE_DN_LOG_DIR=%HADOOP_LOG_DIR%\%HADOOP_HDFS_USER%
+
+@rem The directory where pid files are stored. /tmp by default.
+@rem NOTE: this should be set to a directory that can only be written to by 
+@rem       the user that will run the hadoop daemons.  Otherwise there is the
+@rem       potential for a symlink attack.
+set HADOOP_PID_DIR=%HADOOP_PID_DIR%
+set HADOOP_SECURE_DN_PID_DIR=%HADOOP_PID_DIR%
+
+@rem A string representing this instance of hadoop. %USERNAME% by default.
+set HADOOP_IDENT_STRING=%USERNAME%
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.sh b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-env.sh
new file mode 100644 (file)
index 0000000..ff3ed3e
--- /dev/null
@@ -0,0 +1,138 @@
+# Copyright 2011 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set Hadoop-specific environment variables here.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+<%
+# Leave $HADOOP_PID_DIR unset here (default: /var/run/hadoop-{hdfs,0.20-mapreduce})
+# because on CDH4's MRv1 both the HDFS and MRv1 daemons share $HADOOP_PID_DIR.
+unless (node['hadoop']['install_flavor'] == 'cdh' \
+  && node['hadoop']['cdh']['resource_negotiator_framework'] == 'MRv1') then -%>
+# The directory where pid files are stored. /tmp by default.
+export HADOOP_PID_DIR=<%= node['hadoop']['HADOOP_PID_DIR'] %>
+<% end -%>
+# Where log files are stored. $HADOOP_PREFIX/logs by default.
+#export HADOOP_LOG_DIR=<%= node['hadoop']['HADOOP_LOG_DIR'] %>
+# for secure datanode. $USER ('root': Apache, HDP; '': CDH)
+if [ x"$USER" = x'root' -o x"$USER" = x'' ]; then
+    export HADOOP_LOG_DIR=<%= File::dirname(node['hadoop']['HADOOP_LOG_DIR']) %>/hdfs
+else
+    export HADOOP_LOG_DIR=<%= node['hadoop']['HADOOP_LOG_DIR'] %>
+fi
+
+# Extra Java CLASSPATH elements.  Optional.
+if [ x"$HADOOP_CLASSPATH" = x ]; then
+    export HADOOP_CLASSPATH="<%= node['hadoop']['HADOOP_CLASSPATH'] %>"
+    #export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:/grid/usr/commons-daemon-1.0.13/commons-daemon-1.0.13.jar
+else
+    # for Hive and HCatalog
+    export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:<%= node['hadoop']['HADOOP_CLASSPATH'] %>"
+    #export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:/grid/usr/commons-daemon-1.0.13/commons-daemon-1.0.13.jar
+fi
+export HADOOP_USER_CLASSPATH_FIRST=<%= node['hadoop']['HADOOP_USER_CLASSPATH_FIRST'] %>
+<% if node['hadoop']['install_flavor'] == 'cdh' then -%>
+  <% if node['hadoop']['cdh']['resource_negotiator_framework'] == 'YARN' then -%>
+# for hadoop, yarn commands on the CDH
+export HADOOP_MAPRED_HOME=<%= node['hadoop']['HADOOP_MAPRED_HOME'] %>
+  <% end -%>
+<% elsif node['hadoop']['install_flavor'] == 'apache' then -%>
+  <% if node['hadoop']['with_security'] then -%>
+export HADOOP_SECURE_DN_USER=<%= node['hadoop']['HADOOP_SECURE_DN_USER'] %>
+# This property is N/A or overridden by the HADOOP_PID_DIR
+#export HADOOP_SECURE_DN_PID_DIR=/grid/vol/0/var/run/${HADOOP_SECURE_DN_USER}
+# This property is N/A or overridden by the HADOOP_LOG_DIR
+#export HADOOP_SECURE_DN_LOG_DIR=/grid/vol/0/var/log/${HADOOP_SECURE_DN_USER}
+export JSVC_HOME=<%= node['hadoop']['JSVC_HOME'] %>
+#export JSVC_HOME=/grid/usr/hadoop/sbin
+  <% end -%>
+
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.
+export JAVA_HOME=${JAVA_HOME}
+
+# The jsvc implementation to use. Jsvc is required to run secure datanodes.
+#export JSVC_HOME=${JSVC_HOME}
+
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
+
+# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
+for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
+  if [ "$HADOOP_CLASSPATH" ]; then
+    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
+  else
+    export HADOOP_CLASSPATH=$f
+  fi
+done
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+#export HADOOP_HEAPSIZE=
+#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
+
+# Extra Java runtime options.  Empty by default.
+export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
+
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
+
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
+#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges
+export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
+
+# Where log files are stored in the secure data environment.
+export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+
+# The directory where pid files are stored. /tmp by default.
+# NOTE: this should be set to a directory that can only be written to by 
+#       the user that will run the hadoop daemons.  Otherwise there is the
+#       potential for a symlink attack.
+export HADOOP_PID_DIR=${HADOOP_PID_DIR}
+export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=$USER
+
+<% end -%>
+<%
+this_file = 'hadoop-env.sh'
+if defined? node['hadoop']['extra_configs'][this_file] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
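
As in the XML templates, the trailing extra_configs loop is a per-file escape hatch; for hadoop-env.sh each pair becomes an export line. A hedged example (value illustrative):

    default['hadoop']['extra_configs']['hadoop-env.sh'] = {
      'HADOOP_HEAPSIZE' => '2048'
    }
    # renders: export HADOOP_HEAPSIZE=2048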
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics.properties b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics.properties
new file mode 100644 (file)
index 0000000..c1b2eb7
--- /dev/null
@@ -0,0 +1,75 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+#jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# rpc.period=10
+# rpc.servers=localhost:8649
+
+
+# Configuration of the "ugi" context for null
+ugi.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "ugi" context for file
+#ugi.class=org.apache.hadoop.metrics.file.FileContext
+#ugi.period=10
+#ugi.fileName=/tmp/ugimetrics.log
+
+# Configuration of the "ugi" context for ganglia
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# ugi.period=10
+# ugi.servers=localhost:8649
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics2.properties b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-metrics2.properties
new file mode 100644 (file)
index 0000000..09af4d6
--- /dev/null
@@ -0,0 +1,78 @@
+#
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+# default sampling period, in seconds
+*.period=10
+
+# The namenode-metrics.out will contain metrics from all context
+#namenode.sink.file.filename=namenode-metrics.out
+# Specifying a special sampling period for namenode:
+#namenode.sink.*.period=8
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+# the following example split metrics of different
+# context to different sinks (in this case files)
+#jobtracker.sink.file_jvm.context=jvm
+#jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out
+#jobtracker.sink.file_mapred.context=mapred
+#jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out
+
+
+#
+# Below are for sending metrics to Ganglia
+#
+# for Ganglia 3.0 support
+# *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30
+#
+# for Ganglia 3.1 support
+*.sink.ganglia.class=<%= node['hadoop']['metrics2']['*.sink.ganglia.class'] %>
+
+*.sink.ganglia.period=<%= node['hadoop']['metrics2']['*.sink.ganglia.period'] %>
+
+# default for supportsparse is false
+*.sink.ganglia.supportsparse=<%= node['hadoop']['metrics2']['*.sink.ganglia.supportsparse'] %>
+
+*.sink.ganglia.slope=<%= node['hadoop']['metrics2']['*.sink.ganglia.slope'] %>
+*.sink.ganglia.dmax=<%= node['hadoop']['metrics2']['*.sink.ganglia.dmax'] %>
+
+<%
+%w{
+  namenode
+  datanode
+  resourcemanager
+  nodemanager
+}.each do |prefix|
+  servers = node['hadoop']['metrics2']["#{prefix}.sink.ganglia.servers"]
+  if !servers.nil? && !servers.empty? then
+-%>
+<%= prefix %>.sink.ganglia.servers=<%= servers %>
+
+<%
+  end
+end
+-%>
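
The closing loop emits a <prefix>.sink.ganglia.servers line only for daemons whose attribute is set, so Ganglia reporting can be enabled per daemon. A hedged example (host:port illustrative):

    default['hadoop']['metrics2']['namenode.sink.ganglia.servers'] = 'gm01.grid.example.com:8649'
    # resourcemanager and nodemanager stay empty here and emit nothing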
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-policy.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hadoop-policy.xml
new file mode 100644 (file)
index 0000000..cb4d2e6
--- /dev/null
@@ -0,0 +1,219 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+
+ Copyright 2011 The Apache Software Foundation
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code
+    via the DistributedFileSystem.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+ <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value><%= node['hadoop']['security.admin.operations.protocol.acl'] %></value>
+    <description>ACL for AdminOperationsProtocol. Used for admin commands.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.usertogroups.mappings.protocol.acl</name>
+    <value><%= node['hadoop']['security.refresh.usertogroups.mappings.protocol.acl'] %></value>
+    <description>ACL for RefreshUserMappingsProtocol. Used to refresh
+    users mappings. The ACL is a comma-separated list of user and
+    group names. The user and group list is separated by a blank. For
+    e.g. "alice,bob users,wheel".  A special value of "*" means all
+    users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value><%= node['hadoop']['security.refresh.policy.protocol.acl'] %></value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the
+    dfsadmin and mradmin commands to refresh the security policy in-effect.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.ha.service.protocol.acl</name>
+    <value><%= node['hadoop']['security.ha.service.protocol.acl'] %></value>
+    <description>ACL for HAService protocol used by HAAdmin to manage the
+      active and stand-by states of namenode.</description>
+  </property>
+
+  <property>
+    <name>security.zkfc.protocol.acl</name>
+    <value><%= node['hadoop']['security.zkfc.protocol.acl'] %></value>
+    <description>ACL for access to the ZK Failover Controller
+    </description>
+  </property>
+
+  <property>
+    <name>security.qjournal.service.protocol.acl</name>
+    <value><%= node['hadoop']['security.qjournal.service.protocol.acl'] %></value>
+    <description>ACL for QJournalProtocol, used by the NN to communicate with
+    JNs when using the QuorumJournalManager for edit logs.</description>
+  </property>
+
+  <property>
+    <name>security.mrhs.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for HSClientProtocol, used by job clients to
+    communicate with the MR History Server to query job status etc.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <!-- YARN Protocols -->
+
+  <property>
+    <name>security.resourcetracker.protocol.acl</name>
+    <value><%= node['hadoop']['security.resourcetracker.protocol.acl'] %></value>
+    <description>ACL for ResourceTrackerProtocol, used by the
+    ResourceManager and NodeManager to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.resourcemanager-administration.protocol.acl</name>
+    <value><%= node['hadoop']['security.resourcemanager-administration.protocol.acl'] %></value>
+    <description>ACL for ResourceManagerAdministrationProtocol, for admin commands. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.applicationclient.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ApplicationClientProtocol, used by the ResourceManager 
+    and applications submission clients to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.applicationmaster.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ApplicationMasterProtocol, used by the ResourceManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.containermanagement.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ContainerManagementProtocol protocol, used by the NodeManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.resourcelocalizer.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ResourceLocalizer protocol, used by the NodeManager 
+    and ResourceLocalizer to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.task.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce
+    tasks to communicate with the parent tasktracker.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for MRClientProtocol, used by job clients to
+    communicate with the MR ApplicationMaster to query job status etc.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hdfs-site.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hdfs-site.xml
new file mode 100644 (file)
index 0000000..e291b46
--- /dev/null
@@ -0,0 +1,217 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.secondary.namenode.fqdn</name>
+        <value><%= node['hadoop']['this.secondary.namenode.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-cn.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>dfs.namenode.name.dir</name>
+        <value><%= node['hadoop']['dfs.namenode.name.dir'] %></value>
+        <!-- <value>file://<%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/name,file:///export/home/${user.name}/var/lib/name</value> -->
+    </property>
+<%
+dfs_data_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  dfs_data_dir = dfs_data_dir == '' ? '' : "#{dfs_data_dir},"
+  dfs_data_dir = "#{dfs_data_dir}file://#{node['grid']['vol_root']}/#{vol_num}/var/lib/${user.name}/data"
+}
+-%>
+    <property>
+        <name>dfs.datanode.data.dir</name>
+        <value><%= dfs_data_dir %></value>
+        <!-- <value>file:///grid/vol/0/var/lib/${user.name}/data,file:///grid/vol/1/var/lib/${user.name}/data</value> -->
+    </property>
+    <property>
+        <name>dfs.namenode.checkpoint.dir</name>
+        <value><%= node['hadoop']['dfs.namenode.checkpoint.dir'] %></value>
+        <!-- <value>file://<%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/checkpoint,file:///export/home/${user.name}/var/lib/checkpoint</value> -->
+    </property>
+    <property>
+        <name>dfs.replication</name>
+        <value><%= node['hadoop']['dfs.replication'] %></value>
+        <!-- <value>3</value> -->
+    </property>
+
+    <property>
+        <name>dfs.hosts</name>
+        <value><%= node['hadoop']['dfs.hosts'] %></value>
+        <description>
+          Names a file that contains a list of hosts that are permitted to connect to the namenode.
+          The full pathname of the file must be specified. If the value is empty, all hosts are permitted.
+        </description>
+    </property>
+    <property>
+        <name>dfs.hosts.exclude</name>
+        <value><%= node['hadoop']['dfs.hosts.exclude'] %></value>
+        <description>
+          Names a file that contains a list of hosts that are not permitted to connect to the namenode.
+          The full pathname of the file must be specified. If the value is empty, no hosts are excluded.
+        </description>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>dfs.namenode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.principal'] %></value>
+        <!-- _HOST is replaced with the fs.defaultFS's host name -->
+        <!-- <value>hdfs/${this.namenode.fqdn}@${this.realm}</value> -->
+        <description>Kerberos principal name for the NameNode</description>
+    </property>
+    <property>
+        <name>dfs.namenode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.namenode.keytab.file'] %></value>
+        <description>
+            Combined keytab file containing the namenode service and host
+            principals.
+        </description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.principal'] %></value>
+        <!-- <value>hdfs/_HOST@${this.realm}</value> -->
+        <description>
+            Kerberos principal name for the secondary NameNode.
+        </description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.keytab.file'] %></value>
+        <description>
+            Combined keytab file containing the secondary namenode service
+            and host principals.
+        </description>
+    </property>
+  <% unless node['hadoop']['install_flavor'] == 'cdh' then -%>
+    <!-- for KSSL (NOT RECOMMENDED). Note: not available on CDH4 -->
+    <property>
+        <name>hadoop.security.use-weak-http-crypto</name>
+        <value><%= node['hadoop']['hadoop.security.use-weak-http-crypto'] %></value>
+    </property>
+  <% end -%>
+  <% if node['hadoop']['hadoop.security.use-weak-http-crypto'] == 'true' then -%>
+    <property>
+        <name>dfs.namenode.https-address</name>
+        <value><%= node['hadoop']['dfs.namenode.https-address'] %></value>
+        <description>The https address where namenode binds</description>
+    </property>
+    <property>
+        <name>dfs.namenode.kerberos.https.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.https.principal'] %></value>
+        <!-- <value>host/_HOST@${this.realm}</value> does NOT work on v1.0.4 -->
+        <description>
+            The Kerberos principal for the host that the NameNode runs on.
+        </description>
+    </property>
+    <property>
+        <name>dfs.namenode.secondary.https-address</name>
+        <value><%= node['hadoop']['dfs.namenode.secondary.https-address'] %></value>
+        <description>The https address where secondary namenode binds</description>
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.https.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.https.principal'] %></value>
+        <!-- <value>host/_HOST@${this.realm}</value> does NOT work on v1.0.4 -->
+        <description>
+            The Kerberos principal for the host that the secondary NameNode
+            runs on.
+        </description>
+    </property>
+  <% end -%>
+    <property>
+        <name>dfs.block.access.token.enable</name>
+        <value><%= node['hadoop']['dfs.block.access.token.enable'] %></value>
+        <description>
+            If "true", access tokens are used as capabilities for accessing
+            datanodes.
+            If "false", no access tokens are checked on accessing datanodes.
+        </description>
+    </property>
+    <property>
+        <name>dfs.datanode.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.datanode.kerberos.principal'] %></value>
+        <!-- <value>hdfs/_HOST@${this.realm}</value> -->
+        <description>
+            The Kerberos principal that the DataNode runs as. "_HOST" is
+            replaced by the real host name.
+        </description>
+    </property>
+    <property>
+        <name>dfs.datanode.keytab.file</name>
+        <value><%= node['hadoop']['dfs.datanode.keytab.file'] %></value>
+        <description>
+            The filename of the keytab file for the DataNode.
+        </description>
+    </property>
+    <property>
+        <name>dfs.namenode.kerberos.internal.spnego.principal</name>
+        <value><%= node['hadoop']['dfs.namenode.kerberos.internal.spnego.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value> -->
+        <!-- _HOST is replaced with dfs.namenode.http-address's host name. -->
+    </property>
+    <property>
+        <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
+        <value><%= node['hadoop']['dfs.secondary.namenode.kerberos.internal.spnego.principal'] %></value>
+        <!-- <value>HTTP/_HOST@${this.realm}</value> -->
+        <!-- _HOST is replaced with dfs.namenode.secondary.http-address's host name. -->
+    </property>
+
+    <property>
+        <name>dfs.datanode.address</name>
+        <value><%= node['hadoop']['dfs.datanode.address'] %></value>
+    </property>
+    <property>
+        <name>dfs.datanode.http.address</name>
+        <value><%= node['hadoop']['dfs.datanode.http.address'] %></value>
+    </property>
+
+    <property>
+        <name>dfs.namenode.http-address</name>
+        <value><%= node['hadoop']['dfs.namenode.http-address'] %></value>
+    </property>
+    <property>
+        <name>dfs.namenode.secondary.http-address</name>
+        <value><%= node['hadoop']['dfs.namenode.secondary.http-address'] %></value>
+    </property>
+    <property>
+        <name>dfs.web.authentication.kerberos.principal</name>
+        <value><%= node['hadoop']['dfs.web.authentication.kerberos.principal'] %></value>
+    </property>
+    <property>
+        <name>dfs.web.authentication.kerberos.keytab</name>
+        <value><%= node['hadoop']['dfs.web.authentication.kerberos.keytab'] %></value>
+    </property>
+<% end -%>
+<%
+this_file = 'hdfs-site.xml'
+if defined?(node['hadoop']['extra_configs'][this_file]) \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
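
The ERB block above builds dfs.datanode.data.dir by concatenating one file:// URI per active volume; ${user.name} stays literal because Hadoop, not Chef, expands it at run time. A standalone sketch of that expansion, assuming vol_root = '/grid/vol' and two active volumes (matching the commented example in the template):

    # Standalone sketch of the dfs.datanode.data.dir expansion,
    # assuming vol_root = '/grid/vol' and @active_vol_nums = 2.
    vol_root = '/grid/vol'
    active_vol_nums = 2

    dfs_data_dir = ''
    active_vol_nums.times do |vol_num|
      dfs_data_dir << ',' unless dfs_data_dir.empty?
      dfs_data_dir << "file://#{vol_root}/#{vol_num}/var/lib/${user.name}/data"
    end

    puts dfs_data_dir
    # => file:///grid/vol/0/var/lib/${user.name}/data,file:///grid/vol/1/var/lib/${user.name}/data
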
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.exclude b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.exclude
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.include b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/hosts.include
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-env.sh b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-env.sh
new file mode 100644 (file)
index 0000000..84c67b7
--- /dev/null
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+# Set httpfs specific environment variables here.
+
+# Settings for the Embedded Tomcat that runs HttpFS
+# Java System properties for HttpFS should be specified in this variable
+#
+# export CATALINA_OPTS=
+
+# HttpFS logs directory
+#
+# export HTTPFS_LOG=${HTTPFS_HOME}/logs
+
+# HttpFS temporary directory
+#
+# export HTTPFS_TEMP=${HTTPFS_HOME}/temp
+
+# The HTTP port used by HttpFS
+#
+# export HTTPFS_HTTP_PORT=14000
+
+# The Admin port used by HttpFS
+#
+# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1`
+
+# The hostname HttpFS server runs on
+#
+# export HTTPFS_HTTP_HOSTNAME=`hostname -f`
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-log4j.properties b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-log4j.properties
new file mode 100644 (file)
index 0000000..284a819
--- /dev/null
@@ -0,0 +1,35 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+#
+
+# If the Java System property 'httpfs.log.dir' is not defined at HttpFSServer start-up time,
+# it is set to '${httpfs.home}/logs'
+
+log4j.appender.httpfs=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.httpfs.DatePattern='.'yyyy-MM-dd
+log4j.appender.httpfs.File=${httpfs.log.dir}/httpfs.log
+log4j.appender.httpfs.Append=true
+log4j.appender.httpfs.layout=org.apache.log4j.PatternLayout
+log4j.appender.httpfs.layout.ConversionPattern=%d{ISO8601} %5p %c{1} [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n
+
+log4j.appender.httpfsaudit=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.httpfsaudit.DatePattern='.'yyyy-MM-dd
+log4j.appender.httpfsaudit.File=${httpfs.log.dir}/httpfs-audit.log
+log4j.appender.httpfsaudit.Append=true
+log4j.appender.httpfsaudit.layout=org.apache.log4j.PatternLayout
+log4j.appender.httpfsaudit.layout.ConversionPattern=%d{ISO8601} %5p [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n
+
+log4j.logger.httpfsaudit=INFO, httpfsaudit
+
+log4j.logger.org.apache.hadoop.fs.http.server=INFO, httpfs
+log4j.logger.org.apache.hadoop.lib=INFO, httpfs
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-signature.secret b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-signature.secret
new file mode 100644 (file)
index 0000000..56466e9
--- /dev/null
@@ -0,0 +1 @@
+hadoop httpfs secret
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-site.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/httpfs-site.xml
new file mode 100644 (file)
index 0000000..4a718e1
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/log4j.properties b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/log4j.properties
new file mode 100644 (file)
index 0000000..7e0834a
--- /dev/null
@@ -0,0 +1,231 @@
+# Copyright 2011 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hadoop configuration logging
+#
+
+# Uncomment the following line to turn off configuration deprecation warnings.
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# mapred audit logging
+#
+mapred.audit.logger=INFO,NullAppender
+mapred.audit.log.maxfilesize=256MB
+mapred.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
+log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
+log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
+log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender 
+#
+# Use following logger to send summary to separate file defined by 
+# hadoop.mapreduce.jobsummary.log.file :
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
+hadoop.mapreduce.jobsummary.log.maxbackupindex=20
+log4j.appender.JSA=org.apache.log4j.RollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
+log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# Yarn ResourceManager Application Summary Log 
+#
+# Set the ResourceManager summary log filename
+yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# To enable AppSummaryLogging for the RM, 
+# set yarn.server.resourcemanager.appsummary.logger to 
+# <LEVEL>,RMSUMMARY in hadoop-env.sh
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+#    - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+log4j.appender.RMSUMMARY.MaxFileSize=256MB
+log4j.appender.RMSUMMARY.MaxBackupIndex=20
+log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
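
As the comments above note, the RM application-summary log only becomes active once yarn.server.resourcemanager.appsummary.logger resolves to <LEVEL>,RMSUMMARY, normally injected as a JVM system property from hadoop-env.sh. A hedged sketch in Chef-attribute form, reusing the extra_configs hook that mapred-env.sh (below) exposes; whether this commit's hadoop-env.sh template exposes the same hook is an assumption:

    # Hedged sketch: route the app-summary logger to RMSUMMARY via a JVM
    # system property. Assumes an extra_configs hook keyed on
    # 'hadoop-env.sh', analogous to the one mapred-env.sh provides.
    default['hadoop']['extra_configs']['hadoop-env.sh'] = {
      'YARN_RESOURCEMANAGER_OPTS' =>
        '"-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"'
    }
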
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.cmd b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.cmd
new file mode 100644 (file)
index 0000000..610d593
--- /dev/null
@@ -0,0 +1,20 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+set HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+
+set HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.sh b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-env.sh
new file mode 100644 (file)
index 0000000..318dda6
--- /dev/null
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+
+# The directory where pid files are stored. /tmp by default.
+export HADOOP_MAPRED_PID_DIR=<%= node['hadoop']['HADOOP_MAPRED_PID_DIR'] %>
+# Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+export HADOOP_MAPRED_LOG_DIR=<%= node['hadoop']['HADOOP_MAPRED_LOG_DIR'] %>
+
+
+<% if node['hadoop']['install_flavor'] == 'apache' then -%>
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+
+export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+
+export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+
+#export HADOOP_JOB_HISTORYSERVER_OPTS=
+#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
+#export HADOOP_MAPRED_PID_DIR= # Where the pid files are stored. /tmp by default.
+#export HADOOP_MAPRED_IDENT_STRING= # A string representing this instance of hadoop. $USER by default.
+#export HADOOP_MAPRED_NICENESS= # The scheduling priority for daemons. Defaults to 0.
+<% end -%>
+<%
+this_file = 'mapred-env.sh'
+if defined?(node['hadoop']['extra_configs'][this_file]) \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
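
The extra_configs hook at the end of this template turns arbitrary attribute key/value pairs into export lines, so site-specific variables need no template change. A sketch with hypothetical values:

    # Sketch: any pair placed under extra_configs['mapred-env.sh'] becomes
    # an export line at the end of the rendered file. Values are hypothetical.
    default['hadoop']['extra_configs']['mapred-env.sh'] = {
      'HADOOP_JHS_LOGGER'      => 'INFO,RFA',
      'HADOOP_MAPRED_NICENESS' => '5'
    }
    # Rendered tail of mapred-env.sh:
    #   export HADOOP_JHS_LOGGER=INFO,RFA
    #   export HADOOP_MAPRED_NICENESS=5
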
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queue-acls.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queue-acls.xml
new file mode 100644 (file)
index 0000000..d5c6aae
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is a template file for queue acls configuration properties -->
+
+<configuration>
+
+<property>
+  <name>mapred.queue.default.acl-submit-job</name>
+  <value><%= node['hadoop']['mapred.queue.default.acl-submit-job'] %></value>
+  <description> Comma-separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2.
+    If set to the special value '*', all users are allowed to
+    submit jobs. If set to ' ' (i.e. a space), no user will be allowed to
+    submit jobs.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can submit jobs.
+  </description>
+</property>
+
+<property>
+  <name>mapred.queue.default.acl-administer-jobs</name>
+  <value><%= node['hadoop']['mapred.queue.default.acl-administer-jobs'] %></value>
+  <description> Comma-separated list of user and group names that are allowed
+    to view job details, kill jobs or modify a job's priority for all the jobs
+    in the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2.
+    If set to the special value '*', all users are allowed to perform
+    this operation. If set to ' ' (i.e. a space), no user will be allowed to
+    perform this operation.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can do the above operations on all the jobs
+    in all the queues. The job owner can do all the above operations on his/her
+    job irrespective of this ACL configuration.
+  </description>
+</property>
+
+</configuration>
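
Both queue-ACL properties are read straight from node attributes, so a role or environment can tighten the default queue without touching the template. A sketch with hypothetical principals (remember the ACLs are only enforced once mapred.acls.enabled is true):

    # Sketch with hypothetical principals; enforced only when
    # mapred.acls.enabled is true.
    default['hadoop']['mapred.queue.default.acl-submit-job']      = 'alice,bob devs'
    default['hadoop']['mapred.queue.default.acl-administer-jobs'] = ' ' # a single space: nobody
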
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queues.xml.template b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-queues.xml.template
new file mode 100644 (file)
index 0000000..ce6cd20
--- /dev/null
@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!-- This is the template for queue configuration. The format supports nesting of
+     queues within queues - a feature called hierarchical queues. All queues are
+     defined within the 'queues' tag which is the top level element for this
+     XML document. The queue acls configured here for different queues are
+     checked for authorization only if the configuration property
+     mapreduce.cluster.acls.enabled is set to true. -->
+<queues>
+
+  <!-- Configuration for a queue is specified by defining a 'queue' element. -->
+  <queue>
+
+    <!-- Name of a queue. Queue name cannot contain a ':'  -->
+    <name>default</name>
+
+    <!-- properties for a queue, typically used by schedulers,
+    can be defined here -->
+    <properties>
+    </properties>
+
+    <!-- State of the queue. If running, the queue will accept new jobs.
+         If stopped, the queue will not accept new jobs. -->
+    <state>running</state>
+
+    <!-- Specifies the ACLs to check for submitting jobs to this queue.
+         If set to '*', it allows all users to submit jobs to the queue.
+         If set to ' ' (i.e. a space), no user will be allowed to do this
+         operation. The default value for any queue acl is ' '.
+         For specifying a list of users and groups the format to use is
+         user1,user2 group1,group2
+
+         It is only used if authorization is enabled in Map/Reduce by setting
+         the configuration property mapreduce.cluster.acls.enabled to true.
+
+         Irrespective of this ACL configuration, the user who started the
+         cluster and cluster administrators configured via
+         mapreduce.cluster.administrators can do this operation. -->
+    <acl-submit-job> </acl-submit-job>
+
+    <!-- Specifies the ACLs to check for viewing and modifying jobs in this
+         queue. Modifications include killing jobs, tasks of jobs or changing
+         priorities.
+         If set to '*', it allows all users to view, modify jobs of the queue.
+         If set to ' ' (i.e. a space), no user will be allowed to do this
+         operation.
+         For specifying a list of users and groups the format to use is
+         user1,user2 group1,group2
+
+         It is only used if authorization is enabled in Map/Reduce by setting
+         the configuration property mapreduce.cluster.acls.enabled to true.
+
+         Irrespective of this ACL configuration, the user who started the
+         cluster  and cluster administrators configured via
+         mapreduce.cluster.administrators can do the above operations on all
+         the jobs in all the queues. The job owner can do all the above
+         operations on his/her job irrespective of this ACL configuration. -->
+    <acl-administer-jobs> </acl-administer-jobs>
+  </queue>
+
+  <!-- Here is a sample of a hierarchical queue configuration
+       where q2 is a child of q1. In this example, q2 is a leaf level
+       queue as it has no queues configured within it. Currently, ACLs
+       and state are only supported for the leaf level queues.
+       Note also the usage of properties for the queue q2.
+  <queue>
+    <name>q1</name>
+    <queue>
+      <name>q2</name>
+      <properties>
+        <property key="capacity" value="20"/>
+        <property key="user-limit" value="30"/>
+      </properties>
+    </queue>
+  </queue>
+ -->
+</queues>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml
new file mode 100644 (file)
index 0000000..c9844fd
--- /dev/null
@@ -0,0 +1,293 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+    <property>
+        <name>this.jobhistory.fqdn</name>
+        <value><%= node['hadoop']['this.jobhistory.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-jt.${this.domain}</value> -->
+        <!-- <value>${this.cluster.name}-jh.${this.domain}</value> -->
+    </property>
+
+<%
+install_flavor = node['hadoop']['install_flavor']
+if install_flavor == 'apache' \
+  || install_flavor == 'hdp' \
+  || (install_flavor == 'cdh' && node['hadoop']['cdh']['resource_negotiator_framework'] == 'YARN') then
+-%>
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value><%= node['hadoop']['mapreduce.framework.name'] %></value>
+        <description>The runtime framework for executing MapReduce jobs.
+        Can be one of local, classic or yarn.
+        (default: local)
+        </description>
+    </property>
+    <property>
+        <name>yarn.app.mapreduce.am.staging-dir</name>
+        <value><%= node['hadoop']['yarn.app.mapreduce.am.staging-dir'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.intermediate-done-dir</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.intermediate-done-dir'] %></value>
+        <!-- does NOT work: <value>/user</value> -->
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.done-dir</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.done-dir'] %></value>
+    </property>
+
+<%
+mapred_local_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  mapred_local_dir = mapred_local_dir == '' ? '' : "#{mapred_local_dir},"
+  mapred_local_dir = "#{mapred_local_dir}#{node['grid']['vol_root']}/#{vol_num}/var/lib/mapred/local"
+}
+-%>
+    <!-- NOT necessary.
+    <property>
+        <name>mapreduce.cluster.local.dir</name>
+        <value><%= mapred_local_dir %></value>
+        <description>
+          The local directory where MapReduce stores intermediate data files.
+          May be a comma-separated list of directories on different devices in order to spread disk i/o.
+          Directories that do not exist are ignored.
+        </description>
+    </property>
+    -->
+<%
+mapred_temp_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  mapred_temp_dir = mapred_temp_dir == '' ? '' : "#{mapred_temp_dir},"
+  mapred_temp_dir = "#{mapred_temp_dir}#{node['grid']['vol_root']}/#{vol_num}/tmp/mapred"
+}
+-%>
+    <!-- NOT necessary.
+    <property>
+        <name>mapreduce.cluster.temp.dir</name>
+        <value><%= mapred_temp_dir %></value>
+        <description>
+          A shared directory for temporary files.
+        </description>
+    </property>
+    -->
+
+  <% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>mapreduce.jobhistory.principal</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.principal'] %></value>
+        <!-- <value>mapred/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.keytab</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.keytab'] %></value>
+    </property>
+  <% end -%>
+<%
+elsif install_flavor == 'cdh' \
+  && node['hadoop']['cdh']['resource_negotiator_framework'] == 'MRv1' then
+-%>
+    <!-- CDH4 MRv1 -->
+    <property>
+        <name>this.jobtracker.fqdn</name>
+        <value><%= node['hadoop']['this.jobtracker.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-jt.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>mapred.job.tracker</name>
+        <value><%= node['hadoop']['mapred.job.tracker'] %></value>
+    </property>
+    <property>
+        <name>mapred.system.dir</name>
+        <value><%= node['hadoop']['mapred.system.dir'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobtracker.staging.root.dir</name>
+        <value><%= node['hadoop']['mapreduce.jobtracker.staging.root.dir'] %></value>
+    </property>
+
+    <property>
+        <name>mapred.job.tracker.persist.jobstatus.active</name>
+        <value><%= node['hadoop']['mapred.job.tracker.persist.jobstatus.active'] %></value>
+        <description>Indicates whether persistence of job status information
+          is active. (default: false)
+        </description>
+    </property>
+    <property>
+        <name>mapred.job.tracker.persist.jobstatus.hours</name>
+        <value><%= node['hadoop']['mapred.job.tracker.persist.jobstatus.hours'] %></value>
+        <description>The number of hours job status information is persisted in DFS.
+          The job status information will be available after it drops off the memory
+          queue and between jobtracker restarts. With a zero value the job status
+          information is not persisted at all in DFS. (default: 0)
+        </description>
+    </property>
+    <property>
+        <name>mapred.job.tracker.persist.jobstatus.dir</name>
+        <value><%= node['hadoop']['mapred.job.tracker.persist.jobstatus.dir'] %></value>
+        <description>The directory where the job status information is persisted
+          in a file system to be available after it drops off the memory queue and
+          between jobtracker restarts. (default: /jobtracker/jobsInfo)
+        </description>
+    </property>
+
+    <property>
+        <name>hadoop.job.history.location</name>
+        <value><%= node['hadoop']['hadoop.job.history.location'] %></value>
+        <description>hdfs:// URIs are NOT usable here.</description>
+    </property>
+    <property>
+        <name>mapred.job.tracker.history.completed.location</name>
+        <value><%= node['hadoop']['mapred.job.tracker.history.completed.location'] %></value>
+    </property>
+
+<%
+mapred_local_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  mapred_local_dir = mapred_local_dir == '' ? '' : "#{mapred_local_dir},"
+  # CDH3: ${user.name} is N/A!
+  mapred_local_dir = "#{mapred_local_dir}#{node['grid']['vol_root']}/#{vol_num}/var/lib/mapred/local"
+}
+-%>
+    <property>
+        <name>mapred.local.dir</name>
+        <value><%= mapred_local_dir %></value>
+        <!-- <value><%= node['grid']['vol_root'] %>/0/var/lib/mapred/local,<%= node['grid']['vol_root'] %>/1/var/lib/mapred/local</value> -->
+        <description>
+          The local directory where MapReduce stores intermediate data files.
+          May be a comma-separated list of directories on different devices in order to spread disk i/o.
+          Directories that do not exist are ignored.
+        </description>
+    </property>
+<%
+mapred_temp_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  mapred_temp_dir = mapred_temp_dir == '' ? '' : "#{mapred_temp_dir},"
+  # CDH3: ${user.name} is N/A!
+  if node['hadoop']['install_flavor'] == 'cdh' then
+    mapred_temp_dir = "#{mapred_temp_dir}#{node['grid']['vol_root']}/#{vol_num}/tmp/mapred/temp"
+  else
+    mapred_temp_dir = "#{mapred_temp_dir}#{node['grid']['vol_root']}/#{vol_num}/tmp/${user.name}/temp"
+  end
+}
+-%>
+    <property>
+        <name>mapred.temp.dir</name>
+        <value><%= mapred_temp_dir %></value>
+        <!-- <value><%= node['grid']['vol_root'] %>/0/tmp/${user.name}/temp,<%= node['grid']['vol_root'] %>/1/tmp/${user.name}/temp</value> -->
+        <description>
+          A shared directory for temporary files.
+        </description>
+    </property>
+
+    <property>
+        <name>mapred.hosts</name>
+        <value><%= node['hadoop']['mapred.hosts'] %></value>
+        <description>
+          Names a file that contains the list of nodes that may connect to the jobtracker.
+          If the value is empty, all hosts are permitted.
+        </description>
+    </property>
+    <property>
+        <name>mapred.hosts.exclude</name>
+        <value><%= node['hadoop']['mapred.hosts.exclude'] %></value>
+        <description>
+          Names a file that contains the list of hosts that should be excluded by the jobtracker.
+          If the value is empty, no hosts are excluded.
+        </description>
+    </property>
+
+    <property>
+        <name>mapred.jobtracker.taskScheduler</name>
+        <value><%= node['hadoop']['mapred.jobtracker.taskScheduler'] %></value>
+    </property>
+
+  <% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>mapred.acls.enabled</name>
+        <value><%= node['hadoop']['mapred.acls.enabled'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.cluster.administrators</name>
+        <value><%= node['hadoop']['mapreduce.cluster.administrators'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobtracker.kerberos.principal</name>
+        <value><%= node['hadoop']['mapreduce.jobtracker.kerberos.principal'] %></value>
+        <!-- _HOST is replaced with the mapred.job.tracker's host name -->
+        <!-- <value>mapred/${this.jobtracker.fqdn}@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>mapreduce.jobtracker.keytab.file</name>
+        <value><%= node['hadoop']['mapreduce.jobtracker.keytab.file'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.tasktracker.kerberos.principal</name>
+        <value><%= node['hadoop']['mapreduce.tasktracker.kerberos.principal'] %></value>
+        <!-- <value>mapred/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>mapreduce.tasktracker.keytab.file</name>
+        <value><%= node['hadoop']['mapreduce.tasktracker.keytab.file'] %></value>
+    </property>
+
+    <property>
+        <name>mapred.task.tracker.task-controller</name>
+        <value><%= node['hadoop']['mapred.task.tracker.task-controller'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.tasktracker.group</name>
+        <value><%= node['hadoop']['mapreduce.tasktracker.group'] %></value>
+    </property>
+
+    <!-- a separate job history server is not available on CDH4 MRv1.
+    <property>
+        <name>mapreduce.history.server.embedded</name>
+        <value><%= node['hadoop']['mapreduce.history.server.embedded'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.history.server.http.address</name>
+        <value><%= node['hadoop']['mapreduce.history.server.http.address'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.kerberos.principal</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.kerberos.principal'] %></value>
+    </property>
+    <property>
+        <name>mapreduce.jobhistory.keytab.file</name>
+        <value><%= node['hadoop']['mapreduce.jobhistory.keytab.file'] %></value>
+    </property>
+    -->
+  <% end -%>
+<% end -%>
+<%
+this_file = 'mapred-site.xml'
+if defined?(node['hadoop']['extra_configs'][this_file]) \
+  && !node['hadoop']['extra_configs'][this_file].nil? then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
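
The flavor logic in this template is easy to lose track of: 'apache' and 'hdp' always get the YARN/MRv2 block, while 'cdh' is split by node['hadoop']['cdh']['resource_negotiator_framework']. A compact restatement of the same decision (the helper name is hypothetical):

    # Compact restatement of the template's branch logic; the helper name
    # and return symbols are hypothetical.
    def mapred_site_branch(flavor, framework = nil)
      if flavor == 'apache' || flavor == 'hdp' ||
         (flavor == 'cdh' && framework == 'YARN')
        :yarn_mrv2  # mapreduce.framework.name, JobHistory dirs, ...
      elsif flavor == 'cdh' && framework == 'MRv1'
        :cdh4_mrv1  # mapred.job.tracker, mapred.local.dir, ...
      end
    end

    mapred_site_branch('apache')      # => :yarn_mrv2
    mapred_site_branch('cdh', 'MRv1') # => :cdh4_mrv1
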
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml.template b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/mapred-site.xml.template
new file mode 100644 (file)
index 0000000..761c352
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/slaves b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/slaves
new file mode 100644 (file)
index 0000000..2fbb50c
--- /dev/null
@@ -0,0 +1 @@
+localhost
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-client.xml.example b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-client.xml.example
new file mode 100644 (file)
index 0000000..a50dce4
--- /dev/null
@@ -0,0 +1,80 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.reload.interval</name>
+  <value>10000</value>
+  <description>Truststore reload check interval, in milliseconds.
+  Default value is 10000 (10 seconds).
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-server.xml.example b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/ssl-server.xml.example
new file mode 100644 (file)
index 0000000..4b363ff
--- /dev/null
@@ -0,0 +1,77 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.reload.interval</name>
+  <value>10000</value>
+  <description>Truststore reload check interval, in milliseconds.
+  Default value is 10000 (10 seconds).
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. The keystore file format, default value is "jks".
+  </description>
+</property>
+
+</configuration>
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/taskcontroller.cfg b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/taskcontroller.cfg
new file mode 100644 (file)
index 0000000..33b67cd
--- /dev/null
@@ -0,0 +1,17 @@
+#configured value of mapred.local.dir. It can be a list of comma-separated paths.
+<%
+mapred_local_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+  mapred_local_dir = mapred_local_dir == '' ? '' : "#{mapred_local_dir},"
+  mapred_local_dir = "#{mapred_local_dir}#{node['grid']['vol_root']}/#{vol_num}/var/lib/mapred/local"
+}
+%>
+mapred.local.dir=<%= mapred_local_dir %>
+#mapred.local.dir=/grid/vol/0/var/lib/mapred/local,/grid/vol/1/var/lib/mapred/local
+#configured value of hadoop.log.dir.
+hadoop.log.dir=<%= node['grid']['vol_root'] %>/0/var/log/mapred
+#sleep time before SIGKILL is sent to the process group after SIGTERM is sent. Should be in seconds
+mapred.tasktracker.tasks.sleeptime-before-sigkill=5000
+mapreduce.tasktracker.group=mapred
+#min.user.id=1000    # default
+min.user.id=<%= node['hadoop']['taskcontroller']['min.user.id'] %>
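
min.user.id is the task controller's guard against launching tasks as system accounts, and the attribute feeds it directly. A sketch for a hypothetical site whose unprivileged UIDs start at 500:

    # Hypothetical override for a site whose unprivileged UIDs start at 500;
    # taskcontroller.cfg then renders the line "min.user.id=500".
    default['hadoop']['taskcontroller']['min.user.id'] = 500
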
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.cmd b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.cmd
new file mode 100644 (file)
index 0000000..3329f8f
--- /dev/null
@@ -0,0 +1,60 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@rem User for YARN daemons
+if not defined HADOOP_YARN_USER (
+  set HADOOP_YARN_USER=%yarn%
+)
+
+if not defined YARN_CONF_DIR (
+  set YARN_CONF_DIR=%HADOOP_YARN_HOME%\conf
+)
+
+if defined YARN_HEAPSIZE (
+  @rem echo run with Java heapsize %YARN_HEAPSIZE%
+  set JAVA_HEAP_MAX=-Xmx%YARN_HEAPSIZE%m
+)
+
+if not defined YARN_LOG_DIR (
+  set YARN_LOG_DIR=%HADOOP_YARN_HOME%\logs
+)
+
+if not defined YARN_LOGFILE (
+  set YARN_LOGFILE=yarn.log
+)
+
+@rem default policy file for service-level authorization
+if not defined YARN_POLICYFILE (
+  set YARN_POLICYFILE=hadoop-policy.xml
+)
+
+if not defined YARN_ROOT_LOGGER (
+  set YARN_ROOT_LOGGER=INFO,console
+)
+
+set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.dir=%YARN_LOG_DIR%
+set YARN_OPTS=%YARN_OPTS% -Dyarn.log.dir=%YARN_LOG_DIR%
+set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.file=%YARN_LOGFILE%
+set YARN_OPTS=%YARN_OPTS% -Dyarn.log.file=%YARN_LOGFILE%
+set YARN_OPTS=%YARN_OPTS% -Dyarn.home.dir=%HADOOP_YARN_HOME%
+set YARN_OPTS=%YARN_OPTS% -Dyarn.id.str=%YARN_IDENT_STRING%
+set YARN_OPTS=%YARN_OPTS% -Dhadoop.home.dir=%HADOOP_YARN_HOME%
+set YARN_OPTS=%YARN_OPTS% -Dhadoop.root.logger=%YARN_ROOT_LOGGER%
+set YARN_OPTS=%YARN_OPTS% -Dyarn.root.logger=%YARN_ROOT_LOGGER%
+if defined JAVA_LIBRARY_PATH (
+  set YARN_OPTS=%YARN_OPTS% -Djava.library.path=%JAVA_LIBRARY_PATH%
+)
+set YARN_OPTS=%YARN_OPTS% -Dyarn.policy.file=%YARN_POLICYFILE%
\ No newline at end of file
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.sh b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-env.sh
new file mode 100644 (file)
index 0000000..d0da367
--- /dev/null
@@ -0,0 +1,154 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+export JAVA_HOME=<%= node['java']['java_home'] %>
+
+export YARN_LOG_DIR=<%= node['hadoop']['YARN_LOG_DIR'] %>
+<% if node['hadoop']['install_flavor'] == 'apache' then -%>
+export YARN_PID_DIR=<%= node['hadoop']['YARN_PID_DIR'] %>
+export YARN_HOME=<%= node['hadoop']['YARN_HOME'] %>
+
+export HADOOP_PREFIX=<%= node['hadoop']['HADOOP_PREFIX'] %>
+export HADOOP_COMMON_HOME=<%= node['hadoop']['HADOOP_COMMON_HOME'] %>
+export HADOOP_HDFS_HOME=<%= node['hadoop']['HADOOP_HDFS_HOME'] %>
+export HADOOP_MAPRED_HOME=<%= node['hadoop']['HADOOP_MAPRED_HOME'] %>
+<% elsif node['hadoop']['install_flavor'] == 'cdh' then -%>
+# Do not set $YARN_PID_DIR in this file! -> /etc/default/hadoop-*
+# (/var/run/hadoop-yarn on the YARN daemons,
+#  /var/run/hadoop-mapreduce on the HistoryServer)
+#export YARN_PID_DIR=<%= node['hadoop']['YARN_PID_DIR'] %>
+export MAPRED_LOG_DIR=<%= node['hadoop']['MAPRED_LOG_DIR'] %>
+<% end -%>
+
+
+# User for YARN daemons
+export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
+
+# resolve links - $0 may be a softlink
+<% if node['hadoop']['install_flavor'] == 'cdh' then -%>
+export YARN_CONF_DIR="${YARN_CONF_DIR:-$YARN_HOME/conf}"
+<% else -%>
+export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
+<% end -%>
+
+# some Java parameters
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# For setting YARN specific HEAP sizes please use this
+# Parameter and set appropriately
+# YARN_HEAPSIZE=1000
+
+# check envvars which might override default args
+if [ "$YARN_HEAPSIZE" != "" ]; then
+  JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
+fi
+
+# Resource Manager specific parameters
+
+# Specify the max Heapsize for the ResourceManager using a numerical value
+# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS
+# and/or YARN_RESOURCEMANAGER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
+
+# Specify the JVM options to be used when starting the ResourceManager.
+# These options will be appended to the options specified as YARN_OPTS
+# and therefore may override any similar flags set in YARN_OPTS
+#export YARN_RESOURCEMANAGER_OPTS=
+
+# Node Manager specific parameters
+
+# Specify the max Heapsize for the NodeManager using a numerical value
+# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS
+# and/or YARN_NODEMANAGER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#export YARN_NODEMANAGER_HEAPSIZE=1000
+
+# Specify the JVM options to be used when starting the NodeManager.
+# These options will be appended to the options specified as YARN_OPTS
+# and therefore may override any similar flags set in YARN_OPTS
+#export YARN_NODEMANAGER_OPTS=
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+
+# default log directory & file
+if [ "$YARN_LOG_DIR" = "" ]; then
+<% if node['hadoop']['install_flavor'] == 'cdh' then -%>
+  YARN_LOG_DIR="$YARN_HOME/logs"
+<% else -%>
+  YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
+<% end -%>
+fi
+if [ "$YARN_LOGFILE" = "" ]; then
+  YARN_LOGFILE='yarn.log'
+fi
+
+# default policy file for service-level authorization
+if [ "$YARN_POLICYFILE" = "" ]; then
+  YARN_POLICYFILE="hadoop-policy.xml"
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+
+YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
+YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
+YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
+YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
+YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
+YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
+YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
+YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
+
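+# Render any ad-hoc environment exports declared under
+# node['hadoop']['extra_configs']['yarn-env.sh'] (a key/value hash).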
+<%
+this_file = 'yarn-env.sh'
+if node['hadoop']['extra_configs'] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key, value|
+-%>
+export <%= key %>=<%= value %>
+<%
+  end
+end
+-%>
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-site.xml b/cookbooks/hadoop/templates/default/etc-2.2/hadoop/yarn-site.xml
new file mode 100644 (file)
index 0000000..f39a378
--- /dev/null
@@ -0,0 +1,187 @@
+<?xml version="1.0"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+<!-- Site specific YARN configuration properties -->
+    <property>
+        <name>this.resourcemanager.fqdn</name>
+        <value><%= node['hadoop']['this.resourcemanager.fqdn'] %></value>
+        <!-- <value>${this.cluster.name}-rm.${this.domain}</value> -->
+    </property>
+
+    <property>
+        <name>yarn.log-aggregation-enable</name>
+        <value><%= node['hadoop']['yarn.log-aggregation-enable'] %></value>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.scheduler.class</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.scheduler.class'] %></value>
+        <description>In case you do not want to use the default scheduler</description>
+    </property>
+<%
+# Build a comma-separated list of per-volume NodeManager local directories.
+yarn_nodemanager_local_dirs = (0...@active_vol_nums.to_i).map {|vol_num|
+  "#{node['grid']['vol_root']}/#{vol_num}/var/lib/${user.name}/nm/local"
+}.join(',')
+-%>
+    <property>
+        <name>yarn.nodemanager.local-dirs</name>
+        <value><%= yarn_nodemanager_local_dirs %></value>
+        <!-- <value>/grid/vol/0/var/lib/${user.name}/nm/local,/grid/vol/1/var/lib/${user.name}/nm/local</value> -->
+        <description>the local directories used by the nodemanager
+        (default: /tmp/nm-local-dir)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.resource.memory-mb</name>
+        <value><%= node['hadoop']['yarn.nodemanager.resource.memory-mb'] %></value>
+        <description>Amount of physical memory, in MB, that can be allocated
+        for containers. (default: 8192)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.resource.cpu-cores</name>
+        <value><%= node['hadoop']['yarn.nodemanager.resource.cpu-cores'] %></value>
+        <description>Number of CPU cores that can be allocated
+        for containers. (default: 8)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.remote-app-log-dir</name>
+        <value><%= node['hadoop']['yarn.nodemanager.remote-app-log-dir'] %></value>
+        <description>directory on hdfs where the application logs are moved to
+        (default: /tmp/logs)</description>
+    </property>
+<%
+# Build a comma-separated list of per-volume NodeManager log directories.
+yarn_nodemanager_log_dirs = (0...@active_vol_nums.to_i).map {|vol_num|
+  "#{node['grid']['vol_root']}/#{vol_num}/var/log/${user.name}/nm"
+}.join(',')
+-%>
+    <property>
+        <name>yarn.nodemanager.log-dirs</name>
+        <value><%= yarn_nodemanager_log_dirs %></value>
+        <!-- <value>/grid/vol/0/var/log/${user.name}/nm,/grid/vol/1/var/log/${user.name}/nm</value> -->
+        <description>the directories used by Nodemanagers as log directories
+        (default: /tmp/logs)</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value><%= node['hadoop']['yarn.nodemanager.aux-services'] %></value>
+        <description>shuffle service that needs to be set for Map Reduce to run</description>
+    </property>
+    <property>
+        <name>yarn.application.classpath</name>
+        <value><%= node['hadoop']['yarn.application.classpath'] %></value>
+        <description>Classpath for typical applications.</description>
+    </property>
+
+    <property>
+        <name>yarn.resourcemanager.nodes.include-path</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.nodes.include-path'] %></value>
+        <description>Path to file with nodes to include.</description>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.nodes.exclude-path</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.nodes.exclude-path'] %></value>
+        <description>Path to file with nodes to exclude.</description>
+    </property>
+
+    <property>
+        <name>yarn.nodemanager.admin-env</name>
+        <value><%= node['hadoop']['yarn.nodemanager.admin-env'] %></value>
+    </property>
+
+<% if node['hadoop']['with_security'] then -%>
+    <property>
+        <name>yarn.acl.enable</name>
+        <value><%= node['hadoop']['yarn.acl.enable'] %></value>
+    </property>
+    <property>
+        <name>yarn.admin.acl</name>
+        <value><%= node['hadoop']['yarn.admin.acl'] %></value>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.principal</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.principal'] %></value>
+        <!-- <value>yarn/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>yarn.resourcemanager.keytab</name>
+        <value><%= node['hadoop']['yarn.resourcemanager.keytab'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.principal</name>
+        <value><%= node['hadoop']['yarn.nodemanager.principal'] %></value>
+        <!-- <value>yarn/_HOST@${this.realm}</value> -->
+    </property>
+    <property>
+        <name>yarn.nodemanager.keytab</name>
+        <value><%= node['hadoop']['yarn.nodemanager.keytab'] %></value>
+    </property>
+
+    <property>
+        <name>yarn.nodemanager.container-executor.class</name>
+        <value><%= node['hadoop']['yarn.nodemanager.container-executor.class'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.group</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.group'] %></value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.resources-handler.class'] %></value>
+        <description>The class which should help the LCE handle resources.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'] %></value>
+        <description>The cgroups hierarchy under which to place YARN processes (cannot contain commas).
+        If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have
+        been pre-configured), then this cgroups hierarchy must already exist and be writable by the
+        NodeManager user, otherwise the NodeManager may fail.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount'] %></value>
+        <description>Whether the LCE should attempt to mount cgroups if not found.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
+        <value><%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.mount-path'] %></value>
+        <description>Where the LCE should attempt to mount cgroups if not found. Common locations
+        include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux
+        distribution in use. This path must exist before the NodeManager is launched.
+        Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and
+        yarn.nodemanager.linux-container-executor.cgroups.mount is true.</description>
+    </property>
+<% end -%>
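+    <!-- Render any extra properties declared under
+         node['hadoop']['extra_configs']['yarn-site.xml']. -->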
+<%
+this_file = 'yarn-site.xml'
+if node['hadoop']['extra_configs'] \
+  && node['hadoop']['extra_configs'][this_file] != nil then
+  node['hadoop']['extra_configs'][this_file].each do |key,value|
+-%>
+    <property>
+        <name><%= key %></name>
+        <value><%= value %></value>
+    </property>
+<%
+  end
+end 
+-%>
+</configuration>
+
diff --git a/cookbooks/hadoop/templates/default/etc-2.2/init/cgconfig4yarn.conf b/cookbooks/hadoop/templates/default/etc-2.2/init/cgconfig4yarn.conf
new file mode 100644 (file)
index 0000000..b0bb101
--- /dev/null
@@ -0,0 +1,14 @@
+description "cgroup configurations for YARN"
+start on started cgroup-lite
+#start on mounted MOUNTPOINT=/sys/fs/cgroup
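+# Create the YARN cgroup hierarchy for each listed subsystem and make it
+# writable by the yarn user (see yarn.nodemanager.linux-container-executor.cgroups.hierarchy).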
+script
+       for subsys in cpu; do
+               HIERARCHY_PATH=/sys/fs/cgroup/${subsys}/<%= node['hadoop']['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'].gsub(/^\//, '') %>
+               if [ ! -d $HIERARCHY_PATH ]; then
+                       mkdir $HIERARCHY_PATH
+                       chown -R yarn:yarn $HIERARCHY_PATH
+               fi
+       done
+end script
diff --git a/cookbooks/hadoop/templates/default/grid/usr/sbin/hadoop_pseudo_distributed_init.sh b/cookbooks/hadoop/templates/default/grid/usr/sbin/hadoop_pseudo_distributed_init.sh
new file mode 100644 (file)
index 0000000..bf8a168
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/sh
+#
+# Initialize script for Apache Hadoop pseudo distributed mode.
+#
+# Copyright 2013, whitestar
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+HADOOP_PREFIX=<%= node['grid']['app_root'] %>/hadoop
+GRID_VOL_ROOT=<%= node['grid']['vol_root'] %>
+
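+# Format the NameNode and bootstrap the HDFS directory layout; the commands
+# and paths differ between Hadoop 1 (bin/, MRv1 dirs) and Hadoop 2 (sbin/, YARN dirs).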
+<%
+case @major_version
+when '1'
+-%>
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop namenode -format
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop-daemon.sh start namenode
+sudo <%= @datanode_sudo_user_opt %> ${HADOOP_PREFIX}/bin/hadoop-daemon.sh start datanode
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop dfsadmin -safemode wait
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chown hdfs:hdfs /
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chmod 755 /
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -mkdir /user
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -mkdir ${GRID_VOL_ROOT}/0/var/lib/mapred
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chown mapred:mapred ${GRID_VOL_ROOT}/0/var/lib/mapred
+<%
+when '2'
+-%>
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hdfs namenode -format
+sudo -u hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh start namenode
+sudo <%= @datanode_sudo_user_opt %> ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh start datanode
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hdfs dfsadmin -safemode wait
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chown hdfs:hdfs /
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chmod 755 /
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -mkdir /user
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -mkdir -p ${GRID_VOL_ROOT}/0/var/log/yarn/nm
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chown yarn:hadoop ${GRID_VOL_ROOT}/0/var/log/yarn/nm
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chmod 1777        ${GRID_VOL_ROOT}/0/var/log/yarn/nm
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -mkdir -p ${GRID_VOL_ROOT}/0/var/lib/mapred/history
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chown -R mapred:hadoop ${GRID_VOL_ROOT}/0/var/lib/mapred
+sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop fs -chmod -R 755           ${GRID_VOL_ROOT}/0/var/lib/mapred
+<%
+end
+-%>
+
diff --git a/nodes/localhost-ah.json b/nodes/localhost-ah.json
new file mode 100644 (file)
index 0000000..fecc0ab
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "run_list": [
+    "role[hadoop-pseudo-distributed-ah]"
+  ]
+}
diff --git a/nodes/localhost-ah2.json b/nodes/localhost-ah2.json
new file mode 100644 (file)
index 0000000..26207b3
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "run_list": [
+    "role[hadoop-pseudo-distributed-ah2]"
+  ]
+}
diff --git a/roles/hadoop-pseudo-distributed-ah.rb b/roles/hadoop-pseudo-distributed-ah.rb
new file mode 100644 (file)
index 0000000..71f1109
--- /dev/null
@@ -0,0 +1,38 @@
+name 'hadoop-pseudo-distributed-ah'
+description 'Hadoop pseudo distributed mode configurations for Apache Hadoop 1'
+
+run_list(
+  'role[node_commons]',
+  'role[hadoop-pseudo-distributed]'
+)
+
+#env_run_lists()
+
+default_attributes(
+  'hadoop' => {
+    'version' => '1.2.1',
+    #'version' => '1.1.2',
+    #'version' => '1.0.4',
+    'hadoop.http.authentication.type' => 'simple',
+    'metrics2' => {
+      'namenode.sink.ganglia.servers' => 'localhost:8649',
+      'datanode.sink.ganglia.servers' => 'localhost:8649',
+      # for 1.0.x only
+      'jobtracker.sink.ganglia.servers' => 'localhost:8649',
+      'tasktracker.sink.ganglia.servers' => 'localhost:8649',
+      'maptask.sink.ganglia.servers' => 'localhost:8649',
+      'reducetask.sink.ganglia.servers' => 'localhost:8649'
+    },
+    # for 1.0.x only >>
+    'extra_configs' => {
+      'core-site.xml' => {
+        #'hadoop.http.authentication.signature.secret' \
+        #  => '91d365813d6dd1f4ceafff73f90a9a06'
+      }
+    },
+  },
+)
+
+override_attributes(
+)
+
diff --git a/roles/hadoop-pseudo-distributed-ah2.rb b/roles/hadoop-pseudo-distributed-ah2.rb
new file mode 100644 (file)
index 0000000..f036b31
--- /dev/null
@@ -0,0 +1,32 @@
+name 'hadoop-pseudo-distributed-ah2'
+description 'Hadoop pseudo distributed mode configurations for Apache Hadoop 2'
+
+run_list(
+  'role[node_commons]',
+  'role[hadoop-pseudo-distributed]'
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+  'hadoop' => {
+    'version' => '2.2.0',
+    #'version' => '2.1.1-beta',
+    #'version' => '2.0.6-alpha',
+    'hadoop.http.authentication.type' => 'simple',
+    'metrics2' => {
+      'namenode.sink.ganglia.servers' => 'localhost:8649',
+      'datanode.sink.ganglia.servers' => 'localhost:8649',
+      # for 2.0.x only
+      'resourcemanager.sink.ganglia.servers' => 'localhost:8649',
+      'nodemanager.sink.ganglia.servers' => 'localhost:8649',
+    },
+    # for 2.0.x only >>
+    #'yarn.nodemanager.admin-env' => 'MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX',
+    'yarn.nodemanager.admin-env' => 'MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX,LD_LIBRARY_PATH=${HADOOP_COMMON_HOME}/lib/native',
+    'yarn.nodemanager.linux-container-executor.resources-handler.class' \
+      => 'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler',
+  },
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
index bc8c940..ab52f31 100644 (file)
@@ -12,7 +12,8 @@ default_attributes(
     'version' => '3.4.5'
   },
   'hadoop' => {
-    'version' => '2.1.0-beta',
+    'version' => '2.2.0',
+    #'version' => '2.1.1-beta',
     #'version' => '2.0.6-alpha',
     'hadoop.http.authentication.type' => 'simple',
     'metrics2' => {
index d4e4422..b855353 100644 (file)
@@ -180,8 +180,8 @@ default_attributes(
     # for 1.0.x only >>
     'extra_configs' => {
       'core-site.xml' => {
-        'hadoop.http.authentication.signature.secret' \
-          => '91d365813d6dd1f4ceafff73f90a9a06'
+        #'hadoop.http.authentication.signature.secret' \
+        #  => '91d365813d6dd1f4ceafff73f90a9a06'
       }
     },
     'krb5_strong_crypto' => {