# limitations under the License.
#
+default['grid']['etc_root'] = '/grid/etc'
+default['grid']['app_root'] = '/grid/usr'
+default['grid']['vol_root'] = '/grid/vol'
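+# Number of data volumes to probe under vol_root (/grid/vol/0 ... /grid/vol/N-1).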
+default['grid']['max_vol_nums'] = '1'
+
# Defaults for pseudo-distributed mode
+default['hadoop']['install_flavor'] = 'apache'
+default['hadoop']['version'] = '1.1.2'
+default['hadoop']['archive_url'] = 'http://archive.apache.org/dist/hadoop/core'
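+# When true, the templates render their Kerberos security sections.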
default['hadoop']['with_security'] = false
## core-site.xml
default['hadoop']['this.cluster.name'] = 'localhost'
default['hadoop']['this.domain'] = 'localhost'
default['hadoop']['this.realm'] = 'LOCALDOMAIN'
-default['hadoop']['this.keytab.dir'] = "/grid/etc/keytabs/#{default['hadoop']['this.cluster.name']}"
+default['hadoop']['this.keytab.dir'] = "#{default['grid']['etc_root']}/keytabs/#{default['hadoop']['this.cluster.name']}"
default['hadoop']['this.namenode.fqdn'] = 'localhost'
+default['hadoop']['fs.checkpoint.dir'] = "#{default['grid']['vol_root']}/0/var/lib/${user.name}/checkpoint"
default['hadoop']['hadoop.http.authentication.kerberos.principal'] = 'HTTP/localhost@${this.realm}'
## hdfs-site.xml
default['hadoop']['this.secondary.namenode.fqdn'] = 'localhost'
+default['hadoop']['dfs.name.dir'] = "#{default['grid']['vol_root']}/0/var/lib/${user.name}/name"
default['hadoop']['dfs.datanode.kerberos.principal'] = 'hdfs/localhost@${this.realm}'
default['hadoop']['dfs.replication'] = '1'
## mapred-site.xml
=begin
# e.g. for a full-distributed cluster
+default['grid']['max_vol_nums'] = '4'
## core-site.xml
default['hadoop']['this.cluster.name'] = 'pleiades'
default['hadoop']['this.domain'] = 'grid.example.com'
default['hadoop']['this.realm'] = 'GRID.EXAMPLE.COM'
default['hadoop']['this.keytab.dir'] = '/grid/etc/keytabs/${this.cluster.name}'
default['hadoop']['this.namenode.fqdn'] = '${this.cluster.name}-nn.${this.domain}'
+default['hadoop']['fs.checkpoint.dir'] = "#{default['grid']['vol_root']}/0/var/lib/${user.name}/checkpoint,/export/home/${user.name}/var/lib/checkpoint"
default['hadoop']['hadoop.http.authentication.kerberos.principal'] = 'HTTP/${this.fqdn}@${this.realm}'
## hdfs-site.xml
default['hadoop']['this.secondary.namenode.fqdn'] = '${this.cluster.name}-cn.${this.domain}'
+default['hadoop']['dfs.name.dir'] = "#{default['grid']['vol_root']}/0/var/lib/${user.name}/name,/export/home/${user.name}/var/lib/name"
default['hadoop']['dfs.datanode.kerberos.principal'] = 'hdfs/_HOST@${this.realm}'
default['hadoop']['dfs.replication'] = '3'
## mapred-site.xml
supports :manage_home => false
end
-directory '/grid/usr' do
+directory node['grid']['app_root'] do
owner 'root'
group 'root'
mode '0755'
recursive true
end
-directory '/grid/vol/0/var' do
- owner 'root'
- group 'root'
- mode '0755'
- action :create
- recursive true
-end
-
-%w{lib log run}.each {|dir|
- directory "/grid/vol/0/var/#{dir}" do
- owner 'root'
- group 'hadoop'
- mode '0775'
- action :create
- recursive true
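+# Create the per-volume working directories. Volume 0 is always created;
+# each higher-numbered volume is used only if its directory already exists
+# (i.e. a data disk is mounted there), and probing stops at the first gap.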
+active_vol_nums = 0
+node['grid']['max_vol_nums'].to_i.times {|vol_num|
+ target_vol_dir = "#{node['grid']['vol_root']}/#{vol_num}"
+
+ if vol_num == 0 || File.directory?(target_vol_dir) then
+ directory "#{target_vol_dir}/var" do
+ owner 'root'
+ group 'root'
+ mode '0755'
+ action :create
+ recursive true
+ end
+
+ %w{lib log}.each {|dir|
+ directory "#{target_vol_dir}/var/#{dir}" do
+ owner 'root'
+ group 'hadoop'
+ mode '0775'
+ action :create
+ recursive true
+ end
+ }
+
+ directory "#{target_vol_dir}/tmp" do
+ owner 'root'
+ group 'root'
+ mode '1777'
+ action :create
+ recursive true
+ end
+
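+ # pid files and the HDFS daemon log directory live on volume 0 only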
+ if vol_num == 0 then
+ directory "#{target_vol_dir}/var/run" do
+ owner 'root'
+ group 'hadoop'
+ mode '0755'
+ action :create
+ recursive true
+ end
+
+ directory "#{target_vol_dir}/var/log/hdfs" do
+ owner 'hdfs'
+ group 'hdfs'
+ mode '0755'
+ action :create
+ recursive true
+ end
+ end
+ else
+ break
end
+ active_vol_nums = vol_num + 1
}
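+# Resulting layout on each active volume (modes from the resources above):
+#   <vol>/var      root:root   0755
+#   <vol>/var/lib  root:hadoop 0775
+#   <vol>/var/log  root:hadoop 0775
+#   <vol>/tmp      root:root   1777
+# plus, on volume 0 only: var/run (root:hadoop 0755) and var/log/hdfs (hdfs:hdfs 0755).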
+log "This node active volumes: #{active_vol_nums}"
-directory '/grid/vol/0/var/log/hdfs' do
- owner 'hdfs'
- group 'hdfs'
- mode '0755'
- action :create
- recursive true
-end
-
-hadoop_mirror_url = 'http://ftp.riken.jp/net/apache/hadoop/common'
-hadoop_ver = '1.1.2'
-hadoop_tarball = "hadoop-#{hadoop_ver}.tar.gz"
+hadoop_tarball = "hadoop-#{node['hadoop']['version']}.tar.gz"
remote_file "#{Chef::Config[:file_cache_path]}/#{hadoop_tarball}" do
- source "#{hadoop_mirror_url}/hadoop-#{hadoop_ver}/#{hadoop_tarball}"
+ source "#{node['hadoop']['archive_url']}/hadoop-#{node['hadoop']['version']}/#{hadoop_tarball}"
action :create_if_missing
end
bash 'install_hadoop' do
code <<-EOC
- tar xvzf "#{Chef::Config[:file_cache_path]}/#{hadoop_tarball}" -C /grid/usr
+ tar xvzf "#{Chef::Config[:file_cache_path]}/#{hadoop_tarball}" -C "#{node['grid']['app_root']}"
EOC
- creates "/grid/usr/hadoop-#{hadoop_ver}"
+ creates "#{node['grid']['app_root']}/hadoop-#{node['hadoop']['version']}"
end
=begin
link '/grid/usr/hadoop' do
only_if 'test -L /grid/usr/hadoop'
end
=end
-link '/grid/usr/hadoop' do
- to "/grid/usr/hadoop-#{hadoop_ver}"
+link "#{node['grid']['app_root']}/hadoop" do
+ to "#{node['grid']['app_root']}/hadoop-#{node['hadoop']['version']}"
end
conf_files = [
'hadoop-metrics2.properties',
'hadoop-policy.xml',
'hdfs-site.xml',
+ 'hosts.include',
+ 'hosts.exclude',
'log4j.properties',
'mapred-queue-acls.xml',
'mapred-site.xml',
]
conf_files.each {|conf_file|
- template "/grid/usr/hadoop-#{hadoop_ver}/conf/#{conf_file}" do
+ template "#{node['grid']['app_root']}/hadoop-#{node['hadoop']['version']}/conf/#{conf_file}" do
source "conf/#{conf_file}"
owner 'root'
group 'root'
mode '0644'
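+ # Expose the probed volume count to the ERB templates (@active_vol_nums).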
+ variables({
+ :active_vol_nums => active_vol_nums
+ })
end
}
recursive true
end
-template "/grid/usr/hadoop-#{hadoop_ver}/conf/taskcontroller.cfg" do
+template "#{node['grid']['app_root']}/hadoop-#{node['hadoop']['version']}/conf/taskcontroller.cfg" do
source "conf/taskcontroller.cfg"
owner 'root'
group 'root'
log <<-EOM
Note:
You must initialize HDFS on the first installation:
- $ cd /grid/usr/hadoop
+ $ cd #{node['grid']['app_root']}/hadoop
$ sudo -u hdfs ./bin/hadoop namenode -format
$ sudo -u hdfs ./bin/hadoop-daemon.sh start namenode
$ sudo -u hdfs ./bin/hadoop-daemon.sh start datanode
$ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
$ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
$ sudo -u hdfs ./bin/hadoop fs -mkdir /user
- $ sudo -u hdfs ./bin/hadoop fs -mkdir /grid/vol/0/var/lib/mapred
- $ sudo -u hdfs ./bin/hadoop fs -chown mapred:mapred /grid/vol/0/var/lib/mapred
+ $ sudo -u hdfs ./bin/hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
+ $ sudo -u hdfs ./bin/hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
EOM
log <<-EOM
Note: to create a test user and run an example MapReduce job:
$ sudo adduser alice
$ sudo -u hdfs ./bin/hadoop fs -mkdir /user/alice
$ sudo -u hdfs ./bin/hadoop fs -chown alice:alice /user/alice
- $ sudo -u alice ./bin/hadoop jar hadoop-examples-#{hadoop_ver}.jar pi 5 10
+ $ sudo -u alice ./bin/hadoop jar hadoop-examples-#{node['hadoop']['version']}.jar pi 5 10
EOM
</property>
<property>
<name>fs.checkpoint.dir</name>
- <value>/grid/vol/0/var/lib/${user.name}/checkpoint</value>
- <!-- <value>/grid/vol/0/var/lib/${user.name}/checkpoint,/export/home/${user.name}/var/lib/checkpoint</value> -->
+ <value><%= node['hadoop']['fs.checkpoint.dir'] %></value>
+ <!-- <value><%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/checkpoint,/export/home/${user.name}/var/lib/checkpoint</value> -->
</property>
<% if node['hadoop']['with_security'] then %>
export JAVA_HOME=<%= node['java']['java_home'] %>
# The directory where pid files are stored. /tmp by default.
-export HADOOP_PID_DIR=/grid/vol/0/var/run/$USER
+export HADOOP_PID_DIR=<%= node['grid']['vol_root'] %>/0/var/run/$USER
# Where log files are stored. $HADOOP_HOME/logs by default.
-export HADOOP_LOG_DIR=/grid/vol/0/var/log/$USER
+export HADOOP_LOG_DIR=<%= node['grid']['vol_root'] %>/0/var/log/$USER
<% if node['hadoop']['with_security'] then %>
export HADOOP_SECURE_DN_USER=hdfs
-export HADOOP_SECURE_DN_PID_DIR=/grid/vol/0/var/run/${HADOOP_SECURE_DN_USER}
-export HADOOP_SECURE_DN_LOG_DIR=/grid/vol/0/var/log/${HADOOP_SECURE_DN_USER}
+export HADOOP_SECURE_DN_PID_DIR=<%= node['grid']['vol_root'] %>/0/var/run/${HADOOP_SECURE_DN_USER}
+export HADOOP_SECURE_DN_LOG_DIR=<%= node['grid']['vol_root'] %>/0/var/log/${HADOOP_SECURE_DN_USER}
# Extra Java CLASSPATH elements. Optional.
if [ x"$HADOOP_CLASSPATH" = x ]; then
export HADOOP_CLASSPATH=/usr/share/java/commons-daemon.jar
<property>
<name>dfs.name.dir</name>
- <value>/grid/vol/0/var/lib/${user.name}/name</value>
- <!-- <value>/grid/vol/0/var/lib/${user.name}/name,/export/home/${user.name}/var/lib/name</value> -->
+ <value><%= node['hadoop']['dfs.name.dir'] %></value>
+ <!-- <value><%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/name,/export/home/${user.name}/var/lib/name</value> -->
</property>
+<%
+dfs_data_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+ dfs_data_dir = dfs_data_dir == '' ? '' : "#{dfs_data_dir},"
+ dfs_data_dir = "#{dfs_data_dir}#{node['grid']['vol_root']}/#{vol_num}/var/lib/${user.name}/data"
+}
+%>
<property>
<name>dfs.data.dir</name>
- <value>/grid/vol/0/var/lib/${user.name}/data</value>
- <!-- <value>/grid/vol/0/var/lib/${user.name}/data,/grid/vol/1/var/lib/${user.name}/data</value> -->
+ <value><%= dfs_data_dir %></value>
+ <!-- <value><%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/data,<%= node['grid']['vol_root'] %>/1/var/lib/${user.name}/data</value> -->
</property>
<property>
<name>dfs.replication</name>
<!-- <value>3</value> -->
</property>
- <!--
<property>
<name>dfs.hosts</name>
- <value>/grid/usr/hadoop/conf/hosts.include</value>
+ <value><%= node['grid']['app_root'] %>/hadoop/conf/hosts.include</value>
<description>
Names a file that contains a list of hosts that are permitted to connect to the namenode.
The full pathname of the file must be specified. If the value is empty, all hosts are permitted.
</description>
</property>
<property>
<name>dfs.hosts.exclude</name>
- <value>/grid/usr/hadoop/conf/hosts.exclude</value>
+ <value><%= node['grid']['app_root'] %>/hadoop/conf/hosts.exclude</value>
<description>
Names a file that contains a list of hosts that are not permitted to connect to the namenode.
The full pathname of the file must be specified. If the value is empty, no hosts are excluded.
</description>
</property>
- -->
<% if node['hadoop']['with_security'] then %>
<property>
</property>
<property>
<name>mapred.system.dir</name>
- <value>/grid/vol/0/var/lib/${user.name}/system</value>
+ <value><%= node['grid']['vol_root'] %>/0/var/lib/${user.name}/system</value>
</property>
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
</property>
<property>
<name>mapred.job.tracker.persist.jobstatus.dir</name>
- <value>/grid/vol/0/var/lib/mapred/jobstatus</value>
+ <value><%= node['grid']['vol_root'] %>/0/var/lib/mapred/jobstatus</value>
<description>The directory where the job status information is persisted
in a file system to be available after it drops of the memory queue and
between jobtracker restarts. (default: /jobtracker/jobsInfo)
<property>
<name>hadoop.job.history.location</name>
- <value>file:///grid/vol/0/var/lib/mapred/history</value>
+ <value>file://<%= node['grid']['vol_root'] %>/0/var/lib/mapred/history</value>
<description>hdfs:// is UNusable.</description>
</property>
<property>
<name>mapred.job.tracker.history.completed.location</name>
- <value>hdfs:///grid/vol/0/var/lib/mapred/history/done</value>
+ <value>hdfs://<%= node['grid']['vol_root'] %>/0/var/lib/mapred/history/done</value>
</property>
+<%
+mapred_local_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+ mapred_local_dir = mapred_local_dir == '' ? '' : "#{mapred_local_dir},"
+ mapred_local_dir = "#{mapred_local_dir}#{node['grid']['vol_root']}/#{vol_num}/var/lib/mapred/local"
+}
+%>
<property>
<name>mapred.local.dir</name>
- <value>/grid/vol/0/var/lib/mapred/local</value>
- <!-- <value>/grid/vol/0/var/lib/mapred/local,/grid/vol/1/var/lib/mapred/local</value> -->
+ <value><%= mapred_local_dir %></value>
+ <!-- <value><%= node['grid']['vol_root'] %>/0/var/lib/mapred/local,<%= node['grid']['vol_root'] %>/1/var/lib/mapred/local</value> -->
<description>
The local directory where MapReduce stores intermediate data files.
May be a comma-separated list of directories on different devices in order to spread disk i/o.
Directories that do not exist are ignored.
</description>
</property>
+<%
+mapred_temp_dir = ''
+@active_vol_nums.to_i.times {|vol_num|
+ mapred_temp_dir = mapred_temp_dir == '' ? '' : "#{mapred_temp_dir},"
+ mapred_temp_dir = "#{mapred_temp_dir}#{node['grid']['vol_root']}/#{vol_num}/tmp/${user.name}/mapred"
+}
+%>
<property>
<name>mapred.temp.dir</name>
- <value>/grid/vol/0/tmp/${user.name}/mapred</value>
- <!-- <value>/grid/vol/0/tmp/${user.name}/mapred,/grid/vol/1/tmp/${user.name}/mapred</value> -->
+ <value><%= mapred_temp_dir %></value>
+ <!-- <value><%= node['grid']['vol_root'] %>/0/tmp/${user.name}/mapred,<%= node['grid']['vol_root'] %>/1/tmp/${user.name}/mapred</value> -->
<description>
A shared directory for temporary files.
</description>
</property>
- <!--
<property>
<name>mapred.hosts</name>
- <value>/grid/usr/hadoop/conf/hosts.include</value>
+ <value><%= node['grid']['app_root'] %>/hadoop/conf/hosts.include</value>
<description>
Names a file that contains the list of nodes that may connect to the jobtracker.
If the value is empty, all hosts are permitted.
</description>
</property>
<property>
<name>mapred.hosts.exclude</name>
- <value>/grid/usr/hadoop/conf/hosts.exclude</value>
+ <value><%= node['grid']['app_root'] %>/hadoop/conf/hosts.exclude</value>
<description>
Names a file that contains the list of hosts that should be excluded by the jobtracker.
If the value is empty, no hosts are excluded.
</description>
</property>
- -->
<% if node['hadoop']['with_security'] then %>
<property>
--- /dev/null
+name 'grid-realm'
+description 'The Grid realm configurations.'
+
+run_list(
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+ 'krb5' => {
+ 'libdefaults' => {
+ 'default_realm' => 'GRID.EXAMPLE.COM'
+ },
+ 'realms' => {
+ 'default_realm' => {
+ 'kdcs' => [
+ 'ns01.grid.example.com',
+ 'ns02.grid.example.com'
+ ],
+ 'admin_server' => 'krb-admin.grid.example.com'
+ }
+ },
+ 'domain_realms' => [
+ '.grid.example.com = GRID.EXAMPLE.COM'
+ ]
+ }
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
--- /dev/null
+name 'hadoop-pleiades'
+description 'Pleiades Hadoop cluster node'
+
+run_list(
+ 'role[nameservice-client-grid]',
+ 'role[hadoop]'
+)
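+# e.g. assign with: knife node run_list add NODE 'role[hadoop-pleiades]'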
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+ 'hadoop' => {
+ 'with_security' => true
+ }
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
description 'Hadoop pseudo-distributed mode with security'
run_list(
- 'recipe[krb5::client]',
- 'recipe[krb5::admin]',
+ 'role[nameservice-master]',
'role[hadoop]'
)
--- /dev/null
+name 'nameservice-client-grid'
+description 'The NameService (Kerberos and LDAP) client role for Grid'
+
+run_list(
+ 'role[grid-realm]',
+ 'role[nameservice-client]'
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
--- /dev/null
+name 'nameservice-client'
+description 'The NameService (Kerberos and LDAP) client role'
+
+run_list(
+ 'recipe[node_commons]',
+ 'recipe[krb5::client]'
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
--- /dev/null
+name 'nameservice-master-grid'
+description 'The NameService (Kerberos and LDAP) master role for Grid'
+
+run_list(
+ 'role[grid-realm]',
+ 'role[nameservice-master]'
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
run_list(
'recipe[node_commons]',
- 'recipe[krb5:client]',
- 'recipe[krb5:admin]'
+ 'recipe[krb5::client]',
+ 'recipe[krb5::admin]'
)
#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
default_attributes(
- 'krb5' => {
- 'libdefaults' => {
- 'default_realm' => 'GRID.EXAMPLE.COM'
- },
- 'realms' => {
- 'default_realm' => {
- 'kdcs' => [
- 'ns01.grid.example.com',
- 'ns02.grid.example.com'
- ],
- 'admin_server' => 'krb-admin.grid.example.com'
- }
- },
- 'domain_realms' => [
- '.grid.example.com = GRID.EXAMPLE.COM'
- ]
- }
)
#override_attributes "apache2" => { "max_children" => "50" }
--- /dev/null
+name 'nameservice-slave-grid'
+description 'The NameService (Kerberos and LDAP) slave role for Grid'
+
+run_list(
+ 'role[grid-realm]',
+ 'role[nameservice-slave]'
+)
+
+#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
+
+default_attributes(
+)
+
+#override_attributes "apache2" => { "max_children" => "50" }
run_list(
'recipe[node_commons]',
- 'recipe[krb5:client]',
- 'recipe[krb5:kdc]'
+ 'recipe[krb5::client]',
+ 'recipe[krb5::kdc]'
)
#env_run_lists "prod" => ["recipe[apache2]"], "staging" => ["recipe[apache2::staging]"], "_default" => []
default_attributes(
- 'krb5' => {
- 'libdefaults' => {
- 'default_realm' => 'GRID.EXAMPLE.COM'
- },
- 'realms' => {
- 'default_realm' => {
- 'kdcs' => [
- 'ns01.grid.example.com',
- 'ns02.grid.example.com'
- ],
- 'admin_server' => 'krb-admin.grid.example.com'
- }
- },
- 'domain_realms' => [
- '.grid.example.com = GRID.EXAMPLE.COM'
- ]
- }
)
#override_attributes "apache2" => { "max_children" => "50" }