Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Claudio Fahey committed Sep 5, 2014
1 parent 5125c95 commit 67896b9
Show file tree
Hide file tree
Showing 11 changed files with 504 additions and 0 deletions.
175 changes: 175 additions & 0 deletions bde/bde_cluster_post_deploy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#!/usr/bin/env python
# Perform post-processing after VMware Big Data Extensions provisions a new cluster.
# Written by Claudio Fahey ([email protected])

app_ver = '0.3'

import subprocess
import sys
import os
import multiprocessing
import sys
import getopt
import shutil
import functools
import subprocess
import shutil
import glob
import json
import uuid
import datetime
import cookielib
import urllib2
import urllib
import re
import tempfile

def die(error_message='died'):
    """Abort the current operation by raising a generic ``Exception``.

    error_message -- text carried by the raised exception (default 'died').
    """
    failure = Exception(error_message)
    raise failure

def system_command(cmd, print_output=False):
    """Run ``cmd`` through the shell and capture everything it prints.

    The command line is echoed first, prefixed with '# '. When
    ``print_output`` is true, the captured stdout followed by the captured
    stderr is printed once the command has finished.

    Returns a ``(returncode, stdout, stderr)`` tuple. A non-zero return code
    is reported to the caller, not raised.
    """
    print('# ' + cmd)
    process = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    captured_out, captured_err = process.communicate()
    if print_output:
        print(captured_out + captured_err)
    return process.returncode, captured_out, captured_err

def system_command_required(cmd, print_output=True):
    """Run ``cmd`` via ``system_command`` and abort if it does not succeed.

    cmd          -- shell command line to execute.
    print_output -- forwarded to ``system_command``; defaults to True here.

    Returns the ``(returncode, stdout, stderr)`` tuple from ``system_command``.
    Raises (through ``die``) an ``Exception`` naming the command and its exit
    code when the exit code is non-zero.
    """
    returncode, output, errors = system_command(cmd, print_output=print_output)
    if returncode != 0:
        # Say which command failed; a bare 'died' is useless when many
        # commands are run against many nodes.
        die('command failed (exit code ' + str(returncode) + '): ' + cmd)
    return returncode, output, errors

def load_json_from_file(filename):
    """Parse the JSON document stored in ``filename`` and return the result."""
    with open(filename) as json_source:
        return json.loads(json_source.read())

def serengeti_auth(serurl, username, password):
    """Log in to the Serengeti web service and return a cookie-aware opener.

    serurl   -- base URL of the service; '/j_spring_security_check' is
                appended to form the login URL.
    username -- plain user name; it is sent base64-encoded in 'j_username'.
    password -- sent unchanged in the 'j_password' form field.

    Returns the ``urllib2`` opener used for the login request. Its cookie jar
    keeps any cookies set by the login response, so later requests made
    through it reuse them.
    """
    import base64

    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    # Username is the base64 encoding of actual username. b64encode never
    # inserts line breaks, whereas the 'base64' string codec wraps long
    # output and would leave newlines embedded in the form value.
    encoded_username = base64.b64encode(username)
    data = urllib.urlencode({'j_username': encoded_username, 'j_password': password})
    response = opener.open(serurl + "/j_spring_security_check", data)
    # Only the cookies matter; release the connection right away.
    response.close()
    return opener

def serengeti_api_read(serurl, urlsuffix, opener=None, data=None):
    """Fetch ``serurl + '/api' + urlsuffix`` and return the response body.

    serurl    -- base URL of the Serengeti service.
    urlsuffix -- API path appended after '/api' (e.g. '/cluster/<name>').
    opener    -- object with an ``open(url, data)`` method, normally the
                 opener returned by ``serengeti_auth``. Required.
    data      -- optional request body handed to ``opener.open``; ``None``
                 (the default) sends the request without a body.

    Returns whatever ``read()`` on the response yields.
    Raises ``ValueError`` when no opener is supplied.
    """
    if opener is None:
        raise ValueError('serengeti_api_read requires an opener (see serengeti_auth)')
    response = opener.open(serurl + '/api' + urlsuffix, data)
    try:
        return response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

def _remove_known_host(host):
    """Drop every line starting with '<host> ' from ~/.ssh/known_hosts."""
    orgfilename = os.path.expanduser('~/.ssh/known_hosts')
    if not os.path.isfile(orgfilename):
        return
    # Build the replacement next to the original: os.rename cannot cross
    # filesystems, and mkstemp avoids the name race inherent in mktemp.
    fd, newfilename = tempfile.mkstemp(dir=os.path.dirname(orgfilename))
    try:
        with os.fdopen(fd, 'w') as newfile:
            with open(orgfilename, 'r') as orgfile:
                for line in orgfile:
                    if line.startswith(host + ' '):
                        print('removing line ' + line)
                    else:
                        newfile.write(line)
        os.rename(newfilename, orgfilename)
    except Exception:
        # Do not leave a half-written copy behind in ~/.ssh.
        if os.path.exists(newfilename):
            os.remove(newfilename)
        raise


def configure_ssh(host, username, password):
    """Enable key-based ssh logins to ``host`` for ``username``.

    host     -- IP, fqdn, or relative host name.
    username -- remote account that receives the key.
    password -- that account's password, passed to ``sshpass`` for the one
                password-authenticated login.

    Steps: remove ``host`` from the local known_hosts file (to avoid problems
    with IP address reuse), append the local ~/.ssh/id_rsa.pub to the remote
    .ssh/authorized_keys, then run a test command over ssh. Any failing
    command raises through ``system_command_required``.
    """
    _remove_known_host(host)

    # Single-quote the password so that shell metacharacters in it reach
    # sshpass literally instead of being interpreted by the local shell.
    quoted_password = "'" + password.replace("'", "'\\''") + "'"
    system_command_required(
        'cat ~/.ssh/id_rsa.pub | sshpass -p ' + quoted_password + ' ssh -o StrictHostKeyChecking=no ' + username + '@' + host +
        ' "mkdir -p .ssh ; chmod 700 .ssh ; chown -R ' + username + ':' + username + ' .ssh ; ' +
        'touch .ssh/authorized_keys ; chmod 600 .ssh/authorized_keys ; ' +
        'cat - >> .ssh/authorized_keys"')

    # Verify that the key-based login now works.
    ssh_command(username, host, 'echo -n success: ; hostname')

def configure_network(node_name, ip, fqdn, username='root'):
    """Rewrite the host-name settings in a node's /etc/sysconfig/network.

    node_name -- value written as DHCP_HOSTNAME.
    ip        -- address used to reach the node over scp/ssh.
    fqdn      -- name passed to the remote ``hostname`` command afterwards.
    username  -- remote account used for scp/ssh (default 'root').

    The remote file is downloaded, every line starting with 'HOSTNAME=',
    'DHCP_HOSTNAME' or 'DOMAINNAME=' is dropped, a new DHCP_HOSTNAME line is
    put first, and the result is uploaded again. The network service is then
    restarted and the host name set. Any failing command raises through
    ``system_command_required``, which also prints the command output.
    """
    remote_file = username + '@' + ip + ':/etc/sysconfig/network'
    replaced_prefixes = ('HOSTNAME=', 'DHCP_HOSTNAME', 'DOMAINNAME=')

    # mkstemp creates the files atomically; mktemp only picks a name that
    # somebody else could claim before it is used.
    org_fd, orgfilename = tempfile.mkstemp()
    os.close(org_fd)
    new_fd, newfilename = tempfile.mkstemp()
    os.close(new_fd)
    try:
        # Copy remote /etc/sysconfig/network to local file
        system_command_required('scp ' + remote_file + ' ' + orgfilename)

        # Generate new /etc/sysconfig/network file locally
        with open(orgfilename, 'r') as orgfile:
            with open(newfilename, 'w') as newfile:
                newfile.write('DHCP_HOSTNAME=' + node_name + '\n')
                for line in orgfile:
                    if not line.startswith(replaced_prefixes):
                        newfile.write(line)

        # Copy new file to remote server
        system_command_required('scp ' + newfilename + ' ' + remote_file)
    finally:
        # Clean up the local copies even when a transfer fails.
        os.remove(orgfilename)
        os.remove(newfilename)

    ssh_command(username, ip, 'service network restart ; hostname ' + fqdn)

def get_fqdn(host, username='root'):
    """Return what ``hostname`` prints on ``host``, stripped of whitespace.

    The command is run over ssh as ``username`` (default 'root').
    """
    status, stdout_text, _stderr_text = ssh_command(username, host, 'hostname')
    if status != 0:
        die()
    return stdout_text.strip()

def configure_nfs(node_name, host, mountpoint, nfs_path, config):
    """Mount ``nfs_path`` at ``mountpoint`` on ``host`` via remote_mount_nfs.sh.

    The helper script is looked up under ``config['tools_root'] + '/bde/'``.
    Its combined output is printed, and a non-zero exit status aborts through
    ``die``. ``node_name`` is accepted but not used here.
    """
    script = config['tools_root'] + '/bde/remote_mount_nfs.sh'
    status, out, err = system_command(' '.join([script, host, mountpoint, nfs_path]))
    print(out + err)
    if status != 0:
        die()

def ssh_command(username, host, command):
    """Run ``command`` on ``host`` as ``username`` over ssh.

    The command is wrapped in double quotes on the local command line.
    Returns the ``(returncode, stdout, stderr)`` tuple from
    ``system_command_required``, which aborts on a non-zero exit status.
    """
    remote_invocation = ''.join(['ssh ', username, '@', host, ' "', command, '"'])
    return system_command_required(remote_invocation)

def configure_node_phase_1(node, config, username='root', password='none'):
    """Set up ssh access and networking for one node; return the node dict.

    ssh is configured against the node's IP first. Unless
    ``config['skip_configure_network']`` is true, the network settings are
    rewritten. The node's 'fqdn' entry is then replaced by the host name the
    node reports, and ssh is configured again under that name.
    """
    ip_address = node['ip']
    configure_ssh(ip_address, username, password)

    skip_network = config.get('skip_configure_network', False)
    if not skip_network:
        configure_network(node['node_name'], ip_address, node['fqdn'], username)

    reported_fqdn = get_fqdn(ip_address, username)
    node['fqdn'] = reported_fqdn
    configure_ssh(reported_fqdn, username, password)
    return node

def configure_node_phase_2(node, config, username='root', password='none'):
    """Install packages, mount NFS shares and run extra commands on a node.

    node     -- dict with at least 'node_name' and 'fqdn'.
    config   -- settings dict; 'skip_phase_2' disables this phase,
                'nfs_mounts' is a list of {'mount_point', 'path'} dicts and
                'ssh_commands' a list of command strings. Both lists default
                to empty.
    username -- remote account for ssh (default 'root').
    password -- accepted for symmetry with phase 1; not used here.

    Returns ``node`` unchanged.
    """
    if config.get('skip_phase_2', False):
        return node

    fqdn = node['fqdn']
    ssh_command(username, fqdn, 'yum -y install ed nano mlocate zip unzip nfs-utils')
    # Plain loops, not map(): these calls are made for their side effects,
    # and a lazy map() would silently skip them.
    for mount in config.get('nfs_mounts', []):
        configure_nfs(node['node_name'], fqdn, mount['mount_point'], mount['path'], config)
    for cmd in config.get('ssh_commands', []):
        ssh_command(username, fqdn, cmd)
    return node

def main():
    """Post-process every node of a BDE cluster described by a JSON config.

    Usage: bde_cluster_post_deploy.py CONFIG_FILE

    The config supplies the Serengeti host and credentials, the cluster name,
    the DHCP domain, the node password and the output host file name. All
    nodes of the cluster whose name matches the optional 'name_filter_regex'
    go through phase 1 and then phase 2; their host names are finally written
    to 'host_file_name', one per line. Exits with status 2 when no config
    file is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: ' + sys.argv[0] + ' CONFIG_FILE\n')
        sys.exit(2)

    print('bde_cluster_post_deploy.py version ' + app_ver + '\n')

    config = load_json_from_file(sys.argv[1])

    serurl = config['ser_host'] + '/serengeti'
    opener = serengeti_auth(serurl, config['ser_username'], config['ser_password'])

    cluster_data = json.loads(serengeti_api_read(serurl, '/cluster/' + config['cluster_name'], opener))
    print(json.dumps(cluster_data, sort_keys=True, indent=4, ensure_ascii=False))

    # An empty pattern matches every name, so no filter means all nodes.
    name_filter = config.get('name_filter_regex', '')
    nodes = []
    for node_group in cluster_data['nodeGroups']:
        for instance in node_group['instances']:
            node_name = instance['name']
            if re.match(name_filter, node_name) is None:
                continue
            nodes.append({
                'node_name': node_name,
                'ip': instance['ipConfigs']['MGT_NETWORK'][0]['ipAddress'],
                'fqdn': node_name + config['dhcp_domain']
            })

    # Finish phase 1 on every node before phase 2 starts on any of them.
    # List comprehensions run eagerly; a lazy map() would defer the work.
    nodes = [configure_node_phase_1(n, config, password=config['node_password']) for n in nodes]
    nodes = [configure_node_phase_2(n, config, password=config['node_password']) for n in nodes]

    with open(config['host_file_name'], 'w') as host_file:
        for node in nodes:
            host_file.write(node['fqdn'] + '\n')

    print('Success!')


if __name__ == "__main__":
    main()

13 changes: 13 additions & 0 deletions bde/centos6.4-overwrite/etc/rc.d/rc.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
#
# This script will be executed *after* all the other init scripts.
# You can put your own initialization stuff in here if you don't
# want to do the full Sys V style init stuff.

touch /var/lock/subsys/local
# Need to set HOME explicitly due to http://tickets.opscode.com/browse/CHEF-2646
export HOME=/root
# Run /etc/chef/bootstrap_node.rb through "knife exec" using the client
# configuration in /etc/chef/client.rb. Standard output is discarded;
# standard error is not redirected.
knife exec /etc/chef/bootstrap_node.rb -c /etc/chef/client.rb > /dev/null

# Below recommended by Cloudera for Centos 6.4
# Writes "never" to the transparent hugepage defrag setting.
echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag
43 changes: 43 additions & 0 deletions bde/centos6.4-overwrite/etc/sysctl.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Kernel sysctl configuration file for Red Hat Linux
#
# For binary values, 0 is disabled, 1 is enabled. See sysctl(8) and
# sysctl.conf(5) for more details.

# Controls IP packet forwarding
net.ipv4.ip_forward = 0

# Controls source route verification
net.ipv4.conf.default.rp_filter = 1

# Do not accept source routing
net.ipv4.conf.default.accept_source_route = 0

# Controls the System Request debugging functionality of the kernel
kernel.sysrq = 0

# Controls whether core dumps will append the PID to the core filename.
# Useful for debugging multi-threaded applications.
kernel.core_uses_pid = 1

# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 1

# Disable netfilter on bridges.
net.bridge.bridge-nf-call-ip6tables = 0
net.bridge.bridge-nf-call-iptables = 0
net.bridge.bridge-nf-call-arptables = 0

# Controls the default maximum size of a message queue
kernel.msgmnb = 65536

# Controls the maximum size of a message, in bytes
kernel.msgmax = 65536

# Controls the maximum shared segment size, in bytes
kernel.shmmax = 68719476736

# Controls the total amount of shared memory available system-wide, in pages
kernel.shmall = 4294967296

# Below recommended by Cloudera
vm.swappiness = 0
35 changes: 35 additions & 0 deletions bde/create_cdh_users.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Create the CDH service groups and users with sequential numeric IDs.
# Groups get GIDs counting up from gid_base and users get UIDs counting up
# from uid_base, in the order listed below. Errors (for example, accounts
# that already exist) are ignored.

gid_base=601
uid_base=601

# Service accounts; each gets a user whose primary group has the same name.
# Keep the order stable: the position in this list determines the UID/GID.
users="hdfs mapred hbase hive yarn oozie sentry impala spark hue sqoop2 solr sqoop httpfs llama zookeeper flume sample"
# Every per-user group, followed by the shared groups that have no user.
group_names="$users hadoop supergroup"

# To remove everything this script creates, uncomment the loops below.
#for user in $users; do
#    userdel -f -r $user
#done

#for group in $group_names; do
#    groupdel $group
#done

gid=$gid_base
for group in $group_names; do
    groupadd --gid $gid $group
    gid=$(($gid + 1))
done

uid=$uid_base
for user in $users; do
    adduser --uid $uid --gid $user $user
    uid=$(($uid + 1))
done

# Supplementary group memberships.
groupmems --group hadoop --add hdfs
groupmems --group hadoop --add yarn
groupmems --group hadoop --add mapred
groupmems --group hdfs --add impala
groupmems --group sqoop --add sqoop2
groupmems --group hive --add impala

# Ignore errors.
true

57 changes: 57 additions & 0 deletions bde/fill_disk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/python
# Written by Claudio Fahey ([email protected])

import os
import multiprocessing
import sys
import getopt
import shutil
import functools
import socket

def fill_disk_disk(config):
    """Fill the disk mounted at ``config['disk']`` with zeros, then free it.

    ``dd`` writes zeros to 'filldisk.dat' on that disk (no size limit is
    given), and the file is removed afterwards. Progress lines are printed
    with the local host name and the disk path as a prefix.
    """
    disk = config['disk']

    def report(text):
        print(socket.gethostname() + ': ' + disk + ': ' + text)

    tmpfile = os.path.join(disk, 'filldisk.dat')
    cmd = 'dd if=/dev/zero of=' + tmpfile + ' bs=1M ; rm -f ' + tmpfile
    report('# ' + cmd)
    os.system(cmd)
    report('Done.')

def fill_disk_host(config):
    """Fill every disk found under the mount directory, all in parallel.

    config -- settings dict. The optional 'mount_dir' entry names the
              directory whose entries are treated as disks (default
              '/data/'). Each worker receives a copy of ``config`` with a
              'disk' entry added.

    Does nothing except print a notice when the directory has no entries.
    """
    mountdir = config.get('mount_dir', '/data/')
    disks = [os.path.join(mountdir, f) for f in os.listdir(mountdir)]
    configs = [dict(config, disk=disk) for disk in disks]

    if not configs:
        # Pool(0) raises ValueError, so bail out before creating the pool.
        print(socket.gethostname() + ': No disks found under ' + mountdir)
        return

    # One worker per disk so that all disks are written concurrently.
    pool = multiprocessing.Pool(len(configs))
    pool.map_async(fill_disk_disk, configs)
    pool.close()
    pool.join()
    print(socket.gethostname() + ': All disks complete.')

def fill_disk_remote_host(config):
    """Run this script in '--host' mode on ``config['host_name']`` over ssh.

    The remote command is ``config['fill_disk_script_path']`` followed by
    '--host', executed as root. The command line is echoed before it runs.
    """
    cmd = ' '.join([
        'ssh',
        'root@' + config['host_name'],
        config['fill_disk_script_path'],
        '--host',
    ])
    print('# ' + cmd)
    os.system(cmd)

def fill_disk_all_hosts(config):
    """Fill the disks of every host listed in ``config['host_file']``.

    The host file holds one host name per line; blank lines are skipped.
    Each worker receives a copy of ``config`` with a 'host_name' entry added,
    and all hosts are processed in parallel.

    Does nothing except print a notice when the file lists no hosts.
    """
    with open(config['host_file']) as f:
        host_names = [line.strip() for line in f if line.strip()]
    configs = [dict(config, host_name=h) for h in host_names]

    if not configs:
        # Pool(0) raises ValueError, so bail out before creating the pool.
        print('No hosts listed in ' + config['host_file'] + '; nothing to do.')
        return

    # One worker per host so that all hosts are driven concurrently.
    pool = multiprocessing.Pool(len(configs))
    pool.map_async(fill_disk_remote_host, configs)
    pool.close()
    pool.join()
    print('All hosts complete.')

def main():
    """Entry point: fill local disks ('--host') or the disks of many hosts.

    Usage: fill_disk.py --host       fill the disks of this machine
           fill_disk.py HOST_FILE    run '--host' mode on every listed host

    In the second form the absolute path of this script is passed along so
    that it can be invoked on the remote hosts. Exits with status 2 when no
    argument is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: ' + sys.argv[0] + ' --host | HOST_FILE\n')
        sys.exit(2)

    mode = sys.argv[1]
    if mode == '--host':
        fill_disk_host({})
    else:
        # Anything other than '--host' is taken as the host file name.
        fill_disk_script_path = os.path.realpath(__file__)
        fill_disk_all_hosts({'host_file': mode, 'fill_disk_script_path': fill_disk_script_path})


if __name__ == '__main__':
    main()

39 changes: 39 additions & 0 deletions bde/mount_data_directories.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/perl
# Remount the data disks (/dev/sd?1, except /dev/sda1) under /data/1,
# /data/2, ... and record them in /etc/fstab, replacing any earlier
# /mnt/scsi-* or /data/ entries.

# Remove mounts from fstab
# Lines containing '/mnt/scsi-' or '/data/' are filtered out into /tmp/fstab,
# which is then copied back. Because the two commands are joined with ';',
# only a failing cp makes the script die here.
system("egrep -v '/mnt/scsi-|/data/' /etc/fstab > /tmp/fstab ; cp /tmp/fstab /etc/fstab") && die;

# Unmount from /mnt/scsi-*
# The umount result is ignored; a failing rmdir aborts the script.
@mounts = `ls -d /mnt/scsi-*`;
foreach my $mount (@mounts)
{
chomp($mount);
system("umount $mount");
system("rmdir $mount") && die;
}

# Make sure the parent directory for the new mount points exists.
system("mkdir -p /data");

# Get list of disk partitions to mount
# (first partition of every sd? device except sda, in sorted order)
@disks = `ls /dev/sd?1 | grep -v /dev/sda1 | sort`;
# Mount points are numbered from 1 in the order of the sorted disk list.
my $data_number = 1;
foreach my $disk (@disks)
{
chomp($disk);
$mount = "/data/$data_number";
if (-l $mount)
{
# Remove symlink
system("rm $mount");
}
if (! -d $mount)
{
system("mkdir /data/$data_number") && die;
}
# Append an ext4 entry (options defaults,noatime; dump 0, pass 0) for this
# disk to /etc/fstab, then mount it. The mount result is not checked.
system("echo $disk\t$mount\text4\tdefaults,noatime\t0\t0 >> /etc/fstab") && die;
system("mount $mount");
$data_number++;
}

# Show the resulting /data/ mounts.
system("mount | grep /data/");

12 changes: 12 additions & 0 deletions bde/remote_mount_nfs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Mount an NFS export on a remote host and record it in that host's
# /etc/fstab.
#
# Usage: remote_mount_nfs.sh HOST MOUNT_POINT NFS_PATH
#
# All remote commands run as root over ssh. Existing fstab lines that
# mention MOUNT_POINT are removed before the new entry is appended. The
# script stops at the first failing ssh call, passing on its exit status.

# Refuse to run with missing or empty arguments: an empty mount point would
# make the grep/cp step below replace the remote /etc/fstab with an empty
# file.
if [ $# -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "usage: $0 HOST MOUNT_POINT NFS_PATH" >&2
    exit 2
fi

USER_HOST="root@$1"
MOUNT="$2"
NFSPATH="$3"

echo "${USER_HOST}"
# Unmount any previous mount (failure is fine) and ensure the directory exists.
ssh "${USER_HOST}" "umount ${MOUNT} ; mkdir -p ${MOUNT}" || exit
# Drop existing fstab lines that mention the mount point.
ssh "${USER_HOST}" "grep -v ${MOUNT} /etc/fstab > /tmp/fstab ; cp /tmp/fstab /etc/fstab" || exit
ssh "${USER_HOST}" "echo ${NFSPATH} ${MOUNT} nfs nolock,nfsvers=3,tcp,rw,hard,intr,timeo=600,retrans=2,rsize=131072,wsize=524288 >> /etc/fstab" || exit
# Mount everything in fstab and list the mount point as a quick check.
ssh "${USER_HOST}" "mount -a; ls -lh ${MOUNT}" || exit

Loading

0 comments on commit 67896b9

Please sign in to comment.