Updates to cpusets

Signed-off-by: Martin J. Bligh <[email protected]>



git-svn-id: http://test.kernel.org/svn/autotest/trunk@475 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/client/bin/autotest_utils.py b/client/bin/autotest_utils.py
index 1deee1c..b5dab84 100755
--- a/client/bin/autotest_utils.py
+++ b/client/bin/autotest_utils.py
@@ -1,7 +1,7 @@
 """Convenience functions for use by tests or whomever.
 """
 
-import os,os.path,shutil,urllib,sys,signal,commands,pickle
+import os,os.path,shutil,urllib,sys,signal,commands,pickle,glob
 from error import *
 import re,string
 
@@ -96,7 +96,7 @@
 	return 0
 
 
-def get_file(src, dest):
+def get_file(src, dest, permissions = None):
 	"""get a file, either from url or local"""
 	if (src == dest):      # no-op here allows clean overrides in tests
 		return
@@ -108,8 +108,10 @@
 		except IOError:
 			sys.stderr.write("Unable to retrieve %s (to %s)\n" % (src, dest))
 			sys.exit(1)
-		return dest
-	shutil.copyfile(src, dest)
+	else:
+		shutil.copyfile(src, dest)
+	if permissions:
+		os.chmod(dest, permissions)  # chmod takes the path first, then the mode
 	return dest
 
 
@@ -446,3 +448,14 @@
 	gig = meg / 1024.0
 	return "%.2fG" % gig
 
+
+def numa_nodes():
+	"""Return the sorted node numbers taken from /sys/devices/system/node/node*."""
+	ids = [int(re.sub(r'.*node(\d+)', r'\1', path)) for path in glob.glob('/sys/devices/system/node/node*')]
+	return sorted(ids)
+
+
+def node_size():  # memtotal() * 1024 divided evenly between the NUMA nodes
+	node_count = max(1, len(numa_nodes()))  # no node directories found: count it as one
+	return (memtotal() * 1024) / node_count
+
diff --git a/client/bin/cpuset.py b/client/bin/cpuset.py
index 16aee97..bc1fc86 100644
--- a/client/bin/cpuset.py
+++ b/client/bin/cpuset.py
@@ -1,15 +1,55 @@
-import os, sys, re
+__author__ = """Copyright Google, Peter Dahl, Martin J. Bligh   2007"""
+
+import os, sys, re, glob
 from autotest_utils import *
 
 class cpuset:
-	def available_cpus():
-		available = set(range(
+	@staticmethod
+	def rangelist_to_list(rangelist):
+		"""Convert a range list such as '1-3,7,9-12' to [1,2,3,7,9,10,11,12].
+		Empty items are skipped, so '' gives []; an item that is neither
+		'N' nor 'N-M' raises ValueError."""
+		result = []
+		for x in rangelist.split(','):
+			if not x.strip():
+				continue
+			m = re.match(r'^(\d+)(?:-(\d+))?$', x.strip())
+			if not m:
+				raise ValueError('Cannot understand data input %s' % x)
+			start = int(m.group(1))
+			result += range(start, int(m.group(2) or start) + 1)
+		return result
 
-	def __init__(self, job_name, job_size, job_pid, cpus):
+
+	@staticmethod
+	def available_mems(all_mems):  # set of all_mems not listed in any /dev/cpuset/*/mems
+		available = set(all_mems)
+		for mems in glob.glob('/dev/cpuset/*/mems'):
+			available -= set(cpuset.rangelist_to_list(read_one_line(mems)))
+		return available
+	available_nems = available_mems  # misspelled original name, kept as an alias
+
+	@staticmethod
+	def print_one_cpuset(name):  # print cpus, mems, memtotal and tasks of /dev/cpuset/<name>
+		path = os.path.join('/dev/cpuset', name)
+		print "%s:" % name
+		print "\tcpus: %s" % read_one_line(path + '/cpus')
+		mems = read_one_line(path + '/mems')
+		print "\tmems: %s" % mems
+		print "\tmemtotal: %s" % human_format(node_size() * len(cpuset.rangelist_to_list(mems)))
+		print "\ttasks: %s" % ','.join([x.rstrip() for x in open(path + '/tasks')])
+
+	@staticmethod
+	def print_all_cpusets():  # print every cpuset directory directly under /dev/cpuset
+		for path in filter(os.path.isdir, glob.glob('/dev/cpuset/*')):  # skip the root's own control files
+			cpuset.print_one_cpuset(os.path.basename(path))
+
+
+	def __init__(self, name, job_size, job_pid, cpus):
 		# Create a cpuset container and move job_pid into it
 		# Allocate the list "cpus" of cpus to that container
 
-		# job name = arbitrary string tag
+		# name = arbitrary string tag
 		# job size = reqested memory for job in bytes
 		# job pid = pid of job we're putting into the container
 
@@ -19,28 +59,34 @@
 			system('mount -t cpuset none /dev/cpuset')
 
 		cmdline = read_one_line('/proc/cmdline')
-		fake_numa_nodes = re.search('numa=fake=(\d+)', cmdline).group(1)
+		all_nodes = numa_nodes()
 
 		# Bind the specificed cpus to the cpuset
-		self.cpudir = "/dev/cpuset/%s" % job_name
+		self.cpudir = "/dev/cpuset/%s" % name
 		os.mkdir(self.cpudir)
 		cpu_spec = ','.join(['%d' % x for x in cpus])
-		write_one_line(cpu_spec, '/dev/cpuset/%s/cpus' % job_name)
+		write_one_line(cpu_spec, '/dev/cpuset/%s/cpus' % name)
 
-	  	node_size = memtotal() * 1024 / fake_numa_nodes
-		num_nodes = int(math.ceil(job_size / node_size))
+		# Find some free nodes to use to create this cpuset
+	  	node_size = memtotal() * 1024 / len(all_nodes)
+		nodes_needed = int(math.ceil(job_size / node_size))
+		mems = available_mems(all_nodes)[-nodes_needed:]
+		alloc_size = human_format(len(mems) * node_size)
+		if len(mems) < nodes_needed:
+			raise "Insufficient memory available"
 
-		if cpu_number == 0: # First cpuset
-			m_start = 3
-			m_end = m_start + num_nodes
-		else:
-			m_end = num_fake_numa_nodes - 1
-			m_start = m_end - num_nodes
-		alloc_size = (m_end - m_start + 1) * node_size
-		mems = os.path.join(self.cpudir, 'mems')
-		write_one_line("%d-%d" % (m_start, m_end), self.cpudir + '/mems')
-		write_one_line("%d" % job_pid, self.cpudir + 'tasks')
-		print "Cpuset: pid %d, cpu %d, memory %s --> nodes %d-%d --> %s" % (job_pid, cpu_number, int2KMG(job_size), m_start, m_end, int2KMG(alloc_size))
+		# Set up the cpuset
+		mems_spec = ','.join(['%d' % x for x in mems])
+		write_one_line(mems_spec, os.path.join(self.cpudir, 'mems'))
+		write_one_line('%d'%job_pid, os.path.join(self.cpudir, 'tasks'))
+
+		print "Created cpuset for pid %d, size %s" % \
+					(job_pid, human_format(job_size))
+		self.print()
+
+
+	def print(self):  # NOTE(review): `print` is a reserved word in Python 2 (this file uses print statements), so this def cannot parse; rename it together with the self.print() call in __init__
+		print_one_cpuset(self.name)  # NOTE(review): a bare name does not reach the class-level helper from inside a method, and no visible line assigns self.name - confirm
 
 
 	def release(self):