#!/usr/bin/python3
"""
Maps tool users to precise tools

Run like
    tail -500000 /data/project/.system/accounting | ./precise-owners.py
from tools bastions
"""
import collections
import datetime
import fileinput
import http.client
import json
import ldap3
import operator
import yaml

# Only accounting entries that ended within this many days are considered.
DAYS = 7

# Sent with the grid status request so the request can be attributed.
USER_AGENT = "Precise tools finder|labs-admin@lists.wikimedia.org"

# Names given, in order, to the colon-separated fields of an accounting line.
ACCOUNTING_FIELD_NAMES = [
    'qname',
    'hostname',
    'group',
    'owner',
    'job_name',
    'job_number',
    'account',
    'priority',
    'submission_time',
    'start_time',
    'end_time',
    'failed',
    'exit_status',
    'ru_wallclock',
    'ru_utime',
    'ru_stime',
    'ru_maxrss',
    'ru_ixrss',
    'ru_ismrss',
    'ru_idrss',
    'ru_isrss',
    'ru_minflt',
    'ru_majflt',
    'ru_nswap',
    'ru_inblock',
    'ru_oublock',
    'ru_msgsnd',
    'ru_msgrcv',
    'ru_nsignals',
    'ru_nvcsw',
    'ru_nivcsw',
    'project',
    'department',
    'granted_pe',
    'slots',
    'task_number',
    'cpu',
    'mem',
    'io',
    'category',
    'iow',
    'pe_taskid',
    'maxvemem',
    'arid',
    'ar_submission_time',
]


def ldap_conn(config):
    """
    Return a ldap connection

    Return value can be used as a context manager

    `config` must provide the keys 'servers' (iterable of host names),
    'user' and 'password'.
    """
    servers = ldap3.ServerPool(
        [ldap3.Server(host) for host in config['servers']],
        ldap3.POOLING_STRATEGY_ROUND_ROBIN,
        active=True,
        exhaust=True,
    )
    return ldap3.Connection(
        servers,
        read_only=True,
        user=config['user'],
        auto_bind=True,
        password=config['password'],
    )


def uid_from_dn(dn):
    """
    Return the value of the first component of a DN

    Ex: 'uid=foo,ou=people,dc=example' -> 'foo'
    """
    first_component = dn.split(',')[0]
    return first_component.split('=')[1]


def tools_members(config, tools):
    """
    Return a dict that has members of a tool associated with each tool

    Ex:
    {'tools.musikbot': ['musikanimal'],
     'tools.ifttt': ['slaporte', 'mahmoud', 'madhuvishy', 'ori']}

    Each distinct tool name is looked up once, even if `tools` lists it
    several times.
    """
    tool_to_members = collections.defaultdict(list)
    with ldap_conn(config) as conn:
        # dict.fromkeys drops duplicates while keeping first-seen order, so a
        # tool present in several inputs is not searched (and its members not
        # appended) more than once.
        for tool in dict.fromkeys(tools):
            # NOTE(review): the tool name is interpolated into the filter
            # unescaped; confirm the inputs can never carry LDAP filter
            # metacharacters.
            conn.search(
                'ou=servicegroups,dc=wikimedia,dc=org',
                '(cn={})'.format(tool),
                ldap3.SEARCH_SCOPE_WHOLE_SUBTREE,
                attributes=['member', 'cn'],
                time_limit=5
            )
            for resp in conn.response:
                # Response entries without attributes (or without a 'member'
                # attribute) contribute no members instead of crashing.
                attributes = resp.get('attributes') or {}
                members = attributes.get('member') or []
                tool_to_members[tool].extend(
                    uid_from_dn(member) for member in members
                )
    return tool_to_members


def members_tools(tool_to_members):
    """
    Invert a tool -> members mapping into a member -> tools mapping

    Each tool is listed at most once per member, in first-seen order.
    """
    member_to_tools = collections.defaultdict(list)
    for tool, members in tool_to_members.items():
        for member in members:
            if tool not in member_to_tools[member]:
                member_to_tools[member].append(tool)
    return member_to_tools


def is_precise_host(hostname):
    """
    Return True if the last four characters of hostname start with '12'

    NOTE(review): presumably precise exec hosts are the ones numbered 12xx;
    confirm against the grid's host naming scheme.
    """
    return hostname[-4:].startswith('12')


def grid_precise_tools():
    """
    Return the owners of jobs currently listed on precise hosts

    The data is fetched from http://tools.wmflabs.org/gridengine-status.
    An owner appears once per job, so the list may contain duplicates.
    An empty response body yields an empty list.
    """
    conn = http.client.HTTPConnection('tools.wmflabs.org')
    try:
        conn.request(
            "GET",
            "/gridengine-status",
            headers={"User-Agent": USER_AGENT},
        )
        res = conn.getresponse().read().decode('utf-8')
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

    if not res:
        return []

    all_precise_tools = []
    grid_info = json.loads(res)["data"]["attributes"]
    for hostname, info in grid_info.items():
        if is_precise_host(hostname) and info["jobs"]:
            all_precise_tools.extend(
                job["job_owner"] for job in info["jobs"].values()
            )
    return all_precise_tools


def accounting_tools():
    """
    Return owners of precise jobs that ended within the last DAYS days

    Accounting lines are read through fileinput (stdin, or the files named
    on the command line). Comment lines and lines that are too short or
    carry a non-numeric end_time are skipped. Owners are returned without
    duplicates, in first-seen order.
    """
    cutoff = (
        datetime.datetime.now() - datetime.timedelta(days=DAYS)
    ).timestamp()

    precise_tools = []
    seen = set()  # O(1) membership test; precise_tools keeps the order
    for line in fileinput.input():
        if line.startswith('#'):
            continue
        job = dict(zip(ACCOUNTING_FIELD_NAMES, line.rstrip('\n').split(':')))
        try:
            end_time = int(job['end_time'])
            category = job['category']
        except (KeyError, ValueError):
            # Truncated or malformed line: nothing usable to account for.
            continue
        if end_time < cutoff:
            continue
        owner = job['owner']
        if 'release=precise' in category and owner not in seen:
            seen.add(owner)
            precise_tools.append(owner)
    return precise_tools


def main():
    """Print, as JSON, the precise tools each tool member is responsible for"""
    with open('/etc/ldap.yaml') as f:
        config = yaml.safe_load(f)

    tool_to_members = tools_members(
        config, accounting_tools() + grid_precise_tools()
    )
    mt = members_tools(tool_to_members)
    print(json.dumps(mt, sort_keys=True, indent=4, separators=(',', ': ')))


if __name__ == '__main__':
    main()