#!/usr/bin/python3
"""
Copyright 2017 Madhumitha Viswanathan
mviswanathan@wikimedia.org

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
# Maps tool users to precise tools
# Run like tail -500000 /data/project/.system/accounting | ./precise-owners.py
# from tools bastions

import collections
import datetime
import fileinput
import http.client
import json
import ldap3
import operator
import yaml


# Column names of one colon-separated record in the grid accounting file,
# in the order they appear on the line.
ACCOUNTING_FIELD_NAMES = (
    'qname', 'hostname', 'group', 'owner', 'job_name', 'job_number',
    'account', 'priority', 'submission_time', 'start_time', 'end_time',
    'failed', 'exit_status', 'ru_wallclock', 'ru_utime', 'ru_stime',
    'ru_maxrss', 'ru_ixrss', 'ru_ismrss', 'ru_idrss', 'ru_isrss',
    'ru_minflt', 'ru_majflt', 'ru_nswap', 'ru_inblock', 'ru_oublock',
    'ru_msgsnd', 'ru_msgrcv', 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw',
    'project', 'department', 'granted_pe', 'slots', 'task_number', 'cpu',
    'mem', 'io', 'category', 'iow', 'pe_taskid', 'maxvemem', 'arid',
    'ar_submission_time',
)


def ldap_conn(config):
    """
    Return a ldap connection

    Return value can be used as a context manager

    :param config: mapping with the keys 'servers' (iterable of host names),
                   'user' and 'password'
    """
    servers = ldap3.ServerPool(
        [ldap3.Server(host) for host in config['servers']],
        ldap3.POOLING_STRATEGY_ROUND_ROBIN,
        active=True,
        exhaust=True,
    )

    return ldap3.Connection(
        servers,
        read_only=True,
        user=config['user'],
        auto_bind=True,
        password=config['password'],
    )


def uid_from_dn(dn):
    """
    Return the value of the first component of a distinguished name

    Ex: 'uid=madhuvishy,ou=people,dc=wikimedia,dc=org' -> 'madhuvishy'

    Raises IndexError when the first component contains no '='.
    """
    first_component = dn.split(',', 1)[0]
    return first_component.split('=')[1]


def tools_members(config, tools):
    """
    Return a dict that has members of a tool associated with each tool

    Ex:
    {'tools.musikbot': ['musikanimal'],
     'tools.ifttt': ['slaporte', 'mahmoud', 'madhuvishy', 'ori']}

    :param config: LDAP settings, passed straight to ldap_conn()
    :param tools: iterable of tool names; repeated names are looked up once
    """
    # Callers pass one entry per running job, so the same tool can show up
    # many times.  Query each tool once, otherwise its members would be
    # appended again for every repetition.
    unique_tools = []
    seen = set()
    for tool in tools:
        if tool not in seen:
            seen.add(tool)
            unique_tools.append(tool)

    tool_to_members = collections.defaultdict(list)
    with ldap_conn(config) as conn:
        for tool in unique_tools:
            conn.search(
                'ou=servicegroups,dc=wikimedia,dc=org',
                '(cn={})'.format(tool),
                ldap3.SEARCH_SCOPE_WHOLE_SUBTREE,
                attributes=['member', 'cn'],
                time_limit=5
            )
            for resp in conn.response or []:
                # Some response entries carry no attributes at all; treat
                # them as having no members instead of failing on None.
                attributes = resp.get('attributes') or {}
                members = attributes.get('member') or []
                tool_to_members[tool].extend(
                    uid_from_dn(member) for member in members)
    return tool_to_members


def members_tools(tool_to_members):
    """
    Invert a tool -> members mapping into a member -> tools mapping

    Each tool is listed at most once per member, even if the member is
    repeated in that tool's member list.
    """
    member_to_tools = collections.defaultdict(list)
    for tool, members in tool_to_members.items():
        for member in members:
            owned = member_to_tools[member]
            if tool not in owned:
                owned.append(tool)
    return member_to_tools


def is_precise_host(hostname):
    """
    Return True if the last four characters of hostname start with '12'

    NOTE(review): presumably precise exec hosts are numbered 12xx - confirm.
    """
    return hostname[-4:].startswith('12')


def grid_precise_tools():
    """
    Return the owners of all jobs currently running on precise hosts

    Fetches /gridengine-status from tools.wmflabs.org.  The result has one
    entry per job, so an owner can appear several times.  An empty response
    body yields an empty list.
    """
    conn = http.client.HTTPConnection('tools.wmflabs.org')
    try:
        conn.request(
            "GET", "/gridengine-status",
            headers={
                "User-Agent":
                    "Precise tools finder|labs-admin@lists.wikimedia.org"
            })
        res = conn.getresponse().read().decode('utf-8')
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

    if not res:
        return []

    all_precise_tools = []
    grid_info = json.loads(res)["data"]["attributes"]
    for hostname, info in grid_info.items():
        jobs = info.get("jobs")
        if jobs and is_precise_host(hostname):
            all_precise_tools.extend(
                job["job_owner"] for job in jobs.values())
    return all_precise_tools


def accounting_tools(days=7, lines=None):
    """
    Return owners of precise jobs that ended within the last `days` days

    Owners are returned once each, in the order they are first seen.

    :param days: how far back to look, counted from now
    :param lines: iterable of accounting records; defaults to
                  fileinput.input() (files named in argv, or stdin)
    """
    if lines is None:
        lines = fileinput.input()

    cutoff = (datetime.datetime.now() -
              datetime.timedelta(days=days)).timestamp()

    precise_tools = []
    seen = set()
    for line in lines:
        job = dict(zip(ACCOUNTING_FIELD_NAMES, line.split(':')))
        try:
            end_time = int(job['end_time'])
        except (KeyError, ValueError):
            # Not a complete job record (comment/header line, blank line or
            # a truncated record): ignore it rather than abort the run.
            continue
        if end_time < cutoff:
            continue

        owner = job['owner']
        if owner in seen:
            continue
        if 'release=precise' in job.get('category', ''):
            seen.add(owner)
            precise_tools.append(owner)
    return precise_tools


def main():
    """
    Print, as JSON, every tool member with the precise tools they belong to
    """
    with open('/etc/ldap.yaml') as f:
        config = yaml.safe_load(f)

    tool_to_members = tools_members(
        config, accounting_tools() + grid_precise_tools())
    mt = members_tools(tool_to_members)
    print(json.dumps(mt, sort_keys=True, indent=4, separators=(',', ': ')))


if __name__ == '__main__':
    main()