#!/usr/bin/env python
"""One-off migration of Mingle cards into Phabricator Maniphest tasks.

For every card number in a fixed range the script downloads the card XML
from Mingle, re-uploads the images found in the rendered description to
Phabricator, converts the description and the comments from HTML to
Remarkup and creates a Maniphest task with the mapped projects, priority,
owner and status.

The script targets Python 2 (urllib2, the 'base64' str codec).
"""

import os
import re
import socket
import sys
import unicodedata
import urllib2
from xml.dom import minidom

from bs4 import BeautifulSoup
from phabricator import Phabricator

socket.setdefaulttimeout( 30 )

phab = Phabricator()
phab.update_interfaces()

# Name of the Mingle card property whose card number selects an extra project.
mingleProjectProperty = 'Release tree - Epic Story'
defaultProject = 'PHID-PROJ-qfqb3v2nklkvljicr6ak' # Multimedia
# Mingle card number (value of mingleProjectProperty) -> Phabricator project PHID.
projectMap = {
    534: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    8: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    12: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    184: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    72: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    60: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    62: 'PHID-PROJ-cabyqp5sf4hyvauln3sq', # Media Viewer
    532: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq', # Upload Wizard
    76: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq', # Upload Wizard
    77: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq', # Upload Wizard
    10: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq', # Upload Wizard
    941: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq', # Upload Wizard
    531: 'PHID-PROJ-nwze4kl6xadc2dokzxxs' # Structured data
}

# Mingle login -> Phabricator user name, used for @mentions in comments.
userMap = {
    'fflorin': 'Fabrice_Florin',
    'gdubuc': 'Gilles',
    'mholmquist': 'MarkTraceur',
    'gtisza': 'Tgr',
    'pginer': 'Pginer-WMF'
}

minglePriorityProperty = 'Priority'
# Mingle priority label -> value passed as Maniphest task priority.
priorityMap = {
    'Must have': 80,
    'Should have': 50,
    'Could have': 25,
    'Won\'t have': 10
}

mingleOwnerProperty = 'Owner'
# Mingle login -> Phabricator user PHID, used as task owner.
ownerMap = {
    'fflorin': 'PHID-USER-dbudsaorcqut7sg3vvbi',
    'gdubuc': 'PHID-USER-papbtlagfolot4dzerne',
    'mholmquist': 'PHID-USER-nvavrb7ko66hv3xap6sb',
    'gtisza': 'PHID-USER-a6p24cvyblhfzc7we7nc',
    'pginer': 'PHID-USER-c47vnc2yxmwfvvc4367q'
}

mingleStatusProperty = 'Status'
# Mingle status label -> Maniphest status; anything else stays 'open'.
statusMap = {
    'Accepted': 'resolved'
}

# Only cards of these Mingle card types are migrated.
cardTypeWhitelist = [
    'Bug',
    'Story',
    'Tech debt',
    'Scope Increase (UNPLANNED)',
    'Task'
]

# NOTE(review): in the copy of this file that was reviewed, every HTML tag and
# entity inside the literals below had been stripped (patterns read '' or
# ']*>'), which made str.replace() insert text between every character.
# The tag/entity names here were reconstructed from the replacement values
# and must be confirmed against the version-control original.
# One further literal replacement (shown as '>' -> '') could not be recovered
# and is omitted, because as shown it would delete every closing angle
# bracket and defeat the tag regexps below -- TODO confirm.
_literalReplacements = (
    ( '&nbsp;', '' ),
    ( '<br><br>', '' ),
    ( '</p>', '' ),
    ( '</ul>', '' ),
    ( '</ol>', '' ),
    ( '</li>', '' ),
    ( '</div>', '' ),
    ( '</h1>', ' =' ),
    ( '</h2>', ' ==' ),
    ( '</h3>', ' ===' ),
    ( '</h4>', ' ====' ),
    ( '</pre>', '```' ),
    ( '</b>', '**' ),
    ( '</strong>', '** ' ),
    ( '</del>', '~~' ),
    ( '</strike>', '~~' ),
    ( '<br>', '' ),
    ( '&lt;', '<' ),
    ( '&gt;', '>' ),
    ( '&#123;', '{' ),
    ( '&#125;', '}' ),
    ( '</span>', '' ),
    ( '</em>', '//' ),
)

# Applied in order after the literal replacements. \b keeps short tag names
# from swallowing longer ones (e.g. <p ...> vs <pre>, <b> vs <br>).
_regexReplacements = (
    ( r'<p\b[^>]*>', r'' ),
    ( r'<ul\b[^>]*>', r'' ),
    ( r'<ol\b[^>]*>', r'' ),
    ( r'<div\b[^>]*>', r'' ),
    ( r'<span\b[^>]*>', r'' ),
    # Special case for redundant Mingle links: we turn them into their link
    # text (a card number), which is linkified by the '#N' rule applied last.
    ( r'<a\b[^>]*?\bhref="([^"]*/cards/[0-9]+)"[^>]*>([^<]+)</a>', r'\2' ),
    ( r'<a\b[^>]*?\bhref="([^"]+)"[^>]*>([^<]+)</a>', r' [[\1 | \2]] ' ),
    ( r'\s*<li\b[^>]*>', r'\n * ' ),
    ( r'<h1\b[^>]*>', r'= ' ),
    ( r'<h2\b[^>]*>', r'== ' ),
    ( r'<h3\b[^>]*>', r'=== ' ),
    ( r'<h4\b[^>]*>', r'==== ' ),
    ( r'<strong\b[^>]*>', r' **' ),
    ( r'<(?:del|strike)\b[^>]*>', r'~~' ),
    ( r'<pre\b[^>]*>', r'```' ),
    ( r'<b\b[^>]*>', r'**' ),
    ( r'<em\b[^>]*>', r'//' ),
)


def _fetch( url ):
    """Download url and return the response body, closing the connection."""
    response = urllib2.urlopen( url, timeout=30 )
    try:
        return response.read()
    finally:
        response.close()


def _firstText( parent, tagName ):
    """Return the value of the first child node of the first tagName element
    below parent, or None when the element is missing or empty."""
    elements = parent.getElementsByTagName( tagName )
    if not elements or elements[0].firstChild is None:
        return None
    return elements[0].firstChild.nodeValue


def _imagePattern( imgMarkup ):
    """Return a regexp source matching imgMarkup literally, except that each
    space matches any run of whitespace and '/>' may be preceded by
    whitespace."""
    parts = []
    for token in re.split( r'( |/>)', imgMarkup ):
        if token == ' ':
            parts.append( r'\s*' )
        elif token == '/>':
            parts.append( r'\s*/>' )
        else:
            # Escape so that '?', '.', '(' etc. in URLs are matched literally.
            parts.append( re.escape( token ) )
    return ''.join( parts )


def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist."""
    return ''.join( node.data for node in nodelist if node.nodeType == node.TEXT_NODE )


def uploadImageToPhabricator( image64, name ):
    """Upload base64-encoded file data to Phabricator under the given name
    and return the 'objectName' reported by file.info for the new file."""
    result = phab.file.upload( data_base64=image64, name=name )
    phid = result.response
    result = phab.file.info( phid=phid )
    return result.response['objectName']


def transloadImages( html ):
    """Copy every downloadable <img> of html to Phabricator.

    Returns a dict mapping a regexp source (the escaped alt text when the
    image has one, otherwise a whitespace-tolerant pattern for the tag's
    markup) to the Phabricator object name of the uploaded file.
    Images that cannot be downloaded are skipped.
    """
    images = {}
    parsedHtml = BeautifulSoup( html )

    for parsedImg in parsedHtml.find_all( 'img' ):
        src = parsedImg.get( 'src' )
        if not src:
            continue

        try:
            imageBinary = _fetch( src )
        except ( urllib2.URLError, socket.error, ValueError, AttributeError ):
            # Best effort: unreachable hosts, timeouts, scheme-less or
            # data: URLs are skipped rather than aborting the migration.
            continue

        image64 = imageBinary.encode( 'base64' )
        alt = parsedImg.get( 'alt' )

        if alt:
            pattern = re.escape( alt )
        else:
            pattern = _imagePattern( str( parsedImg ) )

        # The upload needs a usable file name even when there is no alt text.
        uploadName = alt or os.path.basename( src.split( '?', 1 )[0] ) or 'image'
        images[ pattern ] = uploadImageToPhabricator( image64, uploadName )

    return images


def ghettoHtmlToRemarkup( html, images, mingleSite, project ):
    """Convert a fragment of Mingle HTML to Phabricator Remarkup.

    images maps regexp sources to Phabricator file object names (see
    transloadImages); every match is replaced by a full-size file embed.
    mingleSite and project are used to turn '#123' card references into
    links back to the Mingle card.
    """
    remarkup = html

    for pattern, objectName in images.items():
        remarkup = re.sub( pattern, r'{' + objectName + ', size=full}', remarkup )

    for needle, replacement in _literalReplacements:
        remarkup = remarkup.replace( needle, replacement )

    for pattern, replacement in _regexReplacements:
        remarkup = re.sub( pattern, replacement, remarkup )

    cardLink = ' [[' + mingleSite + '/projects/' + project + '/cards/\\1 | #\\1]] '
    remarkup = re.sub( r'#([0-9]+)', cardLink, remarkup )

    return remarkup


def postComment( cardUrl, phabId, username, datetime, comment, mingleSite, project ):
    """Add a Mingle comment to Maniphest task phabId as a quoted remark that
    names the original author and timestamp and links back to cardUrl."""
    if username in userMap:
        username = '@' + userMap[ username ]

    comment = ghettoHtmlToRemarkup( comment, {}, mingleSite, project )
    date = datetime.replace( 'T', ' at ' ).replace( 'Z', '' )

    comment = '>>! In [[' + cardUrl + ' | mingle]] on ' + date + ', ' + username + ' wrote:\n\n' + comment
    phab.maniphest.update( id=phabId, comments=comment )


def processCard( mingleSite, project, cardNumber ):
    """Migrate one Mingle card (description, images, properties, comments)
    to a new Maniphest task and print the task's URI.

    Cards that cannot be fetched or whose type is not in cardTypeWhitelist
    are reported and skipped.
    """
    cardLabel = "Card " + str( cardNumber )
    apiBase = mingleSite + '/api/v2/projects/' + project + '/cards/' + str( cardNumber )

    try:
        cardXml = _fetch( apiBase + '.xml' )
    except ( urllib2.URLError, socket.error ):
        print( cardLabel + " not found" )
        return

    xmlDocument = minidom.parseString( cardXml )

    cardType = _firstText( xmlDocument.getElementsByTagName( 'card_type' )[0], 'name' )
    if cardType not in cardTypeWhitelist:
        print( cardLabel + " is an undesirable card type (" + str( cardType ) + ")" )
        return

    descriptionUrl = xmlDocument.getElementsByTagName( 'rendered_description' )[0].attributes['url'].value
    try:
        images = transloadImages( _fetch( descriptionUrl ) )
    except ( urllib2.URLError, socket.error ):
        # The task is still worth creating without its images.
        print( cardLabel + " rendered description could not be fetched, images skipped" )
        images = {}

    name = _firstText( xmlDocument, 'name' ) or ''
    descriptionSimpleHtml = _firstText( xmlDocument, 'description' ) or ''

    cardUrl = mingleSite + '/projects/' + project + '/cards/' + str( cardNumber )
    description = '//Migrated from: ' + cardUrl + ' //\n\n' + ghettoHtmlToRemarkup( descriptionSimpleHtml, images, mingleSite, project )

    projects = [ defaultProject ]
    projectCardId = 0
    projectPriority = 90
    projectOwner = None
    projectStatus = 'open'

    for prop in xmlDocument.getElementsByTagName( 'property' ):
        propName = _firstText( prop, 'name' )

        if propName == mingleProjectProperty:
            number = _firstText( prop, 'number' )
            if number is not None:
                projectCardId = int( number )
        elif propName == minglePriorityProperty:
            projectPriority = priorityMap.get( _firstText( prop, 'value' ), projectPriority )
        elif propName == mingleOwnerProperty:
            projectOwner = ownerMap.get( _firstText( prop, 'login' ), projectOwner )
        elif propName == mingleStatusProperty:
            projectStatus = statusMap.get( _firstText( prop, 'value' ), projectStatus )

    if projectCardId in projectMap:
        projects.append( projectMap[ projectCardId ] )

    result = phab.maniphest.createtask(
        title=name,
        description=description,
        projectPHIDs=projects,
        priority=projectPriority,
        ownerPHID=projectOwner
    )
    taskId = result.response['id']

    try:
        commentsXml = _fetch( apiBase + '/comments.xml' )
    except ( urllib2.URLError, socket.error ):
        # The task already exists; do not lose the status update below.
        print( cardLabel + " comments could not be fetched" )
        comments = []
    else:
        comments = minidom.parseString( commentsXml ).getElementsByTagName( 'comment' )

    # Comments are posted in the reverse of the order Mingle returns them.
    for comment in reversed( comments ):
        content = _firstText( comment, 'content' ) or ''
        createdAt = _firstText( comment, 'created_at' ) or ''
        username = _firstText( comment, 'login' ) or ''
        postComment( cardUrl, taskId, username, createdAt, content, mingleSite, project )

    if projectStatus != 'open':
        phab.maniphest.update( id=taskId, status=projectStatus )

    print( result.response['uri'] )


for i in range(1, 1100):
    processCard( 'https://wikimedia.mingle.thoughtworks.com', 'multimedia', i )