#!/usr/bin/env python
# Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
-# License: GPLv2
-
-"""hg2git.py - A mercurial-to-git filter for git-fast-import(1)
-Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
-"""
+# License: MIT <http://www.opensource.org/licenses/mit-license.php>
from mercurial import repo,hg,cmdutil,util,ui,revlog,node
-from tempfile import mkstemp
import re
-import sys
import os
+import sys
-# silly regex to see if user field has email address
-user_re=re.compile('[^<]+ <[^>]+>$')
-# git branch for hg's default 'HEAD' branch
+# default git branch name
cfg_master='master'
-# insert 'checkpoint' command after this many commits
-cfg_checkpoint_count=1000
-
-def usage(ret):
- sys.stderr.write(__doc__)
- return ret
+# silly regex to see if user field has email address
+user_re=re.compile('([^<]+) (<[^>]+>)$')
+# silly regex to clean out user names
+user_clean_re=re.compile('^["]([^"]+)["]$')
def setup_repo(url):
- myui=ui.ui()
+ myui=ui.ui(interactive=False)
return myui,hg.repository(myui,url)
-def get_changeset(ui,repo,revision):
- def get_branch(name):
- if name=='HEAD':
- name=cfg_master
- return name
- def fixup_user(user):
- if user_re.match(user)==None:
- if '@' not in user:
- return user+' <none@none>'
- return user+' <'+user+'>'
- return user
+def fixup_user(user,authors):
+ if authors!=None:
+ # if we have an authors table, try to get mapping
+ # by defaulting to the current value of 'user'
+ user=authors.get(user,user)
+ name,mail,m='','',user_re.match(user)
+ if m==None:
+ # if we don't have 'Name <mail>' syntax, use 'user
+ # <devnull@localhost>' if use contains no at and
+ # 'user <user>' otherwise
+ name=user
+ if '@' not in user:
+ mail='<devnull@localhost>'
+ else:
+ mail='<%s>' % user
+ else:
+ # if we have 'Name <mail>' syntax, everything is fine :)
+ name,mail=m.group(1),m.group(2)
+
+ # remove any silly quoting from username
+ m2=user_clean_re.match(name)
+ if m2!=None:
+ name=m2.group(1)
+ return '%s %s' % (name,mail)
+
+def get_branch(name):
+ # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
+ # other CVS imports may need it, too
+ if name=='HEAD' or name=='default' or name=='':
+ name=cfg_master
+ return name
+
+def get_changeset(ui,repo,revision,authors={}):
node=repo.lookup(revision)
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node)
tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60))
branch=get_branch(extra.get('branch','master'))
- return (manifest,fixup_user(user),(time,tz),files,desc,branch,extra)
-
-def gitmode(x):
- return x and '100755' or '100644'
-
-def wr(msg=''):
- print msg
- #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
-
-def checkpoint(count):
- count=count+1
- if count%cfg_checkpoint_count==0:
- sys.stderr.write("Checkpoint after %d commits\n" % count)
- wr('checkpoint')
- wr()
- return count
-
-def get_parent_mark(parent,marks):
- """Get the mark for some parent.
- If we saw it in the current session, return :%d syntax and
- otherwise the SHA1 from the cache."""
- return marks.get(str(parent+1),':%d' % (parent+1))
-
-def mismatch(x,f1,f2):
- """See if two revisions of a file are not equal."""
- return node.hex(f1)!=node.hex(f2)
-
-def outer_set(dleft,dright,l,r):
- """Loop over our repository in and find all changed and missing files."""
- for left in dleft.keys():
- right=dright.get(left,None)
- if right==None or mismatch('A',dleft[left],right):
- # if either have the current file not in parent or the
- # checksums differ: add it to changed files
- l.append(left)
- for right in dright.keys():
- left=dleft.get(right,None)
- if left==None:
- # if we have a file in the parent but not our manifest,
- # add it to deleted files; checksums are checked earlier
- r.append(right)
- return l,r
-
-def get_filechanges(repo,revision,parents,mleft):
- """Given some repository and revision, find all changed/deleted files."""
- l,r=[],[]
- for p in parents:
- if p<0: continue
- mright=repo.changectx(p).manifest()
- dleft=mleft.keys()
- dleft.sort()
- dright=mright.keys()
- dright.sort()
- l,r=outer_set(mleft,mright,l,r)
- return l,r
-
-def export_commit(ui,repo,revision,marks,heads,last,max,count):
- (_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision)
- parents=repo.changelog.parentrevs(revision)
-
- wr('commit refs/heads/%s' % branch)
- wr('mark :%d' % (revision+1))
- wr('committer %s %d %s' % (user,time,timezone))
- wr('data %d' % (len(desc)+1)) # wtf?
- wr(desc)
- wr()
-
- src=heads.get(branch,'')
- link=''
- if src!='':
- # if we have a cached head, this is an incremental import: initialize it
- # and kill reference so we won't init it again
- wr('from %s' % src)
- heads[branch]=''
- sys.stderr.write('Initializing branch [%s] to parent [%s]\n' %
- (branch,src))
- link=src # avoid making a merge commit for incremental import
- elif link=='' and not heads.has_key(branch) and revision>0:
- # newly created branch and not the first one: connect to parent
- tmp=get_parent_mark(parents[0],marks)
- wr('from %s' % tmp)
- sys.stderr.write('Link new branch [%s] to parent [%s]\n' %
- (branch,tmp))
- link=tmp # avoid making a merge commit for branch fork
-
- if parents:
- l=last.get(branch,revision)
- for p in parents:
- # 1) as this commit implicitely is the child of the most recent
- # commit of this branch, ignore this parent
- # 2) ignore nonexistent parents
- # 3) merge otherwise
- if p==l or p==revision or p<0:
- continue
- tmp=get_parent_mark(p,marks)
- # if we fork off a branch, don't merge with our parent via 'merge'
- # as we have 'from' already above
- if tmp==link:
- continue
- sys.stderr.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
- (branch,tmp,p))
- wr('merge %s' % tmp)
-
- last[branch]=revision
- heads[branch]=''
- # we need this later to write out tags
- marks[str(revision)]=':%d'%(revision+1)
+ return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
- ctx=repo.changectx(str(revision))
- man=ctx.manifest()
- added,removed=get_filechanges(repo,revision,parents,man)
+def mangle_key(key):
+ return key
- sys.stderr.write('Exporting revision %d with %d changed/%d removed files\n' %
- (revision,len(added),len(removed)))
-
- for a in added:
- fctx=ctx.filectx(a)
- d=fctx.data()
- wr('M %s inline %s' % (gitmode(man.execf(a)),a))
- wr('data %d' % len(d)) # had some trouble with size()
- wr(d)
-
- for r in removed:
- wr('D %s' % r)
-
- wr()
- return checkpoint(count)
-
-def export_tags(ui,repo,cache,count):
- l=repo.tagslist()
- for tag,node in l:
- if tag=='tip':
- continue
- rev=repo.changelog.rev(node)
- ref=cache.get(str(rev),None)
- if ref==None:
- #sys.stderr.write('Failed to find reference for creating tag'
- # ' %s at r%d\n' % (tag,rev))
- continue
- (_,user,(time,timezone),_,desc,branch,_)=get_changeset(ui,repo,rev)
- sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
- wr('tag %s' % tag)
- wr('from %s' % ref)
- wr('tagger %s %d %s' % (user,time,timezone))
- msg='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag,
- rev,branch,desc.split('\n')[0])
- wr('data %d' % (len(msg)+1))
- wr(msg)
- wr()
- count=checkpoint(count)
- return count
-
-def load_cache(filename):
+def load_cache(filename,get_key=mangle_key):
cache={}
if not os.path.exists(filename):
return cache
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue
# put key:value in cache, key without ^:
- cache[fields[0][1:]]=fields[1].split('\n')[0]
+ cache[get_key(fields[0][1:])]=fields[1].split('\n')[0]
f.close()
return cache
map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys())
f.close()
-def verify_heads(ui,repo,cache):
- def getsha1(branch):
- f=open(os.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch)
- sha1=f.readlines()[0].split('\n')[0]
- f.close()
- return sha1
-
- for b in cache.keys():
- sys.stderr.write('Verifying branch [%s]\n' % b)
- sha1=getsha1(b)
- c=cache.get(b)
- if sha1!=c:
- sys.stderr.write('Warning: Branch [%s] modified outside hg2git:'
- '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
- return True
-
-if __name__=='__main__':
- if len(sys.argv)!=6: sys.exit(usage(1))
- repourl,m,marksfile,headsfile,tipfile=sys.argv[1:]
- _max=int(m)
-
- marks_cache=load_cache(marksfile)
- heads_cache=load_cache(headsfile)
- state_cache=load_cache(tipfile)
-
- ui,repo=setup_repo(repourl)
-
- if not verify_heads(ui,repo,heads_cache):
- sys.exit(1)
-
- tip=repo.changelog.count()
-
- min=int(state_cache.get('tip',0))
- max=_max
- if _max<0:
- max=tip
-
- c=int(state_cache.get('count',0))
- last={}
- for rev in range(min,max):
- c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,tip,c)
-
- c=export_tags(ui,repo,marks_cache,c)
-
- state_cache['tip']=max
- state_cache['count']=c
- state_cache['repo']=repourl
- save_cache(tipfile,state_cache)
+def get_git_sha1(name,type='heads'):
+ try:
+ # use git-rev-parse to support packed refs
+ cmd="GIT_DIR='%s' git rev-parse --verify refs/%s/%s 2>/dev/null" % (os.getenv('GIT_DIR','/dev/null'),type,name)
+ p=os.popen(cmd)
+ l=p.readline()
+ p.close()
+ if l == None or len(l) == 0:
+ return None
+ return l[0:40]
+ except IOError:
+ return None