3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import repo,hg,cmdutil,util,ui,revlog,node
7 from hg2git import setup_repo,fixup_user,get_branch,get_changeset,load_cache,save_cache,get_git_sha1
8 from tempfile import mkstemp
9 from optparse import OptionParser
14 # silly regex to catch Signed-off-by lines in log message
15 sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
16 # insert 'checkpoint' command after this many commits or none at all if 0
17 cfg_checkpoint_count=0
18 # write some progress message every this many file contents written
19 cfg_export_boundary=1000
22 return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
28 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
30 def checkpoint(count):
32 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
33 sys.stderr.write("Checkpoint after %d commits\n" % count)
def get_parent_mark(parent,marks):
    """Return the reference to use for a parent revision.

    parent -- revision number of the parent.
    marks  -- mapping keyed by the revision number as a string.

    When `marks` has an entry for the parent, that stored value is
    returned. Otherwise the parent is referenced with the ':%d' mark
    syntax, using the revision number plus one.
    """
    # Build the fallback up front, exactly as a .get() default would be.
    fallback=':%d' % (parent+1)
    key=str(parent)
    if key in marks:
        return marks[key]
    return fallback
def file_mismatch(f1,f2):
    """Tell whether two revisions of a file differ.

    Both arguments are converted with node.hex() and the results are
    compared; returns True when they are not equal.
    """
    # presumably f1/f2 are manifest node ids -- verify against callers
    left_hex=node.hex(f1)
    right_hex=node.hex(f2)
    return left_hex!=right_hex
48 def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
49 """Loop over our repository and find all changed and missing files."""
50 for left in dleft.keys():
51 right=dright.get(left,None)
53 # we have the file but our parent hasn't: add to left set
55 elif match(dleft[left],right):
56 # we have it but checksums mismatch: add to center set
58 for right in dright.keys():
59 left=dleft.get(right,None)
61 # if parent has file but we don't: add to right set
63 # change is already handled when comparing child against parent
66 def get_filechanges(repo,revision,parents,mleft):
67 """Given some repository and revision, find all changed/deleted files."""
71 mright=repo.changectx(p).manifest()
72 l,c,r=split_dict(mleft,mright,l,c,r)
78 def get_author(logmessage,committer,authors):
79 """As git distinguishes between author and committer of a patch, try to
80 extract author by detecting Signed-off-by lines.
82 This walks from the end of the log message towards the top skipping
83 empty lines. Upon the first non-empty line, it walks all Signed-off-by
84 lines upwards to find the first one. For that (if found), it extracts
85 authorship information the usual way (authors table, cleaning, etc.)
87 If no Signed-off-by line is found, this defaults to the committer.
89 This may sound stupid (and it somehow is), but in log messages we
90 may accidentally have lines in the middle starting with
91 "Signed-off-by: foo" and thus matching our detection regex. Prevent
94 loglines=logmessage.split('\n')
96 # from tail walk to top skipping empty lines
99 if len(loglines[i].strip())==0: continue
102 # walk further upwards to find first sob line, store in 'first'
105 m=sob_re.match(loglines[i])
109 # if the last non-empty line matches our Signed-Off-by regex: extract username
111 r=fixup_user(first.group(1),authors)
115 def export_file_contents(ctx,manifest,files):
119 d=ctx.filectx(file).data()
120 wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
121 wr('data %d' % len(d)) # had some trouble with size()
124 if count%cfg_export_boundary==0:
125 sys.stderr.write('Exported %d/%d files\n' % (count,max))
126 if max>cfg_export_boundary:
127 sys.stderr.write('Exported %d/%d files\n' % (count,max))
129 def is_merge(parents):
131 for parent in parents:
136 def sanitize_name(name,what="branch"):
137 """Sanitize input roughly according to git-check-ref-format(1)"""
140 if name[0] == '.': return '_'+name[1:]
144 p=re.compile('([[ ~^:?*]|\.\.)')
146 if n[-1] == '/': n=n[:-1]+'_'
147 n='/'.join(map(dot,n.split('/')))
152 sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
155 def export_commit(ui,repo,revision,marks,heads,last,max,count,authors,sob,brmap):
156 def get_branchname(name):
157 if brmap.has_key(name):
159 n=sanitize_name(name)
163 (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
164 parents=repo.changelog.parentrevs(revision)
166 branch=get_branchname(branch)
168 wr('commit refs/heads/%s' % branch)
169 wr('mark :%d' % (revision+1))
171 wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
172 wr('committer %s %d %s' % (user,time,timezone))
173 wr('data %d' % (len(desc)+1)) # wtf?
178 if parents[0] < parents[1]:
181 src=heads.get(branch,'')
184 # if we have a cached head, this is an incremental import: initialize it
185 # and kill reference so we won't init it again
188 sys.stderr.write('%s: Initializing to parent [%s]\n' %
190 link=src # avoid making a merge commit for incremental import
191 elif link=='' and not heads.has_key(branch) and revision>0:
192 # newly created branch and not the first one: connect to parent
193 tmp=get_parent_mark(parents[0],marks)
195 sys.stderr.write('%s: Link new branch to parent [%s]\n' %
197 link=tmp # avoid making a merge commit for branch fork
198 elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
199 pm=get_parent_mark(parents[pidx1],marks)
200 sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
201 (branch,revision,branch,parents[pidx1]));
204 if parents[pidx2] > 0:
205 pm=get_parent_mark(parents[pidx2],marks)
206 sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
207 (branch,pm,parents[pidx2]))
210 last[branch]=revision
212 # we need this later to write out tags
213 marks[str(revision)]=':%d'%(revision+1)
215 ctx=repo.changectx(str(revision))
217 added,changed,removed,type=[],[],[],''
220 # first revision: feed in full manifest
224 elif is_merge(parents):
225 # later merge revision: feed in changed manifest
226 # for many files comparing checksums is expensive so only do it for
227 # merges where we really need it due to hg's revlog logic
228 added,changed,removed=get_filechanges(repo,revision,parents,man)
229 type='thorough delta'
231 # later non-merge revision: feed in changed manifest
232 # if we have exactly one parent, just take the changes from the
233 # manifest without expensively comparing checksums
234 f=repo.status(repo.lookup(parents[0]),revnode)[:3]
235 added,changed,removed=f[1],f[0],f[2]
238 sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
239 (branch,type,revision+1,max,len(added),len(changed),len(removed)))
241 map(lambda r: wr('D %s' % r),removed)
242 export_file_contents(ctx,man,added)
243 export_file_contents(ctx,man,changed)
246 return checkpoint(count)
248 def export_tags(ui,repo,marks_cache,start,end,count,authors):
251 tag=sanitize_name(tag,"tag")
252 # ignore latest revision
253 if tag=='tip': continue
254 rev=repo.changelog.rev(node)
255 # ignore those tags not in our import range
256 if rev<start or rev>=end: continue
258 ref=get_parent_mark(rev,marks_cache)
260 sys.stderr.write('Failed to find reference for creating tag'
261 ' %s at r%d\n' % (tag,rev))
263 sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
264 wr('reset refs/tags/%s' % tag)
267 count=checkpoint(count)
270 def load_authors(filename):
272 if not os.path.exists(filename):
276 lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
277 for line in f.readlines():
281 sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
283 # put key:value in cache, key without ^:
284 cache[m.group(1).strip()]=m.group(2).strip()
286 sys.stderr.write('Loaded %d authors\n' % l)
289 def verify_heads(ui,repo,cache,force):
290 branches=repo.branchtags()
291 l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
294 # get list of hg's branches to verify, don't take all git has
299 if sha1!=None and c!=None:
300 sys.stderr.write('Verifying branch [%s]\n' % b)
302 sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
303 '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
304 if not force: return False
306 # verify that branch has exactly one head
308 for h in repo.heads():
309 (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
310 if t.get(branch,False):
311 sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
312 repo.changelog.rev(h))
313 if not force: return False
def mangle_mark(mark):
    """Convert a mark number into the key used by the marks cache.

    The value is parsed as an integer, decremented by one and returned
    as a string. This undoes the +1 offset applied when marks are
    written out as ':%d' % (revision+1).
    """
    number=int(mark)
    shifted=number-1
    return str(shifted)
321 def hg2git(repourl,m,marksfile,headsfile,tipfile,authors={},sob=False,force=False):
324 marks_cache=load_cache(marksfile,mangle_mark)
325 heads_cache=load_cache(headsfile)
326 state_cache=load_cache(tipfile)
328 ui,repo=setup_repo(repourl)
330 if not verify_heads(ui,repo,heads_cache,force):
334 tip=repo.changelog.count()
335 except AttributeError:
338 min=int(state_cache.get('tip',0))
340 if _max<0 or max>tip:
346 for rev in range(min,max):
347 c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,max,c,authors,sob,brmap)
349 c=export_tags(ui,repo,marks_cache,min,max,c,authors)
351 sys.stderr.write('Issued %d commands\n' % c)
353 state_cache['tip']=max
354 state_cache['repo']=repourl
355 save_cache(tipfile,state_cache)
359 if __name__=='__main__':
360 def bail(parser,opt):
361 sys.stderr.write('Error: No %s option given\n' % opt)
365 parser=OptionParser()
367 parser.add_option("-m","--max",type="int",dest="max",
368 help="Maximum hg revision to import")
369 parser.add_option("--marks",dest="marksfile",
370 help="File to read git-fast-import's marks from")
371 parser.add_option("--heads",dest="headsfile",
372 help="File to read last run's git heads from")
373 parser.add_option("--status",dest="statusfile",
374 help="File to read status from")
375 parser.add_option("-r","--repo",dest="repourl",
376 help="URL of repo to import")
377 parser.add_option("-s",action="store_true",dest="sob",
378 default=False,help="Enable parsing Signed-off-by lines")
379 parser.add_option("-A","--authors",dest="authorfile",
380 help="Read authormap from AUTHORFILE")
381 parser.add_option("-f","--force",action="store_true",dest="force",
382 default=False,help="Ignore validation errors by force")
384 (options,args)=parser.parse_args()
387 if options.max!=None: m=options.max
389 if options.marksfile==None: bail(parser,'--marks')
390 if options.headsfile==None: bail(parser,'--heads')
391 if options.statusfile==None: bail(parser,'--status')
392 if options.repourl==None: bail(parser,'--repo')
395 if options.authorfile!=None:
396 a=load_authors(options.authorfile)
398 sys.exit(hg2git(options.repourl,m,options.marksfile,options.headsfile,
399 options.statusfile,authors=a,sob=options.sob,force=options.force))