3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import repo,hg,cmdutil,util,ui,revlog,node
7 from hg2git import setup_repo,fixup_user,get_branch,get_changeset,load_cache,save_cache,get_git_sha1
8 from tempfile import mkstemp
9 from optparse import OptionParser
14 # silly regex to catch Signed-off-by lines in log message
15 sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
16 # insert 'checkpoint' command after this many commits or none at all if 0
17 cfg_checkpoint_count=0
18 # write some progress message every this many file contents written
19 cfg_export_boundary=1000
22 return x and '100755' or '100644'
28 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
30 def checkpoint(count):
32 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
33 sys.stderr.write("Checkpoint after %d commits\n" % count)
def get_parent_mark(parent,marks):
    """Return the reference to use for a parent revision.

    The marks table is consulted under the key str(parent). When an
    entry exists, its stored value is returned unchanged; when it does
    not, the mark syntax ':N' with N = parent+1 is returned instead.
    """
    # build the fallback up front, exactly as a default argument would be
    fallback_mark=':%d' % (parent+1)
    lookup_key=str(parent)
    return marks.get(lookup_key,fallback_mark)
45 """See if two revisions of a file are not equal."""
46 return node.hex(f1)!=node.hex(f2)
48 def outer_set(dleft,dright,l,c,r):
49 """Loop over our repository and find all changed and missing files."""
50 for left in dleft.keys():
51 right=dright.get(left,None)
53 # we have the file but our parent hasn't: add to left set
55 elif mismatch(dleft[left],right):
56 # we have it but checksums mismatch: add to center set
58 for right in dright.keys():
59 left=dleft.get(right,None)
61 # if parent has file but we don't: add to right set
63 # change is already handled when comparing child against parent
66 def get_filechanges(repo,revision,parents,mleft):
67 """Given some repository and revision, find all changed/deleted files."""
71 mright=repo.changectx(p).manifest()
76 l,c,r=outer_set(mleft,mright,l,c,r)
79 def get_author(logmessage,committer,authors):
80 """As git distincts between author and committer of a patch, try to
81 extract author by detecting Signed-off-by lines.
83 This walks from the end of the log message towards the top skipping
84 empty lines. Upon the first non-empty line, it walks all Signed-off-by
85 lines upwards to find the first one. For that (if found), it extracts
86 authorship information the usual way (authors table, cleaning, etc.)
88 If no Signed-off-by line is found, this defaults to the committer.
90 This may sound stupid (and it somehow is), but in log messages we
91 accidentally may have lines in the middle starting with
92 "Signed-off-by: foo" and thus matching our detection regex. Prevent
95 loglines=logmessage.split('\n')
97 # from tail walk to top skipping empty lines
100 if len(loglines[i].strip())==0: continue
103 # walk further upwards to find first sob line, store in 'first'
106 m=sob_re.match(loglines[i])
110 # if the last non-empty line matches our Signed-Off-by regex: extract username
112 r=fixup_user(first.group(1),authors)
116 def export_file_contents(ctx,manifest,files):
121 fctx=ctx.filectx(file)
123 wr('M %s inline %s' % (gitmode(manifest.execf(file)),file))
124 wr('data %d' % len(d)) # had some trouble with size()
127 if count%cfg_export_boundary==0:
128 sys.stderr.write('Exported %d/%d files\n' % (count,max))
129 if max>cfg_export_boundary:
130 sys.stderr.write('Exported %d/%d files\n' % (count,max))
132 def is_merge(parents):
134 for parent in parents:
139 def export_commit(ui,repo,revision,marks,heads,last,max,count,authors,sob):
140 (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
141 parents=repo.changelog.parentrevs(revision)
143 wr('commit refs/heads/%s' % branch)
144 wr('mark :%d' % (revision+1))
146 wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
147 wr('committer %s %d %s' % (user,time,timezone))
148 wr('data %d' % (len(desc)+1)) # wtf?
153 if parents[0] < parents[1]:
156 src=heads.get(branch,'')
159 # if we have a cached head, this is an incremental import: initialize it
160 # and kill reference so we won't init it again
163 sys.stderr.write('%s: Initializing to parent [%s]\n' %
165 link=src # avoid making a merge commit for incremental import
166 elif link=='' and not heads.has_key(branch) and revision>0:
167 # newly created branch and not the first one: connect to parent
168 tmp=get_parent_mark(parents[0],marks)
170 sys.stderr.write('%s: Link new branch to parent [%s]\n' %
172 link=tmp # avoid making a merge commit for branch fork
173 elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
174 pm=get_parent_mark(parents[pidx1],marks)
175 sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
176 (branch,revision,branch,parents[pidx1]));
179 if parents[pidx2] > 0:
180 pm=get_parent_mark(parents[pidx2],marks)
181 sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
182 (branch,pm,parents[pidx2]))
185 last[branch]=revision
187 # we need this later to write out tags
188 marks[str(revision)]=':%d'%(revision+1)
190 ctx=repo.changectx(str(revision))
192 added,changed,removed,type=[],[],[],''
195 # first revision: feed in full manifest
198 elif is_merge(parents):
199 # later merge revision: feed in changed manifest
200 # for many files comparing checksums is expensive so only do it for
201 # merges where we really need it due to hg's revlog logic
202 added,changed,removed=get_filechanges(repo,revision,parents,man)
203 type='thorough delta'
205 # later non-merge revision: feed in changed manifest
206 # if we have exactly one parent, just take the changes from the
207 # manifest without expensively comparing checksums
208 f=repo.status(repo.lookup(parents[0]),revnode)[:3]
209 added,changed,removed=f[1],f[0],f[2]
212 sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
213 (branch,type,revision+1,max,len(added),len(changed),len(removed)))
215 map(lambda r: wr('D %s' % r),removed)
216 export_file_contents(ctx,man,added+changed)
219 return checkpoint(count)
221 def export_tags(ui,repo,marks_cache,start,end,count,authors):
224 # ignore latest revision
225 if tag=='tip': continue
226 rev=repo.changelog.rev(node)
227 # ignore those tags not in our import range
228 if rev<start or rev>=end: continue
230 ref=get_parent_mark(rev,marks_cache)
232 sys.stderr.write('Failed to find reference for creating tag'
233 ' %s at r%d\n' % (tag,rev))
235 sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
236 wr('reset refs/tags/%s' % tag)
239 count=checkpoint(count)
242 def load_authors(filename):
244 if not os.path.exists(filename):
248 lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
249 for line in f.readlines():
253 sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
255 # put key:value in cache, key without ^:
256 cache[m.group(1).strip()]=m.group(2).strip()
258 sys.stderr.write('Loaded %d authors\n' % l)
261 def verify_heads(ui,repo,cache,force):
262 branches=repo.branchtags()
263 l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
266 # get list of hg's branches to verify, don't take all git has
271 if sha1!=None and c!=None:
272 sys.stderr.write('Verifying branch [%s]\n' % b)
274 sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
275 '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
276 if not force: return False
278 # verify that branch has exactly one head
280 for h in repo.heads():
281 (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
282 if t.get(branch,False):
283 sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
284 repo.changelog.rev(h))
285 if not force: return False
def mangle_mark(mark):
    """Return the given mark, converted to an integer and decremented
    by one, as a string."""
    # presumably undoes the +1 offset applied when marks are written
    # (':%d' % (revision+1)) -- confirm against load_cache's use of it
    shifted=int(mark)-1
    return str(shifted)
293 def hg2git(repourl,m,marksfile,headsfile,tipfile,authors={},sob=False,force=False):
296 marks_cache=load_cache(marksfile,mangle_mark)
297 heads_cache=load_cache(headsfile)
298 state_cache=load_cache(tipfile)
300 ui,repo=setup_repo(repourl)
302 if not verify_heads(ui,repo,heads_cache,force):
305 tip=repo.changelog.count()
307 min=int(state_cache.get('tip',0))
314 for rev in range(min,max):
315 c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,max,c,authors,sob)
317 c=export_tags(ui,repo,marks_cache,min,max,c,authors)
319 sys.stderr.write('Issued %d commands\n' % c)
321 state_cache['tip']=max
322 state_cache['repo']=repourl
323 save_cache(tipfile,state_cache)
327 if __name__=='__main__':
328 def bail(parser,opt):
329 sys.stderr.write('Error: No %s option given\n' % opt)
333 parser=OptionParser()
335 parser.add_option("-m","--max",type="int",dest="max",
336 help="Maximum hg revision to import")
337 parser.add_option("--marks",dest="marksfile",
338 help="File to read git-fast-import's marks from")
339 parser.add_option("--heads",dest="headsfile",
340 help="File to read last run's git heads from")
341 parser.add_option("--status",dest="statusfile",
342 help="File to read status from")
343 parser.add_option("-r","--repo",dest="repourl",
344 help="URL of repo to import")
345 parser.add_option("-s",action="store_true",dest="sob",
346 default=False,help="Enable parsing Signed-off-by lines")
347 parser.add_option("-A","--authors",dest="authorfile",
348 help="Read authormap from AUTHORFILE")
349 parser.add_option("-f","--force",action="store_true",dest="force",
350 default=False,help="Ignore validation errors by force")
352 (options,args)=parser.parse_args()
355 if options.max!=None: m=options.max
357 if options.marksfile==None: bail(parser,'--marks')
358 if options.headsfile==None: bail(parser,'--heads')
359 if options.statusfile==None: bail(parser,'--status')
360 if options.repourl==None: bail(parser,'--repo')
363 if options.authorfile!=None:
364 a=load_authors(options.authorfile)
366 sys.exit(hg2git(options.repourl,m,options.marksfile,options.headsfile,
367 options.statusfile,authors=a,sob=options.sob,force=options.force))