[Zope3-checkins] CVS: Zope3/src/zope/fssync - merger.py:1.1 metadata.py:1.1 fssync.py:1.4
Guido van Rossum
guido@python.org
Mon, 12 May 2003 16:20:09 -0400
Update of /cvs-repository/Zope3/src/zope/fssync
In directory cvs.zope.org:/tmp/cvs-serv334
Modified Files:
fssync.py
Added Files:
merger.py metadata.py
Log Message:
The complexity of the merge algorithm was driving me crazy. Start
over using test-driven design. Also use a more reasonable approach to
loading and storing "entries" file -- this is now abstracted away in a
metadata database. There's still much to do, but this is a better
foundation for sure!
=== Added File Zope3/src/zope/fssync/merger.py ===
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Class to do augmented three-way merges.
This boils down to distinguishing an astonishing number of cases.
$Id: merger.py,v 1.1 2003/05/12 20:19:38 gvanrossum Exp $
"""
import os
import shutil
import filecmp
import commands
from os.path import exists, isdir, isfile
class Merger(object):
    """Augmented three-way file and directory merges.

    An augmented merge takes into account three files (or directories)
    and two metadata entries.  The files are labeled local, original,
    and remote.  The metadata entries are for local and remote.  A
    remote metadata entry is either empty or non-empty.  Empty means
    the file does not exist remotely, non-empty means it does exist
    remotely.  We also have to take into account the possibility that
    the existence of the file belies what the entry declares.  A local
    metadata entry can have those states, and in addition, if
    non-empty, it can be flagged as added or removed.  Again, the
    existence of the file may belie what the entry claims.  The
    original file serves the obvious purpose.  Its existence, too, can
    be inconsistent with the state indicated by the metadata entries.

    To find the metadata entry for a file, we look for a key
    corresponding to its basename in @@Zope/Entries.xml in the
    directory that contains it.  For this purpose, we assume the
    filename given uses the correct case even on a case-insensitive
    filesystem (i.e., the filesystem must be at least case-preserving).

    The purpose of the merge_files() method is to merge the remote
    changes into the local copy as best it can, resolving
    inconsistencies if possible.  It should not raise an exception
    unless there are file/directory permission problems.  Its return
    value is an indicator of what it did.

    The classify_files() method is a helper for merge_files(); it looks
    at all the evidence and decides what merge_files() should do,
    without actually touching any files or the metadata.  Possible
    actions are:

    Fix      -- copy the remote copy to the local original, nothing else
    Copy     -- copy the remote copy over the local copy
    Merge    -- merge the remote copy into the local copy
                (this may cause merge conflicts when tried)
    Delete   -- delete the local copy
    Nothing  -- do nothing

    The original file is made a copy of the remote file for actions
    Fix, Copy and Merge; it is deleted for action Delete; it is
    untouched for action Nothing.

    It should also indicate the final state of the local copy after
    the action is taken:

    Conflict    -- there is a conflict of some kind
    Uptodate    -- the local copy is the same as the remote copy
    Modified    -- the local copy is marked (to be) modified
    Added       -- the local copy is marked (to be) added
    Removed     -- the local copy is marked (to be) removed
    Spurious    -- there is an unregistered local file only
    Nonexistent -- there is nothing locally or remotely

    For Conflict, Added and Removed, the action will always be
    Nothing.  The difference between Removed and Nonexistent is that
    Nonexistent means the file doesn't exist remotely either, while
    Removed means that on the next commit the file should be removed
    from the remote store.  Similarly, Added means the file should be
    added remotely on the next commit, and Modified means that the
    file should be changed remotely to match the local copy at the
    next commit.

    Note that carrying out the Merge action can change the resulting
    state to become Uptodate or Conflict instead of Modified, if there
    are merge conflicts (which classify_files() can't detect without
    doing more work than reasonable).
    """

    def __init__(self, metadata, verbose=True):
        """Constructor.

        The metadata argument is the metadata database, which has a
        single method: getentry(file) which returns a dict containing
        the metadata for that file.  Changes to this dict will be
        preserved when the database is written back (not by the Merger
        class).  To delete all metadata for a file, call the dict's
        clear() method.

        If verbose is true, output of the external merge command is
        printed.
        """
        self.metadata = metadata
        self.verbose = verbose

    def getentry(self, file):
        """Helper to abstract away the existence of self.metadata."""
        # XXX Hmm...  This could be a subclass of class Metadata...
        return self.metadata.getentry(file)

    def merge_files(self, local, orig, remote, action, state):
        """Helper to carry out a file merge.

        The action and state arguments correspond to the return value
        of classify_files().

        Return the state as returned by the second return value of
        classify_files().  This is either the argument state or
        recalculated based upon the effect of the action.
        """
        # Dispatch on the action name: 'Copy' -> merge_files_copy, etc.
        method = getattr(self, "merge_files_" + action.lower())
        # A handler returns None to mean "the predicted state stands".
        return method(local, orig, remote) or state

    def merge_files_nothing(self, local, orig, remote):
        """Action 'Nothing': leave files and metadata alone."""
        return None

    def merge_files_delete(self, local, orig, remote):
        """Action 'Delete': remove local and original, drop the entry."""
        if isfile(local):
            os.remove(local)
        if isfile(orig):
            os.remove(orig)
        self.getentry(local).clear()
        return None

    # Backward compatibility: earlier revisions spelled this action
    # 'Remove', so keep that name dispatchable through merge_files().
    merge_files_remove = merge_files_delete

    def merge_files_copy(self, local, orig, remote):
        """Action 'Copy': overwrite local and original with remote."""
        shutil.copy(remote, local)
        shutil.copy(remote, orig)
        self.getentry(local).update(self.getentry(remote))
        self.clearflag(local)
        return None

    def merge_files_merge(self, local, orig, remote):
        """Action 'Merge': three-way merge remote changes into local.

        Return 'Conflict' if the external merge command reports a
        non-zero status (the entry then gets a 'conflict' key holding
        the local file's mtime), else 'Modified'.
        """
        # XXX This is platform dependent
        if exists(orig):
            origfile = orig
        else:
            # No common ancestor: merge against an empty file
            origfile = "/dev/null"
        cmd = "merge %s %s %s" % (commands.mkarg(local),
                                  commands.mkarg(origfile),
                                  commands.mkarg(remote))
        sts, output = commands.getstatusoutput(cmd)
        if output and self.verbose:
            print(output)
        # Only now may the original be replaced; the merge needed it
        shutil.copy(remote, orig)
        self.getentry(local).update(self.getentry(remote))
        self.clearflag(local)
        if sts:
            self.getentry(local)["conflict"] = os.path.getmtime(local)
            return "Conflict"
        else:
            return "Modified"

    def merge_files_fix(self, local, orig, remote):
        """Action 'Fix': refresh the original from remote, keep local."""
        shutil.copy(remote, orig)
        # Update first, then clear the flag -- same order as the Copy
        # and Merge actions, so the flag is certain to end up cleared.
        self.getentry(local).update(self.getentry(remote))
        self.clearflag(local)
        return None

    def clearflag(self, file):
        """Helper to clear the added/removed metadata flag."""
        metadata = self.getentry(file)
        if "flag" in metadata:
            del metadata["flag"]

    def classify_files(self, local, orig, remote):
        """Helper for merge to classify file changes.

        Arguments are pathnames to the local, original, and remote
        copies.

        Return a pair of strings (action, state) where action is one
        of 'Fix', 'Copy', 'Merge', 'Delete' or 'Nothing', and state is
        one of 'Conflict', 'Uptodate', 'Modified', 'Added', 'Removed',
        'Spurious' or 'Nonexistent'.
        """
        lmeta = self.getentry(local)
        rmeta = self.getentry(remote)

        # Sort out cases involving additions or removals

        if not lmeta and not rmeta:
            if exists(local):
                # Local unregistered file
                return ("Nothing", "Spurious")
            else:
                # Why are we here?
                return ("Nothing", "Nonexistent")

        if lmeta.get("flag") == "added":
            # Added locally
            if not rmeta:
                # Nothing remotely
                return ("Nothing", "Added")
            else:
                # Added remotely too!  Merge, unless trivial conflict
                if self.cmpfile(local, remote):
                    return ("Fix", "Uptodate")
                else:
                    return ("Merge", "Modified")

        if rmeta and not lmeta:
            # Added remotely
            return ("Copy", "Uptodate")

        if lmeta.get("flag") == "removed":
            if not rmeta:
                # Removed remotely too
                return ("Delete", "Nonexistent")
            else:
                # Removed locally
                if self.cmpfile(orig, remote):
                    return ("Nothing", "Removed")
                else:
                    return ("Nothing", "Conflict")

        if lmeta and not rmeta:
            assert lmeta.get("flag") is None
            # Removed remotely
            return ("Delete", "Nonexistent")

        # From here on both lmeta and rmeta are non-empty; every other
        # combination has returned above.

        if lmeta.get("flag") is None and not exists(local):
            # Lost locally; restore it from the remote copy
            return ("Copy", "Uptodate")

        # Sort out cases involving simple changes to files

        if self.cmpfile(orig, remote):
            # No remote changes; classify local changes
            if self.cmpfile(local, orig):
                # No changes
                return ("Nothing", "Uptodate")
            else:
                # Only local changes
                return ("Nothing", "Modified")
        else:
            # Some remote changes; classify local changes
            if self.cmpfile(local, orig):
                # Only remote changes
                return ("Copy", "Uptodate")
            else:
                if self.cmpfile(local, remote):
                    # We're lucky -- local and remote changes are the same
                    return ("Fix", "Uptodate")
                else:
                    # Changes on both sides, three-way merge needed
                    return ("Merge", "Modified")

    def cmpfile(self, file1, file2):
        """Helper to compare two files.

        Return True iff both files exist and have equal contents.  A
        missing file never compares equal, so classify_files() does not
        blow up when e.g. the original copy has gone missing.
        """
        if not (isfile(file1) and isfile(file2)):
            return False
        return filecmp.cmp(file1, file2, shallow=False)
=== Added File Zope3/src/zope/fssync/metadata.py ===
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Class to maintain fssync metadata.
The metadata entry for something named /path/base is kept in a file
/path/@@Zope/Entries.xml. That file is (an XML pickle for) a dict
containing many entries. The metadata entry for /path/base is stored
under the key 'base'. The metadata entry is itself a dict. An empty
entry is considered non-existent, and will be deleted upon flush. If
no entries remain, the Entries.xml file will be removed.
$Id: metadata.py,v 1.1 2003/05/12 20:19:38 gvanrossum Exp $
"""
import os
import copy
from os.path import exists, isdir, isfile, split, join, realpath, normcase
from zope.xmlpickle import loads, dumps
class Metadata(object):
    """In-memory cache of per-directory @@Zope/Entries.xml metadata.

    Entries are loaded lazily per directory by getentry()/getnames()
    and written back to disk only when flush() is called.
    """

    def __init__(self, case_insensitive=None):
        """Constructor.

        The case_insensitive can be passed as an argument for testing;
        by default, it is set by observing the behavior of normcase().
        """
        if case_insensitive is None:
            case_insensitive = (normcase("ABC") == normcase("abc"))
        self.case_insensitive = case_insensitive
        self.cache = {} # Keyed by normcase(dirname(realpath(file)))
        self.originals = {} # Original copy as read from file

    def getnames(self, dir):
        """Return the names of known non-empty metadata entries, sorted."""
        dir = realpath(dir)
        entries = self._getentries(dir)
        names = [name for name, entry in entries.items() if entry]
        names.sort()
        return names

    def getentry(self, file):
        """Return the metadata entry for a given file (or directory).

        Modifying the dict that is returned will cause the changes to
        the metadata to be written out when flush() is called.  If
        there is no metadata entry for the file, return a new empty
        dict, modifications to which will also be flushed.
        """
        file = realpath(file)
        dir, base = split(file)
        entries = self._getentries(dir)
        if base in entries:
            return entries[base]
        if self.case_insensitive:
            # Look for a case-insensitive match -- expensive!
            # XXX There's no test case for this code!
            # XXX What if there are multiple matches?
            nbase = normcase(base)
            for b in entries:
                if normcase(b) == nbase:
                    return entries[b]
        # Create a new entry
        entries[base] = entry = {}
        return entry

    def _getentries(self, dir):
        """Return the (cached) dict of all entries for directory dir.

        On first access the dict is loaded from dir/@@Zope/Entries.xml
        if that file exists, else it starts out empty; a deep copy is
        remembered so flushkey() can tell whether anything changed.
        """
        key = normcase(dir)
        if key in self.cache:
            entries = self.cache[key]
        else:
            efile = join(dir, "@@Zope", "Entries.xml")
            if isfile(efile):
                f = open(efile)
                try:
                    data = f.read()
                finally:
                    f.close()
                self.cache[key] = entries = loads(data)
            else:
                self.cache[key] = entries = {}
            self.originals[key] = copy.deepcopy(entries)
        return entries

    def flush(self):
        """Write all changed directories back to disk.

        Every cached directory is attempted even if an earlier one
        fails.  If exactly one IOError/OSError occurred it is raised
        as is; if several occurred, an IOError whose arguments are the
        individual exceptions is raised.
        """
        import sys  # local import: only needed to fetch the caught error
        errors = []
        for key in self.cache:
            try:
                self.flushkey(key)
            except (IOError, OSError):
                # sys.exc_info() is spelled the same on every Python
                errors.append(sys.exc_info()[1])
        if not errors:
            return
        if len(errors) == 1:
            # Raise the collected exception explicitly; a bare 'raise'
            # here would be outside any except clause.
            raise errors[0]
        raise IOError(*errors)

    def flushkey(self, key):
        """Write back the entries for one cached directory key.

        Empty entries are dropped first.  Nothing is written when the
        remaining entries equal what was originally loaded.  When no
        entries remain, Entries.xml is removed, and the @@Zope
        directory too if it can be (i.e. if it is empty).
        """
        entries = self.cache[key]
        todelete = [name for name, entry in entries.items() if not entry]
        for name in todelete:
            del entries[name]
        if entries != self.originals[key]:
            zdir = join(key, "@@Zope")
            efile = join(zdir, "Entries.xml")
            if not entries:
                if isfile(efile):
                    os.remove(efile)
                    if exists(zdir):
                        try:
                            os.rmdir(zdir)
                        except os.error:
                            # Best effort: @@Zope still has other contents
                            pass
            else:
                data = dumps(entries)
                if not exists(zdir):
                    os.makedirs(zdir)
                f = open(efile, "w")
                try:
                    f.write(data)
                finally:
                    f.close()
            self.originals[key] = copy.deepcopy(entries)
=== Zope3/src/zope/fssync/fssync.py 1.3 => 1.4 ===
--- Zope3/src/zope/fssync/fssync.py:1.3 Sat May 10 20:23:23 2003
+++ Zope3/src/zope/fssync/fssync.py Mon May 12 16:19:38 2003
@@ -35,7 +35,9 @@
from os.path import realpath, normcase, normpath
from zope.xmlpickle import loads, dumps
-from zope.fssync.compare import treeComparisonWalker
+from zope.fssync.compare import treeComparisonWalker, classifyContents
+from zope.fssync.metadata import Metadata
+from zope.fssync.merger import Merger
class Error(Exception):
"""User-level error, e.g. non-existent file.
@@ -83,6 +85,7 @@
self.topdir = topdir
self.verbose = verbose
self.rooturl = self.findrooturl()
+ self.metadata = Metadata()
def setrooturl(self, rooturl):
self.rooturl = rooturl
@@ -108,6 +111,7 @@
if sts:
raise Error("unzip command failed")
self.saverooturl()
+ print "All done"
finally:
os.unlink(filename)
@@ -154,7 +158,7 @@
sts = os.system("cd %s; unzip -q %s" % (tmpdir, zipfile))
if sts:
raise Error("unzip command failed")
- self.merge(self.topdir, tmpdir)
+ self.merge_dirs(self.topdir, tmpdir)
shutil.rmtree(tmpdir)
os.unlink(zipfile)
print "All done"
@@ -177,7 +181,7 @@
sts = os.system("cd %s; unzip -q %s" % (tmpdir, filename))
if sts:
raise Error("unzip command failed")
- self.merge(self.topdir, tmpdir)
+ self.merge_dirs(self.topdir, tmpdir)
print "All done"
finally:
shutil.rmtree(tmpdir)
@@ -185,178 +189,115 @@
os.unlink(filename)
def add(self, path):
- path = realpath(path)
if not exists(path):
raise Error("nothing known about '%s'", path)
- dir, name = split(path)
- if name in ("", os.curdir, os.pardir):
- raise Error("can't add path '%s'", path)
- entries = self.loadentries(dir)
- if name in entries:
+ entry = self.metadata.getentry(path)
+ if entry:
raise Error("path '%s' is already registered", name)
- pdir = self.parent(dir)
- dname = basename(dir)
- pentries = self.loadentries(pdir)
- if dname not in pentries:
- raise Error("directory '%s' unknown", dname)
- dpath = pentries[dname]['path']
- if dpath == "/":
- ourpath = "/" + name
- else:
- ourpath = dpath + "/" + name
- entries[name] = d = {"path": ourpath, "flag": "added"}
+ entry["path"] = '/'+path
+ entry["flag"] = "added"
if isdir(path):
- d["type"] = "zope.app.content.folder.Folder"
+ entry["type"] = "zope.app.content.folder.Folder"
self.ensuredir(join(path, "@@Zope"))
self.dumpentries({}, path)
else:
# XXX Need to guess better based on extension
- d["type"] = "zope.app.content.file.File"
- if "factory" not in d:
- d["factory"] = str(unicode(d["type"]))
- self.dumpentries(entries, dir)
-
- def merge(self, ours, server):
- # XXX This method is way too long, and still not complete :-(
- for (left, right, common, lentries, rentries, ldirs, lnondirs,
- rdirs, rnondirs) in treeComparisonWalker(ours, server):
- origdir = join(left, "@@Zope", "Original")
- lextradir = join(left, "@@Zope", "Extra")
- rextradir = join(right, "@@Zope", "Extra")
- lanndir = join(left, "@@Zope", "Annotations")
- ranndir = join(right, "@@Zope", "Annotations")
- weirdos = ldirs.copy() # This is for flagging "?" files
- weirdos.update(lnondirs)
- for x in common: # Compare matching stuff
- nx = normpath(x)
- if nx in weirdos:
- del weirdos[nx]
- if nx in rdirs:
- if nx in lnondirs:
- print "file '%s' is in the way of a directory"
- elif nx not in ldirs:
- print "restoring directory '%s'"
- os.mkdir(join(left, x))
- elif nx in rnondirs:
- if nx in ldirs:
- print "directory '%s' is in the way of a file"
- else:
- # Merge files
- rx = rnondirs[nx]
- origx = join(origdir, x)
- if nx in lnondirs:
- lx = lnondirs[nx]
- else:
- lx = join(left, x)
- print "restoring lost file '%s'" % lx
- self.copyfile(origx, lx)
- if self.cmp(origx, rx):
- # Unchanged on server
- if self.cmp(lx, origx):
- if self.verbose:
- print "=", lx
- else:
- print "M", lx
- elif self.cmp(lx, origx):
- # Unchanged locally
- self.copyfile(rx, lx)
- self.copyfile(rx, origx)
- print "U", lx
- elif self.cmp(lx, rx):
- # Only the original is out of date
- self.copyfile(rx, origx)
- print "U", lx
- else:
- # Conflict! Must do a 3-way merge
- print "merging changes into '%s'" % lx
- self.copyfile(rx, origx)
- sts = os.system("merge %s %s %s" %
- (commands.mkarg(lx),
- commands.mkarg(origx),
- commands.mkarg(rx)))
- if sts:
- print "C", lx
- else:
- print "M", lx
- # In all cases, merge Extra stuff if any
- lx = join(lextradir, x)
- rx = join(rextradir, x)
- if isdir(rx):
- self.ensuredir(lx)
- self.merge(lx, rx)
- # And merge Annotations if any
- lx = join(lanndir, x)
- rx = join(ranndir, x)
- if isdir(rx):
- self.ensuredir(lx)
- self.merge(lx, rx)
- entries = self.loadentries(left)
- entries_changed = False
- for x in rentries: # Copy new stuff from server
- entries[x] = rentries[x]
- entries_changed = True
- nx = normpath(x)
- if nx in rdirs:
- del weirdos[nx]
- # New directory; traverse into it
- if nx in lnondirs:
- print ("file '%s' is in the way of a new directory" %
- lnondirs[nx])
- else:
- common[x] = ({}, rentries[x])
- del rentries[x]
- if nx not in ldirs:
- lfull = join(left, x)
- os.mkdir(lx)
- ldirs[nx] = lx
- elif nx in rnondirs:
- if nx in ldirs:
- print ("directory '%s' is in the way of a new file" %
- ldirs[nx])
- elif nx in lnondirs:
- if self.cmp(rnondirs[nx], lnondirs[nx]):
- print "U", lnondirs[nx]
- del weirdos[nx]
- else:
- print ("file '%s' is in the way of a new file" %
- lnondirs[nx])
- else:
- # New file; copy it
- lx = join(left, x)
- rx = join(right, x)
- self.copyfile(rx, lx)
- # And copy to Original
- self.ensuredir(origdir)
- self.copyfile(rx, join(origdir, x))
- print "U", lx
- # In all cases, copy Extra stuff if any
- lx = join(lextradir, x)
- rx = join(rextradir, x)
- if isdir(rx):
- self.ensuredir(lx)
- self.merge(lx, rx)
- # And copy Annotations if any
- lx = join(lanndir, x)
- rx = join(ranndir, x)
- if isdir(rx):
- self.ensuredir(lx)
- self.merge(lx, rx)
- if entries_changed:
- self.dumpentries(entries, left)
- for x in lentries: # Flag new stuff in the working directory
- # XXX Could be deleted on server too!!!
- nx = normpath(x)
- if nx in weirdos:
- print "A", weirdos[nx]
- del weirdos[nx]
- else:
- lx = join(left, x)
- print "newborn '%s' is missing" % lx
- # XXX How about Annotations and Extra for these?
- # Flag anything not yet noted
- for nx in weirdos:
- if not self.ignore(nx):
- print "?", weirdos[nx]
+ entry["type"] = "zope.app.content.file.File"
+ if "factory" not in entry:
+ entry["factory"] = str(unicode(entry["type"]))
+ self.metadata.flush()
+
+ def merge_dirs(self, localdir, remotedir):
+ merger = Merger(self.metadata)
+
+ ldirs, lnondirs = classifyContents(localdir)
+ rdirs, rnondirs = classifyContents(remotedir)
+
+ dirs = {}
+ dirs.update(ldirs)
+ dirs.update(rdirs)
+
+ nondirs = {}
+ nondirs.update(lnondirs)
+ nondirs.update(rnondirs)
+
+ def sorted(d): keys = d.keys(); keys.sort(); return keys
+
+ for x in sorted(dirs):
+ local = join(localdir, x)
+ if x in nondirs:
+ # Too weird to handle
+ print "should '%s' be a directory or a file???" % local
+ continue
+ remote = join(remotedir, x)
+ lentry = self.metadata.getentry(local)
+ rentry = self.metadata.getentry(remote)
+ if lentry or rentry:
+ if x not in ldirs:
+ os.mkdir(local)
+ self.merge_dirs(local, remote)
+
+ for x in sorted(nondirs):
+ if x in dirs:
+ # Error message was already printed by previous loop
+ continue
+ local = join(localdir, x)
+ origdir = join(localdir, "@@Zope", "Original")
+ self.ensuredir(origdir)
+ orig = join(origdir, x)
+ remote = join(remotedir, x)
+ action, state = merger.classify_files(local, orig, remote)
+ state = merger.merge_files(local, orig, remote, action, state)
+ self.report(action, state, local)
+ self.merge_extra(local, remote)
+ self.merge_annotations(local, remote)
+
+ self.merge_extra(localdir, remotedir)
+ self.merge_annotations(localdir, remotedir)
+
+ self.metadata.flush()
+
+ def merge_extra(self, local, remote):
+ lhead, ltail = split(local)
+ rhead, rtail = split(remote)
+ lextra = join(lhead, "@@Zope", "Extra", ltail)
+ rextra = join(rhead, "@@Zope", "Extra", rtail)
+ if isdir(rextra):
+ self.ensuredir(lextra)
+ self.merge_dirs(lextra, rextra)
+
+ def merge_annotations(self, local, remote):
+ lhead, ltail = split(local)
+ rhead, rtail = split(remote)
+ lannotations = join(lhead, "@@Zope", "Annotations", ltail)
+ rannotations = join(rhead, "@@Zope", "Annotations", rtail)
+ if isdir(rannotations):
+ self.ensuredir(lannotations)
+ self.merge_dirs(lannotations, rannotations)
+
+ def report(self, action, state, local):
+ if action != "Nothing":
+ print action, local
+ letter = None
+ if state == "Conflict":
+ letter = "C"
+ elif state == "Uptodate":
+ if action in ("Copy", "Fix", "Merge"):
+ letter = "U"
+ elif state == "Modified":
+ letter = "M"
+ elif state == "Added":
+ letter = "A"
+ elif state == "Removed":
+ letter = "R"
+ elif state == "Spurious":
+ if not self.ignore(local):
+ letter = "?"
+ elif state == "Nonexistent":
+ if action == "Delete":
+ print "local file '%s' is no longer relevant" % local
+ if letter:
+ print letter, local
def ignore(self, path):
return path.endswith("~")
@@ -404,25 +345,6 @@
if self.rooturl:
self.writefile(self.rooturl + "\n",
join(self.topdir, "@@Zope", "Root"))
-
- def loadentries(self, dir):
- file = join(dir, "@@Zope", "Entries.xml")
- try:
- return self.loadfile(file)
- except IOError:
- return {}
-
- def dumpentries(self, entries, dir):
- file = join(dir, "@@Zope", "Entries.xml")
- self.dumpfile(entries, file)
-
- def loadfile(self, file):
- data = self.readfile(file)
- return loads(data)
-
- def dumpfile(self, obj, file):
- data = dumps(obj)
- self.writefile(data, file)
def readfile(self, file, mode="r"):
f = open(file, mode)