in luigi/contrib/hadoop.py [0:0]
def create_packages_archive(packages, filename):
    """
    Create a tar archive which will contain the files for the packages listed in packages.

    For every entry that is a submodule (it has no ``__path__`` and its name
    contains a dot), the enclosing package is archived instead of the single
    module. For packages, the ``__init__.py`` of every parent package and any
    ``*.egg-info`` directory whose path starts with the package directory are
    added as well. ``.pyc`` files, dot-files and ``.svn`` directories are
    skipped.

    :param packages: iterable of imported module/package objects; their
                     ``__name__``, ``__path__`` and ``__file__`` attributes are read.
    :param filename: path of the tar archive to write (opened in ``"w"`` mode,
                     i.e. uncompressed and overwriting any existing file).
    :raises RuntimeError: if the first ``__path__`` entry of a package is an
                          ``.egg`` file, which is not supported.
    """
    import tarfile

    # The context manager guarantees the archive file is closed even if one of
    # the packages fails to be added.
    with tarfile.open(filename, "w") as tar:

        def add(src, dst):
            logger.debug('adding to tar: %s -> %s', src, dst)
            tar.add(src, dst)

        def add_files_for_package(sub_package_path, root_package_path, root_package_name):
            for root, dirs, files in os.walk(sub_package_path):
                # Pruning ``dirs`` in place stops os.walk from descending into it.
                if '.svn' in dirs:
                    dirs.remove('.svn')
                for f in files:
                    if not f.endswith(".pyc") and not f.startswith("."):
                        add(dereference(root + "/" + f),
                            root.replace(root_package_path, root_package_name) + "/" + f)

        for package in packages:
            # Put a submodule's entire package in the archive. This is the
            # magic that usually packages everything you need without
            # having to attach packages/modules explicitly
            if not getattr(package, "__path__", None) and '.' in package.__name__:
                package = __import__(package.__name__.rpartition('.')[0], None, None, 'non_empty')

            n = package.__name__.replace(".", "/")

            if getattr(package, "__path__", None):
                # TODO: (BUG) picking only the first path does not
                # properly deal with namespaced packages in different
                # directories
                p = package.__path__[0]

                if p.endswith('.egg') and os.path.isfile(p):
                    # Raise a real exception: raising a bare string is itself
                    # a TypeError and would hide this message from the caller.
                    raise RuntimeError('egg files not supported!!!')

                # include __init__ files from parent projects
                root = []
                for parent in package.__name__.split('.')[0:-1]:
                    root.append(parent)
                    module_name = '.'.join(root)
                    directory = '/'.join(root)

                    add(dereference(__import__(module_name, None, None, 'non_empty').__path__[0] + "/__init__.py"),
                        directory + "/__init__.py")

                add_files_for_package(p, p, n)

                # include egg-info directories that are parallel:
                for egg_info_path in glob.glob(p + '*.egg-info'):
                    logger.debug(
                        'Adding package metadata to archive for "%s" found at "%s"',
                        package.__name__,
                        egg_info_path
                    )
                    add_files_for_package(egg_info_path, p, n)

            else:
                # Plain module: ship its source file rather than the bytecode.
                f = package.__file__
                if f.endswith("pyc"):
                    f = f[:-3] + "py"
                # NOTE(review): ``n`` had every "." replaced by "/" above, so
                # this condition appears to always hold and the ``else``
                # branch looks unreachable -- confirm before relying on it.
                if "." not in n:
                    add(dereference(f), os.path.basename(f))
                else:
                    add(dereference(f), n + ".py")