I am currently working on a piece of a larger puzzle. For my piece, I have a file object and the destination of the zip file. I never know the size of the file object in advance; I only know that I have one. Therefore, the zip has to support zip64.
My goal is to take that file object (a pointer to the file) and write it to the zip file without loading the entire file into memory. I would like to do this chunk by chunk (especially if the file object is really big).
Any ideas on how I can go about doing this?
# Question's attempt. NOTE: the snippet is truncated in the post -- the read
# loop and the call that writes each chunk into the archive are missing.
zip_path = "/tmp/file.zip"
file_to_zip_path = "/home/ryanb58/Desktop/movie.mp4"
# allowZip64=True lets zipfile use the ZIP64 extensions for large archives.
with zipfile.ZipFile(zip_path, mode="w", allowZip64=True) as zip:
f = open(file_to_zip_path, 'rb')
# Read at most 1024 bytes per call instead of loading the whole file.
data = f.read(1024)
# An empty result from read() signals end of file.
if not data:
Following the advice that @J.F.Sebastian gave in his comment, I was able to write my file to a zip without bringing the full file into memory.
Here is my solution for the override.
import binascii
import os
import stat
import time
import zipfile

try:
    import zlib  # Needed for ZIP_DEFLATED; also provides crc32.
except ImportError:
    zlib = None

BUFFER_SIZE = 1024 * 10000  # Bytes read from the source per chunk (~10 MB).
ZIP64_LIMIT = (1 << 31) - 1  # Largest size storable without ZIP64 extensions.

_crc32 = zlib.crc32 if zlib is not None else binascii.crc32


def _file_header(zinfo, zip64):
    """Return the local file header for *zinfo*, honouring *zip64* if possible.

    ``ZipInfo.FileHeader`` rejects any argument on old interpreters such as
    Python 2.7.2 (``TypeError``); there we fall back to the argument-less
    call so the same code runs on both old and new versions.
    """
    try:
        return zinfo.FileHeader(zip64)
    except TypeError:
        return zinfo.FileHeader()


def _advance_start_dir(archive):
    """Point ``archive.start_dir`` at the current end of the member data.

    ZipFile implementations that track ``start_dir`` write the central
    directory at that offset on close; without this update it would be
    written over the member data. Where the attribute is absent, nothing
    needs to be done.
    """
    if hasattr(archive, 'start_dir'):
        archive.start_dir = archive.fp.tell()


class Zip(zipfile.ZipFile):
    """ZipFile whose ``write`` streams an open file object chunk by chunk."""

    def write(self, fileobj, arcname=None, compress_type=None):
        """Put the bytes from fileobj into the archive under the name arcname.

        The data is read ``BUFFER_SIZE`` bytes at a time, so the source is
        never held in memory as a whole. The local header is written first
        with placeholder CRC/sizes and rewritten once the real values are
        known, which requires the archive file to be seekable.

        Args:
            fileobj: Open binary file object. Its ``name`` attribute is passed
                to ``os.stat`` to obtain mode, mtime and the expected size.
            arcname: Member name inside the archive. Drive letters and leading
                path separators are removed. Defaults to "/temp.zip", i.e. the
                member is stored as "temp.zip".
            compress_type: ``zipfile.ZIP_STORED`` or ``zipfile.ZIP_DEFLATED``;
                defaults to the archive's own compression setting.

        Raises:
            RuntimeError: If the archive is already closed, or if the data
                outgrew the non-ZIP64 limit although the size reported by
                ``os.stat`` did not call for ZIP64.
            NotImplementedError: If a compression type other than stored or
                deflated is requested.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(fileobj.name)
        isdir = stat.S_ISDIR(st.st_mode)
        date_time = time.localtime(st.st_mtime)[0:6]

        if arcname is None:
            arcname = "/temp.zip"
        # splitdrive() returns (drive, tail); only the tail is a member name.
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Archive member names must be relative: drop leading separators.
        arcname = arcname.lstrip(os.sep + (os.altsep or ""))
        if isdir:
            arcname += '/'

        zinfo = zipfile.ZipInfo(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.compress_type = zipfile.ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        if zinfo.compress_type not in (zipfile.ZIP_STORED,
                                       zipfile.ZIP_DEFLATED):
            # Anything else would be written raw but labelled as compressed.
            raise NotImplementedError(
                "Only ZIP_STORED and ZIP_DEFLATED are supported")

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()  # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(_file_header(zinfo, False))
            _advance_start_dir(self)
            return

        # Placeholders; the header is rewritten with real values below.
        zinfo.CRC = crc = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size, hence the
        # 5% safety margin when deciding whether ZIP64 fields are needed.
        zip64 = self._allowZip64 and zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(_file_header(zinfo, zip64))

        if zinfo.compress_type == zipfile.ZIP_DEFLATED:
            # wbits=-15 produces a raw deflate stream, as the ZIP format needs.
            cmpr = zlib.compressobj(
                zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
        else:
            cmpr = None

        file_size = 0
        while True:
            buf = fileobj.read(BUFFER_SIZE)
            if not buf:
                break
            file_size += len(buf)
            crc = _crc32(buf, crc) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)

        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = crc
        zinfo.file_size = file_size

        if not zip64 and self._allowZip64:
            # The header was sized without ZIP64 fields; it cannot grow now.
            if file_size > ZIP64_LIMIT:
                raise RuntimeError(
                    'File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError(
                    'Compressed size larger than uncompressed size')

        # Seek backwards and rewrite the file header (which will now include
        # the correct CRC and file sizes), then return to the end of the data.
        position = self.fp.tell()
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(_file_header(zinfo, zip64))
        self.fp.seek(position, 0)

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        _advance_start_dir(self)
Note that I can't rely on passing zip64 to the FileHeader method, because the system this code runs on only supports Python 2.7.2, whereas writing correct headers for zip64 files requires Python 2.7.4 at minimum.