I’m streaming a number of S3 objects, merging them into a zip archive, and uploading the resulting archive back to S3. This works well for smaller exports, but it gets stuck on larger ones. After reviewing the logs, I noticed that the connection consistently freezes at the 5-minute mark, so it looks like S3 is closing the connection after roughly 5 minutes of inactivity.
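For what it's worth, the stall looks like a plain idle-socket timeout rather than anything specific to my pipeline. A stripped-down sketch of how to observe it in isolation (Bucket and Key are placeholders, not my real values):

const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');

const watchIdleStream = async () => {
  const client = new S3Client({ region: process.env.AWS_REGION });
  // Open an object stream and deliberately never read from it,
  // then watch whether the other side closes it after ~5 minutes
  const { Body } = await client.send(
    new GetObjectCommand({ Bucket: 'some-bucket', Key: 'some-key' }),
  );
  const startedAt = Date.now();
  const elapsed = () => Math.round((Date.now() - startedAt) / 1000);
  Body.on('error', (err) => console.log(`stream error after ${elapsed()}s: ${err}`));
  Body.on('close', () => console.log(`stream closed after ${elapsed()}s`));
};

watchIdleStream().catch((err) => console.log(err));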
Here’s the code:
// Imports (assuming CommonJS); gets3Client, updateExport and invokeExportCreatedTask are
// project helpers — the two S3 helpers are sketched after the snippet.
const fs = require('fs');
const { Agent } = require('https');
const { PassThrough } = require('stream');
const archiver = require('archiver');
const async = require('async');
const { NodeHttpHandler } = require('@smithy/node-http-handler'); // '@aws-sdk/node-http-handler' on older SDK v3
const { Upload } = require('@aws-sdk/lib-storage');

const createZipAndUpload = async (
  filePath,
  fileName,
  zipFileName,
  exportIdentifier,
  blobs,
  matrixCSVs,
  exportRepository,
) => {
  return new Promise((resolve, reject) => {
    try {
      const requestHandler = new NodeHttpHandler({
        httpsAgent: new Agent({
          keepAlive: true,
          rejectUnauthorized: true,
        }),
        // tried all the possible timeout options
        requestTimeout: 3600000,
        timeout: 3600000,
        connectionTimeout: 3600000,
        socketTimeout: 3600000,
      });
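      // As far as I can tell from the handler's typings, only connectionTimeout,
      // requestTimeout and the older socketTimeout alias are recognized; timeout doesn't
      // look like a real option, but I've left everything in while debugging.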
      const s3Client = gets3Client(process.env.AWS_REGION, requestHandler);
      const exportBucketName = 'export';
      const streamPassThrough = new PassThrough();
      const archive = archiver('zip');
      // Handle archiver errors
      archive.on('error', (error) => {
        console.log(`error on archive: ${error}`);
        reject(
          new Error(
            `${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`,
          ),
        );
      });
      archive.pipe(streamPassThrough).on('error', (error) => {
        console.log(`error on streamPassThrough: ${error}`);
      });
      // Local files: the main export file plus the per-matrix CSVs already on disk
      archive.file(filePath, { name: fileName });
      matrixCSVs.forEach((matrixCSV) => {
        archive.file(`tmp/${matrixCSV.fileName}`, { name: matrixCSV.fileName });
      });
      const MAX_CONCURRENCY = 3;
      let s3ClientForBlobs = gets3Client(process.env.AWS_REGION);
      async.eachOfLimit(
        blobs,
        MAX_CONCURRENCY,
        async (blobItem, index) => {
          console.log(`Processing ${index}: ${blobItem.key}`);
          if ((index + 1) % 50 === 0) {
            console.log(`new s3 client`);
            // Swap in a fresh S3 client every 50 objects; S3 appeared to throttle
            // requests after around 50 objects on a single client.
            s3ClientForBlobs = gets3Client(process.env.AWS_REGION);
          }
          const stream = await blobItem.getS3ObjectWithStream(s3ClientForBlobs);
          if (stream) {
            stream.on('end', () => {});
            archive.append(stream, { name: blobItem.key });
          }
        },
        async (err) => {
          if (err) {
            reject(new Error(`error in getting streams ${err}`));
            return;
          }
          archive.finalize();
          // Create an Upload instance with the S3 client and parameters
          const upload = new Upload({
            client: s3Client,
            partSize: 100000000, // size of each part in bytes (~100 MB)
            params: {
              ACL: 'private',
              Body: streamPassThrough,
              Bucket: exportBucketName,
              ContentType: 'application/zip',
              Key: zipFileName,
              StorageClass: 'STANDARD_IA',
            },
          });
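          // (Upload also accepts queueSize and leavePartsOnError; both are still at their defaults here.)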
          console.log('Starting upload');
          // Log multipart upload progress
          upload.on('httpUploadProgress', (progress) => {
            if (progress) {
              console.log(`progress: ${progress.part}, ${progress.loaded}`);
            }
            return progress;
          });
          upload
            .done()
            .then(async () => {
              console.log('Upload completed.');
              await updateExport(
                exportIdentifier,
                zipFileName,
                exportRepository,
              );
              await invokeExportCreatedTask(exportIdentifier);
              streamPassThrough.end();
              fs.unlinkSync(filePath);
              resolve();
            })
            .catch((err) => {
              console.error('Upload error:', err);
              reject(err);
            });
        },
      );
      streamPassThrough.on('end', () => {
        console.log(`ending stream`);
      });
    } catch (error) {
      console.log(`Got error creating stream to S3: ${error.message}`);
      // throw error;
      reject(error);
    }
  });
};
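For reference, gets3Client and blobItem.getS3ObjectWithStream are small project helpers: the first builds an S3Client for the given region (optionally with a custom request handler), and the second issues a GetObject for the blob's key and returns the response body stream, or nothing on failure. Roughly like this (simplified; the real implementations live elsewhere, and the bucket name is a placeholder):

const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');

const gets3Client = (region, requestHandler) =>
  new S3Client(requestHandler ? { region, requestHandler } : { region });

// In the real code this is a method on each blob item and `key` comes from the instance;
// it is called as blobItem.getS3ObjectWithStream(s3Client).
const getS3ObjectWithStream = async (s3Client, key) => {
  try {
    const { Body } = await s3Client.send(
      new GetObjectCommand({ Bucket: 'blob-bucket', Key: key }),
    );
    return Body;
  } catch (err) {
    console.log(`failed to stream ${key}: ${err}`);
    return undefined;
  }
};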
How can I improve this so that larger uploads don't stall at the 5-minute mark?