I'm using sjcl to hash files client-side so that I can check whether they already exist on the server before commencing a full upload.
However, it seems a bit slow: it takes about 15 seconds to hash an 8 MB file. I'm not sure if that's because the library is slow, JavaScript is slow, or the algorithm is inherently slow. It's using SHA-256, which is probably overkill for what I need. Speed is key -- cryptographic security and collision resistance aren't particularly important.
Is there a faster way to do this?
$(document).on('drop', function(dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function(file) {
        var reader = new FileReader();
        var pos = 0;
        var startTime = +new Date();
        var hashObj = new sjcl.hash.sha256();
        reader.onprogress = function(progress) {
            // Hash only the bytes that arrived since the last progress event.
            var chunk = new Uint8Array(reader.result).subarray(pos, progress.loaded);
            hashObj.update(chunk);
            pos = progress.loaded;
            if(progress.lengthComputable) {
                console.log((progress.loaded/progress.total*100).toFixed(1)+'%');
            }
        };
        reader.onload = function() {
            var endTime = +new Date();
            console.log('hashed', file.name, 'in', endTime - startTime, 'ms');
            // Hash whatever is left over after the final progress event.
            var chunk = new Uint8Array(reader.result, pos);
            if(chunk.length > 0) hashObj.update(chunk);
            console.log(sjcl.codec.hex.fromBits(hashObj.finalize()));
        };
        reader.readAsArrayBuffer(file);
    });
});
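For context, here's roughly what the finished hash feeds into. This is only a sketch: the /files/exists endpoint, its {exists: ...} response shape, and the uploadFile() helper are placeholders for illustration, not real code from my app.
// Hypothetical follow-up: ask the server whether it already has this file.
var hexHash = sjcl.codec.hex.fromBits(hashObj.finalize()); // as in onload above
$.getJSON('/files/exists', { hash: hexHash }, function(response) {
    if(response.exists) {
        console.log(file.name, 'is already on the server; skipping upload');
    } else {
        uploadFile(file); // hypothetical upload helper
    }
});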
Edit: Just discovered SparkMD5 as per this answer. Initial tests have it running in under a second for the same 8 MB file, but it's still slower than I'd like.
Comments:
- xxHash advertises pretty impressive speeds. – Jason LeBrun, Jan 4, 2014
- @JasonLeBrun: I'm trying xxHash now. It won't take an ArrayBuffer as input, which might be problematic. – mpen, Jan 4, 2014
2 Answers
xxHash gives 32-bit hashes, and it seems to be about 30% faster than SparkMD5. However, it does not seem to work with HTML5's ArrayBuffer, so the file has to be read as text.
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var chunkSize = 1024 * 1024 * 2; // 2 MB per chunk

$(document).on('drop', function (dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function (file) {
        var startTime = +new Date(), elapsed;
        var chunks = Math.ceil(file.size / chunkSize);
        var currentChunk = 0;
        var xxh = XXH();
        var fileReader = new FileReader();
        var readNextChunk = function() {
            var start = currentChunk * chunkSize;
            var end = Math.min(start + chunkSize, file.size);
            fileReader.readAsText(blobSlice.call(file, start, end));
        };
        fileReader.onload = function (e) {
            console.log("read chunk nr", currentChunk + 1, "of", chunks);
            xxh.update(e.target.result);
            ++currentChunk;
            if (currentChunk < chunks) {
                readNextChunk();
            } else {
                elapsed = +new Date() - startTime;
                console.info("computed hash", xxh.digest().toString(16), 'for file', file.name, 'in', elapsed, 'ms');
            }
        };
        fileReader.onerror = function () {
            console.warn("oops, something went wrong.");
        };
        readNextChunk();
    });
});
I think blobSlice will make a copy of the file, which I'm not super keen on. Nor do I particularly like treating binary data as text. I created this alternative version that works with the ArrayBuffer API by digging through the source of xxHash -- it turns out only one method is missing to make HTML5's Uint8Array work like a Node.js Buffer.
/**
 * Hack to make Uint8Array work like a Node.js Buffer
 *
 * @param {Buffer} targetBuffer Buffer to copy into
 * @param {Number} targetStart Optional, Default: 0
 * @param {Number} sourceStart Optional, Default: 0
 * @param {Number} sourceEnd Optional, Default: source length
 * @see http://nodejs.org/api/buffer.html#buffer_buf_copy_targetbuffer_targetstart_sourcestart_sourceend
 * @see https://developer.mozilla.org/en-US/docs/Web/API/Uint32Array
 */
Uint8Array.prototype.copy = function(targetBuffer, targetStart, sourceStart, sourceEnd) {
    targetStart = targetStart || 0;
    sourceStart = sourceStart || 0;
    sourceEnd = sourceEnd || this.length;
    for(var i = sourceStart; i < sourceEnd; ++i) {
        targetBuffer[targetStart + i] = this[i];
    }
};
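// Quick sanity check of the polyfill above (illustration only, not part of the
// original answer): after the hack, a Uint8Array copies like Node's Buffer#copy.
var target = new Uint8Array(4);
new Uint8Array([1, 2, 3]).copy(target);
console.log(target); // Uint8Array [1, 2, 3, 0]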
$(document).on('drop', function(dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function(file) {
        var reader = new FileReader();
        var pos = 0;
        var startTime = +new Date();
        var xxh = XXH();
        reader.onprogress = function(progress) {
            // Create a view over just the newly loaded bytes.
            var length = progress.loaded - pos;
            var arr = new Uint8Array(reader.result, pos, length);
            pos += length;
            xxh.update(arr);
            if(progress.lengthComputable) {
                console.log((progress.loaded/progress.total*100).toFixed(1)+'%');
            }
        };
        reader.onload = function() {
            // Hash any remaining bytes after the final progress event.
            var arr = new Uint8Array(reader.result, pos);
            xxh.update(arr);
            var elapsed = +new Date() - startTime;
            console.info("computed hash", xxh.digest().toString(16), 'for file', file.name, 'in', elapsed, 'ms');
        };
        reader.readAsArrayBuffer(file);
    });
});
Unfortunately, they're pretty much identical in terms of speed, and it's still doing a copy. However, this runs in about 270 ms on the original 8 MB file, which is much better than 15 s.
SparkMD5 is quite a bit quicker:
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var chunkSize = 1024 * 1024 * 2; // 2 MB per chunk

$(document).on('drop', function (dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function (file) {
        var startTime = +new Date(), elapsed;
        var chunks = Math.ceil(file.size / chunkSize);
        var currentChunk = 0;
        var spark = new SparkMD5.ArrayBuffer();
        var fileReader = new FileReader();
        var readNextChunk = function() {
            var start = currentChunk * chunkSize;
            var end = Math.min(start + chunkSize, file.size);
            fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
        };
        fileReader.onload = function (e) {
            console.log("read chunk nr", currentChunk + 1, "of", chunks);
            spark.append(e.target.result); // append array buffer
            ++currentChunk;
            if (currentChunk < chunks) {
                readNextChunk();
            } else {
                elapsed = +new Date() - startTime;
                console.info("computed hash", spark.end(), 'for file', file.name, 'in', elapsed, 'ms'); // compute hash
            }
        };
        fileReader.onerror = function () {
            console.warn("oops, something went wrong.");
        };
        readNextChunk();
    });
});
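Side note: for files small enough to read in a single shot, SparkMD5 also offers a one-shot helper (assuming the build in use exposes SparkMD5.ArrayBuffer.hash), which skips the chunking loop entirely:
var reader = new FileReader();
reader.onload = function() {
    // Hash the whole buffer at once; only sensible when the file fits comfortably in memory.
    console.log(SparkMD5.ArrayBuffer.hash(reader.result));
};
reader.readAsArrayBuffer(file);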