From 28ef105b907804f211eef96d89eaa57d3fb4d41e Mon Sep 17 00:00:00 2001 From: Rick Song Date: Wed, 19 Feb 2020 19:49:56 -0800 Subject: [PATCH] feat: add maximum concurrent threads by using Parallel gem Before, asset_sync would create a separate thread for every asset that was uplaoded. When there are a large number of assets being uploaded, this could lead to processes crashing due to too many threads being created. By limiting the number of threads, this speeds up performance while preventing crashes from resource starvation. --- README.md | 4 ++++ lib/asset_sync/config.rb | 3 +++ lib/asset_sync/storage.rb | 20 ++++++++++++++------ spec/unit/storage_spec.rb | 17 +++++++++++++++++ 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index be324828..665ea710 100644 --- a/README.md +++ b/README.md @@ -244,6 +244,9 @@ AssetSync.configure do |config| # # Upload files concurrently # config.concurrent_uploads = false + # + # Number of threads when concurrent_uploads is enabled + # config.concurrent_uploads_max_threads = 10 # # Fail silently. Useful for environments such as Heroku # config.fail_silently = true @@ -342,6 +345,7 @@ AssetSync.config.gzip_compression == ENV['ASSET_SYNC_GZIP_COMPRESSION'] * **manifest**: (`true, false`) when enabled, will use the `manifest.yml` generated by Rails to get the list of local files to upload. **experimental**. **default:** `'false'` * **include_manifest**: (`true, false`) when enabled, will upload the `manifest.yml` generated by Rails. **default:** `'false'` * **concurrent_uploads**: (`true, false`) when enabled, will upload the files in different Threads, this greatly improves the upload speed. **default:** `'false'` +* **concurrent_uploads_max_threads**: when concurrent_uploads is enabled, this determines the number of threads that will be created. **default:** `10` * **enabled**: (`true, false`) when false, will disable asset sync. **default:** `'true'` (enabled) * **ignored\_files**: an array of files to ignore e.g. `['ignore_me.js', %r(ignore_some/\d{32}\.css)]` Useful if there are some files that are created dynamically on the server and you don't want to upload on deploy **default**: `[]` * **cache\_asset\_regexps**: an array of files to add cache headers e.g. `['cache_me.js', %r(cache_some\.\d{8}\.css)]` Useful if there are some files that are added to sprockets assets list and need to be set as 'Cacheable' on uploaded server. Only rails compiled regexp is matched internally **default**: `[]` diff --git a/lib/asset_sync/config.rb b/lib/asset_sync/config.rb index 7fcef500..9d55f3bb 100644 --- a/lib/asset_sync/config.rb +++ b/lib/asset_sync/config.rb @@ -27,6 +27,7 @@ class Invalid < StandardError; end attr_accessor :cache_asset_regexps attr_accessor :include_manifest attr_accessor :concurrent_uploads + attr_accessor :concurrent_uploads_max_threads # FOG configuration attr_accessor :fog_provider # Currently Supported ['AWS', 'Rackspace'] @@ -86,6 +87,7 @@ def initialize self.cache_asset_regexps = [] self.include_manifest = false self.concurrent_uploads = false + self.concurrent_uploads_max_threads = 10 @additional_local_file_paths_procs = [] load_yml! if defined?(::Rails) && yml_exists? @@ -222,6 +224,7 @@ def load_yml! self.cache_asset_regexps = yml['cache_asset_regexps'] if yml.has_key?("cache_asset_regexps") self.include_manifest = yml['include_manifest'] if yml.has_key?("include_manifest") self.concurrent_uploads = yml['concurrent_uploads'] if yml.has_key?('concurrent_uploads') + self.concurrent_uploads_max_threads = yml['concurrent_uploads_max_threads'] if yml.has_key?('concurrent_uploads_max_threads') self.azure_storage_account_name = yml['azure_storage_account_name'] if yml.has_key?("azure_storage_account_name") self.azure_storage_access_key = yml['azure_storage_access_key'] if yml.has_key?("azure_storage_access_key") diff --git a/lib/asset_sync/storage.rb b/lib/asset_sync/storage.rb index 7e6a702c..7befb82b 100644 --- a/lib/asset_sync/storage.rb +++ b/lib/asset_sync/storage.rb @@ -248,19 +248,27 @@ def upload_files # fixes: https://github.com/rumblelabs/asset_sync/issues/19 local_files_to_upload = local_files - ignored_files - remote_files + always_upload_files local_files_to_upload = (local_files_to_upload + get_non_fingerprinted(local_files_to_upload)).uniq + # Only files. + local_files_to_upload = local_files_to_upload.select { |f| File.file? "#{path}/#{f}" } if self.config.concurrent_uploads - threads = ThreadGroup.new + jobs = Queue.new + local_files_to_upload.each { |f| jobs.push(f) } + jobs.close + + num_threads = [self.config.concurrent_uploads_max_threads, local_files_to_upload.length].min # Upload new files - local_files_to_upload.each do |f| - next unless File.file? "#{path}/#{f}" # Only files. - threads.add(Thread.new { upload_file f }) + workers = Array.new(num_threads) do + Thread.new do + while f = jobs.pop + upload_file(f) + end + end end - sleep 1 while threads.list.any? # wait for threads to finish uploading + workers.map(&:join) else # Upload new files local_files_to_upload.each do |f| - next unless File.file? "#{path}/#{f}" # Only files. upload_file f end end diff --git a/spec/unit/storage_spec.rb b/spec/unit/storage_spec.rb index 3cc5d7f6..47eb7100 100644 --- a/spec/unit/storage_spec.rb +++ b/spec/unit/storage_spec.rb @@ -70,6 +70,23 @@ storage.upload_files end + it 'should allow custom number of threads' do + @config.concurrent_uploads = true + @config.concurrent_uploads_max_threads = 2 + storage = AssetSync::Storage.new(@config) + + allow(storage).to receive(:get_local_files).and_return(@local_files) + allow(storage).to receive(:get_remote_files).and_return(@remote_files) + allow(File).to receive(:file?).and_return(true) # Pretend they all exist + + expect(Thread).to receive(:new).exactly(2).times.and_call_original + (@local_files - @remote_files + storage.always_upload_files).each do |file| + expect(storage).to receive(:upload_file).with(file) + end + + storage.upload_files + end + it 'should upload updated non-fingerprinted files' do @local_files = [ 'public/image.png',