From b820d798cef3d72a61a47708caa4f89e47bf7283 Mon Sep 17 00:00:00 2001
From: Georgios Gousios
Date: Fri, 26 Oct 2018 23:38:38 +0200
Subject: [PATCH] Periodic refresh process for project entries

This will attempt to refresh the project information, first using the
stored etag to check the latest update on GitHub.

ref: #74
---
Reviewer notes (free-form text in this position is ignored by git am):
 * refresh_repo: the age guard was inverted (rows older than a day were
   returned untouched, rows younger than a day hit the API). It now skips
   rows refreshed within the last day and checks older ones.
 * refresh_repo: a successful refresh now stamps updated_at together with
   the new etag, so the age guard measures from the last refresh.
 * ensure_repo: updated_at is given a Time again (the replaced value,
   Time.at(86400), was a Time) and the etag expression is simplified to
   the equivalent r['etag'].
 * This patch was edited by hand: the post-image blob ids on the index
   lines of ghtorrent.rb and refresher.rb still name the originally
   committed contents, and context-line whitespace was restored manually.
   If a hunk is rejected, retry with: git apply --ignore-whitespace

 lib/ghtorrent.rb           |  1 +
 lib/ghtorrent/ghtorrent.rb |  7 +++++--
 lib/ghtorrent/refresher.rb | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/ghtorrent/retriever.rb |  1 +
 4 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 lib/ghtorrent/refresher.rb

diff --git a/lib/ghtorrent.rb b/lib/ghtorrent.rb
index da3af2a..e92c7cc 100644
--- a/lib/ghtorrent.rb
+++ b/lib/ghtorrent.rb
@@ -53,6 +53,7 @@ module GHTorrent
 require 'ghtorrent/event_processing'
 require 'ghtorrent/ghtorrent'
 require 'ghtorrent/transacted_gh_torrent'
+require 'ghtorrent/refresher'
 
 # Multi-process queue clients
 require 'ghtorrent/multiprocess_queue_client'
diff --git a/lib/ghtorrent/ghtorrent.rb b/lib/ghtorrent/ghtorrent.rb
index 324d28d..22f0703 100644
--- a/lib/ghtorrent/ghtorrent.rb
+++ b/lib/ghtorrent/ghtorrent.rb
@@ -6,6 +6,7 @@
 require 'ghtorrent/retriever'
 require 'ghtorrent/persister'
 require 'ghtorrent/geolocator'
+require 'ghtorrent/refresher'
 
 module GHTorrent
   class Mirror
@@ -15,6 +16,7 @@ class Mirror
     include GHTorrent::Retriever
     include GHTorrent::Persister
     include GHTorrent::Geolocator
+    include GHTorrent::Refresher
 
     attr_reader :settings, :persister, :logger
 
@@ -564,7 +566,7 @@ def ensure_repo(user, repo, recursive = false)
 
       unless currepo.nil?
         debug "Repo #{user}/#{repo} exists"
-        return currepo
+        return refresh_repo(user, repo, currepo)
       end
 
       r = retrieve_repo(user, repo, true)
@@ -585,7 +587,8 @@ def ensure_repo(user, repo, recursive = false)
                    :description => unless r['description'].nil? then r['description'][0..254] else nil end,
                    :language => r['language'],
                    :created_at => date(r['created_at']),
-                   :updated_at => Time.at(86400))
+                   :updated_at => Time.now,
+                   :etag => r['etag'])
 
       unless r['parent'].nil?
         parent_owner = r['parent']['owner']['login']
diff --git a/lib/ghtorrent/refresher.rb b/lib/ghtorrent/refresher.rb
new file mode 100644
index 0000000..1b646f6
--- /dev/null
+++ b/lib/ghtorrent/refresher.rb
@@ -0,0 +1,48 @@
+require 'ghtorrent/api_client'
+require 'ghtorrent/settings'
+require 'ghtorrent/retriever'
+
+module GHTorrent
+  # Keeps already-stored project rows in sync with GitHub.
+  module Refresher
+
+    # Minimum age, in seconds, a project row must reach before GitHub is
+    # queried for it again.
+    REFRESH_INTERVAL = 3600 * 24
+
+    # Refresh the stored information for +owner+/+repo+ if it may be stale.
+    #
+    # Rows refreshed less than REFRESH_INTERVAL seconds ago are returned
+    # untouched. Older rows trigger a +last_updated+ lookup using the stored
+    # etag; the repo is re-retrieved only when that lookup reports a time
+    # later than the row's +updated_at+.
+    #
+    # owner::    login of the repository owner
+    # repo::     repository name
+    # db_entry:: the existing +projects+ row (reads :id, :etag, :updated_at)
+    #
+    # Returns the re-read +projects+ row when a refresh was attempted,
+    # otherwise +db_entry+ itself.
+    def refresh_repo(owner, repo, db_entry)
+      last_refresh = db_entry[:updated_at].to_i
+
+      # Throttle: rows refreshed recently are served as-is.
+      return db_entry if Time.now.to_i - last_refresh < REFRESH_INTERVAL
+
+      url = ghurl "repos/#{owner}/#{repo}"
+      return db_entry unless last_updated(url, db_entry[:etag]).to_i > last_refresh
+
+      fresh_repo = retrieve_repo(owner, repo, true)
+
+      unless fresh_repo.nil?
+        # Stamp the row as well as storing the new etag, so the throttle
+        # above measures from this refresh.
+        db.from(:projects).
+          where(:id => db_entry[:id]).
+          update(:etag => fresh_repo['etag'], :updated_at => Time.now)
+      end
+
+      db[:projects].first(:id => db_entry[:id])
+    end
+  end
+end
diff --git a/lib/ghtorrent/retriever.rb b/lib/ghtorrent/retriever.rb
index b3032be..44c9e1b 100644
--- a/lib/ghtorrent/retriever.rb
+++ b/lib/ghtorrent/retriever.rb
@@ -230,6 +230,7 @@ def retrieve_repo(user, repo, refresh = false)
 
         if refresh
           persister.upsert(:repos, {'name' => r['name'], 'owner.login' => r['owner']['login']}, r)
+          info "Refreshed repo #{user} -> #{repo}"
         else
           persister.store(:repos, r)
           info "Added repo #{user} -> #{repo}"