From 2b1b5adbfba665f0b01c77f84afdd8fee5beb0f1 Mon Sep 17 00:00:00 2001
From: Andrew Colello
Date: Mon, 16 Oct 2017 14:28:45 -0400
Subject: [PATCH] topic sequel written

---
Review notes (text between the "---" line and the diffstat is ignored by
git am):
 - ensure_repo: the final expression stays repos.first(...); "epos" was an
   undefined name.
 - ensure_repo no longer calls ensure_topics for an already stored repo:
   ensure_topics itself starts with ensure_repo, so the two methods called
   each other without end.
 - ensure_topics: guards against a missing project / missing topic list,
   inserts a topic mapping only if it is not there yet, and removes stale
   mappings through where(...).delete keyed on the persisted row (the block
   variable topic_entry is not visible in the clean-up loop).
 - retrieve_topics: takes refresh = false (it was referenced but never
   defined), the upsert filter hash is closed, and the stored document is
   tagged with owner/repo so that persister.find can match it.
 - Assumes the persister accepts a :topics entity; that is not part of this
   patch -- TODO confirm.
 - Indentation and blank lines were reconstructed from a whitespace-collapsed
   copy; hunk counts and the diffstat are recomputed, index hashes are not.

 lib/ghtorrent/commands/repo_updater.rb |  9 +----
 lib/ghtorrent/ghtorrent.rb             | 50 +++++++++++++++++++++++++-
 lib/ghtorrent/retriever.rb             | 34 +++++++++++++++---
 3 files changed, 78 insertions(+), 15 deletions(-)

diff --git a/lib/ghtorrent/commands/repo_updater.rb b/lib/ghtorrent/commands/repo_updater.rb
index 151ec4f..8b5e547 100644
--- a/lib/ghtorrent/commands/repo_updater.rb
+++ b/lib/ghtorrent/commands/repo_updater.rb
@@ -64,14 +64,6 @@ def update_mysql(owner, repo, retrieved)
       ght.ensure_fork_commits(owner, repo, parent_owner, parent[:name])
     end
 
-    if db.database_type == :postgres
-      t = retrieve_topics(user, repo)
-      retrieved['topics'] = t['names']
-      retrieved['topics'] = Sequel.pg_array(r['topics'], :text)
-    else
-      retrieved['topics'] = nil
-    end
-
     db.from(:projects, :users).\
         where(:projects__owner_id => :users__id).\
         where(:users__login => owner).\
@@ -86,6 +78,7 @@ def update_mysql(owner, repo, retrieved)
 
     info("Repo #{owner}/#{repo} updated")
     ght.ensure_languages(owner, repo)
+    ght.ensure_topics(owner, repo)
   end
 
   def get_project_mysql(owner, repo)
diff --git a/lib/ghtorrent/ghtorrent.rb b/lib/ghtorrent/ghtorrent.rb
index ad22837..e9c4596 100644
--- a/lib/ghtorrent/ghtorrent.rb
+++ b/lib/ghtorrent/ghtorrent.rb
@@ -601,14 +601,16 @@ def ensure_repo(user, repo, recursive = false)
       info "Added repo #{user}/#{repo}"
 
       ensure_repo_recursive(user, repo) if recursive
+      # ensure_repo_recursive already runs ensure_topics; avoid a second pass
+      ensure_topics(user, repo) unless recursive
 
       repos.first(:owner_id => curuser[:id], :name => repo)
     end
 
     def ensure_repo_recursive(owner, repo)
 
       functions = %w(ensure_commits ensure_labels ensure_pull_requests
-      ensure_issues ensure_watchers ensure_forks ensure_languages)
+      ensure_issues ensure_watchers ensure_forks ensure_languages ensure_topics)
 
       functions.each do |x|
         send(x, owner, repo)
@@ -1788,6 +1790,52 @@ def ensure_issue_label(owner, repo, issue_id, name)
 
     end
 
+    # Make sure that the topics of a repo are stored and mapped to its project.
+    # Topics are fetched through retrieve_topics; a topic missing from
+    # topic_categories is inserted, a missing topic_mappings row is added, and
+    # mappings whose topic is no longer in the retrieved list are deleted.
+    # Does nothing (and returns nil) when either the project or the topic list
+    # cannot be obtained.
+    def ensure_topics(owner, repo)
+      project = ensure_repo(owner, repo)
+
+      if project.nil?
+        warn "Could not find repo #{owner}/#{repo} for retrieving topics"
+        return
+      end
+
+      retrieved = retrieve_topics(owner, repo)
+
+      if retrieved.nil?
+        warn "Could not retrieve topics for #{owner}/#{repo}"
+        return
+      end
+
+      names = retrieved['names'] || []
+      mappings = db[:topic_mappings].where(:project_id => project[:id])
+
+      names.each do |topic|
+        # store and map each topic, skipping rows that already exist
+        topic_entry = db[:topic_categories].first(:topic_name => topic)
+
+        if topic_entry.nil?
+          db[:topic_categories].insert(:topic_name => topic)
+          topic_entry = db[:topic_categories].first(:topic_name => topic)
+        end
+
+        if mappings.first(:topic_id => topic_entry[:topic_id]).nil?
+          db[:topic_mappings].insert(:project_id => project[:id], :topic_id => topic_entry[:topic_id])
+        end
+      end
+
+      # remove any stored topics that are no longer accurate
+      stale_ids = db[:topic_categories].join(mappings, topic_id: :topic_id).all.
+        reject { |persisted_topic| names.include?(persisted_topic[:topic_name]) }.
+        map { |persisted_topic| persisted_topic[:topic_id] }
+
+      mappings.where(:topic_id => stale_ids).delete unless stale_ids.empty?
+    end
+
     # Run a block in a DB transaction. Exceptions trigger transaction rollback
     # and are rethrown.
     def transaction(&block)
diff --git a/lib/ghtorrent/retriever.rb b/lib/ghtorrent/retriever.rb
index 73194e8..82852c2 100644
--- a/lib/ghtorrent/retriever.rb
+++ b/lib/ghtorrent/retriever.rb
@@ -228,9 +228,6 @@ def retrieve_repo(user, repo, refresh = false)
         return
       end
 
-      t = retrieve_topics(user, repo)
-      r['topics'] = t['names']
-
       if refresh
         persister.upsert(:repos, {'name' => r['name'], 'owner.login' => r['owner']['login']}, r)
       else
@@ -569,8 +566,33 @@ def retrieve_issue_labels(owner, repo, issue_id)
-    def retrieve_topics(owner, repo)
+    def retrieve_topics(owner, repo, refresh = false)
       # volatile: currently available with api preview
       # https://developer.github.com/v3/repos/#list-all-topics-for-a-repository
-      url = ghurl("repos/#{owner}/#{repo}/topics")
-      api_request(url, media_type = "application/vnd.github.mercy-preview+json")
+      # Returns the stored topics document for the repo; the API is only queried
+      # when nothing is stored yet or when refresh is true. Returns nil if the
+      # API yields no result.
+      stored_topics = persister.find(:topics, {'owner' => owner, 'repo' => repo})
+
+      if stored_topics.empty? || refresh
+        url = ghurl("repos/#{owner}/#{repo}/topics")
+        r = api_request(url, "application/vnd.github.mercy-preview+json")
+
+        return if r.nil? || r.empty?
+
+        # Tag the response with its repo so that the lookup above, which
+        # filters on owner and repo, can find the stored document again
+        r['owner'] = owner
+        r['repo'] = repo
+
+        if refresh
+          persister.upsert(:topics, {'owner' => owner, 'repo' => repo}, r)
+        else
+          persister.store(:topics, r)
+          info "Added topics for #{owner} -> #{repo}"
+        end
+        r
+      else
+        debug "Topics for #{owner} -> #{repo} exists"
+        stored_topics.first
+      end
     end
 
     # Get current Github events