Skip to content

Commit

Permalink
Merge pull request #11 from bsh98/dev
Browse files Browse the repository at this point in the history
Support for contracts, tokens
  • Loading branch information
medvedev1088 authored Apr 1, 2022
2 parents de631b9 + 44367c5 commit 91aaa11
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 6 deletions.
16 changes: 14 additions & 2 deletions ethereum_bigquery_to_gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,36 @@ if [ -n "${start_date}" ] && [ -n "${end_date}" ]; then
filter_date=true
fi

# The logs table has topics column with type ARRAY<STRING>. BigQuery can't export it to CSV so we need to flatten it.
# The logs and contracts tables contain columns with type ARRAY<STRING>.
# BigQuery can't export it to CSV so we need to flatten it.
# Names of the scratch dataset and of the flattened tables created inside it.
export_temp_dataset="export_temp_dataset"
export_temp_logs_table="flattened_logs"
export_temp_contracts_table="flattened_contracts"

# Start from an empty scratch dataset: drop any leftover one, then create it.
bq rm -r -f "${export_temp_dataset}"
bq mk "${export_temp_dataset}"

# Read each flatten query as a single line (every newline becomes a space).
flatten_crypto_ethereum_logs_sql=$(tr '\n' ' ' < ./flatten_crypto_ethereum_logs.sql)
flatten_crypto_ethereum_contracts_sql=$(tr '\n' ' ' < ./flatten_crypto_ethereum_contracts.sql)

# When a date range was requested, append the same filter to both queries.
if [ "${filter_date}" = "true" ]; then
    date_filter_clause=" where date(block_timestamp) >= '${start_date}' and date(block_timestamp) <= '${end_date}'"
    flatten_crypto_ethereum_logs_sql+="${date_filter_clause}"
    flatten_crypto_ethereum_contracts_sql+="${date_filter_clause}"
fi

# Write the result of each flatten query into its scratch table.
echo "Executing query ${flatten_crypto_ethereum_logs_sql}"
bq --location=US query \
    --destination_table "${export_temp_dataset}.${export_temp_logs_table}" \
    --use_legacy_sql=false \
    "${flatten_crypto_ethereum_logs_sql}"
echo "Executing query ${flatten_crypto_ethereum_contracts_sql}"
bq --location=US query \
    --destination_table "${export_temp_dataset}.${export_temp_contracts_table}" \
    --use_legacy_sql=false \
    "${flatten_crypto_ethereum_contracts_sql}"

# Tables to process: the public tables first, then the two flattened scratch tables.
declare -a tables=()
for public_table in blocks transactions token_transfers traces tokens; do
    tables+=("bigquery-public-data:crypto_ethereum.${public_table}")
done
tables+=(
    "${export_temp_dataset}.${export_temp_logs_table}"
    "${export_temp_dataset}.${export_temp_contracts_table}"
)

for table in "${tables[@]}"
Expand All @@ -59,8 +70,9 @@ do
fi
done

# Rename output folder for flattened logs
# Move the exported files of the flattened tables into folders named after the
# original logs and contracts tables.
# The URLs are quoted so the shell hands the trailing * to gsutil unexpanded
# and never word-splits the bucket name.
gsutil -m mv "gs://${output_bucket}/${export_temp_dataset}.${export_temp_logs_table}/*" "gs://${output_bucket}/bigquery-public-data:crypto_ethereum.logs/"
gsutil -m mv "gs://${output_bucket}/${export_temp_dataset}.${export_temp_contracts_table}/*" "gs://${output_bucket}/bigquery-public-data:crypto_ethereum.contracts/"

# Cleanup: remove the scratch dataset created for the flattened tables.
bq rm -r -f "${export_temp_dataset}"
2 changes: 2 additions & 0 deletions ethereum_gcs_to_cloud_sql.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ declare -a tables=(
"token_transfers"
"traces"
"logs"
"contracts"
"tokens"
)

for table in "${tables[@]}"
Expand Down
12 changes: 12 additions & 0 deletions flatten_crypto_ethereum_contracts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
 * Flattens bigquery-public-data.crypto_ethereum.contracts for CSV export.
 *
 * Use block comments only in this file. ethereum_bigquery_to_gcs.sh joins the
 * file into a single line before running it, so a line comment (two dashes)
 * would comment out everything that follows it. The script may also append a
 * where clause, so the statement must end with the from clause and must not
 * end with a semicolon.
 *
 * The select list follows the column order of the contracts table declared in
 * schema/contracts.sql, including block_hash.
 */
select
    address,
    bytecode,
    /* Render the array as an array literal such as {a,b} so the nested data
       can be exported to CSV and imported to postgres as a text array. */
    concat('{', array_to_string(function_sighashes, ','), '}') as function_sighashes,
    is_erc20,
    is_erc721,
    block_number,
    block_hash,
    block_timestamp
from `bigquery-public-data.crypto_ethereum.contracts`
5 changes: 5 additions & 0 deletions indexes/contracts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* Composite primary key: (address, block_number) must be unique per row. */
alter table contracts
    add constraint contracts_pk
    primary key (address, block_number);

/* Secondary indexes; each one orders block_number descending. */
create index contracts_block_number_index
    on contracts (block_number desc);

create index contracts_is_erc20_index
    on contracts (is_erc20, block_number desc);

create index contracts_is_erc721_index
    on contracts (is_erc721, block_number desc);
3 changes: 3 additions & 0 deletions indexes/tokens.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/* Composite primary key: (address, block_number) must be unique per row. */
alter table tokens
    add constraint tokens_pk
    primary key (address, block_number);

/* Secondary index on block_number, ordered descending. */
create index tokens_block_number_index
    on tokens (block_number desc);
9 changes: 7 additions & 2 deletions schema/contracts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,10 @@ create table contracts
(
address varchar(42),
bytecode text,
function_sighashes text[]
);
function_sighashes text[],
is_erc20 boolean,
is_erc721 boolean,
block_number bigint,
block_hash varchar(66),
block_timestamp timestamp
);
7 changes: 5 additions & 2 deletions schema/tokens.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@ create table tokens
name text,
symbol text,
/* PostgreSQL integer types take no length modifier; int(11) is MySQL syntax. */
decimals int,
function_sighashes text[]
);
total_supply numeric(78),
block_number bigint,
block_hash varchar(66),
block_timestamp timestamp
);

0 comments on commit 91aaa11

Please sign in to comment.