hledger-flow: coinbase: new implementation (SIWC V2 compliant)

- Support for Coinbase SIWC Spring 2024's breaking API changes
  - Coinbase Advanced Trade lot calculation fixes
  - bitcoin.tax undocumented cost-basis "feature" fixes
      * Coinbase Trade / Advanced Trade
  - bitcoin.tax cost-basis related improvements
      * Coinbase Card / Earn
  - Support non-fiat pairings
      * Coinbase Trade / Advanced Trade
  - Add more fiat support
      * 99% supported (see backreference TODOs)
  - Subaccount improvements
  - New tags / improved tags
  - Refactoring / optimizations
This commit is contained in:
2024-04-25 14:13:06 -07:00
parent e70c899f90
commit f65a15af2c
2 changed files with 798 additions and 289 deletions

View File

@@ -36,102 +36,57 @@ source "${DOCKER_FINANCE_CONTAINER_REPO}/src/hledger-flow/lib/lib_preprocess.bas
[ -z "$global_in_filename" ] && exit 1
# NOTES:
#
# - Coinbase REST API CSVs are a beast. They provide inconsistent headers and
# are complex.
# Notes regarding this implementation:
#
# - When fetching new files, if you are expecting no changes, the transaction
# confirmation number may change and subsequently change your git-diff
# - Input data MUST be provided by Coinbase SIWC V2 REST API (via `fetch`).
#
# - Output data will consist of a "universal" CSV format for hledger rules.
#
# TODO: optimize
# NOTE(review): two `function` headers (`parse`, `add_to_headers`) share the
# single body below, and the body mixes statements that build a fixed `_header`
# with statements that append one column to the global headers. This looks like
# two revisions shown together - confirm which lines are live before editing.
#
# What the statements below do, as written:
#   - exit 1 when `universal_header`, `selected_header` or `$1` is empty
#   - run a csvjoin | xsv | sed | __parse pipeline into "$global_out_path"
#   - append ",$1" to `universal_header` unconditionally
#   - append ",$1" to `selected_header` only when
#     `lib_preprocess::test_header "$1"` succeeds (presumably: the column
#     exists in the input CSV - verify against lib_preprocess)
#
# Globals read:    global_in_filename, global_in_path, global_out_path
# Globals written: universal_header, selected_header
# Arguments:       $1 - column name
function parse()
function add_to_headers()
{
#
# Create account ID as first column, followed by TXID
#
# NOTE: *MUST* have filename with format, where X is account ID:
#
# XXXXXXXXXXXXX-XXXX-XXXX-XXXXXXXXXXXX_transactions.csv
#
# `_transactions.csv` is 17 characters, hence the `:0:-17` slice
local _account_id="${global_in_filename:0:-17}"
# Both global headers must already be populated by the caller
[ -z "$universal_header" ] && exit 1
[ -z "$selected_header" ] && exit 1
local _details_header="details_header2" # courtesy of fix_header()
local _column_name="$1"
[ -z "$_column_name" ] && exit 1
# Fixed column order that `xsv select` enforces further down
local _header="id,type,status,amount_amount,amount_currency,native_amount_amount,native_amount_currency,description,created_at,updated_at,resource,resource_path,instant_exchange,details_title,details_subtitle,${_details_header},network_transaction_fee_amount,network_transaction_fee_currency,network_transaction_amount_amount,network_transaction_amount_currency,to_currency,to_address_info_address,advanced_trade_fill_fill_price,advanced_trade_fill_product_id,advanced_trade_fill_order_id,advanced_trade_fill_commission,advanced_trade_fill_order_side,network_hash,network_status"
# A clean but hacky way to keep the stream intact while testing headers
#
# Each group is blanked when test_header succeeds, so that only the groups
# left non-empty are joined onto the stream in the pipeline below.
local _header_network="network_transaction_fee_amount,network_transaction_fee_currency,network_transaction_amount_amount,network_transaction_amount_currency,network_hash,network_status"
if lib_preprocess::test_header "$_header_network"; then _header_network=""; fi
local _header_to="to_currency,to_address_info_address"
if lib_preprocess::test_header "$_header_to"; then _header_to=""; fi
local _header_advanced="advanced_trade_fill_fill_price,advanced_trade_fill_product_id,advanced_trade_fill_order_id,advanced_trade_fill_commission,advanced_trade_fill_order_side"
if lib_preprocess::test_header "$_header_advanced"; then _header_advanced=""; fi
# NOTE: csvjoin BUG(?): csvjoin *MUST* use -I or else satoshis like 0.00000021 will turn into 21000000 BTC!
#
# The sed stage prefixes every row with the account ID, then rewrites the
# prefix on line 1 (the header row) to the literal column name `account_id`.
csvjoin -I --snifflimit 0 "$global_in_path" <(fix_header "$global_in_path") \
| if [ ! -z "$_header_network" ]; then add_to_header - "$_header_network"; else cat; fi \
| if [ ! -z "$_header_to" ]; then add_to_header - "$_header_to"; else cat; fi \
| if [ ! -z "$_header_advanced" ]; then add_to_header - "$_header_advanced"; else cat; fi \
| xsv select "$_header" \
| sed -e "s:^:${_account_id},:g" -e "1 s:^${_account_id},:account_id,:g" \
| __parse >"$global_out_path"
# TODO: complete hack because hledger-flow won't ignore deleted 1-in and
# 2-preprocessed files after script has been called. Is this still needed?
#
# NOTES:
# - Must work/be noted in rules file.
# - Parsing correct years into correct directories can create empty files
# in 2-preprocessed because the inappropriate data is sent to 1-in to
# begin with. This can possibly be handled in the fetching API but it's
# easier to do here for now.
#
# When nothing was written (zero-size output), emit a single placeholder row
if [[ ! -s "$global_out_path" ]]; then
echo "SKIP,SKIP,SKIP,000.00000000,SKIP,00000.00,SKIP,SKIP,0000-00-00 00:00:00,0000-00-00 00:00:00,SKIP,SKIP,SKIP,SKIP,SKIP,SKIP,0.00000000,SKIP,000.00000000,SKIP,00.00000000,0.00000000,0.00000000" \
>"$global_out_path"
fi
# Every column goes into the universal header; only columns for which
# test_header succeeds also go into the selected header.
universal_header+=",${_column_name}"
lib_preprocess::test_header "$_column_name" \
&& selected_header+=",${_column_name}"
}
# Emits the `details_header` column of a CSV with every comma and every
# double quote stripped from it (embedded commas would otherwise shift the
# column order of later stages).
#
# Arguments: $1 - path of the CSV to read
# Outputs:   the cleaned single column on stdout
function fix_header()
{
  local -r _csv_path="$1"

  # NOTE: output will be details_header2, not details_header
  xsv select "details_header" "${_csv_path}" | sed 's:[,"]::g'
}
# Adds column to CSV
#
# Runs csvjoin over two inputs and writes the joined CSV to stdout.
#
# Arguments:
#   $1 - first csvjoin input; visible callers pass `-` (presumably so csvjoin
#        reads the piped stream from stdin - confirm against csvkit docs)
#   $2 - text echoed through process substitution as the second input;
#        visible callers pass a comma-separated list of column names
#
# NOTE(review): two `function` headers precede the single body below
# (`add_to_header`, `join_to_header`); confirm which name is current.
function add_to_header()
function join_to_header()
{
# WARNING: csvjoin *MUST* use -I or else satoshis like 0.00000021 will turn into 21000000 BTC!
# TODO: `xsv join` is significantly faster but does not produce same results
csvjoin -I --snifflimit 0 "$1" <(echo "$2")
}
# Primary parse impl after previous preparation
#
# Internal implementation for forming "universal" CSV stream
#
# Runs a gawk program over the comma-separated stream on stdin (FS and OFS
# are both `,`) and writes the reformatted rows to stdout.
#
# Globals read (handed to gawk with -v):
#   global_year       - pattern matched against the timestamp field; rows
#                       that do not match are skipped
#   global_subaccount - printed after the direction column
#
# Visible behaviour of the gawk program:
#   - the first record (NR<2, the header row) is skipped
#   - `direction` is OUT when $5 starts with `-`, otherwise IN; the leading
#     `-` is then removed from $5 and $7
#   - in the timestamp, `T` becomes a space and `Z` / `+offset` are removed
#   - the input fields are echoed in order (amounts via "%.8f"), followed by
#     derived columns computed from $5, $7 and the fee / fill-price fields
#
# NOTE(review): the program text carries two column layouts ($9 vs $10 for
# created_at, $16 vs $18 for the network fee, ...) plus `@@ ... @@` lines that
# look like diff hunk markers; rows between hunks are not visible here.
# Confirm which layout is live before changing any field index.
#
# NOTE(review): the small_satoshi_multiplier line multiplies $24 by $5. In the
# 51-column layout labelled in the print list, $24 is from_name and
# advanced_trade_fill_fill_price is $47 - confirm the intended index.
#
# NOTE(review): fields are passed to awk `printf` as the format string
# (e.g. `printf $12 OFS`), so a `%` inside a field such as description would
# be read as a conversion specifier; `printf "%s" OFS, $12` would avoid that.
function __parse()
{
gawk -v global_year="$global_year" -v global_subaccount="$global_subaccount" \
'{
if (NR<2 || $10 !~ global_year)
if (NR<2 || $9 !~ global_year)
next
# Coinbase is not uniform in its - sign, so remove it and deal re-add in rules
# Remove `-` for calculations (must re-add in rules)
direction=($5 ~ /^-/ ? "OUT" : "IN")
sub(/^-/, "", $5)
sub(/^-/, "", $7)
# Cleanup the time
sub(/T/, " ", $10); sub(/T/, " ", $11)
sub(/Z/, "", $10); sub(/Z/, "", $11)
sub(/\+.*/, "", $10); sub(/\+.*/, "", $11)
# Cleanup timestamp
sub(/T/, " ", $9);
sub(/Z/, "", $9);
sub(/\+.*/, "", $9);
# Print
printf $1 OFS # account id
# Print for rules consumption
printf $1 OFS # account_id (prepended column)
printf $2 OFS # id (coinbase_id)
printf $3 OFS # type
printf $4 OFS # status
@@ -139,44 +94,92 @@ function __parse()
printf $6 OFS # amount_currency
printf $7 OFS # native_amount_amount
printf $8 OFS # native_amount_currency
printf $9 OFS # description
printf $10 OFS # created_at
printf $11 OFS # updated_at
printf $12 OFS # resource
printf $13 OFS # resource_path
printf $14 OFS # instant_exchange
printf $15 OFS # details_title
printf $16 OFS # details_subtitle
printf $17 OFS # details_header2
printf("%.8f", $18); printf OFS # network_transaction_fee_amount
printf $19 OFS # network_transaction_fee_currency
printf("%.8f", $20); printf OFS # network_transaction_amount_amount
printf $21 OFS # network_transaction_amount_currency
printf $22 OFS # to_currency
printf $23 OFS # to_address_info_address
printf $9 OFS # created_at
printf $10 OFS # resource
printf $11 OFS # resource_path
printf $12 OFS # description
printf $13 OFS # network_status
printf $14 OFS # network_network_name
printf $15 OFS # network_hash (txid)
printf("%.8f", $16); printf OFS # network_transaction_fee_amount
printf $17 OFS # network_transaction_fee_currency
printf $18 OFS # to_resource
printf $19 OFS # to_address
printf $20 OFS # to_email
printf $21 OFS # from_resource
printf $22 OFS # from_resource_path
printf $23 OFS # from_id
printf $24 OFS # from_name
printf $25 OFS # cancelable
printf $26 OFS # idem
printf("%.8f", $27); printf OFS # buy_total_amount
printf $28 OFS # buy_total_currency
printf("%.8f", $29); printf OFS # buy_subtotal_amount
printf $30 OFS # buy_subtotal_currency
printf("%.8f", $31); printf OFS # buy_fee_amount
printf $32 OFS # buy_fee_currency
printf $33 OFS # buy_id
printf $34 OFS # buy_payment_method_name
printf("%.8f", $35); printf OFS # sell_total_amount
printf $36 OFS # sell_total_currency
printf("%.8f", $37); printf OFS # sell_subtotal_amount
printf $38 OFS # sell_subtotal_currency
printf("%.8f", $39); printf OFS # sell_fee_amount
printf $40 OFS # sell_fee_currency
printf $41 OFS # sell_id
printf $42 OFS # sell_payment_method_name
printf("%.8f", $43); printf OFS # trade_fee_amount
printf $44 OFS # trade_fee_currency
printf $45 OFS # trade_id
printf $46 OFS # trade_payment_method_name
printf("%.8f", $47); printf OFS # advanced_trade_fill_fill_price
printf $48 OFS # advanced_trade_fill_product_id
printf $49 OFS # advanced_trade_fill_order_id
printf("%.8f", $50); printf OFS # advanced_trade_fill_commission
printf $51 OFS # advanced_trade_fill_order_side
#
# Add new columns to calculate fees against native currency price
# NOTE: 0 because CB does not accurately display fiat amount
# if satoshi is small (if less than $0.00)
if ($7 > 0) {printf("%.8f", $7 / $5)} printf OFS # native_amount_price
if ($7 > 0) {printf("%.8f", ($7 / $5) * $18)} printf OFS # native_network_transaction_fee_amount
if ($7 > 0) {printf("%.8f", ($5 * 0.01) / $7)} printf OFS # native_conversion_fee_amount
# NOTE: this is a guestimation of actual conversion fees because Coinbase
# never specifies exact amount.
#
# - 0 is used because Coinbase does not accurately display fiat amount,
# if satoshi is small (valued less than $0.00)
#
# CB Advanced Trading
printf("%.8f", $24); printf OFS # advanced_trade_fill_fill_price
printf $25 OFS # advanced_trade_fill_product_id
printf $26 OFS # advanced_trade_fill_order_id
printf("%.8f", $27); printf OFS # advanced_trade_fill_commission
printf $28 OFS # advanced_trade_fill_order_side
if ($7 > 0) {printf("%.8f", $7 / $5)}; printf OFS # native_amount_price
if ($7 > 0) {printf("%.8f", ($7 / $5) * $16)}; printf OFS # native_network_transaction_fee_amount
# USER ADDED: small_satoshi_multiplier (for sat trades less than a penny,
# since `@@ 0` is not needed (or any empty tax columns))
if ($7 == 0) {printf("%.8f", $24 * $5)}; printf OFS
#
# Add new column to calculate the difference between amount and network fee,
# i.e., amount_amount - network_transaction_fee_amount
#
printf $29 OFS # network_hash (txid, if applicable)
printf $30 OFS # network_status (if applicable)
if ($16 > 0) {printf("%.8f", $5 - $16)}; printf OFS # network_transaction_amount_amount
#
# Advanced Trade: add new column for calculating real value amount
#
# - Works with any pairing (not only fiat)
#
# Multiply amount_amount by advanced_trade_fill_fill_price
real_value_amount = $5 * $47
printf("%.8f", real_value_amount); printf OFS # advanced_trade_fill_real_value_amount
#
# Advanced Trade: add new columns for trade pairing
#
split($48, pair, "-"); # Pair exists as advanced_trade_fill_product_id
printf pair[1] OFS # advanced_trade_fill_pair_lhs (left-hand side of the pair)
printf pair[2] OFS # advanced_trade_fill_pair_rhs (right-hand side of the pair)
#
# Advanced Trade: add column for cost-basis (comment2)
#
# NOTE: sale/proceeds (real_value_amount) will have the fee removed by default
cost_basis=(direction ~ /^IN$/ ? real_value_amount + $50 : real_value_amount)
printf("%.8f", cost_basis); printf OFS # advanced_trade_fill_cost_basis_amount
printf direction OFS
printf global_subaccount
@@ -186,6 +189,127 @@ function __parse()
}' FS=, OFS=,
}
#
# Notes regarding the parsing process:
#
# 0. Since given headers may be variable, it's not possible to assert a single header.
#
#   - `fetch`-provided input CSVs are a beast. Headers are inconsistent and complex.
#
# 1. Create a "selected" header which describes the given header (the header given by `fetch`).
#
# 2. Select all of the columns in said "selected" header.
#
# 3. Join the "universal" header (which contains the entirety of all possible entries)
#
#   - Input streams with duplicate header columns will be joined with a new column name (e.g., `col2` instead of `col`).
#
# 4. Do a final select on input stream using the "universal" header.
#
#   - This will only select the correct columns while also removing duplicates (e.g., said `col2`)
#
# 5. Have a drink while contemplating life.
#
# Globals read:    global_in_filename, global_in_path, global_out_path
# Globals written: universal_header, selected_header
#                  (both are reset here, then extended by add_to_headers)
# Outputs:         the "universal" CSV stream, written to "$global_out_path"
#
function parse()
{
  #
  # Create account ID as first column, followed by TXID
  #
  # WARNING: Existing filename *MUST* have the following format (where X is account ID):
  #
  #   XXXXXXXXXXXXX-XXXX-XXXX-XXXXXXXXXXXX_transactions.csv
  #
  # TODO: fetch/preprocess: Coinbase API provides `account` type `vault`.
  #       This can be prepended to an account file so it's known that it's a vault
  #       (versus a wallet, which can also be prepended).

  # `_transactions.csv` is 17 characters, hence the `:0:-17` slice
  local -r _account_id="${global_in_filename:0:-17}"

  #
  # Reconstruct into "universal" header
  #

  # Required schema, per documentation, that appears to exist amongst all entries
  declare -g universal_header="id,type,status,amount_amount,amount_currency,native_amount_amount,native_amount_currency,created_at,resource,resource_path"

  # Selected header will always have minimum requirements
  #
  # Plain assignment (not `+=`): a value left over from the environment or
  # from an earlier call must not be glued in front of `id`, which would
  # corrupt the first column name handed to `xsv select`.
  declare -g selected_header="$universal_header"

  # Optional columns, in the exact order the "universal" format expects them
  local -ra _optional_columns=(
    # `description`
    "description"

    # `network`
    "network_status"
    "network_network_name"
    "network_hash"
    "network_transaction_fee_amount"
    "network_transaction_fee_currency"

    # `to`
    "to_resource"
    "to_address"
    "to_email"

    # `from`
    "from_resource"
    "from_resource_path"
    "from_id"
    "from_name"

    # Remaining SEND related
    "cancelable"
    "idem"

    # `buy`
    "buy_total_amount"
    "buy_total_currency"
    "buy_subtotal_amount"
    "buy_subtotal_currency"
    "buy_fee_amount"
    "buy_fee_currency"
    "buy_id"
    "buy_payment_method_name"

    # `sell`
    "sell_total_amount"
    "sell_total_currency"
    "sell_subtotal_amount"
    "sell_subtotal_currency"
    "sell_fee_amount"
    "sell_fee_currency"
    "sell_id"
    "sell_payment_method_name"

    # `trade`
    "trade_fee_amount"
    "trade_fee_currency"
    "trade_id"
    "trade_payment_method_name"

    # `advanced_trade_fill`
    # Note: Coinbase appears to always present this in an ordered set
    "advanced_trade_fill_fill_price"
    "advanced_trade_fill_product_id"
    "advanced_trade_fill_order_id"
    "advanced_trade_fill_commission"
    "advanced_trade_fill_order_side"
  )

  # add_to_headers always extends the universal header and extends the
  # selected header only for columns that pass its header test
  local _column
  for _column in "${_optional_columns[@]}"; do
    add_to_headers "$_column"
  done

  #
  # Finalize the "universal" format and parse
  #

  # NOTE: prepends account_id to header (this will now be the first column)
  #
  # The sed stage prefixes every row with the account ID, then rewrites the
  # prefix on line 1 (the header row) to the literal column name `account_id`.
  xsv select "$selected_header" "$global_in_path" \
    | join_to_header - "$universal_header" \
    | xsv select "$universal_header" \
    | sed -e "s:^:${_account_id},:g" -e "1 s:^${_account_id},:account_id,:g" \
    | __parse >"$global_out_path"
}
function main()
{
parse