#!/usr/bin/env bash

# docker-finance | modern accounting for the power-user
#
# Copyright (C) 2021-2025 Aaron Fiore (Founder, Evergreen Crypto LLC)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

#
# "Libraries"
#

# The utility library is located through the container repository path;
# without it nothing below can work, so bail out immediately.
if [[ -z "$DOCKER_FINANCE_CONTAINER_REPO" ]]; then
  exit 1
fi

if ! source "${DOCKER_FINANCE_CONTAINER_REPO}/src/finance/lib/internal/lib_utils.bash"; then
  exit 1
fi

#
# Facade
#

# Public entry point: parses the given arguments, runs the tax report
# generation and hands the resulting exit status to lib_utils::catch.
function lib_taxes::taxes()
{
  lib_taxes::__parse_args "$@"

  lib_taxes::__taxes
  lib_utils::catch $?
}

#
# Implementation
#

#
# NOTE:
#
# - This library requires double-entry for all accounts as generated by hledger
#
# - WARNING: hledger rules must include following:
#
#   1. Entry description *MUST* only be the date / timestamp (Y-m-d h:m:s z)
#   2. The tag 'taxed_as' must be set to supported tags in every needed entry
#   3.
#      Expense accounts for fees must contain the string "fee" in a subaccount
#
# - This implementation currently only supports the website Bitcoin.tax
#

#
# Parse and validate the key/value arguments of the taxes command.
#
# Globals read:
#   global_usage, global_arg_delim_2 (key/value delimiter),
#   global_arg_delim_3 (value list delimiter)
#
# Globals written:
#   global_arg_all (array), global_arg_tag (readonly array),
#   global_arg_account (readonly array), global_arg_write (readonly),
#   global_arg_year (readonly; empty means "all years"), IFS
#
# Arguments:
#   "$@" - all=..., tag[s]=..., account[s]=..., write=..., year[s]=...
#
# Any unsupported key, value or combination ends in lib_utils::die_usage.
#
function lib_taxes::__parse_args()
{
  [ -z "$global_usage" ] && lib_utils::die_fatal
  [ -z "$global_arg_delim_2" ] && lib_utils::die_fatal
  [ -z "$global_arg_delim_3" ] && lib_utils::die_fatal

  local -r _usage="
\e[32mDescription:\e[0m

  Generate tax reports from all tagged accounts

\e[32mUsage:\e[0m

  $ $global_usage <<[all${global_arg_delim_2}<type>]> | <[<[tag${global_arg_delim_2}<tag>] | [account${global_arg_delim_2}<account>]]> | [year${global_arg_delim_2}<year>] | [write${global_arg_delim_2}<boolean>]

\e[32mArguments:\e[0m

  All options (operation type):

    all${global_arg_delim_2}<all | tag | account>

  Tag (taxable event type):

    tag${global_arg_delim_2}<income | spends | trades>

  Account (journal account):

    account${global_arg_delim_2}<account1[${global_arg_delim_3}account2${global_arg_delim_3}...]>

  Tax year:

    year${global_arg_delim_2}<all | year>

  Write reports to file (don't print to terminal)

    write${global_arg_delim_2}<on | off | true | false>

\e[32mExamples:\e[0m

  \e[37;2m# Write all operation types for default (current) year\e[0m
  $ $global_usage all${global_arg_delim_2}all

  \e[37;2m# Write all operation types for all available years\e[0m
  $ $global_usage all${global_arg_delim_2}all year${global_arg_delim_2}all

  \e[37;2m# Print tagged income on all accounts for default (current) year\e[0m
  $ $global_usage all${global_arg_delim_2}account tag${global_arg_delim_2}income write${global_arg_delim_2}off

  \e[37;2m# Write only income and spends for Gemini and Coinbase account for year 2022\e[0m
  $ $global_usage tag${global_arg_delim_2}income${global_arg_delim_3}spends account${global_arg_delim_2}gemini${global_arg_delim_3}coinbase year${global_arg_delim_2}2022

  \e[37;2m# *Print* (don't write) all taxable tags for Nexo account in all available years\e[0m
  $ $global_usage all${global_arg_delim_2}tag account${global_arg_delim_2}nexo year${global_arg_delim_2}all write${global_arg_delim_2}off
"

  #
  # Ensure supported arguments
  #

  [ $# -eq 0 ] && lib_utils::die_usage "$_usage"

  local _arg
  for _arg in "$@"; do
    if [[ ! "$_arg" =~ ^(all|tag[s]?|account[s]?|write|year[s]?)${global_arg_delim_2} ]]; then
      lib_utils::die_usage "$_usage"
    fi
  done

  #
  # Parse arguments before testing
  #

  local _arg_all _arg_tag _arg_account _arg_write _arg_year
  local _key _value

  # Parse key for value
  for _arg in "$@"; do
    _key="${_arg%${global_arg_delim_2}*}"
    _value="${_arg:$((${#_key} + 1))}"

    case "$_key" in
      all) _arg_all="$_value" ;;
      tag | tags) _arg_tag="$_value" ;;
      account | accounts) _arg_account="$_value" ;;
      write | writes) _arg_write="$_value" ;;
      year | years) _arg_year="$_value" ;;
      *) continue ;;
    esac

    # A recognized key must always carry a value
    [ -z "$_value" ] && lib_utils::die_usage "$_usage"
  done

  #
  # Test for valid ordering/functionality of argument values
  #

  # Arg: all
  if [[ -n "$_arg_all" ]]; then
    # Can't use with every other activated
    if [[ -n "$_arg_tag" && -n "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: tag (needs exactly one of all= or account=)
  if [[ -n "$_arg_tag" ]]; then
    if [[ -n "$_arg_all" && -n "$_arg_account" ]] \
      || [[ -z "$_arg_all" && -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: account (needs exactly one of all= or tag=)
  if [[ -n "$_arg_account" ]]; then
    if [[ -n "$_arg_all" && -n "$_arg_tag" ]] \
      || [[ -z "$_arg_all" && -z "$_arg_tag" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: write
  if [[ -n "$_arg_write" ]]; then
    # Need a valid arg
    if [[ -z "$_arg_all" && -z "$_arg_tag" && -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: year
  if [[ -n "$_arg_year" ]]; then
    # Need a valid arg
    if [[ -z "$_arg_all" && -z "$_arg_tag" && -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  #
  # Test argument values, set globals
  #

  # NOTE(review): this assignment is intentionally left global (not local).
  # The keymap generation in lib_taxes::__reports_obfs_gen joins arrays with
  # "${array[*]}" and splits them on "," in gawk, which presumably relies on
  # this delimiter being "," -- confirm before scoping IFS.
  IFS="$global_arg_delim_3"

  local _read=()

  # Arg: all
  if [[ -n "$_arg_all" ]]; then
    # If all= {tag,account} or {account,tag} then set to all=all
    if [[ "$_arg_all" =~ tag[s]? && "$_arg_all" =~ account[s]? ]]; then
      _arg_all="all"
    fi

    # Read args from all
    read -ra _read <<<"$_arg_all"

    for _arg in "${_read[@]}"; do
      # Support values
      if [[ ! "$_arg" =~ ^all$|^tag[s]?$|^account[s]?$ ]]; then
        lib_utils::die_usage "$_usage"
      fi

      # If all=all then no need for all={tag,account} and tag= or account=
      if [[ "$_arg" == "all" ]]; then
        if [[ -n "$_arg_tag" || -n "$_arg_account" || "${#_read[@]}" -gt 1 ]]; then
          lib_utils::die_usage "$_usage"
        fi
      fi

      # If all=tag then no need for tag= and if all=account then no need for account=
      if [[ "$_arg" =~ ^tag[s]?$ && -n "$_arg_tag" ]] \
        || [[ "$_arg" =~ ^account[s]?$ && -n "$_arg_account" ]]; then
        lib_utils::die_usage "$_usage"
      fi

      # If all=tag then need account= or if all=account then need tag=
      if [[ "$_arg" != "all" ]]; then
        if [[ "${#_read[@]}" -lt 2 && "$_arg" =~ ^tag[s]?$ && -z "$_arg_account" ]] \
          || [[ "${#_read[@]}" -lt 2 && "$_arg" =~ ^account[s]?$ && -z "$_arg_tag" ]]; then
          lib_utils::die_usage "$_usage"
        fi
      fi
    done

    global_arg_all=("${_read[@]}")
  fi

  # Arg: tag
  if [[ -n "$_arg_tag" ]]; then
    local _args=()

    # Reconstruct tag(s) for uniform plurality
    read -ra _read <<<"$_arg_tag"
    for _arg in "${_read[@]}"; do
      case "$_arg" in
        income | spends | trades) _args+=("$_arg") ;;
        spend) _args+=("spends") ;;
        trade) _args+=("trades") ;;
        *) lib_utils::die_usage "$_usage" ;;
      esac
    done

    declare -gr global_arg_tag=("${_args[@]}")
  elif [[ "${global_arg_all[*]}" =~ (all|tag) ]]; then
    declare -gr global_arg_tag=("income" "spends" "trades")
  fi

  # Arg: account
  if [[ -n "$_arg_account" ]]; then
    read -ra _read <<<"$_arg_account"
    declare -gr global_arg_account=("${_read[@]}")
  fi

  # Arg: write
  if [[ -n "$_arg_write" ]]; then
    if [[ ! "$_arg_write" =~ ^on$|^off$|^true$|^false$ ]]; then
      lib_utils::die_usage "$_usage"
    fi
    declare -gr global_arg_write="$_arg_write"
  else
    declare -gr global_arg_write="on" # Set default
  fi

  # Arg: year
  # TODO: implement range
  if [[ -n "$_arg_year" ]]; then
    # TODO: 20th century support
    if [[ ! "$_arg_year" =~ ^20[0-9][0-9]$ && "$_arg_year" != "all" ]]; then
      lib_utils::die_usage "$_usage"
    fi

    if [[ "$_arg_year" == "all" ]]; then
      # Implementation will treat an empty as all years.
      # The value is stored empty instead of being unset afterwards because
      # a readonly variable cannot be unset.
      declare -gr global_arg_year=""
    else
      declare -gr global_arg_year="$_arg_year"
    fi
  else
    global_arg_year="$(date +%Y)" # Set default
    declare -gr global_arg_year
  fi
}

#
# Generate the tax reports: print them to the terminal when writing is
# turned off, otherwise write them to the profile's taxes directory.
#
# Globals read:
#   global_arg_year, global_arg_write, global_parent_profile,
#   global_child_profile
#
function lib_taxes::__taxes()
{
  # Supported tags (locally "global": read by called functions)
  local -r _income_tags=("INCOME" "GIFTIN" "MINING" "BORROW")
  local -r _spends_tags=("SPEND" "DONATION" "GIFT" "REPAY")
  local -r _trades_tags=("BUY" "SELL" "SWAP" "MATCH" "REBATE" "RAW_TRADE" "PARTIAL_TRADE")

  # An empty year means all years
  local _print_year="${global_arg_year:-all}"

  [ -z "$global_parent_profile" ] && lib_utils::die_fatal
  [ -z "$global_child_profile" ] && lib_utils::die_fatal

  lib_utils::print_custom "\n"
  lib_utils::print_info \
    "Generating tax reports in year '${_print_year}' for '${global_parent_profile}/${global_child_profile}' ..."
  lib_utils::print_custom "\n"

  # Print full (transparent) output
  if [[ "$global_arg_write" =~ ^off$|^false$ ]]; then
    lib_taxes::__taxes_print
    lib_utils::catch $?
    return $?
  fi

  # Write all types of output (including full)
  time lib_taxes::__taxes_write
  lib_utils::catch $?
}

#
# Run the given hledger command ("$@") and emit the non-empty posting
# comments of its CSV output, one per line, with all double quotes removed.
#
function lib_taxes::__taxes_print_comments()
{
  "$@" \
    | xsv select '"posting-comment"' \
    | tail -n +2 \
    | sed -e 's:"::g' -e '/^$/d'
}

#
# The default income/spend/trading formatter (stdin: posting comments).
#
# Arguments:
#   $1 - the taxed_as tag being processed
#   $2 - non-empty when the records are trades (fee columns, no memo)
#
# NOTE:
#   - Crypto rebates are treated as opening positions (for cost-basis)
#     since it's technically *not* income (such as card cashback rebates)
#
#   - "RAW_TRADE" has no need for a parser/formatter since the comment
#     contains all of the trade but
#       * *MUST* contain tags found within trades tags (BUY/SELL/FEE)
#         in order to be printed and not skipped.
#       * TODO: HACK: *MUST* have cost-basis calculated here (for #51)
#
# TODO: AAVE/Compound income formatter (may be replaced by contract query)
#
function lib_taxes::__taxes_print_default()
{
  gawk -v tag="$1" -v is_trades="$2" \
    'BEGIN {
       FS=OFS=","
       str_one="Date" OFS "Action" OFS "Account" OFS "Symbol" OFS "Volume" OFS "Currency" OFS "Total"
       str_two="FeeCurrency" OFS "Fee"
       has_fee = (is_trades ? 1 : 0)
       printf (has_fee ? str_one OFS str_two : str_one OFS "Memo")
       printf "\n"
     }
     {
       if (!NR) { exit }

       Date = $1
       Action = $2
       Account = $3
       Symbol = $4
       Volume = $5
       Currency = $6
       Total = $7
       Memo = (has_fee ? "" : $8)

       if (has_fee) {
         FeeCurrency = (has_fee && $9 != "" ? $8 : "")
         Fee = (FeeCurrency && $9 != 0 ? $9 : "")
       }

       # Total is a literal 0. Get market value from bitcoin.tax
       if (Total == 0) {Total=""}

       # Create "correct" symbols
       if (Symbol == "$") {Symbol="USD"}
       if (Currency == "" || Currency == "$") {Currency="USD"}
       if (FeeCurrency == "$") {FeeCurrency="USD"}

       # Remove negative values
       sub(/-/, "", Volume); sub(/-/, "", Total)
       if (Fee) {sub(/-/, "", Fee)}

       # Remove trailing zeros after decimals
       if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
       if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
       if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}

       # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
       if (tag == "RAW_TRADE") {
         printf Date OFS Action OFS Account OFS

         switch (Action) {
           case "SELL":
             if (FeeCurrency == Symbol) {
               if (Fee ~ /\./) {
                 split(Fee, cost_basis, ".")
                 rhs=length(cost_basis[2])
                 CostBasis=sprintf("%." rhs "f", Volume - Fee)
               } else {
                 CostBasis=Volume - Fee
               }
             } else {
               CostBasis=Volume
             }
             printf Currency OFS Total OFS Symbol OFS CostBasis OFS
             break
           case "BUY":
             if (FeeCurrency == Currency) {
               if (Fee ~ /\./) {
                 split(Fee, cost_basis, ".")
                 rhs=length(cost_basis[2])
                 CostBasis=sprintf("%." rhs "f", Total + Fee)
               } else {
                 CostBasis=Total + Fee
               }
             } else {
               CostBasis=Total
             }
             printf Symbol OFS Volume OFS Currency OFS CostBasis OFS
             break
           default:
             printf "FATAL: unsupported Action: " Action
             print $0
             exit
         }

         printf OFS # FeeCurrency handled below
                    # Fee handled below
         printf "\n"
       }

       # TODO: HACK: print SPEND line for non-fiat fee (see #51)
       # NOTE: cost-basis *MUST* be calculated above or within preprocess
       if (is_trades && has_fee && FeeCurrency != "USD" && Fee) {
         printf Date OFS "SPEND" OFS Account OFS
         printf FeeCurrency OFS Fee OFS "USD"
         printf "\n";
       }

       # - Drop USD-only spends (such as with Coinbase Card) because they are not needed.
       # - Do not print empty USD entries or empty symbol (Algorand) entries.
       # - Do not print testnet symbols.
       # TODO: regexp optimize
       # TODO: HACK (isolating from RAW_TRADE): see #51 (and related lib_taxes work-around)
       if (tag != "RAW_TRADE" \
           && (Symbol != "USD" && Symbol != Currency) \
           && Volume != 0 && Volume != "" \
           && Symbol != "BTCt" && Symbol != "DOGEt" && Symbol != "tLTC" && Symbol != "LTCTEST") {
         printf Date OFS
         printf Action OFS
         printf Account OFS
         printf Symbol OFS
         printf Volume OFS
         printf Currency OFS
         printf Total OFS

         if (has_fee) {
           # TODO: HACK: see #51 (and related lib_taxes work-around)
           # NOTE: cost-basis *MUST* be calculated within preprocess
           if (is_trades && Fee && FeeCurrency != "USD") {
             printf "" OFS ""
           } else {
             printf FeeCurrency OFS Fee
           }
         } else {
           # Memo is free text: never use it as the format string
           printf "%s", Memo
         }

         printf "\n"
       }
     }'
}

#
# The "MATCH" trading (triple/quadruple) formatter (stdin: posting comments).
#
# Arguments:
#   $1 - the taxed_as tag being processed
#
# NOTE:
#   - Expects a minimum txid (or trade_id) connected triplet grouping of
#     BUY/SELL/FEE or SELL/BUY/FEE
#
#   - WARNING: income BORROW / spend REPAY can be treated as a MATCH and
#     is not actually a trade (but the FEE is needed, which is a MATCH and
#     is the only connective tissue is the txid within all tags).
#     So, if BORROW/REPAY is given a MATCH, a caller's patching function
#     *MUST* parse out the respective tags and move to income/spends.
#
function lib_taxes::__taxes_print_match()
{
  sort -u \
    | gawk \
      'BEGIN { FS=OFS="," }
       # Remove redundant txid before bringing to one line
       $1 == p1 { prev = prev OFS $3 OFS $5 OFS $6; next }
       {
         if (NR>1) print prev
         prev = $0
         p1 = $1
       }
       END { print prev }' \
    | gawk -M -v PREC=100 \
      '{
         # NOTE: if previous only produces OFS, this will exit
         if (!$NF) { exit }

         # If more than triplet form, support quadruple form
         # TODO: support more than 4 if necessary
         if ($13 == $10) {
           # BUY (should be default provided by hledger rules)
           printf $1 OFS $2 OFS $3 OFS $4 OFS $5 OFS $6 OFS $7 OFS
           printf $8 OFS $9 OFS $10 OFS $11 OFS
           printf("%.18f", $12+$15)
           printf "\n"
         } else if ($13 == $3) {
           # SELL
           printf "FATAL: not implemented"
           printf "\n"
           # TODO: kill
         } else {
           # Default (triplets)
           print
         }
       }' FS=, OFS=, \
    | gawk -v tag="$1" \
      'BEGIN {
         FS=OFS=","
         printf "Date" OFS
         printf "Action" OFS
         printf "Account" OFS
         printf "Symbol" OFS
         printf "Volume" OFS
         printf "Currency" OFS
         printf "Total" OFS
         printf "FeeCurrency" OFS
         printf "Fee"
         printf "\n"
       }
       {
         # Assign output vars
         Date = $2
         Action = $3
         if (Action == "SELL") {
           # Hack in BUY by default (see below)
           tag = "BUY"; Action = tag
         }
         Account = $4
         Symbol = $5
         Volume = $6
         Currency = $11
         Total = $12
         FeeCurrency = $8
         Fee = $9
         Memo = ""

         # Create "correct" symbols
         if (Symbol == "$") {Symbol="USD"}
         if (Currency == "" || Currency == "$") {Currency="USD"}
         if (FeeCurrency == "$") {FeeCurrency="USD"}

         # Remove negative values
         sub(/-/, "", Volume); sub(/-/, "", Total); sub(/-/, "", Fee)

         # Remove trailing zeros after decimals
         if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
         if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
         if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}

         # Reset Action
         #
         # NOTE:
         #   - Flips BUY/SELL direction for pairs that are known to be
         #     commonly sold in one direction but rules have dictated
         #     another (flips fiat or aToken/stkToken/cTokens etc.)
         #
         #   - Rationale: because bitcoin.tax produces a bar graph of buys
         #     and sells and it makes sense for the graph to look accurate
         #     (obviously does not work for BTC/ETH pairings etc., but it
         #     should look more "sane" for fiat currencies)
         #
         # NOTE: has no effect on their end calculations
         #
         # NOTE(review): substr(Currency,1,1) is a single character, so the
         # patterns below (two or more characters each) can never match and
         # only the Symbol == "USD" test can trigger the flip.
         # Left unchanged on purpose because enabling it alters the reports;
         # confirm the intent before matching on the whole Currency.
         if (Symbol == "USD" \
             || substr(Currency,1,1) ~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/) {
           if (Action == "BUY") {Action = "SELL"}
         }

         #
         # Actions
         #

         switch (Action) {
           case "SELL":
             # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
             if (FeeCurrency == Symbol) {
               if (Fee ~ /\./) {
                 split(Fee, cost_basis, ".")
                 rhs=length(cost_basis[2])
                 CostBasis=sprintf("%." rhs "f", Volume - Fee)
               } else {
                 CostBasis=Volume - Fee
               }
             } else {
               CostBasis=Volume
             }

             printf Date OFS Action OFS Account OFS
             printf Currency OFS Total OFS Symbol OFS CostBasis OFS
             printf OFS # FeeCurrency/Fee handled below
                        # Fee handled below
             printf "\n"

             # TODO: HACK: print SPEND line for non-fiat fee (see #51)
             # NOTE: cost-basis *MUST* be calculated above or within preprocess
             if (FeeCurrency != "USD") {
               printf Date OFS "SPEND" OFS Account OFS
               printf FeeCurrency OFS Fee OFS "USD"
               printf "\n";
             }
             break
           case "BUY":
             # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
             if (FeeCurrency == Currency) {
               if (Fee ~ /\./) {
                 split(Fee, cost_basis, ".")
                 rhs=length(cost_basis[2])
                 CostBasis=sprintf("%." rhs "f", Total + Fee)
               } else {
                 CostBasis=Total + Fee
               }
             } else {
               CostBasis=Total
             }

             printf Date OFS Action OFS Account OFS
             printf Symbol OFS Volume OFS Currency OFS CostBasis OFS
             printf OFS # FeeCurrency handled below
                        # Fee handled below
             printf "\n"

             # TODO: HACK: print SPEND line for non-fiat fee (see #51)
             # NOTE: cost-basis *MUST* be calculated above or within preprocess
             if (FeeCurrency != "USD") {
               printf Date OFS "SPEND" OFS Account OFS
               printf FeeCurrency OFS Fee OFS "USD"
               printf "\n";
             }
             break
           case "BORROW":
             # Reset tail end vars since this is an "INCOME" tag
             FeeCurrency = $14
             Fee = $15
             Memo = $17

             # Do not print aToken/cToken/stkToken
             # NOTE(review): sym is one character, so this test is always
             # true and the line is always printed (see the note above).
             sym=substr(Symbol,1,1)
             if (sym !~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/) {
               printf Date OFS Action OFS Account OFS
               printf Symbol OFS Volume OFS "USD" OFS OFS
               printf "\n";
             }

             # Create new SPEND record based on attached FEE
             # NOTE(review): there is no separator between "USD" and Memo,
             # so a non-empty Memo is glued to the currency -- confirm.
             printf Date OFS "SPEND" OFS Account OFS
             printf FeeCurrency OFS Fee OFS "USD" Memo
             printf "\n";
             break
           case "FEE":
             # This FEE is *not* related to a trade
             # Convert to SPEND
             printf Date OFS "SPEND" OFS Account OFS
             printf Symbol OFS Volume OFS "USD"
             printf "\n";

             # TODO: why is REPAY not like BORROW...
             if ($7 == "REPAY") {
               # Do not print aToken/cToken/stkToken
               sym=substr($8,1,1)
               if (sym !~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/) {
                 printf Date OFS $7 OFS Account OFS
                 printf $8 OFS $9 OFS "USD" OFS OFS
                 printf "\n";
               }
             }
             break
           default:
             printf "FATAL: unsupported Action: " Action # TODO: improve
             print $0
             exit
         }
       }'
}

#
# The "PARTIAL_TRADE" formatter (specifically, unwrapping tokens)
# (stdin: posting comments).
#
# Arguments:
#   $1 - the taxed_as tag being processed
#
# NOTE:
#   - Only attaches FEE to SELL/BUY (2 lines total) for cost-basis
#     (the contract address will be on a per-symbol/per-contract basis)
#
#   - Any fees that are marked as PARTIAL_TRADE that do not have an amount
#     counterpart (not the fee) will then be marked as a SPEND
#
function lib_taxes::__taxes_print_partial()
{
  sort -u \
    | gawk \
      '{
         if (!NR) { exit }
         txid = $1; FeeCurrency = $5; Fee = $6
         a[$1]=a[$1] ? a[$1] OFS txid OFS FeeCurrency OFS Fee : $0
       }
       END { for(i in a) {print a[i]} }' FS=, OFS=, \
    | gawk -v tag="$1" \
      'BEGIN {
         FS=OFS=","
         printf "Date" OFS
         printf "Action" OFS
         printf "Account" OFS
         printf "Symbol" OFS
         printf "Volume" OFS
         printf "Currency" OFS
         printf "Total" OFS
         printf "FeeCurrency" OFS
         printf "Fee"
         printf "\n"
       }
       {
         txid_first = $1
         txid_last = $9

         if ((txid_first != txid_last) && (NF == 11)) {
           printf "FATAL: not matching txid"
           exit
         }

         # Assign output vars
         Date = $2
         Action = $3
         Account = $4
         Symbol = $5
         Volume = $6
         Currency = $7
         Total = $8
         FeeCurrency = $10
         Fee = $11

         # Create "correct" symbols
         if (Symbol == "$") {Symbol="USD"}
         if (Currency == "" || Currency == "$") {Currency="USD"}
         if (FeeCurrency == "$") {FeeCurrency="USD"}

         # Remove negative values
         sub(/-/, "", Volume); sub(/-/, "", Total); sub(/-/, "", Fee)

         # Remove trailing zeros after decimals
         if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
         if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
         if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}

         # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
         if (FeeCurrency == Currency) {
           if (Action == "BUY") {CostBasis=Total + Fee}
           else {CostBasis=Total - Fee}
         } else {
           CostBasis=Total
         }

         printf Date OFS
         printf Action OFS
         printf Account OFS
         printf Symbol OFS
         printf Volume OFS
         printf Currency OFS
         printf CostBasis OFS
         printf OFS # FeeCurrency handled below
                    # Fee handled below
         printf "\n";

         # TODO: HACK: print SPEND line for non-fiat fee (see #51)
         # NOTE: cost-basis *MUST* be calculated above
         if (FeeCurrency != "USD") {
           printf Date OFS "SPEND" OFS Account OFS
           printf FeeCurrency OFS Fee OFS "USD"
           printf "\n";
         }
       }'
}

#
# Collapse the repeated per-tag headers of reverse-sorted formatter output
# (stdin) into one header, printed only when at least one data row exists.
#
function lib_taxes::__taxes_print_merge()
{
  gawk \
    '{
       # Remove duplicate header
       if (NR == 1) { Header=$0 }

       # Save non-header row(s)
       if ($0 != Header) { Data[i++] = $0 }
     }
     END {
       # Only print header if non-header row(s) exist
       if (length(Data)) { print Header }

       # Print all non-header row(s)
       for (i=0; i < length(Data); i++) { print Data[i] }
     }' FS=, OFS=,
}

#
# Print full (transparent) report
#
# Arguments (optional, the respective globals are used when omitted):
#   $1     - tax year
#   $2 ... - tag type(s): income, spend[s], trade[s]
#
# Outputs one CSV report per tag type to stdout.
#
function lib_taxes::__taxes_print()
{
  #
  # Despite global args, allow optional args (works with write impl)
  #

  # Tax year
  local _arg_year="$1"
  [ -z "$_arg_year" ] && _arg_year="$global_arg_year"

  # Tag type(s)
  local _arg_tag=("${@:2}")
  [ -z "${_arg_tag[*]}" ] && _arg_tag=("${global_arg_tag[@]}")

  #
  # Print given tag(s)
  #

  # Set base hledger command (the date query is only added when a year is
  # given so that "all years" does not pass an empty argument to hledger)
  local _base_hledger=("${global_hledger_cmd[@]}" "print" "${global_arg_account[@]}")
  if [[ -n "$_arg_year" ]]; then
    _base_hledger+=("date:${_arg_year}")
  fi
  _base_hledger+=("-O" "csv")

  # Cycle through tags
  local _type _tag
  for _type in "${_arg_tag[@]}"; do
    # Reset per type: a trades flag must not leak into a later income/spends type
    local _tags=()
    local _is_trades=""

    case "$_type" in
      income)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (INCOME, GIFTIN, MINING or BORROW)
        # Account (account or wallet name, e.g. Coinbase or Blockchain)
        # Symbol (BTC, ETH, LTC, etc)
        # Volume (number of coins received)
        # Currency (optional, specify alternative to your default currency, such as USD, GBP or EUR)
        # Total (Fair price or value in Currency or your home currency, or blank for market value
        # Memo (optional, name of sender or item sold)

        # For example,
        # Date,Action,Account,Symbol,Volume
        # 2020-01-01 13:00:00 -0800,INCOME,"Blockchain Wallet",BTC,1
        _tags=("${_income_tags[@]}")
        ;;
      spend | spends)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (SPEND, DONATION, GIFT or REPAY)
        # Account (name of account or wallet, e.g. Coinbase or Blockchain)
        # Symbol (BTC, LTC, ETH, etc)
        # Volume (number of coins spent)
        # Currency (optional, specify alternative to your default currency, such as USD, GBP or EUR)
        # Total (Fair price or cost in Currency or your home currency, or blank for market value
        # Memo (optional, name of recipient or item purchased)

        # For example,
        # Date,Action,Account,Symbol,Volume,Total,Currency
        # 2020-01-01 13:00:00 -0800,SPEND,"Blockchain Wallet",BTC,1,
        _tags=("${_spends_tags[@]}")
        ;;
      trade | trades)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (BUY, SELL or SWAP)
        # Account (override the exchange or wallet name, e.g. Coinbase)
        # Symbol (BTC, LTC, DASH, etc)
        # Volume (number of coins traded)
        # Currency (specify currency such as USD, GBP, EUR or coins, BTC or LTC)
        # Total (you can use the total Currency amount or price per coin)
        # Price (price per coin in Currency or blank for lookup)
        # FeeCurrency (currency of fee if different than Currency)
        # Fee (any additional costs of the trade)

        # For example,
        # Date,Action,Account,Symbol,Volume,Price,Currency,Fee
        # 2020-01-01 13:00:00 -0800,BUY,Coinbase,BTC,1500,USD,5.50
        _tags=("${_trades_tags[@]}")
        _is_trades=true
        ;;
      *)
        lib_utils::die_fatal "Unsupported tag '$_type'"
        ;;
    esac

    # Cycle through supported tags
    for _tag in "${_tags[@]}"; do
      # Add tag to base command
      local _hledger=("${_base_hledger[@]}" "tag:taxed_as=${_tag}*$")

      case "$_tag" in
        MATCH)
          lib_taxes::__taxes_print_comments "${_hledger[@]}" \
            | lib_taxes::__taxes_print_match "$_tag"
          ;;
        PARTIAL_TRADE)
          lib_taxes::__taxes_print_comments "${_hledger[@]}" \
            | lib_taxes::__taxes_print_partial "$_tag"
          ;;
        *)
          lib_taxes::__taxes_print_comments "${_hledger[@]}" \
            | lib_taxes::__taxes_print_default "$_tag" "$_is_trades"
          ;;
      esac
    done \
      | sort -r \
      | lib_taxes::__taxes_print_merge

    # WARNING:
    #   - Do not run unique (sort -ru)! There are legitimate income/trade entries
    #     that appear to be duplicates but are different at the millisecond level.
    #
    # TODO: there *could* be a UID in an extra column. A unique sort could
    # then be run since milliseconds are *NOT* universally supported upstream.
  done
}

#
# Write generated taxes to files
#
# For every tax year in range and every requested tag type this writes a
# full report, patches the full reports, then derives and verifies the
# obfuscated reports below ${global_child_profile_flow}/taxes/<year>/<type>.
#
# Globals read:
#   global_hledger_cmd, global_arg_account, global_arg_year, global_arg_tag,
#   global_child_profile_flow
#
function lib_taxes::__taxes_write()
{
  # Set base hledger command (no date query when all years are requested)
  local _base_hledger=("${global_hledger_cmd[@]}" "print" "${global_arg_account[@]}")
  if [[ -n "$global_arg_year" ]]; then
    _base_hledger+=("date:${global_arg_year}")
  fi
  _base_hledger+=("-O" "csv")

  # If tax year is 'all', get all given records and capture oldest date/year
  # (then write to appropriate year dirs)
  local _current_year
  _current_year="$(date +%Y)"

  local _oldest_year
  if [[ -z "$global_arg_year" ]]; then
    lib_utils::print_normal "Capturing the year of the oldest tagged record ..."
    _oldest_year="$("${_base_hledger[@]}" | head -n2 | xsv select \"date\" | tail -n1 | cut -d'-' -f1)"

    # A header-only result yields the column name instead of a year, which
    # would evaluate to 0 in the arithmetic loop below
    if [[ ! "$_oldest_year" =~ ^[0-9]{4}$ ]]; then
      lib_utils::die_fatal "No records available to write"
    fi
  elif [[ "$global_arg_year" -gt "$_current_year" ]]; then
    # Invalid future year
    lib_utils::die_fatal "Given year is greater than $_current_year"
  else
    # Generate only current year
    _current_year="$global_arg_year"
    _oldest_year="$_current_year"
  fi

  # Temp dir for processing
  local _taxes_tmp_dir
  _taxes_tmp_dir="$(mktemp -d -p /tmp docker-finance_XXX)" \
    || lib_utils::die_fatal "Unable to create temp dir"

  lib_utils::print_normal "  ─ Taxes"
  lib_utils::print_custom "    \e[32m│\e[0m\n"

  # Iterate through all given years
  [ -z "$global_child_profile_flow" ] && lib_utils::die_fatal

  # NOTE: functions called below inherit these local vars
  local _tax_year _arg_tag
  for ((_tax_year = _oldest_year; _tax_year <= _current_year; _tax_year++)); do
    # Final output dir
    local _tax_root_dir="${global_child_profile_flow}/taxes/${_tax_year}"

    # Subdir for individual reports
    for _arg_tag in "${global_arg_tag[@]}"; do
      local _dir="${_tax_root_dir}/${_arg_tag}"
      [ ! -d "$_dir" ] && mkdir -p "$_dir"
    done

    # Generate transparent (full) reports (one background job per tag type)
    local _ext_full="report_full.csv"
    for _arg_tag in "${global_arg_tag[@]}"; do
      (
        lib_utils::print_custom "    \e[32m├─\e[34m\e[1;3m ${_arg_tag} (full)\e[0m\n"

        local _base_path="${_tax_root_dir}/${_arg_tag}/${_tax_year}_${_arg_tag}"
        local _out_file="${_base_path}-${_ext_full}"

        lib_taxes::__taxes_print "$_tax_year" "$_arg_tag" >"${_out_file}"
        lib_utils::catch $?

        if [[ ! -f "$_out_file" || ! -s "$_out_file" ]]; then
          lib_utils::print_warning "Nothing generated for '${_arg_tag}' (no taxable event found for the year ${_tax_year})"
        fi
      ) &
    done
    wait

    # Patch transparent (full) reports
    # WARNING: depends on previous base path / extension format
    # TODO: no, not this way
    lib_utils::print_custom "    \e[32m│\e[0m\n"
    lib_utils::print_custom "    \e[32m│\e[0m\t\e[37;2m ... Applying patches (full) ...\e[0m\n"
    lib_utils::print_custom "    \e[32m│\e[0m\n"

    lib_taxes::__reports_patch
    lib_utils::catch $?

    # Generate specialized reports based on full reports
    for _arg_tag in "${global_arg_tag[@]}"; do
      local _base_path="${_tax_root_dir}/${_arg_tag}/${_tax_year}_${_arg_tag}"
      local _in_file="${_base_path}-${_ext_full}"

      if [ ! -f "$_in_file" ]; then
        lib_utils::print_warning "$_in_file does not exist, skipping"
        continue
      fi

      # Only perform specialized reports on files with more than a header
      local _count
      _count="$(wc -l <"$_in_file")"

      if [[ "$_count" -gt 1 ]]; then
        #
        # Obfuscated
        #

        lib_utils::print_custom "    \e[32m├─\e[34m\e[1;3m ${_arg_tag} (obfuscated)\e[0m\n"

        local _ext_obfs="report_obfs.csv"
        local _out_file="${_base_path}-${_ext_obfs}"

        lib_taxes::__reports_obfs
        lib_utils::catch $?

        #
        # Verify reports
        #

        lib_utils::print_custom "    \e[32m│\e[0m\n"
        lib_utils::print_custom "    \e[32m│\e[0m\t\e[37;2m ... Verifying $_arg_tag (full/obfuscated) ...\e[0m\n"
        lib_utils::print_custom "    \e[32m│\e[0m\n"

        local _full
        _full="$(wc -l <"${_base_path}-${_ext_full}")"

        local _obfs
        _obfs="$(wc -l <"${_base_path}-${_ext_obfs}")"

        # TODO: more involved verification
        if [[ "$_full" -ne "$_obfs" ]]; then
          lib_utils::die_fatal "Report verification: line count mistmatch"
        fi
      fi
    done
  done

  local -r _return=$?
  if [ $_return -eq 0 ]; then
    lib_utils::print_custom "\n"
    lib_utils::print_info "Done!"
  fi

  # Enforce cleanup
  [ -d "$_taxes_tmp_dir" ] && rm -fr -- "${_taxes_tmp_dir:?}"

  return $_return
}

#
# Reports (patch)
#
# Moves records between the full reports so that every report only holds
# its own tags, applies the Coinbase patch and verifies the result.
#
# NOTE: inherits _tax_root_dir, _tax_year, _ext_full, _taxes_tmp_dir and the
# supported tag arrays from its callers.
#
# TODO: patching should be done at the output level, not file level
# (otherwise, income/spend entries in MATCHed trades-output are not put into their respective output)
function lib_taxes::__reports_patch()
{
  local _income="${_tax_root_dir}/income/${_tax_year}_income-${_ext_full}"
  local _spends="${_tax_root_dir}/spends/${_tax_year}_spends-${_ext_full}"
  local _trades="${_tax_root_dir}/trades/${_tax_year}_trades-${_ext_full}"

  # Ensure appropriate tags in respective files
  #
  # NOTE:
  #   - Due to PARTIAL_TRADES, there may be a lone FEE SPEND straggler
  #   - Due to MATCH, income/spend BORROW/REPAY may be in trades output
  #   - Bitcoin.tax:
  #     * will allow SPENDS in trades file (though, with error complaints)
  #     * TODO: HACK: due to #51, trades with non-fiat fees will need a SPEND
  #       line added for the disposal of said fee until upstream resolves
  #       their importer.
  #
  # WARNING:
  #   - Do *NOT* do a unique sort here!
  #     Unique sort will make txs go missing (such as Algorand fees) because
  #     transactions can include duplicate fee amounts with the same exact
  #     timestamp (e.g., for multiple contract calls)

  if [[ "${global_arg_tag[*]}" =~ trades && "${global_arg_tag[*]}" =~ spends ]]; then
    if [ -f "$_spends" ]; then
      grep SPEND "$_trades" | sed 's:,*$:,,FEE:g' >>"$_spends"
      grep REPAY "$_trades" >>"$_spends"
      sort -r -o "$_spends" "$_spends"
    fi
  fi

  if [[ "${global_arg_tag[*]}" =~ trades && "${global_arg_tag[*]}" =~ income ]]; then
    if [ -f "$_income" ]; then
      grep BORROW "$_trades" >>"$_income"
      sort -r -o "$_income" "$_income"
    fi
  fi

  if [[ "${global_arg_tag[*]}" =~ trades ]]; then
    if [ -f "$_trades" ]; then
      sed -i -e '/SPEND/d' -e '/BORROW/d' -e '/REPAY/d' "$_trades"
    fi
  fi

  #
  # Coinbase patch
  #
  # NOTE:
  #   - There are times where Coinbase will report Coinbase Pro withdrawals,
  #     resulting in double spends in reports!!!
  #
  #     TODO: why does this not work for 2022 and beyond?
  #           Did coinbase stop this practice in 2022?
  #
  #   - Coinbase records the fiat value of the FEE. That's the needed entry, so
  #     keep the Coinbase Pro entry but tag-on the Coinbase fiat FEE amount
  #
  #   - Coinbase Pro was sunsetted in late 2022

  # TODO: for 2025+, subaccount will be included before comma
  #  (must add appropriate regex if patch must ever be applied to 2025+)
  local _coinbase_string="(coinbase,|coinbase-pro,)"

  # TODO: year should be properly parsed in arg parser
  if grep -E "$_coinbase_string" "$_spends" &>/dev/null \
    && [[ "$_tax_year" =~ 20(([1][2-9])|([2][0-1])) ]]; then
    local _tmp_dir
    _tmp_dir="$(mktemp -d -p "$_taxes_tmp_dir" patch_XXX)"

    local _tmp_file
    _tmp_file="$(mktemp -p "$_tmp_dir" spends_XXX)"

    grep -E "$_coinbase_string" "$_spends" \
      | grep "FEE" \
      | grep "coinbase-pro" -B1 \
      | grep -v "\--" \
      | gawk '{ a[$1]=a[$1] ? a[$1] OFS $(NF-2) FS $(NF-1) FS $NF : $0}
              END { for(i in a) {print a[i]} }' FS=, OFS=, \
      | gawk '{
                if (($2 != $10) && ($4 != $12) && ($5 != $13) && ($6 != $14) && ($8 != $16)) {
                  print "FATAL: MISMATCH = " $0
                  exit 1
                }
                print
              }' FS=, OFS=, \
      | sed 's:,:\n:8; P; D' \
        >"$_tmp_file"

    # Drop the original (pre-patch) entries from the spends report
    local _line
    while IFS= read -r _line; do
      [[ -n "$_line" ]] || continue
      sed -i "/${_line}/d" "$_spends"
    done <"$_tmp_file"

    sed 'N;s/\n/,/g' "$_tmp_file" \
      | gawk '{ print $9 OFS $10 OFS $11 OFS $12 OFS $13 OFS $14 OFS $7 OFS $16 }' FS=, OFS=, \
        >>"$_spends" \
      && sort -ru -o "$_spends" "$_spends"
  fi

  #
  # Verify success of patches
  #

  lib_utils::print_custom "    \e[32m│\e[0m\t\e[37;2m ... Verifying patches (full) ...\e[0m\n"
  lib_utils::print_custom "    \e[32m│\e[0m\n"

  # Only the reports of requested tag types are generated by this run, so
  # only those are verified (a missing report is fatal in the verifier)
  local _type
  for _type in "${global_arg_tag[@]}"; do
    case "$_type" in
      income)
        lib_taxes::__reports_patch_verify "$_income" "${_spends_tags[@]}" "${_trades_tags[@]}"
        ;;
      spends)
        lib_taxes::__reports_patch_verify "$_spends" "${_trades_tags[@]}" "${_income_tags[@]}"
        ;;
      trades)
        lib_taxes::__reports_patch_verify "$_trades" "${_income_tags[@]}" "${_spends_tags[@]}"
        ;;
    esac
  done
}

#
# Verify that a patched report holds none of the given (foreign) tags.
#
# Arguments:
#   $1     - report file (must exist; an empty file is accepted as-is)
#   $2 ... - tags that must *not* appear in the "Action" column
#
# Calls lib_utils::die_fatal on a missing argument/file or on a bad entry.
#
function lib_taxes::__reports_patch_verify()
{
  local _file="$1"
  local _tags=("${@:2}")

  [[ -z "$_file" || -z "${_tags[*]}" ]] && lib_utils::die_fatal
  [ ! -f "$_file" ] && lib_utils::die_fatal "File not found: '${_file}'"

  # Nothing to verify
  [[ -s "$_file" ]] || return 0

  # The actions are read through process substitution (not a pipeline) so
  # that a fatal error is raised in this shell instead of a subshell
  local _line _tag
  while IFS= read -r _line; do
    for _tag in "${_tags[@]}"; do
      [[ "$_line" != "$_tag" ]] || lib_utils::die_fatal "Bad entry in $_file"
    done
  done < <(xsv select "Action" "$_file" | tail -n +2 | sort -u)
}

#
# Reports (obfuscated)
#
# Selects the column layout and the columns to obfuscate for the current
# tag type and generates the obfuscated report.
#
function lib_taxes::__reports_obfs()
{
  # NOTE: functions inherit local vars
  local _all_columns
  local _obfs_columns=()

  case "$_arg_tag" in
    income | spends)
      _all_columns="Date,Action,Account,Symbol,Volume,Currency,Total,Memo"
      _obfs_columns=("Account" "Memo")
      ;;
    trades)
      _all_columns="Date,Action,Account,Symbol,Volume,Currency,Total,FeeCurrency,Fee"
      _obfs_columns=("Account")
      ;;
    *)
      lib_utils::die_fatal "Unsupported type"
      ;;
  esac

  lib_taxes::__reports_obfs_gen
}

#
# Generate the obfuscated report and one raw/obfuscated keymap file per
# obfuscated column.
#
# Arguments:
#   $2 - (optional) length of the truncated digest, defaults to 10
#
# NOTE: inherits _in_file, _out_file, _all_columns, _obfs_columns, _tax_year,
# _arg_tag and _taxes_tmp_dir from its callers.
#
# TODO: refactor to remove shellcheck
# shellcheck disable=SC2120
function lib_taxes::__reports_obfs_gen()
{
  local -r _digest_len="${2:-10}"

  # Temp storage
  local _tmp_dir
  _tmp_dir="$(mktemp -d -p "$_taxes_tmp_dir" obfs_XXX)"

  local _sha="sha256sum"
  lib_utils::deps_check "$_sha"

  # Obfuscate columns
  local _column
  for _column in "${_obfs_columns[@]}"; do
    # Generate obfs/raw keymap
    local _raw_column
    _raw_column="$(xsv select "$_column" "$_in_file" | tail -n +2 | sort -u)"

    local _count
    _count=$(echo "$_raw_column" | wc -l)

    #
    # OBFS column key is a truncated SHA digest of complete profile along with complete account.
    #
    # NOTE:
    #
    #   - static delimiter is used to avoid potential inconsistencies
    #     (if ever given a custom delimiter)
    #
    #   - complete profile is included as a means to provide a 'reasonable' salt
    #     (since default account names and memos can be used across profiles or subprofiles)
    #
    #   - the read below is deliberately left as-is so that the digest input
    #     (and with it every generated key) stays unchanged
    #
    local _obfs_column _account
    # shellcheck disable=SC2162
    _obfs_column="$(echo "$_raw_column" \
      | while read _account; do
        echo -n "${global_parent_profile}/${global_child_profile}/${_account}" | "$_sha" | head -c"${_digest_len}"
        echo
      done)"

    local _raw _obfs
    readarray -t _raw <<<"$_raw_column"
    readarray -t _obfs <<<"$_obfs_column"

    # Generate obfuscated keymap file
    local _styled_column="${_column,,}" # Lowered for consistency

    local _keymap_file
    _keymap_file="$(dirname "${_out_file}")/${_tax_year}_${_arg_tag}-keymap_${_styled_column}.csv"

    # NOTE(review): "${_obfs[*]}" and "${_raw[*]}" are joined with the first
    # character of IFS and split on "," below, so this presumably relies on
    # the argument parser having set IFS to "," -- confirm.
    echo "RawValue,ObfsKey" \
      | gawk -v count="$_count" -v obfs="${_obfs[*]}" -v raw="${_raw[*]}" \
        '{
           printf $1 OFS $2 "\n" # Header
           split(raw, r, ",")
           split(obfs, o, ",")
           for(i = 1; i <= count; i++) { print r[i] OFS o[i] }
         }' FS=, OFS=, \
      | (
        sed -u 1q
        sort
      ) \
      | sed 's|, |,|g' >"$_keymap_file"

    # Generate a temp file of the given raw column
    local _tmp_file="${_tmp_dir}/${_styled_column}"
    xsv select "$_column" "$_in_file" -o "$_tmp_file"

    # Obfuscate given raw column with keymap data.
    #
    # The replacement is anchored to the whole value and skips the header:
    # an unanchored replacement lets a short value (e.g. "coinbase") rewrite
    # part of a longer one (e.g. "coinbase-pro") or of the column name.
    local _line _raw_value _obfs_key
    while IFS= read -r _line; do
      _raw_value="${_line%,*}"
      _obfs_key="${_line#*,}"
      [[ -n "$_raw_value" ]] || continue
      sed -i "2,\$s|^${_raw_value}\$|${_obfs_key}|" "$_tmp_file"
    done < <(tail -n +2 "$_keymap_file")
  done

  # Join the obfuscated temp files into out file and then delete the temp files.
  # NOTE: selecting will remove the raw (unobfuscated) columns
  csvjoin -I --snifflimit 0 "${_tmp_dir}"/* "$_in_file" \
    | xsv select "$_all_columns" -o "$_out_file"
}

# vim: sw=2 sts=2 si ai et