Files
docker-finance/container/src/finance/lib/internal/lib_taxes.bash
Aaron Fiore 66c34a4b1c container: lib_taxes: optimize writes by forking
- Decreases real time in multicore container environments.
  * Given the nature of the writer, there should be no race conditions.
2025-10-02 17:09:48 -07:00

1371 lines
46 KiB
Bash

#!/usr/bin/env bash
# docker-finance | modern accounting for the power-user
#
# Copyright (C) 2021-2025 Aaron Fiore (Founder, Evergreen Crypto LLC)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# "Libraries"
#
# The repository root must be known, otherwise the shared helpers cannot be found
if [[ -z "$DOCKER_FINANCE_CONTAINER_REPO" ]]; then
  exit 1
fi

# Load the shared utility library (provides the lib_utils::* functions used below)
if ! source "${DOCKER_FINANCE_CONTAINER_REPO}/src/finance/lib/internal/lib_utils.bash"; then
  exit 1
fi
#
# Facade
#
# Public entry point: parses the given arguments, runs the tax report
# generation and hands its exit status to lib_utils::catch.
function lib_taxes::taxes()
{
  local _status

  lib_taxes::__parse_args "$@"

  lib_taxes::__taxes
  _status=$?

  lib_utils::catch "$_status"
}
#
# Implementation
#
#
# NOTE:
#
# - This library requires double-entry for all accounts as generated by hledger
#
# - WARNING: hledger rules must include the following:
#
# 1. Entry description *MUST* only be the date / timestamp (Y-m-d h:m:s z)
# 2. The tag 'taxed_as' must be set to supported tags in every needed entry
# 3. Expense accounts for fees must contain the string "fee" in a subaccount
#
# - This implementation currently only supports the website Bitcoin.tax
#
# Parse and validate the arguments of the taxes command.
#
# Arguments:
#   "$@" - tokens of the form <key><global_arg_delim_2><value>; a value may
#          hold several items separated by global_arg_delim_3.
#          Accepted keys: all, tag(s), account(s), write, year(s)
#
# Globals read:
#   global_usage, global_arg_delim_2, global_arg_delim_3
#
# Globals written:
#   global_arg_all      array, only when all= was given
#   global_arg_tag      readonly array, normalized to income/spends/trades
#   global_arg_account  readonly array, only when account= was given
#   global_arg_write    readonly, defaults to "on"
#   global_arg_year     readonly, defaults to the current year;
#                       empty means "all years"
#   IFS                 left set to global_arg_delim_3 (see NOTE below)
#
# Calls lib_utils::die_fatal when a required global is empty and
# lib_utils::die_usage (with the usage text) on any invalid argument.
function lib_taxes::__parse_args()
{
  [ -z "$global_usage" ] && lib_utils::die_fatal
  [ -z "$global_arg_delim_2" ] && lib_utils::die_fatal
  [ -z "$global_arg_delim_3" ] && lib_utils::die_fatal

  local -r _usage="
\e[32mDescription:\e[0m
Generate tax reports from all tagged accounts
\e[32mUsage:\e[0m
$ $global_usage <<[all${global_arg_delim_2}<type1{${global_arg_delim_3}type2${global_arg_delim_3}...}>]> | <[<[tag${global_arg_delim_2}<tag1{${global_arg_delim_3}tag2${global_arg_delim_3}...}>] | [account${global_arg_delim_2}<account1{${global_arg_delim_3}account2${global_arg_delim_3}...}>]]> | [year${global_arg_delim_2}<year>] | [write${global_arg_delim_2}<boolean>]
\e[32mArguments:\e[0m
All options (operation type):
all${global_arg_delim_2}<all|tag|account>
Tag (taxable event type):
tag${global_arg_delim_2}<income|spends|trades>
Account (journal account):
account${global_arg_delim_2}<user-defined (see documentation)>
Tax year:
year${global_arg_delim_2}<all|year>
Write reports to file (don't print to terminal)
write${global_arg_delim_2}<on|off|true|false>
\e[32mExamples:\e[0m
\e[37;2m# Write all operation types for default (current) year\e[0m
$ $global_usage all${global_arg_delim_2}all
\e[37;2m# Write all operation types for all available years\e[0m
$ $global_usage all${global_arg_delim_2}all year${global_arg_delim_2}all
\e[37;2m# Print tagged income on all accounts for default (current) year\e[0m
$ $global_usage all${global_arg_delim_2}account tag${global_arg_delim_2}income write${global_arg_delim_2}off
\e[37;2m# Write only income and spends for Gemini and Coinbase account for year 2022\e[0m
$ $global_usage tag${global_arg_delim_2}income${global_arg_delim_3}spends account${global_arg_delim_2}gemini${global_arg_delim_3}coinbase year${global_arg_delim_2}2022
\e[37;2m# *Print* (don't write) all taxable tags for Nexo account in all available years\e[0m
$ $global_usage all${global_arg_delim_2}tag account${global_arg_delim_2}nexo year${global_arg_delim_2}all write${global_arg_delim_2}off
"

  # Keep all helpers local so nothing leaks to (or is inherited from) callers
  local _arg _key _len
  local _read=()
  local _args=()
  local _arg_all="" _arg_tag="" _arg_account="" _arg_write="" _arg_year=""

  #
  # Ensure supported arguments
  #

  [ $# -eq 0 ] && lib_utils::die_usage "$_usage"

  for _arg in "$@"; do
    [[ ! "$_arg" =~ ^all${global_arg_delim_2} ]] \
      && [[ ! "$_arg" =~ ^tag[s]?${global_arg_delim_2} ]] \
      && [[ ! "$_arg" =~ ^account[s]?${global_arg_delim_2} ]] \
      && [[ ! "$_arg" =~ ^write${global_arg_delim_2} ]] \
      && [[ ! "$_arg" =~ ^year[s]?${global_arg_delim_2} ]] \
      && lib_utils::die_usage "$_usage"
  done

  #
  # Parse arguments before testing
  #

  # Parse key for value
  for _arg in "$@"; do
    # The key is everything before the *first* delimiter, so that a value
    # which itself contains the delimiter is kept intact
    _key="${_arg%%${global_arg_delim_2}*}"
    _len="$((${#_key} + 1))"

    if [[ "$_key" =~ ^all$ ]]; then
      _arg_all="${_arg:${_len}}"
      [ -z "$_arg_all" ] && lib_utils::die_usage "$_usage"
    fi

    if [[ "$_key" =~ ^tag[s]?$ ]]; then
      _arg_tag="${_arg:${_len}}"
      [ -z "$_arg_tag" ] && lib_utils::die_usage "$_usage"
    fi

    if [[ "$_key" =~ ^account[s]?$ ]]; then
      _arg_account="${_arg:${_len}}"
      [ -z "$_arg_account" ] && lib_utils::die_usage "$_usage"
    fi

    if [[ "$_key" =~ ^write[s]?$ ]]; then
      _arg_write="${_arg:${_len}}"
      [ -z "$_arg_write" ] && lib_utils::die_usage "$_usage"
    fi

    if [[ "$_key" =~ ^year[s]?$ ]]; then
      _arg_year="${_arg:${_len}}"
      [ -z "$_arg_year" ] && lib_utils::die_usage "$_usage"
    fi
  done

  #
  # Test for valid ordering/functionality of argument values
  #

  # Arg: all
  if [ ! -z "$_arg_all" ]; then
    # Can't use with every other activated
    if [[ ! -z "$_arg_tag" && ! -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: tag (needs exactly one of all= / account= alongside it)
  if [ ! -z "$_arg_tag" ]; then
    [[ ! -z "$_arg_all" && ! -z "$_arg_account" ]] \
      || [[ -z "$_arg_all" && -z "$_arg_account" ]] \
      && lib_utils::die_usage "$_usage"
  fi

  # Arg: account (needs exactly one of all= / tag= alongside it)
  if [ ! -z "$_arg_account" ]; then
    [[ ! -z "$_arg_all" && ! -z "$_arg_tag" ]] \
      || [[ -z "$_arg_all" && -z "$_arg_tag" ]] \
      && lib_utils::die_usage "$_usage"
  fi

  # Arg: write
  if [ ! -z "$_arg_write" ]; then
    # Need a valid arg
    if [[ -z "$_arg_all" && -z "$_arg_tag" && -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  # Arg: year
  if [ ! -z "$_arg_year" ]; then
    # Need a valid arg
    if [[ -z "$_arg_all" && -z "$_arg_tag" && -z "$_arg_account" ]]; then
      lib_utils::die_usage "$_usage"
    fi
  fi

  #
  # Test argument values, set globals
  #

  # NOTE(review): IFS is intentionally *not* restored afterwards. Other
  # functions in this file join arrays with "${array[*]}" and later split the
  # result on "," (see the keymap generation in lib_taxes::__reports_obfs_gen),
  # which appears to rely on this assignment - confirm before changing.
  IFS="$global_arg_delim_3"

  # Arg: all
  if [ ! -z "$_arg_all" ]; then
    # If all= {tag,account} or {account,tag} then set to all=all
    [[ "${#_arg_all[@]}" -eq 1 && "${_arg_all[*]}" =~ tag[s]? && "${_arg_all[*]}" =~ account[s]? ]] \
      && _arg_all="all"

    # Read args from all
    read -ra _read <<<"$_arg_all"

    for _arg in "${_read[@]}"; do
      # Support values
      [[ ! "$_arg" =~ ^all$|^tag[s]?$|^account[s]?$ ]] \
        && lib_utils::die_usage "$_usage"

      # If all=all then no need for all={tag,account} and tag= or account=
      [[ "$_arg" == "all" && (! -z "$_arg_tag" || ! -z "$_arg_account") ]] \
        || [[ "$_arg" == "all" && "${#_read[@]}" -gt 1 ]] \
        && lib_utils::die_usage "$_usage"

      # If all=tag then no need need for tag= and if all=account then no need for account=
      [[ "$_arg" =~ ^tag[s]?$ && ! -z "$_arg_tag" ]] \
        || [[ "$_arg" =~ ^account[s]?$ && ! -z "$_arg_account" ]] \
        && lib_utils::die_usage "$_usage"

      # If all=tag then need account= or if all=account then need tag=
      if [[ "$_arg" != "all" ]]; then
        [[ "${#_read[@]}" -lt 2 && "$_arg" =~ ^tag[s]?$ && -z "$_arg_account" ]] \
          || [[ "${#_read[@]}" -lt 2 && "$_arg" =~ ^account[s]?$ && -z "$_arg_tag" ]] \
          && lib_utils::die_usage "$_usage"
      fi
    done

    global_arg_all=("${_read[@]}")
  fi

  # Arg: tag
  if [ ! -z "$_arg_tag" ]; then
    _args=() # Reconstruct tag(s) for uniform plurality

    read -ra _read <<<"$_arg_tag"

    for _arg in "${_read[@]}"; do
      [[ ! "$_arg" =~ ^income$|^spend[s]?$|^trade[s]?$ ]] \
        && lib_utils::die_usage "$_usage"

      if [[ "$_arg" =~ ^spend$ ]]; then
        _args+=("spends")
      elif [[ "$_arg" =~ ^trade$ ]]; then
        _args+=("trades")
      else
        _args+=("$_arg")
      fi
    done

    declare -gr global_arg_tag=("${_args[@]}")
  elif [[ "${global_arg_all[*]}" =~ (all|tag) ]]; then
    declare -gr global_arg_tag=("income" "spends" "trades")
  fi

  # Arg: account
  if [ ! -z "$_arg_account" ]; then
    read -ra _read <<<"$_arg_account"
    declare -gr global_arg_account=("${_read[@]}")
  fi

  # Arg: write
  if [ ! -z "$_arg_write" ]; then
    if [[ ! "$_arg_write" =~ ^on$|^off$|^true$|^false$ ]]; then
      lib_utils::die_usage "$_usage"
    fi
    declare -gr global_arg_write="$_arg_write"
  else
    declare -gr global_arg_write="on" # Set default
  fi

  # Arg: year
  # TODO: implement range
  if [ ! -z "$_arg_year" ]; then
    # TODO: 20th century support
    if [[ ! "$_arg_year" =~ ^20[0-9][0-9]$ && "$_arg_year" != "all" ]]; then
      lib_utils::die_usage "$_usage"
    fi

    # Implementation will treat an empty as all years.
    # The value must be settled *before* the variable is made readonly:
    # a readonly variable cannot be unset or reassigned afterwards.
    if [[ "$_arg_year" == "all" ]]; then
      global_arg_year=""
    else
      global_arg_year="$_arg_year"
    fi
  else
    global_arg_year="$(date +%Y)" # Set default
  fi
  declare -gr global_arg_year
}
# Dispatch report generation: either print the full report to the terminal
# (write is off/false) or write all report types to files (timed).
#
# Reads global_arg_year, global_arg_write, global_parent_profile and
# global_child_profile. The exit status of the chosen implementation is
# passed through lib_utils::catch.
function lib_taxes::__taxes()
{
  # Supported tags (locally "global": the functions called from here read
  # these arrays through the caller's scope)
  local -r _income_tags=("INCOME" "GIFTIN" "MINING" "BORROW")
  local -r _spends_tags=("SPEND" "DONATION" "GIFT" "REPAY")
  local -r _trades_tags=("BUY" "SELL" "SWAP" "MATCH" "REBATE" "RAW_TRADE" "PARTIAL_TRADE")

  # An empty year stands for every available year
  local _year_label="${global_arg_year:-all}"

  if [[ -z "$global_parent_profile" ]]; then
    lib_utils::die_fatal
  fi
  if [[ -z "$global_child_profile" ]]; then
    lib_utils::die_fatal
  fi

  lib_utils::print_custom "\n"
  lib_utils::print_info \
    "Generating tax reports in year '${_year_label}' for '${global_parent_profile}/${global_child_profile}' ..."
  lib_utils::print_custom "\n"

  case "$global_arg_write" in
    off | false)
      # Print full (transparent) output
      lib_taxes::__taxes_print
      lib_utils::catch $?
      return $?
      ;;
  esac

  # Write all types of output (including full)
  time lib_taxes::__taxes_write
  lib_utils::catch $?
}
#
# Print full (transparent) report
#
# Arguments (all optional, the global args are used when omitted):
#   $1    - tax year (default: global_arg_year; empty means no date filter)
#   $2... - tag type(s): income, spend(s), trade(s) (default: global_arg_tag)
#
# Reads from the caller's scope / globals:
#   _income_tags, _spends_tags, _trades_tags, global_hledger_cmd,
#   global_arg_account
#
# Output (stdout), once per tag type:
#   a CSV report whose rows are built from the "posting-comment" column of
#   `hledger print -O csv`, queried once per supported tag with
#   "tag:taxed_as=<TAG>*$". The combined rows are reverse-sorted, duplicate
#   header lines are dropped and the header is only printed when at least one
#   data row exists.
#
# Requires: xsv, gawk (the formatters use gawk-only `switch`), sed, sort, tail
function lib_taxes::__taxes_print()
{
  #
  # Despite global args, allow optional args (works with write impl)
  #

  # Tax year
  local _arg_year="$1"
  [ -z "$_arg_year" ] && _arg_year="$global_arg_year"

  # Tag type(s)
  local _arg_tag=("${@:2}")
  [ -z "${_arg_tag[*]}" ] && _arg_tag=("${global_arg_tag[@]}")

  #
  # Print given tag(s)
  #

  # Set base hledger command
  # NOTE: when no year is given, an empty query argument is passed to hledger
  local _date=""
  [ ! -z "$_arg_year" ] && _date="date:${_arg_year}"
  local _base_hledger=("${global_hledger_cmd[@]}" "print" "${global_arg_account[@]}" "$_date" "-O" "csv")

  local _arg _tag
  local _tags=()
  local _hledger=()
  local _is_trades

  # Cycle through tags
  for _arg in "${_arg_tag[@]}"; do
    # The trades flag selects the FeeCurrency/Fee layout (instead of Memo).
    # It must be reset for every tag type, otherwise a tag type that follows
    # "trades" in the same call would inherit the trades layout.
    _is_trades=""

    case "$_arg" in
      income)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (INCOME, GIFTIN, MINING or BORROW)
        # Account (account or wallet name, e.g. Coinbase or Blockchain)
        # Symbol (BTC, ETH, LTC, etc)
        # Volume (number of coins received)
        # Currency (optional, specify alternative to your default currency, such as USD, GBP or EUR)
        # Total (Fair price or value in Currency or your home currency, or blank for market value
        # Memo (optional, name of sender or item sold)
        # For example,
        # Date,Action,Account,Symbol,Volume
        # 2020-01-01 13:00:00 -0800,INCOME,"Blockchain Wallet",BTC,1
        _tags=("${_income_tags[@]}")
        ;;
      spend | spends)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (SPEND, DONATION, GIFT or REPAY)
        # Account (name of account or wallet, e.g. Coinbase or Blockchain)
        # Symbol (BTC, LTC, ETH, etc)
        # Volume (number of coins spent)
        # Currency (optional, specify alternative to your default currency, such as USD, GBP or EUR)
        # Total (Fair price or cost in Currency or your home currency, or blank for market value
        # Memo (optional, name of recipient or item purchased)
        # For example,
        # Date,Action,Account,Symbol,Volume,Total,Currency
        # 2020-01-01 13:00:00 -0800,SPEND,"Blockchain Wallet",BTC,1,
        _tags=("${_spends_tags[@]}")
        ;;
      trade | trades)
        # Date (date and time as YYYY-MM-DD HH:mm:ss Z)
        # Action (BUY, SELL or SWAP)
        # Account (override the exchange or wallet name, e.g. Coinbase)
        # Symbol (BTC, LTC, DASH, etc)
        # Volume (number of coins traded)
        # Currency (specify currency such as USD, GBP, EUR or coins, BTC or LTC)
        # Total (you can use the total Currency amount or price per coin)
        # Price (price per coin in Currency or blank for lookup)
        # FeeCurrency (currency of fee if different than Currency)
        # Fee (any additional costs of the trade)
        # For example,
        # Date,Action,Account,Symbol,Volume,Price,Currency,Fee
        # 2020-01-01 13:00:00 -0800,BUY,Coinbase,BTC,1500,USD,5.50
        _tags=("${_trades_tags[@]}")
        _is_trades=true
        ;;
      *)
        lib_utils::die_fatal "Unsupported tag '$_arg'"
        ;;
    esac

    # Cycle through supported tags
    for _tag in "${_tags[@]}"; do
      # Add tag to base command
      _hledger=("${_base_hledger[@]}")
      _hledger+=("tag:taxed_as=${_tag}*$")

      #
      # The default income/spend/trading formatter
      #
      # NOTE:
      # - Crypto rebates are treated as opening positions (for cost-basis)
      # since it's technically *not* income (such as card cashback rebates)
      #
      # - "RAW_TRADE" has no need for a parser/formatter since the comment
      # contains all of the trade but
      # * *MUST* contain tags found within trades tags (BUY/SELL/FEE)
      # in order to be printed and not skipped.
      # * TODO: HACK: *MUST* have cost-basis calculated here (for #51)
      # TODO: AAVE/Compound income formatter (may be replaced by contract query)
      #
      # NOTE(review): the gawk programs in this function pass record data as
      # the printf *format* (e.g. `printf Memo`); a literal "%" in the data
      # would be interpreted as a format specifier.
      if [[ "$_tag" != "MATCH" && "$_tag" != "PARTIAL_TRADE" ]]; then
        "${_hledger[@]}" \
          | xsv select '"posting-comment"' \
          | tail -n +2 \
          | sed -e 's:"::g' -e '/^$/d' \
          | gawk -v tag="$_tag" -v is_trades="$_is_trades" \
            'BEGIN {
              FS=OFS=","
              str_one="Date" OFS "Action" OFS "Account" OFS "Symbol" OFS "Volume" OFS "Currency" OFS "Total"
              str_two="FeeCurrency" OFS "Fee"
              has_fee = (is_trades ? 1 : 0)
              printf (has_fee ? str_one OFS str_two : str_one OFS "Memo")
              printf "\n"
            }
            {
              if (!NR) { exit }
              Date = $1
              Action = $2
              Account = $3
              Symbol = $4
              Volume = $5
              Currency = $6
              Total = $7
              Memo = (has_fee ? "" : $8)
              if (has_fee)
              {
                FeeCurrency = (has_fee && $9 != "" ? $8 : "")
                Fee = (FeeCurrency && $9 != 0 ? $9 : "")
              }
              # Total is a literal 0. Get market value from bitcoin.tax
              if (Total == 0) {Total=""}
              # Create "correct" symbols
              if (Symbol == "$") {Symbol="USD"}
              if (Currency == "" || Currency == "$") {Currency="USD"}
              if (FeeCurrency == "$") {FeeCurrency="USD"}
              # Remove negative values
              sub(/-/, "", Volume); sub(/-/, "", Total)
              if (Fee) {sub(/-/, "", Fee)}
              # Remove trailing zeros after decimals
              if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
              if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
              if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}
              # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
              if (tag == "RAW_TRADE")
              {
                printf Date OFS Action OFS Account OFS
                switch (Action)
                {
                  case "SELL":
                    if (FeeCurrency == Symbol)
                    {
                      if (Fee ~ /\./)
                      {
                        split(Fee, cost_basis, ".")
                        rhs=length(cost_basis[2])
                        CostBasis=sprintf("%." rhs "f", Volume - Fee)
                      }
                      else
                      {
                        CostBasis=Volume - Fee
                      }
                    }
                    else
                    {
                      CostBasis=Volume
                    }
                    printf Currency OFS Total OFS Symbol OFS CostBasis OFS
                    break
                  case "BUY":
                    if (FeeCurrency == Currency)
                    {
                      if (Fee ~ /\./)
                      {
                        split(Fee, cost_basis, ".")
                        rhs=length(cost_basis[2])
                        CostBasis=sprintf("%." rhs "f", Total + Fee)
                      }
                      else
                      {
                        CostBasis=Total + Fee
                      }
                    }
                    else
                    {
                      CostBasis=Total
                    }
                    printf Symbol OFS Volume OFS Currency OFS CostBasis OFS
                    break
                  default:
                    printf "FATAL: unsupported Action: " Action
                    print $0
                    exit
                }
                printf OFS # FeeCurrency handled below
                # Fee handled below
                printf "\n"
              }
              # TODO: HACK: print SPEND line for non-fiat fee (see #51)
              # NOTE: cost-basis *MUST* be calculated above or within preprocess
              if (is_trades && has_fee && FeeCurrency != "USD" && Fee)
              {
                printf Date OFS "SPEND" OFS Account OFS
                printf FeeCurrency OFS Fee OFS "USD"
                printf "\n";
              }
              # - Drop USD-only spends (such as with Coinbase Card) because they are not needed.
              # - Do not print empty USD entries or empty symbol (Algorand) entries.
              # - Do not print testnet symbols.
              # TODO: regexp optimize
              # TODO: HACK (isolating from RAW_TRADE): see #51 (and related lib_taxes work-around)
              if (tag != "RAW_TRADE" \
                  && (Symbol != "USD" && Symbol != Currency) \
                  && Volume != 0 && Volume != "" \
                  && Symbol != "BTCt" && Symbol != "DOGEt" && Symbol != "tLTC" && Symbol != "LTCTEST")
              {
                printf Date OFS
                printf Action OFS
                printf Account OFS
                printf Symbol OFS
                printf Volume OFS
                printf Currency OFS
                printf Total OFS
                if (has_fee)
                {
                  # TODO: HACK: see #51 (and related lib_taxes work-around)
                  # NOTE: cost-basis *MUST* be calculated within preprocess
                  if (is_trades && Fee && FeeCurrency != "USD")
                  {
                    printf "" OFS ""
                  }
                  else
                  {
                    printf FeeCurrency OFS Fee
                  }
                }
                else
                {
                  printf Memo
                }
                printf "\n"
              }
            }'
      fi

      #
      # The "MATCH" trading (triple/quadruple) formatter
      #
      # NOTE:
      # - Expects a minimum txid (or trade_id) connected triplet grouping of
      # BUY/SELL/FEE or SELL/BUY/FEE
      #
      # - WARNING: income BORROW / spend REPAY can be treated as a MATCH and
      # is not actually a trade (but the FEE is needed, which is a MATCH and
      # is the only connective tissue is the txid within all tags).
      # So, if BORROW/REPAY is given a MATCH, a caller's patching function
      # *MUST* parse out the respective tags and move to income/spends.
      #
      # NOTE(review): in the last gawk program below, substr(X,1,1) yields a
      # single character while the patterns /^c[A-Z]|^a[A-Z]|^stk[A-Z]/ need
      # at least two, so the aToken/cToken/stkToken checks look like they can
      # never match. Left untouched because changing it alters report
      # contents - confirm intent.
      if [[ "$_tag" == "MATCH" ]]; then
        "${_hledger[@]}" \
          | xsv select '"posting-comment"' \
          | tail -n +2 \
          | sed -e 's:"::g' -e '/^$/d' \
          | sort -u \
          | gawk \
            'BEGIN { FS=OFS="," }
            # Remove redundant txid before bringing to one line
            $1 == p1 { prev = prev OFS $3 OFS $5 OFS $6; next }
            {
              if (NR>1)
                print prev
              prev = $0
              p1 = $1
            }
            END { print prev }' \
          | gawk -M -v PREC=100 \
            '{
              # NOTE: if previous only produces OFS, this will exit
              if (!$NF) { exit }
              # If more than triplet form, support quadruple form
              # TODO: support more than 4 if necessary
              if ($13 == $10)
              {
                # BUY (should be default provided by hledger rules)
                printf $1 OFS $2 OFS $3 OFS $4 OFS $5 OFS $6 OFS $7 OFS
                printf $8 OFS $9 OFS $10 OFS $11 OFS
                printf("%.18f", $12+$15)
                printf "\n"
              } else if ($13 == $3)
              {
                # SELL
                printf "FATAL: not implemented"
                printf "\n"
                # TODO: kill
              } else
              {
                # Default (triplets)
                print
              }
            }' FS=, OFS=, \
          | gawk -v tag="$_tag" \
            'BEGIN {
              FS=OFS=","
              printf "Date" OFS
              printf "Action" OFS
              printf "Account" OFS
              printf "Symbol" OFS
              printf "Volume" OFS
              printf "Currency" OFS
              printf "Total" OFS
              printf "FeeCurrency" OFS
              printf "Fee"
              printf "\n"
            }
            {
              # Assign output vars
              Date = $2
              Action = $3
              if (Action == "SELL")
              {
                # Hack in BUY by default (see below)
                tag = "BUY";
                Action = tag
              }
              Account = $4
              Symbol = $5
              Volume = $6
              Currency = $11
              Total = $12
              FeeCurrency = $8
              Fee = $9
              Memo = ""
              # Create "correct" symbols
              if (Symbol == "$") {Symbol="USD"}
              if (Currency == "" || Currency == "$") {Currency="USD"}
              if (FeeCurrency == "$") {FeeCurrency="USD"}
              # Remove negative values
              sub(/-/, "", Volume); sub(/-/, "", Total); sub(/-/, "", Fee)
              # Remove trailing zeros after decimals
              if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
              if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
              if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}
              # Reset Action
              #
              # NOTE:
              # - Flips BUY/SELL direction for pairs that are known to be
              # commonly sold in one direction but rules have dictated
              # another (flips fiat or aToken/stkToken/cTokens etc.)
              #
              # - Rationale: because bitcoin.tax produces a bar graph of buys
              # and sells and it makes sense for the graph to look accurate
              # (obvoiusly does not work for BTC/ETH pairings etc., but it
              # should look more "sane" for fiat currencies)
              #
              # NOTE: has no affect on their end calculations
              #
              if (Symbol == "USD" \
                  || substr(Currency,1,1) ~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/)
              {
                if (Action == "BUY") {Action = "SELL"}
              }
              #
              # Actions
              #
              switch (Action)
              {
                case "SELL":
                  # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
                  if (FeeCurrency == Symbol)
                  {
                    if (Fee ~ /\./)
                    {
                      split(Fee, cost_basis, ".")
                      rhs=length(cost_basis[2])
                      CostBasis=sprintf("%." rhs "f", Volume - Fee)
                    }
                    else
                    {
                      CostBasis=Volume - Fee
                    }
                  }
                  else
                  {
                    CostBasis=Volume
                  }
                  printf Date OFS Action OFS Account OFS
                  printf Currency OFS Total OFS Symbol OFS CostBasis OFS
                  printf OFS # FeeCurrency/Fee handled below
                  # Fee handled below
                  printf "\n"
                  # TODO: HACK: print SPEND line for non-fiat fee (see #51)
                  # NOTE: cost-basis *MUST* be calculated above or within preprocess
                  if (FeeCurrency != "USD")
                  {
                    printf Date OFS "SPEND" OFS Account OFS
                    printf FeeCurrency OFS Fee OFS "USD"
                    printf "\n";
                  }
                  break
                case "BUY":
                  # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
                  if (FeeCurrency == Currency)
                  {
                    if (Fee ~ /\./)
                    {
                      split(Fee, cost_basis, ".")
                      rhs=length(cost_basis[2])
                      CostBasis=sprintf("%." rhs "f", Total + Fee)
                    }
                    else
                    {
                      CostBasis=Total + Fee
                    }
                  }
                  else
                  {
                    CostBasis=Total
                  }
                  printf Date OFS Action OFS Account OFS
                  printf Symbol OFS Volume OFS Currency OFS CostBasis OFS
                  printf OFS # FeeCurrency handled below
                  # Fee handled below
                  printf "\n"
                  # TODO: HACK: print SPEND line for non-fiat fee (see #51)
                  # NOTE: cost-basis *MUST* be calculated above or within preprocess
                  if (FeeCurrency != "USD")
                  {
                    printf Date OFS "SPEND" OFS Account OFS
                    printf FeeCurrency OFS Fee OFS "USD"
                    printf "\n";
                  }
                  break
                case "BORROW":
                  # Reset tail end vars since this is an "INCOME" tag
                  FeeCurrency = $14
                  Fee = $15
                  Memo = $17
                  # Do not print aToken/cToken/stkToken
                  sym=substr(Symbol,1,1)
                  if (sym !~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/)
                  {
                    printf Date OFS Action OFS Account OFS
                    printf Symbol OFS Volume OFS "USD" OFS OFS
                    printf "\n";
                  }
                  # Create new SPEND record based on attached FEE
                  printf Date OFS "SPEND" OFS Account OFS
                  printf FeeCurrency OFS Fee OFS "USD" Memo
                  printf "\n";
                  break
                case "FEE": # This FEE is *not* related to a trade
                  # Convert to SPEND
                  printf Date OFS "SPEND" OFS Account OFS
                  printf Symbol OFS Volume OFS "USD"
                  printf "\n";
                  # TODO: why is REPAY not like BORROW...
                  if ($7 == "REPAY")
                  {
                    # Do not print aToken/cToken/stkToken
                    sym=substr($8,1,1)
                    if (sym !~ /^c[A-Z]|^a[A-Z]|^stk[A-Z]/)
                    {
                      printf Date OFS $7 OFS Account OFS
                      printf $8 OFS $9 OFS "USD" OFS OFS
                      printf "\n";
                    }
                  }
                  break
                default:
                  printf "FATAL: unsupported Action: " Action # TODO: improve
                  print $0
                  exit
              }
            }'
      fi

      #
      # The "PARTIAL_TRADE" formatter (specifically, unwrapping tokens)
      #
      # NOTE:
      # - Only attaches FEE to SELL/BUY (2 lines total) for cost-basis
      # (the contract address will be on a per-symbol/per-contract basis)
      #
      # - Any fees that are marked as PARTIAL_TRADE that do not have an amount
      # counterpart (not the fee) will then be marked as a SPEND
      if [[ "$_tag" == "PARTIAL_TRADE" ]]; then
        "${_hledger[@]}" \
          | xsv select '"posting-comment"' \
          | tail -n +2 \
          | sed -e 's:"::g' -e '/^$/d' \
          | sort -u \
          | gawk \
            '{
              if (!NR) { exit }
              txid = $1; FeeCurrency = $5; Fee = $6
              a[$1]=a[$1] ? a[$1] OFS txid OFS FeeCurrency OFS Fee : $0
            } END { for(i in a) {print a[i]} }' FS=, OFS=, \
          | gawk -v tag="$_tag" \
            'BEGIN {
              FS=OFS=","
              printf "Date" OFS
              printf "Action" OFS
              printf "Account" OFS
              printf "Symbol" OFS
              printf "Volume" OFS
              printf "Currency" OFS
              printf "Total" OFS
              printf "FeeCurrency" OFS
              printf "Fee"
              printf "\n"
            }
            {
              txid_first = $1
              txid_last = $9
              if ((txid_first != txid_last) && (NF == 11))
              {
                printf "FATAL: not matching txid"
                exit
              }
              # Assign output vars
              Date = $2
              Action = $3
              Account = $4
              Symbol = $5
              Volume = $6
              Currency = $7
              Total = $8
              FeeCurrency = $10
              Fee = $11
              # Create "correct" symbols
              if (Symbol == "$") {Symbol="USD"}
              if (Currency == "" || Currency == "$") {Currency="USD"}
              if (FeeCurrency == "$") {FeeCurrency="USD"}
              # Remove negative values
              sub(/-/, "", Volume); sub(/-/, "", Total); sub(/-/, "", Fee)
              # Remove trailing zeros after decimals
              if (Volume ~ /\./) {sub("0*$", "", Volume); sub("\\.$", "", Volume)}
              if (Total ~ /\./) {sub("0*$", "", Total); sub("\\.$", "", Total)}
              if (Fee ~ /\./) {sub("0*$", "", Fee); sub("\\.$", "", Fee)}
              # TODO: HACK: cost-basis calculated here (instead of preprocess) for #51
              if (FeeCurrency == Currency)
              {
                if (Action == "BUY") {CostBasis=Total + Fee}
                else {CostBasis=Total - Fee}
              }
              else
              {
                CostBasis=Total
              }
              printf Date OFS
              printf Action OFS
              printf Account OFS
              printf Symbol OFS
              printf Volume OFS
              printf Currency OFS
              printf CostBasis OFS
              printf OFS # FeeCurrency handled below
              # Fee handled below
              printf "\n";
              # TODO: HACK: print SPEND line for non-fiat fee (see #51)
              # NOTE: cost-basis *MUST* be calculated above
              if (FeeCurrency != "USD")
              {
                printf Date OFS "SPEND" OFS Account OFS
                printf FeeCurrency OFS Fee OFS "USD"
                printf "\n";
              }
            }'
      fi
    done \
      | sort -r \
      | gawk \
        '{
          # Remove duplicate header
          if (NR == 1)
          {
            Header=$0
          }
          # Save non-header row(s)
          if ($0 != Header)
          {
            Data[i++] = $0
          }
        }
        END {
          # Only print header if non-header row(s) exist
          if (length(Data))
          {
            print Header
          }
          # Print all non-header row(s)
          for (i=0; i < length(Data); i++)
          {
            print Data[i]
          }
        }' FS=, OFS=,
    # WARNING:
    # - Do not run unique (sort -ru)! There are legitimate income/trade entries
    # that appear to be duplicates but are different at the millisecond level.
    #
    # TODO: there *could* be a UID in an extra column. A unique sort could
    # then be run since milliseconds are *NOT* universally supported upstream.
  done
}
#
# Write generated taxes to files
#
# For every tax year in range and every requested tag type:
#   1. writes the full (transparent) report, one background job per tag type
#   2. patches the full reports (lib_taxes::__reports_patch)
#   3. derives the obfuscated report (lib_taxes::__reports_obfs) and checks
#      that full and obfuscated reports have the same number of lines
#
# Globals read:
#   global_arg_year (empty: from the oldest record's year up to the current
#   year), global_arg_tag, global_arg_account, global_hledger_cmd,
#   global_child_profile_flow
#
# Variables shared with the called functions through this function's scope:
#   _taxes_tmp_dir, _tax_year, _tax_root_dir, _ext_full, _arg_tag,
#   _base_path, _in_file, _out_file
#
# Output files:
#   <global_child_profile_flow>/taxes/<year>/<tag>/<year>_<tag>-report_full.csv
#   (the obfuscated counterpart ends in -report_obfs.csv)
#
# Returns: the exit status of the year loop.
function lib_taxes::__taxes_write()
{
  # Set base hledger command
  local _date=""
  [[ -n "$global_arg_year" ]] && _date="date:${global_arg_year}"
  local _base_hledger=("${global_hledger_cmd[@]}" "print" "${global_arg_account[@]}" "$_date" "-O" "csv")

  # If tax year is 'all', get all given records and capture oldest date/year
  # (then write to appropriate year dirs)
  local _current_year
  _current_year="$(date +%Y)"

  local _oldest_year
  if [[ -z "$global_arg_year" ]]; then
    lib_utils::print_normal "Capturing the year of the oldest tagged record ..."
    _oldest_year="$("${_base_hledger[@]}" | head -n2 | xsv select \"date\" | tail -n1 | cut -d'-' -f1)"
    [[ -z "$_oldest_year" ]] && lib_utils::die_fatal "No records available to write"
  elif [[ "$global_arg_year" -gt "$_current_year" ]]; then
    # Invalid future year
    lib_utils::die_fatal "Given year is greater than $_current_year"
  else
    # Generate only current year
    _current_year="$global_arg_year"
    _oldest_year="$_current_year"
  fi

  # Temp dir for processing
  # (declared separately so that a mktemp failure is not masked by `local`)
  local _taxes_tmp_dir
  _taxes_tmp_dir="$(mktemp -d -p /tmp docker-finance_XXX)" \
    || lib_utils::die_fatal "Unable to create temporary directory"

  lib_utils::print_normal " ─ Taxes"
  lib_utils::print_custom " \e[32m│\e[0m\n"

  # Iterate through all given years
  [[ -z "$global_child_profile_flow" ]] && lib_utils::die_fatal

  local _tax_year _arg_tag
  for ((_tax_year = _oldest_year; _tax_year <= _current_year; _tax_year++)); do
    # Final output dir
    local _tax_root_dir="${global_child_profile_flow}/taxes/${_tax_year}"

    # Subdir for individual reports
    for _arg_tag in "${global_arg_tag[@]}"; do
      local _dir="${_tax_root_dir}/${_arg_tag}"
      [[ ! -d "$_dir" ]] && mkdir -p "$_dir"
    done

    # Generate transparent (full) reports
    # (every tag type writes its own file, so the jobs run in parallel)
    local _ext_full="report_full.csv"
    for _arg_tag in "${global_arg_tag[@]}"; do
      (
        lib_utils::print_custom " \e[32m├─\e[34m\e[1;3m ${_arg_tag} (full)\e[0m\n"
        local _base_path="${_tax_root_dir}/${_arg_tag}/${_tax_year}_${_arg_tag}"
        local _out_file="${_base_path}-${_ext_full}"
        lib_taxes::__taxes_print "$_tax_year" "$_arg_tag" >"${_out_file}"
        lib_utils::catch $?
        if [[ ! -f "$_out_file" || ! -s "$_out_file" ]]; then
          # Report the year being processed: global_arg_year is empty when
          # iterating over all years
          lib_utils::print_warning "Nothing generated for '${_arg_tag}' (no taxable event found for the year ${_tax_year})"
        fi
      ) &
    done
    wait

    # Patch transparent (full) reports
    # WARNING: depends on previous base path / extension format # TODO: no, not this way
    lib_utils::print_custom " \e[32m│\e[0m\n"
    lib_utils::print_custom " \e[32m│\e[0m\t\e[37;2m ... Applying patches (full) ...\e[0m\n"
    lib_utils::print_custom " \e[32m│\e[0m\n"
    lib_taxes::__reports_patch
    lib_utils::catch $?

    # Generate specialized reports based on full reports
    for _arg_tag in "${global_arg_tag[@]}"; do
      local _base_path="${_tax_root_dir}/${_arg_tag}/${_tax_year}_${_arg_tag}"
      local _in_file="${_base_path}-${_ext_full}"
      if [[ ! -f "$_in_file" ]]; then
        lib_utils::print_warning "$_in_file does not exist, skipping"
        continue
      fi

      # Only perform specialized reports on files with more than a header
      # (reading from stdin makes wc print the bare count, independent of the path)
      local _count
      _count="$(wc -l <"$_in_file")"
      if [[ "$_count" -gt 1 ]]; then
        #
        # Obfuscated
        #
        lib_utils::print_custom " \e[32m├─\e[34m\e[1;3m ${_arg_tag} (obfuscated)\e[0m\n"
        local _ext_obfs="report_obfs.csv"
        local _out_file="${_base_path}-${_ext_obfs}"
        lib_taxes::__reports_obfs
        lib_utils::catch $?

        #
        # Verify reports
        #
        lib_utils::print_custom " \e[32m│\e[0m\n"
        lib_utils::print_custom " \e[32m│\e[0m\t\e[37;2m ... Verifying $_arg_tag (full/obfuscated) ...\e[0m\n"
        lib_utils::print_custom " \e[32m│\e[0m\n"
        local _full
        _full="$(wc -l <"${_base_path}-${_ext_full}")"
        local _obfs
        _obfs="$(wc -l <"${_base_path}-${_ext_obfs}")"
        # TODO: more involved verification
        if [[ "$_full" -ne "$_obfs" ]]; then
          lib_utils::die_fatal "Report verification: line count mistmatch"
        fi
      fi
    done
  done
  local -r _return=$?

  if [[ $_return -eq 0 ]]; then
    lib_utils::print_custom "\n"
    lib_utils::print_info "Done!"
  fi

  # Enforce cleanup
  [[ -d "$_taxes_tmp_dir" ]] && rm -fr "$_taxes_tmp_dir"

  return $_return
}
#
# Reports (patch)
#
# TODO: patching should be done at the output level, not file level
# (otherwise, income/spend entries in MATCHed trades-output are not put into their respective output)
# Move rows between the full reports of the current tax year so that every
# action ends up in the report of its own type, apply the Coinbase
# double-spend patch and verify the outcome.
#
# Inherited from the caller's scope:
#   _tax_root_dir, _tax_year, _ext_full, _taxes_tmp_dir,
#   _income_tags, _spends_tags, _trades_tags
# Globals read:
#   global_arg_tag
#
# Modifies the income/spends/trades full report files in place.
function lib_taxes::__reports_patch()
{
  local _income="${_tax_root_dir}/income/${_tax_year}_income-${_ext_full}"
  local _spends="${_tax_root_dir}/spends/${_tax_year}_spends-${_ext_full}"
  local _trades="${_tax_root_dir}/trades/${_tax_year}_trades-${_ext_full}"

  # Ensure appropriate tags in respective files
  #
  # NOTE:
  # - Due to PARTIAL_TRADES, there may be a lone FEE SPEND straggler
  # - Due to MATCH, income/spend BORROW/REPAY may be in trades output
  # - Bitcoin.tax:
  # * will allow SPENDS in trades file (though, with error complaints)
  # * TODO: HACK: due to #51, trades with non-fiat fees will need a SPEND
  # line added for the disposal of said fee until upstream resolves
  # their importer.
  #
  # WARNING:
  # - Do *NOT* do a unique sort here!
  # Unique sort will make txs go missing (such as Algorand fees) because
  # transactions can include duplicate fee amounts with the same exact
  # timestamp (e.g., for multiple contract calls)
  if [[ "${global_arg_tag[*]}" =~ trades && "${global_arg_tag[*]}" =~ spends ]]; then
    if [ -f "$_spends" ]; then
      grep SPEND "$_trades" | sed 's:,*$:,,FEE:g' >>"$_spends"
      grep REPAY "$_trades" >>"$_spends"
      sort -r -o "$_spends" "$_spends"
    fi
  fi

  if [[ "${global_arg_tag[*]}" =~ trades && "${global_arg_tag[*]}" =~ income ]]; then
    if [ -f "$_income" ]; then
      grep BORROW "$_trades" >>"$_income"
      sort -r -o "$_income" "$_income"
    fi
  fi

  if [[ "${global_arg_tag[*]}" =~ trades ]]; then
    if [ -f "$_trades" ]; then
      sed -i '/SPEND/d' "$_trades"
      sed -i '/BORROW/d' "$_trades"
      sed -i '/REPAY/d' "$_trades"
    fi
  fi

  #
  # Coinbase patch
  #
  # NOTE:
  # - There are times where Coinbase will report Coinbase Pro withdrawals,
  # resulting in double spends in reports!!!
  #
  # TODO: why does this not work for 2022 and beyond?
  # Did coinbase stop this practice in 2022?
  #
  # - Coinbase records the fiat value of the FEE. That's the needed entry, so
  # keep the Coinbase Pro entry but tag-on the Coinbase fiat FEE amount
  #
  # - Coinbase Pro was sunsetted in late 2022
  # TODO: for 2025+, subaccount will be included before comma
  # (must add appropriate regex if patch must ever be applied to 2025+)
  local _coinbase_string="(coinbase,|coinbase-pro,)"

  # Only applies when the spends report mentions Coinbase and the tax year is
  # within 2012-2021 (a missing spends report simply skips the patch)
  # TODO: year should be properly parsed in arg parser
  if grep -E "$_coinbase_string" "$_spends" &>/dev/null \
    && [[ "$_tax_year" =~ 20(([1][2-9])|([2][0-1])) ]]; then
    local _tmp_dir
    _tmp_dir="$(mktemp -d -p "$_taxes_tmp_dir" patch_XXX)"
    local _tmp_file
    _tmp_file="$(mktemp -p "$_tmp_dir" spends_XXX)"

    grep -E "$_coinbase_string" "$_spends" \
      | grep "FEE" \
      | grep "coinbase-pro" -B1 \
      | grep -v "\--" \
      | gawk '{ a[$1]=a[$1] ? a[$1] OFS $(NF-2) FS $(NF-1) FS $NF : $0} END { for(i in a) {print a[i]} }' FS=, OFS=, \
      | gawk '{ if (($2 != $10) && ($4 != $12) && ($5 != $13) && ($6 != $14) && ($8 != $16)) { print "FATAL: MISMATCH = " $0; exit 1 } print }' FS=, OFS=, \
      | sed 's:,:\n:8; P; D' \
        >"$_tmp_file"

    # Drop the original (paired) rows from the spends report ...
    # NOTE(review): each row is used as a sed regular expression and as part
    # of a "/"-delimited address; rows containing "/" or regex
    # metacharacters would not be matched literally.
    local line
    while read line; do sed -i "/${line}/d" "$_spends"; done <"$_tmp_file"

    # ... and append the merged rows instead
    sed 'N;s/\n/,/g' "$_tmp_file" \
      | gawk '{ print $9 OFS $10 OFS $11 OFS $12 OFS $13 OFS $14 OFS $7 OFS $16 }' FS=, OFS=, \
        >>"$_spends" \
      && sort -ru -o "$_spends" "$_spends"
  fi

  #
  # Verify success of patches
  #
  # Only reports of the requested tag types are verified: reports of other
  # types are not generated in this run, and the verification fails on a
  # missing file.
  lib_utils::print_custom " \e[32m│\e[0m\t\e[37;2m ... Verifying patches (full) ...\e[0m\n"
  lib_utils::print_custom " \e[32m│\e[0m\n"

  local _tag
  for _tag in "${global_arg_tag[@]}"; do
    case "$_tag" in
      income)
        lib_taxes::__reports_patch_verify "$_income" "${_spends_tags[@]}" "${_trades_tags[@]}"
        ;;
      spends)
        lib_taxes::__reports_patch_verify "$_spends" "${_trades_tags[@]}" "${_income_tags[@]}"
        ;;
      trades)
        lib_taxes::__reports_patch_verify "$_trades" "${_income_tags[@]}" "${_spends_tags[@]}"
        ;;
    esac
  done
}
# Verify that a report file contains none of the given (foreign) actions.
#
# Arguments:
#   $1    - report file (CSV with an "Action" column)
#   $2... - action tags that must *not* appear in the file
#
# Calls lib_utils::die_fatal when an argument is missing, the file does not
# exist or a forbidden action is found. An empty file is accepted.
function lib_taxes::__reports_patch_verify()
{
  local _file="$1"
  local _tags=("${@:2}")

  [[ -z "$_file" || -z "${_tags[*]}" ]] && lib_utils::die_fatal
  [ ! -f "$_file" ] && lib_utils::die_fatal "File not found: '${_file}'"

  # Nothing to verify
  [[ -s "$_file" ]] || return 0

  # The loop reads from a process substitution (instead of being the last
  # stage of a pipeline) so that it runs in the current shell and
  # lib_utils::die_fatal is not confined to a pipeline subshell.
  # The header line is dropped *before* sorting, so that removing it does
  # not depend on how "Action" collates against the data.
  local _line _tag
  while read -r _line; do
    for _tag in "${_tags[@]}"; do
      [[ "$_line" != "$_tag" ]] || lib_utils::die_fatal "Bad entry in $_file"
    done
  done < <(xsv select "Action" "$_file" | tail -n +2 | sort -u)
}
#
# Reports (obfuscated)
#
# Select the column layout for the obfuscated report of the current tag type
# and generate it.
#
# NOTE: functions inherit local vars: _arg_tag comes from the caller, while
# _all_columns / _obfs_columns are set here for lib_taxes::__reports_obfs_gen
function lib_taxes::__reports_obfs()
{
  if [[ "$_arg_tag" == "income" || "$_arg_tag" == "spends" ]]; then
    # Memo-based layout: account and memo are obfuscated
    local _all_columns="Date,Action,Account,Symbol,Volume,Currency,Total,Memo"
    local _obfs_columns=("Account" "Memo")
  elif [[ "$_arg_tag" == "trades" ]]; then
    # Fee-based layout: only the account is obfuscated
    local _all_columns="Date,Action,Account,Symbol,Volume,Currency,Total,FeeCurrency,Fee"
    local _obfs_columns=("Account")
  else
    lib_utils::die_fatal "Unsupported type"
  fi

  lib_taxes::__reports_obfs_gen
}
# Generate the obfuscated report (and one keymap file per obfuscated column)
# from a full report.
#
# Variables read from the calling functions' scope:
#   _taxes_tmp_dir, _obfs_columns, _all_columns, _in_file, _out_file,
#   _tax_year, _arg_tag
# Globals read:
#   global_parent_profile, global_child_profile
# Arguments:
#   $2 - optional length of the obfuscated key (default: 10 characters)
# Files written:
#   - "$_out_file": the columns listed in _all_columns, with the columns
#     listed in _obfs_columns replaced by their obfuscated keys
#   - <dir of _out_file>/<_tax_year>_<_arg_tag>-keymap_<column>.csv:
#     RawValue,ObfsKey pairs for each obfuscated column
# TODO: refactor to remove shellcheck
# shellcheck disable=SC2120
function lib_taxes::__reports_obfs_gen()
{
# Temp storage
local _tmp_dir
_tmp_dir="$(mktemp -d -p $_taxes_tmp_dir obfs_XXX)"
# Obfuscate columns
for _column in "${_obfs_columns[@]}"; do
# Generate obfs/raw keymap
# (unique raw values of the column, header line excluded)
local _raw_column
_raw_column="$(xsv select $_column $_in_file | tail -n +2 | sort -u)"
local _count
_count=$(echo "$_raw_column" | wc -l)
local _sha="sha256sum"
lib_utils::deps_check "$_sha"
# One truncated digest per raw value, in the same order as _raw_column
local _obfs_column
_obfs_column="$(echo "$_raw_column" \
| while read _account; do
#
# OBFS column key is a truncated SHA digest of complete profile along with complete account.
#
# NOTE:
#
# - static delimiter is used to avoid potential inconsistencies
# (if ever given a custom delimiter)
#
# - complete profile is included as a means to provide a 'reasonable' salt
# (since default account names and memos can be used across profiles or subprofiles)
#
echo -n "${global_parent_profile}/${global_child_profile}/${_account}" | "$_sha" | head -c${2:-10}
echo
done)"
readarray -t _raw <<<"$_raw_column"
readarray -t _obfs <<<"$_obfs_column"
# Generate obfuscated keymap file
#
# NOTE(review): "${_raw[*]}" / "${_obfs[*]}" join the arrays with the first
# character of IFS and the gawk program splits them on ","; this appears to
# rely on IFS still being "," here (lib_taxes::__parse_args assigns
# IFS="$global_arg_delim_3" and does not restore it) - confirm.
# NOTE(review): presumably a raw value that itself contains "," would be
# split into several entries - verify against real data.
_styled_column="$(echo $_column | tr '[:upper:]' '[:lower:]')" # Lowered for consistency
local _keymap_file
_keymap_file="$(dirname ${_out_file})/${_tax_year}_${_arg_tag}-keymap_${_styled_column}.csv"
echo "RawValue,ObfsKey" \
| gawk -v tax_year="$_tax_year" -v args_tag="$_arg_tag" \
-v column="$_styled_column" -v count="$_count" \
-v obfs="${_obfs[*]}" -v raw="${_raw[*]}" \
'{
printf $1 OFS $2 "\n" # Header
split(raw, r, ",")
split(obfs, o, ",")
for(i = 1; i <= count; i++)
{
print r[i] OFS o[i]
}
}' FS=, OFS=, \
| (
sed -u 1q
sort
) \
| sed 's|, |,|g' >"$_keymap_file"
# Generate a temp file of the given raw column
local _tmp_file="${_tmp_dir}/${_styled_column}"
xsv select "$_column" "$_in_file" -o "$_tmp_file"
# Obfuscate given raw column with keymap data
# (per keymap row: text before the last "," is the search pattern, text
# after the first "," is the replacement)
# NOTE(review): the raw value is used as a sed regular expression with "|"
# as delimiter, so regex metacharacters or "|" in a value are not taken
# literally - confirm this cannot occur in account names / memos.
tail -n +2 "$_keymap_file" \
| while read _line; do
sed -i "s|${_line%,*}|${_line#*,}|g" "$_tmp_file"
done
done
# Join the obfuscated temp files into out file and then delete the temp files.
# NOTE: selecting will remove the raw (unobfuscated) columns
# NOTE(review): no deletion happens in this function; the temp files live
# under _taxes_tmp_dir, which lib_taxes::__taxes_write removes at its end.
csvjoin -I --snifflimit 0 "${_tmp_dir}"/* "$_in_file" \
| xsv select "$_all_columns" -o "$_out_file"
}
# vim: sw=2 sts=2 si ai et