# Source code for macrosynergy.pnl.proxy_pnl_calc

"""
Module for calculating an approximate nominal PnL under consideration of transaction costs.
"""

import numpy as np
import pandas as pd
from typing import List, Union, Tuple, Optional, Dict
from numbers import Number
import warnings
import macrosynergy.visuals as msv
from macrosynergy.management.utils import (
    reduce_df,
    ticker_df_to_qdf,
    get_eops,
)
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.pnl.transaction_costs import (
    TransactionCosts,
    TransactionCostsDictAdapter,
)


def _generate_roll_dates(
    index: pd.DatetimeIndex,
    roll_freq: str,
) -> List[pd.Timestamp]:
    """
    Build the roll schedule for a position panel.

    Parameters
    ----------
    index : pd.DatetimeIndex
        dates of the position panel.
    roll_freq : str
        roll frequency, one of "D", "W", "M" or "Q" (case-insensitive).

    Returns
    -------
    List[pd.Timestamp]
        chronologically sorted end-of-period dates returned by `get_eops` for
        `roll_freq`, restricted to dates that are present in `index`.

    Raises
    ------
    ValueError
        if `roll_freq` is not one of the supported frequencies.
    """
    rf = roll_freq.upper()
    if rf not in {"D", "W", "M", "Q"}:
        raise ValueError(f"Unsupported roll frequency {roll_freq!r}.")
    eops = pd.DatetimeIndex(get_eops(dates=pd.DatetimeIndex(index), freq=rf))
    # Every roll date must be a date on which a position is actually observed,
    # hence the intersection with `index`.
    # NOTE(review): presumably `get_eops` already returns the last available
    # date of each period when the calendar period-end is a weekend/holiday;
    # if it does not, that period's roll is silently dropped here - confirm.
    return sorted(eops.intersection(index))


def _preprocess_positions_for_costs(pivot_pos: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare a wide positions frame for the trading-cost calculation.

    The frame is rejected if its latest row holds no position at all (every
    entry NaN or zero). Otherwise all NaNs are replaced by 0 and a row of
    zero positions is prepended one business day before the earliest index
    date, so that the opening trade shows up as a change from zero.

    Parameters
    ----------
    pivot_pos : pd.DataFrame
        wide positions frame indexed by date.

    Returns
    -------
    pd.DataFrame
        NaN-free copy of `pivot_pos` with the zero anchor row as first row.

    Raises
    ------
    ValueError
        if the latest row is entirely NaN and/or zero.
    """
    final_row = pivot_pos.iloc[-1]
    # NaN is treated as "no position", so an all-NaN row is caught by the
    # same zero test once NaNs are filled.
    if final_row.fillna(0).eq(0).all():
        raise ValueError("The latest row of the positions frame is all-NaN/zero.")

    filled = pivot_pos.fillna(0.0)
    anchor_date = filled.index.min() - pd.tseries.offsets.BDay(1)
    anchor_row = pd.DataFrame(0.0, index=[anchor_date], columns=filled.columns)
    return pd.concat([anchor_row, filled])


def _replace_strs(
    list_of_strs: List[str], old_str: str, new_str: str = ""
) -> List[str]:
    """
    Replace every occurrence of `old_str` by `new_str` in each given string.

    Parameters
    ----------
    list_of_strs : List[str]
        strings (typically tickers) to process; any iterable of strings works.
    old_str : str
        substring to be replaced.
    new_str : str
        replacement text. Default is the empty string, i.e. `old_str` is removed.

    Returns
    -------
    List[str]
        the processed strings, in input order.
    """
    replaced: List[str] = []
    for item in list_of_strs:
        replaced.append(item.replace(old_str, new_str))
    return replaced


def _split_returns_positions_tickers(
    tickers: List[str], spos: str, rstring: str
) -> Tuple[List[str], List[str]]:
    """
    Identify the return and position tickers and check that they pair up.

    Parameters
    ----------
    tickers : List[str]
        all tickers available.
    spos : str
        strategy positions name; position tickers are those ending in
        "_<spos>".
    rstring : str
        suffix identifying return tickers.

    Returns
    -------
    Tuple[List[str], List[str]]
        the return tickers and the position tickers. The return tickers are
        derived from the position tickers, so both lists are in the same
        contract order.

    Raises
    ------
    ValueError
        if a position ticker has no matching return ticker or vice versa.
    """
    pos_suffix = f"_{spos}"

    returns_tickers: List[str] = [
        ticker for ticker in tickers if ticker.endswith(rstring)
    ]
    # Match on the full "_<spos>" suffix - the same string that is stripped
    # below. Matching on the bare `spos` would also pick up tickers of other
    # strategies whose name merely ends with the same text (e.g.
    # "..._MYSTRAT_POS" for spos="STRAT_POS"); those cannot be stripped and
    # would be reported as positions without returns.
    positions_tickers: List[str] = [
        ticker for ticker in tickers if ticker.endswith(pos_suffix)
    ]

    set_returns = {ticker.replace(rstring, "") for ticker in returns_tickers}
    set_positions = {ticker.replace(pos_suffix, "") for ticker in positions_tickers}
    positions_wo_returns = sorted(set_positions - set_returns)
    returns_wo_positions = sorted(set_returns - set_positions)

    if positions_wo_returns or returns_wo_positions:
        err_msg = "The following tickers are missing in the dataframe: \n"
        if positions_wo_returns:
            unmatched_pos = [fid + pos_suffix for fid in positions_wo_returns]
            err_msg += f"Positions without returns: {unmatched_pos} \n"
        if returns_wo_positions:
            unmatched_ret = [fid + rstring for fid in returns_wo_positions]
            err_msg += f"Returns without positions: {unmatched_ret} \n"
        err_msg += "Please check the tickers in the dataframe."
        raise ValueError(err_msg)

    # Rebuild the return tickers from the position tickers so that both lists
    # share the same ordering.
    returns_tickers = [
        ticker.replace(pos_suffix, rstring) for ticker in positions_tickers
    ]

    return returns_tickers, positions_tickers


def _check_df(df: QuantamentalDataFrame, spos: str, rstring: str) -> None:
    """
    Validate that `df` holds matching position and return series.

    Parameters
    ----------
    df : QuantamentalDataFrame
        dataframe with the positions and returns.
    spos : str
        strategy positions name; position tickers end in "_<spos>".
    rstring : str
        suffix identifying return tickers.

    Raises
    ------
    TypeError
        if `df` is not a QuantamentalDataFrame.
    ValueError
        if a return ticker has no matching position ticker or vice versa.
    """
    if not isinstance(df, QuantamentalDataFrame):
        # The check is against QuantamentalDataFrame, so the message says so.
        raise TypeError("Input must be a QuantamentalDataFrame.")

    returns_tickers, positions_tickers = _split_returns_positions_tickers(
        tickers=QuantamentalDataFrame(df).list_tickers(),
        spos=spos,
        rstring=rstring,
    )

    pos_suffix = f"_{spos}"
    known_positions = set(positions_tickers)
    known_returns = set(returns_tickers)

    # Second line of defence on top of the pairing check done by
    # `_split_returns_positions_tickers`: each ticker must map onto its
    # counterpart after swapping the suffix.
    missing_tickers = [
        ticker
        for ticker in returns_tickers
        if ticker.replace(rstring, pos_suffix) not in known_positions
    ]
    missing_tickers += [
        ticker
        for ticker in positions_tickers
        if ticker.replace(pos_suffix, rstring) not in known_returns
    ]

    if missing_tickers:
        err_msg = "The following tickers are missing in the dataframe: \n"
        raise ValueError(err_msg + ", ".join(missing_tickers))


def _split_returns_positions_df(
    df_wide: pd.DataFrame, spos: str, rstring: str
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split a wide ticker frame into a returns frame and a positions frame.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe with one column per ticker.
    spos : str
        strategy positions name; position tickers end in "_<spos>".
    rstring : str
        suffix identifying return tickers.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        the returns columns and the positions columns of `df_wide`, still
        carrying their original ticker names.

    Raises
    ------
    ValueError
        if tickers do not pair up, or if a return series holds no data at all.
    """
    ret_cols, pos_cols = _split_returns_positions_tickers(
        tickers=df_wide.columns.tolist(),
        spos=spos,
        rstring=rstring,
    )

    pivot_returns: pd.DataFrame = df_wide.loc[:, ret_cols]
    pivot_pos: pd.DataFrame = df_wide.loc[:, pos_cols]

    # A return series without a single observation cannot generate any PnL,
    # which almost always points to a data-preparation mistake. Fail loudly
    # here instead of letting it slip into the cost calculation.
    all_nan_mask = pivot_returns.isna().all(axis=0)
    empty_returns = sorted(pivot_returns.columns[all_nan_mask])
    if empty_returns:
        raise ValueError(
            "The following return series are entirely NaN and carry no "
            "data: " + ", ".join(empty_returns)
        )

    # Both frames must cover exactly the same set of contracts.
    contracts_with_returns = set(_replace_strs(pivot_returns.columns, rstring))
    contracts_with_positions = set(_replace_strs(pivot_pos.columns, f"_{spos}"))
    assert contracts_with_returns == contracts_with_positions

    return pivot_returns, pivot_pos


def _get_rebal_dates(df_wide: pd.DataFrame) -> List[pd.Timestamp]:
    """
    Find the dates on which any position changes.

    A date qualifies if at least one column differs from its previous row, or
    if the previous row was entirely NaN while the current row holds at least
    one value (the first observed positions).

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide positions frame indexed by date.

    Returns
    -------
    List[pd.Timestamp]
        sorted list of rebalancing dates.
    """
    # Rows in which at least one column moved relative to the row before.
    # A move from/to NaN yields a NaN difference and is not counted here.
    moved = df_wide.diff(axis=0).abs().gt(0).any(axis=1)
    dates_with_change = df_wide.index[moved]

    # Rows that follow an all-NaN row and carry at least one value.
    after_empty_row = df_wide.shift(1).isna().all(axis=1)
    has_value = df_wide.notna().any(axis=1)
    dates_first_value = df_wide.index[after_empty_row & has_value]

    return sorted(dates_with_change.union(dates_first_value).tolist())


def _warn_and_drop_nans(df_wide: pd.DataFrame) -> pd.DataFrame:
    """
    Drop rows, then columns, that are entirely NaN and warn about each drop.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe to clean.

    Returns
    -------
    pd.DataFrame
        the frame without all-NaN rows and without all-NaN columns.
    """
    template = "Warning: The following {idx} are all NaNs and have been dropped: {lst}"

    # Rows first: a column is only judged after the empty rows are gone.
    empty_rows = df_wide.loc[df_wide.isna().all(axis=1)]
    if not empty_rows.empty:
        warnings.warn(template.format(idx="rows", lst=empty_rows.index))
        df_wide = df_wide.dropna(how="all")

    empty_cols = df_wide.loc[:, df_wide.isna().all(axis=0)]
    if not empty_cols.empty:
        warnings.warn(template.format(idx="columns", lst=empty_cols.columns))
        df_wide = df_wide.dropna(how="all", axis=1)

    return df_wide


def _prep_dfs_for_pnl_calcs(
    df_wide: QuantamentalDataFrame,
    spos: str,
    rstring: str,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[pd.Timestamp]]:
    """
    Prepare the inputs of the PnL calculation.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe with position and return tickers as columns and an
        index named "real_date".
    spos : str
        strategy positions name; position tickers end in "_<spos>".
    rstring : str
        suffix identifying return tickers.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[pd.Timestamp]]
        an empty PnL frame spanning the business days between the first and
        last valid position date, the positions frame, the returns frame
        (both with columns renamed to the bare contract name and sorted), and
        the list of rebalancing dates.
    """
    pivot_returns, pivot_pos = _split_returns_positions_df(
        df_wide=df_wide, spos=spos, rstring=rstring
    )

    # Warn about gaps inside each series, i.e. NaNs located between the first
    # and the last valid observation. Returns are checked before positions.
    for frame in (pivot_returns, pivot_pos):
        for col in frame.columns:
            series = frame[col]
            gaps = series.loc[series.isna()].loc[
                series.first_valid_index() : series.last_valid_index()
            ]
            if gaps.empty:
                continue
            warnings.warn(
                f"Warning: Series {col} has NAs at the following dates: {gaps.index}"
            )

    # Date range covered by the positions
    first_pos_date = pivot_pos.first_valid_index()
    last_pos_date = pivot_pos.last_valid_index()

    rebal_dates = _get_rebal_dates(pivot_pos)

    # Strip the suffixes so that positions and returns share column names and
    # align when multiplied; sort the columns of both frames.
    pivot_pos.columns = _replace_strs(pivot_pos.columns, f"_{spos}")
    pivot_returns.columns = _replace_strs(pivot_returns.columns, rstring)
    pivot_pos = pivot_pos[sorted(pivot_pos.columns)]
    pivot_returns = pivot_returns[sorted(pivot_returns.columns)]
    assert pivot_pos.index.name == pivot_returns.index.name == "real_date"

    pnl_df = pd.DataFrame(
        index=pd.bdate_range(first_pos_date, last_pos_date),
        columns=pivot_pos.columns.tolist(),
    )
    pnl_df.index.name = "real_date"
    return pnl_df, pivot_pos, pivot_returns, rebal_dates


def _pnl_excl_costs(
    df_wide: pd.DataFrame, spos: str, rstring: str, pnle_name: str
) -> pd.DataFrame:
    """
    Calculate the daily PnL per contract before transaction costs.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe with position and return tickers as columns.
    spos : str
        strategy positions name; position tickers end in "_<spos>".
    rstring : str
        suffix identifying return tickers.
    pnle_name : str
        label appended to the output columns.

    Returns
    -------
    pd.DataFrame
        wide dataframe with columns "<contract>_<spos>_<pnle_name>". Rows
        whose absolute PnL sums to zero across all contracts are removed.
    """
    # The empty `pnl_df` returned here is overwritten further below.
    pnl_df, pivot_pos, pivot_returns, rebal_dates = _prep_dfs_for_pnl_calcs(
        df_wide=df_wide, spos=spos, rstring=rstring
    )
    # Add last end date - as position taken on the last rebal date,
    # is held until notional_positions data is available
    _end = pd.Timestamp(pivot_pos.last_valid_index())
    rebal_dates = sorted(set(rebal_dates + [_end]))

    # Setup prices df: a scaling factor per date and contract, 1.0 by default
    prices_df: pd.DataFrame = pd.DataFrame(
        data=1.0,
        index=pivot_returns.index,
        columns=pivot_returns.columns,
    )

    # loop over consecutive pairs of rebalancing dates
    # there are returns and positions for each date in between and on the rebalancing date
    for dt1, dt2 in zip(rebal_dates[:-1], rebal_dates[1:]):
        # dt1 is the first day of current (new) position
        # dt2 is the next rebalancing date, i.e.  position changes on dt2.
        dt1x = dt1 + pd.offsets.BDay(1)  # first day the current position made returns
        # Compound the returns (divided by 100) from dt1x up to and including dt2
        prices_df.loc[dt1x:dt2] = (1 + pivot_returns.loc[dt1x:dt2] / 100).cumprod()

    # Actual PNL calculation: today's return applied to yesterday's position,
    # scaled by yesterday's compounding factor.
    # NOTE(review): the factor at dt1 is the terminal factor of the previous
    # window, so the first return day of a new position (dt1x) is scaled by
    # it rather than by 1.0 - confirm this is intended.
    pnl_df = (pivot_returns / 100) * pivot_pos.shift(1) * prices_df.shift(1)
    # Drop rows with no pnl (all-NaN rows sum to zero and are dropped as well)
    pnl_df = pnl_df.loc[pnl_df.abs().sum(axis=1) > 0]
    pnl_df.columns = [f"{col}_{spos}_{pnle_name}" for col in pnl_df.columns]
    return pnl_df


def _calculate_trading_costs(
    df_wide: pd.DataFrame,
    spos: str,
    rstring: str,
    transaction_costs: TransactionCosts,
    tc_name: str,
    roll_freq: str = "M",
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
) -> pd.DataFrame:
    """
    Calculate bid-offer and roll costs per position ticker and date.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe with position and return tickers as columns.
    spos : str
        strategy positions name; position tickers end in "_<spos>".
    rstring : str
        suffix identifying return tickers.
    transaction_costs : TransactionCosts
        object queried through `bidoffer(trade_size, fid, real_date)` and
        `rollcost(trade_size, fid, real_date)`; the results are divided by
        100 here, i.e. presumably quoted in percent - TODO confirm.
    tc_name : str
        label of the trading costs columns.
    roll_freq : str
        roll frequency passed to `_generate_roll_dates`. Default is "M".
    bidoffer_name : str
        label of the bid-offer component. Default is "BIDOFFER".
    rollcost_name : str
        label of the roll cost component. Default is "ROLLCOST".

    Returns
    -------
    pd.DataFrame
        wide dataframe with, for each position ticker, the columns
        "<ticker>_<tc_name>_<bidoffer_name>", "<ticker>_<tc_name>_<rollcost_name>"
        and their sum "<ticker>_<tc_name>". Only rows with a positive total
        of absolute costs are kept.
    """
    pivot_returns, pivot_pos = _split_returns_positions_df(
        df_wide=df_wide, spos=spos, rstring=rstring
    )
    # Per-contract first valid return date - no PnL can be earned before
    # this, so no trading cost should book either. The pos-ticker keys are
    # built from the returns-ticker columns via _replace_strs to mirror
    # the rest of the module's suffix-swap convention. An entirely-NaN
    # return series is rejected upstream in _split_returns_positions_df.
    pos_keys = _replace_strs(pivot_returns.columns, rstring, f"_{spos}")
    first_return = {
        key: pivot_returns[col].first_valid_index()
        for key, col in zip(pos_keys, pivot_returns.columns)
    }
    # The roll schedule is derived from the original position dates, i.e.
    # before the synthetic anchor row is prepended.
    roll_dates = _generate_roll_dates(pivot_pos.index, roll_freq)
    pivot_pos = _preprocess_positions_for_costs(pivot_pos)
    pivot_pos = pivot_pos.sort_index()

    tickers = pivot_pos.columns.tolist()
    tc_cols = [
        f"{col}_{tc_name}_{cost}"
        for col in tickers
        for cost in (bidoffer_name, rollcost_name)
    ]
    tc_df = pd.DataFrame(data=0.0, index=pivot_pos.index, columns=tc_cols)

    # Trading costs decompose into two passes: bid-offer on every trade, and
    # roll cost on the residual position carried across each roll date.

    # Step 1: Bid-offer cost on the absolute change in position from one row
    # to the next; only rows with a non-zero change are charged.
    abs_position_change = pivot_pos.diff().abs()
    for ticker in tickers:
        fid = ticker.replace(f"_{spos}", "")
        bo_col = f"{ticker}_{tc_name}_{bidoffer_name}"
        traded = abs_position_change[ticker][abs_position_change[ticker] > 0]
        for date, trade_size in traded.items():
            bo_pct = transaction_costs.bidoffer(
                trade_size=trade_size, fid=fid, real_date=date
            )
            tc_df.loc[date, bo_col] = trade_size * bo_pct / 100

    # Step 2: Roll cost on the position held across each roll date.
    # Charge min(abs(prev), abs(curr)) when the sign is unchanged from
    # the previous row, else zero. shift(1) takes the position of the
    # preceding row of the frame.
    pos_at_roll = pivot_pos.loc[roll_dates]
    pos_before_roll = pivot_pos.shift(1).loc[roll_dates]

    held_long = (pos_before_roll > 0) & (pos_at_roll > 0)
    held_short = (pos_before_roll < 0) & (pos_at_roll < 0)
    held_position = np.minimum(pos_at_roll.abs(), pos_before_roll.abs()).where(
        held_long | held_short, 0.0
    )
    for ticker in tickers:
        fid = ticker.replace(f"_{spos}", "")
        rc_col = f"{ticker}_{tc_name}_{rollcost_name}"
        rolled: pd.Series = held_position[ticker][held_position[ticker] > 0]
        for date, roll_size in rolled.items():
            rc_pct = transaction_costs.rollcost(
                trade_size=roll_size, fid=fid, real_date=date
            )
            tc_df.loc[date, rc_col] = roll_size * rc_pct / 100

    # NaN-out per-contract costs before the first valid return date - no PnL
    # is earned in that window, so no trading cost is applicable. Rows that
    # end up all-NaN are dropped by the row-sum filter below.
    # NOTE(review): the comparison is strict, so a trade made on the row
    # right before the first valid return date is discarded as well, although
    # that position earns the first return - confirm this is intended.
    for ticker in tickers:
        cols = [
            f"{ticker}_{tc_name}_{bidoffer_name}",
            f"{ticker}_{tc_name}_{rollcost_name}",
        ]
        tc_df.loc[tc_df.index < first_return[ticker], cols] = np.nan

    # Sum TICKER_TCOST_BIDOFFER and TICKER_TCOST_ROLLCOST into TICKER_TCOST
    # (NaNs are skipped by the sum, so the total is never NaN)
    for ticker in tickers:
        tc_df[f"{ticker}_{tc_name}"] = tc_df[
            [
                f"{ticker}_{tc_name}_{bidoffer_name}",
                f"{ticker}_{tc_name}_{rollcost_name}",
            ]
        ].sum(axis=1)

    # Drop rows with no trading costs (also removes the synthetic anchor row)
    # added to the positions df for the opening trade
    tc_df = tc_df.loc[tc_df.abs().sum(axis=1) > 0]
    # Costs are charges and must never be negative
    assert not (tc_df < 0).any().any()
    return tc_df


def _apply_trading_costs(
    pnlx_wide_df: pd.DataFrame,
    tc_wide_df: pd.DataFrame,
    spos: str,
    tc_name: str,
    pnl_name: str,
    pnle_name: str,
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
) -> pd.DataFrame:
    """
    Subtract the total trading costs from the PnL excluding costs.

    Parameters
    ----------
    pnlx_wide_df : pd.DataFrame
        wide PnL excluding costs, columns "<contract>_<spos>_<pnle_name>".
    tc_wide_df : pd.DataFrame
        wide trading costs, with total columns "<contract>_<spos>_<tc_name>"
        and component columns ending in "_<tc_name>_<bidoffer_name>" or
        "_<tc_name>_<rollcost_name>" (the components are ignored here).
    spos : str
        strategy positions name.
    tc_name : str
        label of the trading costs columns.
    pnl_name : str
        label of the output (PnL including costs) columns.
    pnle_name : str
        label of the input (PnL excluding costs) columns.
    bidoffer_name : str
        label of the bid-offer cost component. Default is "BIDOFFER".
    rollcost_name : str
        label of the roll cost component. Default is "ROLLCOST".

    Returns
    -------
    pd.DataFrame
        wide PnL including costs, columns "<contract>_<spos>_<pnl_name>". The
        index is the union of the PnL dates and the cost dates, so that a
        cost booked on a date without any PnL (e.g. the opening trade) shows
        up as a negative PnL instead of being lost. The inputs are not
        modified.
    """
    pnl_suffix = f"_{spos}_{pnle_name}"
    tc_suffix = f"_{spos}_{tc_name}"
    component_endings = (
        f"_{tc_name}_{bidoffer_name}",
        f"_{tc_name}_{rollcost_name}",
    )

    pnls_list = sorted(pnlx_wide_df.columns.tolist())
    # Keep only the total cost columns; drop the bid-offer/roll components.
    tcs_list = sorted(
        {
            tc
            for tc in tc_wide_df.columns.tolist()
            if not str(tc).endswith(component_endings)
        }
    )

    # Every PnL series needs exactly one total cost series for its contract.
    assert len(pnls_list) == len(tcs_list)
    tc_by_contract = {tc.replace(tc_suffix, ""): tc for tc in tcs_list}
    assert {pnl.replace(pnl_suffix, "") for pnl in pnls_list} == set(tc_by_contract)

    # Work on the union of both date sets. Assigning the result of `sub` back
    # into a frame that only has the PnL dates would silently discard costs
    # on all other dates.
    all_dates = pnlx_wide_df.index.union(tc_wide_df.index).rename(
        pnlx_wide_df.index.name
    )
    out_df = pnlx_wide_df.reindex(all_dates)

    for pnl_col in pnls_list:
        # Pair by contract name rather than by position in the sorted lists.
        tc_col = tc_by_contract[pnl_col.replace(pnl_suffix, "")]
        # fill_value=0: a date missing on one side counts as zero PnL / cost.
        out_df[pnl_col] = out_df[pnl_col].sub(tc_wide_df[tc_col], fill_value=0)

    return out_df.rename(
        columns=lambda col: str(col).replace(pnl_suffix, f"_{spos}_{pnl_name}")
    )


def _portfolio_sums(
    df_outs: Dict[str, pd.DataFrame],
    spos: str,
    portfolio_name: str,
    pnl_name: str,
    tc_name: str,
    pnle_name: str,
    bidoffer_name: str,
    rollcost_name: str,
) -> Dict[str, pd.DataFrame]:
    """
    Add portfolio-level totals of PnLs and costs across all contracts.

    Parameters
    ----------
    df_outs : Dict[str, pd.DataFrame]
        wide frames under the keys "pnl_incl_costs", "pnl_excl_costs" and
        "tc_wide". The frames are extended in place.
    spos : str
        strategy positions name.
    portfolio_name : str
        name of the portfolio, used as prefix of the new columns.
    pnl_name : str
        label of the PnL including costs.
    tc_name : str
        label of the trading costs.
    pnle_name : str
        label of the PnL excluding costs.
    bidoffer_name : str
        label of the bid-offer cost component.
    rollcost_name : str
        label of the roll cost component.

    Returns
    -------
    Dict[str, pd.DataFrame]
        the same dictionary, each frame carrying one additional column
        "<portfolio_name>_<spos>_<label>" with the row-wise sum.
    """
    costs_wide = df_outs["tc_wide"]

    # Only the per-contract total cost columns enter the portfolio cost; the
    # bid-offer and roll cost components would be double counting.
    component_endings = (
        f"_{tc_name}_{bidoffer_name}",
        f"_{tc_name}_{rollcost_name}",
    )
    total_cost_cols = sorted(
        {
            col
            for col in costs_wide.columns.tolist()
            if not str(col).endswith(component_endings)
        }
    )

    # Compute every total before any frame is extended.
    totals = [
        ("pnl_incl_costs", pnl_name, df_outs["pnl_incl_costs"].sum(axis=1, skipna=True)),
        ("pnl_excl_costs", pnle_name, df_outs["pnl_excl_costs"].sum(axis=1, skipna=True)),
        ("tc_wide", tc_name, costs_wide.loc[:, total_cost_cols].sum(axis=1, skipna=True)),
    ]

    for key, label, row_sum in totals:
        df_outs[key].loc[:, f"{portfolio_name}_{spos}_{label}"] = row_sum

    return df_outs


def proxy_pnl_calc(
    df: QuantamentalDataFrame,
    spos: str,
    rstring: str,
    transaction_costs_object: Optional[
        Union[TransactionCosts, TransactionCostsDictAdapter, Dict]
    ],
    roll_freq: Optional[Union[str, Dict]] = None,
    start: Optional[str] = None,
    end: Optional[str] = None,
    blacklist: Optional[Dict] = None,
    portfolio_name: str = "GLB",
    pnl_name: str = "PNL",
    tc_name: str = "TCOST",
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
    return_pnl_excl_costs: bool = False,
    return_costs: bool = False,
    concat_dfs: bool = False,
) -> Union[QuantamentalDataFrame, Tuple[QuantamentalDataFrame, ...]]:
    """
    Calculates an approximate nominal PnL under consideration of transaction costs

    Parameters
    ----------
    df : QuantamentalDataFrame
        standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat',
        'real_date' and 'value'. This dataframe must contain the notional positions and
        related notional return series (for PnL calculations).
    spos : str
        the name of the strategy positions in the dataframe in the format
        "<sname>_<pname>". This must correspond to contract positions in the dataframe,
        which are categories of the format "<cid>_<ctype>_<sname>_<pname>". The strategy
        name <sname> has usually been set by the `contract_signals` function and the string
        for <pname> by the `notional_positions` function.
    rstring : str
        the string that identifies the returns series in the dataframe.
    transaction_costs_object : TransactionCosts or dict
        an initialized TransactionCosts object
        (macrosynergy.pnl.transaction_costs.TransactionCosts) that contains the transaction
        costs data. If the user does not have access to the TransactionCosts object, or does
        not want to use transaction costs, the function can be called with
        `transaction_costs_object=None`. Users can alternatively pass a dictionary of
        static cost parameters, which will be adapted to the TransactionCosts interface.
    roll_freq : str or None
        roll frequency string ("D", "W", "M" or "Q"). `None` defaults to "M".
        Per-fid dict form is reserved for a future release. Should be chosen
        consistently with the `rebal_freq` passed to `notional_positions`.
    start : str
        the start date of the data. Default is None, which means that the start date is
        taken from the dataframe.
    end : str
        the end date of the data. Default is None, which means that the end date is
        taken from the dataframe.
    blacklist : dict
        a dictionary of contract identifiers to exclude from the calculation. Default is
        None, which means that no contracts are excluded.
    portfolio_name : str
        the name of the portfolio. Default is "GLB".
    pnl_name : str
        the name of the PnL (including costs), Default is "PNL". The series for PnL
        excluding costs is named with "...<pnl_name>e". The name is appended with the
        strategy positions name, as "<portfolio_name>_<spos>_<pnl_name>".
    tc_name : str
        the name of the trading costs series. Default is "TCOST".
    bidoffer_name : str
        a sub-component of the trading costs, representing the bid-offer spread. Default
        is "BIDOFFER".
    rollcost_name : str
        a sub-component of the trading costs, representing the roll costs. Default is
        "ROLLCOST".
    return_pnl_excl_costs : bool
        whether to return the PnL excluding costs. Default is False.
    return_costs : bool
        whether to return the trading costs. Default is False.
    concat_dfs : bool
        whether to concatenate the output dataframes. Default is False.

    Notes
    -----
    Transaction costs as % of notional are considered to be a linear function of size,
    with the slope determined by the normal and large positions, if all relevant series
    are applied.

    Bid-offer costs are charged on the absolute change in position from one
    business day to the next, i.e. `abs(position[t] - position[t-1])`. Since
    positions are flat between rebalance dates, non-zero deltas only occur
    on actual rebalance dates.

    Roll costs are charged only on the roll schedule and only on the held
    portion of the position - the part that carries across the roll without
    changing sign. Concretely, on each roll date the held size is
    `min(abs(position_before_roll), abs(position_at_roll))` when both have
    the same sign, and zero on opens, closures, or sign flips. The roll
    schedule is derived from `roll_freq` via `get_eops` and intersected
    with the position-panel index.

    Returns
    -------
    Union[QuantamentalDataFrame, Tuple[QuantamentalDataFrame, ...]]
        By default, the PnL including costs. When `return_pnl_excl_costs` and/or
        `return_costs` is True, a tuple starting with the PnL including costs, followed
        by the PnL excluding costs (if requested) and the trading costs (if requested).
        If `concat_dfs` is True, the requested outputs are concatenated into a single
        dataframe instead. If `transaction_costs_object` is None, only the PnL excluding
        costs is returned, regardless of the other flags.

    Raises
    ------
    TypeError
        if an argument is of an unexpected type.
    ValueError
        if `spos` or `rstring` is an empty string, or if positions and returns in `df`
        do not match.
    NotImplementedError
        if `roll_freq` is passed as a dictionary.
    """

    for _varx, _namex, _typex in [
        (spos, "spos", str),
        (rstring, "rstring", str),
        (
            transaction_costs_object,
            "transaction_costs",
            (TransactionCosts, TransactionCostsDictAdapter, dict, type(None)),
        ),
        (roll_freq, "roll_freq", (str, dict, type(None))),
        (start, "start", (str, type(None))),
        (end, "end", (str, type(None))),
        (blacklist, "blacklist", (dict, type(None))),
    ]:
        if not isinstance(_varx, _typex):
            raise TypeError(f"{_namex} must be {_typex}")

        # Only mandatory arguments (single, non-optional type) are checked for
        # emptiness.
        if _typex in [list, str, dict] and len(_varx) == 0:
            raise ValueError(f"`{_namex}` must not be an empty {str(_typex)}")

    transaction_costs_applied: bool = transaction_costs_object is not None
    warn_str = "No transaction costs object provided. Only PnL excluding costs will be calculated."
    if not transaction_costs_applied:
        # Without costs there is only one possible output.
        return_costs = False
        return_pnl_excl_costs = True
        concat_dfs = False
        warnings.warn(warn_str)

    if isinstance(roll_freq, dict):
        raise NotImplementedError(
            "Per-fid `roll_freq` (dict form) is not yet implemented; pass a single "
            "frequency string or None."
        )
    if roll_freq is None:
        roll_freq = "M"

    df = QuantamentalDataFrame(df)
    _initialized_as_categorical: bool = df.InitializedAsCategorical

    if start is None:
        start = df["real_date"].min().strftime("%Y-%m-%d")
    if end is None:
        end = df["real_date"].max().strftime("%Y-%m-%d")

    # Reduce the dataframe to the requested date range and apply the blacklist
    df = reduce_df(
        df=df,
        start=start,
        end=end,
        blacklist=blacklist,
    )
    _check_df(df=df, spos=spos, rstring=rstring)

    df_wide = df.to_wide()
    _, pos_tickers = _split_returns_positions_tickers(
        tickers=df_wide.columns.tolist(),
        spos=spos,
        rstring=rstring,
    )
    traded_fids = sorted(set(_replace_strs(pos_tickers, f"_{spos}")))
    if isinstance(transaction_costs_object, dict):
        # Static cost parameters: wrap them so that they can be queried like a
        # TransactionCosts object.
        transaction_costs_object = TransactionCostsDictAdapter(
            cost_dict=transaction_costs_object,
            fids=traded_fids,
        )

    pnle_name = pnl_name + "e"

    # Calculate the PnL excluding costs
    df_outs: Dict[str, pd.DataFrame] = {}
    df_outs["pnl_excl_costs"] = _pnl_excl_costs(
        df_wide=df_wide,
        spos=spos,
        rstring=rstring,
        pnle_name=pnle_name,
    )

    # Calculate the trading costs and the PnL including costs
    # These calcs can only be run if transaction_costs_object is provided
    if transaction_costs_applied:
        # The component names must be passed on to every step: the portfolio
        # sums below identify the component columns by these names, so falling
        # back to the defaults here would let custom-named runs double count
        # the components in the portfolio cost.
        df_outs["tc_wide"] = _calculate_trading_costs(
            df_wide=df_wide,
            spos=spos,
            rstring=rstring,
            transaction_costs=transaction_costs_object,
            tc_name=tc_name,
            roll_freq=roll_freq,
            bidoffer_name=bidoffer_name,
            rollcost_name=rollcost_name,
        )

        df_outs["pnl_incl_costs"] = _apply_trading_costs(
            pnlx_wide_df=df_outs["pnl_excl_costs"],
            tc_wide_df=df_outs["tc_wide"],
            spos=spos,
            tc_name=tc_name,
            pnl_name=pnl_name,
            pnle_name=pnle_name,
            bidoffer_name=bidoffer_name,
            rollcost_name=rollcost_name,
        )
    else:
        df_outs["pnl_incl_costs"] = pd.DataFrame()
        df_outs["tc_wide"] = pd.DataFrame()

    df_outs = _portfolio_sums(
        df_outs=df_outs,
        spos=spos,
        portfolio_name=portfolio_name,
        pnl_name=pnl_name,
        tc_name=tc_name,
        pnle_name=pnle_name,
        bidoffer_name=bidoffer_name,
        rollcost_name=rollcost_name,
    )

    # Convert to QDFs; empty frames (no costs applied) are left as they are
    for key in df_outs.keys():
        if df_outs[key].empty:
            assert key != "pnl_excl_costs", "PnL excluding costs is empty."
            continue
        df_outs[key] = QuantamentalDataFrame.from_wide(
            df_outs[key], categorical=_initialized_as_categorical
        )

    if not transaction_costs_applied:
        return df_outs["pnl_excl_costs"]

    if concat_dfs:
        if not return_pnl_excl_costs:
            df_outs.pop("pnl_excl_costs")
        if not return_costs:
            df_outs.pop("tc_wide")
        return QuantamentalDataFrame.from_qdf_list(
            list(df_outs.values()), categorical=_initialized_as_categorical
        )

    if not (return_pnl_excl_costs or return_costs):
        return df_outs["pnl_incl_costs"]
    elif return_pnl_excl_costs and return_costs:
        return (
            df_outs["pnl_incl_costs"],
            df_outs["pnl_excl_costs"],
            df_outs["tc_wide"],
        )
    elif return_pnl_excl_costs:
        return df_outs["pnl_incl_costs"], df_outs["pnl_excl_costs"]
    elif return_costs:
        return df_outs["pnl_incl_costs"], df_outs["tc_wide"]


def plot_pnl(
    df: pd.DataFrame,
    portfolio_name: str = "GLB",
    pnl_name: str = "PNL",
    tc_name: str = "TCOST",
    cumsum: bool = True,
    title: str = "Cumulative PnLs and Costs",
    ylabel: str = "PnL / USD Million",
    xlabel: str = "Real Date",
    hline: Optional[Union[Number, List[Number]]] = 0.0,
    **kwargs,
) -> None:
    """
    Plot the PnLs and costs for the portfolio.

    Parameters
    ----------
    df : pd.DataFrame
        the dataframe containing the PnLs and costs.
    portfolio_name : str
        the name of the portfolio. Default is "GLB".
    pnl_name : str
        the name of the PnL (including costs). Default is "PNL".
    tc_name : str
        the name of the trading costs series. Default is "TCOST".
    cumsum : bool
        whether to plot the cumulative sum of the PnLs and costs. Default is True.
    title : str
        the title of the plot. Default is "Cumulative PnLs and Costs".
    ylabel : str
        the label for the y-axis. Default is "PnL / USD Million".
    xlabel : str
        the label for the x-axis. Default is "Real Date".
    hline : Union[Number, List[Number]]
        the value(s) for the horizontal line(s). Default is 0.0.
    kwargs : dict
        additional keyword arguments for the plot.

    Returns
    -------
    None
        The function plots the PnLs and costs for the portfolio.
    """

    df_wide = QuantamentalDataFrame(df).to_wide()
    # Keep the portfolio-level series only
    df_wide = df_wide.loc[:, df_wide.columns.str.startswith(portfolio_name + "_")]

    def _endswith_cols(x: str) -> List[str]:
        return df_wide.columns[df_wide.columns.str.endswith(x)].tolist()

    pnl_cols = _endswith_cols(pnl_name)
    pnle_cols = _endswith_cols(pnl_name + "e")
    tc_cols = _endswith_cols(tc_name)

    # Exactly one series of each kind is expected for the portfolio; check
    # this before the columns are used.
    assert len(pnl_cols) == len(pnle_cols) == len(tc_cols) == 1

    df_wide = df_wide[pnl_cols + pnle_cols + tc_cols]
    if cumsum:
        # Cumulate all three series at once; this returns a new frame rather
        # than writing into a column subset.
        df_wide = df_wide.cumsum()

    qdf = ticker_df_to_qdf(df_wide)
    msv.LinePlot(df=qdf).plot(
        title=title, y_axis_label=ylabel, x_axis_label=xlabel, ax_hline=hline, **kwargs
    )


# Demo: runs the proxy PnL calculation once with a TransactionCosts object and
# once with an equivalent dictionary of static costs, then plots the first
# result. Expects "data/dfxn.pkl" to exist in the working directory.
if __name__ == "__main__":
    import os
    import pickle

    cids_dmca = ["AUD", "CAD", "CHF", "EUR", "GBP", "JPY", "NOK", "NZD", "SEK", "USD"]
    cids_dmec = ["DEM", "ESP", "FRF", "ITL"]
    cids_nofx: List[str] = ["USD", "EUR", "CNY", "SGD"]
    cids_dmfx: List[str] = list(set(cids_dmca) - set(cids_nofx))

    # Download the transaction costs once and cache them locally.
    # NOTE(review): the "data" directory must already exist for the write.
    if not os.path.exists("data/txn.obj.pkl"):
        with open("data/txn.obj.pkl", "wb") as f:
            pickle.dump(TransactionCosts.download(), f)

    # NOTE: pickle must only be used on trusted files; this one is the local
    # cache written above.
    with open("data/txn.obj.pkl", "rb") as f:
        tx = pickle.load(f)

    dfx = pd.read_pickle("data/dfxn.pkl")

    spos = "STRAT_POS"
    rstring = "XR_NSA"
    # Example: TransactionCosts object; returns a tuple of three dataframes
    df_all = proxy_pnl_calc(
        df=dfx,
        spos=spos,
        rstring=rstring,
        start="2001-01-01",
        end="2020-01-01",
        transaction_costs_object=tx,
        portfolio_name="GLB",
        pnl_name="PNL",
        tc_name="TCOST",
        return_pnl_excl_costs=True,
        return_costs=True,
        # concat_dfs=True,
    )

    # Example: dict-based transaction costs
    pos_tickers = [
        t for t in QuantamentalDataFrame(dfx).list_tickers() if t.endswith(f"_{spos}")
    ]
    traded_fids = sorted(set(_replace_strs(pos_tickers, f"_{spos}")))
    # Take the costs of every traded contract as of the first entry of
    # `tx.change_index` and turn them into static cost parameters.
    cost_date = tx.change_index[0]
    cost_dict = {}
    for fid in traded_fids:
        row = tx.get_costs(fid=fid, real_date=cost_date)
        if row is None:
            raise ValueError(f"Missing transaction costs for {fid}")
        # The same size thresholds are used for bid-offer and roll costs
        size = {
            "median": row[f"{fid}SIZE_MEDIAN"],
            "pct90": row[f"{fid}SIZE_90PCTL"],
        }
        cost_dict[fid] = {
            "bid_offer": {
                "size": dict(size),
                "cost": {
                    "median": row[f"{fid}BIDOFFER_MEDIAN"],
                    "pct90": row[f"{fid}BIDOFFER_90PCTL"],
                },
            },
            "rollcost": {
                "size": dict(size),
                "cost": {
                    "median": row[f"{fid}ROLLCOST_MEDIAN"],
                    "pct90": row[f"{fid}ROLLCOST_90PCTL"],
                },
            },
        }

    df_all_dict = proxy_pnl_calc(
        df=dfx,
        spos=spos,
        rstring=rstring,
        start="2001-01-01",
        end="2020-01-01",
        transaction_costs_object=cost_dict,
        portfolio_name="GLB",
        pnl_name="PNL",
        tc_name="TCOST",
        return_pnl_excl_costs=True,
        return_costs=True,
        # concat_dfs=True,
    )

    # NOTE(review): `df_all` is a tuple of three dataframes here because both
    # return flags are set; `plot_pnl` is presumably meant to receive a single
    # (concatenated) dataframe, see the commented-out `concat_dfs=True` - confirm.
    plot_pnl(df=df_all)