Source code for macrosynergy.pnl.proxy_pnl_calc

"""
Module for calculating an approximate nominal PnL under consideration of transaction costs.
"""

import pandas as pd
from typing import List, Union, Tuple, Optional, Dict
from numbers import Number
import warnings
import macrosynergy.visuals as msv
from macrosynergy.management.utils import (
    reduce_df,
    ticker_df_to_qdf,
)
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.pnl.transaction_costs import (
    TransactionCosts,
    TransactionCostsDictAdapter,
)


def _replace_strs(
    list_of_strs: List[str], old_str: str, new_str: str = ""
) -> List[str]:
    return [ticker.replace(old_str, new_str) for ticker in list_of_strs]


def _split_returns_positions_tickers(
    tickers: List[str], spos: str, rstring: str
) -> Tuple[List[str], List[str]]:
    # Filter tickers based on the specific suffixes
    returns_tickers: List[str] = [
        ticker for ticker in tickers if ticker.endswith(rstring)
    ]
    positions_tickers: List[str] = [
        ticker for ticker in tickers if ticker.endswith(spos)
    ]

    set_returns = set(_replace_strs(returns_tickers, rstring))
    set_positions = set(_replace_strs(positions_tickers, f"_{spos}"))
    # assert len(set_positions - set_returns) == len(set_returns - set_positions) == 0
    positions_wo_returns = set_positions - set_returns
    returns_wo_positions = set_returns - set_positions
    if (len(positions_wo_returns) + len(returns_wo_positions)) > 0:
        err_msg = "The following tickers are missing in the dataframe: \n"
        positions_wo_returns = sorted(positions_wo_returns)
        positions_wo_returns = list(map(lambda x: x + f"_{spos}", positions_wo_returns))
        returns_wo_positions = sorted(returns_wo_positions)
        returns_wo_positions = list(map(lambda x: x + rstring, returns_wo_positions))

        if positions_wo_returns:
            err_msg += f"Positions without returns: {positions_wo_returns} \n"
        if returns_wo_positions:
            err_msg += f"Returns without positions: {returns_wo_positions} \n"
        err_msg += "Please check the tickers in the dataframe."
        raise ValueError(err_msg)

    returns_tickers: List[str] = [
        ticker.replace(f"_{spos}", rstring) for ticker in positions_tickers
    ]

    return returns_tickers, positions_tickers


def _check_df(df: QuantamentalDataFrame, spos: str, rstring: str) -> None:
    """
    Validate that `df` holds a return series for every position series.

    Parameters
    ----------
    df : QuantamentalDataFrame
        dataframe with the positions and returns.
    spos : str
        name of the strategy positions; position tickers carry `_<spos>`.
    rstring : str
        suffix that identifies the return series.

    Raises
    ------
    TypeError
        if `df` does not pass the `QuantamentalDataFrame` instance check.
    ValueError
        if a return ticker has no position counterpart or vice versa (raised
        either here or by `_split_returns_positions_tickers`).
    """
    if not isinstance(df, QuantamentalDataFrame):
        raise TypeError("Input must be a pandas DataFrame.")

    pos_suffix = f"_{spos}"
    ret_list, pos_list = _split_returns_positions_tickers(
        tickers=QuantamentalDataFrame(df).list_tickers(),
        spos=spos,
        rstring=rstring,
    )
    known_positions = set(pos_list)
    known_returns = set(ret_list)

    # Collect unmatched tickers: returns first, then positions.
    unmatched = [
        ret for ret in ret_list
        if ret.replace(rstring, pos_suffix) not in known_positions
    ]
    unmatched += [
        pos for pos in pos_list
        if pos.replace(pos_suffix, rstring) not in known_returns
    ]

    if unmatched:
        raise ValueError(
            "The following tickers are missing in the dataframe: \n"
            + ", ".join(unmatched)
        )


def _split_returns_positions_df(
    df_wide: pd.DataFrame, spos: str, rstring: str
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split a wide dataframe into a returns frame and a positions frame.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe with one column per ticker.
    spos : str
        name of the strategy positions; position columns carry `_<spos>`.
    rstring : str
        suffix that identifies the return columns.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        `(pivot_returns, pivot_pos)`, each restricted to the respective columns
        as listed by `_split_returns_positions_tickers`.
    """
    ret_cols, pos_cols = _split_returns_positions_tickers(
        tickers=df_wide.columns.tolist(),
        spos=spos,
        rstring=rstring,
    )

    # Column selection only - the input is already in wide format.
    pivot_returns: pd.DataFrame = df_wide.loc[:, ret_cols]
    pivot_pos: pd.DataFrame = df_wide.loc[:, pos_cols]

    # Sanity check: both frames must describe the same set of contracts.
    contracts_with_returns = {
        col.replace(rstring, "") for col in pivot_returns.columns
    }
    contracts_with_positions = {
        col.replace(f"_{spos}", "") for col in pivot_pos.columns
    }
    assert contracts_with_returns == contracts_with_positions

    return pivot_returns, pivot_pos


def _get_rebal_dates(df_wide: pd.DataFrame) -> List[pd.Timestamp]:
    # get the diff along long axis
    df_diff = df_wide.diff(axis=0)

    # change_index -- where there is any value change across rows
    change_index = df_diff.index[(df_diff.abs() > 0).any(axis=1)]

    # rows where the previous row was all NaN
    # but the current row has at least one non-NaN value
    prev_all_na = df_wide.shift(1).isna().all(axis=1)
    curr_any_value = df_wide.notna().any(axis=1)
    from_na_to_value_index = df_wide.index[prev_all_na & curr_any_value]

    # combine indices
    combined_index = change_index.union(from_na_to_value_index)
    rebal_dates = sorted(combined_index.tolist())
    return rebal_dates


def _warn_and_drop_nans(df_wide: pd.DataFrame) -> pd.DataFrame:
    # get rows that are all nans
    all_nan_rows = df_wide.loc[df_wide.isna().all(axis=1)]
    wrn = "Warning: The following {idx} are all NaNs and have been dropped: {lst}"
    if not all_nan_rows.empty:
        warnings.warn(wrn.format(idx="rows", lst=all_nan_rows.index))
        df_wide = df_wide.dropna(how="all")

    all_nan_cols = df_wide.loc[:, df_wide.isna().all(axis=0)]
    if not all_nan_cols.empty:
        warnings.warn(wrn.format(idx="columns", lst=all_nan_cols.columns))
        df_wide = df_wide.dropna(how="all", axis=1)

    return df_wide


def _prep_dfs_for_pnl_calcs(
    df_wide: QuantamentalDataFrame,
    spos: str,
    rstring: str,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[pd.Timestamp]]:
    """
    Build the aligned inputs used by the PnL calculation.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe holding both position and return columns, indexed by
        `real_date`.
    spos : str
        name of the strategy positions; position columns carry `_<spos>`.
    rstring : str
        suffix that identifies the return columns.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[pd.Timestamp]]
        - an unfilled PnL frame on the business days between the first and last
          valid position date, with one column per contract,
        - the positions, columns renamed to the contract name and sorted,
        - the returns, columns renamed to the contract name and sorted,
        - the rebalancing dates derived from the positions.
    """
    pivot_returns, pivot_pos = _split_returns_positions_df(
        df_wide=df_wide, spos=spos, rstring=rstring
    )

    # Warn about NaNs that sit between the first and the last valid
    # observation of a series (leading/trailing NaNs are not reported).
    for frame in (pivot_returns, pivot_pos):
        for col in frame.columns:
            series = frame[col]
            gaps = series.loc[series.isna()].loc[
                series.first_valid_index() : series.last_valid_index()
            ]
            if gaps.empty:
                continue
            warnings.warn(
                f"Warning: Series {col} has NAs at the following dates: {gaps.index}"
            )

    # Date span covered by the positions.
    start = pivot_pos.first_valid_index()
    end = pivot_pos.last_valid_index()

    rebal_dates = _get_rebal_dates(pivot_pos)

    # Strip the suffixes so that positions and returns share column names and
    # line up when multiplied.
    pivot_pos.columns = _replace_strs(pivot_pos.columns, f"_{spos}")
    pivot_returns.columns = _replace_strs(pivot_returns.columns, rstring)
    pivot_pos = pivot_pos[sorted(pivot_pos.columns)]
    pivot_returns = pivot_returns[sorted(pivot_returns.columns)]
    assert pivot_pos.index.name == pivot_returns.index.name == "real_date"

    pnl_df = pd.DataFrame(
        index=pd.bdate_range(start, end), columns=pivot_pos.columns.tolist()
    )
    pnl_df.index.name = "real_date"
    return pnl_df, pivot_pos, pivot_returns, rebal_dates


def _pnl_excl_costs(
    df_wide: pd.DataFrame, spos: str, rstring: str, pnle_name: str
) -> pd.DataFrame:
    """
    Calculate the daily PnL per contract without transaction costs.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe holding both position and return columns.
    spos : str
        name of the strategy positions.
    rstring : str
        suffix that identifies the return columns.
    pnle_name : str
        name used for the PnL excluding costs.

    Returns
    -------
    pd.DataFrame
        wide dataframe with columns `<contract>_<spos>_<pnle_name>`. Rows whose
        absolute PnL sums to zero across contracts are removed.
    """
    _, pivot_pos, pivot_returns, rebal_dates = _prep_dfs_for_pnl_calcs(
        df_wide=df_wide, spos=spos, rstring=rstring
    )

    # The position taken on the last rebalancing date is held until the last
    # date with position data, so that date closes the final holding period.
    last_position_date = pd.Timestamp(pivot_pos.last_valid_index())
    rebal_dates = sorted(set(rebal_dates + [last_position_date]))

    # Price factor of each contract relative to the start of the current
    # holding period; 1.0 wherever no holding period applies.
    prices_df: pd.DataFrame = pd.DataFrame(
        data=1.0,
        index=pivot_returns.index,
        columns=pivot_returns.columns,
    )
    gross_returns = 1 + pivot_returns / 100

    for period_start, period_end in zip(rebal_dates, rebal_dates[1:]):
        # The position set on `period_start` earns its first return on the next
        # business day and is replaced on `period_end`.
        first_return_day = period_start + pd.offsets.BDay(1)
        prices_df.loc[first_return_day:period_end] = gross_returns.loc[
            first_return_day:period_end
        ].cumprod()

    # Daily PnL: today's return applied to yesterday's position, scaled by
    # yesterday's price factor.
    pnl_df = (pivot_returns / 100) * pivot_pos.shift(1) * prices_df.shift(1)

    # Keep only the dates on which some PnL was made.
    pnl_df = pnl_df.loc[pnl_df.abs().sum(axis=1) > 0]
    pnl_df.columns = [f"{col}_{spos}_{pnle_name}" for col in pnl_df.columns]
    return pnl_df


def _calculate_trading_costs(
    df_wide: pd.DataFrame,
    spos: str,
    rstring: str,
    transaction_costs: TransactionCosts,
    tc_name: str,
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
) -> pd.DataFrame:
    """
    Calculate bid-offer and roll costs on every rebalancing date.

    Parameters
    ----------
    df_wide : pd.DataFrame
        wide dataframe holding both position and return columns.
    spos : str
        name of the strategy positions; position columns carry `_<spos>`.
    rstring : str
        suffix that identifies the return columns.
    transaction_costs : TransactionCosts
        object providing `bidoffer(trade_size, fid, real_date)` and
        `rollcost(trade_size, fid, real_date)`. Their results are divided by
        100 here, i.e. they are treated as percent of the traded notional.
    tc_name : str
        name of the trading costs series.
    bidoffer_name : str
        name of the bid-offer cost component. Default is "BIDOFFER".
    rollcost_name : str
        name of the roll cost component. Default is "ROLLCOST".

    Returns
    -------
    pd.DataFrame
        wide dataframe with, per position ticker, the columns
        `<ticker>_<tc_name>_<bidoffer_name>`, `<ticker>_<tc_name>_<rollcost_name>`
        and their sum `<ticker>_<tc_name>`. Only dates with non-zero costs are
        kept.
    """
    _, pivot_pos = _split_returns_positions_df(
        df_wide=df_wide, spos=spos, rstring=rstring
    )
    rebal_dates = _get_rebal_dates(pivot_pos)
    # Add last end date - as position taken on the last rebal date,
    # is held until notional_positions data is available
    _end = pd.Timestamp(pivot_pos.last_valid_index())
    rebal_dates = sorted(set(rebal_dates + [_end]))

    tickers = pivot_pos.columns.tolist()
    tc_cols = [
        f"{ticker}_{tc_name}_{cost_type}"
        for ticker in tickers
        for cost_type in [bidoffer_name, rollcost_name]
    ]
    # Create a dataframe to store the trading costs with all 0s
    tc_df = pd.DataFrame(data=0.0, index=pivot_pos.index, columns=tc_cols)

    # Opening trade: the full first position is traded (from zero), so the
    # bid-offer cost is charged on its absolute size - the same
    # `size * cost / 100` formula used for every later trade. There is nothing
    # to roll yet, hence the roll cost is zero.
    first_date = rebal_dates[0]
    first_trade: pd.Series = pivot_pos.loc[first_date].abs()
    for ticker in tickers:
        _fid = ticker.replace(f"_{spos}", "")
        bidoffer = transaction_costs.bidoffer(
            trade_size=first_trade[ticker],
            fid=_fid,
            real_date=first_date,
        )
        tc_df.loc[first_date, f"{ticker}_{tc_name}_{rollcost_name}"] = 0
        tc_df.loc[first_date, f"{ticker}_{tc_name}_{bidoffer_name}"] = (
            first_trade[ticker] * bidoffer / 100
        )

    for dt1, dt2 in zip(rebal_dates[:-1], rebal_dates[1:]):
        # Holding period of the position taken on dt1: dt1 up to the business
        # day before the next rebalancing date dt2.
        dt2x = dt2 - pd.offsets.BDay(1)
        prev_pos, next_pos = pivot_pos.loc[dt1], pivot_pos.loc[dt2]
        curr_pos = pivot_pos.loc[dt1:dt2x]
        avg_pos: pd.Series = curr_pos.abs().mean(axis=0)
        delta_pos = (next_pos - prev_pos).abs()
        for ticker in tickers:
            _fid = ticker.replace(f"_{spos}", "")
            _rcn = f"{ticker}_{tc_name}_{rollcost_name}"
            _bon = f"{ticker}_{tc_name}_{bidoffer_name}"
            rollcost = transaction_costs.rollcost(
                trade_size=avg_pos[ticker],
                fid=_fid,
                real_date=dt2,
            )
            bidoffer = transaction_costs.bidoffer(
                trade_size=delta_pos[ticker],
                fid=_fid,
                real_date=dt2,
            )
            # delta_pos and avg_pos are already in absolute terms
            tc_df.loc[dt2, _rcn] = avg_pos[ticker] * rollcost / 100
            tc_df.loc[dt2, _bon] = delta_pos[ticker] * bidoffer / 100

    # Sum the bid-offer and roll cost components into the total cost per ticker
    for ticker in tickers:
        tc_df[f"{ticker}_{tc_name}"] = tc_df[
            [
                f"{ticker}_{tc_name}_{bidoffer_name}",
                f"{ticker}_{tc_name}_{rollcost_name}",
            ]
        ].sum(axis=1)

    # Drop rows with no trading costs
    tc_df = tc_df.loc[tc_df.abs().sum(axis=1) > 0]

    # Internal consistency checks: costs only arise on rebalancing dates and
    # are never negative.
    assert set(tc_df.index) <= set(rebal_dates)
    assert not (tc_df < 0).any().any()

    return tc_df


def _apply_trading_costs(
    pnlx_wide_df: pd.DataFrame,
    tc_wide_df: pd.DataFrame,
    spos: str,
    tc_name: str,
    pnl_name: str,
    pnle_name: str,
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
) -> pd.DataFrame:
    pnls_list = sorted(pnlx_wide_df.columns.tolist())
    tcs_list = sorted(tc_wide_df.columns.tolist())
    # remove all that ends with tc_name_bidoffer or tc_name_rollcost
    filter_endings = (f"_{tc_name}_{bidoffer_name}", f"_{tc_name}_{rollcost_name}")
    tcs_list = [tc for tc in tcs_list if not str(tc).endswith(filter_endings)]
    tcs_list = sorted(set(tcs_list))

    assert len(pnls_list) == len(tcs_list)
    assert set(_replace_strs(pnls_list, f"_{spos}_{pnle_name}")) == set(
        _replace_strs(tcs_list, f"_{spos}_{tc_name}")
    )

    out_df = pnlx_wide_df.copy()
    for pnl_col, tc_col in zip(pnls_list, tcs_list):
        assert pnl_col.replace(f"_{spos}_{pnle_name}", "") == tc_col.replace(
            f"_{spos}_{tc_name}", ""
        )

        out_df[pnl_col] = out_df[pnl_col].sub(tc_wide_df[tc_col], fill_value=0)

    def __rename_pnl(x: str) -> str:
        return str(x).replace(f"_{spos}_{pnle_name}", f"_{spos}_{pnl_name}")

    out_df = out_df.rename(columns=lambda x: __rename_pnl(x))

    return out_df


def _portfolio_sums(
    df_outs: Dict[str, pd.DataFrame],
    spos: str,
    portfolio_name: str,
    pnl_name: str,
    tc_name: str,
    pnle_name: str,
    bidoffer_name: str,
    rollcost_name: str,
) -> Dict[str, pd.DataFrame]:
    """
    Calculate the sum of the PnLs and costs across all contracts in the portfolio
    """
    glb_pnl_incl_costs = df_outs["pnl_incl_costs"].sum(axis=1, skipna=True)
    glb_pnl_excl_costs = df_outs["pnl_excl_costs"].sum(axis=1, skipna=True)

    # Remove all that ends with tc_name_bidoffer or tc_name_rollcost
    filter_endings = (f"_{tc_name}_{bidoffer_name}", f"_{tc_name}_{rollcost_name}")
    tcs_list = [
        tc
        for tc in df_outs["tc_wide"].columns.tolist()
        if not str(tc).endswith(filter_endings)
    ]
    tcs_list = sorted(set(tcs_list))

    # Sum the trading costs
    glb_tcosts = df_outs["tc_wide"].loc[:, tcs_list].sum(axis=1, skipna=True)

    df_outs["pnl_incl_costs"].loc[
        :, f"{portfolio_name}_{spos}_{pnl_name}"
    ] = glb_pnl_incl_costs

    df_outs["pnl_excl_costs"].loc[
        :, f"{portfolio_name}_{spos}_{pnle_name}"
    ] = glb_pnl_excl_costs

    df_outs["tc_wide"].loc[:, f"{portfolio_name}_{spos}_{tc_name}"] = glb_tcosts

    return df_outs


def proxy_pnl_calc(
    df: QuantamentalDataFrame,
    spos: str,
    rstring: str,
    transaction_costs_object: Optional[
        Union[TransactionCosts, TransactionCostsDictAdapter, Dict]
    ],
    roll_freqs: Optional[Dict] = None,
    start: Optional[str] = None,
    end: Optional[str] = None,
    blacklist: Optional[Dict] = None,
    portfolio_name: str = "GLB",
    pnl_name: str = "PNL",
    tc_name: str = "TCOST",
    bidoffer_name: str = "BIDOFFER",
    rollcost_name: str = "ROLLCOST",
    return_pnl_excl_costs: bool = False,
    return_costs: bool = False,
    concat_dfs: bool = False,
) -> Union[QuantamentalDataFrame, Tuple[QuantamentalDataFrame, ...]]:
    """
    Calculates an approximate nominal PnL under consideration of transaction costs

    Parameters
    ----------
    df : QuantamentalDataFrame
        standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat',
        'real_date' and 'value'. This dataframe must contain the notional
        positions and related notional return series (for PnL calculations).
    spos : str
        the name of the strategy positions in the dataframe in the format
        "<sname>_<pname>". This must correspond to contract positions in the
        dataframe, which are categories of the format
        "<cid>_<ctype>_<sname>_<pname>". The strategy name <sname> has usually
        been set by the `contract_signals` function and the string for <pname>
        by the `notional_positions` function.
    rstring : str
        the string that identifies the returns series in the dataframe.
    transaction_costs_object : TransactionCosts or dict
        an initialized TransactionCosts object
        (macrosynergy.pnl.transaction_costs.TransactionCosts) that contains the
        transaction costs data. If the user does not have access to the
        TransactionCosts object, or does not want to use transaction costs, the
        function can be called with `transaction_costs_object=None`. Users can
        alternatively pass a dictionary of static cost parameters, which will be
        adapted to the TransactionCosts interface.
    roll_freqs : dict
        dictionary of roll frequencies for each contract type. This must use the
        contract types as keys and frequency string ("w", "m", or "q") as
        values. The default frequency for all contracts not in the dictionary
        is "m" for monthly. Default is None: all contracts are rolled monthly.
        Passing a dictionary is not supported yet and raises
        `NotImplementedError`.
    start : str
        the start date of the data. Default is None, which means that the start
        date is taken from the dataframe.
    end : str
        the end date of the data. Default is None, which means that the end
        date is taken from the dataframe.
    blacklist : dict
        a dictionary of contract identifiers to exclude from the calculation.
        Default is None, which means that no contracts are excluded.
    portfolio_name : str
        the name of the portfolio. Default is "GLB".
    pnl_name : str
        the name of the PnL (including costs), Default is "PNL". The series for
        PnL excluding costs is named with "...<pnl_name>e". The name is appended
        with the strategy positions name, as
        "<portfolio_name>_<spos>_<pnl_name>".
    tc_name : str
        the name of the trading costs series. Default is "TCOST".
    bidoffer_name : str
        a sub-component of the trading costs, representing the bid-offer
        spread. Default is "BIDOFFER".
    rollcost_name : str
        a sub-component of the trading costs, representing the roll costs.
        Default is "ROLLCOST".
    return_pnl_excl_costs : bool
        whether to return the PnL excluding costs. Default is False.
    return_costs : bool
        whether to return the trading costs. Default is False.
    concat_dfs : bool
        whether to concatenate the output dataframes. Default is False.

    Notes
    -----
    Transaction costs as % of notional are considered to be a linear function of
    size, with the slope determined by the normal and large positions, if all
    relevant series are applied.

    Returns
    -------
    Union[QuantamentalDataFrame, Tuple[QuantamentalDataFrame, ...]]
        By default the PnL including costs. When `return_pnl_excl_costs` and/or
        `return_costs` is True, a tuple that starts with the PnL including
        costs, followed by the PnL excluding costs (if requested) and the
        trading costs (if requested). If `concat_dfs` is True, the requested
        dataframes are concatenated and returned as a single dataframe. If
        `transaction_costs_object` is None, only the PnL excluding costs is
        returned, regardless of the other flags.

    Raises
    ------
    TypeError
        if an argument has an unsupported type.
    ValueError
        if `spos` or `rstring` is an empty string, or if positions and returns
        in `df` do not match.
    NotImplementedError
        if `roll_freqs` is not None.
    """
    for _varx, _namex, _typex in [
        (spos, "spos", str),
        (rstring, "rstring", str),
        (
            transaction_costs_object,
            "transaction_costs_object",
            (TransactionCosts, TransactionCostsDictAdapter, dict, type(None)),
        ),
        (roll_freqs, "roll_freqs", (dict, type(None))),
        (start, "start", (str, type(None))),
        (end, "end", (str, type(None))),
        (blacklist, "blacklist", (dict, type(None))),
    ]:
        if not isinstance(_varx, _typex):
            raise TypeError(f"{_namex} must be {_typex}")
        # The emptiness check only applies to arguments that are mandatory,
        # i.e. whose expected type is a single type and not an optional tuple.
        if _typex in [list, str, dict] and len(_varx) == 0:
            raise ValueError(f"`{_namex}` must not be an empty {str(_typex)}")

    transaction_costs_applied: bool = transaction_costs_object is not None
    warn_str = "No transaction costs object provided. Only PnL excluding costs will be calculated."
    if not transaction_costs_applied:
        return_costs = False
        return_pnl_excl_costs = True
        concat_dfs = False
        warnings.warn(warn_str)

    if roll_freqs is not None:
        raise NotImplementedError(
            "Functionality to support `roll_freqs` is not yet implemented."
        )

    df = QuantamentalDataFrame(df)
    _initialized_as_categorical: bool = df.InitializedAsCategorical

    if start is None:
        start = df["real_date"].min().strftime("%Y-%m-%d")
    if end is None:
        end = df["real_date"].max().strftime("%Y-%m-%d")

    # Restrict the dataframe to the requested dates and apply the blacklist
    df = reduce_df(
        df=df,
        start=start,
        end=end,
        blacklist=blacklist,
    )

    _check_df(df=df, spos=spos, rstring=rstring)

    df_wide = df.to_wide()
    _, pos_tickers = _split_returns_positions_tickers(
        tickers=df_wide.columns.tolist(),
        spos=spos,
        rstring=rstring,
    )
    traded_fids = sorted(set(_replace_strs(pos_tickers, f"_{spos}")))
    if isinstance(transaction_costs_object, dict):
        transaction_costs_object = TransactionCostsDictAdapter(
            cost_dict=transaction_costs_object,
            fids=traded_fids,
        )

    pnle_name = pnl_name + "e"

    # Calculate the PnL excluding costs
    df_outs: Dict[str, pd.DataFrame] = {}
    df_outs["pnl_excl_costs"] = _pnl_excl_costs(
        df_wide=df_wide,
        spos=spos,
        rstring=rstring,
        pnle_name=pnle_name,
    )

    # Calculate the trading costs and the PnL including costs
    # These calcs can only be run if transaction_costs_object is provided
    if transaction_costs_applied:
        # The component names must reach every helper: `_portfolio_sums`
        # identifies the component columns by these names, so columns created
        # under different (default) names would be summed a second time.
        df_outs["tc_wide"] = _calculate_trading_costs(
            df_wide=df_wide,
            spos=spos,
            rstring=rstring,
            transaction_costs=transaction_costs_object,
            tc_name=tc_name,
            bidoffer_name=bidoffer_name,
            rollcost_name=rollcost_name,
        )
        df_outs["pnl_incl_costs"] = _apply_trading_costs(
            pnlx_wide_df=df_outs["pnl_excl_costs"],
            tc_wide_df=df_outs["tc_wide"],
            spos=spos,
            tc_name=tc_name,
            pnl_name=pnl_name,
            pnle_name=pnle_name,
            bidoffer_name=bidoffer_name,
            rollcost_name=rollcost_name,
        )
    else:
        df_outs["pnl_incl_costs"] = pd.DataFrame()
        df_outs["tc_wide"] = pd.DataFrame()

    df_outs = _portfolio_sums(
        df_outs=df_outs,
        spos=spos,
        portfolio_name=portfolio_name,
        pnl_name=pnl_name,
        tc_name=tc_name,
        pnle_name=pnle_name,
        bidoffer_name=bidoffer_name,
        rollcost_name=rollcost_name,
    )

    # Convert to QDFs; empty frames (no transaction costs applied) are skipped
    for key in list(df_outs):
        if df_outs[key].empty:
            assert key != "pnl_excl_costs", "PnL excluding costs is empty."
            continue
        df_outs[key] = QuantamentalDataFrame.from_wide(
            df_outs[key], categorical=_initialized_as_categorical
        )

    if not transaction_costs_applied:
        return df_outs["pnl_excl_costs"]

    if concat_dfs:
        if not return_pnl_excl_costs:
            df_outs.pop("pnl_excl_costs")
        if not return_costs:
            df_outs.pop("tc_wide")
        return QuantamentalDataFrame.from_qdf_list(
            list(df_outs.values()), categorical=_initialized_as_categorical
        )

    if not (return_pnl_excl_costs or return_costs):
        return df_outs["pnl_incl_costs"]
    elif return_pnl_excl_costs and return_costs:
        return (
            df_outs["pnl_incl_costs"],
            df_outs["pnl_excl_costs"],
            df_outs["tc_wide"],
        )
    elif return_pnl_excl_costs:
        return df_outs["pnl_incl_costs"], df_outs["pnl_excl_costs"]
    elif return_costs:
        return df_outs["pnl_incl_costs"], df_outs["tc_wide"]


def plot_pnl(
    df: pd.DataFrame,
    portfolio_name: str = "GLB",
    pnl_name: str = "PNL",
    tc_name: str = "TCOST",
    cumsum: bool = True,
    title: str = "Cumulative PnLs and Costs",
    ylabel: str = "PnL / USD Million",
    xlabel: str = "Real Date",
    hline: Optional[Union[Number, List[Number]]] = 0.0,
    **kwargs,
) -> None:
    """
    Plot the PnLs and costs for the portfolio.

    Parameters
    ----------
    df : pd.DataFrame
        the dataframe containing the PnLs and costs.
    portfolio_name : str
        the name of the portfolio. Default is "GLB".
    pnl_name : str
        the name of the PnL (including costs). Default is "PNL".
    tc_name : str
        the name of the trading costs series. Default is "TCOST".
    cumsum : bool
        whether to plot the cumulative sum of the PnLs and costs. Default is
        True.
    title : str
        the title of the plot. Default is "Cumulative PnLs and Costs".
    ylabel : str
        the label for the y-axis. Default is "PnL / USD Million".
    xlabel : str
        the label for the x-axis. Default is "Real Date".
    hline : Union[Number, List[Number]]
        the value(s) for the horizontal line(s). Default is 0.0.
    kwargs : dict
        additional keyword arguments for the plot.

    Returns
    -------
    None
        The function plots the PnLs and costs for the portfolio.
    """
    wide = QuantamentalDataFrame(df).to_wide()
    # Only the portfolio-level series are plotted.
    wide = wide.loc[:, wide.columns.str.startswith(portfolio_name + "_")]

    # One list of matching tickers per series: PnL incl. costs, PnL excl.
    # costs ("<pnl_name>e") and trading costs.
    pnl_cols, pnle_cols, tc_cols = [
        wide.columns[wide.columns.str.endswith(suffix)].tolist()
        for suffix in (pnl_name, pnl_name + "e", tc_name)
    ]
    wide = wide[pnl_cols + pnle_cols + tc_cols]
    assert len(pnl_cols) == len(pnle_cols) == len(tc_cols) == 1

    if cumsum:
        for col in (pnl_cols[0], pnle_cols[0], tc_cols[0]):
            wide[col] = wide[col].cumsum()

    qdf = ticker_df_to_qdf(wide)
    msv.LinePlot(df=qdf).plot(
        title=title,
        y_axis_label=ylabel,
        x_axis_label=xlabel,
        ax_hline=hline,
        **kwargs,
    )


if __name__ == "__main__":
    import os
    import pickle

    # Example script: expects `data/dfxn.pkl` (positions and returns) to exist
    # and caches the downloaded transaction costs next to it.
    txn_cache_path = "data/txn.obj.pkl"
    if not os.path.exists(txn_cache_path):
        with open(txn_cache_path, "wb") as f:
            pickle.dump(TransactionCosts.download(), f)

    # NOTE(security): unpickling runs arbitrary code - only load cache files
    # that were written by this script.
    with open(txn_cache_path, "rb") as f:
        tx = pickle.load(f)
    dfx = pd.read_pickle("data/dfxn.pkl")

    spos = "STRAT_POS"
    rstring = "XR_NSA"

    # concat_dfs=True yields a single dataframe holding the PnL including
    # costs, the PnL excluding costs and the costs - the input `plot_pnl`
    # needs. Without it a tuple of dataframes is returned.
    df_all = proxy_pnl_calc(
        df=dfx,
        spos=spos,
        rstring=rstring,
        start="2001-01-01",
        end="2020-01-01",
        transaction_costs_object=tx,
        portfolio_name="GLB",
        pnl_name="PNL",
        tc_name="TCOST",
        return_pnl_excl_costs=True,
        return_costs=True,
        concat_dfs=True,
    )

    # Example: dict-based transaction costs
    pos_tickers = [
        t for t in QuantamentalDataFrame(dfx).list_tickers() if t.endswith(f"_{spos}")
    ]
    traded_fids = sorted(set(_replace_strs(pos_tickers, f"_{spos}")))
    cost_date = tx.change_index[0]
    cost_dict = {}
    for fid in traded_fids:
        row = tx.get_costs(fid=fid, real_date=cost_date)
        if row is None:
            raise ValueError(f"Missing transaction costs for {fid}")
        cost_dict[fid] = {
            "median_cost": row[f"{fid}BIDOFFER_MEDIAN"],
            "median_size": row[f"{fid}SIZE_MEDIAN"],
            "pct90_cost": row[f"{fid}BIDOFFER_90PCTL"],
            "pct90_size": row[f"{fid}SIZE_90PCTL"],
        }

    # Returned as a tuple (PnL incl. costs, PnL excl. costs, costs)
    df_all_dict = proxy_pnl_calc(
        df=dfx,
        spos=spos,
        rstring=rstring,
        start="2001-01-01",
        end="2020-01-01",
        transaction_costs_object=cost_dict,
        portfolio_name="GLB",
        pnl_name="PNL",
        tc_name="TCOST",
        return_pnl_excl_costs=True,
        return_costs=True,
    )

    plot_pnl(df=df_all)