import pandas as pd
%matplotlib inline
pd.options.display.float_format = "{:,.3f}".format


def display_me(df, num, name=None):
    print("")
    print(name)
    print(f'(First {num} rows of data)')
    display(df.head(num))
    print("")
    print(f'(Last {num} rows of data)')
    display(df.tail(num))
    print("")


def display_rows(display_list, num_cols, title):
    print("........" * num_cols)
    print(f"{title.upper()}:")
    current = 0
    length = len(display_list)
    num_rows = round(length / num_cols)
    for i in range(num_rows + 1):
        print('\t'.join([str(x) for x in  list(display_list[current:current+num_cols])]))
        current = current + num_cols
    print("........" * num_cols)


dji = pd.read_csv("dji.csv", parse_dates = ['Date'], index_col = 'Date')
constituents = pd.read_csv('prices.csv', parse_dates = ['Date'], index_col = "Date")
print("")
print("DOW JONES CONSTITUENTS")
display(constituents.head(5))
print("")
print("DOW JONES INDEX")
display(dji.head(5))
print("")

DOW JONES CONSTITUENTS

DOW JONES INDEX


print("")
print("CONSTITUENTS.INFO()")
display(constituents.info())
print("")
print("DJI.INFO()")
display(dji.info())
print("")

CONSTITUENTS.INFO()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3313 entries, 2007-01-03 to 2020-03-02
Data columns (total 30 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL    3313 non-null   float64
 1   AMGN    3313 non-null   float64
 2   AXP     3313 non-null   float64
 3   BA      3313 non-null   float64
 4   CAT     3313 non-null   float64
 5   CRM     3313 non-null   float64
 6   CSCO    3313 non-null   float64
 7   CVX     3313 non-null   float64
 8   DIS     3313 non-null   float64
 9   DOW     240 non-null    float64
 10  GS      3313 non-null   float64
 11  HD      3313 non-null   float64
 12  HON     3313 non-null   float64
 13  IBM     3313 non-null   float64
 14  INTC    3313 non-null   float64
 15  JNJ     3313 non-null   float64
 16  JPM     3313 non-null   float64
 17  KO      3313 non-null   float64
 18  MCD     3313 non-null   float64
 19  MMM     3313 non-null   float64
 20  MRK     3313 non-null   float64
 21  MSFT    3313 non-null   float64
 22  NKE     3313 non-null   float64
 23  PG      3313 non-null   float64
 24  TRV     3313 non-null   float64
 25  UNH     3313 non-null   float64
 26  V       3009 non-null   float64
 27  VZ      3313 non-null   float64
 28  WBA     3313 non-null   float64
 29  WMT     3313 non-null   float64
dtypes: float64(30)
memory usage: 802.4 KB

None

DJI.INFO()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3333 entries, 2007-01-03 to 2020-03-30
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       3333 non-null   float64
 1   High       3333 non-null   float64
 2   Low        3333 non-null   float64
 3   Close      3333 non-null   float64
 4   Adj Close  3333 non-null   float64
 5   Volume     3333 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 182.3 KB

None


dji = dji.Close.to_frame().copy()
dji.head(5)


df = pd.concat([constituents, dji], axis = 1)
df.rename(columns = {"Close": "DJI"}, inplace=True)
display_me(df, 5, "Concatenated Data")

Concatenated Data
(First 5 rows of data)

(Last 5 rows of data)


info = pd.read_csv("dowjones.csv")
info.head(5)


import matplotlib.pyplot as plt
import numpy as np
plt.style.use("ggplot")


df.plot(figsize = (14, 8), fontsize = 13, title='30 Dow Jones Constituents', secondary_y = "DJI");


df.div(df.iloc[0]).head(5)


normalized = norm = df.div(df.iloc[0]).mul(100)
normalized.head(5)


normalized.plot(figsize = (13, 7), fontsize = 13, title =  '30 Dow Jones Constituents Normalized')
plt.legend(fontsize = 10)
plt.show()


normalized.drop(columns = ['AAPL', 'CRM', 'DIS']).plot(figsize = (13, 7), fontsize = 13, title =  '30 Dow Jones Constituents (minus top 3)', linewidth=0.75)
plt.legend(fontsize = 10)
plt.show()


daily_returns = df.pct_change()
daily_returns.head(5)


daily_returns.describe()


def summary_annualized(returns):
    summary = returns.agg(['mean', 'std']).T
    summary['return'] = summary["mean"] * 252
    summary['risk'] = summary['std'] * np.sqrt(252)
    summary.drop(columns = ['mean', 'std'], inplace = True)
    return summary


summary = summary_annualized(daily_returns)
summary = summary.merge(info.loc[:, ["ticker", "company"]],
                       how = 'left', left_index = True, 
                       right_on = "ticker")

# Making company the index column
summary.set_index("company", inplace = True)

# Replacing NaN in the company name for the Dow Jones Index
summary.rename(index = {np.nan: "Dow Jones"}, inplace = True)
display_me(summary, 5, "SUMMARY")

SUMMARY
(First 5 rows of data)

(Last 5 rows of data)


summary2 = summary.drop(index = "Dow")
summary2.sort_values(by = "return", ascending = False).plot(kind = 'bar', 
                                                            figsize = (13, 7),
                                                            fontsize = 12,
                                                            title = 'DJI Constituents: Annualized Returns and Risks by Returns', rot = 75);
plt.legend(fontsize=13);


summary2.sort_values(by="risk", ascending = False).plot(kind = "barh", 
                                                        figsize = (12, 9),
                                                        fontsize = 12,
                                                        title = 'DJI Constituents: Annualized Returns and Risks by Risk', rot = 29);
plt.legend(fontsize = 12);


summary2.set_index('ticker', inplace = True)


summary2.plot(kind = 'scatter', x = "risk", y = 'return',
              figsize = (15, 10), s = 60, c = 'return',
              cmap = 'viridis',
              fontsize = 13)

for i in summary2.index:
    plt.annotate(i, xy = (summary2.loc[i, "risk"] + 0.02, 
                 summary2.loc[i, "return"] + 0.02),
                 size = 11)
    plt.xlabel('Annualized Risk STD', fontsize = 13)
    plt.ylabel('Annualized Return', fontsize = 13)
    plt.title("Risk / Return", fontsize = 22)


import seaborn as sns
correlation_matrix = daily_returns.corr()
plt.figure(figsize = (20, 12))
sns.heatmap(correlation_matrix, cmap = "viridis", annot = True, 
            vmin = 0.2, vmax = 0.8);
plt.title("Dow Jones Constituents: Correlation Matrix", fontsize=24, pad = 20);


df2 = df.loc['2017':'2018'].copy()
normalize2 = df2.div(df2.iloc[0]).mul(100)
display_me(normalize2, 5, "NORMALIZED TESTING DATA")

NORMALIZED TESTING DATA
(First 5 rows of data)

(Last 5 rows of data)


daily_returns2 = daily_returns.loc['2017': '2018'].copy()
return_difference = daily_returns2.sub(daily_returns2.DJI, axis = 0)
return_difference.head(5)


return_difference.agg(['mean', 'std']).T.head(5)


def tracking_annualized(returns):
    summary = returns.agg(['mean', 'std']).T
    summary['premium'] = summary['mean'] * 252
    summary['tracking_error'] = summary['std'] * np.sqrt(252)
    summary.drop(columns = ['mean', 'std'], inplace = True)
    return summary


tracking = tracking_annualized(return_difference).sort_values(by='tracking_error')
tracking.iloc[1:11]


num_assets = 10
tracking_stocks = tracking.sort_values(by = "tracking_error").index[:num_assets+1]
list(tracking_stocks)

['DJI', 'HON', 'MMM', 'JPM', 'V', 'AXP', 'KO', 'HD', 'TRV', 'UNH', 'JNJ']


normalized_tracking = normalize2.loc[:, tracking_stocks]
display_me(normalized_tracking, 5, "NORMALIZED TRACKING STOCKS")

NORMALIZED TRACKING STOCKS
(First 5 rows of data)

(Last 5 rows of data)


normalized_tracking.plot(figsize = (13, 7), fontsize = 13);
plt.legend(fontsize = 10);


tracking_stocks = tracking_stocks[1:]
display_rows(list(tracking_stocks), 3, "Stocks to Track")

........................
STOCKS TO TRACK:
HON	MMM	JPM
V	AXP	KO
HD	TRV	UNH
JNJ
........................


equal_weights = np.full(num_assets, 1/num_assets)
equal_weights

array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])


display_me(daily_returns2.loc[:, tracking_stocks].mean(axis = 1), 5, "Average Returns Across Portfolio")

Average Returns Across Portfolio
(First 5 rows of data)

Date
2017-01-03    0.006
2017-01-04    0.002
2017-01-05   -0.002
2017-01-06    0.003
2017-01-09   -0.003
dtype: float64

(Last 5 rows of data)

Date
2018-12-24   -0.027
2018-12-26    0.044
2018-12-27    0.013
2018-12-28   -0.004
2018-12-31    0.009
dtype: float64


display_me(daily_returns2.loc[:, tracking_stocks].dot(equal_weights), 5, "Applying Weights to Portfolio Returns")

Applying Weights to Portfolio Returns
(First 5 rows of data)

Date
2017-01-03    0.006
2017-01-04    0.002
2017-01-05   -0.002
2017-01-06    0.003
2017-01-09   -0.003
dtype: float64

(Last 5 rows of data)

Date
2018-12-24   -0.027
2018-12-26    0.044
2018-12-27    0.013
2018-12-28   -0.004
2018-12-31    0.009
dtype: float64


def portfolio_returns(weights):
    return portfolio.dot(weights)

def tracking_error(weights):
    result =  portfolio_returns(weights).sub(daily_returns2.DJI).std() * np.sqrt(252)
    # print(f'The annualized tracking error for the portfolio is {(result*100):.2f}%')
    return result


portfolio = daily_returns2.loc[:, tracking_stocks]


tracking_error(equal_weights)

0.041020911155993246


eq_weight_portfolio = portfolio_returns(equal_weights).add(1).cumprod().mul(100)

display_me(eq_weight_portfolio, 5, "Equally Weighted Portfolio Normalized Prices")

Equally Weighted Portfolio Normalized Prices
(First 5 rows of data)

Date
2017-01-03   100.587
2017-01-04   100.808
2017-01-05   100.564
2017-01-06   100.837
2017-01-09   100.548
dtype: float64

(Last 5 rows of data)

Date
2018-12-24   117.310
2018-12-26   122.501
2018-12-27   124.055
2018-12-28   123.593
2018-12-31   124.741
dtype: float64


normalized_tracking['eq_wt_pf'] = eq_weight_portfolio
normalized_tracking.iloc[0, -1] = 100
display_me(normalized_tracking, 5, "Normalized Tracking with Equal Weight Portfolio Column")

Normalized Tracking with Equal Weight Portfolio Column
(First 5 rows of data)

(Last 5 rows of data)


normalized_tracking[['eq_wt_pf', 'DJI']].plot(figsize = (13, 7));
plt.legend(fontsize = 13, loc = 2, labels = ["Equally Weighted Portfolio", "Dow Jones Index"]);


import scipy.optimize as sco


constraint = ({"type": "eq", "fun": lambda x: np.sum(x) - 1})
print("constraint: ", constraint, '\n')

bound =  tuple((0,1) for x in range(num_assets))
print("bounds: ", bound, '\n')

# tracking_error is the function to minimize
# equal_weights is the starting point
# the method is a quadratic optimization
optimizer = sco.minimize(tracking_error, equal_weights, method = "SLSQP", bounds = bound, constraints = constraint)
display(optimizer)
print('')

constraint:  {'type': 'eq', 'fun': <function <lambda> at 0x7fb4490938b0>} 

bounds:  ((0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1))

     fun: 0.03816368407774814
     jac: array([0.01178902, 0.01115682, 0.01165584, 0.01118062, 0.01123751,
       0.01130495, 0.01139102, 0.01167274, 0.01126328, 0.01154238])
 message: 'Optimization terminated successfully'
    nfev: 77
     nit: 7
    njev: 7
  status: 0
 success: True
       x: array([0.09783899, 0.11926049, 0.1509521 , 0.15973453, 0.04087604,
       0.08224352, 0.10876454, 0.05986219, 0.11537703, 0.06509057])


optimal_weights = optimizer['x']


print('\nOptimal weight for each symbol in the portfolio:')
pd.Series(index = tracking_stocks, data = optimal_weights)

Optimal weight for each symbol in the portfolio:

HON   0.098
MMM   0.119
JPM   0.151
V     0.160
AXP   0.041
KO    0.082
HD    0.109
TRV   0.060
UNH   0.115
JNJ   0.065
dtype: float64


optimal_pf = portfolio_returns(optimal_weights)[1:].add(1).cumprod().mul(100)
display_me(optimal_pf, 5, "Optimal Portfolio Returns (Normalized)")

Optimal Portfolio Returns (Normalized)
(First 5 rows of data)

Date
2017-01-04   100.226
2017-01-05   100.088
2017-01-06   100.452
2017-01-09   100.147
2017-01-10   100.175
dtype: float64

(Last 5 rows of data)

Date
2018-12-24   119.462
2018-12-26   125.024
2018-12-27   126.690
2018-12-28   126.188
2018-12-31   127.311
dtype: float64


normalized_tracking['optimal_pf'] = optimal_pf
normalized_tracking.iloc[0, -1] = 100


display_me(normalized_tracking, 5, "Normalized Tracking Data")

Normalized Tracking Data
(First 5 rows of data)

(Last 5 rows of data)


normalized_tracking[["optimal_pf", "DJI"]].plot(figsize = (13, 7),
                                               title="Optimal Portfolio vs DJI");
plt.legend(fontsize = 15, loc = 2);


summary_annualized(normalized_tracking[['optimal_pf', 'eq_wt_pf', 'DJI']].pct_change())


def tracking_error_general(data, tracking_stocks, weights, index, start, end, printout=True):
    results = data.loc[start:end, tracking_stocks].dot(weights).sub(daily_returns.loc[start:end, index]).std() * np.sqrt(252)
    if printout is True:
        print(f'{(results *100): .2f}%')
    else:
        return results


def visualize(data, tracking_stocks, weights, index, start, end, title=None):
    track = data.loc[start:end, tracking_stocks].dot(weights).add(1).cumprod().mul(100)
    track.name = 'Portfolio'
    index = data.loc[start:end, index].add(1).cumprod().mul(100)
    plt.figure(figsize = (13, 7))
    track.plot()
    index.plot()
    plt.legend(fontsize = 15, loc = 2)
    plt.title(title, fontsize = 22, pad = 20)
    plt.show()


tracking_error_general(daily_returns, tracking_stocks, optimal_weights, "DJI", "2017-01-01", "2018-12-31", printout = True)

 3.82%


forward_tracking_error = tracking_error_general(daily_returns, tracking_stocks,
                                               optimal_weights, 'DJI', "2019-01-01",
                                               "2019-12-31", printout = False)

f'{(forward_tracking_error * 100):.2f}%'

'4.61%'


visualize(daily_returns, tracking_stocks, optimal_weights, 
          index = 'DJI', start = "2019-01-01", end = "2019-12-31",
         title = "Tracking Error for Forward Testing Portfolio")


stocks = df.columns.drop(['DOW', "DJI"])
stocks = (list(stocks[0:-5]))


display_rows(stocks, 6, "Stocks to randmly choose from")

................................................
STOCKS TO RANDMLY CHOOSE FROM:
AAPL	AMGN	AXP	BA	CAT	CRM
CSCO	CVX	DIS	GS	HD	HON
IBM	INTC	JNJ	JPM	KO	MCD
MMM	MRK	MSFT	NKE	PG	TRV

................................................


np.random.seed(123)
random_stocks = np.random.choice(stocks, size = num_assets, replace = False)
random_stocks

array(['CRM', 'MSFT', 'NKE', 'MMM', 'JPM', 'DIS', 'CVX', 'HON', 'CAT',
       'BA'], dtype='<U4')


random_numbers = np.random.random(num_assets)
random_weights = random_numbers / random_numbers.sum()
random_weights

array([0.09632791, 0.09637797, 0.11496635, 0.15393431, 0.13128603,
       0.11072988, 0.13092143, 0.05852672, 0.06556346, 0.04136594])


display_rows(random_stocks, 3, "Random Stocks")
display_rows(random_weights, 3, "Random Weights")

........................
RANDOM STOCKS:
CRM	MSFT	NKE
MMM	JPM	DIS
CVX	HON	CAT
BA
........................
........................
RANDOM WEIGHTS:
0.09632791448118198	0.09637796993794165	0.11496634998876853
0.15393430859962146	0.13128603181708	0.11072988121004275
0.13092142697467074	0.058526720399072946	0.06556345895066334
0.041365937640956675
........................


tracking_error_general(daily_returns, random_stocks, random_weights, 
                       'DJI', '2019-01-01', '2019-12-31', printout = True)

 5.21%


f'{(forward_tracking_error * 100):.2f}%'

'4.61%'


visualize(daily_returns, random_stocks, random_weights, 
                       'DJI', '2019-01-01', '2019-12-31',
         title = 'Tracking Error for Random Portfolio')


tracking_error_collection = np.empty(10000)
for i in range(10000):
    random_numbers = np.random.random(num_assets)
    random_weights = random_numbers / random_numbers.sum()
    random_stock = np.random.choice(stocks, size = num_assets, replace = False)
    
    tracking_error_collection[i] = tracking_error_general(daily_returns,
                                                         random_stocks,
                                                         random_weights,
                                                         'DJI', '2019-01-01', 
                                                         '2019-12-31', 
                                                          printout = False)


(tracking_error_collection < forward_tracking_error).mean()

0.0009


f'{(tracking_error_collection.mean() * 100): .2f}%'

' 5.89%'


f'{(forward_tracking_error * 100):.2f}%'

'4.61%'


plt.figure(figsize = (12, 8))
plt.hist(tracking_error_collection, bins = 100, edgecolor = 'black', 
         linewidth = 0.5, color = "cyan")
plt.title("10,000 Random Portfolios: Tracking Error Distribution", 
          fontsize = 22, pad = 20)
plt.ylabel('Number of Portfolios with Tracking Error', fontsize=15)
plt.xlabel('Tracking Error Percentages Across Random Portfolios', fontsize=15)
plt.show()

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MRK	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT
Date
2007-01-03	2.993	68.400	60.360	89.170	61.160	9.017	27.730	70.970	33.738	NaN	...	42.004	29.860	12.209	64.540	53.550	52.570	NaN	35.307	46.070	47.550
2007-01-04	3.059	71.330	59.920	89.530	61.000	9.470	28.460	70.280	34.005	NaN	...	43.044	29.810	12.334	64.050	53.100	52.910	NaN	35.503	46.160	47.780
2007-01-05	3.037	71.500	59.130	89.150	60.220	9.880	28.470	70.550	33.728	NaN	...	42.271	29.640	12.354	63.500	52.410	52.550	NaN	34.896	45.500	47.390
2007-01-08	3.053	70.930	59.690	88.940	60.290	9.983	28.630	71.450	34.034	NaN	...	42.261	29.930	12.316	63.640	52.020	53.320	NaN	34.364	45.690	47.000
2007-01-09	3.306	71.270	59.310	88.000	60.620	9.990	28.470	70.630	33.985	NaN	...	41.870	29.960	12.470	63.480	51.890	52.680	NaN	34.504	45.930	47.390

	Open	High	Low	Close	Adj Close	Volume
Date
2007-01-03	12,459.540	12,580.350	12,404.820	12,474.520	12,474.520	327200000
2007-01-04	12,473.160	12,510.410	12,403.860	12,480.690	12,480.690	259060000
2007-01-05	12,480.050	12,480.130	12,365.410	12,398.010	12,398.010	235220000
2007-01-08	12,392.010	12,445.920	12,337.370	12,423.490	12,423.490	223500000
2007-01-09	12,424.770	12,466.430	12,369.170	12,416.600	12,416.600	225190000

	Close
Date
2007-01-03	12,474.520
2007-01-04	12,480.690
2007-01-05	12,398.010
2007-01-08	12,423.490
2007-01-09	12,416.600

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2007-01-03	2.993	68.400	60.360	89.170	61.160	9.017	27.730	70.970	33.738	NaN	...	29.860	12.209	64.540	53.550	52.570	NaN	35.307	46.070	47.550	12,474.520
2007-01-04	3.059	71.330	59.920	89.530	61.000	9.470	28.460	70.280	34.005	NaN	...	29.810	12.334	64.050	53.100	52.910	NaN	35.503	46.160	47.780	12,480.690
2007-01-05	3.037	71.500	59.130	89.150	60.220	9.880	28.470	70.550	33.728	NaN	...	29.640	12.354	63.500	52.410	52.550	NaN	34.896	45.500	47.390	12,398.010
2007-01-08	3.053	70.930	59.690	88.940	60.290	9.983	28.630	71.450	34.034	NaN	...	29.930	12.316	63.640	52.020	53.320	NaN	34.364	45.690	47.000	12,423.490
2007-01-09	3.306	71.270	59.310	88.000	60.620	9.990	28.470	70.630	33.985	NaN	...	29.960	12.470	63.480	51.890	52.680	NaN	34.504	45.930	47.390	12,416.600

	company	exchange	symbol	industry	date_added	ticker
0	3M	NYSE	MMM	Conglomerate	1976-08-09	MMM
1	American Express	NYSE	AXP	Financial services	1982-08-30	AXP
2	Amgen	NASDAQ	AMGN	Biopharmaceutical	2020-08-31	AMGN
3	Apple	NASDAQ	AAPL	Information technology	2015-03-19	AAPL
4	Boeing	NYSE	BA	Aerospace and defense	1987-03-12	BA

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2020-03-24	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	20,704.910
2020-03-25	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	21,200.551
2020-03-26	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	22,552.170
2020-03-27	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	21,636.779
2020-03-30	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	22,327.480

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2007-01-03	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	NaN	...	1.000	1.000	1.000	1.000	1.000	NaN	1.000	1.000	1.000	1.000
2007-01-04	1.022	1.043	0.993	1.004	0.997	1.050	1.026	0.990	1.008	NaN	...	0.998	1.010	0.992	0.992	1.006	NaN	1.006	1.002	1.005	1.000
2007-01-05	1.015	1.045	0.980	1.000	0.985	1.096	1.027	0.994	1.000	NaN	...	0.993	1.012	0.984	0.979	1.000	NaN	0.988	0.988	0.997	0.994
2007-01-08	1.020	1.037	0.989	0.997	0.986	1.107	1.032	1.007	1.009	NaN	...	1.002	1.009	0.986	0.971	1.014	NaN	0.973	0.992	0.988	0.996
2007-01-09	1.105	1.042	0.983	0.987	0.991	1.108	1.027	0.995	1.007	NaN	...	1.003	1.021	0.984	0.969	1.002	NaN	0.977	0.997	0.997	0.995

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2007-01-03	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	NaN	...	100.000	100.000	100.000	100.000	100.000	NaN	100.000	100.000	100.000	100.000
2007-01-04	102.220	104.284	99.271	100.404	99.738	105.018	102.633	99.028	100.789	NaN	...	99.833	101.024	99.241	99.160	100.647	NaN	100.555	100.195	100.484	100.049
2007-01-05	101.492	104.532	97.962	99.978	98.463	109.565	102.669	99.408	99.971	NaN	...	99.263	101.188	98.389	97.871	99.962	NaN	98.837	98.763	99.664	99.387
2007-01-08	101.993	103.699	98.890	99.742	98.578	110.701	103.246	100.676	100.877	NaN	...	100.234	100.881	98.606	97.143	101.427	NaN	97.329	99.175	98.843	99.591
2007-01-09	110.465	104.196	98.260	98.688	99.117	110.785	102.669	99.521	100.731	NaN	...	100.335	102.140	98.358	96.900	100.209	NaN	97.726	99.696	99.664	99.536

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
count	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	259.000	...	3,332.000	3,332.000	3,332.000	3,332.000	3,332.000	3,028.000	3,332.000	3,332.000	3,332.000	3,332.000
mean	0.001	0.000	0.000	0.001	0.000	0.001	0.000	0.000	0.001	-0.000	...	0.001	0.001	0.000	0.000	0.001	0.001	0.000	0.000	0.000	0.000
std	0.020	0.017	0.023	0.018	0.020	0.025	0.018	0.016	0.016	0.021	...	0.017	0.017	0.011	0.017	0.020	0.018	0.013	0.017	0.012	0.012
min	-0.179	-0.096	-0.176	-0.089	-0.114	-0.185	-0.162	-0.125	-0.097	-0.081	...	-0.117	-0.118	-0.079	-0.182	-0.186	-0.136	-0.081	-0.150	-0.102	-0.129
25%	-0.008	-0.008	-0.007	-0.009	-0.009	-0.010	-0.007	-0.007	-0.007	-0.012	...	-0.007	-0.007	-0.005	-0.006	-0.008	-0.007	-0.006	-0.008	-0.006	-0.004
50%	0.001	0.000	0.001	0.001	0.000	0.001	0.000	0.000	0.001	0.000	...	0.000	0.001	0.000	0.001	0.001	0.001	0.000	0.000	0.000	0.001
75%	0.011	0.009	0.009	0.010	0.010	0.013	0.009	0.008	0.008	0.011	...	0.009	0.009	0.005	0.007	0.009	0.009	0.007	0.009	0.006	0.005
max	0.139	0.139	0.206	0.155	0.147	0.191	0.160	0.209	0.160	0.062	...	0.186	0.126	0.102	0.256	0.348	0.150	0.146	0.166	0.111	0.114

	return	risk	ticker
company
Apple	0.292	0.312	AAPL
Amgen	0.120	0.262	AMGN
American Express	0.112	0.360	AXP
Boeing	0.130	0.285	BA
Caterpillar	0.106	0.318	CAT

	return	risk	ticker
company
Visa	0.260	0.292	V
Verizon	0.059	0.212	VZ
Walgreens Boots Alliance	0.040	0.271	WBA
Walmart	0.087	0.197	WMT
Dow Jones	0.063	0.197	DJI

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2017-01-03	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	NaN	...	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000	100.000
2017-01-04	99.888	101.420	101.208	101.051	99.553	103.204	98.559	99.975	101.282	NaN	...	99.553	102.097	100.356	99.462	100.285	100.818	99.890	100.024	100.583	100.304
2017-01-05	100.396	101.493	99.960	101.108	98.947	103.190	98.788	99.542	101.225	NaN	...	99.553	102.078	101.021	97.874	100.452	102.000	100.110	100.084	100.801	100.088
2017-01-06	101.515	104.014	100.159	101.357	98.989	104.621	98.985	99.143	102.734	NaN	...	100.415	103.713	100.986	97.825	100.595	103.409	97.582	100.169	99.417	100.413
2017-01-09	102.445	105.380	100.677	100.860	98.276	104.848	98.821	98.294	102.149	NaN	...	100.096	102.693	100.238	97.039	100.310	102.830	96.519	99.506	100.073	100.028

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2018-12-24	126.414	118.357	118.779	187.399	124.428	172.002	131.893	85.694	94.598	NaN	...	150.415	131.012	103.753	93.160	144.280	153.120	97.197	78.664	124.993	109.609
2018-12-26	135.316	124.633	124.539	199.994	132.738	185.483	139.064	91.124	99.764	NaN	...	160.690	140.458	106.995	96.915	150.728	163.811	99.743	81.630	131.678	115.073
2018-12-27	134.438	126.272	125.654	202.039	134.770	191.664	140.504	92.762	100.415	NaN	...	161.681	141.728	109.287	98.155	152.351	166.050	101.044	82.401	133.396	116.382
2018-12-28	134.507	126.538	125.309	201.554	133.642	190.927	140.046	92.193	101.150	NaN	...	160.419	141.093	108.290	97.800	152.518	164.704	101.264	82.027	134.183	115.998
2018-12-31	135.807	129.151	126.503	205.453	135.195	194.174	141.880	92.312	103.365	NaN	...	162.304	142.632	109.169	99.049	154.302	165.962	103.005	82.365	135.669	117.331

	AAPL	AMGN	AXP	BA	CAT	CRM	CSCO	CVX	DIS	DOW	...	MSFT	NKE	PG	TRV	UNH	V	VZ	WBA	WMT	DJI
Date
2017-01-03	-0.003	0.025	0.011	0.002	0.007	0.024	0.005	-0.005	0.012	NaN	...	0.001	0.017	-0.005	-0.018	0.003	0.013	0.016	-0.004	-0.013	0.000
2017-01-04	-0.004	0.011	0.009	0.007	-0.008	0.029	-0.017	-0.003	0.010	NaN	...	-0.008	0.018	0.001	-0.008	-0.000	0.005	-0.004	-0.003	0.003	0.000
2017-01-05	0.007	0.003	-0.010	0.003	-0.004	0.002	0.004	-0.002	0.002	NaN	...	0.002	0.002	0.009	-0.014	0.004	0.014	0.004	0.003	0.004	0.000
2017-01-06	0.008	0.022	-0.001	-0.001	-0.003	0.011	-0.001	-0.007	0.012	NaN	...	0.005	0.013	-0.004	-0.004	-0.002	0.011	-0.028	-0.002	-0.017	0.000
2017-01-09	0.013	0.017	0.009	-0.001	-0.003	0.006	0.002	-0.005	-0.002	NaN	...	0.001	-0.006	-0.004	-0.004	0.001	-0.002	-0.007	-0.003	0.010	0.000

	premium	tracking_error
HON	0.007	0.098
MMM	-0.041	0.125
JPM	-0.012	0.131
V	0.192	0.132
AXP	0.053	0.133
KO	-0.018	0.135
HD	0.049	0.135
TRV	-0.089	0.137
UNH	0.148	0.139
JNJ	-0.020	0.149

INDEX TRACKING and FORWARD TESTING:¶

display_me shows the given number of head and tail items in a dataframe¶

IMPORTING ALL DATA:¶

TRANSFORMING and PROCESSING DATA:¶

Normalizing stock prices:¶

Normalizing all stocks to an initial value of 1 on the first day of data¶

Normalizing all to an initial price of 100, which is more common¶

Daily averages: mean is the average daily return. std is a metric for the risk.¶

Annualized Return: [SOURCE]¶

Summary¶

EXPLORATORY ANALYSIS¶

DJI Constituents: Annualized Returns and Risks by Returns¶

DJI Constituents: Annualized Returns and Risks by Risk¶

Replacing index company name with ticker symbol¶

Correlation Matrix¶

INDEX TRACKING¶

PLAN:¶

df2 - normalized dataframe for testing, years 2017-2018¶

daily_returns2 - Returns for testing, years 2017 to 2018¶

Mean and Standard Deviation of the Return Differences¶

SELECTING the TRACKING STOCKS¶

Sorting stocks by annualized tracking error¶

num_assets¶

Normalized prices of the 10 tracking_stocks¶

Visualization of 10 tracking stocks¶

Dropping DJI from tracking_stocks¶

SIMPLE TRACKING PORTFOLIO¶

10 portfolio stocks with simple average across all 10¶

Working with unequal weights¶

porfolio_returns()¶

tracking_error()¶

Calculating the normalized prices¶

Adding eq_weight_portfolio as a column to normalized_tracking¶

Visualized comparison of eq_weight_portfolio vs DJI¶

Optimal Tracking Portfolio¶

Creating optimized weights¶

Optimizing Results Above:¶

Summary of Annualized Data¶

Forward-Testing (Part One)¶

PLAN:¶

tracking_error_general()¶

visualize()¶

Calculating the tracking error for 2017-2018¶

Forward Testing: Calculating the tracking error for 2019¶

Forward-Testing (Part Two)¶

10 random stocks for a random portfolio¶

10 random weights for the 10 stocks¶

Calculating tracking error for random portfolio¶

Compare random portfolio tracking error to forward-tracking¶

10,000 Random Porfolios Put to the Test¶

Comparing averages of 10,000 random portfolios to forward tracking portfolio for performance¶

`display_me` shows the given number of head and tail items in a dataframe¶

Normalizing all stocks to an initial value of `1` on the first day of data¶

Daily averages: `mean` is the average daily return. `std` is a metric for the risk.¶

`df2` - normalized dataframe for testing, years 2017-2018¶

`daily_returns2` - Returns for testing, years 2017 to 2018¶

`num_assets`¶

Normalized prices of the 10 `tracking_stocks`¶

Dropping DJI from `tracking_stocks`¶

`porfolio_returns()`¶

`tracking_error()`¶

Adding `eq_weight_portfolio` as a column to `normalized_tracking`¶

Visualized comparison of `eq_weight_portfolio` vs DJI¶

`tracking_error_general()`¶

`visualize()`¶