import pandas as pd
%matplotlib inline
pd.options.display.float_format = "{:,.3f}".format
display_me
shows the given number of head and tail items in a dataframe¶def display_me(df, num, name=None):
print("")
print(name)
print(f'(First {num} rows of data)')
display(df.head(num))
print("")
print(f'(Last {num} rows of data)')
display(df.tail(num))
print("")
def display_rows(display_list, num_cols, title):
print("........" * num_cols)
print(f"{title.upper()}:")
current = 0
length = len(display_list)
num_rows = round(length / num_cols)
for i in range(num_rows + 1):
print('\t'.join([str(x) for x in list(display_list[current:current+num_cols])]))
current = current + num_cols
print("........" * num_cols)
dji = pd.read_csv("dji.csv", parse_dates = ['Date'], index_col = 'Date')
constituents = pd.read_csv('prices.csv', parse_dates = ['Date'], index_col = "Date")
print("")
print("DOW JONES CONSTITUENTS")
display(constituents.head(5))
print("")
print("DOW JONES INDEX")
display(dji.head(5))
print("")
DOW JONES CONSTITUENTS
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MRK | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2007-01-03 | 2.993 | 68.400 | 60.360 | 89.170 | 61.160 | 9.017 | 27.730 | 70.970 | 33.738 | NaN | ... | 42.004 | 29.860 | 12.209 | 64.540 | 53.550 | 52.570 | NaN | 35.307 | 46.070 | 47.550 |
2007-01-04 | 3.059 | 71.330 | 59.920 | 89.530 | 61.000 | 9.470 | 28.460 | 70.280 | 34.005 | NaN | ... | 43.044 | 29.810 | 12.334 | 64.050 | 53.100 | 52.910 | NaN | 35.503 | 46.160 | 47.780 |
2007-01-05 | 3.037 | 71.500 | 59.130 | 89.150 | 60.220 | 9.880 | 28.470 | 70.550 | 33.728 | NaN | ... | 42.271 | 29.640 | 12.354 | 63.500 | 52.410 | 52.550 | NaN | 34.896 | 45.500 | 47.390 |
2007-01-08 | 3.053 | 70.930 | 59.690 | 88.940 | 60.290 | 9.983 | 28.630 | 71.450 | 34.034 | NaN | ... | 42.261 | 29.930 | 12.316 | 63.640 | 52.020 | 53.320 | NaN | 34.364 | 45.690 | 47.000 |
2007-01-09 | 3.306 | 71.270 | 59.310 | 88.000 | 60.620 | 9.990 | 28.470 | 70.630 | 33.985 | NaN | ... | 41.870 | 29.960 | 12.470 | 63.480 | 51.890 | 52.680 | NaN | 34.504 | 45.930 | 47.390 |
5 rows × 30 columns
DOW JONES INDEX
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2007-01-03 | 12,459.540 | 12,580.350 | 12,404.820 | 12,474.520 | 12,474.520 | 327200000 |
2007-01-04 | 12,473.160 | 12,510.410 | 12,403.860 | 12,480.690 | 12,480.690 | 259060000 |
2007-01-05 | 12,480.050 | 12,480.130 | 12,365.410 | 12,398.010 | 12,398.010 | 235220000 |
2007-01-08 | 12,392.010 | 12,445.920 | 12,337.370 | 12,423.490 | 12,423.490 | 223500000 |
2007-01-09 | 12,424.770 | 12,466.430 | 12,369.170 | 12,416.600 | 12,416.600 | 225190000 |
print("")
print("CONSTITUENTS.INFO()")
display(constituents.info())
print("")
print("DJI.INFO()")
display(dji.info())
print("")
CONSTITUENTS.INFO() <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 3313 entries, 2007-01-03 to 2020-03-02 Data columns (total 30 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 AAPL 3313 non-null float64 1 AMGN 3313 non-null float64 2 AXP 3313 non-null float64 3 BA 3313 non-null float64 4 CAT 3313 non-null float64 5 CRM 3313 non-null float64 6 CSCO 3313 non-null float64 7 CVX 3313 non-null float64 8 DIS 3313 non-null float64 9 DOW 240 non-null float64 10 GS 3313 non-null float64 11 HD 3313 non-null float64 12 HON 3313 non-null float64 13 IBM 3313 non-null float64 14 INTC 3313 non-null float64 15 JNJ 3313 non-null float64 16 JPM 3313 non-null float64 17 KO 3313 non-null float64 18 MCD 3313 non-null float64 19 MMM 3313 non-null float64 20 MRK 3313 non-null float64 21 MSFT 3313 non-null float64 22 NKE 3313 non-null float64 23 PG 3313 non-null float64 24 TRV 3313 non-null float64 25 UNH 3313 non-null float64 26 V 3009 non-null float64 27 VZ 3313 non-null float64 28 WBA 3313 non-null float64 29 WMT 3313 non-null float64 dtypes: float64(30) memory usage: 802.4 KB
None
DJI.INFO() <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 3333 entries, 2007-01-03 to 2020-03-30 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Open 3333 non-null float64 1 High 3333 non-null float64 2 Low 3333 non-null float64 3 Close 3333 non-null float64 4 Adj Close 3333 non-null float64 5 Volume 3333 non-null int64 dtypes: float64(5), int64(1) memory usage: 182.3 KB
None
dji = dji.Close.to_frame().copy()
dji.head(5)
Close | |
---|---|
Date | |
2007-01-03 | 12,474.520 |
2007-01-04 | 12,480.690 |
2007-01-05 | 12,398.010 |
2007-01-08 | 12,423.490 |
2007-01-09 | 12,416.600 |
df = pd.concat([constituents, dji], axis = 1)
df.rename(columns = {"Close": "DJI"}, inplace=True)
display_me(df, 5, "Concatenated Data")
Concatenated Data (First 5 rows of data)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2007-01-03 | 2.993 | 68.400 | 60.360 | 89.170 | 61.160 | 9.017 | 27.730 | 70.970 | 33.738 | NaN | ... | 29.860 | 12.209 | 64.540 | 53.550 | 52.570 | NaN | 35.307 | 46.070 | 47.550 | 12,474.520 |
2007-01-04 | 3.059 | 71.330 | 59.920 | 89.530 | 61.000 | 9.470 | 28.460 | 70.280 | 34.005 | NaN | ... | 29.810 | 12.334 | 64.050 | 53.100 | 52.910 | NaN | 35.503 | 46.160 | 47.780 | 12,480.690 |
2007-01-05 | 3.037 | 71.500 | 59.130 | 89.150 | 60.220 | 9.880 | 28.470 | 70.550 | 33.728 | NaN | ... | 29.640 | 12.354 | 63.500 | 52.410 | 52.550 | NaN | 34.896 | 45.500 | 47.390 | 12,398.010 |
2007-01-08 | 3.053 | 70.930 | 59.690 | 88.940 | 60.290 | 9.983 | 28.630 | 71.450 | 34.034 | NaN | ... | 29.930 | 12.316 | 63.640 | 52.020 | 53.320 | NaN | 34.364 | 45.690 | 47.000 | 12,423.490 |
2007-01-09 | 3.306 | 71.270 | 59.310 | 88.000 | 60.620 | 9.990 | 28.470 | 70.630 | 33.985 | NaN | ... | 29.960 | 12.470 | 63.480 | 51.890 | 52.680 | NaN | 34.504 | 45.930 | 47.390 | 12,416.600 |
5 rows × 31 columns
(Last 5 rows of data)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2020-03-24 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 20,704.910 |
2020-03-25 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 21,200.551 |
2020-03-26 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 22,552.170 |
2020-03-27 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 21,636.779 |
2020-03-30 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 22,327.480 |
5 rows × 31 columns
info = pd.read_csv("dowjones.csv")
info.head(5)
company | exchange | symbol | industry | date_added | ticker | |
---|---|---|---|---|---|---|
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | MMM |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | AXP |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | AMGN |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | AAPL |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | BA |
import matplotlib.pyplot as plt
import numpy as np
plt.style.use("ggplot")
df.plot(figsize = (14, 8), fontsize = 13, title='30 Dow Jones Constituents', secondary_y = "DJI");
1
on the first day of data¶df.div(df.iloc[0]).head(5)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2007-01-03 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | NaN | ... | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | NaN | 1.000 | 1.000 | 1.000 | 1.000 |
2007-01-04 | 1.022 | 1.043 | 0.993 | 1.004 | 0.997 | 1.050 | 1.026 | 0.990 | 1.008 | NaN | ... | 0.998 | 1.010 | 0.992 | 0.992 | 1.006 | NaN | 1.006 | 1.002 | 1.005 | 1.000 |
2007-01-05 | 1.015 | 1.045 | 0.980 | 1.000 | 0.985 | 1.096 | 1.027 | 0.994 | 1.000 | NaN | ... | 0.993 | 1.012 | 0.984 | 0.979 | 1.000 | NaN | 0.988 | 0.988 | 0.997 | 0.994 |
2007-01-08 | 1.020 | 1.037 | 0.989 | 0.997 | 0.986 | 1.107 | 1.032 | 1.007 | 1.009 | NaN | ... | 1.002 | 1.009 | 0.986 | 0.971 | 1.014 | NaN | 0.973 | 0.992 | 0.988 | 0.996 |
2007-01-09 | 1.105 | 1.042 | 0.983 | 0.987 | 0.991 | 1.108 | 1.027 | 0.995 | 1.007 | NaN | ... | 1.003 | 1.021 | 0.984 | 0.969 | 1.002 | NaN | 0.977 | 0.997 | 0.997 | 0.995 |
5 rows × 31 columns
normalized = norm = df.div(df.iloc[0]).mul(100)
normalized.head(5)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2007-01-03 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | NaN | ... | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | NaN | 100.000 | 100.000 | 100.000 | 100.000 |
2007-01-04 | 102.220 | 104.284 | 99.271 | 100.404 | 99.738 | 105.018 | 102.633 | 99.028 | 100.789 | NaN | ... | 99.833 | 101.024 | 99.241 | 99.160 | 100.647 | NaN | 100.555 | 100.195 | 100.484 | 100.049 |
2007-01-05 | 101.492 | 104.532 | 97.962 | 99.978 | 98.463 | 109.565 | 102.669 | 99.408 | 99.971 | NaN | ... | 99.263 | 101.188 | 98.389 | 97.871 | 99.962 | NaN | 98.837 | 98.763 | 99.664 | 99.387 |
2007-01-08 | 101.993 | 103.699 | 98.890 | 99.742 | 98.578 | 110.701 | 103.246 | 100.676 | 100.877 | NaN | ... | 100.234 | 100.881 | 98.606 | 97.143 | 101.427 | NaN | 97.329 | 99.175 | 98.843 | 99.591 |
2007-01-09 | 110.465 | 104.196 | 98.260 | 98.688 | 99.117 | 110.785 | 102.669 | 99.521 | 100.731 | NaN | ... | 100.335 | 102.140 | 98.358 | 96.900 | 100.209 | NaN | 97.726 | 99.696 | 99.664 | 99.536 |
5 rows × 31 columns
normalized.plot(figsize = (13, 7), fontsize = 13, title = '30 Dow Jones Constituents Normalized')
plt.legend(fontsize = 10)
plt.show()
normalized.drop(columns = ['AAPL', 'CRM', 'DIS']).plot(figsize = (13, 7), fontsize = 13, title = '30 Dow Jones Constituents (minus top 3)', linewidth=0.75)
plt.legend(fontsize = 10)
plt.show()
daily_returns = df.pct_change()
daily_returns.head(5)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2007-01-03 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2007-01-04 | 0.022 | 0.043 | -0.007 | 0.004 | -0.003 | 0.050 | 0.026 | -0.010 | 0.008 | NaN | ... | -0.002 | 0.010 | -0.008 | -0.008 | 0.006 | NaN | 0.006 | 0.002 | 0.005 | 0.000 |
2007-01-05 | -0.007 | 0.002 | -0.013 | -0.004 | -0.013 | 0.043 | 0.000 | 0.004 | -0.008 | NaN | ... | -0.006 | 0.002 | -0.009 | -0.013 | -0.007 | NaN | -0.017 | -0.014 | -0.008 | -0.007 |
2007-01-08 | 0.005 | -0.008 | 0.009 | -0.002 | 0.001 | 0.010 | 0.006 | 0.013 | 0.009 | NaN | ... | 0.010 | -0.003 | 0.002 | -0.007 | 0.015 | NaN | -0.015 | 0.004 | -0.008 | 0.002 |
2007-01-09 | 0.083 | 0.005 | -0.006 | -0.011 | 0.005 | 0.001 | -0.006 | -0.011 | -0.001 | NaN | ... | 0.001 | 0.012 | -0.003 | -0.002 | -0.012 | NaN | 0.004 | 0.005 | 0.008 | -0.001 |
5 rows × 31 columns
mean
is the average daily return. std
is a metric for the risk.¶daily_returns.describe()
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 259.000 | ... | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,028.000 | 3,332.000 | 3,332.000 | 3,332.000 | 3,332.000 |
mean | 0.001 | 0.000 | 0.000 | 0.001 | 0.000 | 0.001 | 0.000 | 0.000 | 0.001 | -0.000 | ... | 0.001 | 0.001 | 0.000 | 0.000 | 0.001 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
std | 0.020 | 0.017 | 0.023 | 0.018 | 0.020 | 0.025 | 0.018 | 0.016 | 0.016 | 0.021 | ... | 0.017 | 0.017 | 0.011 | 0.017 | 0.020 | 0.018 | 0.013 | 0.017 | 0.012 | 0.012 |
min | -0.179 | -0.096 | -0.176 | -0.089 | -0.114 | -0.185 | -0.162 | -0.125 | -0.097 | -0.081 | ... | -0.117 | -0.118 | -0.079 | -0.182 | -0.186 | -0.136 | -0.081 | -0.150 | -0.102 | -0.129 |
25% | -0.008 | -0.008 | -0.007 | -0.009 | -0.009 | -0.010 | -0.007 | -0.007 | -0.007 | -0.012 | ... | -0.007 | -0.007 | -0.005 | -0.006 | -0.008 | -0.007 | -0.006 | -0.008 | -0.006 | -0.004 |
50% | 0.001 | 0.000 | 0.001 | 0.001 | 0.000 | 0.001 | 0.000 | 0.000 | 0.001 | 0.000 | ... | 0.000 | 0.001 | 0.000 | 0.001 | 0.001 | 0.001 | 0.000 | 0.000 | 0.000 | 0.001 |
75% | 0.011 | 0.009 | 0.009 | 0.010 | 0.010 | 0.013 | 0.009 | 0.008 | 0.008 | 0.011 | ... | 0.009 | 0.009 | 0.005 | 0.007 | 0.009 | 0.009 | 0.007 | 0.009 | 0.006 | 0.005 |
max | 0.139 | 0.139 | 0.206 | 0.155 | 0.147 | 0.191 | 0.160 | 0.209 | 0.160 | 0.062 | ... | 0.186 | 0.126 | 0.102 | 0.256 | 0.348 | 0.150 | 0.146 | 0.166 | 0.111 | 0.114 |
8 rows × 31 columns
def summary_annualized(returns):
summary = returns.agg(['mean', 'std']).T
summary['return'] = summary["mean"] * 252
summary['risk'] = summary['std'] * np.sqrt(252)
summary.drop(columns = ['mean', 'std'], inplace = True)
return summary
summary = summary_annualized(daily_returns)
summary = summary.merge(info.loc[:, ["ticker", "company"]],
how = 'left', left_index = True,
right_on = "ticker")
# Making company the index column
summary.set_index("company", inplace = True)
# Replacing NaN in the company name for the Dow Jones Index
summary.rename(index = {np.nan: "Dow Jones"}, inplace = True)
display_me(summary, 5, "SUMMARY")
SUMMARY (First 5 rows of data)
return | risk | ticker | |
---|---|---|---|
company | |||
Apple | 0.292 | 0.312 | AAPL |
Amgen | 0.120 | 0.262 | AMGN |
American Express | 0.112 | 0.360 | AXP |
Boeing | 0.130 | 0.285 | BA |
Caterpillar | 0.106 | 0.318 | CAT |
(Last 5 rows of data)
return | risk | ticker | |
---|---|---|---|
company | |||
Visa | 0.260 | 0.292 | V |
Verizon | 0.059 | 0.212 | VZ |
Walgreens Boots Alliance | 0.040 | 0.271 | WBA |
Walmart | 0.087 | 0.197 | WMT |
Dow Jones | 0.063 | 0.197 | DJI |
summary2 = summary.drop(index = "Dow")
summary2.sort_values(by = "return", ascending = False).plot(kind = 'bar',
figsize = (13, 7),
fontsize = 12,
title = 'DJI Constituents: Annualized Returns and Risks by Returns', rot = 75);
plt.legend(fontsize=13);
summary2.sort_values(by="risk", ascending = False).plot(kind = "barh",
figsize = (12, 9),
fontsize = 12,
title = 'DJI Constituents: Annualized Returns and Risks by Risk', rot = 29);
plt.legend(fontsize = 12);
plt.annotate()
- allows you to mark each point on the graphsummary2.set_index('ticker', inplace = True)
summary2.plot(kind = 'scatter', x = "risk", y = 'return',
figsize = (15, 10), s = 60, c = 'return',
cmap = 'viridis',
fontsize = 13)
for i in summary2.index:
plt.annotate(i, xy = (summary2.loc[i, "risk"] + 0.02,
summary2.loc[i, "return"] + 0.02),
size = 11)
plt.xlabel('Annualized Risk STD', fontsize = 13)
plt.ylabel('Annualized Return', fontsize = 13)
plt.title("Risk / Return", fontsize = 22)
import seaborn as sns
correlation_matrix = daily_returns.corr()
plt.figure(figsize = (20, 12))
sns.heatmap(correlation_matrix, cmap = "viridis", annot = True,
vmin = 0.2, vmax = 0.8);
plt.title("Dow Jones Constituents: Correlation Matrix", fontsize=24, pad = 20);
df2
- normalized dataframe for testing, years 2017-2018¶df2 = df.loc['2017':'2018'].copy()
normalize2 = df2.div(df2.iloc[0]).mul(100)
display_me(normalize2, 5, "NORMALIZED TESTING DATA")
NORMALIZED TESTING DATA (First 5 rows of data)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2017-01-03 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | NaN | ... | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 |
2017-01-04 | 99.888 | 101.420 | 101.208 | 101.051 | 99.553 | 103.204 | 98.559 | 99.975 | 101.282 | NaN | ... | 99.553 | 102.097 | 100.356 | 99.462 | 100.285 | 100.818 | 99.890 | 100.024 | 100.583 | 100.304 |
2017-01-05 | 100.396 | 101.493 | 99.960 | 101.108 | 98.947 | 103.190 | 98.788 | 99.542 | 101.225 | NaN | ... | 99.553 | 102.078 | 101.021 | 97.874 | 100.452 | 102.000 | 100.110 | 100.084 | 100.801 | 100.088 |
2017-01-06 | 101.515 | 104.014 | 100.159 | 101.357 | 98.989 | 104.621 | 98.985 | 99.143 | 102.734 | NaN | ... | 100.415 | 103.713 | 100.986 | 97.825 | 100.595 | 103.409 | 97.582 | 100.169 | 99.417 | 100.413 |
2017-01-09 | 102.445 | 105.380 | 100.677 | 100.860 | 98.276 | 104.848 | 98.821 | 98.294 | 102.149 | NaN | ... | 100.096 | 102.693 | 100.238 | 97.039 | 100.310 | 102.830 | 96.519 | 99.506 | 100.073 | 100.028 |
5 rows × 31 columns
(Last 5 rows of data)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2018-12-24 | 126.414 | 118.357 | 118.779 | 187.399 | 124.428 | 172.002 | 131.893 | 85.694 | 94.598 | NaN | ... | 150.415 | 131.012 | 103.753 | 93.160 | 144.280 | 153.120 | 97.197 | 78.664 | 124.993 | 109.609 |
2018-12-26 | 135.316 | 124.633 | 124.539 | 199.994 | 132.738 | 185.483 | 139.064 | 91.124 | 99.764 | NaN | ... | 160.690 | 140.458 | 106.995 | 96.915 | 150.728 | 163.811 | 99.743 | 81.630 | 131.678 | 115.073 |
2018-12-27 | 134.438 | 126.272 | 125.654 | 202.039 | 134.770 | 191.664 | 140.504 | 92.762 | 100.415 | NaN | ... | 161.681 | 141.728 | 109.287 | 98.155 | 152.351 | 166.050 | 101.044 | 82.401 | 133.396 | 116.382 |
2018-12-28 | 134.507 | 126.538 | 125.309 | 201.554 | 133.642 | 190.927 | 140.046 | 92.193 | 101.150 | NaN | ... | 160.419 | 141.093 | 108.290 | 97.800 | 152.518 | 164.704 | 101.264 | 82.027 | 134.183 | 115.998 |
2018-12-31 | 135.807 | 129.151 | 126.503 | 205.453 | 135.195 | 194.174 | 141.880 | 92.312 | 103.365 | NaN | ... | 162.304 | 142.632 | 109.169 | 99.049 | 154.302 | 165.962 | 103.005 | 82.365 | 135.669 | 117.331 |
5 rows × 31 columns
daily_returns2
- Returns for testing, years 2017 to 2018¶return_difference
, which is the difference between the stock's daily returns and the daily returns of the index itselfdaily_returns2 = daily_returns.loc['2017': '2018'].copy()
return_difference = daily_returns2.sub(daily_returns2.DJI, axis = 0)
return_difference.head(5)
AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | ... | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT | DJI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2017-01-03 | -0.003 | 0.025 | 0.011 | 0.002 | 0.007 | 0.024 | 0.005 | -0.005 | 0.012 | NaN | ... | 0.001 | 0.017 | -0.005 | -0.018 | 0.003 | 0.013 | 0.016 | -0.004 | -0.013 | 0.000 |
2017-01-04 | -0.004 | 0.011 | 0.009 | 0.007 | -0.008 | 0.029 | -0.017 | -0.003 | 0.010 | NaN | ... | -0.008 | 0.018 | 0.001 | -0.008 | -0.000 | 0.005 | -0.004 | -0.003 | 0.003 | 0.000 |
2017-01-05 | 0.007 | 0.003 | -0.010 | 0.003 | -0.004 | 0.002 | 0.004 | -0.002 | 0.002 | NaN | ... | 0.002 | 0.002 | 0.009 | -0.014 | 0.004 | 0.014 | 0.004 | 0.003 | 0.004 | 0.000 |
2017-01-06 | 0.008 | 0.022 | -0.001 | -0.001 | -0.003 | 0.011 | -0.001 | -0.007 | 0.012 | NaN | ... | 0.005 | 0.013 | -0.004 | -0.004 | -0.002 | 0.011 | -0.028 | -0.002 | -0.017 | 0.000 |
2017-01-09 | 0.013 | 0.017 | 0.009 | -0.001 | -0.003 | 0.006 | 0.002 | -0.005 | -0.002 | NaN | ... | 0.001 | -0.006 | -0.004 | -0.004 | 0.001 | -0.002 | -0.007 | -0.003 | 0.010 | 0.000 |
5 rows × 31 columns
std
is the tracking error, which we are trying to minimizereturn_difference.agg(['mean', 'std']).T.head(5)
mean | std | |
---|---|---|
AAPL | 0.000 | 0.012 |
AMGN | 0.000 | 0.011 |
AXP | 0.000 | 0.008 |
BA | 0.001 | 0.011 |
CAT | 0.000 | 0.012 |
def tracking_annualized(returns):
summary = returns.agg(['mean', 'std']).T
summary['premium'] = summary['mean'] * 252
summary['tracking_error'] = summary['std'] * np.sqrt(252)
summary.drop(columns = ['mean', 'std'], inplace = True)
return summary
tracking = tracking_annualized(return_difference).sort_values(by='tracking_error')
tracking.iloc[1:11]
premium | tracking_error | |
---|---|---|
HON | 0.007 | 0.098 |
MMM | -0.041 | 0.125 |
JPM | -0.012 | 0.131 |
V | 0.192 | 0.132 |
AXP | 0.053 | 0.133 |
KO | -0.018 | 0.135 |
HD | 0.049 | 0.135 |
TRV | -0.089 | 0.137 |
UNH | 0.148 | 0.139 |
JNJ | -0.020 | 0.149 |
num_assets
¶num_assets = 10
tracking_stocks = tracking.sort_values(by = "tracking_error").index[:num_assets+1]
list(tracking_stocks)
['DJI', 'HON', 'MMM', 'JPM', 'V', 'AXP', 'KO', 'HD', 'TRV', 'UNH', 'JNJ']
tracking_stocks
¶normalized_tracking = normalize2.loc[:, tracking_stocks]
display_me(normalized_tracking, 5, "NORMALIZED TRACKING STOCKS")
NORMALIZED TRACKING STOCKS (First 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | |
---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||
2017-01-03 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 |
2017-01-04 | 100.304 | 100.275 | 100.152 | 99.633 | 100.818 | 101.208 | 99.641 | 100.886 | 99.462 | 100.285 | 99.836 |
2017-01-05 | 100.088 | 100.499 | 99.809 | 98.716 | 102.000 | 99.960 | 99.880 | 99.695 | 97.874 | 100.452 | 100.881 |
2017-01-06 | 100.413 | 102.023 | 100.101 | 98.728 | 103.409 | 100.159 | 99.856 | 99.419 | 97.825 | 100.595 | 100.397 |
2017-01-09 | 100.028 | 101.179 | 99.562 | 98.796 | 102.830 | 100.677 | 98.852 | 100.000 | 97.039 | 100.310 | 100.380 |
(Last 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | |
---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||
2018-12-24 | 109.609 | 112.103 | 100.320 | 105.629 | 153.120 | 118.779 | 109.952 | 117.743 | 93.160 | 144.280 | 106.043 |
2018-12-26 | 115.073 | 116.360 | 104.611 | 110.008 | 163.811 | 124.539 | 112.297 | 125.292 | 96.915 | 150.728 | 109.384 |
2018-12-27 | 116.382 | 118.156 | 107.105 | 111.246 | 166.050 | 125.654 | 113.708 | 126.811 | 98.155 | 152.351 | 109.988 |
2018-12-28 | 115.998 | 117.429 | 106.358 | 111.005 | 164.704 | 125.309 | 112.919 | 126.737 | 97.800 | 152.518 | 109.867 |
2018-12-31 | 117.331 | 118.650 | 107.015 | 111.911 | 165.962 | 126.503 | 113.278 | 127.928 | 99.049 | 154.302 | 111.404 |
normalized_tracking.plot(figsize = (13, 7), fontsize = 13);
plt.legend(fontsize = 10);
tracking_stocks
¶tracking_stocks = tracking_stocks[1:]
display_rows(list(tracking_stocks), 3, "Stocks to Track")
........................ STOCKS TO TRACK: HON MMM JPM V AXP KO HD TRV UNH JNJ ........................
equal_weights = np.full(num_assets, 1/num_assets)
equal_weights
array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
display_me(daily_returns2.loc[:, tracking_stocks].mean(axis = 1), 5, "Average Returns Across Portfolio")
Average Returns Across Portfolio (First 5 rows of data)
Date 2017-01-03 0.006 2017-01-04 0.002 2017-01-05 -0.002 2017-01-06 0.003 2017-01-09 -0.003 dtype: float64
(Last 5 rows of data)
Date 2018-12-24 -0.027 2018-12-26 0.044 2018-12-27 0.013 2018-12-28 -0.004 2018-12-31 0.009 dtype: float64
.dot()
to get the weighted averagedisplay_me(daily_returns2.loc[:, tracking_stocks].dot(equal_weights), 5, "Applying Weights to Portfolio Returns")
Applying Weights to Portfolio Returns (First 5 rows of data)
Date 2017-01-03 0.006 2017-01-04 0.002 2017-01-05 -0.002 2017-01-06 0.003 2017-01-09 -0.003 dtype: float64
(Last 5 rows of data)
Date 2018-12-24 -0.027 2018-12-26 0.044 2018-12-27 0.013 2018-12-28 -0.004 2018-12-31 0.009 dtype: float64
porfolio_returns()
¶tracking_error()
¶portfolio_returns()
.std()
* np.sqrt(252)
def portfolio_returns(weights):
return portfolio.dot(weights)
def tracking_error(weights):
result = portfolio_returns(weights).sub(daily_returns2.DJI).std() * np.sqrt(252)
# print(f'The annualized tracking error for the portfolio is {(result*100):.2f}%')
return result
portfolio = daily_returns2.loc[:, tracking_stocks]
tracking_error(equal_weights)
0.041020911155993246
eq_weight_portfolio = portfolio_returns(equal_weights).add(1).cumprod().mul(100)
display_me(eq_weight_portfolio, 5, "Equally Weighted Portfolio Normalized Prices")
Equally Weighted Portfolio Normalized Prices (First 5 rows of data)
Date 2017-01-03 100.587 2017-01-04 100.808 2017-01-05 100.564 2017-01-06 100.837 2017-01-09 100.548 dtype: float64
(Last 5 rows of data)
Date 2018-12-24 117.310 2018-12-26 122.501 2018-12-27 124.055 2018-12-28 123.593 2018-12-31 124.741 dtype: float64
eq_weight_portfolio
as a column to normalized_tracking
¶normalized_tracking['eq_wt_pf'] = eq_weight_portfolio
normalized_tracking.iloc[0, -1] = 100
display_me(normalized_tracking, 5, "Normalized Tracking with Equal Weight Portfolio Column")
Normalized Tracking with Equal Weight Portfolio Column (First 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | eq_wt_pf | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2017-01-03 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 |
2017-01-04 | 100.304 | 100.275 | 100.152 | 99.633 | 100.818 | 101.208 | 99.641 | 100.886 | 99.462 | 100.285 | 99.836 | 100.808 |
2017-01-05 | 100.088 | 100.499 | 99.809 | 98.716 | 102.000 | 99.960 | 99.880 | 99.695 | 97.874 | 100.452 | 100.881 | 100.564 |
2017-01-06 | 100.413 | 102.023 | 100.101 | 98.728 | 103.409 | 100.159 | 99.856 | 99.419 | 97.825 | 100.595 | 100.397 | 100.837 |
2017-01-09 | 100.028 | 101.179 | 99.562 | 98.796 | 102.830 | 100.677 | 98.852 | 100.000 | 97.039 | 100.310 | 100.380 | 100.548 |
(Last 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | eq_wt_pf | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2018-12-24 | 109.609 | 112.103 | 100.320 | 105.629 | 153.120 | 118.779 | 109.952 | 117.743 | 93.160 | 144.280 | 106.043 | 117.310 |
2018-12-26 | 115.073 | 116.360 | 104.611 | 110.008 | 163.811 | 124.539 | 112.297 | 125.292 | 96.915 | 150.728 | 109.384 | 122.501 |
2018-12-27 | 116.382 | 118.156 | 107.105 | 111.246 | 166.050 | 125.654 | 113.708 | 126.811 | 98.155 | 152.351 | 109.988 | 124.055 |
2018-12-28 | 115.998 | 117.429 | 106.358 | 111.005 | 164.704 | 125.309 | 112.919 | 126.737 | 97.800 | 152.518 | 109.867 | 123.593 |
2018-12-31 | 117.331 | 118.650 | 107.015 | 111.911 | 165.962 | 126.503 | 113.278 | 127.928 | 99.049 | 154.302 | 111.404 | 124.741 |
eq_weight_portfolio
vs DJI¶normalized_tracking[['eq_wt_pf', 'DJI']].plot(figsize = (13, 7));
plt.legend(fontsize = 13, loc = 2, labels = ["Equally Weighted Portfolio", "Dow Jones Index"]);
scipy
scipy.optimize
[docs]SciPy optimize provides functions for minimizing (or maximizing) objective functions, possibly subject to constraints. It includes solvers for nonlinear problems (with support for both local and global optimization algorithms), linear programing, constrained and nonlinear least-squares, root finding, and curve fitting.
import scipy.optimize as sco
constraint = ({"type": "eq", "fun": lambda x: np.sum(x) - 1})
print("constraint: ", constraint, '\n')
bound = tuple((0,1) for x in range(num_assets))
print("bounds: ", bound, '\n')
# tracking_error is the function to minimize
# equal_weights is the starting point
# the method is a quadratic optimization
optimizer = sco.minimize(tracking_error, equal_weights, method = "SLSQP", bounds = bound, constraints = constraint)
display(optimizer)
print('')
constraint: {'type': 'eq', 'fun': <function <lambda> at 0x7fb4490938b0>} bounds: ((0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1))
fun: 0.03816368407774814 jac: array([0.01178902, 0.01115682, 0.01165584, 0.01118062, 0.01123751, 0.01130495, 0.01139102, 0.01167274, 0.01126328, 0.01154238]) message: 'Optimization terminated successfully' nfev: 77 nit: 7 njev: 7 status: 0 success: True x: array([0.09783899, 0.11926049, 0.1509521 , 0.15973453, 0.04087604, 0.08224352, 0.10876454, 0.05986219, 0.11537703, 0.06509057])
fun: 0.03816368407774814
- the annualized tracking error of the optimized portfolio, which is better than the equally weighted portfoliox: array([0.09783899, 0.11926049, 0.1509521 , 0.15973453, 0.04087604, 0.08224352, 0.10876454, 0.05986219, 0.11537703, 0.06509057])
- an array of the optimal weightsoptimal_weights = optimizer['x']
print('\nOptimal weight for each symbol in the portfolio:')
pd.Series(index = tracking_stocks, data = optimal_weights)
Optimal weight for each symbol in the portfolio:
HON 0.098 MMM 0.119 JPM 0.151 V 0.160 AXP 0.041 KO 0.082 HD 0.109 TRV 0.060 UNH 0.115 JNJ 0.065 dtype: float64
optimal_pf = portfolio_returns(optimal_weights)[1:].add(1).cumprod().mul(100)
display_me(optimal_pf, 5, "Optimal Portfolio Returns (Normalized)")
Optimal Portfolio Returns (Normalized) (First 5 rows of data)
Date 2017-01-04 100.226 2017-01-05 100.088 2017-01-06 100.452 2017-01-09 100.147 2017-01-10 100.175 dtype: float64
(Last 5 rows of data)
Date 2018-12-24 119.462 2018-12-26 125.024 2018-12-27 126.690 2018-12-28 126.188 2018-12-31 127.311 dtype: float64
normalized_tracking['optimal_pf'] = optimal_pf
normalized_tracking.iloc[0, -1] = 100
display_me(normalized_tracking, 5, "Normalized Tracking Data")
Normalized Tracking Data (First 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | eq_wt_pf | optimal_pf | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||
2017-01-03 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 | 100.000 |
2017-01-04 | 100.304 | 100.275 | 100.152 | 99.633 | 100.818 | 101.208 | 99.641 | 100.886 | 99.462 | 100.285 | 99.836 | 100.808 | 100.226 |
2017-01-05 | 100.088 | 100.499 | 99.809 | 98.716 | 102.000 | 99.960 | 99.880 | 99.695 | 97.874 | 100.452 | 100.881 | 100.564 | 100.088 |
2017-01-06 | 100.413 | 102.023 | 100.101 | 98.728 | 103.409 | 100.159 | 99.856 | 99.419 | 97.825 | 100.595 | 100.397 | 100.837 | 100.452 |
2017-01-09 | 100.028 | 101.179 | 99.562 | 98.796 | 102.830 | 100.677 | 98.852 | 100.000 | 97.039 | 100.310 | 100.380 | 100.548 | 100.147 |
(Last 5 rows of data)
DJI | HON | MMM | JPM | V | AXP | KO | HD | TRV | UNH | JNJ | eq_wt_pf | optimal_pf | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||
2018-12-24 | 109.609 | 112.103 | 100.320 | 105.629 | 153.120 | 118.779 | 109.952 | 117.743 | 93.160 | 144.280 | 106.043 | 117.310 | 119.462 |
2018-12-26 | 115.073 | 116.360 | 104.611 | 110.008 | 163.811 | 124.539 | 112.297 | 125.292 | 96.915 | 150.728 | 109.384 | 122.501 | 125.024 |
2018-12-27 | 116.382 | 118.156 | 107.105 | 111.246 | 166.050 | 125.654 | 113.708 | 126.811 | 98.155 | 152.351 | 109.988 | 124.055 | 126.690 |
2018-12-28 | 115.998 | 117.429 | 106.358 | 111.005 | 164.704 | 125.309 | 112.919 | 126.737 | 97.800 | 152.518 | 109.867 | 123.593 | 126.188 |
2018-12-31 | 117.331 | 118.650 | 107.015 | 111.911 | 165.962 | 126.503 | 113.278 | 127.928 | 99.049 | 154.302 | 111.404 | 124.741 | 127.311 |
normalized_tracking[["optimal_pf", "DJI"]].plot(figsize = (13, 7),
title="Optimal Portfolio vs DJI");
plt.legend(fontsize = 15, loc = 2);
summary_annualized(normalized_tracking[['optimal_pf', 'eq_wt_pf', 'DJI']].pct_change())
return | risk | |
---|---|---|
optimal_pf | 0.130 | 0.133 |
eq_wt_pf | 0.120 | 0.129 |
DJI | 0.090 | 0.135 |
tracking_error_general()
¶Returns the annualized tracking error between the tracking stocks, the weights, and the index in the period from start to end.
def tracking_error_general(data, tracking_stocks, weights, index, start, end, printout=True):
results = data.loc[start:end, tracking_stocks].dot(weights).sub(daily_returns.loc[start:end, index]).std() * np.sqrt(252)
if printout is True:
print(f'{(results *100): .2f}%')
else:
return results
visualize()
¶def visualize(data, tracking_stocks, weights, index, start, end, title=None):
track = data.loc[start:end, tracking_stocks].dot(weights).add(1).cumprod().mul(100)
track.name = 'Portfolio'
index = data.loc[start:end, index].add(1).cumprod().mul(100)
plt.figure(figsize = (13, 7))
track.plot()
index.plot()
plt.legend(fontsize = 15, loc = 2)
plt.title(title, fontsize = 22, pad = 20)
plt.show()
tracking_error_general(daily_returns, tracking_stocks, optimal_weights, "DJI", "2017-01-01", "2018-12-31", printout = True)
3.82%
forward_tracking_error = tracking_error_general(daily_returns, tracking_stocks,
optimal_weights, 'DJI', "2019-01-01",
"2019-12-31", printout = False)
f'{(forward_tracking_error * 100):.2f}%'
'4.61%'
visualize(daily_returns, tracking_stocks, optimal_weights,
index = 'DJI', start = "2019-01-01", end = "2019-12-31",
title = "Tracking Error for Forward Testing Portfolio")
stocks = df.columns.drop(['DOW', "DJI"])
stocks = (list(stocks[0:-5]))
display_rows(stocks, 6, "Stocks to randmly choose from")
................................................ STOCKS TO RANDMLY CHOOSE FROM: AAPL AMGN AXP BA CAT CRM CSCO CVX DIS GS HD HON IBM INTC JNJ JPM KO MCD MMM MRK MSFT NKE PG TRV ................................................
replace = False
makes sure there are no duplicatesnp.random.seed(123)
random_stocks = np.random.choice(stocks, size = num_assets, replace = False)
random_stocks
array(['CRM', 'MSFT', 'NKE', 'MMM', 'JPM', 'DIS', 'CVX', 'HON', 'CAT', 'BA'], dtype='<U4')
random.random()
returns the specified number of random floatsrandom_numbers = np.random.random(num_assets)
random_weights = random_numbers / random_numbers.sum()
random_weights
array([0.09632791, 0.09637797, 0.11496635, 0.15393431, 0.13128603, 0.11072988, 0.13092143, 0.05852672, 0.06556346, 0.04136594])
display_rows(random_stocks, 3, "Random Stocks")
display_rows(random_weights, 3, "Random Weights")
........................ RANDOM STOCKS: CRM MSFT NKE MMM JPM DIS CVX HON CAT BA ........................ ........................ RANDOM WEIGHTS: 0.09632791448118198 0.09637796993794165 0.11496634998876853 0.15393430859962146 0.13128603181708 0.11072988121004275 0.13092142697467074 0.058526720399072946 0.06556345895066334 0.041365937640956675 ........................
tracking_error_general(daily_returns, random_stocks, random_weights,
'DJI', '2019-01-01', '2019-12-31', printout = True)
5.21%
f'{(forward_tracking_error * 100):.2f}%'
'4.61%'
visualize(daily_returns, random_stocks, random_weights,
'DJI', '2019-01-01', '2019-12-31',
title = 'Tracking Error for Random Portfolio')
tracking_error_collection = np.empty(10000)
for i in range(10000):
random_numbers = np.random.random(num_assets)
random_weights = random_numbers / random_numbers.sum()
random_stock = np.random.choice(stocks, size = num_assets, replace = False)
tracking_error_collection[i] = tracking_error_general(daily_returns,
random_stocks,
random_weights,
'DJI', '2019-01-01',
'2019-12-31',
printout = False)
(tracking_error_collection < forward_tracking_error).mean()
0.0009
f'{(tracking_error_collection.mean() * 100): .2f}%'
' 5.89%'
f'{(forward_tracking_error * 100):.2f}%'
'4.61%'
plt.figure(figsize = (12, 8))
plt.hist(tracking_error_collection, bins = 100, edgecolor = 'black',
linewidth = 0.5, color = "cyan")
plt.title("10,000 Random Portfolios: Tracking Error Distribution",
fontsize = 22, pad = 20)
plt.ylabel('Number of Portfolios with Tracking Error', fontsize=15)
plt.xlabel('Tracking Error Percentages Across Random Portfolios', fontsize=15)
plt.show()