Aim
- This document shows how to repeat the result from quantstart
- We will:
- Set up a database to store OHLC trading data by using MySQL server.
- Create an account for management.
- Install vstudio for GUI management.
- Use Python scripts to show how to:
- Download data to database
- Get data from database
Learning material
Steps
- Set up MySQL server on ubuntu 19.04 through docker
- Install the latest Docker. One may need to search for the installation method on Google.
- Set up docker, vstudio, and login to the server
- Create database
mysql> CREATE DATABASE securities_master; mysql> USE securities_master;
- Create user
# Create a new user and let remote access possible mysql> CREATE USER 'username'@'localhost' IDENTIFIED BY 'password'; mysql> GRANT ALL PRIVILEGES ON securities_master.* TO 'username'@'localhost' WITH GRANT OPTION; mysql> CREATE USER 'username'@'%' IDENTIFIED BY 'password'; mysql> GRANT ALL PRIVILEGES ON securities_master.* TO 'username'@'%' WITH GRANT OPTION; mysql> FLUSH PRIVILEGES;
- Create tables,
-- Exchange reference data (e.g. NYSE, NASDAQ).
CREATE TABLE `exchange` (
  `id` int NOT NULL AUTO_INCREMENT,
  `abbrev` varchar(32) NOT NULL,
  `name` varchar(255) NOT NULL,
  `city` varchar(255) NULL,
  `country` varchar(255) NULL,
  `currency` varchar(64) NULL,
  `timezone_offset` time NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- Source of the price data (e.g. Yahoo Finance).
CREATE TABLE `data_vendor` (
  `id` int NOT NULL AUTO_INCREMENT,
  `name` varchar(64) NOT NULL,
  `website_url` varchar(255) NULL,
  `support_email` varchar(255) NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- Instrument metadata; `exchange_id` refers to exchange.id.
CREATE TABLE `symbol` (
  `id` int NOT NULL AUTO_INCREMENT,
  `exchange_id` int NULL,
  `ticker` varchar(32) NOT NULL,
  `instrument` varchar(64) NOT NULL,
  `name` varchar(255) NULL,
  `sector` varchar(255) NULL,
  `currency` varchar(32) NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `index_exchange_id` (`exchange_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- Daily OHLCV bars; `data_vendor_id` / `symbol_id` refer to the tables above.
CREATE TABLE `daily_price` (
  `id` int NOT NULL AUTO_INCREMENT,
  `data_vendor_id` int NOT NULL,
  `symbol_id` int NOT NULL,
  `price_date` datetime NOT NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  `open_price` decimal(19,4) NULL,
  `high_price` decimal(19,4) NULL,
  `low_price` decimal(19,4) NULL,
  `close_price` decimal(19,4) NULL,
  `adj_close_price` decimal(19,4) NULL,
  `volume` bigint NULL,
  PRIMARY KEY (`id`),
  KEY `index_data_vendor_id` (`data_vendor_id`),
  -- Fixed index-name typo: was `index_synbol_id`.
  KEY `index_symbol_id` (`symbol_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
- At this moment, one should be able to view their database from GUI
- Set up python, virtualenv for python2
- link
pip install pandas pip install lxml pip install yfinance --upgrade --no-cache-dir sudo apt-get install libmysqlclient-dev pip install MySQL-python
- Get symbols of S&P 500 from wiki by python and save to database, symbol table
#!/usr/bin/python # -*- coding: utf-8 -*- import datetime import lxml.html from lxml import etree from urllib2 import urlopen import MySQLdb as mdb from math import ceil def obtain_parse_wiki_snp500(): """Download and parse the Wikipedia list of S&P500 constituents using requests and libxml. Returns a list of tuples for to add to MySQL.""" # Stores the current time, for the created_at record now = datetime.datetime.utcnow() # Use libxml to download the list of S&P500 companies and obtain the symbol table # page = lxml.html.parse("http://en.wikipedia.org/wiki/List_of_S%26P_500_companies") # this line is failed due to https problem page = lxml.html.parse(urlopen("http://en.wikipedia.org/wiki/List_of_S%26P_500_companies")) # print etree.tostring(page.getroot()) # check to see if the content has downloaded symbolslist = page.xpath('//table[1]/tbody/tr')[1:] # Obtain the symbol information for each row in the S&P500 constituent table symbols = [] for symbol in symbolslist: tds = symbol.getchildren() sd = {'ticker': tds[0].getchildren()[0].text, 'name': tds[1].getchildren()[0].text, 'sector': tds[3].text} # Create a tuple (for the DB format) and append to the grand list symbols.append( (sd['ticker'], 'stock', sd['name'], sd['sector'], 'USD', now, now) ) return symbols def insert_snp500_symbols(symbols): """Insert the S&P500 symbols into the MySQL database.""" # Connect to the MySQL instance db_host = '127.0.0.1' db_user = 'sec_user' db_pass = '1234' db_name = 'securities_master' con = mdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name) # Create the insert strings column_str = "ticker, instrument, name, sector, currency, created_date, last_updated_date" insert_str = ("%s, " * 7)[:-2] final_str = "INSERT INTO symbol (%s) VALUES (%s)" % (column_str, insert_str) print final_str, len(symbols) # Using the MySQL connection, carry out an INSERT INTO for every symbol with con: cur = con.cursor() # This line avoids the MySQL MAX_PACKET_SIZE # Although of course it 
could be set larger! for i in range(0, int(ceil(len(symbols) / 100.0))): cur.executemany(final_str, symbols[i*100:(i+1)*100-1]) if __name__ == "__main__": symbols = obtain_parse_wiki_snp500() insert_snp500_symbols(symbols)
- Get daily OHLC for symbols
#!/usr/bin/python # -*- coding: utf-8 -*- import datetime import MySQLdb as mdb import urllib2 import yfinance as yf import math # Obtain a database connection to the MySQL instance db_host = '127.0.0.1' db_user = 'sec_user' db_pass = '1234' db_name = 'securities_master' con = mdb.connect(db_host, db_user, db_pass, db_name) def obtain_list_of_db_tickers(): """Obtains a list of the ticker symbols in the database.""" with con: cur = con.cursor() cur.execute("SELECT id, ticker FROM symbol") data = cur.fetchall() return [(d[0], d[1]) for d in data] def get_daily_historic_data_yahoo(ticker, start_date=(2000,1,1), end_date=datetime.date.today().timetuple()[0:3]): """Obtains data from Yahoo Finance returns and a list of tuples. ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc. start_date: Start date in (YYYY, M, D) format end_date: End date in (YYYY, M, D) format""" # Construct the Yahoo URL with the correct integer query parameters # for start and end dates. Note that some parameters are zero-based! yahoo_url = "http://ichart.finance.yahoo.com/table.csv?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s" % \ (ticker, start_date[1] - 1, start_date[2], start_date[0], end_date[1] - 1, end_date[2], end_date[0]) startDate = str(start_date[0]) + '-' + str(start_date[1]) + '-' + str(start_date[2]) endDate = str(end_date[0]) + '-' + str(end_date[1]) + '-' + str(end_date[2]) # Try connecting to Yahoo Finance and obtaining the data # On failure, print an error message. 
try: # yf_data = urllib2.urlopen(yahoo_url).readlines()[1:] # Ignore the header yf_ticker = yf.Ticker(ticker) # yf_data = yf_ticker.history(period="max") # yf_data = yf_ticker.history(start=startDate, end=endDate) yf_data = yf.download(ticker, start=startDate, end=endDate) prices = [] for index, row in yf_data.iterrows(): p = str(index).strip().split(' ') if math.isnan(row['Open']) or math.isnan(row['High']) or math.isnan(row['Low']) or math.isnan(row['Close']) or math.isnan(row['Volume']) or math.isnan(row['Adj Close']): continue prices.append( (datetime.datetime.strptime(p[0], '%Y-%m-%d'), row['Open'], row['High'], row['Low'], row['Close'], row['Volume'], row['Adj Close']) ) except Exception, e: print "Could not download Yahoo data: %s" % e return prices def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data): """Takes a list of tuples of daily data and adds it to the MySQL database. Appends the vendor ID and symbol ID to the data. daily_data: List of tuples of the OHLC data (with adj_close and volume)""" # Create the time now now = datetime.datetime.utcnow() # Amend the data to include the vendor ID and symbol ID daily_data = [(data_vendor_id, symbol_id, d[0], now, now, d[1], d[2], d[3], d[4], d[5], d[6]) for d in daily_data] # Create the insert strings column_str = """data_vendor_id, symbol_id, price_date, created_date, last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price""" insert_str = ("%s, " * 11)[:-2] final_str = "INSERT INTO daily_price (%s) VALUES (%s)" % (column_str, insert_str) # Using the MySQL connection, carry out an INSERT INTO for every symbol with con: cur = con.cursor() cur.executemany(final_str, daily_data) if __name__ == "__main__": # Loop over the tickers and insert the daily historical # data into the database tickers = obtain_list_of_db_tickers() for t in tickers: print "Adding data for %s" % t[1] yf_data = get_daily_historic_data_yahoo(t[1]) insert_daily_data_into_db('1', t[0], yf_data)
- Get the close price for a selected stock from database
#!/usr/bin/python # -*- coding: utf-8 -*- import pandas as pd import pandas.io.sql as psql import MySQLdb as mdb # Connect to the MySQL instance db_host = '127.0.0.1' db_user = 'sec_user' db_pass = '1234' db_name = 'securities_master' con = mdb.connect(db_host, db_user, db_pass, db_name) # Select all of the historic Google adjusted close data sql = """SELECT dp.price_date, dp.adj_close_price FROM symbol AS sym INNER JOIN daily_price AS dp ON dp.symbol_id = sym.id WHERE sym.ticker = 'GOOG' ORDER BY dp.price_date ASC;""" # Create a pandas dataframe from the SQL query # goog = psql.frame_query(sql, con=con, index_col='price_date') goog = psql.read_sql(sql, con=con, index_col='price_date') # Output the dataframe tail print goog.tail()
沒有留言:
發佈留言