Pages - Menu

標籤

AWS (1) bash (1) Boost (2) C (2) CMake (2) Concurrency_Programming (3) CPP (37) Database (2) DNS (1) Docker (4) Docker-Compose (1) ELK (1) emacs (4) gcp (1) gdrive (1) git (1) gitbash (2) gitlab (1) kvm (4) Linux (5) MT4 (4) MT5 (4) Multicast (2) MySQL (2) Nijatrader8 (1) OpenCV (1) Python (4) QT5 (1) R (1) rdp (3) screenshot (1) ssh (3) Tabnine (1) TCP (1) TensorFlow (1) Tools (12) Ubuntu_1904 (11) Ubuntu_20_04 (5) UDP (1) VS2010 (1) VS2015 (1) VS2019 (1) WebServer (1) Win10 (1) winmerge (1) WSL (1) xrdp (1)

搜尋此網誌

2019年8月11日星期日

Database for trading data. The first step.

Aim

  • This document shows how to repeat the result from quantstart
  • We will:
    • Set up a database to store OHLC trading data by using MySQL server.
    • Create an account for management.
    • Install vstudio for GUI management.
    • Using python scripts to show how to:
      • Download data to database
      • Get data from database

Learning material

Steps

  • Set up MySQL server on ubuntu 19.04 through docker
    • Install the latest docker. One may need to search the method by Google.
    • Set up docker, vstudio, and login to the server
    • Create database
      • mysql> CREATE DATABASE securities_master;
        mysql> USE securities_master;
        
    • Create user
      • # Create a new user. The 'localhost' entry allows local connections;
        # the '%' host entry additionally allows connections from remote hosts
        # (e.g. from outside the Docker container).
        mysql> CREATE USER 'username'@'localhost' IDENTIFIED BY 'password';
        mysql> GRANT ALL PRIVILEGES ON securities_master.* TO 'username'@'localhost' WITH GRANT OPTION;
        mysql> CREATE USER 'username'@'%' IDENTIFIED BY 'password';
        mysql> GRANT ALL PRIVILEGES ON securities_master.* TO 'username'@'%' WITH GRANT OPTION;
        mysql> FLUSH PRIVILEGES;
        
    • Create tables
      • -- Exchanges on which instruments are listed (e.g. NYSE, NASDAQ)
        CREATE TABLE `exchange` (
          `id` int NOT NULL AUTO_INCREMENT,
          `abbrev` varchar(32) NOT NULL,
          `name` varchar(255) NOT NULL,
          `city` varchar(255) NULL,
          `country` varchar(255) NULL,
          `currency` varchar(64) NULL,
          `timezone_offset` time NULL,
          `created_date` datetime NOT NULL,
          `last_updated_date` datetime NOT NULL,
          PRIMARY KEY (`id`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
        -- Vendors that supply price data (e.g. Yahoo Finance)
        CREATE TABLE `data_vendor` (
          `id` int NOT NULL AUTO_INCREMENT,
          `name` varchar(64) NOT NULL,
          `website_url` varchar(255) NULL,
          `support_email` varchar(255) NULL,
          `created_date` datetime NOT NULL,
          `last_updated_date` datetime NOT NULL,
          PRIMARY KEY (`id`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
        -- Tradable instruments; exchange_id refers to exchange.id
        CREATE TABLE `symbol` (
          `id` int NOT NULL AUTO_INCREMENT,
          `exchange_id` int NULL,
          `ticker` varchar(32) NOT NULL,
          `instrument` varchar(64) NOT NULL,
          `name` varchar(255) NULL,
          `sector` varchar(255) NULL,
          `currency` varchar(32) NULL,
          `created_date` datetime NOT NULL,
          `last_updated_date` datetime NOT NULL,
          PRIMARY KEY (`id`),
          KEY `index_exchange_id` (`exchange_id`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
        -- Daily OHLCV bars, one row per (vendor, symbol, date)
        CREATE TABLE `daily_price` (
          `id` int NOT NULL AUTO_INCREMENT,
          `data_vendor_id` int NOT NULL,
          `symbol_id` int NOT NULL,
          `price_date` datetime NOT NULL,
          `created_date` datetime NOT NULL,
          `last_updated_date` datetime NOT NULL,
          `open_price` decimal(19,4) NULL,
          `high_price` decimal(19,4) NULL,
          `low_price` decimal(19,4) NULL,
          `close_price` decimal(19,4) NULL,
          `adj_close_price` decimal(19,4) NULL,
          `volume` bigint NULL,
          PRIMARY KEY (`id`),
          KEY `index_data_vendor_id` (`data_vendor_id`),
          -- NOTE: index name fixed; it was misspelled `index_synbol_id`
          KEY `index_symbol_id` (`symbol_id`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
        
    • At this moment, one should be able to view their database from GUI
  • Set up python, virtualenv for python2
    • link
    • pip install pandas
      pip install lxml
      pip install yfinance --upgrade --no-cache-dir
      sudo apt-get install libmysqlclient-dev
      pip install MySQL-python
      
  • Get symbols of S&P 500 from wiki by python and save to database, symbol table
    • #!/usr/bin/python
      # -*- coding: utf-8 -*-
      
      import datetime
      import lxml.html
      from lxml import etree
      from urllib2 import urlopen
      import MySQLdb as mdb
      
      from math import ceil
      
      def obtain_parse_wiki_snp500():
        """Download and parse the Wikipedia list of S&P500
        constituents using requests and libxml.
      
        Returns a list of tuples for to add to MySQL."""
      
        # Stores the current time, for the created_at record
        now = datetime.datetime.utcnow()
      
        # Use libxml to download the list of S&P500 companies and obtain the symbol table
        # page = lxml.html.parse("http://en.wikipedia.org/wiki/List_of_S%26P_500_companies") # this line is failed due to https problem
        page = lxml.html.parse(urlopen("http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"))
        # print etree.tostring(page.getroot()) # check to see if the content has downloaded
        symbolslist = page.xpath('//table[1]/tbody/tr')[1:]
      
        # Obtain the symbol information for each row in the S&P500 constituent table
        symbols = []
        for symbol in symbolslist:
          tds = symbol.getchildren()
          sd = {'ticker': tds[0].getchildren()[0].text,
              'name': tds[1].getchildren()[0].text,
              'sector': tds[3].text}
          # Create a tuple (for the DB format) and append to the grand list
          symbols.append( (sd['ticker'], 'stock', sd['name'],
            sd['sector'], 'USD', now, now) )
        return symbols
      
      def insert_snp500_symbols(symbols):
        """Insert the S&P500 symbols into the MySQL database."""
      
        # Connect to the MySQL instance
        db_host = '127.0.0.1'
        db_user = 'sec_user'
        db_pass = '1234'
        db_name = 'securities_master'
        con = mdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
      
        # Create the insert strings
        column_str = "ticker, instrument, name, sector, currency, created_date, last_updated_date"
        insert_str = ("%s, " * 7)[:-2]
        final_str = "INSERT INTO symbol (%s) VALUES (%s)" % (column_str, insert_str)
        print final_str, len(symbols)
      
        # Using the MySQL connection, carry out an INSERT INTO for every symbol
        with con:
          cur = con.cursor()
          # This line avoids the MySQL MAX_PACKET_SIZE
          # Although of course it could be set larger!
          for i in range(0, int(ceil(len(symbols) / 100.0))):
            cur.executemany(final_str, symbols[i*100:(i+1)*100-1])
      
      if __name__ == "__main__":
        symbols = obtain_parse_wiki_snp500()
        insert_snp500_symbols(symbols)
      
  • Get daily OHLC for symbols
    • #!/usr/bin/python
      # -*- coding: utf-8 -*-
      
      import datetime
      import MySQLdb as mdb
      import urllib2
      import yfinance as yf
      import math
      
      
      # Obtain a database connection to the MySQL instance
      db_host = '127.0.0.1'
      db_user = 'sec_user'
      db_pass = '1234'
      db_name = 'securities_master'
      con = mdb.connect(db_host, db_user, db_pass, db_name)
      
      def obtain_list_of_db_tickers():
        """Obtains a list of the ticker symbols in the database."""
        with con:
          cur = con.cursor()
          cur.execute("SELECT id, ticker FROM symbol")
          data = cur.fetchall()
          return [(d[0], d[1]) for d in data]
      
      def get_daily_historic_data_yahoo(ticker,
                            start_date=(2000,1,1),
                            end_date=datetime.date.today().timetuple()[0:3]):
          """Obtains data from Yahoo Finance returns and a list of tuples.
      
        ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc.
        start_date: Start date in (YYYY, M, D) format
        end_date: End date in (YYYY, M, D) format"""
      
          # Construct the Yahoo URL with the correct integer query parameters
          # for start and end dates. Note that some parameters are zero-based!
          yahoo_url = "http://ichart.finance.yahoo.com/table.csv?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s" % \
            (ticker, start_date[1] - 1, start_date[2], start_date[0], end_date[1] - 1, end_date[2], end_date[0])
      
          startDate = str(start_date[0]) + '-' + str(start_date[1]) + '-' + str(start_date[2])
          endDate = str(end_date[0]) + '-' + str(end_date[1]) + '-' + str(end_date[2])
          # Try connecting to Yahoo Finance and obtaining the data
          # On failure, print an error message.
          try:
              # yf_data = urllib2.urlopen(yahoo_url).readlines()[1:] # Ignore the header
              yf_ticker = yf.Ticker(ticker)
              # yf_data = yf_ticker.history(period="max")
              # yf_data = yf_ticker.history(start=startDate, end=endDate)
              yf_data = yf.download(ticker, start=startDate, end=endDate)
              prices = []
              for index, row in yf_data.iterrows():
                p = str(index).strip().split(' ')
                if math.isnan(row['Open']) or math.isnan(row['High']) or math.isnan(row['Low']) or math.isnan(row['Close']) or math.isnan(row['Volume']) or math.isnan(row['Adj Close']):
                  continue
                prices.append( (datetime.datetime.strptime(p[0], '%Y-%m-%d'), row['Open'], row['High'], row['Low'], row['Close'], row['Volume'], row['Adj Close']) )
          except Exception, e:
              print "Could not download Yahoo data: %s" % e
          return prices
      
      def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
        """Takes a list of tuples of daily data and adds it to the
        MySQL database. Appends the vendor ID and symbol ID to the data.
      
        daily_data: List of tuples of the OHLC data (with
        adj_close and volume)"""
      
        # Create the time now
        now = datetime.datetime.utcnow()
      
        # Amend the data to include the vendor ID and symbol ID
        daily_data = [(data_vendor_id, symbol_id, d[0], now, now,
          d[1], d[2], d[3], d[4], d[5], d[6]) for d in daily_data]
      
        # Create the insert strings
        column_str = """data_vendor_id, symbol_id, price_date, created_date,
                last_updated_date, open_price, high_price, low_price,
                close_price, volume, adj_close_price"""
        insert_str = ("%s, " * 11)[:-2]
        final_str = "INSERT INTO daily_price (%s) VALUES (%s)" % (column_str, insert_str)
      
        # Using the MySQL connection, carry out an INSERT INTO for every symbol
        with con:
          cur = con.cursor()
          cur.executemany(final_str, daily_data)
      
      if __name__ == "__main__":
        # Loop over the tickers and insert the daily historical
        # data into the database
        tickers = obtain_list_of_db_tickers()
        for t in tickers:
          print "Adding data for %s" % t[1]
          yf_data = get_daily_historic_data_yahoo(t[1])
          insert_daily_data_into_db('1', t[0], yf_data)
      
  • Get the close price for a selected stock from database
    • #!/usr/bin/python
      # -*- coding: utf-8 -*-
      
      import pandas as pd
      import pandas.io.sql as psql
      import MySQLdb as mdb
      
      
      # Connect to the MySQL instance
      db_host = '127.0.0.1'
      db_user = 'sec_user'
      db_pass = '1234'
      db_name = 'securities_master'
      con = mdb.connect(db_host, db_user, db_pass, db_name)
      
      # Select all of the historic Google adjusted close data
      sql = """SELECT dp.price_date, dp.adj_close_price
               FROM symbol AS sym
               INNER JOIN daily_price AS dp
               ON dp.symbol_id = sym.id
               WHERE sym.ticker = 'GOOG'
               ORDER BY dp.price_date ASC;"""
      
      # Create a pandas dataframe from the SQL query
      # goog = psql.frame_query(sql, con=con, index_col='price_date')
      goog = psql.read_sql(sql, con=con, index_col='price_date')
      
      # Output the dataframe tail
      print goog.tail()
      

沒有留言:

發佈留言