DocTable Example: Schemas

In this example, we show column specifications for each available type, as well as the sqlalchemy equivalents on which they were based. Note that .

Each column in the schema passed to doctable is a 2+ tuple containing, in order, the column type, name, and arguments, and optionally the sqlalchemy type arguemnts.

In [1]:
from datetime import datetime
from pprint import pprint
import pandas as pd

import sys
sys.path.append('..')
import doctable as dt
In [2]:
schema = (
    # standard id column
    #SQLAlchemy: Column('id', Integer, primary_key = True, autoincrement=True), 
    ('integer', 'id', dict(primary_key=True, autoincrement=True)),
    # short form (can't provide any additional args though): ('idcol', 'id')

    # make a category column with two options: "FICTION" and "NONFICTION"
    #SQLAlchemy: Column('title', String,)
    ('string', 'category', dict(nullable=False)),

    # make a non-null title column
    #SQLAlchemy: Column('title', String,)
    ('string', 'title', dict(nullable=False)),

    # make an abstract where the default is an empty string instead of null
    #SQLAlchemy: Column('abstract', String, default='')
    ('string', 'abstract',dict(default='')),

    # make an age column where age must be greater than zero
    #SQLAlchemy: Column('abstract', Integer)
    ('integer', 'age'),

    # make a column that keeps track of column updates
    #SQLAlchemy: Column('updated_on', DateTime(), default=datetime.now, onupdate=datetime.now)
    ('datetime', 'updated_on',  dict(default=datetime.now, onupdate=datetime.now)),
    # short form to auto-record update date: ('date_updated', 'updated_on')
    
    #SQLAlchemy: Column('updated_on', DateTime(), default=datetime.now)
    ('datetime', 'updated_on',  dict(default=datetime.now)),
    # short form to auto-record insertion date: ('date_added', 'added_on')

    # make a string column with max of 500 characters
    #SQLAlchemy: Column('abstract', String, default='')
    ('string', 'text',dict(),dict(length=500)),

    
    ##### Custom DocTable Column Types #####
    
    # uses json.dump to convert python object to json when storing and
    # json.load to convert json back to python when querying
    ('json','json_data'),
    
    # stores pickled python object directly in table as BLOB
    # TokensType and ParagraphsType are defined in doctable/coltypes.py
    # SQLAlchemy: Column('tokenized', TokensType), Column('sentencized', ParagraphsType)
    ('pickle','tokenized'),
    
    # store pickled data into a separate file, recording only filename directly in table
    # the 'fpath' argument can specify where the files should be placed, but by
    # default they are stored in <dbname>_<tablename>_<columnname>
    #('picklefile', 'pickle_obj', dict(), dict(fpath='folder_for_picklefiles')),
    
    # very similar to above, but use only when storing text data
    ('textfile', 'text_file'), # similar to above
    
    
    ##### Constraints #####
    
    #SQLAlchemy: CheckConstraint('category in ("FICTION","NONFICTION")', name='salary_check')
    ('check_constraint', 'category in ("FICTION","NONFICTION")', dict(name='salary_check')),
    
    #SQLAlchemy: CheckConstraint('age > 0')
    ('check_constraint', 'age > 0'),
    
    # make sure each category/title entry is unique
    #SQLAlchemy:  UniqueConstraint('category', 'title', name='work_key')
    ('unique_constraint', ['category','title'], dict(name='work_key')),
    
    # makes a foreign key from the 'subkey' column of this table to the 'id'
    # column of ANOTHERDOCTABLE, setting the SQL onupdate and ondelete foreign key constraints
    #('foreignkey_constraint', [['subkey'], [ANOTHERDOCTABLE['id']]], {}, dict(onupdate="CASCADE", ondelete="CASCADE")),
    #NOTE: Can't show here because we didn't make ANOTHERDOCTABLE
    
    ##### Indexes ######
    
    # make index table
    # SQLAlchemy: Index('ind0', 'category', 'title', unique=True)
    ('index', 'ind0', ('category','title'),dict(unique=True)),
    
)
md = dt.DocTable(schema, verbose=True)
#pprint(md.schemainfo)
pd.DataFrame(md.schemainfo)
Out[2]:
name type nullable default autoincrement primary_key
0 id INTEGER False None auto 1
1 category VARCHAR False None auto 0
2 title VARCHAR False None auto 0
3 abstract VARCHAR True None auto 0
4 age INTEGER True None auto 0
5 updated_on DATETIME True None auto 0
6 text VARCHAR(500) True None auto 0
7 json_data VARCHAR True None auto 0
8 tokenized BLOB True None auto 0
9 text_file VARCHAR True None auto 0
In [ ]: