Pastebin

Paste #3804: No description

< previous paste - next paste>

Pasted by Anonymous Coward

Download View as text

#-------------------------------------------------------------------------------
# Name:        sas2sqlite
# Purpose:     translate a SAS data set to a SQLite table
#
#-------------------------------------------------------------------------------

def sas2sqlite(sasfile, sqlitedb):
    import sqlite3
    from sas7bdat import SAS7BDAT
    # Read data from SAS
    f = SAS7BDAT(sasfile)
    x = f.header.cols
    y = [''] * len(x)

    for i, n in enumerate(x):
        if n[1][2] == "numeric":
            y[i] = n[0] + ' real'
        else:
            y[i] = n[0] + ' varchar({})'.format(n[1][1])

    _table = f.header.dataset.title()
    cmd1 = "CREATE TABLE {} ({})".format(_table, ', '.join(y))
    cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(_table, ','.join(['?']*len(x)))
    conn = sqlite3.connect(sqlitedb)
    c = conn.cursor()

    for i, line in enumerate(f.readData()):
        if i == 0:
            c.execute('DROP TABLE IF EXISTS {}'.format(_table))
            c.execute(cmd1)
        else:
            c.execute(cmd2, line)
    conn.commit()
    c.close()

if __name__ == '__main__':
    sas2sqlite("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\prdsal2.sas7bdat", "C:/logs/foo.db")

#-------------------------------------------------------------------------------
# Name:       sas2pd
# Purpose:    import a SAS dataset as a Python pandas dataframe  
#
#-------------------------------------------------------------------------------

def sas2pd(sasfile):
    import pandas as pd
    from sas7bdat import SAS7BDAT
    a = []
    for i, x in enumerate(SAS7BDAT(sasfile).readData()):
        if i == 0:
            cols = x
        else:
            a.append(x)
    df = pd.DataFrame(a)
    df.columns = cols
    return df

if __name__ == '__main__':
    sasclass = sas2pd("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\class.sas7bdat")
    from ggplot import *
    print ggplot(sasclass, aes('Height', 'Weight')) + geom_point() + stat_smooth(color='red')

New Paste


Do not write anything in this field if you're a human.

Go to most recent paste.