#------------------------------------------------------------------------------- # Name: sas2sqlite # Purpose: translate a SAS data set to a SQLite table # #------------------------------------------------------------------------------- def sas2sqlite(sasfile, sqlitedb): import sqlite3 from sas7bdat import SAS7BDAT # Read data from SAS f = SAS7BDAT(sasfile) x = f.header.cols y = [''] * len(x) for i, n in enumerate(x): if n[1][2] == "numeric": y[i] = n[0] + ' real' else: y[i] = n[0] + ' varchar({})'.format(n[1][1]) _table = f.header.dataset.title() cmd1 = "CREATE TABLE {} ({})".format(_table, ', '.join(y)) cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(_table, ','.join(['?']*len(x))) conn = sqlite3.connect(sqlitedb) c = conn.cursor() for i, line in enumerate(f.readData()): if i == 0: c.execute('DROP TABLE IF EXISTS {}'.format(_table)) c.execute(cmd1) else: c.execute(cmd2, line) conn.commit() c.close() if __name__ == '__main__': sas2sqlite("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\prdsal2.sas7bdat", "C:/logs/foo.db") #------------------------------------------------------------------------------- # Name: sas2pd # Purpose: import a SAS dataset as a Python pandas dataframe # #------------------------------------------------------------------------------- def sas2pd(sasfile): import pandas as pd from sas7bdat import SAS7BDAT a = [] for i, x in enumerate(SAS7BDAT(sasfile).readData()): if i == 0: cols = x else: a.append(x) df = pd.DataFrame(a) df.columns = cols return df if __name__ == '__main__': sasclass = sas2pd("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\class.sas7bdat") from ggplot import * print ggplot(sasclass, aes('Height', 'Weight')) + geom_point() + stat_smooth(color='red')