Pastebin
Paste #3804: No description
< previous paste - next paste>
Pasted by Anonymous Coward
#-------------------------------------------------------------------------------
# Name: sas2sqlite
# Purpose: translate a SAS data set to a SQLite table
#
#-------------------------------------------------------------------------------
def sas2sqlite(sasfile, sqlitedb):
import sqlite3
from sas7bdat import SAS7BDAT
# Read data from SAS
f = SAS7BDAT(sasfile)
x = f.header.cols
y = [''] * len(x)
for i, n in enumerate(x):
if n[1][2] == "numeric":
y[i] = n[0] + ' real'
else:
y[i] = n[0] + ' varchar({})'.format(n[1][1])
_table = f.header.dataset.title()
cmd1 = "CREATE TABLE {} ({})".format(_table, ', '.join(y))
cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(_table, ','.join(['?']*len(x)))
conn = sqlite3.connect(sqlitedb)
c = conn.cursor()
for i, line in enumerate(f.readData()):
if i == 0:
c.execute('DROP TABLE IF EXISTS {}'.format(_table))
c.execute(cmd1)
else:
c.execute(cmd2, line)
conn.commit()
c.close()
if __name__ == '__main__':
sas2sqlite("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\prdsal2.sas7bdat", "C:/logs/foo.db")
#-------------------------------------------------------------------------------
# Name: sas2pd
# Purpose: import a SAS dataset as a Python pandas dataframe
#
#-------------------------------------------------------------------------------
def sas2pd(sasfile):
import pandas as pd
from sas7bdat import SAS7BDAT
a = []
for i, x in enumerate(SAS7BDAT(sasfile).readData()):
if i == 0:
cols = x
else:
a.append(x)
df = pd.DataFrame(a)
df.columns = cols
return df
if __name__ == '__main__':
sasclass = sas2pd("C:\Program Files\SASHome\SASFoundation\9.3\core\sashelp\class.sas7bdat")
from ggplot import *
print ggplot(sasclass, aes('Height', 'Weight')) + geom_point() + stat_smooth(color='red')
New Paste
Go to most recent paste.