【发布时间】:2012-01-23 18:35:09
【问题描述】:
谁能帮我去掉每行开头/结尾的双引号?
我有一个大的 csv(800k 行)并且想要创建插入语句以将数据导入 SQL DB。我知道代码真的很难看,但我以前从未使用过 Python……非常感谢任何帮助……
#Script file to read from .csv containing raw location data (zip code database)
#SQL insert statements are written to another file, one statement per line
#Duplicate zip codes are removed
import csv

# Raw strings: the backslashes in Windows paths must never be read as escapes.
SOURCE_PATH = r'c:\Canada\canada_zip.csv'
OUTPUT_PATH = r'c:\Canada\canada_inserts.csv'

# Number of INSERT statements between 'GO' separators, to keep each SQL
# batch at a manageable size.
BATCH_SIZE = 100

#DROP / CREATE TABLE
createTableCmd = '''DROP TABLE PopulatedPlacesCanada \n\
CREATE TABLE PopulatedPlacesCanada \n\
( \n\
ID INT primary key identity not null, \n\
Zip VARCHAR(10), \n\
City nVARCHAR(100), \n\
County nvarchar(100), \n\
StateCode varchar(3), \n\
StateName nvarchar(100), \n\
Country nvarchar(30), \n\
Latitude float, \n\
Longitude float, \n\
PopulationCount int, \n\
Timezone int, \n\
Dst bit \n\
)'''

table = 'PopulatedPlacesCanada'
db_fields = 'Zip, City, County, StateCode, StateName, Country, Latitude, Longitude, PopulationCount, Timezone, Dst'

# The input is opened in 'rb' because that is what the Python 2 csv module
# expects (on Python 3 it would have to be text mode with newline='').
# The output is a plain text file: the statements are written directly
# instead of through csv.writer, because csv.writer wraps any field that
# contains the delimiter in double quotes -- which is what put a '"' at the
# start and end of every generated line.
# The with-block guarantees both files are flushed and closed.
with open(SOURCE_PATH, 'rb') as csvfile, open(OUTPUT_PATH, 'w') as ofile:
    # Detect the CSV dialect from the first line, then rewind to parse
    # the file from the beginning.
    dialect = csv.Sniffer().sniff(csvfile.readline())
    csvfile.seek(0)
    reader = csv.reader(csvfile, dialect)
    next(reader)  # skip the first row (presumably the column header)

    ofile.write(createTableCmd + '\n')

    zip_codes = set()  # zip codes already emitted
    count = 0          # number of INSERT statements written so far
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line
            continue

        zipCode = row[0]
        if zipCode in zip_codes:
            # only add row if zip code is unique
            continue
        zip_codes.add(zipCode)
        count += 1

        # not every row in the csv is needed so handpick them using row[n]
        # Single quotes are removed from the text columns so they cannot
        # terminate the SQL string literal early.
        city = row[1].replace("'", "").strip()
        county = ""
        state_abr = row[2]
        state = row[3].replace("'", "").strip()
        country = 'Canada'
        lat = row[8]
        lon = row[9]
        pop = row[11]
        timezone = row[6]

        # Map the Y/N flag to a bit value; any other value passes through.
        dst = row[7]
        if dst == 'Y':
            dst = '1'
        elif dst == 'N':
            dst = '0'

        query = "INSERT INTO {0}({1}) VALUES ('{2}', '{3}', '{4}', '{5}', '{6}', '{7}', {8}, {9}, {10}, {11}, {12})".format(table, db_fields, zipCode, city, county, state_abr, state, country, lat, lon, pop, timezone, dst)
        ofile.write(query + '\n')

        # Go statement to make sql batch size manageable: emitted after
        # every BATCH_SIZE inserts, not only after the first BATCH_SIZE.
        if count % BATCH_SIZE == 0:
            ofile.write('GO\n')
【问题讨论】:
标签: python sql csv double-quotes