Simon TheChain Simon TheChain - 3 months ago 25
Python Question

Pandas from csv to sql in Django: OperationalError, table x has no column named y

I'm building a website to manage the data produced by household devices. The data is contained in .csv files, and there will be several hundreds/thousands of those to ingest over time.

The csv looks like this:

Timestamp, Address (64bit), Zone, Sensor Type, Data type, Value
12/23/16 02:05:30, some_64bit_address, 5, 0, 0, 255


I can read the csv and convert it to a pandas dataframe, but I'm having trouble saving it to sql. I get an


OperationalError at /import/: table FromCsv has no column named
Address (64bit).


I know there are several questions similar to this one but I haven't been able to find an answer that fixes my problem and I don't know what to try next. I tried with and without
index_col=False
in
read_csv
, with and without
index=False
in
to_sql
, and I tried deleting the database and migration files and starting from scratch. I'm using python 3.5, django 1.11, pandas 0.19.2 and sqlite3.

The view:

def import_data(request):
if request.method == "POST":
files = request.FILES.getlist("csv_files")
dateparse = lambda x: pd.datetime.strptime(x, "%m/%d/%y %H:%M:%S")

for fichier in files:
# convert the file in memory to stringio
decoded = fichier.read().decode('utf-8')
io_string = StringIO(decoded)

try:
# convert to dataframe
chunks = pd.read_csv(
filepath_or_buffer=io_string,
parse_dates=["Timestamp"],
date_parser=dateparse,
chunksize=500000
)

for chunk in chunks:
chunk.columns = [
"Timestamp",
"Address (64bit)",
"Zone",
"Sensor Type",
"Data type",
"Value"]

# save table in database
with sqlite3.connect("db.sqlite3") as conn:
chunk.to_sql("FromCsv", con=conn, index=False, if_exists="append")

messages.success(request, fichier.name)

except pd.io.common.EmptyDataError:
pass

return render(request, "upload_done.html")
return render(request, "import_data.html")


The model:

class RawData(models.Model):

class Meta:
db_table = "FromCsv"

timestamp = models.DateTimeField(default=timezone.now)
address = models.CharField(max_length=250)
zone = models.IntegerField(default=0)
sensor = models.IntegerField(default=0)
data_type = models.IntegerField(default=0)
data_value = models.IntegerField(default=0)

def __str__(self):
return str(self.timestamp)


The traceback:

Traceback:

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/django/core/handlers/exception.py" in inner
39. response = get_response(request)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/django/core/handlers/base.py" in _get_response
187. response = self.process_exception_by_middleware(e, request)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/django/core/handlers/base.py" in _get_response
185. response = wrapped_callback(request, *callback_args, **callback_kwargs)

File "/home/smoky05/PycharmProjects/UbiosDataSite/data/views.py" in import_data
62. chunk.to_sql("FromCsv", con=conn, index=False, if_exists="append")

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/pandas/core/generic.py" in to_sql
1201. chunksize=chunksize, dtype=dtype)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/pandas/io/sql.py" in to_sql
470. chunksize=chunksize, dtype=dtype)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/pandas/io/sql.py" in to_sql
1503. table.insert(chunksize)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/pandas/io/sql.py" in insert
664. self._execute_insert(conn, keys, chunk_iter)

File "/home/smoky05/.virtualenvs/ubiosData/lib/python3.5/site-packages/pandas/io/sql.py" in _execute_insert
1291. conn.executemany(self.insert_statement(), data_list)

Exception Type: OperationalError at /import/
Exception Value: table FromCsv has no column named Address (64bit)

Answer Source

Using a GUI sqlite viewer, I found the problem: the table that is created uses the column names from the destination model, so renaming the dataframes's columns accordingly makes it work:

chunk.columns = [
                        "timestamp",
                        "address",
                        "zone",
                        "sensor",
                        "data_type",
                        "data_value"]

now it matches the model:

class RawData(models.Model):

    class Meta:
        db_table = "FromCsv"

    timestamp = models.DateTimeField(default=timezone.now)
    address = models.CharField(max_length=250)
    zone = models.IntegerField(default=0)
    sensor = models.IntegerField(default=0)
    data_type = models.IntegerField(default=0)
    data_value = models.IntegerField(default=0)

    def __str__(self):
        return str(self.timestamp)