[问题] Django Postgresql 写入资料停滞

楼主: BlgAtlfans (BLG_Eric)   2016-10-04 23:37:55
各位大神好
最近小弟在写一个上传档案模组
功能是把上传上来的csv xls xlsx...等档案转成postgresql table
每次测试小档案的时候都没什么问题
大约几秒钟就完成上传并存入postgresql
但是最近试着上传了一个100多mb(13万多笔)的xlsx资料时网页会卡住
看了之后发现档案上传很快 几秒钟就好了
可是后续的汇入postgresql却迟迟没反应 卡住了
数据库也没有任何新增的table(本人不才 没做timeout设定)
希望各位能帮我看一下程式码 感谢
以下是upload.py的程式码(应该是没什么问题 还是附上):
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
import csv
import sys
import os
import random
import psycopg2
from .models import Document,Folder
from .forms import DocumentForm
from data.write import *
def upload(request,fkey): # upload multiple files and create new table into
postgresql
tct = Document.objects.count() # Table count
fct = Folder.objects.count() # Folder count
f = DocumentForm()
folder = Folder.objects.filter(groupkey=fkey).values_list('grouptitle',
flat=True).first()
if request.method == 'POST':
f = DocumentForm(request.POST, request.FILES)
if f.is_valid():
file_count = 0 #Count how many files in this upload
tc = 0 # Count title split
for u in request.FILES.getlist('file'):
file_count = file_count+1
check_title = request.POST['title'] # get titles (one or multiple)
info = request.POST['details']
tag = request.POST['tags']
oauth = request.POST['public']
source = request.POST['source']
print(file_count)
print(len(check_title.split(",")))
if len(check_title.split(",")) != file_count: # check if title amount
and file amount are the same
messages.warning(request,"Title Format Error!!")
else:
title = check_title.split(",")
for up in request.FILES.getlist('file'): # multiple file uplaod loop
samefilercheck =
Document.objects.filter(title=title[tc],folder=fkey).count() #set a variable
to check if same file exists
if samefilercheck > 0:
messages.warning(request,"Same File Already Exists!!")
return
HttpResponseRedirect(reverse('data.views.upload',kwargs={'fkey':fkey}))
else:
pname = settings.MEDIAPATH+'\%s\\' % folder+up.name.replace("
","_").replace("(","").replace(")","")
#up.name = up.name.replace('','_')
filekey =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(25)])
n,ext = os.path.splitext(up.name)
newdoc = Document(title=title[tc],filekey=filekey,path= u'%s' %
pname,filename= u'%s' % up.name,tablename= u'%s' %
n,filetype=ext,info=info,tag=tag,oauth=oauth,source=source,folder=fkey,version='1.0')
newdoc.file=handle_uploaded_file(up,pname)
newdoc.save()
tc = tc+1
messages.warning(request,"Upload Success!!!")
tc =0
for filename in request.FILES.getlist('file'): # call different
function to write file to table
documents = Document.objects.get(filename=filename) # judge the
filetype by its filetype
if documents.filetype =='.csv':
csvwritein(documents)
if documents.filetype =='.xls':
xlswritein(documents)
if documents.filetype =='.xlsx':
xlsxwritein(documents)
if documents.filetype =='.json':
jsonwritein(documents)
url = '/datasets/%s' % (fkey)
return HttpResponseRedirect(url)
else: # if fail,return to empty form
messages.warning(request,"Upload Fail!!!")
f = DocumentForm()
return render_to_response('upload.html',RequestContext(request,locals()))
def handle_uploaded_file(f,pn):
with open(pn, 'wb+') as destination:
for chunk in f.chunks():
destination.write(chunk)
return pn
以下是写入postgresql的write.py程式码(太长所以只撷取xlsx的部分):
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
import re
import sys
import random
import psycopg2
import xlrd
import openpyxl as pyxl
from .models import Document
from .forms import DocumentForm
def xlsxwritein(doc): # write into database for file type xlsx
xlsxt = 0 # check if action is first time
conn = psycopg2.connect("dbname='apidb' user='postgres' host='localhost'
password='eric40502' port='8000'")
maincur = conn.cursor()
readcur = conn.cursor()
writecur = conn.cursor()
readcur.execute("select exists(select * from information_schema.tables
where table_name='%s')" % doc.tablename) # check if same file is already in
database
check = readcur.fetchone()[0]
row_id = 1 # used for following id field
wb = pyxl.load_workbook(doc.path)
sheetnames = wb.get_sheet_names()
ws = wb.get_sheet_by_name(sheetnames[0])
print(ws.max_row)
for rown in range(ws.max_row):
if xlsxt == 0:
if check == True: #if table exists,rename tablename in postgresql and
change the settings
checktrue(doc)
else:
tablename = '"%s"' % doc.tablename
field = [ws.cell(row=1,column=col_index).value for col_index in
range(1,ws.max_column+1)]
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY KEY);" % tablename)
for coln in range(ws.max_column):
field[coln] = '"%s"' % field[coln] # change number to string
if field[coln] == 'ID':
field[coln] = 'original_id'
print(field)
print(field[coln])
maincur.execute("ALTER TABLE %s ADD %s CITEXT;" %
(tablename,field[coln]))
xlsxt = xlsxt+1
elif xlsxt > 0 and check == False: # not first time(insert data) and
check no same file exists
for coln in range(ws.max_column):
if coln == 0:
writecur.execute("INSERT INTO %s (%s) VALUES ('%s');"
%(tablename,field[coln],str(ws.cell(row=rown,column=coln+1).value)))
else:
writecur.execute("UPDATE %s SET %s = '%s' WHERE id = '%d';"
%(tablename,field[coln],str(ws.cell(row=rown+1,column=coln+1).value),row_id))
xlsxt = xlsxt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
readcur.close()
writecur.close()
conn.close()
xlsxt = 0
作者: kenduest (小州)   2016-10-05 05:47:00
正确方式要用 celery 方式丢给背景处理比较不会有问题
作者: a0919610611 (炽)   2016-10-05 13:09:00
上传下载 和 写入 速度差很多吧
楼主: BlgAtlfans (BLG_Eric)   2016-10-05 15:59:00
感谢各位回答 但是想问为什么会卡住? 是因为CPU?

Links booklink

Contact Us: admin [ a t ] ucptt.com