openpyxl单元格格式在style中
-
def __init__(self, static=False):
-
self.static = static
-
self.font = Font()
-
self.fill = Fill()
-
self.borders = Borders()
-
self.alignment = Alignment()
-
self.number_format = NumberFormat()
-
self.protection = Protection()
NumberFormat对应单元格格式中的 "数字"
Fill对应填充
Font对应字体
Alignment对应对齐
Borders对应边框
填充方式设置代码(先设置填充方式,然后设置填充颜色)
-
excel_sheet.cell(row=row_num,column=1).style.fill.fill_type = Fill.FILL_SOLID
-
excel_sheet.cell(row=row_num,column=1).style.fill.start_color.index = 'FFD306'
单元格格式 设置单元格格式为纯文本
-
excel_sheet.cell(row=row_num,column=1).style.number_format.format_code = NumberFormat.FORMAT_TEXT
合并单元格
-
excel_sheet.merge_cells(start_row=row_num, end_row=row_num, start_column=1, end_column=7)
具体可以见去各自的类文件中查看定义,比如numbers.py里有
-
FORMAT_GENERAL = 'General'
-
FORMAT_TEXT = '@'
-
FORMAT_NUMBER = '0'
-
FORMAT_NUMBER_00 = '0.00'
-
FORMAT_NUMBER_COMMA_SEPARATED1 = '#,##0.00'
-
FORMAT_NUMBER_COMMA_SEPARATED2 = '#,##0.00_-'
-
FORMAT_PERCENTAGE = '0%'
-
FORMAT_PERCENTAGE_00 = '0.00%'
-
FORMAT_DATE_YYYYMMDD2 = 'yyyy-mm-dd'
-
FORMAT_DATE_YYYYMMDD = 'yy-mm-dd'
-
FORMAT_DATE_DDMMYYYY = 'dd/mm/yy'
-
FORMAT_DATE_DMYSLASH = 'd/m/y'
-
FORMAT_DATE_DMYMINUS = 'd-m-y'
-
FORMAT_DATE_DMMINUS = 'd-m'
-
FORMAT_DATE_MYMINUS = 'm-y'
-
FORMAT_DATE_XLSX14 = 'mm-dd-yy'
-
FORMAT_DATE_XLSX15 = 'd-mmm-yy'
-
FORMAT_DATE_XLSX16 = 'd-mmm'
-
FORMAT_DATE_XLSX17 = 'mmm-yy'
-
FORMAT_DATE_XLSX22 = 'm/d/yy h:mm'
-
FORMAT_DATE_DATETIME = 'd/m/y h:mm'
-
FORMAT_DATE_TIME1 = 'h:mm AM/PM'
-
FORMAT_DATE_TIME2 = 'h:mm:ss AM/PM'
NumberFormat缺陷
过长大的int即使设置了纯文本格式写入依然胡会被截断......估计修正这个需要修改
openpyxl底层save文件的代码
~~~~听说你精通excel,来来来,先把7000页的office文件格式看一遍.......
补充.......
昨天为了解决长ID写入excel文件被截断的问题,直接在长ID前加了个字母,这样终于让文本正确保存.......
导完数据以后.....硬着头皮看了
openpyxl 保存文件的代码.......
麻痹也没别人说的7000页office文件格式那么复杂嘛.........
先看workbook的save方法,调用了下面
-
if self.__optimized_write:
-
save_dump(self, filename)
-
else:
-
save_workbook(self, filename)
dump不知道什么东西,先看save_workbook方法
方法在“from openpyxl.writer.excel import save_workbook”
-
def save_workbook(workbook, filename):
-
"""Save the given workbook on the filesystem under the name filename.
-
-
:param workbook: the workbook to save
-
:type workbook: :class:`openpyxl.workbook.Workbook`
-
-
:param filename: the path to which save the workbook
-
:type filename: string
-
-
:rtype: bool
-
-
"""
-
writer = ExcelWriter(workbook)
-
writer.save(filename)
-
return True
先不细看ExcelWriter类
直接看ExcelWriter类的save方法
-
def save(self, filename):
-
"""Write data into the archive."""
-
archive = ZipFile(filename, 'w', ZIP_DEFLATED)
-
self.write_data(archive)
-
archive.close()
WTF?zip? excel是个zip文件?直接用7z解压一个excel文件,我操还真是......
分析下文件结构
我们的各个sheet的都是以xml形式保存在'xl/worksheets/sheet%d' % sheet_id 中
随便打开个sheet1文件,可以发现,所有只包含数字的的value都保存在这个文件中,但是这里没有非数值的值。
找了下发现,所有非数值的字符都在xl/sharedStrings.xml
仔细分析发现,
'xl/worksheets/sheet%d' % sheet_id 中的数值,其实保存的内容是正确的没有被截断,但是excel本身显示的时候,只要数值超过15位数(好像是15),就是会被截断,即使单元格格式设置正确(这里可以看出excel的单元格格式只不能修改数据存储,只负责显示)。
所以可以猜测,只要我们把纯数值的value存到xl/sharedStrings.xml中而不是'xl/worksheets/sheet%d' % sheet_id里就不会被截断了
从上面分析我们可以猜测下excle的文件设计思路
sharedStrings.xml,顾名思义,共享的字符串,为了减少excel的文件大小,所有非数值的字符串都放在这里,这就有一个最大的好处
当你大部分单元格都是一个字符串比如“aaa”的时候,excel不用每个单元格都保存一个“aaa”,只要在shareString.xml里放一个“aaa”,然后所有值为“aaa”的单元格都引用这个数值就好
缺点当然也明显,当大部分字符串都不一样的时候,会造成单个文件过大,这也是excel为什么会把纯数值文件保存在'xl/worksheets/sheet%d' % sheet_id里的原因
所以,在一般情况下....纯数值文件还是放在'xl/worksheets/sheet%d' % sheet_id里比较好,但是在数值太长,无发正常显示的情况下,我们还是得想办法把数值弄到sharedStrings.xml里
我们在代码里找到sharedStrings.xml在哪里调用,看看代码在哪里过滤了纯数值让他不写入sharedStrings.xml,我们先跟着上面的self.write_data(archive)去看看
-
class ExcelWriter(object):
-
"""Write a workbook object to an Excel file."""
-
-
def __init__(self, workbook):
-
self.workbook = workbook
-
self.style_writer = StyleWriter(self.workbook)
-
-
def write_data(self, archive):
-
"""Write the various xml files into the zip archive."""
-
# cleanup all worksheets
-
shared_string_table = self._write_string_table(archive)
-
-
archive.writestr(ARC_CONTENT_TYPES, write_content_types(self.workbook))
-
archive.writestr(ARC_ROOT_RELS, write_root_rels(self.workbook))
-
archive.writestr(ARC_WORKBOOK_RELS, write_workbook_rels(self.workbook))
-
archive.writestr(ARC_APP, write_properties_app(self.workbook))
-
archive.writestr(ARC_CORE, write_properties_core(self.workbook.properties))
-
if self.workbook.loaded_theme:
-
archive.writestr(ARC_THEME, self.workbook.loaded_theme)
-
else:
-
archive.writestr(ARC_THEME, write_theme())
-
archive.writestr(ARC_STYLE, self.style_writer.write_table())
-
archive.writestr(ARC_WORKBOOK, write_workbook(self.workbook))
-
-
if self.workbook.vba_archive:
-
vba_archive = self.workbook.vba_archive
-
for name in vba_archive.namelist():
-
for s in ARC_VBA:
-
if name.startswith(s):
-
archive.writestr(name, vba_archive.read(name))
-
break
-
-
self._write_worksheets(archive, shared_string_table, self.style_writer)
-
-
def _write_string_table(self, archive):
-
-
for ws in self.workbook.worksheets:
-
ws.garbage_collect()
-
shared_string_table = create_string_table(self.workbook)
-
archive.writestr(ARC_SHARED_STRINGS,
-
write_string_table(shared_string_table))
-
-
return shared_string_table
write_data的第一步是执行
shared_string_table = self._write_string_table(archive),一看名字就知道啦 ,这个是和share string 有关的
我们去看shared_string_table = self._write_string_table(archive),第一个循环是和worksheet有关的,我们先不看
看看他下面的 archive.writestr(ARC_SHARED_STRINGS,write_string_table(shared_string_table))
ARC_SHARED_STRINGS是啥?代码追踪过去一看
ARC_SHARED_STRINGS = PACKAGE_XL + '/sharedStrings.xml'
那么可以看到这里果然是写sharedStrings.xml的,那么继续看write_string_table
-
def write_string_table(string_table):
-
"""Write the string table xml."""
-
temp_buffer = StringIO()
-
doc = XMLGenerator(out=temp_buffer, encoding='utf-8')
-
start_tag(doc, 'sst', {'xmlns':
-
'',
-
'uniqueCount': '%d' % len(string_table)})
-
strings_to_write = sorted(string_table.items(),
-
key=lambda pair: pair[1])
-
for key in [pair[0] for pair in strings_to_write]:
-
start_tag(doc, 'si')
-
if key.strip() != key:
-
attr = {'xml:space': 'preserve'}
-
else:
-
attr = {}
-
tag(doc, 't', attr, key)
-
end_tag(doc, 'si')
-
end_tag(doc, 'sst')
-
string_table_xml = temp_buffer.getvalue()
-
temp_buffer.close()
-
return string_table_xml
这看上去是个写xml文件头的,archive.writestr这个就没什么看头了....回头看create_string_table
-
def create_string_table(workbook):
-
"""Compile the string table for a workbook."""
-
strings = set()
-
for sheet in workbook.worksheets:
-
for cell in sheet.get_cell_collection():
-
if cell.data_type == cell.TYPE_STRING and cell._value is not None:
-
strings.add(cell.value)
-
return dict((key, i) for i, key in enumerate(sorted(strings)))
oh yeah,看见if cell.data_type == cell.TYPE_STRING and cell._value is not None:就能大概知道了,应该就是这个函数判断了单元格里的value类型
看看get_cell_collection
-
def get_cell_collection(self):
-
"""Return an unordered list of the cells in this worksheet."""
-
return self._cells.values()
看前面能知道self._cells = {},那么_cells是个字典,那么字典的values()就是一个列表,这个列表里是啥呢 ~从名字猜,列表里有个事存着Cell类的实例
去看看cell文件,里面果然存在TYPE_STRING!
OK,那么我我们把赋值的代码这样修改
excel_sheet.cell(row=x,y).value = str(a)
excel_sheet.cell(row=x,y).data_type = cell.Cell.TYPE_STRING
excel_sheet.cell(row=x,column=y).style.number_format.format_code = NumberFormat.FORMAT_TEXT
运行发现....报错!不能赋值...........
难道我需要继承重写cell!!这就大条了!仔细看看Cell的类
发现有个
-
def set_explicit_value(self, value=None, data_type=TYPE_STRING):
-
"""Coerce values according to their explicit type"""
-
type_coercion_map = {
-
self.TYPE_INLINE: self.check_string,
-
self.TYPE_STRING: self.check_string,
-
self.TYPE_FORMULA: self.check_string,
-
self.TYPE_NUMERIC: self.check_numeric,
-
self.TYPE_BOOL: bool,
-
self.TYPE_ERROR: self.check_error}
-
try:
-
self._value = type_coercion_map[data_type](value)
-
except KeyError:
-
if data_type not in self.VALID_TYPES:
-
msg = 'Invalid data type: %s' % data_type
-
raise DataTypeException(msg)
-
self._data_type = data_type
oh yeah,代码改成
cur_cell = excel_sheet.cell(row=x,column=y)
cur_cell.set_explicit_value(str(a), cell.Cell.TYPE_STRING)
OK 大功告成并熟悉了下excel文件格式~~
补充说明下,我另外一台电脑上openpyx是1.6.2版的
set_explicit_value函数名有变化
是set_value_explicit,真是蛋痛
阅读(21220) | 评论(0) | 转发(0) |