# Category: Python/Ruby
# 2021-06-29 17:21:09
from pandas import ExcelWriter
import numpy as np
import os
import pandas as pd
import xml.etree.ElementTree as et
# UCD_trn xml file path
xml_path = r"..\gcam-v5.2\input\extra"  # xml file folder
dir_extra = os.listdir(xml_path)  # SSP3 uses trn_UCD_SSP3

# os.listdir() returns entries in arbitrary order.  The files are paired with
# the scenario keys below purely by position, so sort the matching names first
# to make that pairing deterministic.
# NOTE(review): this assumes exactly four matching files whose sorted order is
# ssp1, ssp2, ssp4, ssp5 -- confirm against the folder contents.
trn_xml_ls = sorted(
    file for file in dir_extra if "transportation_UCD_SSP" in file
)

# Scenario key -> path of its transportation XML file.  zip() stops at the
# shorter of the two sequences, so surplus keys or files are silently dropped.
trn_xml_path = dict(
    zip(
        ["ssp" + str(i) for i in [1, 2, 4, 5]],
        [os.path.join(xml_path, file) for file in trn_xml_ls],
    )
)

# Car type
tranSubsector_name_ls = [
    "Compact Car",
    "Large Car and SUV",
    "Mini Car",
    "Multipurpose Vehicle",
    "Subcompact Car",
]
def get_ene_intensity_xml(
    path,
    tranSubsector_name,
    supplysector_name="trn_pass_road_LDV_4W",
    region_name="China",
    stubtechnology_name="FCEV",
):
    """Collect energy-input coefficients for one technology from an XML file.

    Starting from the children of the document root, the function descends
    four levels, keeping at each level only the elements whose ``name``
    attribute equals, in turn, ``region_name``, ``supplysector_name``,
    ``tranSubsector_name`` and ``stubtechnology_name``.  For every child
    (period) of each element that survives, it reads every ``coefficient``
    element located directly under a ``minicam-energy-input`` element.

    Args:
        path: File name or file object accepted by ``ElementTree.parse``.
        tranSubsector_name: ``name`` attribute of the sub-sector to select.
        supplysector_name: ``name`` attribute of the supply sector to select.
        region_name: ``name`` attribute of the region to select.
        stubtechnology_name: ``name`` attribute of the technology to select.

    Returns:
        A ``pandas.DataFrame`` with the columns ``region``, ``supplysector``,
        ``tranSubsector``, ``stubtechnology``, ``period``, ``tag`` and
        ``value``, one row per coefficient in document order.  ``period`` is
        the raw ``year`` attribute string; ``value`` is the coefficient text
        converted to ``float`` and divided by 1055.  When nothing matches the
        frame is empty but still carries all seven columns.

    Raises:
        TypeError, ValueError: If a coefficient element has no text or text
            that is not a number.
    """
    column_names = [
        "region",
        "supplysector",
        "tranSubsector",
        "stubtechnology",
        "period",
        "tag",
        "value",
    ]

    # The first level below the root is taken unfiltered; each later level
    # keeps only the children carrying the wanted ``name`` attribute.
    matches = list(et.parse(path).getroot())
    for wanted_name in (
        region_name,
        supplysector_name,
        tranSubsector_name,
        stubtechnology_name,
    ):
        matches = [
            element
            for parent in matches
            for element in parent
            if element.attrib.get("name") == wanted_name
        ]

    rows = []
    for stubtechnology in matches:
        for period in stubtechnology:
            for coefficient in period.findall("minicam-energy-input/coefficient"):
                rows.append(
                    dict(
                        # Every ancestor was selected by an exact name match,
                        # so its name equals the corresponding filter value.
                        region=region_name,
                        supplysector=supplysector_name,
                        tranSubsector=tranSubsector_name,
                        stubtechnology=stubtechnology_name,
                        period=period.attrib.get("year"),
                        tag=coefficient.tag,
                        # NOTE(review): the original comment labelled this
                        # "btu/vkm to J/vkm", but 1 BTU is about 1055 J, which
                        # would call for multiplication -- confirm the unit.
                        value=float(coefficient.text) / 1055,
                    )
                )

    # Naming the columns explicitly keeps "value" (and the rest) present even
    # when no row matched, so callers can select columns without a KeyError.
    return pd.DataFrame(rows, columns=column_names)
def ene_intensity(scenario):
    """Return one array of energy-intensity values per car type.

    ``scenario`` is handed to ``get_ene_intensity_xml`` as the XML path, once
    for every entry of ``tranSubsector_name_ls``.  From each resulting table
    the ``value`` column is converted to a NumPy array and its first element
    is dropped.  The arrays are returned as a list in the same order as
    ``tranSubsector_name_ls``.
    """
    return [
        # [1:] discards the first value of every series.
        np.array(get_ene_intensity_xml(scenario, car_type)["value"])[1:]
        for car_type in tranSubsector_name_ls
    ]
# Column labels: years 2000 through 2100 in five-year steps.
# NOTE(review): assumes every car type yields exactly 21 values once the first
# one has been dropped by ene_intensity() -- confirm against the XML periods.
columns = range(2000, 2101, 5)

# `key` has to be bound before trn_xml_path can be indexed with it, so walk
# over every scenario.  One table (rows: car types, columns: years) is kept
# per scenario key; `df` stays bound to the table of the last scenario.
ene_intensity_by_scenario = {}
for key, scenario_path in trn_xml_path.items():
    df = pd.DataFrame(
        ene_intensity(scenario_path),
        columns=columns,
        index=tranSubsector_name_ls,
    )
    ene_intensity_by_scenario[key] = df
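# The key point is to understand the data structure of the XML document. In
# the GCAM transportation sector, as traversed by get_ene_intensity_xml, the
# nesting is: root > (top-level child) > region > supplysector >
# tranSubsector > stubtechnology > period > minicam-energy-input > coefficient.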