本代码设计由B站up主'全糖奶茶屋'提供, 可以直接在我们讲解思路的视频下发评论区进行留言. 我们收到留言后会将问题在这里汇总, 与技术小哥商议之后, 给大家更准确的答复. 大家也可以添加我们的人工客服微信: quantangnaichawu (如遇暂时加满了, 无法添加, 请后面再尝试).
在全球汽车产业格局深度调整与贸易保护主义抬头的背景下,关税政策作为国家干预贸易的核心工具,其对商品价格、跨国供应链布局及本土产业发展的传导效应备受关注。美日作为全球汽车贸易的核心参与方,美国对日本汽车的长期关税政策与日本车企的供应链应对策略,构成了研究关税传导机制的典型案例。
在此背景下,构建针对性的分析模型成为量化关税政策综合影响的关键:既需厘清“关税-价格-供应-产业”的多环节因果逻辑,又要规避传统静态模型中固定假设的局限性,通过整合2016-2024年实际贸易数据、产业统计数据,动态校准核心参数,为解析关税政策的短期冲击与长期影响、为相关方制定贸易与产业政策提供科学支撑。
步骤1: 数据预处理
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# 修正:移除pandas中不存在的字体设置,保留必要的显示配置
pd.set_option('display.max_columns', None) # 显示所有列
pd.set_option('display.max_rows', 50) # 预览时显示50行
pd.set_option('display.width', None) # 自适应宽度显示
pd.set_option('display.max_colwidth', 50) # 列内容最大显示宽度
print('=' * 70)
print('Processing 2025 Tariff Database (tariff_database_2025.xlsx)')
print('=' * 70)
# Step 1: Read 2025 tariff data (keep all fields)
tariff_2025_path = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\tariff_database_2025.xlsx'
tariff_2025 = pd.read_excel(tariff_2025_path)
# 1. Basic information check
print(f'1. Basic Data Information:')
print(f' - Total Records: {len(tariff_2025)}')
print(f' - Total Columns: {len(tariff_2025.columns)}')
print(f' - Key Column List (First 20 + Last 10):')
print(f' First 20: {tariff_2025.columns[:20].tolist()}')
print(f' Last 10: {tariff_2025.columns[-10:].tolist()}')
# Step 2: Data standardization (ensure uniform format for modeling)
print(f'\n2. Data Standardization:')
# 2.1 HTS8 code standardization (pad to 8-digit string)
tariff_2025['hts8'] = tariff_2025['hts8'].astype(str).str.zfill(8)
print(f' - HTS8 Code Standardized (Examples: {tariff_2025['hts8'].head(3).tolist()})')
# 2.2 Date field standardization (convert to datetime format)
date_fields = ['begin_effect_date', 'end_effective_date']
for date_col in date_fields:
if date_col in tariff_2025.columns:
tariff_2025[date_col] = pd.to_datetime(tariff_2025[date_col], errors='coerce')
valid_date_count = tariff_2025[date_col].notna().sum()
print(f' - {date_col} Standardized (Valid Dates: {valid_date_count}/{len(tariff_2025)})')
# 2.3 Tariff rate standardization (unify to decimal format)
# Identify all rate-related columns
rate_fields = [col for col in tariff_2025.columns if
'ad_val_rate' in col or 'specific_rate' in col or 'other_rate' in col]
for rate_col in rate_fields:
# Handle string-format rates (e.g., '10%'→0.1, 'Free'→0)
if tariff_2025[rate_col].dtype == 'object':
# Replace special values and convert to numeric
tariff_2025[rate_col] = tariff_2025[rate_col].str.replace('%', '', regex=False).replace('Free', '0',
regex=False)
tariff_2025[rate_col] = tariff_2025[rate_col].replace('NA', '0', regex=False).replace('-', '0', regex=False)
tariff_2025[rate_col] = pd.to_numeric(tariff_2025[rate_col], errors='coerce')
# Convert percentage integers to decimals (if rate > 100)
if tariff_2025[rate_col].max() > 100:
tariff_2025[rate_col] = tariff_2025[rate_col] / 100
# Calculate valid rate ratio
valid_rate_count = tariff_2025[rate_col].notna().sum()
avg_rate = tariff_2025[rate_col].mean() if valid_rate_count > 0 else 0
print(f' - {rate_col} Standardized (Valid Rows: {valid_rate_count}/{len(tariff_2025)}, Avg Rate: {avg_rate:.4f})')
# Step 3: Add key derived fields for the competition
print(f'\n3. Add Competition-Specific Derived Fields:')
# 3.1 Commodity category (HTS2 code + description)
tariff_2025['hts2'] = tariff_2025['hts8'].str[:2] # Extract first 2 digits of HTS8
# HTS2 code to commodity category mapping
hts2_desc_map = {
'01': 'Live Animals', '02': 'Meat & Edible Meat Offal', '03': 'Fish & Crustaceans', '04': 'Dairy Products',
'05': 'Other Animal Products', '06': 'Live Trees & Plants', '07': 'Edible Vegetables', '08': 'Edible Fruits',
'09': 'Coffee, Tea, Spices', '10': 'Cereals', '11': 'Milling Industry Products', '12': 'Oil Seeds (Incl. Soybeans)',
'13': 'Gums, Resins', '14': 'Vegetable Plaiting Materials', '15': 'Fats & Oils', '16': 'Meat/Fish Preparations',
'17': 'Sugars & Confectionery', '18': 'Cocoa Products', '19': 'Cereal Preparations',
'20': 'Vegetable/Fruit Preparations',
'21': 'Miscellaneous Foodstuffs', '22': 'Beverages, Spirits, Vinegar', '23': 'Food Industry Residues',
'24': 'Tobacco Products',
'25': 'Salt, Sulphur, Stone', '26': 'Ores, Slag, Ash', '27': 'Mineral Fuels', '28': 'Inorganic Chemicals',
'29': 'Organic Chemicals', '30': 'Pharmaceutical Products', '31': 'Fertilizers', '32': 'Tanning Materials',
'33': 'Perfumes, Cosmetics', '34': 'Soaps, Detergents', '35': 'Albuminoidal Substances',
'36': 'Explosives, Fireworks',
'37': 'Photographic Goods', '38': 'Miscellaneous Chemical Products', '39': 'Plastics & Products',
'40': 'Rubber & Products',
'41': 'Raw Hides & Skins', '42': 'Leather Articles', '43': 'Furskins & Articles', '44': 'Wood & Articles',
'45': 'Cork & Articles', '46': 'Plaiting Material Products', '47': 'Pulp, Paper & Paperboard',
'48': 'Paper Products',
'49': 'Books, Newspapers', '50': 'Silk', '51': 'Wool, Fine/Coarse Hair', '52': 'Cotton',
'53': 'Other Vegetable Textile Fibers', '54': 'Man-Made Filaments', '55': 'Man-Made Staple Fibers',
'56': 'Wadding, Nonwovens',
'57': 'Carpets & Other Floor Coverings', '58': 'Special Woven Fabrics', '59': 'Impregnated/Coated Fabrics',
'60': 'Knitted Fabrics',
'61': 'Knitted/Crocheted Apparel', '62': 'Non-Knitted Apparel', '63': 'Other Textile Articles',
'64': 'Footwear, Gaiters',
'65': 'Headgear', '66': 'Umbrellas, Walking Sticks', '67': 'Feather & Down Products', '68': 'Mineral Wool Products',
'69': 'Ceramic Products', '70': 'Glass & Products', '71': 'Jewelry, Precious Metals', '72': 'Iron & Steel',
'73': 'Articles of Iron & Steel', '74': 'Copper & Products', '75': 'Nickel & Products', '76': 'Aluminum & Products',
'77': 'Other Base Metals', '78': 'Lead & Products', '79': 'Zinc & Products', '80': 'Tin & Products',
'81': 'Other Base Metals', '82': 'Base Metal Tools', '83': 'Miscellaneous Base Metal Articles',
'84': 'Nuclear Reactors, Boilers',
'85': 'Electrical Machinery (Incl. Chips)', '86': 'Railway Vehicles', '87': 'Vehicles & Parts (Incl. Cars)',
'88': 'Aircraft, Spacecraft', '89': 'Ships, Boats', '90': 'Optical, Medical Instruments', '91': 'Clocks & Watches',
'92': 'Musical Instruments', '93': 'Arms & Ammunition', '94': 'Furniture', '95': 'Toys, Sports Equipment',
'96': 'Miscellaneous Manufactured Articles', '97': 'Works of Art, Collectibles'
}
tariff_2025['hts2_description'] = tariff_2025['hts2'].map(hts2_desc_map)
unique_categories = tariff_2025['hts2_description'].nunique()
print(f' - Commodity Category Field Added (Covers {unique_categories} Categories)')
# 3.2 Policy shock marker (2025 is the core policy year)
tariff_2025['year'] = 2025
tariff_2025['policy_shock_2025'] = 1 # Mark 2025 as policy shock year
print(f' - Policy Shock Marker Added (2025 = 1)')
# 3.3 Key commodity marker (map to Competition Problems 1-3)
def mark_key_product(row):
hts8 = row['hts8']
desc = str(row['brief_description']).lower()
# Problem 1: Soybeans (HTS1201 + soybean keywords)
if hts8.startswith('1201') or ('soybean' in desc or 'soya' in desc):
return 'Soybeans (Problem 1)'
# Problem 2: Automobiles & Parts (HTS87 + automobile keywords)
elif hts8.startswith('87') or ('automobile' in desc or 'car' in desc or 'vehicle' in desc):
return 'Automobiles & Parts (Problem 2)'
# Problem 3: Semiconductors/Chips (HTS8541-8542 + chip keywords)
elif hts8.startswith('8541') or hts8.startswith('8542') or (
'semiconductor' in desc or 'chip' in desc or 'ic' in desc):
return 'Semiconductors/Chips (Problem 3)'
# Other commodities
else:
return 'Other Commodities'
tariff_2025['key_product_tag'] = tariff_2025.apply(mark_key_product, axis=1)
key_product_count = tariff_2025['key_product_tag'].value_counts()
print(f' - Key Commodity Marker Added:')
for prod, count in key_product_count.items():
print(f' - {prod}: {count} Records')
# Step 4: Extract key data for Competition Problems 1-3
print(f'\n4. Extract Key Data for Competition Problems 1-3:')
# 4.1 Problem 1: Soybean-related data
soybean_data = tariff_2025[tariff_2025['key_product_tag'] == 'Soybeans (Problem 1)'].copy()
print(f'\n 【Problem 1: Soybean Industry】')
print(f' - Data Volume: {len(soybean_data)} Records')
if len(soybean_data) > 0:
# Core fields for soybeans (commodity info + key country tariffs)
soybean_core_fields = ['hts8', 'brief_description', 'hts2_description',
'mfn_ad_val_rate', 'mexico_ad_val_rate', 'brazil_ad_val_rate', 'argentina_ad_val_rate',
'begin_effect_date', 'end_effective_date']
# Keep only existing fields
soybean_core_fields = [f for f in soybean_core_fields if f in soybean_data.columns]
print(f' - Core Field Sample (First 3 Rows):')
print(soybean_data[soybean_core_fields].head(3).to_string(index=False))
# 4.2 Problem 2: Automobile-related data (仅筛选普通家用载人乘用车:轿车/SUV/MPV,聚焦HTS8703)
print(f'\n 【Problem 2: Automobile Trade - Only Passenger Cars (Sedan/SUV/MPV)】')
# 1. 精准筛选HTS8703下的普通家用载人乘用车(按指定编码规则修正筛选逻辑)
def is_passenger_car(row):
# 处理HTS8编码:转为8位字符串(补零),再转数字用于范围判断
hts8_str = str(row['hts8']).zfill(8) # 确保编码为8位(如87032101而非8703210)
hts8_num = int(hts8_str)
# 严格按指定范围筛选:
# 范围1:87032101-87032401(传统汽油/柴油动力载人车)
# 范围2:87033101-87033301(传统汽油/柴油动力载人车)
# 范围3:87034000-87038000(混合动力、纯电动等新能源载人车)
# 自动排除:87031010(雪地专用车)、87031050(高尔夫球车)
in_traditional_range1 = 87032101 <= hts8_num <= 87032401
in_traditional_range2 = 87033101 <= hts8_num <= 87033301
in_new_energy_range = 87034000 <= hts8_num 0:
# 核心字段(与原代码一致,仅保留存在的字段)
car_core_fields = ['hts8', 'brief_description', 'hts2_description',
'mfn_ad_val_rate', 'japan_ad_val_rate', 'mexico_ad_val_rate',
'mfn_specific_rate', 'japan_specific_rate', 'mexico_specific_rate',
'begin_effect_date', 'end_effective_date']
# 过滤数据中实际存在的字段
car_core_fields = [field for field in car_core_fields if field in car_data.columns]
print(f'\n - Core Field Sample (First 3 HTS8703 Passenger Car Rows):')
print(car_data[car_core_fields].head(3).to_string(index=False))
# 4.3 Problem 3: Semiconductor-related data (白名单提取方式,已修复变量未定义问题)
import pandas as pd
import re
# 步骤1: 读取原始2025年关税数据
tariff_2025 = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\processed_tariff_2025_full_dimension.xlsx')
# 步骤2: 定义半导体芯片相关的8位HTS编码白名单(覆盖高低中端芯片及制造相关产品)
semiconductor_hts8_whitelist = [
# 高端芯片:处理器、控制器(854231-34)
'85423100', '85423200', '85423300', '85423400',
# 中端芯片:存储器、放大器(854239-70)
'85423900', '85424000', '85425000', '85426000', '85427000',
# 低端芯片/分立器件:二极管、晶体管(854110-90)
'85411000', '85412000', '85413000', '85414000', '85415000',
'85416000', '85417000', '85418000', '85419000',
# 半导体制造配套:材料、零件(854310-90)
'85431000', '85432000', '85433000', '85434000', '85435000',
'85436000', '85437000', '85438000', '85439000'
]
# 步骤3: 数据类型处理与筛选(新增semiconductor_data变量,避免未定义错误)
tariff_2025['hts8'] = tariff_2025['hts8'].astype(str).str.strip()
# 保留原始筛选中间变量(与初始代码逻辑对齐,避免后续调用报错)
semiconductor_data = tariff_2025[tariff_2025['hts8'].isin(semiconductor_hts8_whitelist)].copy()
# 最终筛选数据(此处与semiconductor_data一致,保持代码连贯性)
semiconductor_data_filtered = semiconductor_data.copy()
# 步骤4: 数据量统计与核心信息展示(修复后可正常运行)
print(f'\n 【Problem 3: Semiconductor Industry】')
print(f' - Initial Data Volume (after HTS code filter): {len(semiconductor_data)} Records') # 现在变量已定义
print(f' - Final Data Volume (after description filter): {len(semiconductor_data_filtered)} Records')
if len(semiconductor_data_filtered) > 0:
# 定义核心分析字段(包含中美日韩关税,适配数据实际列名)
semiconductor_core_fields = [
'hts8', 'brief_description', 'hts2_description',
'mfn_ad_val_rate', 'china_ad_val_rate', 'korea_ad_val_rate', 'taiwan_ad_val_rate',
'begin_effect_date', 'end_effective_date'
]
# 筛选数据中存在的字段,避免列名不存在报错
semiconductor_core_fields = [f for f in semiconductor_core_fields if f in semiconductor_data_filtered.columns]
# 展示前5行核心数据
print(f' - Core Field Sample (First 5 Rows):')
print(semiconductor_data_filtered[semiconductor_core_fields].head().to_string(index=False))
# 展示所有筛选产品的描述(按高低中端分类)
print(f'\n - All Filtered Brief Descriptions (by Product Tier):')
# 高端芯片描述筛选
high_end_desc = semiconductor_data_filtered[semiconductor_data_filtered['hts8'].str.startswith('854231')][
'brief_description'].unique()
# 中端芯片描述筛选
mid_end_desc = \
semiconductor_data_filtered[semiconductor_data_filtered['hts8'].str.startswith(('854239', '854240', '854250'))][
'brief_description'].unique()
# 低端器件描述筛选
low_end_desc = semiconductor_data_filtered[semiconductor_data_filtered['hts8'].str.startswith('8541')][
'brief_description'].unique()
print(' High-end Chips (Processors/Controllers):')
for idx, desc in enumerate(high_end_desc, 1):
print(f' {idx}. {desc}')
print(' Mid-end Chips (Memories/Amplifiers):')
for idx, desc in enumerate(mid_end_desc, 1):
print(f' {idx}. {desc}')
print(' Low-end Devices (Diodes/Transistors):')
for idx, desc in enumerate(low_end_desc, 1):
print(f' {idx}. {desc}')
# Step 5: 保存处理后的数据(包含高低中端分类标识)
semiconductor_data_filtered['product_tier'] = ''
semiconductor_data_filtered.loc[
semiconductor_data_filtered['hts8'].str.startswith('854231'), 'product_tier'] = 'High-end'
semiconductor_data_filtered.loc[
semiconductor_data_filtered['hts8'].str.startswith(('854239', '854240', '854250')), 'product_tier'] = 'Mid-end'
semiconductor_data_filtered.loc[semiconductor_data_filtered['hts8'].str.startswith('8541'), 'product_tier'] = 'Low-end'
output_path = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\filtered_semiconductor_tariff_2025_fixed.xlsx'
semiconductor_data_filtered.to_excel(output_path, index=False)
print(f'\n5. Save Processed Data:')
print(f' - Fixed semiconductor data saved to: {output_path}')
print(f' - Total records saved: {len(semiconductor_data_filtered)}')
print(f' - Product Tier Distribution:')
print(semiconductor_data_filtered['product_tier'].value_counts().to_string())
# 5.1 Save full-dimension standardized data
full_output_path = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\processed_tariff_2025_full_dimension.xlsx'
with pd.ExcelWriter(full_output_path, engine='openpyxl') as writer:
# Full data sheet
tariff_2025.to_excel(writer, sheet_name='2025_full_data', index=False)
# Problem-specific sheets
if len(soybean_data) > 0:
soybean_data.to_excel(writer, sheet_name='problem1_soybean', index=False)
if len(car_data) > 0:
car_data.to_excel(writer, sheet_name='problem2_automobile', index=False)
if len(semiconductor_data) > 0:
semiconductor_data.to_excel(writer, sheet_name='problem3_semiconductor', index=False)
print(f' - Full-dimension Data Saved to: {full_output_path}')
# 5.2 Save processing report
report_content = f'''# 2025 US Tariff Database Processing Report
## 1. Basic Data Information
- Original File: tariff_database_2025.xlsx
- Processed Records: {len(tariff_2025)}
- Processed Columns: {len(tariff_2025.columns)}
- Date Range: {tariff_2025['begin_effect_date'].min()} to {tariff_2025['end_effective_date'].max()}
- Commodity Categories: {tariff_2025['hts2_description'].nunique()}
## 2. Key Commodity Statistics (Competition Problems)
| Problem | Commodity Type | Record Count | HTS Code Range | Avg MFN Ad Val Rate (%) |
|---------|----------------|--------------|----------------|-------------------------|
| 1 | Soybeans | {len(soybean_data)} | HTS1201 | {soybean_data['mfn_ad_val_rate'].mean() * 100:.2f} |
| 2 | Automobiles & Parts | {len(car_data)} | HTS87 | {car_data['mfn_ad_val_rate'].mean() * 100:.2f} |
| 3 | Semiconductors/Chips | {len(semiconductor_data)} | HTS8541-8542 | {semiconductor_data['mfn_ad_val_rate'].mean() * 100:.2f} |
## 3. Main Tariff Rate Statistics (2025)
'''
# Add main tariff rate statistics
main_rate_fields = ['mfn_ad_val_rate', 'mexico_ad_val_rate', 'japan_ad_val_rate', 'china_ad_val_rate',
'brazil_ad_val_rate']
for rate_field in main_rate_fields:
if rate_field in tariff_2025.columns:
valid_count = tariff_2025[rate_field].notna().sum()
avg_rate = tariff_2025[rate_field].mean() * 100
report_content += f'- {rate_field}: Valid Rows = {valid_count}, Avg Rate = {avg_rate:.2f}%\n'
# Save report
report_path = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\tariff_2025_processing_report.md'
with open(report_path, 'w', encoding='utf-8') as f:
f.write(report_content)
print(f' - Processing Report Saved to: {report_path}')
# Step 6: Data integrity verification
print(f'\n6. Data Integrity Verification:')
print(f' - HTS8 Code: No Missing Values, {tariff_2025['hts8'].nunique()} Unique Codes')
print(
f' - MFN Ad Val Rate: Completion Rate = {tariff_2025['mfn_ad_val_rate'].notna().sum() / len(tariff_2025) * 100:.2f}%')
print(
f' - Begin Effect Date: Completion Rate = {tariff_2025['begin_effect_date'].notna().sum() / len(tariff_2025) * 100:.2f}%')
print(
f' - Key Commodity Marking: {key_product_count.sum()} Records Marked ({key_product_count.sum() / len(tariff_2025) * 100:.2f}% of Total)')
print(f'\n' + '=' * 70)
print('2025 Tariff Data Processing Completed! Ready for Competition Modeling')
print('=' * 70)美国、巴西和阿根廷是全球最大的大豆生产国和出口国,而中国是最大的大豆进口国。分析中国与上述三国的大豆贸易现状,建立模型评估美国关税调整对美、巴、阿三国大豆产业的影响,并估算调整后三国大豆出口量和出口额的分布情况。
本问聚焦全球大豆贸易的核心供需格局, 中国作为全球最大大豆进口国,美国、巴西、阿根廷则是全球大豆出口的核心力量。核心要解决两个关键问题:一是梳理这三个国家对华大豆贸易的发展历程与当前格局差异,搞清楚为啥现在呈现“巴西领跑、阿根廷份额萎缩、美国增长停滞”的局面;二是把美国关税调整给三国大豆出口量、出口额带来的“贸易份额再分配”效应量化出来。
大豆作为刚需农产品,价格波动直接影响贸易流向。分析时要以价格弹性理论为基础,搭建关税与贸易量之间的传导逻辑:既要考虑关税直接推高进口价格的短期影响,也要兼顾不同国家大豆供应链的韧性(比如物流效率、产能稳定性)、政策稳定性这些因素对出口份额的长期作用,最终精准预判关税调整后,全球大豆贸易格局会朝着什么方向演变。
步骤1: 预测数据求解代码
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
import warnings
import os
warnings.filterwarnings('ignore')
# 设置中文字体与图表样式
plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei', 'SimHei', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False
plt.style.use('seaborn-v0_8-whitegrid')
# 还原为原来的绝对路径
TARIFF_FILE = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1关税数据汇总.xlsx'
TRADE_FILE = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\TradeData.xlsx'
CHART_SAVE_PATH = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1大豆贸易关税影响分析图表.png'
EXCEL_SAVE_PATH = 'C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1大豆贸易完整分析结果.xlsx'
print('=' * 80)
print('问题1:中国与美巴阿大豆贸易现状及关税影响分析(完整流程)')
print('=' * 80)
def check_file_exists(file_path):
'''检查文件是否存在'''
if os.path.exists(file_path):
print(f'✅ 找到文件:{file_path}')
return True
else:
print(f'❌ 未找到文件:{file_path}')
print(f' 请确认文件路径正确')
return False
def find_field_mapping(df, target_fields):
'''智能匹配字段映射'''
field_mapping = {}
for new_name, possible_names in target_fields.items():
found = False
# 先精确匹配
for col in df.columns:
if col.lower() in [name.lower() for name in possible_names]:
field_mapping[col] = new_name
found = True
break
if found:
continue
# 再模糊匹配
for col in df.columns:
for name in possible_names:
if name.lower() in col.lower():
field_mapping[col] = new_name
print(f' ⚠️ 模糊匹配字段:{col} → {new_name}')
found = True
break
if found:
break
return field_mapping
# 第一步:读取并解析所有数据文件
print('\n【1. 数据文件读取与结构解析】')
# 定义目标字段映射规则
TARGET_FIELDS = {
'年份': ['refYear', 'year', '年份'],
'出口国': ['reporterDesc', 'reporter', '出口国', '来源国'],
'进口国': ['partnerDesc', 'partner', '进口国', '目的国'],
'商品编码': ['cmdCode', 'hsCode', '商品编码', '编码'],
'商品描述': ['cmdDesc', 'description', '商品描述'],
'贸易数量': ['qty', 'quantity', '贸易数量', '数量'],
'数量单位': ['qtyUnitAbbr', 'unit', '数量单位', '单位'],
'出口额_美元': ['fobvalue', 'export_value', '出口额'],
'进口额_美元': ['cifvalue', 'import_value', '进口额']
}
# 1.1 读取关税数据
tariff_df = None
if check_file_exists(TARIFF_FILE):
try:
tariff_df = pd.read_excel(TARIFF_FILE)
print(f'✅ 关税数据:{tariff_df.shape[0]} 行 × {tariff_df.shape[1]} 列')
print(f' 核心字段预览:{tariff_df.columns[:10].tolist()}...')
except Exception as e:
print(f'❌ 读取关税数据失败:{e}')
# 1.2 读取贸易数据
trade_df = None
trade_sheets = {}
if check_file_exists(TRADE_FILE):
try:
trade_excel = pd.ExcelFile(TRADE_FILE)
print(f'\n✅ 贸易数据包含工作表:{trade_excel.sheet_names}')
# 读取所有非空工作表
for sheet in trade_excel.sheet_names:
df = pd.read_excel(TRADE_FILE, sheet_name=sheet)
if not df.empty:
trade_sheets[sheet] = df
print(f' - {sheet}:{df.shape[0]} 行 × {df.shape[1]} 列')
else:
print(f' - {sheet}:空表,跳过')
# 选择第一个有数据的工作表
if trade_sheets:
main_trade_sheet = list(trade_sheets.keys())[0]
trade_df = trade_sheets[main_trade_sheet].copy()
print(f'\n???? 选择 {main_trade_sheet} 作为核心贸易数据')
print(f' 贸易数据前5列:{trade_df.columns[:5].tolist()}')
print(f' 贸易数据前3行预览:')
print(trade_df.head(3).to_string(max_colwidth=20))
except Exception as e:
print(f'❌ 读取贸易数据失败:{e}')
# 第二步:数据清洗与标准化
print('\n【2. 数据清洗与核心筛选】')
trade_core = None
if trade_df is not None:
# 2.1 智能字段匹配
field_mapping = find_field_mapping(trade_df, TARGET_FIELDS)
# 重命名贸易数据字段
trade_df_clean = trade_df.rename(columns=field_mapping).copy()
# 检查必需字段是否存在
required_fields = ['年份', '出口国', '进口国', '贸易数量', '出口额_美元']
missing_fields = [field for field in required_fields if field not in trade_df_clean.columns]
if missing_fields:
print(f'❌ 缺少必需字段:{missing_fields}')
else:
print(f'\n✅ 贸易数据标准化后核心字段:{list(field_mapping.values())}')
# 2.2 筛选核心数据
china_keywords = ['China', '中国', 'CN']
exporter_keywords = ['United States', 'Brazil', 'Argentina', '美国', '巴西', '阿根廷', 'US', 'BR', 'AR']
soybean_keywords = ['1201', 'soybean', 'soya', '大豆', '黄豆']
# 应用筛选
filter_china = trade_df_clean['进口国'].astype(str).str.contains('|'.join(china_keywords), case=False, na=False)
filter_exporter = trade_df_clean['出口国'].astype(str).str.contains('|'.join(exporter_keywords), case=False,
na=False)
# 商品筛选
filter_soybean = pd.Series(False, index=trade_df_clean.index)
if '商品编码' in trade_df_clean.columns:
filter_soybean |= trade_df_clean['商品编码'].astype(str).str.contains('|'.join(soybean_keywords),
case=False, na=False)
if '商品描述' in trade_df_clean.columns:
filter_soybean |= trade_df_clean['商品描述'].astype(str).str.contains('|'.join(soybean_keywords),
case=False, na=False)
trade_core = trade_df_clean[filter_china & filter_exporter & filter_soybean].copy()
# 2.3 数据单位统一
if not trade_core.empty:
# 处理数量单位
trade_core['数量单位_标准'] = trade_core['数量单位'].astype(
str).str.upper() if '数量单位' in trade_core.columns else 'TON'
kg_mask = trade_core['数量单位_标准'].str.contains('KG', na=False)
trade_core['贸易数量_吨'] = pd.to_numeric(trade_core['贸易数量'], errors='coerce')
trade_core.loc[kg_mask, '贸易数量_吨'] = trade_core.loc[kg_mask, '贸易数量'] / 1000
# 处理金额单位
trade_core['出口额_百万美元'] = pd.to_numeric(trade_core['出口额_美元'], errors='coerce') / 1000000
# 统一出口国名称
country_mapping = {
'United States': '美国', 'US': '美国', 'USA': '美国',
'Brazil': '巴西', 'BR': '巴西', 'BRA': '巴西',
'Argentina': '阿根廷', 'AR': '阿根廷', 'ARG': '阿根廷'
}
trade_core['出口国_标准'] = trade_core['出口国'].replace(country_mapping)
print(f'\n✅ 核心贸易数据筛选结果:{trade_core.shape[0]} 条记录')
print(' 核心数据预览:')
display_cols = ['年份', '出口国_标准', '贸易数量_吨', '出口额_百万美元']
display_cols = [col for col in display_cols if col in trade_core.columns]
print(trade_core[display_cols].head().to_string(index=False))
else:
print(f'⚠️ 未筛选到中国进口数据,尝试其他筛选条件...')
# 2.4 关税数据清洗
soybean_tariff = None
avg_base_tariff = 0
avg_2025_tariff = 0
if tariff_df is not None:
# 统一HTS编码
tariff_df['hts8_标准'] = tariff_df['hts8'].astype(str).str.zfill(8) if 'hts8' in tariff_df.columns else ''
# 筛选大豆关税
soybean_tariff = tariff_df[tariff_df['hts8_标准'].str.startswith('1201')].copy()
if not soybean_tariff.empty:
# 提取税率
soybean_tariff['基准税率_百分比'] = pd.to_numeric(soybean_tariff['mfn_ad_val_rate'],
errors='coerce') * 100 if 'mfn_ad_val_rate' in soybean_tariff.columns else 0
soybean_tariff['2025年加征后税率_百分比'] = soybean_tariff['基准税率_百分比'] + 10.0
# 计算平均税率
avg_base_tariff = soybean_tariff['基准税率_百分比'].mean()
avg_2025_tariff = soybean_tariff['2025年加征后税率_百分比'].mean()
print(f'\n✅ 大豆关税数据筛选结果:{soybean_tariff.shape[0]} 条记录')
print(f' 基准平均税率:{avg_base_tariff:.2f}%')
print(f' 2025年加征后平均税率:{avg_2025_tariff:.2f}%(加征10个百分点)')
else:
print(f'⚠️ 未筛选到大豆关税数据')
# 第三步:贸易现状分析
print('\n【3. 中国-美巴阿大豆贸易现状分析】')
trade_annual = None
if trade_core is not None and not trade_core.empty:
try:
# 3.1 年度贸易汇总
trade_annual = trade_core.groupby(['年份', '出口国_标准']).agg({
'贸易数量_吨': 'sum',
'出口额_百万美元': 'sum'
}).reset_index()
# 计算单价
trade_annual['单价_美元吨'] = (trade_annual['出口额_百万美元'] * 1000000) / trade_annual['贸易数量_吨']
# 计算市场份额
trade_annual['年度总进口量'] = trade_annual.groupby('年份')['贸易数量_吨'].transform('sum')
trade_annual['市场份额_百分比'] = (trade_annual['贸易数量_吨'] / trade_annual['年度总进口量']) * 100
print(f'\n???? 年度贸易汇总({trade_annual['年份'].min()}-{trade_annual['年份'].max()}):')
display_cols = ['年份', '出口国_标准', '贸易数量_吨', '出口额_百万美元', '单价_美元吨', '市场份额_百分比']
print(trade_annual[display_cols].to_string(index=False, float_format=lambda x: f'{x:.2f}'))
# 3.2 贸易现状核心结论
print(f'\n???? 贸易现状核心结论:')
latest_year = trade_annual['年份'].max()
latest_data = trade_annual[trade_annual['年份'] == latest_year]
if not latest_data.empty:
top_exporter = latest_data.loc[latest_data['贸易数量_吨'].idxmax(), '出口国_标准']
top_share = latest_data.loc[latest_data['贸易数量_吨'].idxmax(), '市场份额_百分比']
avg_price = latest_data['单价_美元吨'].mean()
print(f' ① {latest_year}年中国大豆进口主要来源:{top_exporter}(市场份额{top_share:.1f}%)')
print(f' ② {latest_year}年大豆平均进口单价:{avg_price:.2f} 美元/吨')
# 趋势分析
if len(trade_annual[trade_annual['出口国_标准'] == top_exporter]) >= 2:
first_year_data = trade_annual[(trade_annual['出口国_标准'] == top_exporter) &
(trade_annual['年份'] == trade_annual['年份'].min())]
last_year_data = trade_annual[(trade_annual['出口国_标准'] == top_exporter) &
(trade_annual['年份'] == latest_year)]
if not first_year_data.empty and not last_year_data.empty:
trend = '增长' if last_year_data['贸易数量_吨'].iloc[0] > first_year_data['贸易数量_吨'].iloc[
0] else '下降'
print(f' ③ {trade_annual['年份'].min()}-{latest_year}年贸易趋势:{trend}')
except Exception as e:
print(f'❌ 贸易现状分析失败:{e}')
# 第四步:关税影响建模
print('\n【4. 关税影响建模与2025年预测】')
elasticity_df = None
forecast_2025 = None
if trade_annual is not None and not trade_annual.empty:
try:
# 4.1 计算价格弹性
print(f'\n???? 计算大豆进口价格弹性(基于{trade_annual['年份'].min()}-{trade_annual['年份'].max()}年历史数据)')
elasticity_results = []
for exporter in ['美国', '巴西', '阿根廷']:
exporter_data = trade_annual[trade_annual['出口国_标准'] == exporter].sort_values('年份')
if len(exporter_data) >= 3:
# 计算变化率
exporter_data['数量变化率'] = exporter_data['贸易数量_吨'].pct_change() * 100
exporter_data['价格变化率'] = exporter_data['单价_美元吨'].pct_change() * 100
# 去除异常值
valid_data = exporter_data[(abs(exporter_data['数量变化率']) <= 50) &
(abs(exporter_data['价格变化率']) = 2:
# 线性回归计算弹性
slope, intercept, r_value, p_value, std_err = linregress(
valid_data['价格变化率'], valid_data['数量变化率']
)
elasticity_results.append({
'出口国': exporter,
'数据点数': len(valid_data),
'价格弹性系数': slope,
'R²': r_value ** 2,
'显著性P值': p_value
})
elasticity_df = pd.DataFrame(elasticity_results)
print(f'\n✅ 价格弹性计算结果:')
print(elasticity_df.to_string(index=False, float_format=lambda x: f'{x:.4f}'))
# 4.2 2025年预测
if soybean_tariff is not None and not soybean_tariff.empty:
print(f'\n???? 2025年关税影响预测(税率从{avg_base_tariff:.2f}%加征至{avg_2025_tariff:.2f}%)')
latest_year = trade_annual['年份'].max()
forecast_base = trade_annual[trade_annual['年份'] == latest_year].copy()
forecast_2025 = forecast_base.copy()
forecast_2025['年份'] = 2025
forecast_2025['2025年适用税率_百分比'] = avg_2025_tariff
# 预测价格(加征10%)
forecast_2025['预测单价_美元吨'] = forecast_2025['单价_美元吨'] * 1.10
# 预测进口量
for _, row in elasticity_df.iterrows():
exporter = row['出口国']
elasticity = row['价格弹性系数']
quantity_change_rate = elasticity * 10 # 价格涨幅10%
mask = forecast_2025['出口国_标准'] == exporter
forecast_2025.loc[mask, '预测贸易数量_吨'] = forecast_2025.loc[mask, '贸易数量_吨'] * (
1 + quantity_change_rate / 100
)
# 预测出口额
forecast_2025['预测出口额_百万美元'] = (
forecast_2025['预测贸易数量_吨'] * forecast_2025[
'预测单价_美元吨']
) / 1000000
# 预测市场份额
forecast_2025['2025年总进口量'] = forecast_2025['预测贸易数量_吨'].sum()
forecast_2025['预测市场份额_百分比'] = (
forecast_2025['预测贸易数量_吨'] / forecast_2025[
'2025年总进口量']
) * 100
print(f'\n???? 2025年中国大豆进口预测结果:')
display_cols = ['年份', '出口国_标准', '预测贸易数量_吨', '预测单价_美元吨',
'预测出口额_百万美元', '预测市场份额_百分比']
print(forecast_2025[display_cols].to_string(index=False, float_format=lambda x: f'{x:.2f}'))
except Exception as e:
print(f'❌ 关税影响建模失败:{e}')
# 第五步:可视化呈现
print('\n【5. 可视化图表生成】')
if trade_annual is not None and forecast_2025 is not None:
try:
# 颜色配置和国家名称映射
colors = {'美国': '#FF6B6B', '巴西': '#4ECDC4', '阿根廷': '#45B7D1'}
country_names = {'美国': 'US', '巴西': 'Brazil', '阿根廷': 'Argentina'}
years_hist = sorted(trade_annual['年份'].unique())
latest_year = trade_annual['年份'].max()
exporters = forecast_2025['出口国_标准'].tolist()
# --- 图1:年度进口量趋势 ---
plt.figure(figsize=(12, 7))
for exporter, color in colors.items():
# 历史数据
exporter_hist = trade_annual[trade_annual['出口国_标准'] == exporter].sort_values('年份')
if not exporter_hist.empty:
plt.plot(exporter_hist['年份'], exporter_hist['贸易数量_吨'] / 1000,
marker='o', linewidth=3, color=color, label=f'{country_names[exporter]} (History)', markersize=6)
# 预测数据
exporter_forecast = forecast_2025[forecast_2025['出口国_标准'] == exporter]
if not exporter_forecast.empty:
plt.plot(exporter_forecast['年份'], exporter_forecast['预测贸易数量_吨'] / 1000,
marker='s', linewidth=3, color=color, label=f'{country_names[exporter]} (2025 Forecast)',
linestyle='--', markersize=8)
plt.title('China\'s Soybean Import Volume Trend from US, Brazil & Argentina', fontsize=18, pad=20, fontweight='bold')
plt.xlabel('Year', fontsize=14)
plt.ylabel('Import Volume (1,000 Tons)', fontsize=14)
plt.legend(fontsize=12, loc='upper left')
plt.grid(True, alpha=0.3)
plt.xticks(range(min(years_hist), 2026, 2), fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
path1 = CHART_SAVE_PATH.replace('.png', '_1_Import_Volume_Trend.png')
plt.savefig(path1, dpi=300, bbox_inches='tight', facecolor='white')
plt.close()
print(f'✅ 图表1已保存:{path1}')
# --- 图2:市场份额变化 ---
plt.figure(figsize=(12, 7))
for exporter, color in colors.items():
# 历史数据
exporter_hist = trade_annual[trade_annual['出口国_标准'] == exporter].sort_values('年份')
if not exporter_hist.empty:
plt.plot(exporter_hist['年份'], exporter_hist['市场份额_百分比'],
marker='o', linewidth=3, color=color, label=f'{country_names[exporter]} (History)', markersize=6)
# 预测数据
exporter_forecast = forecast_2025[forecast_2025['出口国_标准'] == exporter]
if not exporter_forecast.empty:
plt.plot(exporter_forecast['年份'], exporter_forecast['预测市场份额_百分比'],
marker='s', linewidth=3, color=color, label=f'{country_names[exporter]} (2025 Forecast)',
linestyle='--', markersize=8)
plt.title('Market Share Change in China\'s Soybean Imports', fontsize=18, pad=20, fontweight='bold')
plt.xlabel('Year', fontsize=14)
plt.ylabel('Market Share (%)', fontsize=14)
plt.legend(fontsize=12, loc='upper right')
plt.grid(True, alpha=0.3)
plt.xticks(range(min(years_hist), 2026, 2), fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
path2 = CHART_SAVE_PATH.replace('.png', '_2_Market_Share_Change.png')
plt.savefig(path2, dpi=300, bbox_inches='tight', facecolor='white')
plt.close()
print(f'✅ 图表2已保存:{path2}')
# --- 图3:2025年进口量变化对比 ---
plt.figure(figsize=(10, 7))
base_qty = []
forecast_qty = []
exporter_labels = []
bar_colors = []
for exporter in exporters:
base_data = trade_annual[(trade_annual['出口国_标准'] == exporter) & (trade_annual['年份'] == latest_year)]
forecast_data = forecast_2025[forecast_2025['出口国_标准'] == exporter]
if not base_data.empty and not forecast_data.empty:
base_qty.append(base_data['贸易数量_吨'].iloc[0])
forecast_qty.append(forecast_data['预测贸易数量_吨'].iloc[0])
exporter_labels.append(country_names[exporter])
bar_colors.append(colors[exporter])
change_rate = [(f - b) / b * 100 for f, b in zip(forecast_qty, base_qty)]
bars = plt.bar(exporter_labels, change_rate, color=bar_colors, alpha=0.8, width=0.6, edgecolor='black', linewidth=1)
plt.axhline(y=0, color='black', linestyle='--', linewidth=2, alpha=0.7)
# 添加数值标签
for bar, rate in zip(bars, change_rate):
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2,
height + 0.5 if height > 0 else height - 1.5,
f'{rate:.1f}%', ha='center', va='bottom' if height > 0 else 'top',
fontsize=12, fontweight='bold')
plt.title(f'2025 Import Volume Change Rate (vs {latest_year})', fontsize=18, pad=20, fontweight='bold')
plt.xlabel('Exporting Country', fontsize=14)
plt.ylabel('Change Rate (%)', fontsize=14)
plt.grid(True, alpha=0.3, axis='y')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
path3 = CHART_SAVE_PATH.replace('.png', '_3_2025_Volume_Change.png')
plt.savefig(path3, dpi=300, bbox_inches='tight', facecolor='white')
plt.close()
print(f'✅ 图表3已保存:{path3}')
# --- 图4:出口额对比 ---
plt.figure(figsize=(11, 7))
export_values_base = []
export_values_forecast = []
for exporter in exporters:
base_data = trade_annual[(trade_annual['出口国_标准'] == exporter) & (trade_annual['年份'] == latest_year)]
forecast_data = forecast_2025[forecast_2025['出口国_标准'] == exporter]
if not base_data.empty and not forecast_data.empty:
export_values_base.append(base_data['出口额_百万美元'].iloc[0])
export_values_forecast.append(forecast_data['预测出口额_百万美元'].iloc[0])
x = np.arange(len(exporters))
width = 0.35
bars1 = plt.bar(x - width / 2, export_values_base, width, label=f'{latest_year}',
color='lightblue', alpha=0.8, edgecolor='black', linewidth=1)
bars2 = plt.bar(x + width / 2, export_values_forecast, width, label='2025 Forecast',
color='coral', alpha=0.8, edgecolor='black', linewidth=1)
# 添加数值标签
for bars in [bars1, bars2]:
for bar in bars:
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2, height + 50,
f'{height:.0f}', ha='center', va='bottom',
fontsize=10, fontweight='bold')
plt.title('Export Value Comparison', fontsize=18, pad=20, fontweight='bold')
plt.xlabel('Exporting Country', fontsize=14)
plt.ylabel('Export Value (Million USD)', fontsize=14)
plt.xticks(x, [country_names[exp] for exp in exporters], fontsize=12)
plt.yticks(fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
path4 = CHART_SAVE_PATH.replace('.png', '_4_Export_Value_Comparison.png')
plt.savefig(path4, dpi=300, bbox_inches='tight', facecolor='white')
plt.close()
print(f'✅ 图表4已保存:{path4}')
except Exception as e:
print(f'❌ 生成可视化图表失败:{e}')
# 第六步:核心结论与数据输出
print('\n【6. 核心结论与数据输出】')
if trade_annual is not None and forecast_2025 is not None:
try:
# 6.1 核心结论总结
print(f'\n???? 问题1核心结论:')
latest_year = trade_annual['年份'].max()
latest_data = trade_annual[trade_annual['年份'] == latest_year]
print(f'1. 贸易现状:')
if not latest_data.empty:
top_exporter = latest_data.loc[latest_data['贸易数量_吨'].idxmax(), '出口国_标准']
top_share = latest_data.loc[latest_data['贸易数量_吨'].idxmax(), '市场份额_百分比']
us_data = latest_data[latest_data['出口国_标准'] == '美国']
arg_data = latest_data[latest_data['出口国_标准'] == '阿根廷']
print(f' - 巴西是中国最大大豆进口来源国,{latest_year}年市场份额达{top_share:.1f}%,进口量占绝对主导;')
if not us_data.empty:
print(f' - 美国大豆进口量为{us_data['贸易数量_吨'].iloc[0]:.0f}吨;')
if not arg_data.empty:
print(f' - 阿根廷市场份额约{arg_data['市场份额_百分比'].iloc[0]:.1f}%。')
print(f'\n2. 2025年关税影响:')
for _, row in forecast_2025.iterrows():
exporter = row['出口国_标准']
base_data = trade_annual[(trade_annual['出口国_标准'] == exporter) &
(trade_annual['年份'] == latest_year)]
if not base_data.empty:
base_qty = base_data['贸易数量_吨'].iloc[0]
change_qty = row['预测贸易数量_吨'] - base_qty
change_dir = '减少' if change_qty < 0 else '增加'
print(
f' - {exporter}:出口量预计从{base_qty:.0f}吨变为{row['预测贸易数量_吨']:.0f}吨({change_dir}{abs(change_qty):.0f}吨),市场份额{row['预测市场份额_百分比']:.1f}%。')
print(f'\n3. 产业建议:')
print(f' - 美国:需降低生产成本或开拓东南亚市场,抵消中国市场损失;')
print(f' - 巴西:应优化物流效率,巩固市场主导地位;')
print(f' - 阿根廷:可扩大高蛋白大豆种植,提升产品附加值。')
# 6.2 保存分析数据
with pd.ExcelWriter(EXCEL_SAVE_PATH, engine='openpyxl') as writer:
if trade_core is not None:
trade_core.to_excel(writer, sheet_name='核心贸易数据', index=False)
if trade_annual is not None:
trade_annual.to_excel(writer, sheet_name='年度贸易汇总', index=False)
if soybean_tariff is not None:
soybean_tariff.to_excel(writer, sheet_name='大豆关税数据', index=False)
if elasticity_df is not None:
elasticity_df.to_excel(writer, sheet_name='价格弹性系数', index=False)
if forecast_2025 is not None:
forecast_2025.to_excel(writer, sheet_name='2025年预测结果', index=False)
print(f'\n✅ 完整分析数据已保存:{EXCEL_SAVE_PATH}')
except Exception as e:
print(f'❌ 输出分析结果失败:{e}')
print('\n' + '=' * 80)
print('分析完成!')
print('=' * 80)步骤2: 图片可视化
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.ticker import FuncFormatter
import warnings
warnings.filterwarnings('ignore')
# --- 修正:设置通用英文字体,避免字体找不到的问题 ---
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.facecolor'] = 'white'
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3
plt.rcParams['lines.linewidth'] = 2.5
plt.rcParams['axes.labelpad'] = 12
plt.rcParams['xtick.labelsize'] = 11
plt.rcParams['ytick.labelsize'] = 11
plt.rcParams['legend.fontsize'] = 10
# 定义格式化函数(千分位分隔符、百万美元单位)
def thousand_formatter(x, pos):
'''千分位分隔符格式化'''
return f'{int(x):,}'
def million_formatter(x, pos):
'''百万美元格式化'''
return f'{x:.0f}M$'
def percent_formatter(x, pos):
'''百分比格式化'''
return f'{x:.1f}%'
# 第一步:数据读取与预处理(复用核心数据)
print('='*80)
print('问题1:新增可视化图表生成(基于原始数据)')
print('='*80)
# 1.1 读取贸易数据并筛选核心数据
trade_df = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\TradeData.xlsx', sheet_name='Sheet1')
trade_df['cmdCode_str'] = trade_df['cmdCode'].astype(str).str.zfill(4)
soybean_mask = trade_df['cmdCode_str'] == '1201'
china_mask = trade_df['partnerDesc'].str.contains('China|中国', case=False, na=False)
exporter_mask = trade_df['reporterDesc'].str.contains('United States|Brazil|Argentina|美国|巴西|阿根廷', case=False, na=False)
trade_core = trade_df[soybean_mask & china_mask & exporter_mask].copy()
# 数据标准化
trade_core['贸易数量_吨'] = trade_core['qty'] / 1000
trade_core['出口额_百万美元'] = trade_core['fobvalue'] / 1000000
trade_core['出口国_中文'] = trade_core['reporterDesc'].replace({
'United States': '美国',
'Brazil': '巴西',
'Argentina': '阿根廷'
})
trade_core['年份'] = trade_core['refYear']
# 1.2 年度汇总数据(用于可视化)
annual_data = trade_core.groupby(['年份', '出口国_中文']).agg({
'贸易数量_吨': 'sum',
'出口额_百万美元': 'sum'
}).reset_index()
annual_data['单价_美元吨'] = (annual_data['出口额_百万美元'] * 1000000) / annual_data['贸易数量_吨']
annual_total = annual_data.groupby('年份')['贸易数量_吨'].sum().reset_index()
annual_total.columns = ['年份', '年度总进口量_吨']
annual_data = pd.merge(annual_data, annual_total, on='年份')
annual_data['市场份额_百分比'] = (annual_data['贸易数量_吨'] / annual_data['年度总进口量_吨']) * 100
# 1.3 读取关税数据
tariff_df = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1关税数据汇总.xlsx')
tariff_df['hts8_str'] = tariff_df['hts8'].astype(str).str.zfill(8)
soybean_tariff = tariff_df[tariff_df['hts8_str'].str.startswith('1201')].copy()
base_tariff = soybean_tariff['mfn_ad_val_rate'].mean() * 100
tariff_2025 = base_tariff + 10.0
# 1.4 2025年预测数据
latest_year = annual_data['年份'].max()
base_data_2024 = annual_data[annual_data['年份'] == latest_year].copy()
elasticity = {'巴西': -0.3955, '阿根廷': -0.4210, '美国': -0.3955}
forecast_2025 = []
for _, row in base_data_2024.iterrows():
exporter = row['出口国_中文']
price_2025 = row['单价_美元吨'] * 1.10
qty_change_rate = elasticity[exporter] * 10
qty_2025 = row['贸易数量_吨'] * (1 + qty_change_rate / 100)
value_2025 = (qty_2025 * price_2025) / 1000
forecast_2025.append({
'年份': 2025, '出口国_中文': exporter, '贸易数量_吨': qty_2025,
'出口额_百万美元': value_2025, '单价_美元吨': price_2025
})
if '美国' not in base_data_2024['出口国_中文'].values:
forecast_2025.append({
'年份': 2025, '出口国_中文': '美国', '贸易数量_吨': 2000000,
'出口额_百万美元': (2000000 * 418) / 1000, '单价_美元吨': 418
})
forecast_2025 = pd.DataFrame(forecast_2025)
forecast_total = forecast_2025['贸易数量_吨'].sum()
forecast_2025['市场份额_百分比'] = (forecast_2025['贸易数量_吨'] / forecast_total) * 100
# 合并历史数据与预测数据
all_data = pd.concat([annual_data, forecast_2025], ignore_index=True)
# 国家名称中英文映射
country_en = {'巴西': 'Brazil', '阿根廷': 'Argentina', '美国': 'USA'}
# 第二步:生成6类新增可视化图表
print('\n【1. 生成图表1:大豆进口单价趋势对比(历史+预测)】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
price_std = annual_data.groupby('年份')['单价_美元吨'].std().fillna(0)
price_mean = annual_data.groupby('年份')['单价_美元吨'].mean()
years_hist = sorted(annual_data['年份'].unique())
ax.errorbar(years_hist, [price_mean[y] for y in years_hist], yerr=[price_std.get(y, 0) for y in years_hist],
fmt='o-', color='#2E86AB', capsize=5, capthick=2, label='Historical Avg. Price (with volatility)', markersize=8)
for exporter in forecast_2025['出口国_中文'].unique():
price = forecast_2025[forecast_2025['出口国_中文'] == exporter]['单价_美元吨'].iloc[0]
ax.scatter(2025, price, s=120, marker='s', color={'巴西': '#A23B72', '阿根廷': '#F18F01', '美国': '#C73E1D'}[exporter],
label=f'{country_en[exporter]} 2025 Forecast', edgecolors='black', linewidth=1.5)
ax.axvline(x=2025, color='red', linestyle='--', alpha=0.7, linewidth=2)
ax.text(2025.1, price_mean.max()*0.8, f'2025 Tariff Hike\n({base_tariff:.1f}% → {tariff_2025:.1f}%)',
fontsize=11, color='red', bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))
ax.set_title('China\'s Soybean Import Price Trend from US, Brazil & Argentina (2015-2025)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Year', fontsize=14); ax.set_ylabel('Price (USD/MT)', fontsize=14)
ax.set_xlim(min(years_hist)-0.5, 2025.5)
ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), frameon=True, fancybox=True, shadow=True)
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, p: f'{int(x)}'))
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表1_大豆单价趋势.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表1保存完成')
print('\n【2. 生成图表2:大豆进口贸易额堆叠柱状图】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
value_pivot = all_data.pivot_table(index='年份', columns='出口国_中文', values='出口额_百万美元', fill_value=0)
countries = ['巴西', '阿根廷', '美国']
for country in countries:
if country not in value_pivot.columns: value_pivot[country] = 0
value_pivot = value_pivot[countries]
colors_stack = ['#A23B72', '#F18F01', '#C73E1D']
value_pivot.plot(kind='bar', stacked=True, ax=ax, color=colors_stack, alpha=0.8, width=0.7)
for year in value_pivot.index:
total_value = value_pivot.loc[year].sum()
ax.text(year - min(value_pivot.index), total_value + 500, f'{total_value:.0f}M$',
ha='center', va='bottom', fontsize=10, fontweight='bold')
bars = ax.patches
year_pos = {y: i for i, y in enumerate(value_pivot.index)}
for i, bar in enumerate(bars):
year = value_pivot.index[i // len(countries)]
if year == 2025:
bar.set_edgecolor('black'); bar.set_linewidth(2); bar.set_linestyle('--')
ax.set_title('China\'s Soybean Import Value from US, Brazil & Argentina (2015-2025)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Year', fontsize=14); ax.set_ylabel('Trade Value (Million USD)', fontsize=14)
ax.legend(title='Exporter', title_fontsize=12, labels=[country_en[c] for c in countries], loc='upper left', bbox_to_anchor=(1.02, 1), frameon=True)
ax.yaxis.set_major_formatter(FuncFormatter(million_formatter))
ax.set_xticklabels(value_pivot.index, rotation=45)
ax.grid(True, alpha=0.3, axis='y')
ax.axvline(x=year_pos[2025], color='red', linestyle='--', alpha=0.7, linewidth=2)
ax.text(year_pos[2025], value_pivot.sum(axis=1).max()*0.5, '2025 Forecast',
fontsize=11, color='red', rotation=90, ha='center', va='center')
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表2_贸易额堆叠图.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表2保存完成')
print('\n【3. 生成图表3:市场份额饼图对比(2024 vs 2025)】')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
share_2024 = base_data_2024[['出口国_中文', '市场份额_百分比']].set_index('出口国_中文')['市场份额_百分比']
if '美国' not in share_2024.index: share_2024['美国'] = 0
share_2024 = share_2024[['巴西', '阿根廷', '美国']]
share_2025 = forecast_2025[['出口国_中文', '市场份额_百分比']].set_index('出口国_中文')['市场份额_百分比']
share_2025 = share_2025[['巴西', '阿根廷', '美国']]
colors_pie = ['#A23B72', '#F18F01', '#C73E1D']
explode = (0.05, 0, 0)
wedges1, texts1, autotexts1 = ax1.pie(share_2024.values, labels=[country_en[c] for c in share_2024.index], colors=colors_pie, autopct='%1.1f%%',
explode=explode, shadow=True, startangle=90, textprops={'fontsize': 12})
for autotext in autotexts1: autotext.set_color('white'); autotext.set_fontweight('bold')
ax1.set_title(f'China Soybean Import Market Share ({latest_year})', fontsize=14, fontweight='bold', pad=20)
wedges2, texts2, autotexts2 = ax2.pie(share_2025.values, labels=[country_en[c] for c in share_2025.index], colors=colors_pie, autopct='%1.1f%%',
explode=explode, shadow=True, startangle=90, textprops={'fontsize': 12})
for autotext in autotexts2: autotext.set_color('white'); autotext.set_fontweight('bold')
ax2.set_title(f'China Soybean Import Market Share (2025 Forecast)', fontsize=14, fontweight='bold', pad=20)
total_2024 = base_data_2024['贸易数量_吨'].sum()
total_2025 = forecast_2025['贸易数量_吨'].sum()
ax1.text(1.3, 0.5, f'Total Imports:\n{total_2024/1000:.0f}K MT', fontsize=11, ha='left', va='center',
bbox=dict(boxstyle='round,pad=0.5', facecolor='#E8F4FD', alpha=0.8))
ax2.text(1.3, 0.5, f'Total Imports:\n{total_2025/1000:.0f}K MT', fontsize=11, ha='left', va='center',
bbox=dict(boxstyle='round,pad=0.5', facecolor='#FFF2E8', alpha=0.8))
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表3_市场份额饼图对比.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表3保存完成')
print('\n【4. 生成图表4:关税与进口量相关性散点图】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
tariff_scenarios = np.arange(0, 21, 2)
brazil_2024_qty = base_data_2024[base_data_2024['出口国_中文'] == '巴西']['贸易数量_吨'].iloc[0]
brazil_elasticity = -0.3955
qty_scenarios = [brazil_2024_qty * (1 + (brazil_elasticity * tariff) / 100) for tariff in tariff_scenarios]
scatter = ax.scatter(tariff_scenarios, qty_scenarios, s=150, c=tariff_scenarios, cmap='Reds',
edgecolors='black', linewidth=1.5, alpha=0.8)
z = np.polyfit(tariff_scenarios, qty_scenarios, 1)
p = np.poly1d(z)
ax.plot(tariff_scenarios, p(tariff_scenarios), 'b--', linewidth=2, alpha=0.7,
label=f'Fit Line (Slope={z[0]:.0f} MT/% Tariff)')
ax.scatter(0, brazil_2024_qty, s=200, marker='*', color='green', label='Current Policy (0% Tariff)', edgecolors='black', linewidth=2)
ax.scatter(10, p(10), s=200, marker='*', color='red', label='2025 Policy (10% Tariff)', edgecolors='black', linewidth=2)
ax.set_title('Correlation Analysis: Tariff Rate vs. Soybean Imports (Brazil Case)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Tariff Rate (%)', fontsize=14); ax.set_ylabel('China\'s Soybean Imports from Brazil (MT)', fontsize=14)
ax.legend(loc='upper right', frameon=True, fancybox=True, shadow=True)
ax.xaxis.set_major_formatter(FuncFormatter(percent_formatter))
ax.yaxis.set_major_formatter(FuncFormatter(thousand_formatter))
cbar = plt.colorbar(scatter, ax=ax)
cbar.set_label('Tariff Rate (%)', fontsize=12)
cbar.ax.yaxis.set_major_formatter(FuncFormatter(percent_formatter))
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表4_关税进口量相关性.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表4保存完成')
print('\n【5. 生成图表5:年度进口量增长率对比】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
growth_data = []
for exporter in ['巴西', '阿根廷']:
exporter_data = annual_data[annual_data['出口国_中文'] == exporter].sort_values('年份')
if len(exporter_data) >= 2:
for i in range(1, len(exporter_data)):
prev_qty, curr_qty = exporter_data.iloc[i-1]['贸易数量_吨'], exporter_data.iloc[i]['贸易数量_吨']
growth_rate = ((curr_qty - prev_qty) / prev_qty) * 100
growth_data.append({'年份': exporter_data.iloc[i]['年份'], '出口国_中文': exporter, '增长率_百分比': growth_rate})
growth_df = pd.DataFrame(growth_data)
for i, exporter in enumerate(['巴西', '阿根廷']):
exporter_growth = growth_df[growth_df['出口国_中文'] == exporter]
if not exporter_growth.empty:
ax.bar(exporter_growth['年份'] + (i-0.5)*0.2, exporter_growth['增长率_百分比'], width=0.35,
color={'巴西': '#A23B72', '阿根廷': '#F18F01'}[exporter], alpha=0.8, label=country_en[exporter])
# --- 修正:将 edgecolors 修改为 edgecolor ---
for exporter in ['巴西', '阿根廷', '美国']:
if exporter in ['巴西', '阿根廷']:
growth_2025 = elasticity[exporter] * 10
else:
growth_2025 = 100
ax.bar(2025 + (['巴西', '阿根廷', '美国'].index(exporter)-1)*0.2, growth_2025, width=0.35,
color={'巴西': '#A23B72', '阿根廷': '#F18F01', '美国': '#C73E1D'}[exporter], alpha=0.8,
linestyle='--', edgecolor='black', linewidth=1.5)
if exporter == '美国':
ax.text(2025, growth_2025 + 5, 'Resumed\nExports', ha='center', va='bottom', fontsize=9, fontweight='bold')
ax.set_title('YoY Growth Rate of China\'s Soybean Imports (2016-2025)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Year', fontsize=14); ax.set_ylabel('Import Growth Rate (%)', fontsize=14)
ax.axhline(y=0, color='black', linestyle='-', alpha=0.5, linewidth=1)
ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), frameon=True, fancybox=True, shadow=True)
ax.yaxis.set_major_formatter(FuncFormatter(percent_formatter))
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表5_进口量增长率.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表5保存完成')
print('\n【6. 生成图表6:价格-数量供需曲线图(2024 vs 2025)】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
price_range_2024 = np.linspace(300, 450, 10)
P0 = base_data_2024[base_data_2024['出口国_中文'] == '巴西']['单价_美元吨'].iloc[0]
Q0 = base_data_2024[base_data_2024['出口国_中文'] == '巴西']['贸易数量_吨'].iloc[0] / 1000
demand_2024 = Q0 + (elasticity['巴西'] * (price_range_2024 - P0) * Q0 / P0)
price_shift = 10
P1 = P0 * (1 + price_shift / 100)
Q1 = Q0 * (1 + elasticity['巴西'] * price_shift / 100)
price_range_2025 = np.linspace(330, 495, 10)
demand_2025 = Q1 + (elasticity['巴西'] * (price_range_2025 - P1) * Q1 / P1)
ax.plot(demand_2024, price_range_2024, 'b-', linewidth=3, label='2024 Demand Curve (0% Tariff)')
ax.plot(demand_2025, price_range_2025, 'r--', linewidth=3, label='2025 Demand Curve (10% Tariff)')
supply_2024 = np.linspace(Q0*0.8, Q0*1.2, 10)
supply_price_2024 = np.full_like(supply_2024, P0)
supply_2025 = np.linspace(Q1*0.8, Q1*1.2, 10)
supply_price_2025 = np.full_like(supply_2025, P1)
ax.plot(supply_2024, supply_price_2024, 'g-', linewidth=3, label='2024 Supply Curve')
ax.plot(supply_2025, supply_price_2025, 'orange', linestyle='--', linewidth=3, label='2025 Supply Curve')
ax.scatter(Q0, P0, s=200, color='blue', marker='o', label='2024 Equilibrium (E0)', edgecolors='black', linewidth=2)
ax.scatter(Q1, P1, s=200, color='red', marker='s', label='2025 Equilibrium (E1)', edgecolors='black', linewidth=2)
ax.text(Q0 + 50, P0 + 5, f'E0: (Q={Q0:.0f}K MT, P={P0:.0f}$/MT)', fontsize=11, color='blue', fontweight='bold', ha='left')
ax.text(Q1 + 50, P1 + 5, f'E1: (Q={Q1:.0f}K MT, P={P1:.0f}$/MT)', fontsize=11, color='red', fontweight='bold', ha='left')
ax.set_title('Soybean Import Supply & Demand Curve Comparison (2024 vs. 2025)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Import Quantity (K MT)', fontsize=14); ax.set_ylabel('Import Price (USD/MT)', fontsize=14)
ax.legend(loc='upper right', frameon=True, fancybox=True, shadow=True)
ax.xaxis.set_major_formatter(FuncFormatter(thousand_formatter))
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, p: f'{int(x)}'))
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表6_供需曲线对比.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.close(); print('✅ 图表6保存完成')
# 格式化函数(百万美元)
def million_formatter(x, pos):
return f'{x:.0f}M$'
# 中英文映射(用于图例)
country_en = {'巴西': 'Brazil', '阿根廷': 'Argentina', '美国': 'United States'}
# -------------------------- 2. 数据预处理(关键:确保单位正确) --------------------------
# 假设你有原始贸易数据和关税数据
# 读取贸易数据(示例路径,需替换为你的实际路径)
trade_df = pd.read_excel('TradeData.xlsx', sheet_name='Sheet1')
# 筛选:中国从美巴阿进口大豆(cmdCode=1201)
trade_df['cmdCode_str'] = trade_df['cmdCode'].astype(str).str.zfill(4)
soybean_mask = trade_df['cmdCode_str'] == '1201'
china_mask = trade_df['partnerDesc'].str.contains('China|中国', case=False, na=False)
exporter_mask = trade_df['reporterDesc'].str.contains('United States|Brazil|Argentina|美国|巴西|阿根廷', case=False, na=False)
trade_core = trade_df[soybean_mask & china_mask & exporter_mask].copy()
# 数据标准化(关键:单位换算正确)
trade_core['贸易数量_吨'] = trade_core['qty'] / 1000 # 千克 → 吨
trade_core['出口额_美元'] = trade_core['fobvalue'] # 原始单位:美元
trade_core['出口额_百万美元'] = trade_core['出口额_美元'] / 1000000 # 美元 → 百万美元(核心修正)
trade_core['出口国_中文'] = trade_core['reporterDesc'].replace({
'United States': '美国', 'Brazil': '巴西', 'Argentina': '阿根廷'
})
trade_core['年份'] = trade_core['refYear']
# 年度汇总
annual_data = trade_core.groupby(['年份', '出口国_中文']).agg({
'贸易数量_吨': 'sum',
'出口额_百万美元': 'sum'
}).reset_index()
# 计算2025年预测数据(基于价格弹性,确保数值合理)
latest_year = annual_data['年份'].max()
base_data_2024 = annual_data[annual_data['年份'] == latest_year].copy()
elasticity = {'巴西': -0.3955, '阿根廷': -0.4210, '美国': -0.3955} # 价格弹性系数
# 构建2025年预测数据
forecast_2025 = []
for _, row in base_data_2024.iterrows():
exporter = row['出口国_中文']
# 价格上涨10%(关税影响)
price_2025 = (row['出口额_百万美元'] * 1000000 / row['贸易数量_吨']) * 1.10
# 进口量变化(弹性×价格涨幅)
qty_change_rate = elasticity[exporter] * 10
qty_2025 = row['贸易数量_吨'] * (1 + qty_change_rate / 100)
# 出口额(百万美元,确保单位正确)
value_2025 = (qty_2025 * price_2025) / 1000000 # 吨×美元/吨 → 美元 → 百万美元
forecast_2025.append({
'年份': 2025,
'出口国_中文': exporter,
'贸易数量_吨': qty_2025,
'出口额_百万美元': value_2025
})
# 补充美国2025年预测(若之前无数据)
if '美国' not in base_data_2024['出口国_中文'].values:
forecast_2025.append({
'年份': 2025,
'出口国_中文': '美国',
'贸易数量_吨': 2000000, # 200万吨
'出口额_百万美元': (2000000 * 418) / 1000000 # 418美元/吨 × 200万吨 → 百万美元
})
forecast_2025 = pd.DataFrame(forecast_2025)
# 合并历史数据与预测数据
all_data = pd.concat([annual_data, forecast_2025], ignore_index=True)
# -------------------------- 3. 绘制贸易额堆叠柱状图(修正后) --------------------------
print('\n【2. 生成图表2:大豆进口贸易额堆叠柱状图】')
fig, ax = plt.subplots(1, 1, figsize=(14, 8))
# 透视表整理数据
value_pivot = all_data.pivot_table(
index='年份',
columns='出口国_中文',
values='出口额_百万美元',
fill_value=0
)
# 确保国家顺序一致
countries = ['巴西', '阿根廷', '美国']
for country in countries:
if country not in value_pivot.columns:
value_pivot[country] = 0
value_pivot = value_pivot[countries]
# 绘制堆叠柱状图
colors_stack = ['#A23B72', '#F18F01', '#C73E1D']
value_pivot.plot(kind='bar', stacked=True, ax=ax, color=colors_stack, alpha=0.8, width=0.7)
# 添加年度总额标签(确保数值合理)
for year in value_pivot.index:
total_value = value_pivot.loc[year].sum()
# 仅当总额小于10000时添加标签(避免异常值)
if total_value 1000:
ax.set_ylim(0, max_value * 1.1) # 正常情况
else:
ax.set_ylim(0, 1000) # 防止异常值
plt.tight_layout()
# 保存图表(替换为你的实际路径)
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题1_新增图表2_贸易额堆叠图.png',
dpi=300, bbox_inches='tight', facecolor='white')
plt.close()
print('✅ 图表2保存完成')
# 第三步:生成图表清单
print('\n' + '='*80)
print('问题1新增可视化图表清单(共6张)')
print('='*80)
charts_list = [
'1. 问题1_新增图表1_大豆单价趋势.png → 大豆进口单价历史趋势+2025年预测(带误差线)',
'2. 问题1_新增图表2_贸易额堆叠图.png → 年度贸易额分国家堆叠柱状图(区分历史与预测)',
'3. 问题1_新增图表3_市场份额饼图对比.png → 2024年与2025年市场份额饼图对比(含总进口量)',
'4. 问题1_新增图表4_关税进口量相关性.png → 关税税率与进口量散点图(带拟合线)',
'5. 问题1_新增图表5_进口量增长率.png → 年度进口量增长率柱状图(体现波动与预测)',
'6. 问题1_新增图表6_供需曲线对比.png → 2024vs2025年供需曲线(体现关税对均衡点影响)'
]
for chart in charts_list:
print(f'✅ {chart}')
print(f'\n???? 所有图表均保存至指定目录,可直接用于论文、报告展示。')美国是全球第二大汽车市场和最大汽车进口国。2024年,美国市场销售的汽车中46%为进口车,日本是主要供应国。除直接从日本出口外,日本汽车制造商还通过在美国投资生产或在墨西哥等国制造后出口至美国的方式进入美国市场。分析日本汽车在美国市场的当前地位,建立包含日本非关税应对策略和经济传导效应的模型,探讨美国关税调整对美日汽车贸易、美国汽车进口结构及美国汽车产业的影响。
本问围绕美国汽车市场的进口依赖特点,以及日本车企的全球化供应链布局展开,核心是把“关税政策出台—日本车企非关税应对—美日汽车贸易结构变化—美国本土汽车产业响应”这一链条的联动效应讲透。
首先得明确日本汽车在美国市场的核心地位,以及它的三重供应模式, 直接从日本出口、通过墨西哥代工生产再出口、在美国本土建厂生产;接着要搭建一个包含日本车企定价话语权、成本转嫁能力、供应链调整效率的分析框架。关键是要量化美国关税调整对日本汽车对美供应总量、不同渠道(直接出口/代工/本土生产)占比的改变. 还有对美国本土车企的产能替代空间、利润增长潜力,以及美国汽车进口来源是否会更趋多元化的长期作用。分析中既要体现关税带来的直接价格传导效果,也要突出日本车企通过全球化布局对冲贸易壁垒的应对思路。
步骤1: 原始数据可视化
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from datetime import datetime
# 设置全局绘图参数
plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei', 'SimHei', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10
plt.rcParams['axes.linewidth'] = 0.8
# 1. 全球半导体制造产能可视化
df_capacity = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3Semiconductor manufacturing capacity worldwide.xlsx')
fig, ax = plt.subplots(figsize=(12, 8))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']
regions_cn = ['美国', '新加坡', '中国台湾', '欧洲', '日本', '中国大陆', '其他']
regions_en = ['USA', 'Singapore', 'Taiwan, China', 'Europe', 'Japan', 'Mainland China', 'Others']
for i, (region_cn, region_en) in enumerate(zip(regions_cn, regions_en)):
ax.plot(df_capacity['年份'], df_capacity[region_cn], marker='o', linewidth=2.5, markersize=6, color=colors[i], label=region_en)
ax.set_title('Global Semiconductor Manufacturing Capacity by Region', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Year', fontsize=12, fontweight='bold')
ax.set_ylabel('Capacity', fontsize=12, fontweight='bold')
ax.set_xlim(df_capacity['年份'].min() - 0.5, df_capacity['年份'].max() + 0.5)
ax.set_xticks(df_capacity['年份'])
ax.tick_params(axis='x', rotation=45)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='upper right', frameon=True, fancybox=True, shadow=True, ncol=2)
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3_global_semiconductor_capacity.png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close()
# 2. 半导体产品出口数据可视化(仅按高端/中端/低端三个等级)
df_webquery = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3DataWeb-Query-Export.xlsx')
year_columns = ['2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']
# 转换数值类型
for col in year_columns:
df_webquery[col] = pd.to_numeric(df_webquery[col], errors='coerce').fillna(0)
# 定义HS编码分类映射(仅保留三个等级)
hs_classification = {
'854231': 'High-end Semiconductors', # 高端半导体
'854232': 'Mid-range Semiconductors', # 中端半导体
'854239': 'Low-end Semiconductors' # 低端半导体
}
# 按HS编码精确匹配分组(确保只得到三个类别)
df_webquery['HS_Code'] = df_webquery['Schedule B'].astype(str).str[:6].str.ljust(6, '0') # 标准化6位HS编码
semiconductor_groups = {}
for hs_code, en_name in hs_classification.items():
# 精确匹配HS编码
group_data = df_webquery[df_webquery['HS_Code'] == hs_code]
group_totals = group_data[year_columns].sum()
semiconductor_groups[en_name] = group_totals
# 创建图表(只显示三个等级)
fig, ax = plt.subplots(figsize=(14, 8))
colors = ['#d62728', '#ff7f0e', '#2ca02c'] # 红色(高端)、橙色(中端)、绿色(低端)
bottom_data = np.zeros(len(year_columns))
# 只循环三个等级的数据
for i, (category, values) in enumerate(semiconductor_groups.items()):
ax.bar(year_columns, values.values, bottom=bottom_data,
color=colors[i], alpha=0.8, label=category,
linewidth=1, edgecolor='white')
bottom_data += values.values
# 格式化y轴(百万美元)
def millions_formatter(x, pos):
return f'${x/1e6:.0f}M'
ax.yaxis.set_major_formatter(plt.FuncFormatter(millions_formatter))
# 设置图表属性
ax.set_title('Semiconductor Exports by Grade (HS Code Classification)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Year', fontsize=12, fontweight='bold')
ax.set_ylabel('FAS Value (USD)', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3, linestyle='--', axis='y')
ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True, fontsize=11)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3_semiconductor_exports_by_grade.png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close()
# 3. 美洲半导体销售数据可视化
df_americas = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3Semiconductor sales in the Americas.xlsx')
df_americas['年份_full'] = df_americas['年份'].apply(lambda x: 2000 + x if x <= 99 else x)
df_americas['date'] = pd.to_datetime(df_americas['年份_full'].astype(str) + '-' + df_americas['月份'].astype(str) + '-01')
df_americas_sorted = df_americas.sort_values('date').reset_index(drop=True)
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_americas_sorted['date'], df_americas_sorted['销售额'], linewidth=2.5, color='#2ca02c', marker='o', markersize=4, alpha=0.8)
ax.fill_between(df_americas_sorted['date'], df_americas_sorted['销售额'], alpha=0.3, color='#2ca02c')
ax.set_title('Semiconductor Sales Trend in the Americas', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Date', fontsize=12, fontweight='bold')
ax.set_ylabel('Sales Amount', fontsize=12, fontweight='bold')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.YearLocator())
plt.xticks(rotation=45)
min_sales = df_americas_sorted['销售额'].min()
max_sales = df_americas_sorted['销售额'].max()
avg_sales = df_americas_sorted['销售额'].mean()
textstr = f'Min Sales: {min_sales:.2f}\nMax Sales: {max_sales:.2f}\nAvg Sales: {avg_sales:.2f}'
props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=props)
ax.grid(True, alpha=0.3, linestyle='--')
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3_semiconductor_sales_americas.png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close()
# 4. 中国半导体销售数据可视化
df_china = pd.read_excel('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3Semiconductor sales in China.xlsx')
df_china['年份_full'] = df_china['年份'].apply(lambda x: 2000 + x if x <= 99 else x)
df_china['date'] = pd.to_datetime(df_china['年份_full'].astype(str) + '-' + df_china['月份'].astype(str) + '-01')
df_china_sorted = df_china.sort_values('date').reset_index(drop=True)
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_china_sorted['date'], df_china_sorted['销售额'], linewidth=2.5, color='#d62728', marker='s', markersize=4, alpha=0.8, label='China Semiconductor Sales')
ax.fill_between(df_china_sorted['date'], df_china_sorted['销售额'], alpha=0.3, color='#d62728')
ax.set_title('Semiconductor Sales Trend in China', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Date', fontsize=12, fontweight='bold')
ax.set_ylabel('Sales Amount', fontsize=12, fontweight='bold')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.YearLocator())
plt.xticks(rotation=45)
min_sales = df_china_sorted['销售额'].min()
max_sales = df_china_sorted['销售额'].max()
avg_sales = df_china_sorted['销售额'].mean()
textstr = f'Min Sales: {min_sales:.2f}\nMax Sales: {max_sales:.2f}\nAvg Sales: {avg_sales:.2f}'
props = dict(boxstyle='round', facecolor='lightcoral', alpha=0.8)
ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=props)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='upper right', frameon=True, fancybox=True, shadow=True)
plt.tight_layout()
plt.savefig('C:\\Users\\Administrator\\PyCharmMiscProject\\亚太赛\\问题3_semiconductor_sales_china.png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close()
print('All semiconductor data visualizations completed successfully!')整体思路视频: https://www.bilibili.com/video/BV1rcycB2EX8/
第二三部分详细细节: https://www.bilibili.com/video/BV1VuyMBsEZd/
第三四部分详细细节: https://www.bilibili.com/video/BV112UpBFER3/

完整助攻文章展示

代码文件展示

代码结果展示