英超球队数据获取完全指南：从入门到精通

2026-05-06 09:12:04 作者：卓炯娓

引言

作为全球最具影响力的足球联赛之一，英超联赛（Premier League）吸引了无数球迷和数据分析师的关注。获取准确、及时的英超球队数据对于比赛分析、预测模型构建以及足球相关应用开发至关重要。本文将全面介绍如何使用premier_api Python库获取和处理英超球队数据，帮助开发者和数据分析师轻松掌握从基础查询到高级应用的完整流程。通过本文的学习，您将能够高效地获取各类英超球队数据，并将其应用于实际项目中。

数据结构详解

英超球队数据基本结构

premier_api中的英超球队数据采用统一的结构化格式，每个球队以字典形式返回（通过字段名访问，例如 team['name']），包含以下9个关键字段：

# 数据结构示例
[
    team_id,           # 球队唯一ID
    name,              # 球队名称
    code,              # 球队代码
    founded,           # 成立年份
    stadium,           # 主场球场
    capacity,          # 球场容量
    manager,           # 主教练
    chairman,          # 俱乐部主席
    trophies           # 主要奖杯及数量（字典：奖杯名称 -> 次数）
]

英超球队数据字段说明

字段名	数据类型	描述	示例
team_id	整数	球队唯一标识符	1001
name	字符串	球队全名	"Manchester United FC"
code	字符串	球队简称（3个字母）	"MUN"
founded	整数	球队成立年份	1878
stadium	字符串	主场球场名称	"Old Trafford"
capacity	整数	球场容量（人）	74310
manager	字符串	现任主教练	"Erik ten Hag"
chairman	字符串	俱乐部主席	"Joel Glazer"
trophies	字典	主要奖杯及数量	{"Premier League": 13, "FA Cup": 12, "Champions League": 3}

基础查询方法

安装与导入

pip install premier_api

from premier_api.teams import PremierLeagueTeams

获取所有英超球队

# Create the client object that all later examples reuse.
pl_teams = PremierLeagueTeams()

# Fetch every team currently known to the client.
all_teams = pl_teams.get_all_teams()
print(f"当前英超共有 {len(all_teams)} 支球队")

# Preview the result: only the first five entries are printed.
for club in all_teams[:5]:
    print(f"{club['name']} ({club['code']}) - 主场: {club['stadium']}")

按条件查询球队

# Look a team up by its three-letter code.
mancity = pl_teams.get_team_by_code("MCI")
print(f"曼城队信息: {mancity['name']}, 主教练: {mancity['manager']}")

# Look a team up by its numeric ID.
arsenal = pl_teams.get_team_by_id(1003)
print(f"阿森纳队信息: {arsenal['name']}, 主场容量: {arsenal['capacity']}")

# Search by a keyword contained in the team name, then show only the names.
london_teams = pl_teams.search_teams_by_name("London")
london_names = [match['name'] for match in london_teams]
print(f"伦敦地区球队: {london_names}")

高级应用技巧

按成立年份筛选

from datetime import date

# Find teams founded more than 100 years ago.
# The cutoff is derived from today's date rather than hard-coded, so the
# example stays correct in any year.
# NOTE(review): assumes condition="less_than" is a strict comparison
# (founded < year) — confirm against the premier_api documentation.
centenary_cutoff = date.today().year - 100
veteran_teams = pl_teams.filter_teams_by_founded(year=centenary_cutoff, condition="less_than")
print("百年老店球队:")
for team in veteran_teams:
    print(f"- {team['name']} ({team['founded']}年成立)")

按球场容量排序

# Ask the client for the five teams with the largest stadiums,
# sorted from largest to smallest.
top_stadiums = pl_teams.sort_teams_by_capacity(ascending=False, limit=5)
print("英超最大球场:")
# Number the rows starting at 1 so the output reads as a ranking.
for rank, club in enumerate(top_stadiums, start=1):
    print(f"{rank}. {club['stadium']} - {club['capacity']}人 ({club['name']})")

奖杯数据分析

# Count Premier League titles per team; the result is iterated as a
# mapping of team name -> number of titles.
pl_champions = pl_teams.analyze_trophies(trophy_type="Premier League")
print("英超冠军次数排名:")
for club_name, title_count in pl_champions.items():
    print(f"{club_name}: {title_count}次")

实战案例

案例1：球队信息查询工具

def team_info_tool(team_code):
    """Print a formatted profile for the team with the given code.

    The code is upper-cased before the lookup. If no team is found, a
    "not found" message is printed instead. Any exception raised during the
    lookup or while printing is caught and reported on stdout, so this
    function never raises to the caller.

    Args:
        team_code: Team code string (case-insensitive), e.g. "LIV".
    """
    try:
        record = pl_teams.get_team_by_code(team_code.upper())
        if record:
            # Lines are printed one at a time on purpose: if a field is
            # missing, everything before it has already been shown.
            print(f"🏴󠁧󠁢󠁥󠁮󠁧󠁿 {record['name']}")
            print(f"📋 球队代码: {record['code']}")
            print(f"🏟️ 主场球场: {record['stadium']} ({record['capacity']}人)")
            print(f"👔 主教练: {record['manager']}")
            print(f"📅 成立年份: {record['founded']}年")

            # Honours section: one line per trophy type.
            print("🏆 主要荣誉:")
            for trophy_name, times_won in record['trophies'].items():
                print(f"  - {trophy_name}: {times_won}次")
        else:
            print(f"未找到代码为 {team_code} 的球队")
    except Exception as err:
        print(f"查询出错: {str(err)}")

# Usage example
team_info_tool("LIV")

案例2：球队数据可视化

import matplotlib.pyplot as plt

def visualize_stadium_capacities():
    """Draw a bar chart of stadium capacity per team, largest first.

    Teams are fetched from ``pl_teams``, ordered by descending capacity,
    plotted against their team codes, and each bar is labelled with its
    value. The figure is displayed with ``plt.show()``.
    """
    # Copy the team list, then sort the copy in place (largest stadium first).
    ranked = list(pl_teams.get_all_teams())
    ranked.sort(key=lambda entry: entry['capacity'], reverse=True)

    # Parallel sequences for the x axis (codes) and bar heights (capacities).
    codes = [entry['code'] for entry in ranked]
    seats = [entry['capacity'] for entry in ranked]

    # Build the chart.
    plt.figure(figsize=(12, 8))
    rects = plt.bar(codes, seats, color='skyblue')
    plt.title('Premier League Stadium Capacities')
    plt.xlabel('Team Code')
    plt.ylabel('Capacity')

    # Put a thousands-separated value label centred above each bar.
    for rect in rects:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2.
        plt.text(centre_x, top, f'{top:,}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

# Usage example
visualize_stadium_capacities()

案例3：球队数据统计分析报告

import pandas as pd

def generate_team_statistics_report(teams=None, current_year=None):
    """Print summary statistics for Premier League teams and export them to CSV.

    Args:
        teams: Optional list of team dicts, each providing at least the
            ``name``, ``founded`` and ``trophies`` keys (``trophies`` being a
            dict of trophy name -> count). When omitted, the list is fetched
            with ``pl_teams.get_all_teams()``.
        current_year: Reference year used to compute each club's age.
            Defaults to the current calendar year, so the report does not go
            stale.

    Side effects:
        Prints the report to stdout and writes
        ``premier_league_teams_statistics.csv`` to the working directory.
        When there is no team data, a notice is printed and no file is
        written.
    """
    # Local import keeps this snippet self-contained.
    from datetime import date

    if teams is None:
        teams = pl_teams.get_all_teams()
    if current_year is None:
        current_year = date.today().year

    # Convert to a DataFrame
    df = pd.DataFrame(teams)
    if df.empty:
        # Without rows there are no columns to aggregate; bail out early
        # instead of failing with a KeyError below.
        print("没有可用的球队数据，无法生成报告")
        return

    # Derived metrics: club age and total trophy count.
    df['age'] = current_year - df['founded']
    df['trophy_count'] = df['trophies'].apply(lambda x: sum(x.values()))

    # Report header
    print("=== 英超球队统计分析报告 ===")
    print(f"数据日期: {current_year}年")
    print(f"球队总数: {len(df)}支")

    # Age statistics; .loc[row, col] avoids chained indexing.
    oldest_idx = df['age'].idxmax()
    newest_idx = df['age'].idxmin()
    print("\n年龄统计:")
    print(f"平均成立时间: {df['age'].mean():.1f}年")
    print(f"最古老球队: {df.loc[oldest_idx, 'name']} ({df['age'].max()}年)")
    print(f"最新成立球队: {df.loc[newest_idx, 'name']} ({df['age'].min()}年)")

    # Trophy statistics
    print("\n奖杯统计:")
    print(f"平均奖杯数量: {df['trophy_count'].mean():.1f}个")
    top_trophy_team = df.loc[df['trophy_count'].idxmax()]
    print(f"最多奖杯球队: {top_trophy_team['name']} ({top_trophy_team['trophy_count']}个)")

    # Save the enriched table as CSV
    df.to_csv('premier_league_teams_statistics.csv', index=False)
    print("\n报告已保存为 premier_league_teams_statistics.csv")

# Usage example: print the report and write the CSV file.
generate_team_statistics_report()

优化技巧

1. 错误处理

def safe_get_team(team_code):
    """Look up a team by code, reporting problems instead of raising.

    The code must be a non-empty value of length 3; it is upper-cased before
    the lookup. Every failure (invalid input, team not found, or any other
    exception) is printed to stdout with a category prefix.

    Args:
        team_code: Three-character team code, e.g. "CHE".

    Returns:
        The team record returned by ``pl_teams.get_team_by_code``, or
        ``None`` when validation or the lookup failed.
    """
    try:
        # Validation runs inside the try block so that a non-sized argument
        # (which makes len() fail) is also reported rather than raised.
        code_is_valid = bool(team_code) and len(team_code) == 3
        if not code_is_valid:
            raise ValueError("球队代码必须是3个字符")

        found = pl_teams.get_team_by_code(team_code.upper())
        if not found:
            raise LookupError(f"未找到代码为 {team_code} 的球队")
    except ValueError as bad_input:
        print(f"输入错误: {bad_input}")
    except LookupError as missing:
        print(f"查询错误: {missing}")
    except Exception as unexpected:
        print(f"发生错误: {str(unexpected)}")
    else:
        # Only reached when nothing above raised.
        return found
    return None

# Usage example: `team` is the team record, or None if the lookup failed.
team = safe_get_team("CHE")

2. 性能优化

from functools import lru_cache

class CachedPremierLeagueTeams(PremierLeagueTeams):
    """PremierLeagueTeams variant that memoizes selected queries per instance.

    The caches are created in ``__init__`` and stored on the instance rather
    than applied with ``@lru_cache`` on the methods. Decorating a method
    makes one class-level cache keyed on ``self``, which keeps every instance
    alive for the lifetime of the cache and requires ``self`` to be hashable.
    Per-instance caches are released together with the instance.

    Cached results are returned by reference: callers that mutate a returned
    object will see that mutation on later cache hits.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base class and build this instance's caches.

        All arguments are forwarded unchanged to ``PremierLeagueTeams``.
        """
        super().__init__(*args, **kwargs)
        # Wrap the *parent* implementations so the overrides below can
        # delegate to them without recursing into themselves.
        self._cached_get_team_by_code = lru_cache(maxsize=30)(
            super().get_team_by_code
        )
        self._cached_filter_teams_by_founded = lru_cache(maxsize=10)(
            super().filter_teams_by_founded
        )

    def get_team_by_code(self, code):
        """Return the team for ``code``, caching up to 30 distinct codes."""
        return self._cached_get_team_by_code(code)

    def filter_teams_by_founded(self, year, condition="equal"):
        """Return teams filtered by founding year, caching up to 10 queries.

        Arguments are forwarded positionally so that keyword and positional
        call styles share the same cache entry.
        """
        return self._cached_filter_teams_by_founded(year, condition)

# Use the cached variant
cached_teams = CachedPremierLeagueTeams()
# The first lookup runs the real query
team1 = cached_teams.get_team_by_code("MUN")
# The second lookup is served from the cache
team2 = cached_teams.get_team_by_code("MUN")

3. 数据缓存策略

import json
import os
from datetime import datetime, timedelta

class TeamDataCache:
    """File-backed JSON cache for team data with time-based expiry.

    Each key is stored as ``<cache_dir>/<key>.json``. An entry counts as
    fresh while the file's modification time is younger than ``expiry_days``.
    """

    def __init__(self, cache_dir="cache", expiry_days=7):
        """Remember the settings and create the cache directory if missing.

        Args:
            cache_dir: Directory that holds the cache files.
            expiry_days: Maximum age of an entry, in days, before it is
                treated as stale.
        """
        self.cache_dir = cache_dir
        self.expiry_days = expiry_days
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, key):
        """Return the path of the cache file for ``key``."""
        return os.path.join(self.cache_dir, f"{key}.json")

    def is_cache_valid(self, key):
        """Return True if an entry for ``key`` exists and has not expired."""
        # Ask for the mtime directly instead of checking existence first:
        # the file may disappear between the two calls.
        try:
            modified_ts = os.path.getmtime(self._get_cache_path(key))
        except OSError:
            return False

        age = datetime.now() - datetime.fromtimestamp(modified_ts)
        return age < timedelta(days=self.expiry_days)

    def get_cached_data(self, key):
        """Return the cached value for ``key``, or None on a cache miss.

        A missing, expired, unreadable or corrupt cache file is treated as a
        miss so that callers simply fall back to fetching fresh data.
        """
        if not self.is_cache_valid(key):
            return None
        try:
            with open(self._get_cache_path(key), 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # OSError: file vanished or is unreadable.
            # ValueError: invalid JSON or undecodable bytes.
            return None

    def save_to_cache(self, key, data):
        """Store ``data`` (any JSON-serializable value) under ``key``.

        Raises:
            TypeError: If ``data`` is not JSON-serializable. The existing
                cache entry, if any, is left untouched in that case.
            OSError: If the cache file cannot be written.
        """
        # Serialize before touching the disk so a failure cannot truncate
        # a previously valid entry.
        payload = json.dumps(data)

        # Write to a sibling temp file and swap it in with os.replace, so
        # readers never observe a half-written cache file.
        final_path = self._get_cache_path(key)
        tmp_path = f"{final_path}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        os.replace(tmp_path, final_path)

# Read-through caching: try the on-disk cache first and fall back to the API.
cache = TeamDataCache()
# Any falsy cached value (missing, expired, or empty) is normalised to None
# and therefore counts as a cache miss.
all_teams = cache.get_cached_data("all_teams") or None
if all_teams is None:
    all_teams = pl_teams.get_all_teams()
    cache.save_to_cache("all_teams", all_teams)