301 lines
9.1 KiB
Python
301 lines
9.1 KiB
Python
"""
|
|
Data models for US Code git repository system.
|
|
Provides normalized, validated structures for all legislative data.
|
|
"""
|
|
|
|
from datetime import date, datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
|
|
|
|
|
|
class CongressionalChamber(str, Enum):
    """The chambers of Congress.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    HOUSE = "House"
    SENATE = "Senate"
|
|
|
|
|
|
class PoliticalParty(str, Enum):
    """Party affiliation codes (the set can be extended).

    Each value is a short code, except ``UNKNOWN`` whose value is the word
    ``"Unknown"``. Members are also ``str`` instances.
    """

    DEMOCRATIC = "D"
    REPUBLICAN = "R"
    INDEPENDENT = "I"
    LIBERTARIAN = "L"
    GREEN = "G"
    UNKNOWN = "Unknown"
|
|
|
|
|
|
class BillType(str, Enum):
    """Kinds of congressional bills and resolutions.

    The value of each member is the abbreviation used when building bill
    identifiers. Members are also ``str`` instances.
    """

    # Bills
    HOUSE_BILL = "HR"
    SENATE_BILL = "S"

    # Joint resolutions
    HOUSE_JOINT_RESOLUTION = "HJR"
    SENATE_JOINT_RESOLUTION = "SJR"

    # Concurrent resolutions
    HOUSE_CONCURRENT_RESOLUTION = "HCR"
    SENATE_CONCURRENT_RESOLUTION = "SCR"

    # Simple resolutions
    HOUSE_RESOLUTION = "HRES"
    SENATE_RESOLUTION = "SRES"
|
|
|
|
|
|
class LegislativeAction(BaseModel):
    """A single legislative action on a bill.

    Because ``use_enum_values`` is enabled, a validated ``chamber`` is stored
    as the enum's plain string value rather than the enum member.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(use_enum_values=True)

    date: date
    action_code: Optional[str] = None
    text: str
    chamber: Optional[CongressionalChamber] = None
|
|
|
|
|
|
class Sponsor(BaseModel):
    """Congressional member who sponsors legislation.

    Because ``use_enum_values`` is enabled, validated ``party`` and
    ``chamber`` fields hold the enums' plain string values.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(use_enum_values=True)

    bioguide_id: Optional[str] = Field(None, description="Biographical Directory ID")
    title: str = Field(..., description="Rep. or Sen.")
    first_name: str
    last_name: str
    full_name: str
    party: PoliticalParty
    state: str = Field(..., min_length=2, max_length=2, description="Two-letter state code")
    district: Optional[int] = Field(None, description="House district number (if applicable)")
    chamber: CongressionalChamber

    # Computed fields

    @property
    def email(self) -> str:
        """Generate an address of the form ``first.last@<chamber domain>``.

        Names are lower-cased and stripped of spaces. The domain is
        ``house.gov`` for the House and ``senate.gov`` otherwise.
        """
        first = self.first_name.lower().replace(" ", "")
        last = self.last_name.lower().replace(" ", "")
        # The enum is a ``str`` subclass, so this comparison also holds when
        # ``chamber`` is stored as the plain value "House".
        domain = "house.gov" if self.chamber == CongressionalChamber.HOUSE else "senate.gov"
        return f"{first}.{last}@{domain}"

    @property
    def formal_name(self) -> str:
        """Full formal name with title, party code and state."""
        # Normalise through the enum so the party code is rendered whether the
        # field holds the raw value or an enum member.
        party_code = PoliticalParty(self.party).value
        return f"{self.title} {self.full_name} ({party_code}-{self.state})"
|
|
|
|
|
|
class Bill(BaseModel):
    """Congressional bill that may become a public law.

    Because ``use_enum_values`` is enabled, a validated ``bill_type`` is
    stored as the enum's plain string value (for example ``"HR"``).
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(use_enum_values=True)

    congress: int = Field(..., ge=1, description="Congressional session number")
    bill_type: BillType
    number: int = Field(..., ge=1, description="Bill number within type/congress")
    title: Optional[str] = None
    short_title: Optional[str] = None

    # Sponsorship
    primary_sponsor: Optional["Sponsor"] = None
    cosponsors: List["Sponsor"] = Field(default_factory=list)

    # Legislative process
    introduced_date: Optional[date] = None
    actions: List["LegislativeAction"] = Field(default_factory=list)

    # External IDs
    congress_gov_url: Optional[HttpUrl] = None

    @property
    def bill_id(self) -> str:
        """Unique bill identifier: type abbreviation followed by the number."""
        # ``use_enum_values`` stores ``bill_type`` as a plain string, which has
        # no ``.value``; going through the enum handles both a raw value and
        # an enum member.
        return f"{BillType(self.bill_type).value}{self.number}"

    @property
    def full_id(self) -> str:
        """Full bill identifier: ``<congress>-<bill_id>``."""
        return f"{self.congress}-{self.bill_id}"
|
|
|
|
|
|
class PublicLaw(BaseModel):
    """A bill that has been enacted into public law."""

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(use_enum_values=True)

    congress: int = Field(..., ge=1, description="Congressional session number")
    law_number: int = Field(..., ge=1, description="Public law number within congress")

    # Enactment details
    enacted_date: date
    signed_date: Optional[date] = None

    # Source bill (if known)
    source_bill: Optional[Bill] = None

    # US Code impact
    affected_titles: List[int] = Field(default_factory=list, description="US Code titles affected")

    # House release point data
    release_path: str = Field(..., description="House download path (e.g., '119/26not21')")

    # Metadata
    title: Optional[str] = None
    description: Optional[str] = None

    @property
    def public_law_id(self) -> str:
        """Standard public law identifier, ``PL <congress>-<law_number>``."""
        return f"PL {self.congress}-{self.law_number}"

    @property
    def formal_citation(self) -> str:
        """Formal citation; the law number is zero-padded to three digits."""
        return f"Public Law {self.congress}-{self.law_number:03d}"

    @field_validator('affected_titles')
    @classmethod
    def validate_titles(cls, v: List[int]) -> List[int]:
        """Validate US Code title numbers and return them sorted ascending.

        Raises:
            ValueError: If any title number falls outside the range 1-54.
        """
        for title in v:
            if title < 1 or title > 54:
                raise ValueError(f"Invalid US Code title: {title}")
        return sorted(v)
|
|
|
|
|
|
class USCodeRelease(BaseModel):
    """One release of the US Code obtained from House sources.

    Only ``public_law`` and ``download_url`` are required; every other field
    defaults to ``None``.
    """

    public_law: PublicLaw

    # --- Download metadata ---
    download_url: str
    download_date: Optional[datetime] = None
    file_size: Optional[int] = None
    file_hash: Optional[str] = Field(None, description="SHA-256 hash")

    # --- Extraction metadata ---
    extracted_date: Optional[datetime] = None
    extraction_path: Optional[Path] = None
    file_count: Optional[int] = None

    # --- Git metadata ---
    git_commit_hash: Optional[str] = None
    git_tag: Optional[str] = None

    @property
    def release_filename(self) -> str:
        """Filename for this release: the release path with '/' turned into '-'."""
        path_parts = self.public_law.release_path.split("/")
        return "-".join(path_parts)
|
|
|
|
|
|
class CongressionalSession(BaseModel):
    """Information about a congressional session.

    Because ``use_enum_values`` is enabled, validated party fields hold the
    enums' plain string values.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = ConfigDict(use_enum_values=True)

    congress: int = Field(..., ge=1)
    session: Literal[1, 2] = Field(..., description="1st or 2nd session")
    start_date: date
    end_date: Optional[date] = None

    # Leadership
    house_speaker: Optional[Sponsor] = None
    senate_majority_leader: Optional[Sponsor] = None
    house_majority_leader: Optional[Sponsor] = None
    senate_minority_leader: Optional[Sponsor] = None

    # Party control
    house_majority_party: Optional[PoliticalParty] = None
    senate_majority_party: Optional[PoliticalParty] = None

    @property
    def session_id(self) -> str:
        """Session identifier, ``<congress>-<session>``."""
        return f"{self.congress}-{self.session}"

    @property
    def formal_name(self) -> str:
        """Formal session name, e.g. ``"101st Congress, 2nd Session"``."""
        # Pick the English ordinal suffix for the congress number. 11, 12 and
        # 13 (and 111, 112, ...) take "th" despite ending in 1, 2 or 3.
        number = self.congress
        if 11 <= number % 100 <= 13:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")

        ordinal = "1st" if self.session == 1 else "2nd"
        return f"{number}{suffix} Congress, {ordinal} Session"
|
|
|
|
|
|
class GitCommitMetadata(BaseModel):
    """Record describing one git commit in the US Code repository.

    Every field is required except ``is_initial_commit``, which defaults to
    ``False``.
    """

    public_law: PublicLaw

    # --- Git data ---
    commit_hash: str
    tag_name: str
    author_name: str
    author_email: str
    commit_date: datetime
    message: str

    # --- File changes ---
    files_changed: int
    lines_added: int
    lines_deleted: int

    # --- Repository state ---
    repository_path: Path
    is_initial_commit: bool = False
|
|
|
|
|
|
class APICache(BaseModel):
    """A cached Congress.gov API response.

    ``sponsor_found`` defaults to ``False`` and ``sponsor`` to ``None``; all
    other fields are required.
    """

    cache_key: str
    congress: int
    law_number: int

    # --- Cache metadata ---
    cached_date: datetime
    api_response: Dict[str, Any]
    sponsor_found: bool = False

    # --- Extracted sponsor (if found) ---
    sponsor: Optional[Sponsor] = None
|
|
|
|
|
|
class RepositoryMetadata(BaseModel):
    """Repository-wide metadata for the US Code git repository.

    Every field is required except ``total_size``, which defaults to ``None``.
    """

    created_date: datetime
    last_updated: datetime

    # --- Coverage ---
    earliest_law: PublicLaw
    latest_law: PublicLaw
    total_laws: int
    total_commits: int

    # --- Data sources ---
    congress_api_key_used: bool
    house_source_verified: bool

    # --- Repository info ---
    repository_path: Path
    total_size: Optional[int] = None
|
|
|
|
|
|
# Utility functions for model creation
|
|
|
|
def create_sponsor_from_congress_api(api_data: Dict[str, Any]) -> Sponsor:
    """Create a ``Sponsor`` from a Congress.gov API response.

    Args:
        api_data: Mapping read for the keys ``bioguideId``, ``title``,
            ``firstName``, ``lastName``, ``fullName``, ``party``, ``state``
            and ``district``. Missing text keys default to ``""``.

    Returns:
        The validated ``Sponsor``.

    Raises:
        pydantic.ValidationError: If the assembled values fail ``Sponsor``
            validation (for example, ``state`` is not two characters long).
    """
    # A missing, null or unrecognised party code maps to UNKNOWN instead of
    # aborting the whole record with a ValueError from the enum lookup.
    try:
        party = PoliticalParty(api_data.get('party', 'Unknown'))
    except ValueError:
        party = PoliticalParty.UNKNOWN

    # Only the exact title 'Rep.' selects the House; any other title,
    # including a missing one, is treated as Senate.
    # NOTE(review): confirm whether other House titles can appear here.
    if api_data.get('title') == 'Rep.':
        chamber = CongressionalChamber.HOUSE
    else:
        chamber = CongressionalChamber.SENATE

    return Sponsor(
        bioguide_id=api_data.get('bioguideId'),
        title=api_data.get('title', ''),
        first_name=api_data.get('firstName', ''),
        last_name=api_data.get('lastName', ''),
        full_name=api_data.get('fullName', ''),
        party=party,
        state=api_data.get('state', ''),
        district=api_data.get('district'),
        chamber=chamber,
    )
|
|
|
|
|
|
def create_public_law_from_house_data(house_data: Dict[str, Any]) -> PublicLaw:
    """Create a ``PublicLaw`` from House release point data.

    Args:
        house_data: Mapping with the required keys ``congress``, ``law``,
            ``date`` (a string in ``MM/DD/YYYY`` form) and ``releasePath``,
            plus the optional key ``affectedTitles``.

    Returns:
        The validated ``PublicLaw``.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If ``date`` does not match ``%m/%d/%Y``.
        pydantic.ValidationError: If the values fail ``PublicLaw`` validation.
    """
    enacted = datetime.strptime(house_data['date'], '%m/%d/%Y').date()

    # ``affected_titles`` is optional on the model, so a missing or null entry
    # becomes an empty list instead of raising KeyError.
    titles = house_data.get('affectedTitles') or []

    return PublicLaw(
        congress=house_data['congress'],
        law_number=house_data['law'],
        enacted_date=enacted,
        release_path=house_data['releasePath'],
        affected_titles=titles,
    )
|
|
|