# File: git-law/models.py (301 lines, 9.1 KiB, Python)

"""
Data models for US Code git repository system.
Provides normalized, validated structures for all legislative data.
"""
from datetime import datetime, date
from typing import List, Optional, Dict, Any, Literal
from pathlib import Path
from pydantic import BaseModel, Field, field_validator, HttpUrl
from enum import Enum
class CongressionalChamber(str, Enum):
    """The two chambers of the US Congress.

    Members are also ``str`` instances, so they compare equal to their
    string values ("House" / "Senate").
    """

    HOUSE = "House"
    SENATE = "Senate"
class PoliticalParty(str, Enum):
    """Party affiliation codes (the set may be extended).

    Most members use a one-letter code; ``UNKNOWN`` carries the literal
    string "Unknown" as its value.
    """

    DEMOCRATIC = "D"
    REPUBLICAN = "R"
    INDEPENDENT = "I"
    LIBERTARIAN = "L"
    GREEN = "G"
    UNKNOWN = "Unknown"
class BillType(str, Enum):
    """Kinds of congressional measures, keyed by their short type code."""

    # Bills
    HOUSE_BILL = "HR"
    SENATE_BILL = "S"
    # Joint resolutions
    HOUSE_JOINT_RESOLUTION = "HJR"
    SENATE_JOINT_RESOLUTION = "SJR"
    # Concurrent resolutions
    HOUSE_CONCURRENT_RESOLUTION = "HCR"
    SENATE_CONCURRENT_RESOLUTION = "SCR"
    # Simple resolutions
    HOUSE_RESOLUTION = "HRES"
    SENATE_RESOLUTION = "SRES"
class LegislativeAction(BaseModel):
    """One recorded step in a bill's legislative history."""

    # NOTE: the field name ``date`` shadows the imported ``date`` type inside
    # this class body. It works only because the field has no default value;
    # keep the declaration in exactly this form.
    date: date
    action_code: Optional[str] = None
    text: str
    chamber: Optional[CongressionalChamber] = None

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
class Sponsor(BaseModel):
    """A member of Congress named as sponsor or cosponsor of legislation."""

    bioguide_id: Optional[str] = Field(None, description="Biographical Directory ID")
    title: str = Field(..., description="Rep. or Sen.")
    first_name: str
    last_name: str
    full_name: str
    party: PoliticalParty
    state: str = Field(..., min_length=2, max_length=2, description="Two-letter state code")
    district: Optional[int] = Field(None, description="House district number (if applicable)")
    chamber: CongressionalChamber

    # Derived, read-only values (not model fields)
    @property
    def email(self) -> str:
        """Build a ``first.last@<chamber domain>`` address.

        Both name parts are lower-cased and stripped of spaces. The domain
        is ``house.gov`` for House members and ``senate.gov`` otherwise.
        """
        in_house = self.chamber == CongressionalChamber.HOUSE
        domain = "house.gov" if in_house else "senate.gov"
        local_part = ".".join(
            name.lower().replace(" ", "")
            for name in (self.first_name, self.last_name)
        )
        return f"{local_part}@{domain}"

    @property
    def formal_name(self) -> str:
        """Return title, full name and a ``(party-state)`` suffix."""
        affiliation = f"{self.party}-{self.state}"
        return f"{self.title} {self.full_name} ({affiliation})"

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
class Bill(BaseModel):
    """Congressional bill that may become a public law."""

    congress: int = Field(..., ge=1, description="Congressional session number")
    bill_type: BillType
    number: int = Field(..., ge=1, description="Bill number within type/congress")
    title: Optional[str] = None
    short_title: Optional[str] = None

    # Sponsorship
    primary_sponsor: Optional["Sponsor"] = None
    cosponsors: List["Sponsor"] = Field(default_factory=list)

    # Legislative process
    introduced_date: Optional[date] = None
    actions: List["LegislativeAction"] = Field(default_factory=list)

    # External IDs
    congress_gov_url: Optional[HttpUrl] = None

    @property
    def bill_id(self) -> str:
        """Return the bill identifier: type code followed by number, e.g. ``HR1``.

        Because ``use_enum_values`` is enabled, ``bill_type`` is normally
        stored as a plain string after validation, so calling ``.value`` on
        it would raise ``AttributeError``. An enum member can still appear
        (for example after direct attribute assignment), so both forms are
        accepted here.
        """
        bill_type = self.bill_type
        type_code = bill_type.value if isinstance(bill_type, Enum) else bill_type
        return f"{type_code}{self.number}"

    @property
    def full_id(self) -> str:
        """Return the bill identifier prefixed with the congress, e.g. ``119-HR1``."""
        return f"{self.congress}-{self.bill_id}"

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
class PublicLaw(BaseModel):
    """An enacted public law, identified by congress and law number."""

    congress: int = Field(..., ge=1, description="Congressional session number")
    law_number: int = Field(..., ge=1, description="Public law number within congress")

    # Enactment details
    enacted_date: date
    signed_date: Optional[date] = None

    # Source bill (if known)
    source_bill: Optional[Bill] = None

    # US Code impact
    affected_titles: List[int] = Field(default_factory=list, description="US Code titles affected")

    # House release point data
    release_path: str = Field(..., description="House download path (e.g., '119/26not21')")

    # Metadata
    title: Optional[str] = None
    description: Optional[str] = None

    @property
    def public_law_id(self) -> str:
        """Short identifier of the form ``PL <congress>-<law_number>``."""
        return f"PL {self.congress}-{self.law_number}"

    @property
    def formal_citation(self) -> str:
        """Long-form citation; the law number is zero-padded to three digits."""
        return f"Public Law {self.congress}-{self.law_number:03d}"

    @field_validator('affected_titles')
    @classmethod
    def validate_titles(cls, v: List[int]) -> List[int]:
        """Reject title numbers outside 1..54 and return the list sorted.

        Raises:
            ValueError: naming the first out-of-range title encountered.
        """
        offender = next((t for t in v if t < 1 or t > 54), None)
        if offender is not None:
            raise ValueError(f"Invalid US Code title: {offender}")
        ordered = list(v)
        ordered.sort()
        return ordered

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
class USCodeRelease(BaseModel):
    """One US Code release point, tied to the public law it corresponds to."""

    public_law: PublicLaw

    # Download metadata
    download_url: str
    download_date: Optional[datetime] = None
    file_size: Optional[int] = None
    file_hash: Optional[str] = Field(None, description="SHA-256 hash")

    # Extraction metadata
    extracted_date: Optional[datetime] = None
    extraction_path: Optional[Path] = None
    file_count: Optional[int] = None

    # Git metadata
    git_commit_hash: Optional[str] = None
    git_tag: Optional[str] = None

    @property
    def release_filename(self) -> str:
        """Return the law's release path with every ``/`` replaced by ``-``."""
        segments = self.public_law.release_path.split("/")
        return "-".join(segments)
class CongressionalSession(BaseModel):
    """Information about a single session of a given Congress."""

    congress: int = Field(..., ge=1)
    session: Literal[1, 2] = Field(..., description="1st or 2nd session")
    start_date: date
    end_date: Optional[date] = None

    # Leadership
    house_speaker: Optional[Sponsor] = None
    senate_majority_leader: Optional[Sponsor] = None
    house_majority_leader: Optional[Sponsor] = None
    senate_minority_leader: Optional[Sponsor] = None

    # Party control
    house_majority_party: Optional[PoliticalParty] = None
    senate_majority_party: Optional[PoliticalParty] = None

    @property
    def session_id(self) -> str:
        """Return the identifier ``<congress>-<session>``."""
        return f"{self.congress}-{self.session}"

    @property
    def formal_name(self) -> str:
        """Return e.g. ``"119th Congress, 1st Session"``.

        The congress number gets its correct English ordinal suffix
        (101st, 102nd, 103rd, 111th, ...) instead of a hard-coded "th".
        """
        number = self.congress
        # 11, 12 and 13 (and 111, 112, ...) take "th" despite ending in 1/2/3.
        if 10 <= number % 100 <= 20:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
        ordinal = "1st" if self.session == 1 else "2nd"
        return f"{number}{suffix} Congress, {ordinal} Session"

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
class GitCommitMetadata(BaseModel):
    """Record describing one git commit in the US Code repository."""

    public_law: PublicLaw

    # Git data
    commit_hash: str
    tag_name: str
    author_name: str
    author_email: str
    commit_date: datetime
    message: str

    # File changes
    files_changed: int
    lines_added: int
    lines_deleted: int

    # Repository state
    repository_path: Path
    is_initial_commit: bool = False
class APICache(BaseModel):
    """A cached Congress.gov API response for one public law."""

    cache_key: str
    congress: int
    law_number: int

    # Cache metadata
    cached_date: datetime
    api_response: Dict[str, Any]
    sponsor_found: bool = False

    # Extracted sponsor (if found)
    sponsor: Optional[Sponsor] = None
class RepositoryMetadata(BaseModel):
    """Summary information about the US Code git repository as a whole."""

    created_date: datetime
    last_updated: datetime

    # Coverage
    earliest_law: PublicLaw
    latest_law: PublicLaw
    total_laws: int
    total_commits: int

    # Data sources
    congress_api_key_used: bool
    house_source_verified: bool

    # Repository info
    repository_path: Path
    total_size: Optional[int] = None
# Utility functions for model creation
def create_sponsor_from_congress_api(api_data: Dict[str, Any]) -> Sponsor:
    """Create a Sponsor from a Congress.gov API sponsor record.

    Args:
        api_data: Mapping read with the keys ``bioguideId``, ``title``,
            ``firstName``, ``lastName``, ``fullName``, ``party``, ``state``
            and ``district``. Missing string keys default to ``''``.

    Returns:
        The validated Sponsor. A party value that is missing, ``None`` or
        not a known ``PoliticalParty`` code becomes ``PoliticalParty.UNKNOWN``.

    Raises:
        pydantic.ValidationError: if the assembled data fails Sponsor's
            field constraints (for example a ``state`` that is not exactly
            two characters).
    """
    # An unrecognised party code should not abort sponsor creation; the enum
    # has an UNKNOWN member for exactly this case.
    try:
        party = PoliticalParty(api_data.get('party', 'Unknown'))
    except ValueError:
        party = PoliticalParty.UNKNOWN

    # NOTE(review): any title other than 'Rep.' (including a missing one) is
    # treated as Senate; confirm how non-voting House titles should map.
    if api_data.get('title') == 'Rep.':
        chamber = CongressionalChamber.HOUSE
    else:
        chamber = CongressionalChamber.SENATE

    return Sponsor(
        bioguide_id=api_data.get('bioguideId'),
        title=api_data.get('title', ''),
        first_name=api_data.get('firstName', ''),
        last_name=api_data.get('lastName', ''),
        full_name=api_data.get('fullName', ''),
        party=party,
        state=api_data.get('state', ''),
        district=api_data.get('district'),
        chamber=chamber,
    )
def create_public_law_from_house_data(house_data: Dict[str, Any]) -> PublicLaw:
    """Create a PublicLaw from House release point data.

    Args:
        house_data: Mapping with required keys ``congress``, ``law``,
            ``date`` (a ``MM/DD/YYYY`` string) and ``releasePath``. The
            ``affectedTitles`` key is optional; when missing or ``None``
            the law is created with no affected titles, matching the
            model's own default.

    Returns:
        The validated PublicLaw.

    Raises:
        KeyError: if a required key is missing.
        ValueError: if ``date`` does not match ``%m/%d/%Y``.
        pydantic.ValidationError: if field validation fails.
    """
    enacted = datetime.strptime(house_data['date'], '%m/%d/%Y').date()
    return PublicLaw(
        congress=house_data['congress'],
        law_number=house_data['law'],
        enacted_date=enacted,
        release_path=house_data['releasePath'],
        affected_titles=house_data.get('affectedTitles') or [],
    )