""" Data models for US Code git repository system. Provides normalized, validated structures for all legislative data. """ from datetime import datetime, date from typing import List, Optional, Dict, Any, Literal from pathlib import Path from pydantic import BaseModel, Field, field_validator, HttpUrl from enum import Enum class CongressionalChamber(str, Enum): """Congressional chambers""" HOUSE = "House" SENATE = "Senate" class PoliticalParty(str, Enum): """Political parties (expandable)""" DEMOCRATIC = "D" REPUBLICAN = "R" INDEPENDENT = "I" LIBERTARIAN = "L" GREEN = "G" UNKNOWN = "Unknown" class BillType(str, Enum): """Types of congressional bills""" HOUSE_BILL = "HR" SENATE_BILL = "S" HOUSE_JOINT_RESOLUTION = "HJR" SENATE_JOINT_RESOLUTION = "SJR" HOUSE_CONCURRENT_RESOLUTION = "HCR" SENATE_CONCURRENT_RESOLUTION = "SCR" HOUSE_RESOLUTION = "HRES" SENATE_RESOLUTION = "SRES" class LegislativeAction(BaseModel): """A single legislative action on a bill""" date: date action_code: Optional[str] = None text: str chamber: Optional[CongressionalChamber] = None class Config: use_enum_values = True class Sponsor(BaseModel): """Congressional member who sponsors legislation""" bioguide_id: Optional[str] = Field(None, description="Biographical Directory ID") title: str = Field(..., description="Rep. or Sen.") first_name: str last_name: str full_name: str party: PoliticalParty state: str = Field(..., min_length=2, max_length=2, description="Two-letter state code") district: Optional[int] = Field(None, description="House district number (if applicable)") chamber: CongressionalChamber # Computed fields @property def email(self) -> str: """Generate congressional email address""" first = self.first_name.lower().replace(" ", "") last = self.last_name.lower().replace(" ", "") domain = "house.gov" if self.chamber == CongressionalChamber.HOUSE else "senate.gov" return f"{first}.{last}@{domain}" @property def formal_name(self) -> str: """Full formal name with title and party""" return f"{self.title} {self.full_name} ({self.party}-{self.state})" class Config: use_enum_values = True class Bill(BaseModel): """Congressional bill that may become a public law""" congress: int = Field(..., ge=1, description="Congressional session number") bill_type: BillType number: int = Field(..., ge=1, description="Bill number within type/congress") title: Optional[str] = None short_title: Optional[str] = None # Sponsorship primary_sponsor: Optional["Sponsor"] = None cosponsors: List["Sponsor"] = Field(default_factory=lambda: []) # Legislative process introduced_date: Optional[date] = None actions: List["LegislativeAction"] = Field(default_factory=lambda: []) # External IDs congress_gov_url: Optional[HttpUrl] = None @property def bill_id(self) -> str: """Unique bill identifier""" return f"{self.bill_type.value}{self.number}" @property def full_id(self) -> str: """Full bill identifier with congress""" return f"{self.congress}-{self.bill_id}" class Config: use_enum_values = True class PublicLaw(BaseModel): """A bill that has been enacted into public law""" congress: int = Field(..., ge=1, description="Congressional session number") law_number: int = Field(..., ge=1, description="Public law number within congress") # Enactment details enacted_date: date signed_date: Optional[date] = None # Source bill (if known) source_bill: Optional[Bill] = None # US Code impact affected_titles: List[int] = Field(default_factory=lambda: [], description="US Code titles affected") # House release point data release_path: str = Field(..., description="House download path (e.g., '119/26not21')") # Metadata title: Optional[str] = None description: Optional[str] = None @property def public_law_id(self) -> str: """Standard public law identifier""" return f"PL {self.congress}-{self.law_number}" @property def formal_citation(self) -> str: """Formal legal citation""" return f"Public Law {self.congress}-{self.law_number:03d}" @field_validator('affected_titles') @classmethod def validate_titles(cls, v: List[int]) -> List[int]: """Validate US Code title numbers""" for title in v: if not (1 <= title <= 54): raise ValueError(f"Invalid US Code title: {title}") return sorted(v) class Config: use_enum_values = True class USCodeRelease(BaseModel): """A specific release of the US Code from House sources""" public_law: PublicLaw # Download metadata download_url: str download_date: Optional[datetime] = None file_size: Optional[int] = None file_hash: Optional[str] = Field(None, description="SHA-256 hash") # Extraction metadata extracted_date: Optional[datetime] = None extraction_path: Optional[Path] = None file_count: Optional[int] = None # Git metadata git_commit_hash: Optional[str] = None git_tag: Optional[str] = None @property def release_filename(self) -> str: """Standard filename for this release""" return self.public_law.release_path.replace("/", "-") class CongressionalSession(BaseModel): """Information about a congressional session""" congress: int = Field(..., ge=1) session: Literal[1, 2] = Field(..., description="1st or 2nd session") start_date: date end_date: Optional[date] = None # Leadership house_speaker: Optional[Sponsor] = None senate_majority_leader: Optional[Sponsor] = None house_majority_leader: Optional[Sponsor] = None senate_minority_leader: Optional[Sponsor] = None # Party control house_majority_party: Optional[PoliticalParty] = None senate_majority_party: Optional[PoliticalParty] = None @property def session_id(self) -> str: """Session identifier""" return f"{self.congress}-{self.session}" @property def formal_name(self) -> str: """Formal session name""" ordinal = "1st" if self.session == 1 else "2nd" return f"{self.congress}th Congress, {ordinal} Session" class Config: use_enum_values = True class GitCommitMetadata(BaseModel): """Metadata for git commits in the US Code repository""" public_law: PublicLaw # Git data commit_hash: str tag_name: str author_name: str author_email: str commit_date: datetime message: str # File changes files_changed: int lines_added: int lines_deleted: int # Repository state repository_path: Path is_initial_commit: bool = False class APICache(BaseModel): """Cache entry for Congress.gov API responses""" cache_key: str congress: int law_number: int # Cache metadata cached_date: datetime api_response: Dict[str, Any] sponsor_found: bool = False # Extracted sponsor (if found) sponsor: Optional[Sponsor] = None class RepositoryMetadata(BaseModel): """Overall metadata for the US Code git repository""" created_date: datetime last_updated: datetime # Coverage earliest_law: PublicLaw latest_law: PublicLaw total_laws: int total_commits: int # Data sources congress_api_key_used: bool house_source_verified: bool # Repository info repository_path: Path total_size: Optional[int] = None # Utility functions for model creation def create_sponsor_from_congress_api(api_data: Dict[str, Any]) -> Sponsor: """Create Sponsor from Congress.gov API response""" return Sponsor( bioguide_id=api_data.get('bioguideId'), title=api_data.get('title', ''), first_name=api_data.get('firstName', ''), last_name=api_data.get('lastName', ''), full_name=api_data.get('fullName', ''), party=PoliticalParty(api_data.get('party', 'Unknown')), state=api_data.get('state', ''), district=api_data.get('district'), chamber=CongressionalChamber.HOUSE if api_data.get('title') == 'Rep.' else CongressionalChamber.SENATE ) def create_public_law_from_house_data(house_data: Dict[str, Any]) -> PublicLaw: """Create PublicLaw from House release point data""" return PublicLaw( congress=house_data['congress'], law_number=house_data['law'], enacted_date=datetime.strptime(house_data['date'], '%m/%d/%Y').date(), release_path=house_data['releasePath'], affected_titles=house_data['affectedTitles'] )