Code sketch. Download cache working.
This commit is contained in:
300
models.py
Normal file
300
models.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""
|
||||
Data models for the US Code git repository system.
Provides normalized, validated structures for all legislative data.
|
||||
"""
|
||||
|
||||
from datetime import datetime, date
|
||||
from typing import List, Optional, Dict, Any, Literal
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel, Field, field_validator, HttpUrl
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class CongressionalChamber(str, Enum):
    """The two chambers of the United States Congress.

    Members are also ``str`` instances, so each one compares equal to its
    plain-string value (for example ``CongressionalChamber.HOUSE == "House"``).
    """

    HOUSE = "House"
    SENATE = "Senate"
|
||||
|
||||
|
||||
class PoliticalParty(str, Enum):
    """Party affiliation codes; extend with new members as needed.

    Members are also ``str`` instances, so each one compares equal to its
    plain-string value.
    """

    # Single-letter party codes
    DEMOCRATIC = "D"
    REPUBLICAN = "R"
    INDEPENDENT = "I"
    LIBERTARIAN = "L"
    GREEN = "G"

    # Placeholder used when the affiliation is not known
    UNKNOWN = "Unknown"
|
||||
|
||||
|
||||
class BillType(str, Enum):
    """Abbreviations for the kinds of bills and resolutions in Congress.

    Members are also ``str`` instances, so each one compares equal to its
    plain-string abbreviation.
    """

    # Bills
    HOUSE_BILL = "HR"
    SENATE_BILL = "S"

    # Joint resolutions
    HOUSE_JOINT_RESOLUTION = "HJR"
    SENATE_JOINT_RESOLUTION = "SJR"

    # Concurrent resolutions
    HOUSE_CONCURRENT_RESOLUTION = "HCR"
    SENATE_CONCURRENT_RESOLUTION = "SCR"

    # Simple resolutions
    HOUSE_RESOLUTION = "HRES"
    SENATE_RESOLUTION = "SRES"
|
||||
|
||||
|
||||
class LegislativeAction(BaseModel):
    """One dated step in a bill's legislative history.

    Only ``date`` and ``text`` are required; the action code and the chamber
    are optional.
    """

    # Required: when the action happened and its description
    date: date
    action_code: Optional[str] = None
    text: str
    chamber: Optional[CongressionalChamber] = None

    class Config:
        # Store enum fields as their plain values instead of enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class Sponsor(BaseModel):
    """A member of Congress who sponsors or cosponsors legislation.

    ``email`` and ``formal_name`` are derived on access and are not stored as
    model fields.
    """

    bioguide_id: Optional[str] = Field(default=None, description="Biographical Directory ID")
    title: str = Field(..., description="Rep. or Sen.")
    first_name: str
    last_name: str
    full_name: str
    party: PoliticalParty
    state: str = Field(..., min_length=2, max_length=2, description="Two-letter state code")
    district: Optional[int] = Field(default=None, description="House district number (if applicable)")
    chamber: CongressionalChamber

    # Derived, read-only values
    @property
    def email(self) -> str:
        """Build a ``first.last@<chamber domain>`` address from the member's name.

        Names are lower-cased and internal spaces are removed. House members
        get ``house.gov``; everyone else gets ``senate.gov``.
        """
        if self.chamber == CongressionalChamber.HOUSE:
            host = "house.gov"
        else:
            host = "senate.gov"
        local_part = ".".join(
            name.lower().replace(" ", "")
            for name in (self.first_name, self.last_name)
        )
        return f"{local_part}@{host}"

    @property
    def formal_name(self) -> str:
        """Return the title and full name followed by ``(party-state)``."""
        affiliation = f"{self.party}-{self.state}"
        return f"{self.title} {self.full_name} ({affiliation})"

    class Config:
        # Store enum fields as their plain values instead of enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class Bill(BaseModel):
    """Congressional bill that may become a public law.

    A bill is identified by its congress, its type and its number within that
    type. Sponsorship, legislative history and the external URL are optional.
    """

    congress: int = Field(..., ge=1, description="Congressional session number")
    bill_type: BillType
    number: int = Field(..., ge=1, description="Bill number within type/congress")
    title: Optional[str] = None
    short_title: Optional[str] = None

    # Sponsorship
    primary_sponsor: Optional["Sponsor"] = None
    cosponsors: List["Sponsor"] = Field(default_factory=list)

    # Legislative process
    introduced_date: Optional[date] = None
    actions: List["LegislativeAction"] = Field(default_factory=list)

    # External IDs
    congress_gov_url: Optional[HttpUrl] = None

    @property
    def bill_id(self) -> str:
        """Return the bill identifier without the congress, e.g. ``"HR1234"``.

        Because ``Config.use_enum_values`` is enabled, ``bill_type`` is stored
        as a plain string after validation and has no ``.value`` attribute.
        ``BillType(...)`` accepts both the raw string and an enum member, so
        the identifier is built correctly either way.
        """
        return f"{BillType(self.bill_type).value}{self.number}"

    @property
    def full_id(self) -> str:
        """Return the bill identifier prefixed with the congress, e.g. ``"119-HR1234"``."""
        return f"{self.congress}-{self.bill_id}"

    class Config:
        # Store enum fields as their plain values instead of enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class PublicLaw(BaseModel):
    """An enacted public law, identified by congress and law number.

    ``affected_titles`` is validated to contain only title numbers from 1 to
    54 and is stored in ascending order.
    """

    congress: int = Field(..., ge=1, description="Congressional session number")
    law_number: int = Field(..., ge=1, description="Public law number within congress")

    # Enactment details
    enacted_date: date
    signed_date: Optional[date] = None

    # Originating bill, when it is known
    source_bill: Optional[Bill] = None

    # US Code impact
    affected_titles: List[int] = Field(default_factory=list, description="US Code titles affected")

    # House release point data
    release_path: str = Field(..., description="House download path (e.g., '119/26not21')")

    # Descriptive metadata
    title: Optional[str] = None
    description: Optional[str] = None

    @property
    def public_law_id(self) -> str:
        """Return the short identifier, e.g. ``"PL 119-21"``."""
        return f"PL {self.congress}-{self.law_number}"

    @property
    def formal_citation(self) -> str:
        """Return the long citation with the law number zero-padded to three digits."""
        padded_number = f"{self.law_number:03d}"
        return f"Public Law {self.congress}-{padded_number}"

    @field_validator('affected_titles')
    @classmethod
    def validate_titles(cls, v: List[int]) -> List[int]:
        """Reject title numbers outside 1..54 and return the list sorted.

        Raises:
            ValueError: naming the first out-of-range title encountered.
        """
        first_invalid = next((t for t in v if t < 1 or t > 54), None)
        if first_invalid is not None:
            raise ValueError(f"Invalid US Code title: {first_invalid}")
        return sorted(v)

    class Config:
        # Store enum fields as their plain values instead of enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class USCodeRelease(BaseModel):
    """One US Code release point, tied to the public law it reflects.

    Only ``public_law`` and ``download_url`` are required. The download,
    extraction and git fields default to ``None``.
    """

    public_law: PublicLaw

    # Download metadata
    download_url: str
    download_date: Optional[datetime] = None
    file_size: Optional[int] = None
    file_hash: Optional[str] = Field(default=None, description="SHA-256 hash")

    # Extraction metadata
    extracted_date: Optional[datetime] = None
    extraction_path: Optional[Path] = None
    file_count: Optional[int] = None

    # Git metadata
    git_commit_hash: Optional[str] = None
    git_tag: Optional[str] = None

    @property
    def release_filename(self) -> str:
        """Return the law's release path with every ``/`` turned into ``-``."""
        path_segments = self.public_law.release_path.split("/")
        return "-".join(path_segments)
|
||||
|
||||
|
||||
class CongressionalSession(BaseModel):
    """Information about one session (1st or 2nd) of a numbered Congress.

    Leadership and party-control fields are optional and default to ``None``.
    """

    congress: int = Field(..., ge=1)
    session: Literal[1, 2] = Field(..., description="1st or 2nd session")
    start_date: date
    end_date: Optional[date] = None

    # Leadership
    house_speaker: Optional[Sponsor] = None
    senate_majority_leader: Optional[Sponsor] = None
    house_majority_leader: Optional[Sponsor] = None
    senate_minority_leader: Optional[Sponsor] = None

    # Party control
    house_majority_party: Optional[PoliticalParty] = None
    senate_majority_party: Optional[PoliticalParty] = None

    @property
    def session_id(self) -> str:
        """Return the ``"<congress>-<session>"`` identifier, e.g. ``"119-1"``."""
        return f"{self.congress}-{self.session}"

    @property
    def formal_name(self) -> str:
        """Return the formal name, e.g. ``"101st Congress, 2nd Session"``.

        The congress number gets a proper English ordinal suffix instead of
        a fixed "th", which produced names such as "101th Congress".
        """
        # 11, 12 and 13 (and 111, 112, ...) take "th" despite ending in 1/2/3.
        if 11 <= self.congress % 100 <= 13:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(self.congress % 10, "th")
        ordinal = "1st" if self.session == 1 else "2nd"
        return f"{self.congress}{suffix} Congress, {ordinal} Session"

    class Config:
        # Store enum fields as their plain values instead of enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class GitCommitMetadata(BaseModel):
    """Record of one git commit in the US Code repository.

    Every field except ``is_initial_commit`` is required.
    """

    # The public law this commit corresponds to
    public_law: PublicLaw

    # Git data
    commit_hash: str
    tag_name: str
    author_name: str
    author_email: str
    commit_date: datetime
    message: str

    # Change statistics
    files_changed: int
    lines_added: int
    lines_deleted: int

    # Repository state
    repository_path: Path
    is_initial_commit: bool = False
|
||||
|
||||
|
||||
class APICache(BaseModel):
    """A cached Congress.gov API response for one public law.

    The entry is keyed by ``cache_key`` and keeps the raw response alongside
    the sponsor extracted from it, if any.
    """

    cache_key: str
    congress: int
    law_number: int

    # Cache metadata
    cached_date: datetime
    api_response: Dict[str, Any]
    sponsor_found: bool = False

    # Sponsor extracted from the response; None when none was found
    sponsor: Optional[Sponsor] = None
|
||||
|
||||
|
||||
class RepositoryMetadata(BaseModel):
    """Summary information about the US Code git repository as a whole.

    Every field except ``total_size`` is required.
    """

    created_date: datetime
    last_updated: datetime

    # Coverage: the range of laws represented and overall counts
    earliest_law: PublicLaw
    latest_law: PublicLaw
    total_laws: int
    total_commits: int

    # Data sources
    congress_api_key_used: bool
    house_source_verified: bool

    # Repository info
    repository_path: Path
    total_size: Optional[int] = None
|
||||
|
||||
|
||||
# Utility functions for model creation
|
||||
|
||||
def create_sponsor_from_congress_api(api_data: Dict[str, Any]) -> Sponsor:
    """Create a Sponsor from a Congress.gov API sponsor record.

    Args:
        api_data: Mapping read with the keys ``bioguideId``, ``title``,
            ``firstName``, ``lastName``, ``fullName``, ``party``, ``state``
            and ``district``. Missing name, title and state keys default to
            an empty string.

    Returns:
        The validated ``Sponsor``.

    Raises:
        pydantic.ValidationError: if the assembled values fail ``Sponsor``
            validation (for example a state that is not two characters long).
    """
    # A missing, None or unrecognised party code falls back to UNKNOWN instead
    # of raising a bare ValueError from the enum lookup.
    try:
        party = PoliticalParty(api_data.get('party', PoliticalParty.UNKNOWN.value))
    except ValueError:
        party = PoliticalParty.UNKNOWN

    title = api_data.get('title', '')

    # NOTE(review): any title other than 'Rep.' (including a missing one) is
    # treated as Senate; confirm this matches the titles the API returns.
    chamber = CongressionalChamber.HOUSE if title == 'Rep.' else CongressionalChamber.SENATE

    return Sponsor(
        bioguide_id=api_data.get('bioguideId'),
        title=title,
        first_name=api_data.get('firstName', ''),
        last_name=api_data.get('lastName', ''),
        full_name=api_data.get('fullName', ''),
        party=party,
        state=api_data.get('state', ''),
        district=api_data.get('district'),
        chamber=chamber,
    )
|
||||
|
||||
|
||||
def create_public_law_from_house_data(house_data: Dict[str, Any]) -> PublicLaw:
    """Create a PublicLaw from House release point data.

    Args:
        house_data: Mapping with the required keys ``congress``, ``law``,
            ``date`` (formatted ``MM/DD/YYYY``) and ``releasePath``, plus an
            optional ``affectedTitles`` list.

    Returns:
        The validated ``PublicLaw``.

    Raises:
        KeyError: if a required key is missing.
        ValueError: if ``date`` does not match ``MM/DD/YYYY``.
        pydantic.ValidationError: if the values fail ``PublicLaw`` validation.
    """
    enacted = datetime.strptime(house_data['date'], '%m/%d/%Y').date()

    # The model treats affected titles as optional (empty by default), so a
    # record without that key should not fail with KeyError.
    titles = house_data.get('affectedTitles') or []

    return PublicLaw(
        congress=house_data['congress'],
        law_number=house_data['law'],
        enacted_date=enacted,
        release_path=house_data['releasePath'],
        affected_titles=titles,
    )
|
||||
|
Reference in New Issue
Block a user