# Exercise 2: Date Operations
# Module 3: Data Wrangling with dplyr
# ============================================================================

# Load required packages
library(dplyr)
library(lubridate)
library(data.table)
library(here)

# Load the VAT panel from the Intermediate data folder.
# The path is passed via `file =` so fread() always treats it as a file on
# disk and never as a shell command. Supplying `cmd =` (even `cmd = FALSE`)
# together with a positional input makes fread() stop with
# "Used more than one of the arguments input=, file=, text= and cmd=".
panel_vat <- fread(file = here("Data", "Intermediate", "panel_vat.csv"))

# ============================================================================
# TASK 1: PARSE DATES
# ============================================================================

# Convert declaration_date to proper Date format using as.Date()
# TODO: Convert to Date


# Check the class to verify it's a Date
# TODO: Check class


# ============================================================================
# TASK 2: EXTRACT DATE COMPONENTS
# ============================================================================

# Create three new columns:
# - filing_year using year()
# - filing_quarter using quarter()
# - filing_month using month()
# Save as 'panel_vat_dates'
# TODO: Create date component columns


# Display the first few rows
# TODO: Display first rows


# ============================================================================
# TASK 3: CALCULATE DIFFERENCES
# ============================================================================

# For each firm, calculate days_since_last (days between consecutive declarations)
# Steps:
# 1. Sort by firm_id and declaration_date using arrange()
# 2. Group by firm_id
# 3. Use lag() to get previous declaration_date
# 4. Calculate difference as numeric
# 5. Ungroup
# Save as 'panel_vat_gaps'
# TODO: Calculate days since last declaration


# Display selected columns: firm_id, declaration_date, days_since_last
# Show first 10 rows
# TODO: Display results


# ============================================================================
# TASK 4: FILING ANALYSIS
# ============================================================================

# Calculate quarter_end, filing_deadline, and days_late
# Steps:
# 1. Calculate quarter_end using ceiling_date(..., unit = "quarter") minus 1 day
# 2. Calculate filing_deadline as quarter_end + 45 days
# 3. Calculate days_late as declaration_date - filing_deadline
# Save as 'panel_vat_deadlines'
# TODO: Create deadline columns


# Summarize: how many declarations were late?
# Calculate:
# - total_declarations
# - late_filers (where days_late > 0)
# - pct_late (percentage late)
# TODO: Summarize late filings


# ============================================================================
# TASK 5: RECENT FILERS
# ============================================================================

# Find firms that filed in the last 180 days from today()
# Use filter() and distinct()
# Save as 'recent_filers'
# TODO: Find recent filers


# Display the number of recent filers using nrow()
# TODO: Display count


# Display the first few firm_ids
# TODO: Display first firms


# ============================================================================
# END OF EXERCISE 2
# ============================================================================
