# Exercise 3: Complete Transformation Workflow - SOLUTION
# Module 3: Data Wrangling with dplyr
# ============================================================================

# Load required packages
library(dplyr)
library(lubridate)
library(data.table)
library(here)
library(janitor)

# Load data from Intermediate folder.
# Each path is passed through `file =` so fread() always treats it as a file
# on disk. fread() accepts only one of input / file / text / cmd, and `cmd`
# expects a shell-command string, so a positional path combined with
# `cmd = FALSE` is rejected with "Used more than one of the arguments ...".
panel_vat <- fread(file = here("Data", "Intermediate", "panel_vat.csv"))
panel_cit <- fread(file = here("Data", "Intermediate", "panel_cit.csv"))
dt_firms <- fread(file = here("Data", "Intermediate", "dt_firms.csv"))

# Convert date columns to base Date class; the VAT transformation below
# subtracts consecutive declaration dates.
panel_vat$declaration_date <- as.Date(panel_vat$declaration_date)
panel_cit$declaration_date <- as.Date(panel_cit$declaration_date)

# ============================================================================
# TASK 1: TRANSFORM PANEL_VAT
# ============================================================================

# Build the cleaned VAT panel from `panel_vat`:
#   * calendar parts of the declaration date (year, quarter, month)
#   * net VAT position and the inputs / outputs ratio
#   * logical flags derived from those values
#   * days elapsed since the same firm's previous declaration
# mutate() evaluates its arguments in order, so later columns can refer to
# earlier ones inside a single call.
panel_vat_clean <- panel_vat %>%
  mutate(
    filing_year = year(declaration_date),
    filing_quarter = quarter(declaration_date),
    filing_month = month(declaration_date),
    net_vat = vat_outputs - vat_inputs,
    # Plain R division: vat_outputs == 0 gives Inf, or NaN for 0 / 0.
    vat_ratio = vat_inputs / vat_outputs,
    # A comparison is already logical, so no if_else(cond, TRUE, FALSE)
    # wrapper is needed; missing inputs stay NA for these two flags.
    is_refund = net_vat < 0,
    large_taxpayer = vat_outputs > 40000,
    # A missing or undefined ratio is flagged FALSE instead of NA.
    high_ratio = coalesce(vat_ratio > 0.9, FALSE)
  ) %>%
  arrange(firm_id, declaration_date) %>%
  group_by(firm_id) %>%
  mutate(
    # lag() is NA on each firm's first row, so the gap is NA there.
    days_since_last = as.numeric(declaration_date - lag(declaration_date))
  ) %>%
  ungroup()

# Save to Clean folder. fwrite() does not create missing directories, so make
# sure the folder exists first (no-op when it is already there).
dir.create(here("Data", "Clean"), recursive = TRUE, showWarnings = FALSE)
fwrite(panel_vat_clean, here("Data", "Clean", "panel_vat_clean.csv"))

# ============================================================================
# TASK 2: TRANSFORM PANEL_CIT
# ============================================================================

# Build the cleaned CIT panel from `panel_cit`: filing year and quarter taken
# from the declaration date, the effective tax rate, and a flag for non-zero
# adjustments.
panel_cit_clean <- panel_cit %>%
  mutate(
    filing_year = year(declaration_date),
    filing_quarter = quarter(declaration_date),
    # Plain R division: taxable_income == 0 gives Inf, or NaN for 0 / 0.
    effective_tax_rate = tax_paid / taxable_income,
    # The comparison is already logical; a missing adjustment stays NA,
    # exactly as if_else(cond, TRUE, FALSE) would return.
    has_adjustments = adjustments != 0
  )

# Save to Clean folder. fwrite() does not create missing directories, so make
# sure the folder exists first (no-op when it is already there).
dir.create(here("Data", "Clean"), recursive = TRUE, showWarnings = FALSE)
fwrite(panel_cit_clean, here("Data", "Clean", "panel_cit_clean.csv"))

# ============================================================================
# TASK 3: TRANSFORM DT_FIRMS
# ============================================================================

# Standardise the firm table's column names with janitor::clean_names().
dt_firms_clean <- clean_names(dt_firms)

# Write the renamed firm table to the Clean folder.
fwrite(
  x = dt_firms_clean,
  file = here("Data", "Clean", "dt_firms_clean.csv")
)

# ============================================================================
# TASK 4: VERIFY YOUR WORK
# ============================================================================

# Check that all files exist (each call returns TRUE or FALSE)
file.exists(here("Data", "Clean", "panel_vat_clean.csv"))
file.exists(here("Data", "Clean", "panel_cit_clean.csv"))
file.exists(here("Data", "Clean", "dt_firms_clean.csv"))

# Load files back and verify columns.
# Paths go through `file =`: fread() accepts only one of input / file / text /
# cmd, and `cmd` expects a shell-command string, so a positional path combined
# with `cmd = FALSE` is rejected.
vat_verify <- fread(file = here("Data", "Clean", "panel_vat_clean.csv"))
cit_verify <- fread(file = here("Data", "Clean", "panel_cit_clean.csv"))
firms_verify <- fread(file = here("Data", "Clean", "dt_firms_clean.csv"))

# Check column names
names(vat_verify)
names(cit_verify)
names(firms_verify)

# Use glimpse() to verify the structure
glimpse(vat_verify)
glimpse(cit_verify)
glimpse(firms_verify)

# ============================================================================
# END OF EXERCISE 3 SOLUTION
# ============================================================================
