# ============================================================================
# Exercise 1: Data Import - TEMPLATE
# Module: Data Importing, Tidying and Writing
# ============================================================================

# INSTRUCTIONS:
# Complete the code below by filling in the blanks (___) and following
# the step-by-step guidance. Run each section to check your work!

# ============================================================================
# PART 1: Load Packages
# ============================================================================

# Step 1: Define the packages we need
# (already filled in) The names are stored as strings so the whole vector can
# be handed to p_load() together with character.only = TRUE in Step 3.
packages <- c(
  "readxl", "dplyr", "tidyverse", "data.table", "here", "haven", "janitor"
)

# Step 2: Make sure pacman is installed
# (already filled in) requireNamespace() only checks that pacman is available;
# unlike require() it does not attach the package. That is enough here because
# Step 3 calls p_load() through the namespace as pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")

# Step 3: Load all packages using pacman
# TODO: Use p_load to load all packages
# (the blank is the character vector of package names defined in Step 1)
pacman::p_load(___, character.only = TRUE, install = TRUE)


# ============================================================================
# PART 2: Import Firm Characteristics (CSV)
# ============================================================================

# Step 4: Import firm_characteristics.csv using fread()
# TODO: Use fread() with here() to load the file from "../../data/raw/"
# NOTE: here() joins its arguments into one path, one folder name per
# argument, starting from the project root - so the two blanks are presumably
# "data" and "raw" rather than "../" (confirm against your project layout).
dt_firms <- fread(here("___", "___", "firm_characteristics.csv"))

# Step 5: Display the first 5 rows
# TODO: Use head() to show first 5 rows
# (first blank: the object to inspect, second blank: the number of rows)
head(___, ___)

# Step 6: Check the column names
# TODO: Use names() to see column names
names(___)

# Step 7: Clean the column names to snake_case
# TODO: Use clean_names() from janitor package
# The result is assigned back to dt_firms, so the cleaned names replace the
# original ones for the rest of the script.
dt_firms <- clean_names(___)


# ============================================================================
# PART 3: Import VAT Declarations (STATA .dta)
# ============================================================================

# Step 8: Import vat_declarations.dta using read_dta()
# TODO: Use read_dta() with here() to load the file from "../../data/raw/"
# NOTE: here() joins its arguments into one path, one folder name per
# argument, starting from the project root - so the two blanks are presumably
# "data" and "raw" rather than "../" (confirm against your project layout).
panel_vat <- read_dta(here("___", "___", "vat_declarations.dta"))

# Step 9: Display the first 5 rows
# TODO: Use head() to show first 5 rows
# (first blank: the object to inspect, second blank: the number of rows)
head(___, ___)

# Step 10: Check the column names
# TODO: Use names() to see column names
names(___)


# ============================================================================
# PART 4: Import CIT Declarations (Excel .xlsx, sheet 2)
# ============================================================================

# Step 11: Import cit_declarations.xlsx (sheet 2) using read_excel()
# HINT: Use the sheet parameter to specify sheet 2
# (sheet accepts either the sheet's position as a number or its name as a
# string; this exercise asks for the position)
# TODO: Use read_excel() with here() and sheet = 2
# NOTE: here() joins its arguments into one path, one folder name per
# argument, starting from the project root - so the two path blanks are
# presumably "data" and "raw" (confirm against your project layout).
panel_cit <- read_excel(here("___", "___", "cit_declarations.xlsx"), sheet = ___)

# Step 12: Display the first 5 rows
# TODO: Use head() to show first 5 rows
# (first blank: the object to inspect, second blank: the number of rows)
head(___, ___)

# Step 13: Check the column names
# TODO: Use names() to see column names
names(___)


# ============================================================================
# PART 5: BONUS - Ensure Consistent Naming
# ============================================================================

# Step 14: Check if firm ID columns have consistent names across datasets
# If panel_vat has "id_firm" instead of "firm_id", rename it
# TODO: Use rename() to change id_firm to firm_id (if needed).
# rename() takes its arguments as new_name = old_name.
# Uncomment and complete the line below if needed:
# panel_vat <- rename(panel_vat, firm_id = ___)


# ============================================================================
# CONGRATULATIONS!
# You've completed Exercise 1. Make sure all your code runs without errors.
# Compare your results with the solution script when you're done.
# ============================================================================
