import nbformat
import glob
import re

practiceHeadingPattern = re.compile(r'\*\*practice\s+\w+\*\*', re.IGNORECASE)

# Step 1: Locate all notebook files in the current directory
notebook_files = glob.glob("*.ipynb")

# A list to store matching markdown cells
collected_cells = []

in_practice_section = False

# Step 2: Loop through each notebook
for nb_file in notebook_files:
    nb = nbformat.read(nb_file, as_version=4)
    
    # Step 3: Check each cell in the notebook
    for cell in nb.cells:
        # Check if the cell is a markdown cell.
        if cell.cell_type == "markdown":
            # If the markdown cell is a heading that matches "practice question"
            if practiceHeadingPattern.search(cell.source):
                in_practice_section = True  # Start capturing cells
                collected_cells.append(cell)
                continue  # Move to the next cell
            
            # If we're in a practice section and encounter another markdown heading...
            if in_practice_section and (cell.source.lstrip().startswith("*") or cell.source.lstrip().startswith("#")):
                # Check if this new heading is *not* a practice question heading.
                if not practiceHeadingPattern.search(cell.source):
                    in_practice_section = False  # End the practice section capture
            
        # If we are within a practice questions section, add the cell (whether code or markdown)
        if in_practice_section:
            collected_cells.append(cell)

# Step 4: Create a new notebook with the collected markdown cells
new_nb = nbformat.v4.new_notebook()
new_nb.cells = collected_cells

# Step 5: Save the new notebook to a file
output_filename = "extracted_practice_questions.ipynb"
with open(output_filename, "w", encoding="utf-8") as f:
    nbformat.write(new_nb, f)

print(f"Extracted markdown cells have been saved to {output_filename}")