Last active 3 months ago

PracticeQuestionExtract.py Raw
1import nbformat
2import glob
3import re
4
# Extract "practice" sections from every notebook in the current directory
# and write them into a single new notebook.
#
# A practice section starts at a markdown cell containing a bold heading such
# as "**Practice Question**" and runs until the next markdown cell that looks
# like a different heading (its text starts with "*" or "#").

# Matches "**practice <word>**" in any letter case; exactly one word may
# follow "practice" before the closing "**".
practiceHeadingPattern = re.compile(r'\*\*practice\s+\w+\*\*', re.IGNORECASE)

# Name of the notebook this script writes. Defined up front so it can be
# excluded from the input scan below.
output_filename = "extracted_practice_questions.ipynb"

# Step 1: Locate all notebook files in the current directory.
# - The output file is skipped: it lives in the same directory and matches
#   the same glob, so a second run would otherwise re-ingest its own output
#   and duplicate every extracted cell.
# - The list is sorted because glob.glob() returns names in arbitrary order;
#   sorting makes the order of the extracted cells reproducible.
notebook_files = sorted(
    path for path in glob.glob("*.ipynb") if path != output_filename
)

# Cells (markdown and code) that belong to a practice section, in visit order
collected_cells = []

# True while the cell loop is inside a practice section
in_practice_section = False

# Step 2: Loop through each notebook
for nb_file in notebook_files:
    nb = nbformat.read(nb_file, as_version=4)

    # A practice section never spans two notebooks: reset the flag so that a
    # notebook ending mid-section does not cause the leading cells of the
    # next notebook to be captured.
    in_practice_section = False

    # Step 3: Check each cell in the notebook
    for cell in nb.cells:
        if cell.cell_type == "markdown":
            # A practice heading opens (or continues) a practice section and
            # is itself kept.
            if practiceHeadingPattern.search(cell.source):
                in_practice_section = True
                collected_cells.append(cell)
                continue

            # Any other markdown cell that starts like a heading ("*" or "#")
            # closes the section. Practice headings never reach this point
            # because of the `continue` above.
            if in_practice_section and cell.source.lstrip().startswith(("*", "#")):
                in_practice_section = False

        # Inside a practice section every cell is kept, code or markdown
        if in_practice_section:
            collected_cells.append(cell)

# Step 4: Create a new notebook holding the collected cells
new_nb = nbformat.v4.new_notebook()
new_nb.cells = collected_cells

# Step 5: Save the new notebook to a file
with open(output_filename, "w", encoding="utf-8") as f:
    nbformat.write(new_nb, f)

print(f"Extracted markdown cells have been saved to {output_filename}")
49