PracticeQuestionExtract.py

PracticeQuestionExtract.py · 1.9 KiB · Python Raw

# Collect the "practice question" sections of every Jupyter notebook in the
# current directory and write them, in order, into one new notebook.
#
# A section starts at a markdown cell matching practiceHeadingPattern and
# runs (markdown and code cells alike) until the next markdown cell whose
# text starts with "#" or "*" and is not itself a practice heading.

import glob
import re

import nbformat

# Marker that identifies a practice-question heading (case-insensitive).
# NOTE(review): as written, the pattern requires a literal backslash before
# "practice" and another after the following word, i.e. it only matches text
# such as `\practice question\`. If word boundaries were intended, the
# pattern should be r'\bpractice\s+\w+\b' -- confirm against the real
# notebooks before changing it.
practiceHeadingPattern = re.compile(r'\\practice\s+\w+\\', re.IGNORECASE)

# Name of the notebook this script writes.
output_filename = "extracted_practice_questions.ipynb"

# Step 1: Locate all notebook files in the current directory.
# The output of a previous run also matches "*.ipynb"; it is skipped so that
# re-running the script does not extract (and duplicate) its own results.
# glob returns names in arbitrary order, so the list is sorted to make the
# order of the extracted cells reproducible.
notebook_files = sorted(
    path for path in glob.glob("*.ipynb") if path != output_filename
)

# Cells (markdown and code) that belong to a practice section, in order.
collected_cells = []

# True while the cells being visited are inside a practice section.
in_practice_section = False

# Step 2: Loop through each notebook.
for nb_file in notebook_files:
    nb = nbformat.read(nb_file, as_version=4)

    # A section never spans two files: without this reset, a notebook that
    # ends inside a practice section would cause the leading cells of the
    # next notebook to be captured as well.
    in_practice_section = False

    # Step 3: Check each cell in the notebook.
    for cell in nb.cells:
        if cell.cell_type == "markdown":
            # A practice heading opens (or continues) a section and is
            # itself part of the output.
            if practiceHeadingPattern.search(cell.source):
                in_practice_section = True
                collected_cells.append(cell)
                continue

            # Any other markdown cell starting with "#" or "*" is treated as
            # a heading and closes the section. The practice pattern is
            # known not to match here, so no second search is needed.
            if in_practice_section and cell.source.lstrip().startswith(("*", "#")):
                in_practice_section = False

        # Inside a practice section every cell is kept, code or markdown.
        if in_practice_section:
            collected_cells.append(cell)

# Step 4: Create a new notebook with the collected cells.
new_nb = nbformat.v4.new_notebook()
new_nb.cells = collected_cells

# Step 5: Save the new notebook to a file.
with open(output_filename, "w", encoding="utf-8") as f:
    nbformat.write(new_nb, f)

print(f"Extracted markdown cells have been saved to {output_filename}")