Last active 3 months ago

thegreekgeek's Avatar thegreekgeek revised this gist 3 months ago. Go to revision

1 file changed, 48 insertions

PracticeQuestionExtract.py(file created)

@@ -0,0 +1,48 @@
1 + import nbformat
2 + import glob
3 + import re
4 +
# Matches a bold "practice <word>" marker such as "**Practice Question**",
# case-insensitively, anywhere inside a markdown cell.
practiceHeadingPattern = re.compile(r'\*\*practice\s+\w+\*\*', re.IGNORECASE)

# Name of the notebook this script writes. It lives in the same directory
# that is scanned, so it must be excluded from the inputs.
OUTPUT_FILENAME = "extracted_practice_questions.ipynb"


def extract_practice_cells(cells):
    """Return the cells of one notebook that belong to practice sections.

    A practice section starts at a markdown cell whose source matches
    ``practiceHeadingPattern`` and runs until the next markdown cell whose
    source (ignoring leading whitespace) starts with ``*`` or ``#`` and is
    not itself a practice heading. The heading cell and every cell inside
    the section, code or markdown, are returned in their original order.

    Args:
        cells: Iterable of objects exposing ``cell_type`` and ``source``
            attributes (for example ``nb.cells`` of a notebook).

    Returns:
        A new list holding the selected cell objects (not copies).
    """
    collected = []
    # Local to each call, so an unterminated section at the end of one
    # notebook cannot leak into the next notebook.
    in_practice_section = False

    for cell in cells:
        if cell.cell_type == "markdown":
            if practiceHeadingPattern.search(cell.source):
                in_practice_section = True
                collected.append(cell)
                continue

            # Any other markdown cell that opens with "*" or "#" is treated
            # as a new heading and closes the current practice section.
            # Code cells never close a section, even when they start with
            # a "#" comment.
            if in_practice_section and cell.source.lstrip().startswith(("*", "#")):
                in_practice_section = False

        if in_practice_section:
            collected.append(cell)

    return collected


def main():
    """Collect practice sections from every notebook in the current directory.

    Reads each ``*.ipynb`` file in the working directory except the output
    file itself, gathers the practice-section cells, and writes them to
    ``OUTPUT_FILENAME`` as a new notebook.
    """
    # Sorted for a reproducible cell order; the output file is skipped so a
    # re-run does not feed its own previous result back in.
    notebook_files = sorted(
        path for path in glob.glob("*.ipynb") if path != OUTPUT_FILENAME
    )

    collected_cells = []
    for nb_file in notebook_files:
        nb = nbformat.read(nb_file, as_version=4)
        collected_cells.extend(extract_practice_cells(nb.cells))

    # Build a fresh notebook that holds the collected cells (code and markdown).
    new_nb = nbformat.v4.new_notebook()
    new_nb.cells = collected_cells

    with open(OUTPUT_FILENAME, "w", encoding="utf-8") as f:
        nbformat.write(new_nb, f)

    print(f"Extracted markdown cells have been saved to {OUTPUT_FILENAME}")


if __name__ == "__main__":
    main()
Newer Older