thegreekgeek revised this gist 3 months ago. Go to revision
1 file changed, 48 insertions
PracticeQuestionExtract.py(file created)
| @@ -0,0 +1,48 @@ | |||
| 1 | + | import nbformat | |
| 2 | + | import glob | |
| 3 | + | import re | |
| 4 | + | ||
# Matches a bold markdown marker such as "**Practice Question**" or
# "**practice problems**" anywhere in a cell (case-insensitive).
practiceHeadingPattern = re.compile(r'\*\*practice\s+\w+\*\*', re.IGNORECASE)

# Name of the notebook this script writes. It is also excluded from the
# input set so that re-running the script does not re-ingest its own output.
output_filename = "extracted_practice_questions.ipynb"


def extract_practice_cells(cells):
    """Return the cells of one notebook that belong to practice sections.

    A practice section starts at a markdown cell matching
    ``practiceHeadingPattern`` and runs until the next markdown cell whose
    text (after leading whitespace) starts with ``*`` or ``#`` and is not
    itself a practice heading. The practice heading cell and every cell
    inside the section (markdown or code) are returned in order.

    Args:
        cells: Iterable of objects exposing ``cell_type`` and ``source``
            attributes (e.g. ``nb.cells`` from ``nbformat.read``).

    Returns:
        A list of the selected cell objects (the same objects, not copies).
    """
    collected = []
    # State is local to this call, so a section left open at the end of one
    # notebook can never leak into the next notebook.
    in_practice_section = False

    for cell in cells:
        if cell.cell_type == "markdown":
            if practiceHeadingPattern.search(cell.source):
                in_practice_section = True
                collected.append(cell)
                continue

            # Any other markdown cell that looks like a heading ("#...") or
            # starts with "*" (bold text, but also bullets/italics) ends the
            # current section. It cannot be a practice heading here: that
            # case was handled by the branch above.
            if in_practice_section and cell.source.lstrip().startswith(("*", "#")):
                in_practice_section = False

        if in_practice_section:
            collected.append(cell)

    return collected


def main():
    """Collect practice sections from every notebook in the current directory.

    Reads each ``*.ipynb`` file (except the output file itself), gathers the
    practice-section cells, and writes them to ``output_filename`` as a new
    version-4 notebook.
    """
    # Sorted so the output order is reproducible; glob's order is arbitrary.
    notebook_files = sorted(
        nb_file for nb_file in glob.glob("*.ipynb") if nb_file != output_filename
    )

    collected_cells = []
    for nb_file in notebook_files:
        nb = nbformat.read(nb_file, as_version=4)
        collected_cells.extend(extract_practice_cells(nb.cells))

    new_nb = nbformat.v4.new_notebook()
    new_nb.cells = collected_cells

    with open(output_filename, "w", encoding="utf-8") as f:
        nbformat.write(new_nb, f)

    print(f"Extracted markdown cells have been saved to {output_filename}")


if __name__ == "__main__":
    main()
Newer
Older