You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
25 lines
683 B
25 lines
683 B
from PyPDF2 import PdfReader
|
|
import re
|
|
|
|
# Collect the distinct course titles found in the catalog PDF (pages 701 and
# up, counting from 1) and write them, one per line, to classes.txt.
reader = PdfReader("2020-2021+Undergraduate+Catalog.pdf")

# Only pages whose 1-based number is >= 701 are scanned.
# NOTE(review): presumably this is where the course listings begin - confirm.
text = "\n".join(
    page.extract_text()
    for page_num, page in enumerate(reader.pages, start=1)
    if page_num >= 701
)
lines = text.splitlines()

# A matching line has the shape "<anything> <4 digits> - <title>"; group 1 is
# the title. Raw string so that \d reaches the regex engine instead of being
# treated as an (invalid) string escape.
title_pattern = re.compile(r".+ \d{4} - (.+)")

# A set drops titles that appear on more than one line.
class_titles = set()
for line in lines:
    match = title_pattern.match(line)
    if match is not None:
        class_titles.add(match.group(1).strip())

# Sorted so the output file is identical between runs (set iteration order is
# not stable). The with-block closes the file on exit, so no explicit close()
# is needed. UTF-8 keeps non-ASCII titles from failing on platforms whose
# default encoding cannot represent them.
with open("classes.txt", "w", encoding="utf-8") as file1:
    file1.writelines(title + "\n" for title in sorted(class_titles))
|