You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

25 lines
683 B

from PyPDF2 import PdfReader
import re
# Extract the distinct course titles from the catalog PDF and write them to
# classes.txt, one title per line.

# 1-based number of the first page to scan; earlier pages are skipped.
FIRST_PAGE = 701

# Matches lines shaped like "<anything> <4 digits> - <title>"; group 1 is the
# title. Written as a raw string so that \d reaches the regex engine intact,
# and compiled once so the loop below does not look the pattern up per line.
COURSE_LINE = re.compile(r".+ \d{4} - (.+)")

reader = PdfReader("2020-2021+Undergraduate+Catalog.pdf")

# Collect the text of every page from FIRST_PAGE onward, each followed by a
# newline, and join once instead of growing a string with repeated +=.
text = "".join(
    page.extract_text() + "\n"
    for page_num, page in enumerate(reader.pages, start=1)
    if page_num >= FIRST_PAGE
)

# A set de-duplicates titles that appear on more than one matching line.
class_titles = set()
lines = text.splitlines()
for line in lines:
    match = COURSE_LINE.match(line)
    if match is not None:
        class_titles.add(match.group(1).strip())

# Sort so the output file is identical from run to run (set iteration order
# is not stable for strings across interpreter runs). UTF-8 is set explicitly
# so titles with non-ASCII characters do not depend on the platform default
# encoding. The with-statement closes the file; no explicit close() needed.
with open("classes.txt", "w", encoding="utf-8") as file1:
    file1.writelines(title + "\n" for title in sorted(class_titles))