From bbb9fcf3ad8788469751e0e245a0b18866b77e21 Mon Sep 17 00:00:00 2001
From: Quildra <Quildra@gmail.com>
Date: Wed, 14 Sep 2022 21:50:08 +0100
Subject: [PATCH] Split training into aitextgen_train.py; add spell and monster datasets

---
 aitextgen_test.py  |  38 +--------
 aitextgen_train.py |  38 +++++++++
 monsters.txt       | 200 +++++++++++++++++++++++++++++++++++++++++++++
 spelllist.txt      | 126 ++++++++++++++++++++++++++++
 4 files changed, 367 insertions(+), 35 deletions(-)
 create mode 100644 aitextgen_train.py
 create mode 100644 monsters.txt
 create mode 100644 spelllist.txt

diff --git a/aitextgen_test.py b/aitextgen_test.py
index e88aeb7..a60ba02 100644
--- a/aitextgen_test.py
+++ b/aitextgen_test.py
@@ -1,41 +1,9 @@
-from aitextgen.TokenDataset import TokenDataset
-from aitextgen.tokenizers import train_tokenizer
-from aitextgen.utils import GPT2ConfigCPU
-#from aitextgen.utils import GPT2ConfigGPU
 from aitextgen import aitextgen
 
 if __name__ == '__main__':
-    # The name of the downloaded Shakespeare text for training
-    file_name = "classes.txt"
-
-    # Train a custom BPE Tokenizer on the downloaded text
-    # This will save one file: `aitextgen.tokenizer.json`, which contains the
-    # information needed to rebuild the tokenizer.
-    train_tokenizer(file_name)
-    tokenizer_file = "aitextgen.tokenizer.json"
-
-    # GPT2ConfigCPU is a mini variant of GPT-2 optimized for CPU-training
-    # e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2.
-    config = GPT2ConfigCPU()
-    #config = GPT2ConfigGPU()
-
-    # Instantiate aitextgen using the created tokenizer and config
-    ai = aitextgen(tokenizer_file=tokenizer_file, config=config, to_gpu=True)
-
-    # You can build datasets for training by creating TokenDatasets,
-    # which automatically processes the dataset with the appropriate size.
-    data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64)
-
-    # Train the model! It will save pytorch_model.bin periodically and after completion to the `trained_model` folder.
-    # On a 2020 8-core iMac, this took ~25 minutes to run.
-    ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000)
-
-    # Generate text from it!
-    ai.generate(10)
-
     # With your trained model, you can reload the model at any time by
     # providing the folder containing the pytorch_model.bin model weights + the config, and providing the tokenizer.
-    #ai2 = aitextgen(model_folder="trained_model",
-                    #tokenizer_file="aitextgen.tokenizer.json")
+    ai2 = aitextgen(model_folder="trained_model",
+                    tokenizer_file="aitextgen.tokenizer.json")
 
-    #ai2.generate(10, prompt="ROMEO:")
\ No newline at end of file
+    ai2.generate(10)
\ No newline at end of file
diff --git a/aitextgen_train.py b/aitextgen_train.py
new file mode 100644
index 0000000..a783134
--- /dev/null
+++ b/aitextgen_train.py
@@ -0,0 +1,38 @@
+from aitextgen.TokenDataset import TokenDataset, merge_datasets
+from aitextgen.tokenizers import train_tokenizer
+from aitextgen.utils import GPT2ConfigCPU
+from aitextgen import aitextgen
+
+if __name__ == '__main__':
+    # Text corpora used for training; each is loaded as its own dataset below.
+    file_name = "classes.txt"
+    spell_file_name = "spelllist.txt"
+    monster_file_name = "monsters.txt"
+
+    # Train a custom BPE tokenizer on every corpus the model is trained on, so
+    # the vocabulary is not learned from the classes file alone. This saves one
+    # file, `aitextgen.tokenizer.json`, which is used to rebuild the tokenizer.
+    train_tokenizer([file_name, spell_file_name, monster_file_name])
+    tokenizer_file = "aitextgen.tokenizer.json"
+
+    # GPT2ConfigCPU is a mini variant of GPT-2 optimized for CPU-training
+    # e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2.
+    config = GPT2ConfigCPU()
+
+    # Instantiate aitextgen using the created tokenizer and config
+    ai = aitextgen(tokenizer_file=tokenizer_file, config=config, to_gpu=True)
+
+    # Build one TokenDataset per corpus, all sharing the same tokenizer and
+    # block size, then merge them into a single training set.
+    class_data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True)
+    spell_data = TokenDataset(spell_file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True)
+    monster_data = TokenDataset(monster_file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True)
+
+    data = merge_datasets([class_data, spell_data, monster_data])
+
+    # Train the model! It will save pytorch_model.bin periodically and after completion to the `trained_model` folder.
+    # On a 2020 8-core iMac, this took ~25 minutes to run.
+    ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000)
+
+    # Generate text from it!
+    ai.generate(10)
\ No newline at end of file
diff --git a/monsters.txt b/monsters.txt
new file mode 100644
index 0000000..6680d27
--- /dev/null
+++ b/monsters.txt
@@ -0,0 +1,200 @@
+Aboleth  
+Angel, Deva  
+Angel, Planetar  
+Angel, Solar  
+Animated Armor  
+Ankheg  
+Azer  
+Basilisk  
+Behir  
+Black Pudding  
+Bugbear  
+Bulette  
+Centaur  
+Chimera  
+Chuul  
+Cloaker  
+Cockatrice  
+Darkmantle  
+Demon, Balor  
+Demon, Dretch  
+Demon, Glabrezu  
+Demon, Hezrou  
+Demon, Marilith  
+Demon, Nalfeshnee  
+Demon, Quasit  
+Demon, Vrock  
+Devil, Barbed  
+Devil, Bearded  
+Devil, Bone  
+Devil, Chain  
+Devil, Erinyes  
+Devil, Horned  
+Devil, Ice  
+Devil, Imp  
+Devil, Lemure  
+Devil, Pit Fiend  
+Doppelganger  
+Dragon Turtle  
+Dragon, Black  
+Dragon, Black  
+Dragon, Black  
+Dragon, Black  
+Dragon, Blue  
+Dragon, Blue  
+Dragon, Blue  
+Dragon, Blue  
+Dragon, Brass  
+Dragon, Brass  
+Dragon, Brass  
+Dragon, Brass  
+Dragon, Bronze  
+Dragon, Bronze  
+Dragon, Bronze  
+Dragon, Bronze  
+Dragon, Copper  
+Dragon, Copper  
+Dragon, Copper  
+Dragon, Copper  
+Dragon, Gold  
+Dragon, Gold  
+Dragon, Gold  
+Dragon, Gold  
+Dragon, Green  
+Dragon, Green  
+Dragon, Green  
+Dragon, Green  
+Dragon, Red  
+Dragon, Red  
+Dragon, Red  
+Dragon, Red  
+Dragon, Silver  
+Dragon, Silver  
+Dragon, Silver  
+Dragon, Silver  
+Dragon, White  
+Dragon, White  
+Dragon, White  
+Dragon, White  
+Drider  
+Dryad  
+Duergar  
+Elemental, Air  
+Elemental, Earth  
+Elemental, Fire  
+Elemental, Water  
+Elf, Drow  
+Ettercap  
+Ettin  
+Flying Sword  
+Gargoyle  
+Gelatinous Cube  
+Genie, Djinni  
+Genie, Efreeti  
+Ghast  
+Ghost  
+Ghoul  
+Giant, Cloud  
+Giant, Fire  
+Giant, Frost  
+Giant, Hill  
+Giant, Stone  
+Giant, Storm  
+Gibbering Mouther  
+Gnoll  
+Gnome, Deep  
+Goblin  
+Golem, Clay  
+Golem, Flesh  
+Golem, Iron  
+Golem, Stone  
+Gorgon  
+Gray Ooze  
+Grick  
+Griffon  
+Grimlock  
+Hag, Green  
+Hag, Night  
+Hag, Sea  
+Half-Red Dragon Veteran  
+Harpy  
+Hell Hound  
+Hippogriff  
+Hobgoblin  
+Homunculus  
+Hydra  
+Invisible Stalker  
+Kobold  
+Kraken  
+Lamia  
+Lich  
+Lizardfolk  
+Magmin  
+Manticore  
+Medusa  
+Mephit, Dust  
+Mephit, Ice  
+Mephit, Magma  
+Mephit, Steam  
+Merfolk  
+Merrow  
+Mimic  
+Minotaur  
+Mummy  
+Mummy Lord  
+Naga, Guardian  
+Naga, Spirit  
+Nightmare  
+Ochre Jelly  
+Ogre  
+Ogre Zombie  
+Oni  
+Orc  
+Otyugh  
+Owlbear  
+Pegasus  
+Plesiosaurus  
+Pseudodragon  
+Purple Worm  
+Rakshasa  
+Remorhaz  
+Roc  
+Roper  
+Rug of Smothering  
+Rust Monster  
+Sahuagin  
+Salamander  
+Satyr  
+Shadow  
+Shambling Mound  
+Shield Guardian  
+Shrieker  
+Skeleton  
+Skeleton, Minotaur  
+Skeleton, Warhorse  
+Specter  
+Sphinx, Androsphinx  
+Sphinx, Gynosphinx  
+Sprite  
+Stirge  
+Succubus/Incubus  
+Tarrasque  
+Treant  
+Triceratops  
+Troll  
+Tyrannosaurus Rex  
+Unicorn  
+Vampire  
+Vampire Spawn  
+Violet Fungus  
+Werebear  
+Wereboar  
+Wererat  
+Weretiger  
+Werewolf  
+Wight  
+Will-o’-Wisp  
+Wraith  
+Wyvern  
+Xorn  
+Zombie  
\ No newline at end of file
diff --git a/spelllist.txt b/spelllist.txt
new file mode 100644
index 0000000..56dc35d
--- /dev/null
+++ b/spelllist.txt
@@ -0,0 +1,126 @@
+Acid Splash
+Aid
+Antimagic Field
+Arcane Eye
+Arcane Lock
+Astral Projection
+Augury
+Beacon of Hope
+Blade Barrier
+Bless
+Blur
+Burning Hands
+Chain Lightning
+Charm Person
+Command
+Commune
+Comprehend Languages
+Cone of Cold
+Counterspell
+Cure Wounds
+Dancing Lights
+Darkness
+Death Ward
+Delayed Blast Fireball
+Detect Magic
+Dimension Door
+Disguise Self
+Disintegrate
+Dispel Magic
+Divination
+Dominate Monster
+Dominate Person
+Dream
+Earthquake
+Etherealness
+Find the Path
+Finger of Death
+Fireball
+Fire Bolt
+Fire Storm
+Flame Strike
+Flaming Sphere
+Fly
+Foresight
+Freedom of Movement
+Gate
+Globe of Invulnerability
+Greater Invisibility
+Greater Restoration
+Guardian of Faith
+Guidance
+Guiding Bolt
+Harm
+Haste
+Heal
+Healing Word
+Heroes’ Feast
+Hold Person
+Holy Aura
+Ice Storm
+Identify
+Imprisonment
+Inflict Wounds
+Invisibility
+Knock
+Lesser Restoration
+Levitate
+Light
+Lightning Bolt
+Locate Creature
+Mage Armor
+Mage Hand
+Magic Missile
+Magic Weapon
+Major Image
+Mass Cure Wounds
+Mass Heal
+Mass Healing Word
+Mass Suggestion
+Maze
+Meteor Swarm
+Minor Illusion
+Misty Step
+Mordenkainen’s Sword
+Otto’s Irresistible Dance
+Passwall
+Poison Spray
+Power Word Kill
+Power Word Stun
+Prayer of Healing
+Prestidigitation
+Protection from Energy
+Raise Dead
+Ray of Frost
+Regenerate
+Remove Curse
+Resistance
+Resurrection
+Revivify
+Sacred Flame
+Sanctuary
+Shatter
+Shield
+Shield of Faith
+Shocking Grasp
+Silence
+Silent Image
+Sleep
+Spare the Dying
+Speak with Dead
+Spider Climb
+Spirit Guardians
+Spiritual Weapon
+Stoneskin
+Suggestion
+Sunburst
+Teleport
+Thaumaturgy
+Thunderwave
+Time Stop
+True Resurrection
+True Seeing
+Wall of Fire
+Wall of Stone
+Warding Bond
+Web