From bbb9fcf3ad8788469751e0e245a0b18866b77e21 Mon Sep 17 00:00:00 2001 From: Quildra Date: Wed, 14 Sep 2022 21:50:08 +0100 Subject: [PATCH] New Tests --- aitextgen_test.py | 38 +-------- aitextgen_train.py | 38 +++++++++ monsters.txt | 200 +++++++++++++++++++++++++++++++++++++++++++++ spelllist.txt | 126 ++++++++++++++++++++++++++++ 4 files changed, 367 insertions(+), 35 deletions(-) create mode 100644 aitextgen_train.py create mode 100644 monsters.txt create mode 100644 spelllist.txt diff --git a/aitextgen_test.py b/aitextgen_test.py index e88aeb7..a60ba02 100644 --- a/aitextgen_test.py +++ b/aitextgen_test.py @@ -1,41 +1,9 @@ -from aitextgen.TokenDataset import TokenDataset -from aitextgen.tokenizers import train_tokenizer -from aitextgen.utils import GPT2ConfigCPU -#from aitextgen.utils import GPT2ConfigGPU from aitextgen import aitextgen if __name__ == '__main__': - # The name of the downloaded Shakespeare text for training - file_name = "classes.txt" - - # Train a custom BPE Tokenizer on the downloaded text - # This will save one file: `aitextgen.tokenizer.json`, which contains the - # information needed to rebuild the tokenizer. - train_tokenizer(file_name) - tokenizer_file = "aitextgen.tokenizer.json" - - # GPT2ConfigCPU is a mini variant of GPT-2 optimized for CPU-training - # e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2. - config = GPT2ConfigCPU() - #config = GPT2ConfigGPU() - - # Instantiate aitextgen using the created tokenizer and config - ai = aitextgen(tokenizer_file=tokenizer_file, config=config, to_gpu=True) - - # You can build datasets for training by creating TokenDatasets, - # which automatically processes the dataset with the appropriate size. - data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64) - - # Train the model! It will save pytorch_model.bin periodically and after completion to the `trained_model` folder. - # On a 2020 8-core iMac, this took ~25 minutes to run. - ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000) - - # Generate text from it! - ai.generate(10) - # With your trained model, you can reload the model at any time by # providing the folder containing the pytorch_model.bin model weights + the config, and providing the tokenizer. - #ai2 = aitextgen(model_folder="trained_model", - #tokenizer_file="aitextgen.tokenizer.json") + ai2 = aitextgen(model_folder="trained_model", + tokenizer_file="aitextgen.tokenizer.json") - #ai2.generate(10, prompt="ROMEO:") \ No newline at end of file + ai2.generate(10) \ No newline at end of file diff --git a/aitextgen_train.py b/aitextgen_train.py new file mode 100644 index 0000000..a783134 --- /dev/null +++ b/aitextgen_train.py @@ -0,0 +1,38 @@ +from aitextgen.TokenDataset import TokenDataset, merge_datasets +from aitextgen.tokenizers import train_tokenizer +from aitextgen.utils import GPT2ConfigCPU +from aitextgen import aitextgen + +if __name__ == '__main__': + # The name of the downloaded Shakespeare text for training + file_name = "classes.txt" + spell_file_name = "spelllist.txt" + monster_file_name = "monsters.txt" + + # Train a custom BPE Tokenizer on the downloaded text + # This will save one file: `aitextgen.tokenizer.json`, which contains the + # information needed to rebuild the tokenizer. + train_tokenizer(file_name) + tokenizer_file = "aitextgen.tokenizer.json" + + # GPT2ConfigCPU is a mini variant of GPT-2 optimized for CPU-training + # e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2. + config = GPT2ConfigCPU() + + # Instantiate aitextgen using the created tokenizer and config + ai = aitextgen(tokenizer_file=tokenizer_file, config=config, to_gpu=True) + + # You can build datasets for training by creating TokenDatasets, + # which automatically processes the dataset with the appropriate size. + class_data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True) + spell_data = TokenDataset(spell_file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True) + monster_data = TokenDataset(monster_file_name, tokenizer_file=tokenizer_file, block_size=64, line_by_line=True) + + data = merge_datasets([class_data, spell_data, monster_data]) + + # Train the model! It will save pytorch_model.bin periodically and after completion to the `trained_model` folder. + # On a 2020 8-core iMac, this took ~25 minutes to run. + ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000) + + # Generate text from it! + ai.generate(10) \ No newline at end of file diff --git a/monsters.txt b/monsters.txt new file mode 100644 index 0000000..6680d27 --- /dev/null +++ b/monsters.txt @@ -0,0 +1,200 @@ +Aboleth +Angel, Deva +Angel, Planetar +Angel, Solar +Animated Armor +Ankheg +Azer +Basilisk +Behir +Black Pudding +Bugbear +Bulette +Centaur +Chimera +Chuul +Cloaker +Cockatrice +Darkmantle +Demon, Balor +Demon, Dretch +Demon, Glabrezu +Demon, Hezrou +Demon, Marilith +Demon, Nalfeshnee +Demon, Quasit +Demon, Vrock +Devil, Barbed +Devil, Bearded +Devil, Bone +Devil, Chain +Devil, Erinyes +Devil, Horned +Devil, Ice +Devil, Imp +Devil, Lemure +Devil, Pit Fiend +Doppelganger +Dragon Turtle +Dragon, Black +Dragon, Black +Dragon, Black +Dragon, Black +Dragon, Blue +Dragon, Blue +Dragon, Blue +Dragon, Blue +Dragon, Brass +Dragon, Brass +Dragon, Brass +Dragon, Brass +Dragon, Bronze +Dragon, Bronze +Dragon, Bronze +Dragon, Bronze +Dragon, Copper +Dragon, Copper +Dragon, Copper +Dragon, Copper +Dragon, Gold +Dragon, Gold +Dragon, Gold +Dragon, Gold +Dragon, Green +Dragon, Green +Dragon, Green +Dragon, Green +Dragon, Red +Dragon, Red +Dragon, Red +Dragon, Red +Dragon, Silver +Dragon, Silver +Dragon, Silver +Dragon, Silver +Dragon, White +Dragon, White +Dragon, White +Dragon, White +Drider +Dryad +Duergar +Elemental, Air +Elemental, Earth +Elemental, Fire +Elemental, Water +Elf, Drow +Ettercap +Ettin +Flying Sword +Gargoyle +Gelatinous Cube +Genie, Djinni +Genie, Efreeti +Ghast +Ghost +Ghoul +Giant, Cloud +Giant, Fire +Giant, Frost +Giant, Hill +Giant, Stone +Giant, Storm +Gibbering Mouther +Gnoll +Gnome, Deep +Goblin +Golem, Clay +Golem, Flesh +Golem, Iron +Golem, Stone +Gorgon +Gray Ooze +Grick +Griffon +Grimlock +Hag, Green +Hag, Night +Hag, Sea +Half-Red Dragon Veteran +Harpy +Hell Hound +Hippogriff +Hobgoblin +Homunculus +Hydra +Invisible Stalker +Kobold +Kraken +Lamia +Lich +Lizardfolk +Magmin +Manticore +Medusa +Mephit, Dust +Mephit, Ice +Mephit, Magma +Mephit, Steam +Merfolk +Merrow +Mimic +Minotaur +Mummy +Mummy Lord +Naga, Guardian +Naga, Spirit +Nightmare +Ochre Jelly +Ogre +Ogre Zombie +Oni +Orc +Otyugh +Owlbear +Pegasus +Plesiosaurus +Pseudodragon +Purple Worm +Rakshasa +Remorhaz +Roc +Roper +Rug of Smothering +Rust Monster +Sahuagin +Salamander +Satyr +Shadow +Shambling Mound +Shield Guardian +Shrieker +Skeleton +Skeleton, Minotaur +Skeleton, Warhorse +Specter +Sphinx, Androsphinx +Sphinx, Gynosphinx +Sprite +Stirge +Succubus/Incubus +Tarrasque +Treant +Triceratops +Troll +Tyrannosaurus Rex +Unicorn +Vampire +Vampire Spawn +Violet Fungus +Werebear +Wereboar +Wererat +Weretiger +Werewolf +Wight +Will-o’-Wisp +Wraith +Wyvern +Xorn +Zombie \ No newline at end of file diff --git a/spelllist.txt b/spelllist.txt new file mode 100644 index 0000000..56dc35d --- /dev/null +++ b/spelllist.txt @@ -0,0 +1,126 @@ +Acid Splash +Aid +Antimagic Field +Arcane Eye +Arcane Lock +Astral Projection +Augury +Beacon of Hope +Blade Barrier +Bless +Blur +Burning Hands +Chain Lightning +Charm Person +Command +Commune +Comprehend Languages +Cone of Cold +Counterspell +Cure Wounds +Dancing Lights +Darkness +Death Ward +Delayed Blast Fireball +Detect Magic +Dimension Door +Disguise Self +Disintegrate +Dispel Magic +Divination +Dominate Monster +Dominate Person +Dream +Earthquake +Etherealness +Find the Path +Finger of Death +Fireball +Fire Bolt +Fire Storm +Flame Strike +Flaming Sphere +Fly +Foresight +Freedom of Movement +Gate +Globe of Invulnerability +Greater Invisibility +Greater Restoration +Guardian of Faith +Guidance +Guiding Bolt +Harm +Haste +Heal +Healing Word +Heroes’ Feast +Hold Person +Holy Aura +Ice Storm +Identify +Imprisonment +Inflict Wounds +Invisibility +Knock +Lesser Restoration +Levitate +Light +Lightning Bolt +Locate Creature +Mage Armor +Mage Hand +Magic Missile +Magic Weapon +Major Image +Mass Cure Wounds +Mass Heal +Mass Healing Word +Mass Suggestion +Maze +Meteor Swarm +Minor Illusion +Misty Step +Mordenkainen’s Sword +Otto’s Irresistible Dance +Passwall +Poison Spray +Power Word Kill +Power Word Stun +Prayer of Healing +Prestidigitation +Protection from Energy +Raise Dead +Ray of Frost +Regenerate +Remove Curse +Resistance +Resurrection +Revivify +Sacred Flame +Sanctuary +Shatter +Shield +Shield of Faith +Shocking Grasp +Silence +Silent Image +Sleep +Spare the Dying +Speak with Dead +Spider Climb +Spirit Guardians +Spiritual Weapon +Stoneskin +Suggestion +Sunburst +Teleport +Thaumaturgy +Thunderwave +Time Stop +True Resurrection +True Seeing +Wall of Fire +Wall of Stone +Warding Bond +Web