
Commit

GH-12: updated tutorials with new class names
aakbik authored and tabergma committed Jul 31, 2018
1 parent 3cf4cfd commit 2b72a3d
Showing 7 changed files with 57 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -59,7 +59,7 @@ a pre-trained model and use it to predict tags for the sentence:

```python
from flair.data import Sentence
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

# make a sentence
sentence = Sentence('I love Berlin .')
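For orientation, the collapsed remainder of this README snippet loads a tagger and predicts over the sentence. A minimal sketch of that usage, assuming the post-rename API of this commit (`SequenceTagger.load('ner')` appears in the TUTORIAL_TAGGING.md hunk below; the `predict` and `to_tagged_string` calls are assumptions, not part of this diff):

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# load the pre-trained NER tagger (model name as used in TUTORIAL_TAGGING.md below)
tagger = SequenceTagger.load('ner')

# make a sentence and predict NER tags in place
sentence = Sentence('I love Berlin .')
tagger.predict(sentence)

# print the sentence with its predicted tags
print(sentence.to_tagged_string())
```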
12 changes: 0 additions & 12 deletions flair/embeddings.py
@@ -458,10 +458,6 @@ def __init__(self, word_embeddings: List[TokenEmbeddings], reproject_words: bool

        self.word_reprojection_map = torch.nn.Linear(self.__embedding_length, self.__embedding_length)

-    @property
-    def embedding_type(self):
-        return 'sentence-level'

    @property
    def embedding_length(self) -> int:
        return self.__embedding_length
@@ -544,10 +540,6 @@ def __init__(self, word_embeddings: List[TokenEmbeddings], hidden_states=128, nu
                                bidirectional=self.bidirectional)
        self.dropout = torch.nn.Dropout(0.5)

-    @property
-    def embedding_type(self):
-        return 'sentence-level'

    @property
    def embedding_length(self) -> int:
        return self.__embedding_length
@@ -640,10 +632,6 @@ def __init__(self, charlm_embeddings: List[CharLMEmbeddings], detach: bool = Tru
    def embedding_length(self) -> int:
        return self._embedding_length

-    @property
-    def embedding_type(self):
-        return 'sentence-level'

    def embed(self, sentences: List[Sentence]):

        for embedding in self.embeddings:
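All three deletions above remove the same `embedding_type` property returning `'sentence-level'`. Presumably the value is now supplied by a shared base class rather than by each document embedding individually; a hypothetical check under that assumption (not visible in this diff):

```python
# hypothetical sketch: assumes embedding_type is now defined on the base
# classes (token-level vs. document-level) after this refactoring
from flair.embeddings import WordEmbeddings

glove = WordEmbeddings('glove')
print(glove.embedding_type)  # expected 'word-level' for a token embedding
```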
79 changes: 42 additions & 37 deletions resources/docs/EXPERIMENTS.md
@@ -35,8 +35,9 @@ Now, select 'ner' as the tag you wish to predict and init the embeddings you wis
The full code to get a state-of-the-art model for English NER is as follows:

```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -52,7 +53,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    # GloVe embeddings
    WordEmbeddings('glove')
@@ -67,7 +68,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -79,9 +80,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-ner', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
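Once training completes, the saved model can be loaded back for tagging. A rough sketch only: the `load_from_file` name and the checkpoint path are assumptions drawn from later flair versions, not from this diff:

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# hypothetical checkpoint path under the training folder used above
tagger = SequenceTagger.load_from_file('resources/taggers/example-ner/model.pt')

sentence = Sentence('George Washington went to Washington .')
tagger.predict(sentence)
print(sentence.to_tagged_string())
```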
@@ -108,8 +109,9 @@ FastText embeddings (they work better on this dataset). The full code then is as


```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -125,7 +127,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('ft-crawl')
    ,
@@ -137,8 +139,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
-
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -150,9 +151,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-ner', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
@@ -176,8 +177,9 @@ Once you have the data, reproduce our experiments exactly like for CoNLL-03, jus
FastText word embeddings and German contextual string embeddings. The full code then is as follows:

```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -193,7 +195,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('ft-german')
    ,
@@ -205,7 +207,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -217,9 +219,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-ner', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
@@ -242,8 +244,9 @@ get the dataset and place train, test and dev data in `/resources/tasks/germeval
Once you have the data, reproduce our experiments exactly like for the German CoNLL-03:

```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -259,7 +262,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('ft-german')
    ,
@@ -271,7 +274,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -283,9 +286,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-ner', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
@@ -311,8 +314,9 @@ so the algorithm knows that POS tags and not NER are to be predicted from this d


```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -328,7 +332,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('extvec')
    ,
@@ -340,7 +344,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -352,9 +356,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-pos', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
@@ -378,8 +382,9 @@ Run the code with extvec embeddings and our proposed contextual string embedding
so the algorithm knows that chunking tags and not NER are to be predicted from this data.

```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -395,7 +400,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('extvec')
    ,
@@ -407,7 +412,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -419,9 +424,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=False)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

trainer.train('resources/taggers/example-pos', mini_batch_size=32, max_epochs=150, save_model=True,
              train_with_dev=True, anneal_mode=True)
4 changes: 2 additions & 2 deletions resources/docs/TUTORIAL_BASICS.md
@@ -111,7 +111,7 @@ Simply point the `NLPTaskDataFetcher` to the file containing the parsed sentence
list of `Sentence`

```python
-import NLPTaskDataFetcher
+from flair.data_fetcher import NLPTaskDataFetcher

# use your own data path
data_folder = 'path/to/conll/formatted/data'
@@ -142,7 +142,7 @@ To read a file containing text classification data simply point the `NLPTaskData
It will read the sentences into a list of `Sentence`

```python
-import NLPTaskDataFetcher
+from flair.data_fetcher import NLPTaskDataFetcher

# use your own data path
data_folder = 'path/to/text-classification/formatted/data'
2 changes: 1 addition & 1 deletion resources/docs/TUTORIAL_TAGGING.md
@@ -9,7 +9,7 @@ This model was trained over the English CoNLL-03 task and can recognize 4 differ
types.

```python
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')
```
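Tagging then proceeds as in the README example; a minimal sketch, assuming the `predict` and `to_tagged_string` calls of this flair version (not shown in this hunk):

```python
from flair.data import Sentence

sentence = Sentence('George Washington went to Washington .')

# predict NER tags and print the tagged sentence
tagger.predict(sentence)
print(sentence.to_tagged_string())
```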
8 changes: 4 additions & 4 deletions resources/docs/TUTORIAL_TEXT_EMBEDDINGS.md
@@ -33,15 +33,15 @@ So if you want to create a text embedding using GloVe embeddings together with C
use the following code:

```python
-from flair.embeddings import WordEmbeddings, CharLMEmbeddings, TextMeanEmbedder
+from flair.embeddings import WordEmbeddings, CharLMEmbeddings, DocumentMeanEmbeddings

# initialize the word embeddings
glove_embedding = WordEmbeddings('glove')
charlm_embedding_forward = CharLMEmbeddings('news-forward')
charlm_embedding_backward = CharLMEmbeddings('news-backward')

# initialize the text embeddings
-text_embeddings = TextMeanEmbedder([glove_embedding, charlm_embedding_backward, charlm_embedding_forward])
+text_embeddings = DocumentMeanEmbeddings([glove_embedding, charlm_embedding_backward, charlm_embedding_forward])
```

Now, create an example sentence and call the embedding's `embed()` method.
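The code for this step is collapsed in the hunk; a short sketch of what it describes, using the `embed(sentences: List[Sentence])` signature visible in the flair/embeddings.py hunks above (the `get_embedding()` call is an assumption):

```python
from flair.data import Sentence

# create an example sentence
sentence = Sentence('The grass is green .')

# embed() takes a list of sentences, per the signature shown above
text_embeddings.embed([sentence])

# the sentence now carries a single sentence-level embedding vector
print(sentence.get_embedding())
```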
@@ -82,11 +82,11 @@ If you want, you can also specify some other parameters:
So if you want to create a text embedding using only GloVe embeddings, use the following code:

```python
-from flair.embeddings import WordEmbeddings, TextLSTMEmbedder
+from flair.embeddings import WordEmbeddings, DocumentLSTMEmbeddings

glove_embedding = WordEmbeddings('glove')

-text_embeddings = TextLSTMEmbedder([glove_embedding])
+text_embeddings = DocumentLSTMEmbeddings([glove_embedding])
```

Now, create an example sentence and call the embedding's `embed()` method.
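The same pattern applies to the LSTM variant; a brief sketch under the same assumptions as above:

```python
from flair.data import Sentence

sentence = Sentence('The grass is green .')
text_embeddings.embed([sentence])
print(sentence.get_embedding())
```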
13 changes: 7 additions & 6 deletions resources/docs/TUTORIAL_TRAINING_A_MODEL.md
@@ -85,8 +85,9 @@ Here is example code for a small NER model trained over CoNLL-03 data, using sim
In this example, we downsample the data to 10% of the original data.

```python
-from flair.data import NLPTaskDataFetcher, TaggedCorpus, NLPTask
-from flair.embeddings import TextEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
+from flair.data import TaggedCorpus
+from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings
from typing import List
import torch

@@ -102,7 +103,7 @@ tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
-embedding_types: List[TextEmbeddings] = [
+embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('glove')

@@ -119,7 +120,7 @@ embedding_types: List[TextEmbeddings] = [
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
-from flair.tagging_model import SequenceTagger
+from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
@@ -130,9 +131,9 @@ if torch.cuda.is_available():
    tagger = tagger.cuda()

# initialize trainer
-from flair.trainer import TagTrain
+from flair.trainers import SequenceTaggerTrainer

-trainer: TagTrain = TagTrain(tagger, corpus, test_mode=True)
+trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True)

trainer.train('resources/taggers/example-ner', mini_batch_size=32, max_epochs=150, save_model=False,
              train_with_dev=False, anneal_mode=False)
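Taken together, the renames this commit propagates through the tutorials map old names to new ones as follows (a summary of the hunks above):

```python
# Old name (pre-GH-12)                ->  New name (this commit)
# flair.data.NLPTaskDataFetcher       ->  flair.data_fetcher.NLPTaskDataFetcher
# flair.data.NLPTask                  ->  flair.data_fetcher.NLPTask
# flair.embeddings.TextEmbeddings     ->  flair.embeddings.TokenEmbeddings
# flair.embeddings.TextMeanEmbedder   ->  flair.embeddings.DocumentMeanEmbeddings
# flair.embeddings.TextLSTMEmbedder   ->  flair.embeddings.DocumentLSTMEmbeddings
# flair.tagging_model.SequenceTagger  ->  flair.models.SequenceTagger
# flair.trainer.TagTrain              ->  flair.trainers.SequenceTaggerTrainer
```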
