Update duplicate copies of howtos and tutorials
pabs3 committed Mar 19, 2023
1 parent c110730 commit 73825d6
Showing 14 changed files with 28 additions and 28 deletions.
8 changes: 4 additions & 4 deletions docs/src/auto_examples/core/run_corpora_and_vector_spaces.py
@@ -72,10 +72,10 @@
# by the features extracted from it, not by its "surface" string form: how you get to
# the features is up to you. Below I describe one common, general-purpose approach (called
# :dfn:`bag-of-words`), but keep in mind that different application domains call for
-# different features, and, as always, it's `garbage in, garbage out <http://en.wikipedia.org/wiki/Garbage_In,_Garbage_Out>`_...
+# different features, and, as always, it's `garbage in, garbage out <https://en.wikipedia.org/wiki/Garbage_In,_Garbage_Out>`_...
#
# To convert documents to vectors, we'll use a document representation called
-# `bag-of-words <http://en.wikipedia.org/wiki/Bag_of_words>`_. In this representation,
+# `bag-of-words <https://en.wikipedia.org/wiki/Bag_of_words>`_. In this representation,
# each document is represented by one vector where each vector element represents
# a question-answer pair, in the style of:
#
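The bag-of-words representation described above takes only a few lines of gensim. A minimal editorial sketch, not part of this commit (the toy documents are illustrative assumptions):

from gensim import corpora

documents = [
    "Human machine interface for lab abc computer applications",
    "A survey of user opinion of computer system response time",
]
texts = [doc.lower().split() for doc in documents]         # naive whitespace tokenization
dictionary = corpora.Dictionary(texts)                     # maps each token to an integer id
bow_corpus = [dictionary.doc2bow(text) for text in texts]  # sparse (token_id, count) vectors
print(bow_corpus)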
@@ -223,7 +223,7 @@ def __iter__(self):
###############################################################################
# Other formats include `Joachim's SVMlight format <http://svmlight.joachims.org/>`_,
# `Blei's LDA-C format <https://github.com/blei-lab/lda-c>`_ and
-# `GibbsLDA++ format <http://gibbslda.sourceforge.net/>`_.
+# `GibbsLDA++ format <https://gibbslda.sourceforge.net/>`_.

corpora.SvmLightCorpus.serialize('/tmp/corpus.svmlight', corpus)
corpora.BleiCorpus.serialize('/tmp/corpus.lda-c', corpus)
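Reading a corpus back from any of these formats is symmetric to serializing it; a sketch, assuming the files written just above:

corpus = corpora.SvmLightCorpus('/tmp/corpus.svmlight')  # documents are streamed lazily from disk
print(list(corpus))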
@@ -270,7 +270,7 @@ def __iter__(self):
# Compatibility with NumPy and SciPy
# ----------------------------------
#
-# Gensim also contains `efficient utility functions <http://radimrehurek.com/gensim/matutils.html>`_
+# Gensim also contains `efficient utility functions <https://radimrehurek.com/gensim/matutils.html>`_
# to help converting from/to numpy matrices

import gensim
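A sketch of that round trip, assuming a small random dense matrix (the sizes are arbitrary):

import numpy as np

numpy_matrix = np.random.randint(10, size=(5, 2))          # 5 terms x 2 documents
corpus = gensim.matutils.Dense2Corpus(numpy_matrix)        # numpy matrix -> streamed gensim corpus
dense = gensim.matutils.corpus2dense(corpus, num_terms=5)  # streamed corpus -> numpy matrix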
2 changes: 1 addition & 1 deletion docs/src/auto_examples/core/run_corpora_and_vector_spaces.py.md5
@@ -1 +1 @@
-986566c5996bfc214bd711c0d2cf54db
+9bf95a9406b723f4397c3912a2fd7865
8 changes: 4 additions & 4 deletions docs/src/auto_examples/core/run_similarity_queries.py
@@ -96,10 +96,10 @@
print(vec_lsi)

###############################################################################
-# In addition, we will be considering `cosine similarity <http://en.wikipedia.org/wiki/Cosine_similarity>`_
+# In addition, we will be considering `cosine similarity <https://en.wikipedia.org/wiki/Cosine_similarity>`_
# to determine the similarity of two vectors. Cosine similarity is a standard measure
# in Vector Space Modeling, but wherever the vectors represent probability distributions,
-# `different similarity measures <http://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Symmetrised_divergence>`_
+# `different similarity measures <https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Symmetrised_divergence>`_
# may be more appropriate.
#
# Initializing query structures
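Initializing that query structure amounts to one call; a sketch, assuming the `lsi` model, `corpus`, and query vector `vec_lsi` built earlier in this tutorial:

from gensim import similarities

index = similarities.MatrixSimilarity(lsi[corpus])         # index the whole corpus in LSI space
sims = index[vec_lsi]                                      # cosine similarity of the query to every document
print(sorted(enumerate(sims), key=lambda item: -item[1]))  # most similar documents first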
@@ -177,10 +177,10 @@
# That doesn't mean it's perfect though:
#
# * there are parts that could be implemented more efficiently (in C, for example), or make better use of parallelism (multiple machine cores)
-# * new algorithms are published all the time; help gensim keep up by `discussing them <http://groups.google.com/group/gensim>`_ and `contributing code <https://github.com/piskvorky/gensim/wiki/Developer-page>`_
+# * new algorithms are published all the time; help gensim keep up by `discussing them <https://groups.google.com/g/gensim>`_ and `contributing code <https://github.com/piskvorky/gensim/wiki/Developer-page>`_
# * your **feedback is most welcome** and appreciated (and it's not just the code!):
# `bug reports <https://github.com/piskvorky/gensim/issues>`_ or
-# `user stories and general questions <http://groups.google.com/group/gensim/topics>`_.
+# `user stories and general questions <https://groups.google.com/g/gensim>`_.
#
# Gensim has no ambition to become an all-encompassing framework, across all NLP (or even Machine Learning) subfields.
# Its mission is to help NLP practitioners try out popular topic modelling algorithms
2 changes: 1 addition & 1 deletion docs/src/auto_examples/core/run_similarity_queries.py.md5
@@ -1 +1 @@
-936b1cccc5828cd52812994c052cbc57
+94f0e6b35e3a81a5ccc6d24a26101a2d
8 changes: 4 additions & 4 deletions docs/src/auto_examples/core/run_topics_and_transformations.py
@@ -130,7 +130,7 @@
corpus_lsi = lsi_model[corpus_tfidf] # create a double wrapper over the original corpus: bow->tfidf->fold-in-lsi

###############################################################################
-# Here we transformed our Tf-Idf corpus via `Latent Semantic Indexing <http://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
+# Here we transformed our Tf-Idf corpus via `Latent Semantic Indexing <https://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
# into a latent 2-D space (2-D because we set ``num_topics=2``). Now you're probably wondering: what do these two latent
# dimensions stand for? Let's inspect with :func:`models.LsiModel.print_topics`:
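For reference, the model being inspected is created a few lines earlier in the tutorial, roughly:

from gensim import models

lsi_model = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=2)
lsi_model.print_topics(2)  # each latent dimension prints as a weighted combination of words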

@@ -175,7 +175,7 @@
#
# Gensim implements several popular Vector Space Model algorithms:
#
-# * `Term Frequency * Inverse Document Frequency, Tf-Idf <http://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_
+# * `Term Frequency * Inverse Document Frequency, Tf-Idf <https://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_
# expects a bag-of-words (integer values) training corpus during initialization.
# During transformation, it will take a vector and return another vector of the
# same dimensionality, except that features which were rare in the training corpus
@@ -202,7 +202,7 @@
#
# model = models.OkapiBM25Model(corpus)
#
-# * `Latent Semantic Indexing, LSI (or sometimes LSA) <http://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
+# * `Latent Semantic Indexing, LSI (or sometimes LSA) <https://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
# transforms documents from either bag-of-words or (preferably) TfIdf-weighted space into
# a latent space of a lower dimensionality. For the toy corpus above we used only
# 2 latent dimensions, but on real corpora, target dimensionality of 200--500 is recommended
@@ -247,7 +247,7 @@
#
# model = models.RpModel(tfidf_corpus, num_topics=500)
#
-# * `Latent Dirichlet Allocation, LDA <http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation>`_
+# * `Latent Dirichlet Allocation, LDA <https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation>`_
# is yet another transformation from bag-of-words counts into a topic space of lower
# dimensionality. LDA is a probabilistic extension of LSA (also called multinomial PCA),
# so LDA's topics can be interpreted as probability distributions over words. These distributions are,
2 changes: 1 addition & 1 deletion docs/src/auto_examples/core/run_topics_and_transformations.py.md5
@@ -1 +1 @@
-226db24f9e807e4bbd2a6ef280a75510
+af09faa792495c0a9b03b53125b735a6
2 changes: 1 addition & 1 deletion docs/src/auto_examples/howtos/run_doc2vec_imdb.py
@@ -154,7 +154,7 @@ def extract_documents():
# of Sentences and Documents"
# <http://cs.stanford.edu/~quocle/paragraph_vector.pdf>`_ with guidance from
# Mikolov's `example go.sh
-# <https://groups.google.com/d/msg/word2vec-toolkit/Q49FIrNOQRo/J6KG8mUj45sJ>`_::
+# <https://groups.google.com/g/word2vec-toolkit/c/Q49FIrNOQRo/m/J6KG8mUj45sJ>`_::
#
# ./word2vec -train ../alldata-id.txt -output vectors.txt -cbow 0 -size 100 -window 10 -negative 5 -hs 0 -sample 1e-4 -threads 40 -binary 0 -iter 20 -min-count 1 -sentence-vectors 1
#
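In gensim terms, those go.sh flags map approximately onto the Doc2Vec parameters below. This is an editorial sketch of the correspondence, not the tutorial's verbatim configuration:

from gensim.models.doc2vec import Doc2Vec

model = Doc2Vec(
    dm=0,              # PV-DBOW, the analogue of -cbow 0 with -sentence-vectors 1
    vector_size=100,   # -size 100
    window=10,         # -window 10
    negative=5, hs=0,  # -negative 5 -hs 0
    sample=1e-4,       # -sample 1e-4
    min_count=1,       # -min-count 1
    epochs=20,         # -iter 20
    workers=4,         # -threads, scaled down for a typical desktop
)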
2 changes: 1 addition & 1 deletion docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5
@@ -1 +1 @@
-4f33a3697f7efc9f1db2eaa1c62f6999
+7020ef8545a05962fe2d7146b4b95f11
6 changes: 3 additions & 3 deletions docs/src/auto_examples/tutorials/run_lda.py
@@ -268,14 +268,14 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz'
# Note that we use the "Umass" topic coherence measure here (see
# :py:func:`gensim.models.ldamodel.LdaModel.top_topics`); Gensim has recently
# obtained an implementation of the "AKSW" topic coherence measure (see
-# accompanying blog post, http://rare-technologies.com/what-is-topic-coherence/).
+# accompanying blog post, https://rare-technologies.com/what-is-topic-coherence/).
#
# If you are familiar with the subject of the articles in this dataset, you can
# see that the topics below make a lot of sense. However, they are not without
# flaws. We can see that there is substantial overlap between some topics,
# others are hard to interpret, and most of them have at least some terms that
# seem out of place. If you were able to do better, feel free to share your
-# methods on the blog at http://rare-technologies.com/lda-training-tips/ !
+# methods on the blog at https://rare-technologies.com/lda-training-tips/ !
#

top_topics = model.top_topics(corpus)
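The tutorial then averages the per-topic coherence, which gives a single number for comparing runs:

avg_topic_coherence = sum([t[1] for t in top_topics]) / len(top_topics)  # each entry is (topic, coherence)
print('Average topic coherence: %.4f.' % avg_topic_coherence)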
@@ -299,7 +299,7 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz'
# Where to go from here
# ---------------------
#
-# * Check out a RaRe blog post on the AKSW topic coherence measure (http://rare-technologies.com/what-is-topic-coherence/).
+# * Check out a RaRe blog post on the AKSW topic coherence measure (https://rare-technologies.com/what-is-topic-coherence/).
# * pyLDAvis (https://pyldavis.readthedocs.io/en/latest/index.html).
# * Read some more Gensim tutorials (https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#tutorials).
# * If you haven't already, read [1] and [2] (see references).
2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_lda.py.md5
@@ -1 +1 @@
-6733157cebb44ef13ae98ec8f4a533f1
+0995a15406049093d95974700d471876
2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_wmd.py
@@ -17,7 +17,7 @@
#
# WMD enables us to assess the "distance" between two documents in a meaningful
# way even when they have no words in common. It uses `word2vec
-# <http://rare-technologies.com/word2vec-tutorial/>`_ [4] vector embeddings of
+# <https://rare-technologies.com/word2vec-tutorial/>`_ [4] vector embeddings of
# words. It has been shown to outperform many of the state-of-the-art methods in
# k-nearest neighbors classification [3].
#
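To make that concrete, a minimal WMD sketch (the pretrained vectors and example sentences are assumptions; `wmdistance` additionally needs an optimal-transport backend such as POT installed):

import gensim.downloader as api

model = api.load('word2vec-google-news-300')     # any KeyedVectors with adequate vocabulary works
sentence_1 = 'Obama speaks to the media in Illinois'.lower().split()
sentence_2 = 'The president greets the press in Chicago'.lower().split()
print(model.wmdistance(sentence_1, sentence_2))  # small distance despite zero shared words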
2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_wmd.py.md5
@@ -1 +1 @@
-a087a5b43fbba9a3e71c2384ddc264af
+eefa5904436cd2661aa2236b1d5bb22d
8 changes: 4 additions & 4 deletions docs/src/auto_examples/tutorials/run_word2vec.py
@@ -126,7 +126,7 @@
# below.
#
# You may also check out an `online word2vec demo
-# <http://radimrehurek.com/2014/02/word2vec-tutorial/#app>`_ where you can try
+# <https://radimrehurek.com/2014/02/word2vec-tutorial/#app>`_ where you can try
# this vector algebra for yourself. That demo runs ``word2vec`` on the
# **entire** Google News dataset, of **about 100 billion words**.
#
@@ -309,7 +309,7 @@ def __iter__(self):
# -------
#
# ``workers`` , the last of the major parameters (full list `here
-# <http://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec>`_)
+# <https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec>`_)
# is for training parallelization, to speed up training:
#
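A sketch of the parameter in use, assuming the `sentences` iterable defined earlier in the tutorial:

from gensim.models import Word2Vec

model = Word2Vec(sentences, workers=4)  # spread training across 4 worker threads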

@@ -322,7 +322,7 @@ def __iter__(self):
# one core because of the `GIL
# <https://wiki.python.org/moin/GlobalInterpreterLock>`_ (and ``word2vec``
# training will be `miserably slow
-# <http://rare-technologies.com/word2vec-in-python-part-two-optimizing/>`_\ ).
+# <https://rare-technologies.com/word2vec-in-python-part-two-optimizing/>`_\ ).
#

###############################################################################
@@ -372,7 +372,7 @@ def __iter__(self):
###############################################################################
#
# This ``evaluate_word_analogies`` method takes an `optional parameter
-# <http://radimrehurek.com/gensim/models/keyedvectors.html#gensim.models.keyedvectors.KeyedVectors.evaluate_word_analogies>`_
+# <https://radimrehurek.com/gensim/models/keyedvectors.html#gensim.models.keyedvectors.KeyedVectors.evaluate_word_analogies>`_
# ``restrict_vocab`` which limits which test examples are to be considered.
#
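A sketch of the call with that parameter, assuming the `model` trained above and the analogy questions file bundled with gensim:

from gensim.test.utils import datapath

score, sections = model.wv.evaluate_word_analogies(
    datapath('questions-words.txt'),
    restrict_vocab=300000,  # only consider analogies over the 300,000 most frequent words
)
print('Analogy accuracy: %.2f' % score)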

2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_word2vec.py.md5
@@ -1 +1 @@
-4598eccb1c465c724d8cfa99e216689d
+37d9258acf496e76463d2e34d40c8c67
