 $ git clone https://github.com/svn2github/word2vec.git
 $ cd word2vec/

 $ make
 gcc word2vec.c -o word2vec -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 gcc word2phrase.c -o word2phrase -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 gcc distance.c -o distance -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 distance.c: In function ‘main’:
 distance.c:31:8: warning: unused variable ‘ch’ [-Wunused-variable]
    char ch;
         ^
 gcc word-analogy.c -o word-analogy -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 word-analogy.c: In function ‘main’:
 word-analogy.c:31:8: warning: unused variable ‘ch’ [-Wunused-variable]
    char ch;
         ^
 gcc compute-accuracy.c -o compute-accuracy -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 compute-accuracy.c: In function ‘main’:
 compute-accuracy.c:29:109: warning: unused variable ‘ch’ [-Wunused-variable]
    char st1[max_size], st2[max_size], st3[max_size], st4[max_size], bestw[N][max_size], file_name[max_size], ch;
                                                                                                              ^
 chmod +x *.sh

 $ cat demo-word.sh
 make
 if [ ! -e text8 ]; then
   wget http://mattmahoney.net/dc/text8.zip -O text8.gz
   gzip -d text8.gz -f
 fi
 time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15
 ./distance vectors.bin

demo-word.sh が行っている処理を、手動で1ステップずつ順に実行してみます。

学習データ(text8)をダウンロードします。配布ファイルは zip 形式ですが、中身が1ファイルだけなので gzip -d で展開できます。gzip が認識する拡張子にするため、text8.gz という名前で保存します。
 $ wget http://mattmahoney.net/dc/text8.zip -O text8.gz
 text8.gz                                               100%[============>]  29.89M   377KB/s    時間 82s
 $ gzip -d text8.gz -f

学習を実行します(この環境では実時間で約14分半かかりました)。
 $ time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15
 Starting training using file text8
 Vocab size: 71291
 Words in train file: 16718843
 Alpha: 0.000005  Progress: 100.10%  Words/thread/sec: 146.19k
 real	14m24.749s
 user	28m38.580s
 sys	0m2.920s

トップ   新規 一覧 単語検索 最終更新   ヘルプ   最終更新のRSS