文書のベクトル表現
● 確率分布 $\theta$ で文書を表現するのをやめて、一般のベクトル $\vec{d}$ で文書や研究者を表現すればよい?
  → RaP (Gehler+ 2006), RSM (Salakhutdinov 2009), Doc2Vec (Le and Mikolov 2014), NVDM (Miao+ 2016) など多数あるが..
● ニューラル手法なので、一般に学習が難しい
● しかし...
キーワード検索の方法
(4)
● 線形回帰の基本ですが、二乗誤差を最小化したいので
$$E = |y - Wd|^2 = (y - Wd)^T (y - Wd) = y^T y - 2\, d^T W^T y + d^T W^T W d$$
● よって
$$\frac{\partial E}{\partial d} = -2\, W^T y + 2\, W^T W d = 0 \;\Rightarrow\; d = (W^T W)^{-1} W^T y$$
● 事前に $R = (W^T W)^{-1} W^T$ を計算しておけば、$d = R y$ で一瞬で求まる