11. Dependency
Parsing
Supplement
Tokyo Metropolitan University
Komachi
Lab
Training
create list datacreate list queue
create list heads = [-1] #-1はROOTの親代わり for each line in file
if line is empty #文末
add (queue, heads) to data reset queue, heads
else
split line into {ID, word, pos, head…} add (ID, word, POS) to queue
add head to heads
create defaultdict weight_shift, weight_left, weight_right for each iteration
for each pair queue, heads in data
ShiftReduceTrain
①
ShiftReduceTrain(queue, heads, weights)create list stack = [(0, ROOT , ROOT )]
create list unproc #各単語の子の数を初期値として格納 for i in 0…length(heads)
add heads.count(i) to unproc
while length(queue) > 0 or length(stack) > 1 features = MakeFeatures(stack, queue)
score_shift = PredictScore(weight_shift, features)
score_left = PredictScore(weight_left, features)
score_right = PredictScore(weight_right, features)
if (argmax_score is shift and length(queue) > 0) or length(stack) < 2 predict = shift
elif argmax_score is left predict = left
else
ShiftReduceTrain
②
if length(stack) < 2correct = shift
elif heads[stack[-1][0]] is stack[-2][0] and unproc[stack[-1][0]] is 0
correct = right #資料と逆なので注意
elif heads[stack[-2][0]] is stack[-1][0] and unproc[stack[-2][0]] is 0
correct = left else
correct = shift
if predict is not correct
UpdateWeights(weights, features, predict, correct) if correct is shift
add queue.pop(0) to stack elif correct is left
unproc[stack[-1][0]] -= 1 stack.pop(-2)
elif correct is right
MakeFeatures
MakeFeatures(stack, queue)create defaultdict features
if length(stack) > 0 and length(queue) > 0
features[ W-1 +stack[-1][1]+ ,W0 +queue[0][1]] += 1
features[ W-1 +stack[-1][1]+ ,P0 +queue[0][2]] += 1 features[ P-1 +stack[-1][2]+ ,W0 +queue[0][1]] += 1 features[ P-1 +stack[-1][2]+ ,P0 +queue[0][2]] += 1 if length(stack) > 1
features[ W-2 +stack[-2][1]+ ,W-1 +stack[-1][1]] += 1
features[ W-2 +stack[-2][1]+ ,P-1 +stack[-1][2]] += 1
features[ P-2 +stack[-2][2]+ ,W-1 +stack[-1][1]] += 1
features[ P-2 +stack[-2][2]+ ,P-1 +stack[-1][2]] += 1
Testing
create list data create list queue for each line in file
if line is empty #文末 add queue to data reset queue
else
split line into {ID, word, pos, head…} add (ID, word, POS) to queue
load weight_shift, weight_left, weight_right for each queue in data
ShiftReduce(Testing)
ShiftReduce(queue, weights)create list stack = [(0, ROOT , ROOT )]
create list heads = [-1] * (length(queue) + 1) #headを初期化 while length(queue) > 0 or length(stack) > 1
features = MakeFeatures(stack, queue)
score_shift = PredictScore(weight_shift, features)
score_left = PredictScore(weight_left, features)
score_right = PredictScore(weight_right, features)
if (argmax_score is shift and length(queue) > 0) or length(stack) < 2 add queue.pop(0) to stack
elif argmax_score is left
heads[stack[-2][0]] = stack[-1][0] stack.pop(-2)
else
heads[stack[-1][0]] = stack[-2][0] stack.pop(-1)