这是一个(希望如此)最完整的 Python 解决方案(抱歉,我对 C++ 不太熟悉)。我相信这个思路与 David Eisenstat 的思路相同或非常相似,他的回答帮助我进一步思考了如何处理中间部分。中间部分的比较使用 O(1) 查询和 O(n log n) 预处理,基于代码中引用并给出链接的后缀数组构造算法(David 的建议是使用 O(n) 预处理和 O(1) 查询,但我没有时间去研究 O(1) 的 RMQ 或 Ukkonen 算法;我也被所引用的 CP 后缀数组算法迷住了)。代码包含与暴力解法对比的测试,但并不完整:它没有处理只有前缀和后缀而没有中间部分的情况,不过这种情况处理起来应该更简单。代码或许还可以写得更简洁、更有条理,但我还没有时间仔细考虑。
因为我们只能删除第一个、第二个、倒数第二个或最后一个字符,所以解的前两个字母将从经过至多 k 次删除后剩下的两个字母(一个子序列)中选取:
xxxAxxxxxxxB...
一旦通过删除开头的若干字符确定了字符 A,B 的可选范围就只取决于我们删除第二个字符的次数。显然,我们希望 A 取可用的最小字符(它可能出现不止一次),然后 B 取可选的最小字符(同样可能出现不止一次)。
后缀的构成方式类似,但我们需要为每一种剩余删除次数 k - num_deletions(num_deletions 是前缀已经用掉的删除次数)存储最佳后缀。于是最终的候选就是最小的两字符前缀 + 中间部分 + 两字符后缀,其中中间部分由每个候选中删除次数的分配唯一确定。我们可以使用带有附加信息的后缀数组或后缀树来比较中间部分。
Python
def log2(n):
    """Return floor(log2(n)) for a non-negative integer ``n``.

    ``log2(0)`` returns -1, matching the historical behaviour of the
    shift-counting loop this function replaces.

    Raises:
        ValueError: if ``n`` is negative. Repeatedly right-shifting a
            negative integer never reaches zero, so the old loop hung.
    """
    if n < 0:
        raise ValueError("log2() requires a non-negative integer")
    # bit_length() is the number of right shifts needed to reach zero.
    return n.bit_length() - 1
# https://cp-algorithms.com/string/suffix-array.html
def sort_cyclic_shifts(s):
    """Sort all cyclic shifts of ``s`` by prefix doubling.

    Args:
        s: the string to process. Characters are bucketed by ``ord`` into
            256 slots, so code points must be below 256.

    Returns:
        A pair ``(p, cs)``:
        * ``p[r]`` is the start index of the r-th smallest cyclic shift.
        * ``cs[h][i]`` is the 0-based equivalence class (dense rank) of the
          cyclic substring of length ``2**h`` starting at index ``i``.
          Each level is an independent snapshot.
        For the empty string both lists are empty.
    """
    n = len(s)
    if n == 0:
        return [], []

    alphabet = 256
    cs = []
    p = [0] * n
    c = [0] * n
    cnt = [0] * max(alphabet, n + 1)

    # Level 0: counting sort of the positions by their single character.
    for ch in s:
        cnt[ord(ch)] += 1
    for i in range(1, alphabet):
        cnt[i] += cnt[i - 1]
    for i in range(n):
        cnt[ord(s[i])] -= 1
        p[cnt[ord(s[i])]] = i

    c[p[0]] = 0
    classes = 1
    for i in range(1, n):
        if s[p[i]] != s[p[i - 1]]:
            classes += 1
        c[p[i]] = classes - 1
    cs.append(c[:])

    pn = [0] * n
    cn = [0] * n
    h = 0
    while (1 << h) < n:
        half = 1 << h

        # Shifting every start left by ``half`` yields the positions already
        # ordered by the second half of the doubled substring.
        for i in range(n):
            pn[i] = p[i] - half
            if pn[i] < 0:
                pn[i] += n

        # Stable counting sort by the class of the first half.
        for i in range(classes):
            cnt[i] = 0
        for i in range(n):
            cnt[c[pn[i]]] += 1
        for i in range(1, classes):
            cnt[i] += cnt[i - 1]
        for i in range(n - 1, -1, -1):
            cnt[c[pn[i]]] -= 1
            p[cnt[c[pn[i]]]] = pn[i]

        # Assign new classes. The scan must start at 1: index 0 has no
        # predecessor, and starting at 0 would compare against p[-1].
        cn[p[0]] = 0
        classes = 1
        for i in range(1, n):
            cur = c[p[i]], c[(p[i] + half) % n]
            prev = c[p[i - 1]], c[(p[i - 1] + half) % n]
            if cur != prev:
                classes += 1
            cn[p[i]] = classes - 1

        # Swap rather than alias: ``c`` must stay intact while ``cn`` is
        # being filled during the next round.
        c, cn = cn, c
        cs.append(c[:])
        h += 1

    return p, cs
# https://cp-algorithms.com/string/suffix-array.html
def suffix_array_construction(s):
    """Build the suffix array of ``s`` from its sorted cyclic shifts.

    A "$" sentinel is appended before sorting. The first entry of the sorted
    order is dropped from the returned array.

    Returns:
        ``(suffix_array, cs)`` where ``cs`` is the list of per-level class
        tables produced by ``sort_cyclic_shifts`` for the string including
        the sentinel.
    """
    order, class_levels = sort_cyclic_shifts(s + "$")
    return order[1:], class_levels
# https://cp-algorithms.com/string/suffix-array.html
def compare(i, j, l, k, n, c):
    """Three-way compare two length-``l`` substrings using class tables.

    The substrings start at ``i`` and ``j``. Each is represented by the
    classes of two blocks of length ``2**k``: one at its start and one
    ending at its last character (indices wrap modulo ``n``).

    Args:
        i, j: start indices of the two substrings.
        l: common length of the substrings.
        k: level of the class table to use (block length ``2**k``).
        n: modulus applied to the second block's start index.
        c: list of class tables, indexed by level.

    Returns:
        -1, 0 or 1 when the first key is smaller than, equal to or greater
        than the second key.
    """
    ranks = c[k]
    tail = l - (1 << k)
    left = (ranks[i], ranks[(i + tail) % n])
    right = (ranks[j], ranks[(j + tail) % n])
    if left < right:
        return -1
    return 1 if left > right else 0
## MAIN FUNCTION
def f(s, k):
    """Return the smallest string reachable from ``s`` with ``k`` deletions.

    Each deletion removes the first, second, second-to-last or last
    character of the current string. Every result therefore has the form
    ``s[i] + s[j] + s[j+1:q] + s[q] + s[r]`` with ``i < j < q < r``: a
    two-character prefix, a contiguous middle and a two-character suffix.
    The middle always has length ``len(s) - k - 4``.

    Args:
        s: the input string.
        k: the number of deletions to perform.

    Returns:
        The lexicographically smallest resulting string.

    Raises:
        ValueError: if ``k`` is negative or fewer than four characters
            would remain, since the prefix/middle/suffix decomposition
            then no longer applies.
    """
    n = len(s)
    middle_len = n - k - 4
    if k < 0 or middle_len < 0:
        raise ValueError("f() requires 0 <= k <= len(s) - 4")

    # Best first character: the leftmost minimum of s[0..k]. Taking the
    # leftmost occurrence leaves the widest range for the second character.
    first_idx = k
    for i in range(k - 1, -1, -1):
        if s[i] <= s[first_idx]:
            first_idx = i

    # Best second character: every occurrence of the minimum within
    # s[first_idx + 1 .. k + 1]. Each occurrence fixes a different split of
    # the deletions between the front and the back, so all are candidates.
    best_second = s[k + 1]
    second_idxs = [k + 1]
    for i in range(k, first_idx, -1):
        if s[i] < best_second:
            best_second = s[i]
            second_idxs = [i]
        elif s[i] == best_second:
            second_idxs.append(i)
    second_idxs.reverse()

    # last_idx[q] is the index of the smallest character after position q,
    # i.e. the best final character when the second-to-last one is s[q].
    last_idx = {n - 2: n - 1}
    for q in range(n - 3, n - 3 - k, -1):
        inherited = last_idx[q + 1]
        last_idx[q] = q + 1 if s[q + 1] < s[inherited] else inherited

    _, cs = suffix_array_construction(s)
    level = log2(middle_len) if middle_len else 0

    def suffix_of(middle_start):
        """Return the two-character suffix that follows the given middle."""
        q = middle_start + middle_len
        return s[q] + s[last_idx[q]]

    # All candidates share the same prefix, so only middle + suffix decide.
    # A candidate whose second character is s[j] has its middle at j + 1.
    best_start = second_idxs[0] + 1
    for prefix_end in second_idxs[1:]:
        start = prefix_end + 1
        if middle_len:
            # The class tables were built for s plus a sentinel: n + 1.
            comp = compare(best_start, start, middle_len, level, n + 1, cs)
        else:
            # Empty middles are trivially equal; the suffix decides.
            comp = 0
        if comp == 0 and suffix_of(start) < suffix_of(best_start):
            comp = 1
        if comp == 1:
            best_start = start

    middle = s[best_start:best_start + middle_len]
    return s[first_idx] + best_second + middle + suffix_of(best_start)
def brute_force(s, k):
    """Exhaustively find the smallest string after ``k`` deletions.

    Each step tries four edits of the current string: drop the first
    character, drop the second, drop the last, or drop the second-to-last.
    The search starts from ``s + "z"`` as the initial best value and keeps
    the minimum over every string reached with no deletions left.
    """
    smallest = s + "z"
    pending = [(s, k)]
    while pending:
        text, budget = pending.pop()
        if budget == 0:
            if text < smallest:
                smallest = text
            continue
        left = budget - 1
        variants = (
            text[1:],                # drop the first character
            text[0] + text[2:],      # drop the second character
            text[:-1],               # drop the last character
            text[:-2] + text[-1],    # drop the second-to-last character
        )
        pending.extend((variant, left) for variant in variants)
    return smallest
# 01234567
#s = "abacaaba"
#k = 2
# Test
import random
# Randomised cross-check of f() against brute_force() on short lowercase
# strings. k stays within 1..n-5, so at least five characters remain.
n = 12
num_tests = 500

for _ in range(num_tests):
    s = "".join(chr(random.randint(0, 25) + 97) for i in range(n))
    k = random.randint(1, n - 5)
    _f = f(s, k)
    brute = brute_force(s, k)
    if _f == brute:
        continue
    # Report the first disagreement and stop.
    print("MISMATCH!")
    print(s, k)
    print(_f)
    print(brute)
    break

print("Done.")