发现如果我们枚举 \(l_2\le r_1\),则合法的 \(l_1,r_2\) 会形成一段前后缀。其中,如果我们设 \(last_i\) 表示 \(a_i\) 上一次出现的位置,\(next_i\) 为 \(a_i\) 下一次出现的位置,则所有合法的 \(l_1\) 必须要满足 \(l_1\ge \max_{i=l_2}^{r_1} last_i\),\(r_2\) 必须要满足 \(r_2\le \min_{i=l_2}^{r_1} next_i\)。于是,对于一对固定的 \(l_2\le r_1\),贡献即为 \((l_2-\max last_i)\times(\min next_i-r_1)\)。
接下来考虑对 \(r_1\) 扫描线,统计所有 \(l_2\) 的贡献,相当于是要算
\[\sum_{l_2\cdots r_1 没有重复元素}^{r_1} (l_2-\max last_i)\times(\min next_i-r_1)
\]
其中 \(l_2\) 的下界在扫描线时是好维护的。将括号展开,得到:
\[\sum_{l_2\cdots r_1 没有重复元素}^{r_1} l_2\min next_i-l_2r_1-\max last_i\min next_i + r_1\max last_i
\]
于是,我们可以考虑用线段树维护每一个 \(l_2\) 对应的答案。具体来说,线段树负责维护 \(\sum next_i,\sum last_i, \sum i\times next_i,\sum i,\sum last_i\times next_i\)。
在扫描线的过程中,由于 \(\min next_i\) 随着 \(l_2\) 的减小而减小,\(\max last_i\) 随着 \(l_2\) 的减小而增大,我们可以考虑用一个单调栈维护 \(\min next_i\) 和 \(\max last_i\),在弹栈时更新线段树。则线段树的更新次数为单调栈的弹栈次数,故总时间复杂度 \(O(n \log n)\)。
const int MAXN = 1e5 + 5, mod = 1e9 + 7;
int n, a[MAXN];void posmod(int &x) {x = (x % mod + mod) % mod;
}struct _sgt {struct _node {int c, cri, li, ri, liri;int flgli, flgri;_node operator + (const _node b) const {_node x = {0, 0, 0, 0, 0, 0, 0};x.c = (c + b.c) % mod;x.cri = (cri + b.cri) % mod;x.ri = (ri + b.ri) % mod;x.li = (li + b.li) % mod;x.liri = (liri + b.liri) % mod;return x;}} tr[MAXN << 2];void pushup(int p) {tr[p] = tr[lson] + tr[rson];}void addtagli(int p, int l, int r, int v) {tr[p].li += v * (r - l + 1) % mod;tr[p].liri += v * tr[p].ri % mod;tr[p].flgli += v;posmod(tr[p].li); posmod(tr[p].liri); posmod(tr[p].flgli);}void addtagri(int p, int l, int r, int v) {tr[p].cri += v * tr[p].c % mod;tr[p].liri += v * tr[p].li % mod;tr[p].ri += v * (r - l + 1) % mod;tr[p].flgri += v;posmod(tr[p].cri); posmod(tr[p].liri);posmod(tr[p].ri); posmod(tr[p].flgri);}void pushdown(int p, int l, int r) {if (tr[p].flgli) {addtagli(lson, l, mid, tr[p].flgli);addtagli(rson, mid + 1, r, tr[p].flgli);tr[p].flgli = 0;}if (tr[p].flgri) {addtagri(lson, l, mid, tr[p].flgri);addtagri(rson, mid + 1, r, tr[p].flgri);tr[p].flgri = 0;}}void modifyli(int p, int l, int r, int L, int R, int v) {if (L <= l && r <= R) return addtagli(p, l, r, v);pushdown(p, l, r);if (L <= mid) modifyli(lson, l, mid, L, R, v);if (mid < R) modifyli(rson, mid + 1, r, L, R, v);pushup(p);}void modifyri(int p, int l, int r, int L, int R, int v) {if (L <= l && r <= R) return addtagri(p, l, r, v);pushdown(p, l, r);if (L <= mid) modifyri(lson, l, mid, L, R, v);if (mid < R) modifyri(rson, mid + 1, r, L, R, v);pushup(p);}void modifyc(int p, int l, int r, int k) {if (l == r) return tr[p].c = l, tr[p].cri = (l * tr[p].ri) % mod, void();pushdown(p, l, r);if (k <= mid) modifyc(lson, l, mid, k);else modifyc(rson, mid + 1, r, k);pushup(p);}auto query(int p, int l, int r, int L, int R) {if (L <= l && r <= R) return tr[p];pushdown(p, l, r);_node res = {0, 0, 0, 0, 0, 0, 0};if (L <= mid) res = res + query(lson, l, mid, L, R);if (mid < R) res = res + query(rson, mid + 1, r, L, R);return res;}
} t;map<int, int> lst;
int l[MAXN], r[MAXN], cl[MAXN], cr[MAXN];void work() {cin >> n;for (int i = 1; i <= n; ++i)cin >> a[i];for (int i = 1; i <= n; ++i) {l[i] = (lst.count(a[i]) ? lst[a[i]] + 1 : 1);lst[a[i]] = i;}lst.clear();for (int i = n; i >= 1; --i) {r[i] = (lst.count(a[i]) ? lst[a[i]] - 1 : n);lst[a[i]] = i;}stack<int> st1, st2;int ans = 0;int qrL = 1;for (int i = 1; i <= n; ++i) {int R = i - 1;while (st1.size() && r[st1.top()] > r[i]) {int L = cr[st1.top()];t.modifyri(1, 1, n, L, R, r[i] - r[st1.top()]);R = L - 1; st1.pop();}cr[i] = R + 1;st1.push(i);t.modifyri(1, 1, n, i, i, r[i]);R = i - 1;while (st2.size() && l[st2.top()] < l[i]) {int L = cl[st2.top()];t.modifyli(1, 1, n, L, R, l[i] - l[st2.top()]);R = L - 1; st2.pop();}cl[i] = R + 1;st2.push(i);t.modifyli(1, 1, n, i, i, l[i]);t.modifyc(1, 1, n, i);qrL = max(qrL, l[i]);auto x = t.query(1, 1, n, qrL, i);int tmp = ((x.cri - x.c * i - x.liri + i * x.li) % mod + mod) % mod;ans = ((ans + tmp) % mod + mod) % mod;}cout << ans << endl;
}