FFT Part
formula
Euler's formula:\(e^{i\theta}=\cos{\theta}+i\sin{\theta}\)
Special form:\(e^{i\pi}=-1\)
Begin
A polynomial\(F=x^2+x+1\)
Abbreviation\((1,1,1)\)
Now we know that only three points are needed to uniquely determine its image (Metaphysical Conclusion)
If we are arbitrarily algebra, then Qin Jiuzhi is messing around\(O(n^2)\)
DFT
So we set\(w_n=e^{\frac{2i\pi}{n}}\)
Algebra\(w_n^n=e^{2\pi i}=(e^{i\pi})^2=(-1)^2=1\)
We don't bring it in when we are algebra\(0,1,2,3,4,5...\)
But bring in\(w_n^0,w_n^1,......\)
This is\(DFT\)
Yet it is still violent\(N^2\)Complexity
FFT
Set a polynomial\(A=x^7+2x^6+3x^5+4x^4+5x^3+6x^2+7x+8\)
Then disassemble
\(A_1=x^6+3x^4+5x^2+7\)
\(A_2=2x^6+4x^4+6x^2+8\)
We found\(A=xA_1+A_2\)
Then we know\(FFT(A_1\ or\ A_2)\)What is brought in is actually\(w_4\)
But his\(X\)Actually it is\(X^2\)
\(So\) \(w_4^i\)In fact, it corresponds to the actual\(w_8^i\)and\(w_8^{i+4}\)
set up\(FFT(A,i)\)It's brought in\(w_{length(A)}^i\)Results
but\(FFT(A,i)=FFT(A_2,i)+x FFT(A_1,i)=FFT(A_2,i)+w_{length(A)}^i FFT(A_1,i)\)
\(FFT(A,i+\frac{length(A)}2)=FFT(A_2,i)+x FFT(A_1,i)=FFT(A_2,i)+w_{length(A)}^{i+\frac{length(A)}2}FFT(A_1,i)=FFT(A_2,i)-w_{length(A)}^i FFT(A_1,i)\)
Just divide and treat it
IFFT
Turn the unit root into\(w'_n=e^{-\frac{2i\pi}{n}}\)Do\(FFT\)
Finally remember to divide\(length\)Don't worry about the specific processYou can't understand, I can't understand
accomplish
Generally we write loop versions
As for binary reverse order, it is recommended to look at the board directly.
Plural can be written by hand\(STL\)
Code
P1919 A*B Problem
#include<bits/stdc++.h>
using namespace std;
typedef double db;
const double pi=3.1415926535897932384626;
const int N=4000009;
typedef complex<db> com;
int rev[N];
void FFT(com *x,int len,db op){
for(int i=0;i<len;i++)if(i<rev[i])swap(x[i],x[rev[i]]);
for(int l=2;l<=len;l<<=1){
com rt(cos(2*pi/(db)l),op*sin(2*pi/(db)l));
for(int i=0;i<len;i+=l){
com w(1,0);
for(int r=i;r<=i+l/2-1;r++){
com u=x[r],v=w*x[r+l/2];
x[r]=u+v,x[r+l/2]=u-v;
w*=rt;
}
}
}
if(op<0)for(int i=0;i<len;i++)x[i]/=(db)len;
}
com k[N],k2[N];
char a[N],b[N];
int la,lb,ans[N];
int main(){
int len=1;
scanf("%s%s",a,b),la=strlen(a),lb=strlen(b);
for(int i=0;i<la;i++)k[i].real(a[la-i-1]-'0'),k[i].imag(0);
for(int i=0;i<lb;i++)k2[i].real(b[lb-i-1]-'0'),k2[i].imag(0);
while(len<la+lb-1)len<<=1;
for(int i=0;i<len;i++){
rev[i]=rev[i>>1]>>1;
if(i&1)rev[i]+=len>>1;
}
FFT(k,len,1.0),FFT(k2,len,1.0);
for(int i=0;i<len;i++)k[i]*=k2[i];
FFT(k,len,-1.0);
for(int i=0;i<la+lb-1;i++)ans[i]=(int)(k[i].real()+0.2);
for(int i=0;i<la+lb-2;i++)ans[i+1]+=ans[i]/10,ans[i]%=10;
for(int i=la+lb-2;i>=0;i--)printf("%d",ans[i]);
return 0;
}
example
P3723 Gift
beg\(min \sum_{i=1}^n{(x_i-y_i+c)^2}\ \ (1<=n<=50000,-500<=c<=500)\)
Solution
Expand directly
Original\(=\sum{x_i^2+y_i^2+c^2+2x_ic-2y_ic-2x_iy_i}\)
\(=\sum{x_i^2}+\sum{y_i^2}+nc^2+2c\sum{x_i}-2c\sum{y_i}-2\sum{x_iy_i}\)
Just maximize the last item
\(c\)What to do? enumerate!
\(\sum{x_iy_i}\)It seems unprocessableActually, it is impossible to handle
There is a small\(trick\),Bundle\(x\)Array inverted
Then it becomes\(\sum{x_{n-i+1}y_i}\)
It seems to be a waste of time
But we found that such items have a property
The subscripts are added to\(n+1\) !
So we treat every subscript as the number of times
\(Fast\ Fast\ TLE\)Just do it
But we thought of a problem
What should I do if it is just spinning?
Let's set the offset of the first bracelet as\(\theta\)and\(0<=\theta<=n-1\)
Then it becomes\(\sum{x_{n-i+1+\theta}y_i}\)
Ask for sum\(n+1<=n+\theta+1<=2n\)
Just find the largest coefficient
Code
#include<bits/stdc++.h>
using namespace std;
typedef double db;
typedef long long ll;
typedef complex<db> com;
const int N=300009;
const db pi=3.1415926535897932385626;
int rev[N],len;
ll asum,bsum,asq,bsq,ans=LONG_LONG_MAX;
void FFT(com *x,db op){
for(int i=0;i<len;i++)if(i<rev[i])swap(x[i],x[rev[i]]);
for(int l=2;l<=len;l<<=1){
com rt(cos(2.0*pi/(db)l),op*sin(2.0*pi/(db)l));
for(int i=0;i<len;i+=l){
com w(1,0);
for(int r=i;r<i+l/2;r++){
com u=x[r],v=w*x[r+l/2];
x[r]=u+v,x[r+l/2]=u-v;
w*=rt;
}
}
}
if(op<0)for(int i=0;i<len;i++)x[i]/=(db)len;
}
com a[N],b[N];
int main(){
int n,m,tmp;
scanf("%d%d",&n,&m);
for(int i=0;i<n;i++)scanf("%d",&tmp),a[n-1-i].real(tmp),a[n+n-1-i].real(tmp),asum+=tmp,asq+=tmp*tmp;
for(int j=0;j<n;j++)scanf("%d",&tmp),b[j].real(tmp),bsum+=tmp,bsq+=tmp*tmp;
len=1;
while(len<n+n+n-1)len<<=1;
for(int i=0;i<len;i++){
rev[i]=rev[i>>1]>>1;
if(i&1)rev[i]|=len>>1;
}
FFT(a,1.0),FFT(b,1.0);
for(int i=0;i<len;i++)a[i]*=b[i];
FFT(a,-1.0);
for(int i=n+1;i<=n+n;i++){
for(int c=-m;c<=m;c++){
//if(fabs(a[i].imag())>0.5)puts("Fuck!"),exit(0);
ans=min(ans,asq+bsq+n*c*c+2ll*asum*c-2ll*bsum*c-2ll*(ll)(a[i].real()+0.2));
}
}
printf("%lld",ans);
return 0;
}
NTT Part
Reason For NTT
\(FFT\)There is a problem with the algorithm: some questions need to be modulated, and\(FFT\)of\(double\)Obviously not able to do it
So can we find an algorithm that can get the modulus
\(NTT\)It's the solution to this problem
But there is a serious problem
that is\(NTT\)Not any modulus
The most commonly used prime numbers\(998244353\)
\(So\ Let's\ Begin\)
Start
Euler's theorem\(a^{\phi(k)}\equiv 1 \mod k\)
when\(k\)When it is a prime number\(a^{p-1}\equiv 1 \mod p\)It is the famous Fermat's theorem
Next we introduce the concept of original roots
Prime number \(i\)The original root of\(g_i\)
(This article only has convenient narrative and does not guarantee strict narrative)
What does this mean?
that is\(g_i\)All powers of all modes can be retrieved\(i\)The remainder of
And he is periodic
Apparently\(g_i^0=g_i^{i-1}=1\)
Knowable cycle\(C=i-1\)
This explains a question: Why must it be\(998244353\)Wait for a few limited numbers
First of all, this modulus must be a prime number, or it does not satisfy Fermat's theorem
Then,\(998244353=119×2^{23}+1\)
So the advantage of this modulus is that its cycle is\(q*2^k\)
NTT
Remember what I just said\(FFT\)Is it?
How do we do it?
We know he is\(A= xA_1+A_2\)Form of
Because we are the method of dividing and consolidation
So what we've actually put it into\(2^k\)indivual\(Value\)
And make sure he is cyclical
The corresponding to the previous level after square\(Value\)
We did it last time using plural numbers
But the root can be
Let's think about the unit amount
First we define\(w_i\)For one cycle\(i\)Units of
So the original simple root\(g_i\)The period is\(q*2^k\)Right now\(P-1\)
If you want to shorten its cycle, you must multiply
So we can easily know\(w_i=g^\frac{P-1}{i}\)
And obviously\(i\)for\(2^k\)If his cycle cannot be eliminated, it will definitely not work
Then there is nothing
Of course, your sequence length will be smaller than\(2^{23}\)Right now\(8e6\)of
So this modulus is basically enough
A Small Problem
Now there is only one question: how to find the original root?I won't tell you, I don't know
\(998244353,1004535809\)The roots are\(3\)
Implementation details
Our unit root and\(FFT\)Exactly the same
so\(INTT\), bit inversion, etc. are all exactly the same
so\(INTT\)Need to ride\(1/n\), unit elements also need to become inverse elements
Code [P1919]
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=4000009;
const ll Mod=998244353,Root=3;
int len=1,rev[N];
ll Pow(ll d,ll x){
ll ans=1,tmp=d;
while(x){
if(x&1)ans=(ans*tmp)%Mod;
tmp=(tmp*tmp)%Mod;
x>>=1;
}
return ans;
}
ll Inv(ll u){return (Pow(u,Mod-2)+Mod)%Mod;}
void NTT(ll *x,bool I){
for(int i=0;i<len;i++)if(i<rev[i])swap(x[i],x[rev[i]]);
for(int l=2;l<=len;l<<=1){
ll rt=Pow(Root,(Mod-1)/l);
if(I)rt=Inv(rt);
for(int i=0;i<len;i+=l){
ll w=1;
for(int j=i;j<=i+l/2-1;j++){
ll u=x[j],v=w*x[j+l/2]%Mod;
x[j]=(u+v)%Mod,x[j+l/2]=(u-v+Mod)%Mod;
w=(w*rt)%Mod;
}
}
}
ll Inv_n=Inv(len);
if(I)for(int i=0;i<len;i++)x[i]=(x[i]*Inv_n)%Mod;
}
ll a[N],b[N];
int main(){
int la=-1,lb=-1;
char k=getchar();
while(!isdigit(k))k=getchar();
while(isdigit(k))a[++la]=k-'0',k=getchar();
while(!isdigit(k))k=getchar();
while(isdigit(k))b[++lb]=k-'0',k=getchar();
for(int i=0;i<la-i;i++)swap(a[i],a[la-i]);
for(int i=0;i<lb-i;i++)swap(b[i],b[lb-i]);
while(len<la+lb+1)len<<=1;
for(int i=0;i<len;i++){
rev[i]=rev[i>>1]>>1;
if(i&1)rev[i]|=len>>1;
}
NTT(a,0),NTT(b,0);
for(int i=0;i<len;i++)a[i]=(a[i]*b[i])%Mod;
NTT(a,1);
for(int i=0;i<la+lb;i++)a[i+1]+=a[i]/10,a[i]%=10;
int i=la+lb;
while(i>0&&a[i]==0)--i;
for(;i>=0;i--)printf("%lld",a[i]);
return 0;
}
FWT Part
Preface
Recommended cooperation in this partOI-WiKiuse
And referenced a lot of content
Reason For FWT
\(FFT\)and\(NTT\)It's already good to find polynomial multiplication
But let's write a form of polynomial multiplication
in\(C\)It's an answer array
\(A,B,C[i]\)It means that the number of times this polynomial is\(i\)coefficient of term
Sometimes the cancer question requires the following formula
Among them\(\oplus\)It may be with, or, xor.
This thing\(FWT\)Can do it
Start
Our overall idea is still a change\(A\) \(B\)The form of an array
Make it a weird way of representation
Then it can\(O(n)\)calculate
Then convert back
Remember this idea and will use it later
at the same time,\(FWT\)There is still a big difference from our first two algorithms, so don't force it.
\(FWT\)The algorithm has an answer\(C\)Array and three\(FWT\)Arrays (one for each ABC) (you can use their own arrays directly, and you don’t need to be really opened)
Written below\(FAT\) \(FBT\) \(FCT\)
If all three arrays are satisfied, write\(FWT\)
If it means\(ABC\)Write\(W\)
\(OK,Let's\ Go!\)
OR
What are we going to do now?
beg\(C\)Array.
\(C\)What does array satisfy?
\(C_i=\sum_{i=j|k}{A_jB_k}\) 。
Then ours\(FWT\)What do you do?
Walsh has defined it\(FWT\)Definition and calculation method
Right now\(FAT_i*FBT_i=FCT_i\)
Want to guarantee\(FWT_i=\sum_{j|i=i}{W_j}\)
Notice\(FWT\)Array and\(W\)The array is different!
We found\(FAT\) \(FBT\)When all satisfy the nature\(FCT\)Also satisfying nature
Copy a formula on OI-Wiki
Take a look at the last two steps
Compare\(C_i\)and\(FWT_i\)Formula
\(You\ Will\ Get\ It.\)
So let's see how to find a range\(FWT\)
No need to think about violence,\(O(n^2)\)
Obviously if there is only one interval\(FWT\)direct\(CV\)It's the original data
Obviously if there is only one interval\(FWT\)direct\(CV\)The original data is enough, not to elaborate on itBased on our study experience of T,We must divide and achieve\(n\log n\)of
\(FWT\)The fundamental idea is:
Since your operations are based on binary
Then I will use binary points to disassemble the binary bits of each subscript!
We make\(Q\)Indicates the current\(FWT\)The first half of the range,\(H\)Indicates the second half of the interval. (Qian Hou qwq)
Obviously, if you\(0\)If you start counting the subscript
\(Q\)The highest binary bits in the subscript are\(0\)
\(H\)The highest binary bits in the subscript are\(1\)
Note: I know what you guys are thinking, but the statement here is actually not rigorous, for example, our current interval\([2,3]\), the binary representation is\(10\)and\(11\), it may have the same prefix in front of it. But contact us\(FFT\)The algorithm of the algorithm, here we do not regard intervals as part of the original sequence, but as the complete intervals we want to find, and we already know that their left and right intervals are "alone"\(FWT\)We only need to merge them, and we can also define it as "remove the highest subscript binary bit after the prefix".
We found that the merged\(FWT\)Array,\(Q\)Some will not be affected later (because\(H\)There will be an additional subscript on some of the subscripts\(1\))
but\(H\)But the array needs to be added\(Q\)Array
Because this is\(OR\) !
How do you add it?Introduction to syntax loop structure, add it directly!
As for inversion, since his array is\(H\) \(H+Q\)
So directly\(Q-=H\)Just
Put a detailed comment version\(Oi-Wiki\)Code
void Or(ll *a, ll type) { // Iterative implementation, the constant is smaller
for (ll x = 2; x <= n; x <<= 1) {//Interval length
ll k = x >> 1;
for (ll i = 0; i < n; i += x) {//Enum the left endpoint of the interval
for (ll j = 0; j < k; j++) {//Enum every point in H
(a[i + j + k] += a[i + j] * type) %= P;
// When type is 1, it is FWT, -1 is IFWT
}
}
}
}
Isn't it very simple? OK, let's continue!
AND
Wiki This section has not been verified
But we still have to do it briefly
According to common sense, Walsh will not easily change the basic definition
So we'll get the basic formula
Make another proof formula
We found that the and operation is also satisfactory\(a\&b=a,a\&c=a \to a\&(b\&c)=a\)of
So it can
No big problem
Next we will still use the original definition for analysis
Obviously, we found\(H\)Arrays are not affected\(Q\)The array affects
But we found\(Q\)Array needs to be added\(H\)Array
so
\(FWT:[Q+H]+[H]\)
\(IFWT:[Q-H]+[H]\)
\(OI-WiKi\ Std\)
void And(ll *a, ll type) {
for (ll x = 2; x <= n; x <<= 1) {
ll k = x >> 1;
for (ll i = 0; i < n; i += x) {
for (ll j = 0; j < k; j++) {
(a[i + j] += a[i + j + k] * type) %= P;
}
}
}
}
XOR
This is where the intensity lies
The basic formula is slightly different from the previous one
(The following author does not understand why, but he is a correct construct)
First set\(a \# b\)Indicate representation\(x\& y\)Binary representation\(1\)The parity of quantity
Specifically, odd time values are one and even time values are zero
Basic formula
Next, let's verify the correctness of the structure
As you can see, this formula doesn't look like it's for people, so let's explain it
Line 1: Bring in according to the basic formula
Line 2: Multiplication Distribution Law
Line 3: This step is quite disgusting, we essentially want to prove it\((i\#j)\oplus(i\#k)=i\#(j\oplus k)\)
Line 4: Definition bring in
Then let's take a look\((i\#j)\oplus(i\#k)=i\#(j\oplus k)\)
The question is: Is he right?
I don't know, either. . .
Let's illustrate
\(i=1101\ j=1001\ k=10\)
left\(=0\oplus0=0\)
right\(=1101\#0=0\)
OK, that's it
Next, let's take a look at this\(FWT\)How to ask
First of all, the division is certain
Then we consider\(Q\)and\(H\)What's the array turned into
See ours first\(Q\)Array
First of all, he's his original one\(Value\)Can be used
Secondly, let's consider the other side\(H\)Array
We do not consider their prefix every time
so what?
So the right position corresponds to his position\(H\)He's doing this in the array\(FWT\)His way was the same as on the left
The current first position on the left is zero
It has no effect on end
So the left side should be\([Q+H]\)
What about the second half?
The left side can be added directly to the right side because the highest bit is zero.
On the right\(H\)Because the highest position is filled with one, everything is reversed
\(SO\)
\([Q+H]+[Q-H]\)
The reverse operation is also very simple
\([(Q+H)/2]+[(Q-H)/2]\)Isn’t this just a joke?
\(Code\ From\ Wiki\)
void Xor(ll *a, ll type) {
for (ll x = 2; x <= n; x <<= 1) {
ll k = x >> 1;
for (ll i = 0; i < n; i += x) {
for (ll j = 0; j < k; j++) {
(a[i + j] += a[i + j + k]) %= P;
(a[i + j + k] = a[i + j] - a[i + j + k] * 2) %= P;
(a[i + j] *= type) %= P;
(a[i + j + k] *= type) %= P;
}
}
}
}
Now you can have a purple questionP4717It's
Tips
- It is recommended to follow\(WiKi\)The template of the wasp, because if he disassembles the reverse operation separately, the number of possible functions may be too large
- \(FWT\)No binary reverse order is required! ! ! ! !
- This thing only adds and subtracts, so you can get the mold. How many molds do you like?
Code
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=1000009;
const ll Mod=998244353,Inv=499122177;
int n;
ll a[N],b[N],ta[N],tb[N],ans[N];
void MUL(ll *a,const ll *b){
for(int i=0;i<n;i++)a[i]=(a[i]*b[i])%Mod;
}
void SHOW(const ll *x){
for(int i=0;i<n;i++)printf("%lld ",x[i]);
puts("");
}
void OR(const ll *_x,ll *x,ll type){
int tmp;
for(int i=0;i<n;i++)x[i]=_x[i];
for(int l=2;l<=n;l<<=1){
tmp=l>>1;
for(int i=0;i<n;i+=l)for(int j=i;j<i+tmp;j++)
x[j+tmp]=((x[j+tmp]+type*x[j])%Mod+Mod)%Mod;
}
}
void AND(const ll *_x,ll *x,ll type){
int tmp;
for(int i=0;i<n;i++)x[i]=_x[i];
for(int l=2;l<=n;l<<=1){
tmp=l>>1;
for(int i=0;i<n;i+=l)for(int j=i;j<i+tmp;j++)
x[j]=((x[j]+type*x[j+tmp])%Mod+Mod)%Mod;
}
}
void XOR(const ll *_x,ll *x,ll type){
int tmp;
ll Q,H;
for(int i=0;i<n;i++)x[i]=_x[i];
for(int l=2;l<=n;l<<=1){
tmp=l>>1;
for(int i=0;i<n;i+=l)for(int j=i;j<i+tmp;j++){
Q=x[j],H=x[j+tmp];
x[j]=(Q+H)%Mod*type%Mod;
x[j+tmp]=((Q-H)%Mod*type%Mod+Mod)%Mod;
}
}
}
int main(){
int tmp;
scanf("%d",&tmp),n=1<<tmp;
for(int i=0;i<n;i++)scanf("%lld",&a[i]);
for(int i=0;i<n;i++)scanf("%lld",&b[i]);
OR(a,ta,1),OR(b,tb,1),MUL(ta,tb),OR(ta,ans,-1),SHOW(ans);
AND(a,ta,1),AND(b,tb,1),MUL(ta,tb),AND(ta,ans,-1),SHOW(ans);
XOR(a,ta,1),XOR(b,tb,1),MUL(ta,tb),XOR(ta,ans,Inv),SHOW(ans);
return 0;
}