FFT review

线代课本 3.5 节是 FFT，好吧我也来 review 一下...

\(FF^* \to F^2 \to\) 对 DFT 转置 \(\to\) 交换正逆变换

\(F\boldsymbol{a}=(f(\omega^{1}_{n}), f(\omega^{2}_{n}),\cdots,f(\omega^{n}_{n}))^\intercal\)

\((F)_{nn}(F^*)_{nn} = nI\)，其中 \(F^*\) 为 \(F\) 的共轭转置

\(F_n\) 可以方便的由 \(F_{n/2}\) 得到

\(\omega^{2k}_{n}=\omega^{k}_{n/2}\) 提示将列进行奇偶分类

\[ \begin{pmatrix} 1 & 1 & \cdots & 1 \\ 1 & \omega^{1}_{n} & \cdots & \omega^{n-1}_{n} \\ 1 & \omega^{2}_{n} & \cdots & \omega^{2(n-1)}_{n} \\ \vdots & \vdots & \ddots & \vdots \\ 1 & \omega^{n-1}_{n} & \cdots & \omega^{(n-1)^2}_{n} \end{pmatrix} \Rightarrow \begin{pmatrix} 1 & 1 & \cdots & 1 & 1 & \cdots \\ 1 & \omega^{1}_{n/2} & \cdots & \omega^{0+1}_{n} & \omega^{2+1}_{n} & \cdots \\ 1 & \omega^{2}_{n/2} & \cdots & \omega^{0+2}_{n} & \omega^{4+2}_{n} & \cdots \\ \vdots & \vdots & & \vdots &\\ 1 & \omega^{n-1}_{n/2} & \cdots & \omega^{0+(n-1)}_{n} & \omega^{2(n-1)+(n-1)}_{n} & \cdots \end{pmatrix} \]

左上左下都是 \(F_{n/2}\)，右上右下还要额外在第 \(i\) 行上乘 \(\omega^{i}_{n}\)：

\[ F_{2n}= \begin{pmatrix} I_{n} & D_{n} \\ I_{n} & -D_{n} \end{pmatrix} \begin{pmatrix} F_{512} & \\ & F_{512} \end{pmatrix} P_{2n} \]

\(D_{n}\) 是将 \(\omega^{i}_{2n} (i\le n)\) 依次排列在对角线上得到的矩阵，\(P_{2n}\) 对应上文中的奇偶分类

不断展开 \(F_{n}\)，右边所有 \(P\) 的乘积即对应位反转操作（蝴蝶变换），左边是分治过程

单位根反演还告诉了我们 \(\sum\limits^{n-1}_{i=0} (\omega^{i}_{n})^k=n\) 当且仅当 \(n|k\) 时成立

借此观察 \(F^2\)：

\[ F^2= \begin{pmatrix} n & 0 & \cdots & 0 & 0 \\ 0 & 0 & \cdots & 0 & n \\ 0 & 0 & \cdots & n & 0 \\ \vdots & & \vdots & \vdots & \vdots \\ 0 & n & \cdots & 0 & 0 \end{pmatrix} \]

所以实现时可以统一使用 \(\omega^{i}_{n}\)

若要消去蝴蝶变换，根据 \(F_{2n}\) 的递归式对整个 DFT 使用转置而保留原本的 IDFT：

\[ F_{2n}^\intercal= P_{2n}^\intercal \begin{pmatrix} F_{512} & \\ & F_{512} \end{pmatrix} \begin{pmatrix} I_{n} & I_{n} \\ D_{n} & -D_{n} \end{pmatrix} \]

#define L (*l)
#define R (*r)
template<typename T> void DFT_transpose(T *f,const T *pr,const int x) {
	for(int len=x,hl=len>>1;hl;len=hl,hl>>=1)
		for(auto s=f;s!=f+x;s+=len)
			for(auto l=s,r=s+hl;l!=s+hl;l++,r++) {
				const T ps=L+R,ng=L-R,w=pr[(l-s)*x/len];
				L=ps;
				R=w*ng;
			}
}
template<typename T> void IDFT(T *f,const T *pr,const int x) {
	for(int len=x,hl=len>>1;hl;len=hl,hl>>=1)
		for(auto s=f;s!=f+x;s+=len)
			for(auto l=s,r=s+hl;l!=s+hl;l++,r++) {
				const T &w=pr[(l-s)*x/len];
				T tmp=w*R;
				R=L-tmp;
				L=L+tmp;
			}
}
#undef L
#undef R

若要将 \(O(n\log n)\) 次原根移动缩减为 \(O(n)\) 次，则我们希望每一层恰好使用原根数组的一个前缀，根据位反转的性质（长 \(2^n\) 的数组蝴蝶变换后，前 \(2^{n-x}\) 个是 \(2^x\) 的倍数，且 \(\omega_{2n}^{2i}=\omega_n^i\)），则我们希望单位根数组被蝴蝶变换掉：

观察知只需每个算法的开头进行蝴蝶变换并改变枚举区间大小的次序，例如 DIF 将会变为:

而因为对 \(\boldsymbol{a}\) 进行的是 \(F^2\) 故可以继续交换这两个算法，即先进行 DIT再进行 DIF，此时仍无需对原数组进行蝴蝶变换

用一个预处理好的足够大的单位根数组就可以完成所有操作

#define R1(i,n) for(int i=1,_=n;i<=_;++i)
#define R2(i,l,r) for(int i=l,_=r;i<=_;++i)
#define R3(i,a,b,_) for(auto i=a;i b;i _)
#define M5(a,b,c,d,e,...) e
#define do(a...) M5(a,R3,R2,R1) (a)

typedef long long ll;
typedef unsigned long long ull;

namespace AutoModSystem {
	typedef unsigned long long ull;
	typedef long double ld;
	
	#define N(x) (x<0?x+=mod():x)
	#define S(x) (x>=mod()?x-=mod():x)
	#define FROM(U) template<typename U> enable_if_t<is_convertible_v<U,num>,num>

	template<class T=int,T p=998244353> struct num {
		T x;
		static T _m;
		static constexpr bool Leq0=(p<=0);

		num(): x(0) {}
		num(T a): x(a) {}
		static void set(const T _p) {_m=_p;}
		static T mod() {
			if constexpr(Leq0) return _m;
			else return p;
		}

		num operator - () const {num x=mod()-*this;return S(x);}
		FROM(U) operator + (const U& rhs) const {return ano()+=rhs;}
		FROM(U) operator - (const U& rhs) const {return ano()-=rhs;}
		FROM(U) operator * (const U& rhs) const {return ano()*=rhs;}
		FROM(U) operator / (const U& rhs) const {return ano()/=rhs;}

		num& operator = (const num& rhs) {x=rhs.x;return *this;}
		num& operator += (const num& rhs) {return x+=rhs.x,S(x),*this;}
		num& operator -= (const num& rhs) {return x-=rhs.x,N(x),*this;}
		num& operator *= (const num& rhs) {
			if constexpr(is_same_v<T,long long>) {
				ull t=(ull)x*rhs.x-(ull)((ld)x/mod()*rhs.x+0.5L)*mod();
				x=(t<mod()?t:t+mod());
			} else x=1ll*x*rhs.x%mod();
			return *this;
		}
		num& operator /= (const num& rhs) {return *this*=rhs.inv();}
		
		num& operator ++ () {return x+=1,S(x),*this;}
		num& operator -- () {return x-=1,N(x),*this;}
		num operator ++ (int) {num t=*this;return ++*this,t;}
		num operator -- (int) {num t=*this;return --*this,t;}

		friend num operator + (const int lhs,const num& rhs) {return num(lhs)+=rhs;}
		friend num operator - (const int lhs,const num& rhs) {return num(lhs)-=rhs;}
		friend num operator * (const int lhs,const num& rhs) {return num(lhs)*=rhs;}
		friend num operator / (const int lhs,const num& rhs) {return num(lhs)/=rhs;}

		operator T () const {return x;}

		num ano() const {return *this;}
		num pow(ll b) const {
			num t=1,pw=x;
			while(b) {
				if(b&1) t*=pw;
				pw*=pw,b>>=1;
			}
			return t;
		}
		num inv() const {return pow(mod()-2);}
		static num sts(T x) {return N(x),S(x),num(x);}
		static num bts(ll x) {return x%=mod(),N(x),num(x);}
	};

	#undef N
	#undef S
	#undef FROM

	template<typename T,T p> T num<T,p>::_m=0;

	using mi=num<int,998244353>;
	template<int id> using _mint=num<int,id>;
	template<ll id> using _mll=num<ll,id>;
} using namespace AutoModSystem;

namespace Poly {
	template<typename T> void DIF(T *const f,const T *const pr,const int x) {
		for(int len=x,hl=len>>1;hl;len=hl,hl>>=1)
			for(auto s=f,w=pr;s!=f+x;s+=len,w++)
				for(auto l=s,r=s+hl;l!=s+hl;l++,r++) {
					T tmp=*w**r;
					*r=*l-tmp;
					*l=*l+tmp;
				}
	}
	template<typename T> void DIT(T *const f,const T *const pr,const int x) {
		for(int len=2,hl=1;len<=x;hl=len,len<<=1)
			for(auto s=f,w=pr;s!=f+x;s+=len,w++)
				for(auto l=s,r=s+hl;l!=s+hl;l++,r++) {
					const T ps=*l+*r,ng=*l-*r;
					*l=ps;
					*r=*w*ng;
				}
	}

	template<typename T,int pr> struct poly {
		vector<T> v;
		static T w[(1<<21)+1];

		static void pre(const int x=1<<21) {
			w[0]=1,w[x]=pr;
			do(i,x>>1,,>>=1) w[i]=w[i<<1]*w[i<<1];
			do(i,x-1) w[i]=w[i&(i-1)]*w[i&-i];
		}
		T operator [] (const int p) const {return p<v.size()?v[p]:0;}
		T& operator [] (const int p) {if(p>=v.size()) v.resize(p+1);return v[p];}
		poly& operator *= (const poly& rhs) {
			int n=deg()+rhs.deg(),x=1;
			for(;x<=n;x<<=1);
			poly buf=rhs.ano().cut(x);
			v.resize(x);
			DIF(data(),w,x),DIF(buf.data(),w,x);
			do(i,0,x-1) v[i]*=buf[i];
			DIT(data(),w,x);
			T invx=T(x).inv();
			do(i,0,x-1) v[i]*=invx;
			reverse(v.begin()+1,v.end());
			return cut(n+1);
		}

		int deg() const {return v.size()-1;}
		T* data() {return v.data();}
		poly ano() const {poly res=*this;return res;}
		poly& cut(const int x) {v.resize(x);return *this;}
	};

	template<typename T,int pr> T poly<T,pr>::w[(1<<21)+1];

	using gf=poly<mi,31>;
} using namespace Poly;