机器学习之特征工程完整代码(python之sklearn实现)-USB迷|专注于互联网分享

机器学习之特征工程完整代码(python之sklearn实现)

2024年3月21日发(作者：赤昆鹏)

六六DATA

机器学习之特征工程完整代码(python之sklearn实现)

#读取数据文件

import pandas as pd

import numpy as np

# Load the feature table and the target from the system clipboard.
# NOTE(review): both reads pull from the same clipboard, so presumably these
# lines are run one at a time with different data copied in between.
X = pd.read_clipboard(header=0)

# Tab is the delimiter; the backslash of "\t" was missing in the published text.
y = pd.read_clipboard(sep="\t", header=0)

# NOTE(review): the receiver and method of this preview call were lost in
# extraction; X.head() is assumed.
X.head()

-----------------------------------------------------------

######特征工程######

#数据转换在excel中已做

## One-hot encoding
# Re-read the (categorical) feature table from the clipboard, tab-delimited.
# The backslash of "\t" was missing in the published text.
X = pd.read_clipboard(sep="\t", header=0)

# NOTE(review): commented-out call; only "be()" survived extraction,
# presumably X.describe().
# X.describe()

from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()

---------------------

# Output the one-hot encoding as a dense matrix.
# .toarray() converts the encoder's result into a regular ndarray.
ohe_res = ohe.fit_transform(X).toarray()

ohe_res = pd.DataFrame(ohe_res)

# Copy the result back to the clipboard, tab-delimited and without a header
# row (header=False is the explicit form of the original header=0).
ohe_res.to_clipboard(sep="\t", header=False)

---------------------

# Output the one-hot encoding as a sparse matrix.
# NOTE(review): despite the "sparse" heading, the published code is identical
# to the dense section (.toarray() is still called); kept as published.
ohe_res = ohe.fit_transform(X).toarray()

ohe_res = pd.DataFrame(ohe_res)

# Copy the result back to the clipboard, tab-delimited and without a header
# row (header=False is the explicit form of the original header=0).
ohe_res.to_clipboard(sep="\t", header=False)

-----------------------------------------------------------

## Standardization (making features dimensionless)
from sklearn.preprocessing import StandardScaler

# Fit the scaler on X and transform it in one step.
sds_res = StandardScaler().fit_transform(X)

# Wrap the scaled array in a DataFrame so it can be copied out.
sds_res = pd.DataFrame(sds_res)

六六DATA

# Copy the standardized table to the clipboard, tab-delimited.
# The backslash of "\t" was missing in the published text.
sds_res.to_clipboard(sep="\t")

# NOTE(review): the method name of this preview call was lost in extraction;
# head() is assumed.
sds_res.head()

-----------------------------------------------------------

###### Feature selection: variance threshold method
from sklearn.feature_selection import VarianceThreshold

# Variance threshold method; the return value is the data after feature
# selection.
# The threshold parameter is the variance cutoff.
X_new = VarianceThreshold(threshold=1).fit_transform(X)

---------------------

### Correlation-coefficient method
from sklearn.feature_selection import SelectKBest

from scipy.stats import pearsonr

# Select the K best features and return the data restricted to them.
# The first argument is the scoring function: it takes the feature matrix and
# the target vector and returns a pair (scores, p-values) whose i-th entries
# are the score and p-value of the i-th feature. Here the score is the
# Pearson correlation coefficient.
# The parameter k is the number of features to select.
#
# Changes from the published one-liner:
#   * array(...) was an undefined name; np.array is used instead.
#   * map() is lazy on Python 3, so a list comprehension builds the rows.
#   * The result is returned as a tuple so SelectKBest unpacks it into
#     scores and p-values instead of treating a 2-D array as the scores.
# NOTE(review): features are ranked by the signed coefficient, so strongly
# negative correlations rank last; consider abs() if that is not intended.
# NOTE(review): the fit_transform arguments were lost in extraction; X and y
# loaded earlier in the script are assumed. np.ravel flattens y because it was
# read from the clipboard as a DataFrame.
SelectKBest(
    lambda X, Y: tuple(np.array([tuple(pearsonr(x, Y)) for x in X.T]).T),
    k=2,
).fit_transform(X, np.ravel(y))

---------------------

### MINE method
from sklearn.feature_selection import SelectKBest

from minepy import MINE


# MINE is not designed in a functional style, so mic() wraps it as a plain
# function returning a pair whose second item is a fixed p-value of 0.5.
def mic(x, y):
    """Return ``(MIC score, 0.5)`` for the two samples ``x`` and ``y``.

    The second element is a constant placeholder p-value so the result has
    the (score, p-value) shape used by the SelectKBest scoring function below.
    """
    m = MINE()
    m.compute_score(x, y)
    return (m.mic(), 0.5)


# Select the K best features and return the data after feature selection.
# np.array over a list comprehension replaces the undefined array(map(...)),
# and the tuple lets SelectKBest unpack scores and p-values.
# NOTE(review): the fit_transform arguments were lost in extraction; X and y
# loaded earlier in the script are assumed. np.ravel flattens y because it was
# read from the clipboard as a DataFrame.
SelectKBest(
    lambda X, Y: tuple(np.array([mic(x, Y) for x in X.T]).T),
    k=2,
).fit_transform(X, np.ravel(y))

---------------------

###递归特征消除法

2024年3月21日发(作者：赤昆鹏)

六六DATA

机器学习之特征工程完整代码(python之sklearn实现)

#读取数据文件

import pandas as pd

import numpy as np

# Load the feature table and the target from the system clipboard.
# NOTE(review): both reads pull from the same clipboard, so presumably these
# lines are run one at a time with different data copied in between.
X = pd.read_clipboard(header=0)

# Tab is the delimiter; the backslash of "\t" was missing in the published text.
y = pd.read_clipboard(sep="\t", header=0)

# NOTE(review): the receiver and method of this preview call were lost in
# extraction; X.head() is assumed.
X.head()

-----------------------------------------------------------

######特征工程######

#数据转换在excel中已做

## One-hot encoding
# Re-read the (categorical) feature table from the clipboard, tab-delimited.
# The backslash of "\t" was missing in the published text.
X = pd.read_clipboard(sep="\t", header=0)

# NOTE(review): commented-out call; only "be()" survived extraction,
# presumably X.describe().
# X.describe()

from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()

---------------------

# Output the one-hot encoding as a dense matrix.
# .toarray() converts the encoder's result into a regular ndarray.
ohe_res = ohe.fit_transform(X).toarray()

ohe_res = pd.DataFrame(ohe_res)

# Copy the result back to the clipboard, tab-delimited and without a header
# row (header=False is the explicit form of the original header=0).
ohe_res.to_clipboard(sep="\t", header=False)

---------------------

# Output the one-hot encoding as a sparse matrix.
# NOTE(review): despite the "sparse" heading, the published code is identical
# to the dense section (.toarray() is still called); kept as published.
ohe_res = ohe.fit_transform(X).toarray()

ohe_res = pd.DataFrame(ohe_res)

# Copy the result back to the clipboard, tab-delimited and without a header
# row (header=False is the explicit form of the original header=0).
ohe_res.to_clipboard(sep="\t", header=False)

-----------------------------------------------------------

## Standardization (making features dimensionless)
from sklearn.preprocessing import StandardScaler

# Fit the scaler on X and transform it in one step.
sds_res = StandardScaler().fit_transform(X)

# Wrap the scaled array in a DataFrame so it can be copied out.
sds_res = pd.DataFrame(sds_res)

六六DATA

# Copy the standardized table to the clipboard, tab-delimited.
# The backslash of "\t" was missing in the published text.
sds_res.to_clipboard(sep="\t")

# NOTE(review): the method name of this preview call was lost in extraction;
# head() is assumed.
sds_res.head()

-----------------------------------------------------------

###### Feature selection: variance threshold method
from sklearn.feature_selection import VarianceThreshold

# Variance threshold method; the return value is the data after feature
# selection.
# The threshold parameter is the variance cutoff.
X_new = VarianceThreshold(threshold=1).fit_transform(X)

---------------------

### Correlation-coefficient method
from sklearn.feature_selection import SelectKBest

from scipy.stats import pearsonr

# Select the K best features and return the data restricted to them.
# The first argument is the scoring function: it takes the feature matrix and
# the target vector and returns a pair (scores, p-values) whose i-th entries
# are the score and p-value of the i-th feature. Here the score is the
# Pearson correlation coefficient.
# The parameter k is the number of features to select.
#
# Changes from the published one-liner:
#   * array(...) was an undefined name; np.array is used instead.
#   * map() is lazy on Python 3, so a list comprehension builds the rows.
#   * The result is returned as a tuple so SelectKBest unpacks it into
#     scores and p-values instead of treating a 2-D array as the scores.
# NOTE(review): features are ranked by the signed coefficient, so strongly
# negative correlations rank last; consider abs() if that is not intended.
# NOTE(review): the fit_transform arguments were lost in extraction; X and y
# loaded earlier in the script are assumed. np.ravel flattens y because it was
# read from the clipboard as a DataFrame.
SelectKBest(
    lambda X, Y: tuple(np.array([tuple(pearsonr(x, Y)) for x in X.T]).T),
    k=2,
).fit_transform(X, np.ravel(y))

---------------------

### MINE method
from sklearn.feature_selection import SelectKBest

from minepy import MINE


# MINE is not designed in a functional style, so mic() wraps it as a plain
# function returning a pair whose second item is a fixed p-value of 0.5.
def mic(x, y):
    """Return ``(MIC score, 0.5)`` for the two samples ``x`` and ``y``.

    The second element is a constant placeholder p-value so the result has
    the (score, p-value) shape used by the SelectKBest scoring function below.
    """
    m = MINE()
    m.compute_score(x, y)
    return (m.mic(), 0.5)


# Select the K best features and return the data after feature selection.
# np.array over a list comprehension replaces the undefined array(map(...)),
# and the tuple lets SelectKBest unpack scores and p-values.
# NOTE(review): the fit_transform arguments were lost in extraction; X and y
# loaded earlier in the script are assumed. np.ravel flattens y because it was
# read from the clipboard as a DataFrame.
SelectKBest(
    lambda X, Y: tuple(np.array([mic(x, Y) for x in X.T]).T),
    k=2,
).fit_transform(X, np.ravel(y))

---------------------

###递归特征消除法

USB迷 | 专注于互联网分享

机器学习之特征工程完整代码(python之sklearn实现)

与本文相关的文章

评论列表 (0)