机器学习 | 决策树

hizengzeng 发布于2019-06-26 20:03 / 3514人阅读

摘要：由于近期学业繁重，所以我就不说废话了，直接上代码。本文内容：简单的决策树示例（运行效果、代码）——定义文本框和箭头格式；画树；使用文本注解绘制树节点；绘制带箭头的注解；在父子节点间填充文本信息；创建数据集；计算给定数据的香农熵（熵值越高，混合的数据越多，越无序）；我们可……

由于近期学业繁重QAQ，所以我就不说废话了，直接上代码~

简单的决策树示例 运行效果

代码

from math import log
import operator
import matplotlib.pyplot as plt

# Box and arrow styles used when drawing the tree.
# decisionNode: saw-toothed box, used for internal (decision) nodes.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}
# leafNode: rounded box, used for leaf (class label) nodes.
leafNode = {"boxstyle": "round4", "fc": "0.8"}
# arrow_args: arrow style passed to annotate() as ``arrowprops``.
arrow_args = {"arrowstyle": "<-"}

#画树

def plotNode(nodeTxt,centerPt,parentPt,nodeType):
    """Draw one tree node as a boxed text annotation joined to its parent.

    Args:
        nodeTxt: Text shown inside the box.
        centerPt: (x, y) position of the text box, in axes-fraction units.
        parentPt: (x, y) point being annotated (the parent node), in
            axes-fraction units.
        nodeType: Box style dict passed to matplotlib as ``bbox``.

    The annotation is drawn on ``createPlot.ax1`` using the module-level
    ``arrow_args`` arrow style.
    """
    annotationOptions = {
        "xy": parentPt,
        "xycoords": "axes fraction",
        "xytext": centerPt,
        "textcoords": "axes fraction",
        "va": "center",
        "ha": "center",
        "bbox": nodeType,
        "arrowprops": arrow_args,
    }
    createPlot.ax1.annotate(nodeTxt, **annotationOptions)
    
def plotMidText(cntrPt,parentPt,txtString):
    """Write ``txtString`` halfway between a node and its parent.

    Args:
        cntrPt: (x, y) position of the child node.
        parentPt: (x, y) position of the parent node.
        txtString: Text to place at the midpoint of the two positions.

    The text is drawn on ``createPlot.ax1``.
    """
    # Midpoint of each coordinate: child + half of (parent - child).
    midPoint = [
        (parentPt[axis] - cntrPt[axis]) / 2.0 + cntrPt[axis]
        for axis in (0, 1)
    ]
    createPlot.ax1.text(midPoint[0], midPoint[1], txtString)
    
def plotTree(myTree,parentPt,nodeTxt):
    """Recursively draw ``myTree`` beneath ``parentPt``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.
        parentPt: (x, y) position of the parent node.
        nodeTxt: Text written on the edge from the parent to this subtree
            (the feature value that leads here).

    Layout state lives in function attributes that ``createPlot`` sets before
    the first call: ``plotTree.totalW`` and ``plotTree.totalD`` scale the
    horizontal and vertical steps, ``plotTree.xOff`` is the x position of the
    most recently drawn leaf, and ``plotTree.yOff`` is the y position of the
    level currently being drawn.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = list(myTree)[0]
    # Centre the decision node above the leaves that belong to it.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Move one level down before drawing the children.
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key, child in secondDict.items():
        if isinstance(child, dict):
            # Another decision node: draw its whole subtree.
            plotTree(child, cntrPt, str(key))
        else:
            # Leaf: advance to the next horizontal slot and draw it.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            leafPt = (plotTree.xOff, plotTree.yOff)
            plotNode(child, leafPt, cntrPt, leafNode)
            plotMidText(leafPt, cntrPt, str(key))
    # Restore the level so that siblings of this subtree are drawn correctly.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD
    
    
def createPlot(inTree):
    """Draw the decision tree ``inTree`` in a matplotlib window.

    Args:
        inTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Clears figure 1, creates a frameless axes without ticks (stored as
    ``createPlot.ax1``), initialises the layout state used by ``plotTree``
    and then shows the figure.
    """
    fig = plt.figure(1, facecolor="white")
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    # Horizontal step is 1/number-of-leaves, vertical step is 1/tree-depth.
    plotTree.totalW = float(getNumLeafs(inTree))
    # The vertical scale must come from the depth; using the leaf count here
    # squeezes the tree into the top part of the axes.
    plotTree.totalD = float(getTreeDepth(inTree))
    # Start half a slot to the left so the first leaf lands mid-slot.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), "")
    plt.show()

    
    


def createDataSet():
    """Return the toy data set and the names of its feature columns.

    Returns:
        A ``(dataSet, labels)`` pair. Every row of ``dataSet`` holds two
        feature values followed by the class label ("yes" or "no");
        ``labels`` names the two feature columns.
    """
    labels = ["no surfacing", "flippers"]
    dataSet = [
        [1, 1, "yes"],
        [1, 1, "yes"],
        [1, 0, "no"],
        [0, 1, "no"],
        [0, 1, "no"],
    ]
    return dataSet, labels

def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    Args:
        dataSet: Sequence of rows whose last element is the class label.

    Returns:
        The entropy as a float. The more evenly the rows are spread over
        different classes, the higher the value; an empty data set gives 0.0.
    """
    numEntries = len(dataSet)
    # Count how many rows carry each class label (the last column).
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries
        # Logarithm to base 2.
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
    
    
def splitDataSet(dataSet,axis,value):
    """Select the rows whose column ``axis`` equals ``value``, minus that column.

    Args:
        dataSet: List of rows (each row a list).
        axis: Index of the feature column to test.
        value: Feature value a row must have in that column to be kept.

    Returns:
        A new list of new rows; each kept row is the original row with the
        column ``axis`` removed. The input rows are not modified.
    """
    return [
        # Everything before the tested column plus everything after it.
        featVec[:axis] + featVec[axis + 1:]
        for featVec in dataSet
        if featVec[axis] == value
    ]
        

def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature column with the largest information gain.

    Args:
        dataSet: Non-empty list of rows; the last element of every row is
            the class label, all other elements are feature values.

    Returns:
        The column index of the best feature, or -1 when no feature gives an
        information gain greater than zero.
    """
    rowCount = float(len(dataSet))
    baseEntropy = calcShannonEnt(dataSet)
    bestFeature, bestInfoGain = -1, 0.0
    # Every column except the last one (the class label) is a feature.
    for column in range(len(dataSet[0]) - 1):
        # Weighted entropy after splitting on each distinct value of the column.
        newEntropy = 0.0
        for value in set(example[column] for example in dataSet):
            subDataSet = splitDataSet(dataSet, column, value)
            newEntropy += len(subDataSet) / rowCount * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestFeature, bestInfoGain = column, infoGain
    return bestFeature

def majorityCnt(classList):
    """Return the class label that occurs most often in ``classList``.

    Args:
        classList: Iterable of class labels.

    Returns:
        The most frequent label. When several labels are equally frequent,
        the one that appears first in ``classList`` is returned.

    Raises:
        ValueError: If ``classList`` is empty.
    """
    # Count the occurrences of each distinct label.
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    if not classCount:
        raise ValueError("majorityCnt() needs at least one class label")
    # The previous code called the undefined name ``iteritems`` and therefore
    # always raised NameError; dict.items() is the Python 3 spelling.
    # max() returns the first of several equally frequent labels.
    return max(classCount.items(), key=operator.itemgetter(1))[0]

def createTree(dataSet,labels):
    """Recursively build a decision tree from ``dataSet``.

    Args:
        dataSet: Non-empty list of rows; the last element of every row is
            the class label, all other elements are feature values.
        labels: Names of the feature columns, in column order. The list is
            not modified.

    Returns:
        A class label when the rows cannot or need not be split further,
        otherwise a nested dict ``{feature_name: {feature_value: subtree}}``.
    """
    classList = [example[-1] for example in dataSet]
    # First stop condition: every row has the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Second stop condition: all features are used up but the classes are
    # still mixed, so fall back to the most frequent class.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataSet)
    # No feature improves the split (index -1). Indexing with -1 would split
    # on the class column itself, so fall back to the most frequent class.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # Build the remaining label list as a copy instead of deleting from the
    # caller's list, so ``labels`` can still be used after the call.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]
    myTree = {bestFeatLabel: {}}
    # One branch per distinct value of the chosen feature.
    for value in set(example[bestFeat] for example in dataSet):
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree
    
def getNumLeafs(myTree):
    """Return the number of leaf nodes in ``myTree``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Returns:
        The count of values that are not dicts, over the whole tree.
    """
    firstStr = list(myTree)[0]
    numLeafs = 0
    for child in myTree[firstStr].values():
        # isinstance also recognises dict subclasses such as OrderedDict,
        # which a comparison of the type name would count as leaves.
        if isinstance(child, dict):
            # A dict child is another decision node: count its leaves.
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs
    
def getTreeDepth(myTree):
    """Return the number of decision levels in ``myTree``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Returns:
        1 for a tree whose children are all leaves, plus 1 for every further
        level of nested decision nodes on the deepest path.
    """
    firstStr = list(myTree)[0]
    maxDepth = 0
    for child in myTree[firstStr].values():
        # isinstance also recognises dict subclasses such as OrderedDict,
        # which a comparison of the type name would treat as leaves.
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        maxDepth = max(maxDepth, thisDepth)
    return maxDepth

        
        
    
def main():
    """Build the decision tree for the toy data set, print it and plot it."""
    dataSet, labels = createDataSet()
    # Tree reported by the author for this data set:
    # {"no surfacing": {0: "no", 1: {"flippers": {0: "no", 1: "yes"}}}}
    myTree = createTree(dataSet, labels)
    print("myTree:")
    print(myTree)
    createPlot(myTree)
    # Further experiments that can be run from here:
    #   print(getNumLeafs(myTree)); print(getTreeDepth(myTree))
    #   print(chooseBestFeatureToSplit(dataSet))
    #   print(calcShannonEnt(dataSet))
    #   dataSet[0][-1] = "maybe"   # adding a class makes the entropy larger
    #   print(splitDataSet(dataSet, 0, 1)); print(splitDataSet(dataSet, 0, 0))


if __name__=="__main__":
    main()

使用决策树预测隐形眼镜类型 运行效果

代码

from math import log
import operator
import matplotlib.pyplot as plt

# Box and arrow styles used when drawing the tree.
# decisionNode: saw-toothed box, used for internal (decision) nodes.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}
# leafNode: rounded box, used for leaf (class label) nodes.
leafNode = {"boxstyle": "round4", "fc": "0.8"}
# arrow_args: arrow style passed to annotate() as ``arrowprops``.
arrow_args = {"arrowstyle": "<-"}

#画树

def plotNode(nodeTxt,centerPt,parentPt,nodeType):
    """Draw one tree node as a boxed text annotation joined to its parent.

    Args:
        nodeTxt: Text shown inside the box.
        centerPt: (x, y) position of the text box, in axes-fraction units.
        parentPt: (x, y) point being annotated (the parent node), in
            axes-fraction units.
        nodeType: Box style dict passed to matplotlib as ``bbox``.

    The annotation is drawn on ``createPlot.ax1`` using the module-level
    ``arrow_args`` arrow style.
    """
    annotationOptions = {
        "xy": parentPt,
        "xycoords": "axes fraction",
        "xytext": centerPt,
        "textcoords": "axes fraction",
        "va": "center",
        "ha": "center",
        "bbox": nodeType,
        "arrowprops": arrow_args,
    }
    createPlot.ax1.annotate(nodeTxt, **annotationOptions)
    
def plotMidText(cntrPt,parentPt,txtString):
    """Write ``txtString`` halfway between a node and its parent.

    Args:
        cntrPt: (x, y) position of the child node.
        parentPt: (x, y) position of the parent node.
        txtString: Text to place at the midpoint of the two positions.

    The text is drawn on ``createPlot.ax1``.
    """
    # Midpoint of each coordinate: child + half of (parent - child).
    midPoint = [
        (parentPt[axis] - cntrPt[axis]) / 2.0 + cntrPt[axis]
        for axis in (0, 1)
    ]
    createPlot.ax1.text(midPoint[0], midPoint[1], txtString)
    
def plotTree(myTree,parentPt,nodeTxt):
    """Recursively draw ``myTree`` beneath ``parentPt``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.
        parentPt: (x, y) position of the parent node.
        nodeTxt: Text written on the edge from the parent to this subtree
            (the feature value that leads here).

    Layout state lives in function attributes that ``createPlot`` sets before
    the first call: ``plotTree.totalW`` and ``plotTree.totalD`` scale the
    horizontal and vertical steps, ``plotTree.xOff`` is the x position of the
    most recently drawn leaf, and ``plotTree.yOff`` is the y position of the
    level currently being drawn.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = list(myTree)[0]
    # Centre the decision node above the leaves that belong to it.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Move one level down before drawing the children.
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key, child in secondDict.items():
        if isinstance(child, dict):
            # Another decision node: draw its whole subtree.
            plotTree(child, cntrPt, str(key))
        else:
            # Leaf: advance to the next horizontal slot and draw it.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            leafPt = (plotTree.xOff, plotTree.yOff)
            plotNode(child, leafPt, cntrPt, leafNode)
            plotMidText(leafPt, cntrPt, str(key))
    # Restore the level so that siblings of this subtree are drawn correctly.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD
    
    
def createPlot(inTree):
    """Draw the decision tree ``inTree`` in a matplotlib window.

    Args:
        inTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Clears figure 1, creates a frameless axes without ticks (stored as
    ``createPlot.ax1``), initialises the layout state used by ``plotTree``
    and then shows the figure.
    """
    fig = plt.figure(1, facecolor="white")
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    # Horizontal step is 1/number-of-leaves, vertical step is 1/tree-depth.
    plotTree.totalW = float(getNumLeafs(inTree))
    # The vertical scale must come from the depth; using the leaf count here
    # squeezes the tree into the top part of the axes.
    plotTree.totalD = float(getTreeDepth(inTree))
    # Start half a slot to the left so the first leaf lands mid-slot.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), "")
    plt.show()

    
    


def createDataSet():
    """Return the toy data set and the names of its feature columns.

    Returns:
        A ``(dataSet, labels)`` pair. Every row of ``dataSet`` holds two
        feature values followed by the class label ("yes" or "no");
        ``labels`` names the two feature columns.
    """
    labels = ["no surfacing", "flippers"]
    dataSet = [
        [1, 1, "yes"],
        [1, 1, "yes"],
        [1, 0, "no"],
        [0, 1, "no"],
        [0, 1, "no"],
    ]
    return dataSet, labels

def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    Args:
        dataSet: Sequence of rows whose last element is the class label.

    Returns:
        The entropy as a float. The more evenly the rows are spread over
        different classes, the higher the value; an empty data set gives 0.0.
    """
    numEntries = len(dataSet)
    # Count how many rows carry each class label (the last column).
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries
        # Logarithm to base 2.
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
    
    
def splitDataSet(dataSet,axis,value):
    """Select the rows whose column ``axis`` equals ``value``, minus that column.

    Args:
        dataSet: List of rows (each row a list).
        axis: Index of the feature column to test.
        value: Feature value a row must have in that column to be kept.

    Returns:
        A new list of new rows; each kept row is the original row with the
        column ``axis`` removed. The input rows are not modified.
    """
    return [
        # Everything before the tested column plus everything after it.
        featVec[:axis] + featVec[axis + 1:]
        for featVec in dataSet
        if featVec[axis] == value
    ]
        

def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature column with the largest information gain.

    Args:
        dataSet: Non-empty list of rows; the last element of every row is
            the class label, all other elements are feature values.

    Returns:
        The column index of the best feature, or -1 when no feature gives an
        information gain greater than zero.
    """
    rowCount = float(len(dataSet))
    baseEntropy = calcShannonEnt(dataSet)
    bestFeature, bestInfoGain = -1, 0.0
    # Every column except the last one (the class label) is a feature.
    for column in range(len(dataSet[0]) - 1):
        # Weighted entropy after splitting on each distinct value of the column.
        newEntropy = 0.0
        for value in set(example[column] for example in dataSet):
            subDataSet = splitDataSet(dataSet, column, value)
            newEntropy += len(subDataSet) / rowCount * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestFeature, bestInfoGain = column, infoGain
    return bestFeature

def majorityCnt(classList):
    """Return the class label that occurs most often in ``classList``.

    Args:
        classList: Iterable of class labels.

    Returns:
        The most frequent label. When several labels are equally frequent,
        the one that appears first in ``classList`` is returned.

    Raises:
        ValueError: If ``classList`` is empty.
    """
    # Count the occurrences of each distinct label.
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    if not classCount:
        raise ValueError("majorityCnt() needs at least one class label")
    # The previous code called the undefined name ``iteritems`` and therefore
    # always raised NameError; dict.items() is the Python 3 spelling.
    # max() returns the first of several equally frequent labels.
    return max(classCount.items(), key=operator.itemgetter(1))[0]

def createTree(dataSet,labels):
    """Recursively build a decision tree from ``dataSet``.

    Args:
        dataSet: Non-empty list of rows; the last element of every row is
            the class label, all other elements are feature values.
        labels: Names of the feature columns, in column order. The list is
            not modified.

    Returns:
        A class label when the rows cannot or need not be split further,
        otherwise a nested dict ``{feature_name: {feature_value: subtree}}``.
    """
    classList = [example[-1] for example in dataSet]
    # First stop condition: every row has the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Second stop condition: all features are used up but the classes are
    # still mixed, so fall back to the most frequent class.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataSet)
    # No feature improves the split (index -1). Indexing with -1 would split
    # on the class column itself, so fall back to the most frequent class.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # Build the remaining label list as a copy instead of deleting from the
    # caller's list, so ``labels`` can still be used after the call.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]
    myTree = {bestFeatLabel: {}}
    # One branch per distinct value of the chosen feature.
    for value in set(example[bestFeat] for example in dataSet):
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree
    
def getNumLeafs(myTree):
    """Return the number of leaf nodes in ``myTree``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Returns:
        The count of values that are not dicts, over the whole tree.
    """
    firstStr = list(myTree)[0]
    numLeafs = 0
    for child in myTree[firstStr].values():
        # isinstance also recognises dict subclasses such as OrderedDict,
        # which a comparison of the type name would count as leaves.
        if isinstance(child, dict):
            # A dict child is another decision node: count its leaves.
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs
    
def getTreeDepth(myTree):
    """Return the number of decision levels in ``myTree``.

    Args:
        myTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.

    Returns:
        1 for a tree whose children are all leaves, plus 1 for every further
        level of nested decision nodes on the deepest path.
    """
    firstStr = list(myTree)[0]
    maxDepth = 0
    for child in myTree[firstStr].values():
        # isinstance also recognises dict subclasses such as OrderedDict,
        # which a comparison of the type name would treat as leaves.
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        maxDepth = max(maxDepth, thisDepth)
    return maxDepth

def classify(inputTree,featLabels,testVec):
    """Classify ``testVec`` by walking down the decision tree.

    Args:
        inputTree: Nested dict of the form ``{feature: {value: subtree_or_label}}``.
        featLabels: Feature names in the column order used by ``testVec``.
        testVec: Feature values of the sample to classify.

    Returns:
        The class label stored in the leaf that the sample reaches.

    Raises:
        ValueError: If a feature name of the tree is missing from
            ``featLabels`` (raised by ``list.index``), or if the sample's
            value for a feature has no branch in the tree.
    """
    firstStr = list(inputTree)[0]
    secondDict = inputTree[firstStr]
    # Translate the feature name into a position inside testVec.
    featIndex = featLabels.index(firstStr)
    featValue = testVec[featIndex]
    for key, child in secondDict.items():
        if featValue == key:
            if isinstance(child, dict):
                return classify(child, featLabels, testVec)
            return child
    # Previously this fell through to ``return classLabel`` with the name
    # unbound, which surfaced as an UnboundLocalError.
    raise ValueError(
        "no branch for {!r}={!r} in the decision tree".format(firstStr, featValue))
        
def storeTree(inputTree,filename):
    """Serialise ``inputTree`` to ``filename`` with pickle.

    Args:
        inputTree: The decision tree (any picklable object).
        filename: Path of the file to write; it is opened in binary mode and
            overwritten if it exists.
    """
    import pickle
    # ``with`` closes the file even when pickle.dump raises.
    with open(filename, "wb") as fw:
        pickle.dump(inputTree, fw)
    
def grabTree(filename):
    """Load and return a tree that was written with ``storeTree``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source.
    # ``with`` closes the file, which the previous version never did.
    with open(filename, "rb") as fr:
        return pickle.load(fr)
        

def predictTypes(filename="lenses.txt"):
    """Build, print and plot a decision tree for the contact-lens data.

    Args:
        filename: Path of the data file. Every non-blank line holds the
            values for age, prescript, astigmatic and tearRate followed by
            the class label, separated by tab characters.
    """
    # ``with`` closes the file; blank lines (for example a trailing newline
    # at the end of the file) are skipped so they do not become bogus rows.
    with open(filename) as fr:
        # Rows look like ["young", "myope", "no", "reduced", "no lenses"].
        lenses = [inst.strip().split("\t") for inst in fr if inst.strip()]
    # Names of the four feature columns.
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lensesTree = createTree(lenses, lensesLabels)
    print(lensesTree)
    createPlot(lensesTree)

def main():
    """Run the contact-lens example via ``predictTypes``."""
    predictTypes()

    # Earlier experiments on the toy data set, left commented out:
    #dataSet,labels=createDataSet()
    #print(labels)
    #chooseBestFeatureToSplit(dataSet)
    #{"no surfacing": {0: "no", 1: {"flippers": {0: "no", 1: "yes"}}}}
    #myTree=createTree(dataSet,labels)
    #storeTree(myTree,"classifierStorage.txt")
    #Tree=grabTree("classifierStorage.txt")
    #print(Tree)
    #createPlot(Tree)
    
    #print("myTree:")
    #print(myTree)
    #createPlot(myTree)
    #labels2=["no surfacing", "flippers"]
    #i=classify(myTree,labels2,[1,1])
    #print(i)
    #i=getNumLeafs(myTree)
    #print(i)
    #i=getTreeDepth(myTree)
    #print(i)
    #i=chooseBestFeatureToSplit(dataSet)
    #print(i)
    #shannonEnt=calcShannonEnt(dataSet)
    #print(shannonEnt)
    # Adding one more class and measuring again shows a larger entropy.
    #dataSet[0][-1]="maybe"
    #shannonEnt=calcShannonEnt(dataSet)
    #print(shannonEnt)
    #retDataSet=splitDataSet(dataSet,0,1)
    #print("retDataSet:")
    #print(retDataSet)
    #retDataSet=splitDataSet(dataSet,0,0)
    #print("retDataSet:")
    #print(retDataSet)
    
    
    
# Run the example only when the file is executed as a script.
if __name__=="__main__":
    main()

数据

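lenses.txt文件内容如下（每行五个字段：age、prescript、astigmatic、tearRate 和类别；字段之间必须用制表符 Tab 分隔，因为代码按制表符切分每一行；注意类别 "no lenses" 本身含有一个空格）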

young    myope    no    reduced    no lenses
young    myope    no    normal    soft
young    myope    yes    reduced    no lenses
young    myope    yes    normal    hard
young    hyper    no    reduced    no lenses
young    hyper    no    normal    soft
young    hyper    yes    reduced    no lenses
young    hyper    yes    normal    hard
pre    myope    no    reduced    no lenses
pre    myope    no    normal    soft
pre    myope    yes    reduced    no lenses
pre    myope    yes    normal    hard
pre    hyper    no    reduced    no lenses
pre    hyper    no    normal    soft
pre    hyper    yes    reduced    no lenses
pre    hyper    yes    normal    no lenses
presbyopic    myope    no    reduced    no lenses
presbyopic    myope    no    normal    no lenses
presbyopic    myope    yes    reduced    no lenses
presbyopic    myope    yes    normal    hard
presbyopic    hyper    no    reduced    no lenses
presbyopic    hyper    no    normal    soft
presbyopic    hyper    yes    reduced    no lenses
presbyopic    hyper    yes    normal    no lenses

idc机房托管专线服务机器学习决策树决策树决策树python 决策树图像识别

文章版权归作者所有，未经允许请勿转载,若此文章存在违规行为，您可以联系管理员删除。

转载请注明本文地址：https://www.ucloud.cn/yun/20542.html

机器学习之决策树算法

摘要：决策树机器学习中，决策树是一个预测模型他代表的是对象属性与对象值之间的一种映射关系。从数据产生决策树的机器学习技术叫做决策树学习通俗说就是决策树。剪枝剪枝是决策树学习算法中对付过拟合的主要手段。决策树(decision tree) 机器学习中，决策树是一个预测模型；他代表的是对象属性与对象值之间的一种映射关系。树中每个节点表示某个对象，而每个分叉路径则代表的某个可能的属性值，而每个叶...

raise_yang 2019-06-26 15:41 评论0 收藏0
机器学习A-Z～决策树与随机森林

摘要：它是一个分类和回归的决策树。代码实现由于这次决策树算法，我们没有使用欧式距离，也就是说可以不用进行特征缩放。分类器改成决策树算法的。通过结果观察，这里使用随机森林分类器是会出现过拟合的情况。以上，就是关于决策树和随机森林相关的基础知识。决策树有的人可能听过一个词：CART，这个代表的意思是Classification And Regression Tree。它是一个分类和回归的决策...

seasonley 2019-06-26 16:21 评论0 收藏0
机器学习从入门到放弃之决策树算法

摘要：总而言之，决策树首先需要从大量已存在的样本中推出可供做决策的规则，同时，这个规则应该避免做无谓的损耗。算法原理：构造决策树的关键步骤是分裂属性。这时分裂属性可能会遇到三种不同的情况：对离散值生成非二叉决策树；对离散值生成二叉决策树。算法背景：决策树顾名思义是用于基于条件来做决策的，而它运行的逻辑相比一些复杂的算法更容易理解，只需按条件遍历树就可以了，需要花点心思的是理解如何建立决策...

mikyou 2019-07-25 10:33 评论0 收藏0
机器学习从入门到放弃之决策树算法

摘要：总而言之，决策树首先需要从大量已存在的样本中推出可供做决策的规则，同时，这个规则应该避免做无谓的损耗。算法原理：构造决策树的关键步骤是分裂属性。这时分裂属性可能会遇到三种不同的情况：对离散值生成非二叉决策树；对离散值生成二叉决策树。算法背景：决策树顾名思义是用于基于条件来做决策的，而它运行的逻辑相比一些复杂的算法更容易理解，只需按条件遍历树就可以了，需要花点心思的是理解如何建立决策...

FingerLiu 2019-06-26 15:31 评论0 收藏0
机器学习算法经验总结

摘要：看到一篇很好的介绍机器学习算法的文章，转载过来，有这方面学习研究的朋友可以看看。算算时间，从开始到现在，做机器学习算法也将近八个月了。目前，机器学习的方法主要有三种监督学习半监督学习和无监督学习。看到一篇很好的介绍机器学习算法的文章，转载过来，有这方面学习、研究的朋友可以看看。算算时间，从开始到现在，做机器学习算法也将近八个月了。虽然还没有达到融会贯通的地步，但至少在熟悉了算法的流...

snowLu 2019-06-26 15:31 评论0 收藏0
机器学习--决策树--dot转存pdf

摘要：决策树分支转存写代码的方法今天是周日，我还在倒腾决策树，然后发现了一个不用装软件也能倒的方法，而且更简单。刚开始看视频的时候是看的的视频，讲的真差，太模糊了，不适合我。决策树分支dot转存pdf 1、写代码的方法今天是周日，我还在倒腾决策树，然后发现了一个不用装软件也能倒pdf的方法，而且更简单。参照了这个中文的文档实现：http://sklearn.apachecn.org/c....

Bryan 2019-07-31 11:16 评论0 收藏0