精易论坛

标题: [我的爬虫日记07]PHP爬取全国房价,最低鹤岗,最高北京! [打印本页]

作者: 易飞鸟    时间: 2024-11-10 18:14
标题: [我的爬虫日记07]PHP爬取全国房价,最低鹤岗,最高北京!
[PHP] 纯文本查看 复制代码
<?php
include_once "./lib/func.php";
include_once "./lib/PHPExcel.php";

/**
 * Fetch the city house-price ranking of one province.
 *
 * Downloads the 58.com price page of the province, cuts out the section that
 * follows the "房价排行榜" heading and builds one record per city fragment.
 *
 * @param string $province_id   URL path segment of the province page (e.g. "sheng-2024-1").
 * @param string $province_name Province name; used in log output and copied into every record.
 * @return array List of records with the keys province_name, city_name, price, up_and_down.
 */
function get_phb(string $province_id,string $province_name):array{

    log_print("开始获取{$province_name}...",true);

    $url = "https://www.58.com/fangjiawang/".$province_id;
    $headers = [
        "user-agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
    ];

    $ret_html = get_data($url,$headers);

    // Isolate the ranking list, then split it into one HTML fragment per city.
    $phb_html = get_mid_text($ret_html,"房价排行榜","</li></ul>");
    $city_html_arr = get_mid_text_batch($phb_html,"<b","em>");

    $city_list = [];
    foreach($city_html_arr as $val){
        // Text pieces found between ">" and "<" inside the fragment.
        $row_arr = get_mid_text_batch($val,">","<");

        // Pieces 0, 2 and 4 are read below; skip fragments that do not provide
        // all of them instead of emitting undefined-offset warnings and
        // appending a half-empty record.
        if (!is_array($row_arr) || !isset($row_arr[0], $row_arr[2], $row_arr[4])) {
            log_print("{$province_name}|跳过无法解析的条目",true);
            continue;
        }

        $city = [
            "province_name" => $province_name,
            // The first piece carries a "房价" label which is stripped from the name.
            "city_name" => str_replace("房价", "", $row_arr[0]),
            "price" => $row_arr[2],
            "up_and_down" => $row_arr[4]
        ];

        log_print($city["province_name"]."|".$city["city_name"]."|".$city["price"]."|".$city["up_and_down"],true);

        $city_list[] = $city;
    }
    return $city_list;
}

/**
 * Remove Chinese characters from a string.
 *
 * Deletes every code point in the range U+4E00..U+9FFF and trims surrounding
 * whitespace from what is left.
 *
 * @param string $string Input text.
 * @return string The text without those characters; an empty string when the
 *                regex engine rejects the input (e.g. malformed UTF-8).
 */
function removeChinese(string $string):string {
    // preg_replace() yields null on failure; coalesce so trim() never receives
    // null (deprecated since PHP 8.1).
    $stripped = preg_replace('/[\x{4e00}-\x{9fff}]/u', '', $string) ?? '';
    return trim($stripped);
}

/**
 * Fetch the average house price of a single municipality.
 *
 * Downloads the 58.com price page of the city, takes the block that follows
 * the "二手房均价价格" label, strips its HTML tags and extracts the price and the
 * month-over-month change from the remaining text.
 *
 * @param string $city_id       URL path segment of the city page (e.g. "shi-2024-100").
 * @param string $city_name     City name; used in log output and in the record.
 * @param string $province_name Value stored in the record's province_name field.
 * @return array One record with the keys province_name, city_name, price, up_and_down.
 */
function get_zxs_phb(string $city_id,string $city_name,string $province_name):array{

    log_print("开始获取{$city_name}...",true);

    $url = "https://www.58.com/fangjiawang/".$city_id;
    $headers = [
        "user-agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
    ];

    $ret_html = get_data($url,$headers);

    $zst_html = get_mid_text($ret_html,"二手房均价价格","</div>");
    $zst_html = strip_tags($zst_html);

    // NOTE(review): presumably get_left_text()/get_right_text() return the text
    // before/after the given marker - confirm against lib/func.php.
    $price = trim(get_left_text($zst_html,"元"));
    $up_down = removeChinese(get_right_text($zst_html,"比上月"));

    $city = [
        "province_name" => $province_name,
        "city_name" => $city_name,
        "price"=> $price."元/㎡",
        // Strict comparison: empty() would also treat a legitimate "0" change as missing.
        "up_and_down"=> $up_down === "" ? "--" : $up_down
    ];

    log_print($city["province_name"]."|".$city["city_name"]."|".$city["price"]."|".$city["up_and_down"],true);

    return $city;
}

/**
 * Export the collected price records to an Excel workbook.
 *
 * Writes a header row followed by one row per record (province, city, price,
 * change) into the active sheet and saves it with the 'Excel5' writer.
 *
 * @param array  $city_list Records with the keys province_name, city_name, price, up_and_down.
 * @param string $file_name Path of the workbook to write.
 * @return void
 */
function house_price_to_excel(array $city_list, string $file_name):void
{
    log_print("开始导出...",true);

    // Create the workbook and grab its active sheet.
    $objPHPExcel = new PHPExcel();
    $objSheet = $objPHPExcel->getActiveSheet();

    // Header row.
    $objSheet->setCellValue('A1', '省份');
    $objSheet->setCellValue('B1', '城市');
    $objSheet->setCellValue('C1', '房价');
    $objSheet->setCellValue('D1', '升降');

    // Data rows start at row 2. Iterating by value (instead of indexing
    // 0..count-1) keeps the rows contiguous even when the array keys are not
    // a gap-free 0-based sequence.
    $w = 2;
    foreach ($city_list as $city) {
        $objSheet->setCellValue("A{$w}", $city["province_name"]);
        $objSheet->setCellValue("B{$w}", $city["city_name"]);
        $objSheet->setCellValue("C{$w}", $city["price"]);
        $objSheet->setCellValue("D{$w}", $city["up_and_down"]);
        log_print($city["city_name"]."->房价信息写出成功",true);
        $w++;
    }

    // Save.
    $objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel5');
    // NOTE(review): the "@" is kept from the original, presumably to silence
    // notices raised inside the legacy PHPExcel library - confirm.
    @$objWriter->save($file_name);

    // Because diagnostics are suppressed above, verify that a non-empty file
    // exists before reporting success. A stale file left by an earlier run
    // would still pass this check.
    clearstatcache(true, $file_name);
    if (!is_file($file_name) || filesize($file_name) === 0) {
        log_print("导出失败: {$file_name}",true);
        return;
    }

    log_print("任务完成",true);
}


/**
 * Program driver: collects the price records of every province and of the
 * four municipalities, then exports everything into a single Excel file.
 *
 * A two-second pause follows every page request.
 */
function main():void{
    // Province name => page id on the site.
    $provinces = [
        "安徽" => "sheng-2024-1",
        "福建" => "sheng-2024-2",
        "甘肃" => "sheng-2024-3",
        "广东" => "sheng-2024-4",
        "广西" => "sheng-2024-5",
        "贵州" => "sheng-2024-6",
        "海南" => "sheng-2024-7",
        "河北" => "sheng-2024-8",
        "黑龙江" => "sheng-2024-9",
        "河南" => "sheng-2024-10",
        "湖北" => "sheng-2024-11",
        "湖南" => "sheng-2024-12",
        "江苏" => "sheng-2024-13",
        "江西" => "sheng-2024-14",
        "吉林" => "sheng-2024-15",
        "辽宁" => "sheng-2024-16",
        "内蒙古" => "sheng-2024-17",
        "宁夏" => "sheng-2024-18",
        "青海" => "sheng-2024-19",
        "陕西" => "sheng-2024-20",
        "山东" => "sheng-2024-21",
        "山西" => "sheng-2024-22",
        "四川" => "sheng-2024-23",
        "新疆" => "sheng-2024-24",
        "西藏" => "sheng-2024-25",
        "云南" => "sheng-2024-26",
        "浙江" => "sheng-2024-27",
    ];

    // Municipality name => page id on the site.
    $municipalities = [
        "北京" => "shi-2024-100",
        "上海" => "shi-2024-101",
        "天津" => "shi-2024-105",
        "重庆" => "shi-2024-106"
    ];

    $records = [];

    // Provinces: each call yields a list of city records.
    foreach ($provinces as $name => $id) {
        foreach (get_phb($id, $name) as $record) {
            $records[] = $record;
        }
        sleep(2);
    }

    // Municipalities: each call yields exactly one record.
    foreach ($municipalities as $name => $id) {
        $records[] = get_zxs_phb($id, $name, "直辖市");
        sleep(2);
    }

    // Write the combined result to disk.
    house_price_to_excel($records, "全国房价.xls");
}


// ++++++++++++++++++++++++++ Entry point ++++++++++++++++++++++++++
// The scraper only runs under the CLI SAPI; any other SAPI gets a hint instead.
if (php_sapi_name() === 'cli') {
    main();
} else {
    // Message typo fixed: "模型" (model) -> "模式" (mode).
    log_print("请在命令行模式下运行!",true,2);
}



价格最低10个城市


价格最高10个城市



全国房价.rar (959.58 KB, 下载次数: 17)





作者: fire9    时间: 2024-11-10 18:47
鹤岗!  感觉还不错
作者: bianyuan456    时间: 2024-11-10 20:11
已经顶贴,感谢您对论坛的支持!
作者: qqmqqg    时间: 2024-11-10 21:05
666666666666666666666
作者: qwe111qwe    时间: 2024-11-11 07:24
感谢分享!!!!
作者: 杨明煜    时间: 2024-11-11 07:52
学习进步!......
作者: 一指温柔    时间: 2024-11-11 09:21
感谢分享,很给力!~
作者: wjswzj0    时间: 2024-11-11 09:45
感谢分享,很给力!~
作者: 506656161    时间: 2024-11-11 22:51
鹤岗!  感觉还不错
作者: 1067546628    时间: 2024-11-12 15:19
学习了.清楚最新房价
作者: Wz613613    时间: 2024-11-12 22:54
感谢分享,很给力!~
作者: shuaier    时间: 2024-11-14 13:49
支持开源~!感谢分享
作者: 熊不熊    时间: 2024-12-4 03:20
感谢分享,很给力!~




欢迎光临 精易论坛 (https://125.confly.eu.org/) Powered by Discuz! X3.4