安装 Airflow。
创建 airflow 组。
sudo groupadd airflow
创建 airflow 用户。
sudo useradd -s /bin/false -g airflow -d /home/garden-user/etc/airflow airflow
设置权限。
cd /home/garden-user/etc/airflow
sudo chgrp -R airflow /home/garden-user/etc/airflow
sudo chown -R airflow /home/garden-user/etc/airflow
sudo chmod -R 764 /home/garden-user/etc/airflow
安装 Python 3.6 和 pip。
sudo apt install python3.6 python3-pip
用 psql 连接 PostgreSQL。
sudo -u postgres psql
显示如下:
psql (11.12 (Ubuntu 11.12-1.pgdg18.04+1))
输入 "help" 来获取帮助信息。
postgres=#
创建数据库「airflow」。
CREATE DATABASE airflow;
退出 psql,从 PostgreSQL 断开。
\q
切换到 airflow 用户。
sudo su - airflow --shell=/bin/bash
设置环境变量。
vi ~/.profile
.profile 内容如下:
# Set airflow home
export AIRFLOW_HOME="/home/garden-user/etc/airflow"
# set python path
export PYTHONPATH=$PYTHONPATH:$AIRFLOW_HOME/dags:$AIRFLOW_HOME/dags/analyzer:$AIRFLOW_HOME/dags/analyzer/datas:$AIRFLOW_HOME/dags/analyzer/utils
# set path so it includes user's pip global installations directory.
export PATH=~/.local/bin:$PATH
# Set python
alias python="python3"
alias pip="pip3"
使修改生效。
source ~/.profile
升级 pip 和 setuptools,然后安装 psycopg2-binary 和 apache-airflow-providers-postgres。
pip install --user --upgrade pip setuptools
pip install --user psycopg2-binary==2.9.1
pip install --user apache-airflow-providers-postgres==1.0.2
使用 pip 安装 Airflow 本体和 airflow_code_editor 插件。
AIRFLOW_VERSION=2.0.1
PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)"
CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt"
pip install --user "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
pip install --user airflow_code_editor==4.0.0
初始化 Airflow(首次执行会在 AIRFLOW_HOME 下生成 airflow.cfg)。
airflow db init
编辑 airflow.cfg。
vi airflow.cfg
修改 airflow.cfg 的下列项目:
default_timezone = Asia/Tokyo
executor = LocalExecutor
sql_alchemy_conn = postgresql+psycopg2://postgres:postgres@localhost:5432/airflow
load_examples = False
endpoint_url = http://localhost:8085/airflow
auth_backend = airflow.api.auth.backend.basic_auth
base_url = http://localhost:8085/airflow
default_ui_timezone = Asia/Tokyo
web_server_port = 8085
enable_proxy_fix = True
flower_url_prefix = /airflow
在 airflow.cfg 中添加下列项目:
[code_editor]
git_enabled = False
按修改后的 airflow.cfg 重新初始化数据库(此时元数据库为 PostgreSQL 的 airflow 数据库)。
airflow db init
创建用于登录 Airflow Web UI 的管理员用户。
airflow users create \
--role Admin \
--username admin \
--firstname admin \
--lastname user \
--email admin@acme.com \
--password admin
退出 airflow 用户的 shell,回到原来的用户。
exit
创建 airflow-scheduler.service 文件。
sudo vi /etc/systemd/system/airflow-scheduler.service
airflow-scheduler.service 内容如下:
[Unit]
Description=Airflow scheduler daemon
After=network.target postgresql.service
Wants=postgresql.service
[Service]
# which airflow
Environment="PATH=/bin:/home/garden-user/etc/airflow/.local/bin:$PATH"
Environment="PYTHONPATH=$PYTHONPATH:/home/garden-user/etc/airflow/dags:/home/garden-user/etc/airflow/dags/analyzer:/home/garden-user/etc/airflow/dags/analyzer/datas:/home/garden-user/etc/airflow/dags/analyzer/utils"
# AIRFLOW_HOME
Environment="AIRFLOW_CONFIG=/home/garden-user/etc/airflow/airflow.cfg"
Environment="AIRFLOW_HOME=/home/garden-user/etc/airflow"
# Execute User
User=airflow
Group=airflow
Type=simple
# which airflow
ExecStart= /home/garden-user/etc/airflow/.local/bin/airflow scheduler
Restart=on-failure
RestartSec=5s
PrivateTmp=true
[Install]
WantedBy=multi-user.target
创建 airflow-webserver.service 文件。
sudo vi /etc/systemd/system/airflow-webserver.service
airflow-webserver.service 内容如下:
[Unit]
Description=Airflow webserver daemon
After=network.target postgresql.service
Wants=postgresql.service
[Service]
# which airflow
Environment="PATH=/bin:/home/garden-user/etc/airflow/.local/bin:$PATH"
Environment="PYTHONPATH=$PYTHONPATH:/home/garden-user/etc/airflow/dags:/home/garden-user/etc/airflow/dags/analyzer:/home/garden-user/etc/airflow/dags/analyzer/datas:/home/garden-user/etc/airflow/dags/analyzer/utils"
# AIRFLOW_HOME
Environment="AIRFLOW_CONFIG=/home/garden-user/etc/airflow/airflow.cfg"
Environment="AIRFLOW_HOME=/home/garden-user/etc/airflow"
# Execute User
User=airflow
Group=airflow
Type=simple
# which airflow
ExecStart= /home/garden-user/etc/airflow/.local/bin/airflow webserver
Restart=on-failure
RestartSec=5s
PrivateTmp=true
[Install]
WantedBy=multi-user.target
让 systemd 重新加载 unit 配置文件。
sudo systemctl daemon-reload
将服务设为自动启动。
sudo systemctl enable airflow-scheduler
sudo systemctl enable airflow-webserver
启动 Apache Airflow。
sudo systemctl start airflow-scheduler
sudo systemctl start airflow-webserver
访问 Apache Airflow。
http://localhost:8085/airflow