"""Export all Bitcoin historical candles from Binance."""

import ccxt  # Import CCXT library for cryptocurrency exchange APIs
import time   # Import time module for sleep function
import logging  # Import logging module for structured logging
import json   # Import json module for saving data to file

# Logging setup for the whole script.
# Every record is rendered as "<timestamp> - <level> - <message>"; the root
# logger is set to DEBUG so the per-batch progress messages are shown.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.DEBUG)

# Logger named after this module; used for every message emitted below.
logger = logging.getLogger(__name__)

# Export settings.
# The pair is written with a dash here and converted to CCXT's slash form
# ('BTC/USDT') when the request is made. Change these two values to export
# a different market or candle size.
pair = 'BTC-USDT'  # Trading pair to export
timeframe = '1h'   # Candle size for the OHLCV data (one-hour candles)
klines = []        # Accumulates every OHLCV row fetched below

# CCXT client for Binance, created without API keys.
binance = ccxt.binance()

# Upper bound of the export: the client's current time in milliseconds.
current_time = binance.milliseconds()
logger.debug(f"Current time: {current_time}")

# Lower bound of the export: 1 August 2017, 00:00 UTC, parsed into a
# millisecond timestamp. Move this date to export a shorter or longer history.
since = binance.parse8601('2017-08-01T00:00:00Z')
logger.debug(f"Starting since: {since}")

# Page through the history in batches of up to 1000 candles, moving `since`
# forward after each batch, until it reaches `current_time` or the exchange
# returns an empty batch.
while since < current_time:
    logger.debug(f"Fetching OHLCV for {pair} from {since}")

    # CCXT symbols use a slash separator, so 'BTC-USDT' is sent as 'BTC/USDT'.
    batch = binance.fetch_ohlcv(
        symbol=pair.replace('-', '/'),
        timeframe=timeframe,
        since=since,
        limit=1000,
    )

    # Short pause after every request to avoid hitting API rate limits.
    time.sleep(0.1)

    if not batch:
        # Nothing came back, so there is no further data to collect.
        logger.debug("No more klines to fetch. Exiting loop.")
        break

    klines.extend(batch)
    # The first field of a row is its timestamp; continuing from one
    # millisecond later makes the next fetch start right after this batch.
    since = batch[-1][0] + 1
    logger.debug(f"Fetched {len(batch)} klines. Total so far: {len(klines)}")

# Persist everything that was fetched as one JSON array of OHLCV rows.
# Both separators a pair may be written with ('-' and '/') are mapped to '_',
# so 'BTC-USDT' and 'BTC/USDT' alike yield BTC_USDT_1h_klines.json and the
# file name never contains a path separator. (Previously only '/' was
# replaced, so the dash-separated default pair produced
# BTC-USDT_1h_klines.json instead of the documented name.)
safe_pair = pair.replace('-', '_').replace('/', '_')
file_name = f"{safe_pair}_{timeframe}_klines.json"
with open(file_name, 'w') as f:
    json.dump(klines, f)  # Rows are plain lists of numbers, so they serialize as-is
logger.info(f"Saved {len(klines)} klines to {file_name}")