Sunday, 9 October 2016

Reading data from a web page using Arduino and an ESP8266 module (no esp8266.h library)

A little while back, we were playing about with our newly-updated ESP8266 modules, with version 0.9.4 firmware. They're great little modules, and make excellent socket-based clients. But our new firmware broke the esp8266 Arduino library. Which meant we had to roll our own code for sending, receiving and parsing AT commands.

The basics are pretty simple - chuck a few strings into a few functions, send AT commands, catch the responses as strings and use the usual built-in parsing functions.

But when you start downloading entire web page contents (or, tbh, anything more than a couple of hundred bytes at a time) the responses get a little... well, confused is putting it mildly.

Now no end of people will say "avoid Strings on an embedded controller". I'm one of them. But this was as much an exercise in making shareable Arduino code as it was about making a working solution. So while the very first impulse, when things started to go wrong, was "sod this, let's shove it on a PIC and use byte arrays", we had to resist - and shove it on an Arduino and use byte arrays....

There's a bit of confusion between Strings and byte/char arrays in Arduino-land. We haven't the scope to cover C++ programming concepts, pointers and memory locations here. But there's probably just about enough space to throw down some (working) code from which you can build your own embedded, web-based projects.

So let's get down to it....

The thing to remember is that we're working on an embedded system. An mcu with a massive 2k of memory. That's not a lot. So even though we're sending data to and from a web site, we need to write our web pages so as to keep the total character count down to a minimum.

We've built our input buffer to be about 600 characters. We might get away with a few hundred more. 1024 characters at a push - the ESP8266 "chunks" large responses into 1024 bytes anyway. But that doesn't leave a massive amount of RAM left for all the other stuff we need to do. And it only takes one string of data to exceed the expected length, and we get all kinds of weird bugs and faults and corrupted data.

So although this code can retrieve data from a web page, it does so on the understanding that the web page itself is very very lightweight.


#include <string.h>

#include <LiquidCrystal.h>
#include <EEPROM.h>

// Pin assignments.
// wifi_reset  : driven LOW then HIGH by connectToRouter() to hardware-reset the module
// led_pin     : toggled by flashLED()
// button1_pin : configured INPUT_PULLUP in setup(); reading LOW in loop() triggers a page fetch
#define wifi_reset    10
#define led_pin        13
#define button1_pin 12

// Software UART used to talk to the ESP8266 (opened at 9600 baud in setup()).
#include <SoftwareSerial.h>
SoftwareSerial mySerial(8, 9); // RX, TX

// ------------ change this stuff --------------
// Access point credentials, sent in the AT+CWJAP command.
String ssid = "YOUR_SSID";
String pwd    = "YOUR_PWD";
// Appended to every request as the "?user=" query parameter.
String user = "Chris";
// Address used in the AT+CIPSTART command (TCP, port 80).
#define DEST_IP "188.226.228.99"
// Tail of the GET request: protocol version, Host header and the blank line.
#define HTTP_REQ " HTTP/1.1\r\nHost: www.yourdomain.co.uk:80\r\n\r\n"
// Seconds getHTMLResponse() waits for the page (it multiplies this by 1000).
#define HTML_TIMEOUT    10
// Subtracted from the +IPD byte count when deciding the message is complete.
#define HTML_OVERHEAD 2
// in_debug: print progress messages to Serial.
// show_wifi_output: echo every raw byte received from the module to Serial.
bool in_debug = true;
bool show_wifi_output = false;
// ---------------------------------------------

// loop() state: 0 = wait for button press, 1 = fetch the web page.
int state=0;
// Result of the most recent checkIP() call made by connectToRouter().
bool has_ip = false;
// Scratch variable; loop() stores the button reading here.
char b;
// Scratch String for building AT commands; also reused by readCloseInput().
String cmd;
// Set by readInput(): true when the requested match string was seen.
bool wifi_matched = false;
// Shared receive buffer for module replies and downloaded HTML.
// NOTE(review): the accompanying article talks about ~600 characters, but
// the buffer declared here holds 500 bytes.
char wifi_response[500];
// Result of the last strstr() performed by wifiResponseContains().
char *ptr;
// Index in wifi_response where the next received byte will be written.
int ptr_index;
// Filled in by checkIP() ("0.0.0.0" until an address has been read).
String this_ip;

void emptyWifiResponse(){
    memset(wifi_response, 0, sizeof(wifi_response));
    ptr_index=0;
}

// Sends one AT command line to the wifi module.
//   at - the command text; println() supplies the line ending.
// The receive buffer is cleared first, and any bytes still waiting on the
// software serial port are thrown away so that the next read only sees
// the reply to this command.
void writeAT(String at){
    emptyWifiResponse();

    if(in_debug){
        Serial.print(F("Sending: "));
        Serial.println(at);
    }

    // discard anything left over from a previous exchange
    while(mySerial.available() > 0){
        mySerial.read();
    }

    // hand the command to the module
    mySerial.println(at);
}

bool wifiResponseContainsHeader(){
    bool b=false;
    int k=0;
    if(wifi_response[ptr_index-14]==':'){ k++;}
    if(wifi_response[ptr_index-13]==' '){ k++;}
    if(wifi_response[ptr_index-12]=='t'){ k++;}
    if(wifi_response[ptr_index-11]=='e'){ k++;}
    if(wifi_response[ptr_index-10]=='x'){ k++;}
    if(wifi_response[ptr_index- 9]=='t'){ k++;}
    if(wifi_response[ptr_index- 8]=='/'){ k++;}
    if(wifi_response[ptr_index- 7]=='h'){ k++;}
    if(wifi_response[ptr_index- 6]=='t'){ k++;}
    if(wifi_response[ptr_index- 5]=='m'){ k++;}
    if(wifi_response[ptr_index- 4]=='l'){ k++;}

    // characters -3, -2, -1, 0 are the
    // combination of CrLf x 2
   
    if(k>=9){ b=true; }
    return(b);
}

// Searches the receive buffer for a piece of text.
//   findThis - the text to look for.
// Returns true when findThis occurs anywhere in wifi_response. As a side
// effect the global `ptr` is set to the start of the match (or to a null
// pointer when there is none). An empty findThis always matches.
//
// The previous version copied findThis into a scratch array sized with
// sizeof(findThis). That is the size of the String object itself (only a
// handful of bytes on AVR), not the length of the text, so longer search
// strings such as "192.168." or "no change" were silently truncated before
// the search. Searching with c_str() needs no copy at all.
bool wifiResponseContains(String findThis){

    // strstr needs a terminated haystack; make sure a completely full
    // buffer cannot send it past the end of the array
    wifi_response[sizeof(wifi_response) - 1] = 0;

    ptr = strstr(wifi_response, findThis.c_str());
    return(ptr != nullptr);
}

void wifiResponse_Remove(String findThis){
    // find a particular string in the char array and remove it
    // (reducing the size of the array as necessary)

    // we could use clever C functions like memmove etc. but for some
    // functions behaviour is undefined if strings overlap, and for some
    // they create a shadow copy before moving; we might not have the
    // memory available to do this, so lets use the array itself

    // get the index of the first character in the string
    // we're trying to remove
    char c;
    int c_index=0;
    int r_index=-1;
    int match_count = 0;
    int find_count = findThis.length();   
    int buff_len = 500-find_count;
    char __findThis[find_count+1];
   
    findThis.toCharArray(__findThis, find_count+1);   
   
    // walk through the response array, looking for the search string
    for(int i=0; i<500; i++){
        if(wifi_response[i]==__findThis[c_index]){           
            match_count++;
            c_index++;

            if(match_count==find_count){
                // we've just found our string
                r_index=(i-find_count)+1;
                // quit the for-next loop early
                i=500;               
            }
        }else{
            match_count=0;
            c_index=0;
        }       
    }

    if(r_index > -1){
        // remove x characters from the array by simply budging everything up
        for(int i=r_index; i<buff_len; i++){
            wifi_response[i] = wifi_response[i+find_count];
        }
        for(int i=buff_len+1; i<500; i++){ wifi_response[i]=0; }
    }
   
}

bool checkIP(){
    // first, send a CIFSR instruction then wait for a response
    this_ip="0.0.0.0";
    writeAT(F("AT+CIFSR"));
    readInput(4000, "192.168.", "0.0.0.0", true);
    if(wifi_matched==true){               
        if(wifiResponseContains("OK")==true){               
            wifiResponse_Remove("OK");
            wifiResponse_Remove("AT+CIFSR");

            // convert the char array into a string
            // this_ip=wifi_response;                               
            this_ip=wifi_response;
            return(true);       
           
        }else{           
            return(false);
        }
    }else{               
        return(false);
    }
}

void readInput(int timeout, String match, String err, bool exitOnFind) {       
    unsigned long stop = millis()+timeout;
   
    emptyWifiResponse();
    wifi_matched=false;

    if(in_debug==true){
        Serial.print(F("Looking for: "));
        Serial.println(match);
    }
   
    if(show_wifi_output==true){
        Serial.print(F("raw wifi >"));
    }else{
        Serial.print(F("wifi response: "));
    }

    // point to the start of the character array
    ptr_index=0;
   
    do {
        while(mySerial.available()>0){
           
            char b=mySerial.read();           
            if(show_wifi_output==true){ Serial.print(b);}
           
            if(b==0x0a || b==0x0d || b < 0x20 || b > 0x80){
                // ignore these characters               
            }else{
                wifi_response[ptr_index] = b;
                ptr_index++;
           
            }
            if(wifiResponseContains(err)==true){
                // error condition has been matched   
                if(in_debug==true){ Serial.println(F("Error condition matched")); }
                stop=millis()-1;

                // read the rest of the buffer just to empty it
                emptyWifiResponse();               
               
                while(mySerial.available()>0){
                    b = mySerial.read();
                    if(b==0x0a || b==0x0d || b < 0x20 || b > 0x80){
                        // ignore these characters               
                    }else{
                        wifi_response[ptr_index] = b;
                        ptr_index++;           
                    }
                }
                break;
               
            }           
           
            if(wifiResponseContains(match) == true ){
                // requested string has been matched
                // so now return everything after the matched word   
                if(in_debug==true){
                    Serial.println(F("Match condition found"));
                    Serial.println();
                }
                wifi_matched = true;
                if(exitOnFind==true){
                    stop=millis()-1;
                    // read the rest of the buffer just to empty it               
                    while(mySerial.available()>0){
                        b = mySerial.read();
                        if(b==0x0a || b==0x0d || b < 0x20 || b > 0x80){
                            // ignore these characters               
                        }else{
                            wifi_response[ptr_index] = b;
                            ptr_index++;
                        }
                    }
                    break;
                }
               
            }
        }
    } while (millis() < stop);   

    // this is a bit hacky, but it'll have to do.
    // sometimes, when we're expecting OK we get "no change"
    // but if there's been no change, that's the same as
    // OK, so let's deal with that

    if(show_wifi_output==true){
        Serial.println(F("<"));
        Serial.print(F("Buffered response: "));
    }

    if(in_debug==true){ Serial.println(wifi_response); }
   
    if(match=="OK" && wifiResponseContains("no change")==true){
        if(in_debug==true){ Serial.print(F(" OK/no change (close enough) ")); }
        wifi_matched=true;
    }

    if(in_debug==true){
        Serial.print(F("match found: "));
        Serial.println(wifi_matched);   
    }
   
}


void readCloseInput(int timeout, String match, String err, bool exitOnFind) {       
    unsigned long stop = millis()+timeout;   
    cmd="";
    bool closeOK = false;

    // because the response from an AT command is small, we're using this
    // simplified function (complete with Strings!) to check the response
    // to the CIPCLOSE command, without wiping out the character array
    // buffer (which at this point contains the HTML we want to parse)
   
    do {
        while(mySerial.available()>0){
           
            char b=mySerial.read();           
            if(show_wifi_output==true){ Serial.print(b);}
           
            if(b==0x0a || b==0x0d || b < 0x20 || b > 0x80){
                // ignore these characters               
            }else{
                cmd.concat(b);           
            }
            if(cmd.indexOf(err)>=0){
                // error condition has been matched   
                if(in_debug==true){ Serial.println(F("Error condition matched")); }
                stop=millis()-1;

                // read the rest of the buffer just to empty it               
                while(mySerial.available()>0){ b = mySerial.read(); }
                break;
               
            }           
           
            if(cmd.indexOf(match)>=0 ){
                // requested string has been matched
                // so now return everything after the matched word   
                if(in_debug==true){ Serial.println(F("Match condition found")); }
                closeOK = true;
                if(exitOnFind==true){
                    stop=millis()-1;
                    // read the rest of the buffer just to empty it               
                    while(mySerial.available()>0){ b = mySerial.read(); }
                    break;
                }               
            }
        }
    } while (millis() < stop);   

    if(in_debug==true){
        Serial.print(F("Closed ok: "));
        Serial.println(closeOK);   
    }
}


void flashLED(int k){
    for(int i=0; i<k; i++){
            digitalWrite(led_pin,HIGH);
            delay(250);
            digitalWrite(led_pin,LOW);
            delay(500);
    }   
}


void connectToRouter(){   
        String p;
       
        // hardware reset the module
        if(in_debug==true){ Serial.println(F("Resetting wifi"));}
        digitalWrite(wifi_reset, LOW);
        delay(500);
        digitalWrite(wifi_reset, HIGH);
       
        // Look for ready string from wifi module
        readInput(4000, "Ready", "Error", true);
        if(in_debug==true){
            Serial.print(F("Wifi read: "));
            Serial.println(wifi_response);
        }   

        if(wifi_matched==true){           
            writeAT(F("AT+CWMODE=1"));
            readInput(2500, "OK", "Error", true);
            if(in_debug==true){
                Serial.print(F("Wifi: "));
                Serial.println(wifi_response);
            }   

            if(wifi_matched==true || wifi_matched!=true){               

                delay(1000);
                has_ip=checkIP();                   
               
                if(has_ip==false){                                                   
                    p = "AT+CWJAP=\"";
                    p.concat(ssid);
                    p.concat("\",\"");
                    p.concat(pwd);
                    p.concat("\"");                   
                    writeAT(p);   

                    // there's not really an OK message with CWJAP
                    // just returns a string when completed
                    readInput(16000, "OK", "FAIL", true);
               
                    if(wifiResponseContains("FAIL")==true){
                        // can't connect to the access point
                        Serial.println(F("Can't get onto the access point"));   
                   
                    }else if(wifi_matched==true){               
                        // so now query the system for an IP address again
                        has_ip=checkIP();                   
                        if(has_ip==true){                       
                            Serial.print(F("This ip address is: "));
                            Serial.println(this_ip);                                                   
                        }else{
                            // access point refused
                            Serial.println(F("Refused access to AP"));
                        }
                    }else{
                        Serial.println(F("No response from AP"));
                    }
                }               
            }
           
        }else{       
            Serial.println(F("Unable to reboot wifi module"));
        }
}

void getWebPageContents(String url){

            cmd="AT+CIPSTART=\"TCP\",\"";
            cmd.concat(DEST_IP);
            cmd.concat("\",80");           
            writeAT(cmd);           
           
            readInput(8000, "Linked", "ERROR", true);
            if(wifi_matched==true){

                // tell the server how many bytes to expect
                // by first building the request then getting
                // a character count (including trailing \r\n)               
                delay(100);
               
                cmd = "GET ";
                cmd += url;
                cmd += "?user=";
                cmd += user;
                cmd += HTTP_REQ;               
                   
                String c = "AT+CIPSEND=";
                c.concat(String(cmd.length()));               
                writeAT(c);                                                               
                readInput(4000, ">", "Error", true);
               
                if(wifi_matched==true){
                                                   
                    // now send the request to actually get some data
                    delay(500);                   
               
                    emptyWifiResponse();               
                    while(mySerial.available()>0){ mySerial.read(); }       

                    // send the AT command to the wifi module
                    if(in_debug==true){
                        Serial.print(F("Sending: "));
                        Serial.println(cmd);
                    }
                    mySerial.print(cmd);

                    // now we can't just use our readInput as the response could be in multipart chunks
                    // so we need to parse this separately
                    getHTMLResponse();
                   
                }else{
                    Serial.println(F("Can't enter send data mode"));
                    emptyWifiResponse();   
                }

                // now close the connection
                delay(300);               
               
                // empty the incoming serial buffer
                while(mySerial.available()>0){ mySerial.read(); }       
                // send the AT command to the wifi module
                mySerial.println(F("AT+CIPCLOSE"));

                // while it's tempting here to use the readInput function
                // to capture the response from the CIPCLOSE instruction
                // to do so would mean we lose the entire HTML page we've
                // just downloaded, so we use a different function here
                readCloseInput(4000, "Unlink", "ERROR", true);                               
               
            }else{
                Serial.print(F("Can't contact "));
                Serial.println(DEST_IP);
                emptyWifiResponse();   
            }
           
}

void getHTMLResponse(){
   
    unsigned long stop = millis()+(HTML_TIMEOUT*1000);
    emptyWifiResponse();
   
    int rec_state = 0;
    int resp_len = 0;
    bool quit=false;
    int char_received;
    bool html_header_found;

    emptyWifiResponse();
    ptr_index=0;
   
                                                   
    do {
        while(mySerial.available()>0){
            char b=mySerial.read();

            // try just concatenating the character b to a string in this
            // function and you'll quickly see why we're using a fixed
            // length character array to receive the HTML from the server!
           
            switch(rec_state){

                case 0:
                // get the first part of the response up to +IPD           
                wifi_response[ptr_index] = b;
                ptr_index++;               
               
                if(wifiResponseContains("+IPD,")==true){
                    // this is the start of the server HTML response
                    if(in_debug==true){ Serial.println(F("+IPD, string found")); }
                    emptyWifiResponse();
                    rec_state=1;
                }
                break;

                case 1:
                // get the number of characters included in the reponse
                // (so we know when to stop reading)
                wifi_response[ptr_index] = b;
                ptr_index++;
                if(wifiResponseContains(":")==true){

                    // remove the colon from the end of the string
                    // and parse it into a value
                    ptr_index--;
                    wifi_response[ptr_index]=0;
                   
                    // turn the character array into a string
                    // then use toInt to get the value
                    String t = "";
                    for(int it=0; it<ptr_index; it++){
                        t.concat(wifi_response[it]);
                    }

                    if(in_debug==true){
                        Serial.print(F("HTML character count: "));
                        Serial.println(t);
                    }
                    resp_len=t.toInt();   
                   
                    if(in_debug==true){
                        Serial.print(F("HTML response length: "));           
                        Serial.println(resp_len,DEC);                       
                    }               
                       
                    emptyWifiResponse();
                    ptr_index=0;
                   
                    char_received=0;
                    html_header_found=false;
                    rec_state=2;
                }
                break;

                case 2:
                // just keep receiving characters, adding them to the receive
                // buffer, up to the end of the response
                if(quit==false){               
                    wifi_response[ptr_index] = b;
                    ptr_index++;
                    char_received++;
                                       
                    // if the received string is too long, the memory overrun can cause
                    // all kinds of weird stuff to happen, so keep it nice and short               
               
                    // an HTML1.1 reponse separates the header of the message from the body
                    // using a double-CRLF entry

                   
                    if(html_header_found==false){
                        if(wifiResponseContainsHeader() == true){
                            if(in_debug==true){ Serial.println(F("End of HTML header found")); }
                            html_header_found=true;
                            emptyWifiResponse();
                        }
                    }
                       
               
                    // once you've received X characters, jump out of the
                    // timeout delay loop (ignore the last two characters
                    // as they're a CrLf combination)
                    if(char_received>=(resp_len-HTML_OVERHEAD) || b=='|'){
                        if(in_debug==true){   
                            Serial.println(F("Full message received. Quitting read routine"));
                        }
                        quit=true;
                    }
                }                               
                break;
               
            }
           
        }
    } while (millis() < stop && quit==false );

    // null terminate the char array so we can Serial.print it
    wifi_response[ptr_index]=0;

    if(in_debug==true){
        Serial.print(F("characters received: "));
        Serial.println(char_received);
        Serial.print(F("HTML response: "));
        Serial.println(wifi_response);           
    }
   
}

void setup() {

    Serial.begin(19200);
    mySerial.begin(9600);
   
    // set up the pins
    pinMode(wifi_reset, OUTPUT);
    digitalWrite(wifi_reset, LOW);

    pinMode(button1_pin, INPUT_PULLUP);

    Serial.println(F("Let's go"));
    state=0;   

}

void loop() {

    // if we're not connected to a router, re-try periodically
    if(has_ip==false){       
        flashLED(1);       
        connectToRouter();           
        delay(3000);
       
    }else{
       
        switch(state){

            case 0:
            // wait for the user to press a button to call the web page
            b = digitalRead(button1_pin);
            if(b==LOW){ state = 1; }
            break;

           
            case 1:
            // after a successful call to this function, our character
            // array buffer contains the contents of the HTML page so
            // we can do whatever we like with it
            getWebPageContents(F("/timekeeper/test.php"));

            // do stuff with the wifi_response[i] buffer here to parse
            // the response(s) from the web server
            Serial.println(F("-----------------------"));
            Serial.print(F("wifi_buffer contents: "));
            Serial.println(wifi_response);
            Serial.println("");           
            state=0;
           
        }       
    }   
}


Now we're fully aware that this code still has plenty of String uses throughout it. And Strings are bad. But our first attempt used nothing but Strings and - up to the point of retrieving an actual HTML page - worked really well. So we're in the process of updating the code to turn ALL instances of Strings into fixed length character arrays. But since the AT commands and responses are relatively small, and our incoming HTML buffer is a fixed 600 characters or so, there's plenty of available RAM to handle all the dynamic memory swapping that Arduino does to handle strings. So this is where we're up to at the minute.

Enjoy.